diff --git a/monitoring/compose.yaml b/monitoring/compose.yaml new file mode 100644 index 0000000..4c8c84b --- /dev/null +++ b/monitoring/compose.yaml @@ -0,0 +1,99 @@ +volumes: + prometheus_data: {} + grafana_data: {} + +networks: + front-tier: + name: npmplus + external: true + back-tier: + +services: + + prometheus: + image: prom/prometheus:v2.36.2 + volumes: + - ./prometheus/:/etc/prometheus/ + - prometheus_data:/prometheus + command: + - '--config.file=/etc/prometheus/prometheus.yml' + - '--storage.tsdb.path=/prometheus' + - '--web.console.libraries=/usr/share/prometheus/console_libraries' + - '--web.console.templates=/usr/share/prometheus/consoles' + ports: + - 9090:9090 + links: + - cadvisor:cadvisor + - alertmanager:alertmanager + depends_on: + - cadvisor + networks: + - back-tier + restart: always + + node-exporter: + image: quay.io/prometheus/node-exporter:latest + volumes: + - /proc:/host/proc:ro + - /sys:/host/sys:ro + - /:/rootfs:ro + - /:/host:ro,rslave + command: + - '--path.rootfs=/host' + - '--path.procfs=/host/proc' + - '--path.sysfs=/host/sys' + - --collector.filesystem.ignored-mount-points + - "^/(sys|proc|dev|host|etc|rootfs/var/lib/docker/containers|rootfs/var/lib/docker/overlay2|rootfs/run/docker/netns|rootfs/var/lib/docker/aufs)($$|/)" + ports: + - 9100:9100 + networks: + - back-tier + restart: always + deploy: + mode: global + + alertmanager: + image: prom/alertmanager + ports: + - 9093:9093 + volumes: + - ./alertmanager/:/etc/alertmanager/ + networks: + - back-tier + restart: always + command: + - '--config.file=/etc/alertmanager/config.yml' + - '--storage.path=/alertmanager' + + cadvisor: + image: gcr.io/cadvisor/cadvisor + volumes: + - /:/rootfs:ro + - /var/run:/var/run:rw + - /sys:/sys:ro + - /var/lib/docker/:/var/lib/docker:ro + ports: + - 8080:8080 + networks: + - back-tier + restart: always + deploy: + mode: global + + grafana: + image: grafana/grafana + user: "472" + depends_on: + - prometheus + ports: + - 3000:3000 + volumes: + - grafana_data:/var/lib/grafana + - ./grafana/provisioning/:/etc/grafana/provisioning/ + env_file: + - ./grafana/config.monitoring + networks: + - back-tier + - front-tier + restart: always + diff --git a/monitoring/grafana/provisioning/datasources/datasource.yml b/monitoring/grafana/provisioning/datasources/datasource.yml new file mode 100644 index 0000000..c02bb38 --- /dev/null +++ b/monitoring/grafana/provisioning/datasources/datasource.yml @@ -0,0 +1,50 @@ +# config file version +apiVersion: 1 + +# list of datasources that should be deleted from the database +deleteDatasources: + - name: Prometheus + orgId: 1 + +# list of datasources to insert/update depending +# whats available in the database +datasources: + # name of the datasource. Required +- name: Prometheus + # datasource type. Required + type: prometheus + # access mode. direct or proxy. Required + access: proxy + # org id. will default to orgId 1 if not specified + orgId: 1 + # url + url: http://prometheus:9090 + # database password, if used + password: + # database user, if used + user: + # database name, if used + database: + # enable/disable basic auth + basicAuth: false + # basic auth username, if used + basicAuthUser: + # basic auth password, if used + basicAuthPassword: + # enable/disable with credentials headers + withCredentials: + # mark as default datasource. Max one per org + isDefault: true + # fields that will be converted to json and stored in json_data + jsonData: + graphiteVersion: "1.1" + tlsAuth: false + tlsAuthWithCACert: false + # json object of data that will be encrypted. + secureJsonData: + tlsCACert: "..." + tlsClientCert: "..." + tlsClientKey: "..." + version: 1 + # allow users to edit datasources from the UI. + editable: true diff --git a/monitoring/prometheus/alerts.rules b/monitoring/prometheus/alerts.rules new file mode 100644 index 0000000..e69de29 diff --git a/monitoring/prometheus/prometheus.yml b/monitoring/prometheus/prometheus.yml new file mode 100644 index 0000000..ae76a07 --- /dev/null +++ b/monitoring/prometheus/prometheus.yml @@ -0,0 +1,53 @@ +# my global config +global: + scrape_interval: 15s # By default, scrape targets every 15 seconds. + evaluation_interval: 15s # By default, scrape targets every 15 seconds. + # scrape_timeout is set to the global default (10s). + + # Attach these labels to any time series or alerts when communicating with + # external systems (federation, remote storage, Alertmanager). + external_labels: + monitor: 'my-project' + +# Load and evaluate rules in this file every 'evaluation_interval' seconds. +rule_files: + - 'alert.rules' + # - "first.rules" + # - "second.rules" + +# alert +alerting: + alertmanagers: + - scheme: http + static_configs: + - targets: + - "alertmanager:9093" + +# A scrape configuration containing exactly one endpoint to scrape: +# Here it's Prometheus itself. +scrape_configs: + # The job name is added as a label `job=` to any timeseries scraped from this config. + + - job_name: 'prometheus' + + # Override the global default and scrape targets from this job every 5 seconds. + scrape_interval: 15s + + static_configs: + - targets: ['localhost:9090'] + + - job_name: 'cadvisor' + + # Override the global default and scrape targets from this job every 5 seconds. + scrape_interval: 15s + + static_configs: + - targets: ['cadvisor:8080'] + + - job_name: 'node-exporter' + + # Override the global default and scrape targets from this job every 5 seconds. + scrape_interval: 15s + + static_configs: + - targets: ['node-exporter:9100']