feat(monitoring): add initial stack
This commit is contained in:
99
monitoring/compose.yaml
Normal file
99
monitoring/compose.yaml
Normal file
@@ -0,0 +1,99 @@
|
|||||||
|
# Named volumes for persistent data: prometheus_data is mounted at /prometheus
# by the prometheus service, grafana_data at /var/lib/grafana by grafana.
volumes:
  prometheus_data: {}
  grafana_data: {}
|
||||||
|
|
||||||
|
networks:
  # Pre-existing network named "npmplus". Because it is marked external,
  # Compose attaches to it instead of creating it.
  front-tier:
    name: npmplus
    external: true
  # Network with default settings shared by all services in this stack.
  # Written as an explicit empty mapping so it does not parse as null.
  back-tier: {}
|
||||||
|
|
||||||
|
services:
  # Prometheus server. Configuration is read from ./prometheus and the TSDB
  # is stored in the prometheus_data named volume.
  prometheus:
    image: prom/prometheus:v2.36.2
    volumes:
      - ./prometheus/:/etc/prometheus/
      - prometheus_data:/prometheus
    command:
      - '--config.file=/etc/prometheus/prometheus.yml'
      - '--storage.tsdb.path=/prometheus'
      - '--web.console.libraries=/usr/share/prometheus/console_libraries'
      - '--web.console.templates=/usr/share/prometheus/consoles'
    ports:
      - '9090:9090'
    # Start-order dependencies. These replace the legacy `links:` entries:
    # both services are on back-tier, so their names already resolve there.
    depends_on:
      - cadvisor
      - alertmanager
    networks:
      - back-tier
    restart: always

  # Host-level metrics exporter. The host's /proc, /sys and / are mounted
  # read-only and passed to the exporter via the --path.* flags.
  node-exporter:
    # NOTE(review): floating `latest` tag; consider pinning a version.
    image: quay.io/prometheus/node-exporter:latest
    volumes:
      - /proc:/host/proc:ro
      - /sys:/host/sys:ro
      - /:/rootfs:ro
      - /:/host:ro,rslave
    command:
      - '--path.rootfs=/host'
      - '--path.procfs=/host/proc'
      - '--path.sysfs=/host/sys'
      # Renamed from --collector.filesystem.ignored-mount-points, which
      # node_exporter deprecated in 1.3.0. The value is the next argument.
      - '--collector.filesystem.mount-points-exclude'
      # `$$` is the Compose escape for a literal `$`.
      - "^/(sys|proc|dev|host|etc|rootfs/var/lib/docker/containers|rootfs/var/lib/docker/overlay2|rootfs/run/docker/netns|rootfs/var/lib/docker/aufs)($$|/)"
    ports:
      - '9100:9100'
    networks:
      - back-tier
    restart: always
    # NOTE(review): deploy.mode is a Swarm deployment setting; confirm it is
    # still wanted when running with plain `docker compose`.
    deploy:
      mode: global

  # Alertmanager. Its configuration is read from ./alertmanager/config.yml.
  alertmanager:
    # NOTE(review): no tag given, so `latest` is pulled; consider pinning.
    image: prom/alertmanager
    ports:
      - '9093:9093'
    volumes:
      - ./alertmanager/:/etc/alertmanager/
    networks:
      - back-tier
    restart: always
    command:
      - '--config.file=/etc/alertmanager/config.yml'
      - '--storage.path=/alertmanager'

  # cAdvisor container-metrics exporter. It mounts the host root, /sys and
  # the Docker data directory read-only, and /var/run read-write.
  cadvisor:
    # NOTE(review): no tag given, so `latest` is pulled; consider pinning.
    image: gcr.io/cadvisor/cadvisor
    volumes:
      - /:/rootfs:ro
      - /var/run:/var/run:rw
      - /sys:/sys:ro
      - /var/lib/docker/:/var/lib/docker:ro
    ports:
      - '8080:8080'
    networks:
      - back-tier
    restart: always
    deploy:
      mode: global

  # Grafana. The only service attached to front-tier as well as back-tier.
  grafana:
    # NOTE(review): no tag given, so `latest` is pulled; consider pinning.
    image: grafana/grafana
    user: "472"
    depends_on:
      - prometheus
    ports:
      - '3000:3000'
    volumes:
      - grafana_data:/var/lib/grafana
      - ./grafana/provisioning/:/etc/grafana/provisioning/
    env_file:
      - ./grafana/config.monitoring
    networks:
      - back-tier
      - front-tier
    restart: always
|
||||||
|
|
||||||
50
monitoring/grafana/provisioning/datasources/datasource.yml
Normal file
50
monitoring/grafana/provisioning/datasources/datasource.yml
Normal file
@@ -0,0 +1,50 @@
|
|||||||
|
# Version of the provisioning config file format.
apiVersion: 1

# Datasources that should be deleted from the database.
deleteDatasources:
  - name: Prometheus
    orgId: 1
|
||||||
|
|
||||||
|
# Datasources to insert or update, depending on what is already available in
# the database.
datasources:
  # <string, required> Name of the datasource.
  - name: Prometheus
    # <string, required> Datasource type.
    type: prometheus
    # <string, required> Access mode: direct or proxy.
    access: proxy
    # <int> Org id. Defaults to orgId 1 if not specified.
    orgId: 1
    # <string> URL of the datasource; here the `prometheus` compose service.
    url: http://prometheus:9090
    # Unused string fields are written as explicit empty strings rather than
    # bare keys, which would parse as null.
    # <string> Database password, if used.
    password: ''
    # <string> Database user, if used.
    user: ''
    # <string> Database name, if used.
    database: ''
    # <bool> Enable/disable basic auth.
    basicAuth: false
    # <string> Basic auth username, if used.
    basicAuthUser: ''
    # <string> Basic auth password, if used.
    basicAuthPassword: ''
    # <bool> Enable/disable with-credentials headers.
    withCredentials: false
    # <bool> Mark as default datasource. Max one per org.
    isDefault: true
    # <map> Fields that will be converted to JSON and stored in json_data.
    jsonData:
      # NOTE(review): graphiteVersion looks like a leftover from a Graphite
      # example; confirm whether it is needed for a Prometheus datasource.
      graphiteVersion: "1.1"
      tlsAuth: false
      tlsAuthWithCACert: false
    # <string> JSON object of data that will be encrypted.
    # NOTE(review): the "..." values are placeholders; replace them with real
    # material before turning on tlsAuth / tlsAuthWithCACert.
    secureJsonData:
      tlsCACert: "..."
      tlsClientCert: "..."
      tlsClientKey: "..."
    version: 1
    # <bool> Allow users to edit datasources from the UI.
    editable: true
|
||||||
0
monitoring/prometheus/alerts.rules
Normal file
0
monitoring/prometheus/alerts.rules
Normal file
53
monitoring/prometheus/prometheus.yml
Normal file
53
monitoring/prometheus/prometheus.yml
Normal file
@@ -0,0 +1,53 @@
|
|||||||
|
# Global settings.
global:
  # Scrape targets every 15 seconds by default.
  scrape_interval: 15s
  # Evaluate rules every 15 seconds by default.
  evaluation_interval: 15s
  # scrape_timeout is left at the global default (10s).

  # Labels attached to any time series or alerts when communicating with
  # external systems (federation, remote storage, Alertmanager).
  external_labels:
    monitor: 'my-project'
|
||||||
|
|
||||||
|
# Rule files to load; their rules are evaluated every `evaluation_interval`.
# The entry must match the rules file that sits next to this config
# (alerts.rules), otherwise nothing is loaded.
rule_files:
  - 'alerts.rules'
  # - "first.rules"
  # - "second.rules"
|
||||||
|
|
||||||
|
# Alerting: the Alertmanager endpoint, reached over plain HTTP.
alerting:
  alertmanagers:
    - scheme: http
      static_configs:
        - targets: ['alertmanager:9093']
|
||||||
|
|
||||||
|
# Scrape configurations, one job per endpoint: Prometheus itself, cAdvisor
# and node-exporter.
scrape_configs:
  # The job name is added as a label `job=<job_name>` to any time series
  # scraped from this config.

  - job_name: 'prometheus'
    # Per-job scrape interval; currently the same value as the global default.
    scrape_interval: 15s
    static_configs:
      - targets:
          - 'localhost:9090'

  - job_name: 'cadvisor'
    # Per-job scrape interval; currently the same value as the global default.
    scrape_interval: 15s
    static_configs:
      - targets:
          - 'cadvisor:8080'

  - job_name: 'node-exporter'
    # Per-job scrape interval; currently the same value as the global default.
    scrape_interval: 15s
    static_configs:
      - targets:
          - 'node-exporter:9100'
|
||||||
Reference in New Issue
Block a user