昨天有跟大家分享過Prometheus是什麼東西,今天就來用python實際操作看看囉~
程式碼在這~我的Github
docker-prometheus/
├── alertmanager
│   └── config.yml
├── docker-compose.yml
├── grafana
│   └── config.monitoring
└── prometheus
    ├── alert.yml
    └── prometheus.yml
- prometheus 監控系統本體
- alertmanager 管理發送警報
- node_exporter 收集主機的運行指標如CPU, 內存,磁盤
- grafana 搭配的視覺化 Web UI
# docker-compose.yml: Prometheus, Alertmanager, node_exporter and Grafana,
# all attached to the single "monitoring" bridge network.
version: '3.3'

# Named volumes mounted by the prometheus and grafana services below.
volumes:
  prometheus_data: {}
  grafana_data: {}

networks:
  monitoring:
    driver: bridge

services:
  prometheus:
    image: prom/prometheus
    container_name: prometheus
    restart: always
    volumes:
      - /etc/localtime:/etc/localtime:ro
      # Relative host paths are resolved against this compose file, so the
      # stack does not depend on $PWD being set in the calling shell.
      - ./prometheus/:/etc/prometheus/
      - prometheus_data:/prometheus
    command:
      - '--config.file=/etc/prometheus/prometheus.yml'
      - '--storage.tsdb.path=/prometheus'
      - '--web.console.libraries=/usr/share/prometheus/console_libraries'
      - '--web.console.templates=/usr/share/prometheus/consoles'
    networks:
      - monitoring
    links:
      - alertmanager
      - node_exporter
    # flask_web_1 is not a service defined in this file, so it cannot be listed
    # under `links` (Compose rejects links to undefined services). It is
    # referenced as an external container instead and aliased to `web`, the
    # host name used by the flask_web scrape target (web:5000).
    # NOTE(review): the flask_web_1 container presumably comes from a separate
    # compose project and must share a network with this service - confirm.
    external_links:
      - flask_web_1:web
    expose:
      - '9090'
    ports:
      # Quoted so the mapping is always read as a string.
      - '9090:9090'

  alertmanager:
    image: prom/alertmanager
    container_name: alertmanager
    restart: always
    volumes:
      - /etc/localtime:/etc/localtime:ro
      - ./alertmanager/:/etc/alertmanager/
    command:
      - '--config.file=/etc/alertmanager/config.yml'
      - '--storage.path=/alertmanager'
    networks:
      - monitoring
    expose:
      - '9093'
    ports:
      - '9093:9093'

  node_exporter:
    image: prom/node-exporter:v0.18.0
    container_name: node_exporter
    restart: always
    volumes:
      - /etc/localtime:/etc/localtime:ro
      # Host /proc, /sys and / are mounted read-only for the exporter to read.
      - /proc:/host/proc:ro
      - /sys:/host/sys:ro
      - /:/rootfs:ro
    command:
      - '--path.procfs=/host/proc'
      - '--path.sysfs=/host/sys'
      # The flag and its regex value are passed as two separate arguments.
      # `$$` is the Compose escape for a literal `$` in the regex.
      - --collector.filesystem.ignored-mount-points
      - "^/(sys|proc|dev|host|etc|rootfs/var/lib/docker/containers|rootfs/var/lib/docker/overlay2|rootfs/run/docker/netns|rootfs/var/lib/docker/aufs)($$|/)"
    networks:
      - monitoring
    # Only exposed to other containers; no host port is published.
    expose:
      - '9100'

  grafana:
    image: grafana/grafana
    user: "104"
    container_name: grafana
    restart: always
    volumes:
      - /etc/localtime:/etc/localtime:ro
      - grafana_data:/var/lib/grafana
      - ./grafana/provisioning/:/etc/grafana/provisioning/
    # GF_* settings (admin password, SMTP) are read from this env file.
    env_file:
      - ./grafana/config.monitoring
    networks:
      - monitoring
    links:
      - prometheus
    ports:
      - '3000:3000'
    depends_on:
      - prometheus
└── prometheus
    ├── alert.yml
    └── prometheus.yml
# prometheus.yml: global settings, Alertmanager endpoint, rule files and
# scrape targets for the Prometheus server.
global:
  # Default interval between scrapes of a target: every 3 seconds here
  # (the Prometheus default is every 1 minute).
  scrape_interval: 3s
  # Interval at which rules are evaluated: every 3 seconds here
  # (the Prometheus default is every 1 minute).
  evaluation_interval: 3s
  # scrape_timeout is not set, so the global default (10s) applies.
  # external_labels (extra labels added to scraped data before it is stored)
  # is not set either.

# Alertmanager instance that receives fired alerts.
alerting:
  alertmanagers:
    - static_configs:
        - targets:
            - "alertmanager:9093"

# Rules are loaded from alert.yml and evaluated every evaluation_interval.
rule_files:
  - 'alert.yml'

# Targets to scrape.
scrape_configs:
  # job_name is the name of the scrape job.
  - job_name: "prometheus"
    # A per-job scrape_interval overrides the global value, so each service
    # can be scraped at its own rate.
    scrape_interval: 3s
    static_configs:
      - targets:
          - "prometheus:9090"

  - job_name: "node_exporter"
    scrape_interval: 3s
    static_configs:
      - targets:
          - "node_exporter:9100"

  - job_name: "flask_web"
    scrape_interval: 3s
    static_configs:
      - targets:
          - "web:5000"
# alert.yml: alerting rules loaded by Prometheus through rule_files.
groups:
  - name: demo
    rules:
      # Matches any scrape target whose `up` series equals 0.
      - alert: service_down
        expr: up == 0
        # The condition must hold continuously for 2 minutes before the alert
        # fires, which is what the description annotation promises. Without a
        # `for` clause the alert fires on the first failing evaluation.
        for: 2m
        labels:
          severity: page
        annotations:
          summary: "Instance {{ $labels.instance }} down"
          description: "{{ $labels.instance }} of job {{ $labels.job }} has been down for more than 2 minutes."
# alertmanager/config.yml: send grouped alerts by e-mail through Gmail SMTP.
global:
  smtp_smarthost: "smtp.gmail.com:587"
  # Address the mail is sent from.
  smtp_from: "自己的@gmail.com"
  # Account used to authenticate against the SMTP server.
  smtp_auth_username: "自己的@gmail.com"
  # Password: the placeholder asks for an application password, not the
  # personal account password.
  smtp_auth_password: "應用程式的密碼,不是你自己的"
  smtp_require_tls: true

route:
  group_by:
    - "alertname"
  # Buffer an alert for 15s so that others of the same group that match
  # in that window are sent in the same mail.
  group_wait: 15s
  # How long to wait before sending a further notification for the same
  # group, counted from the last notification sent.
  group_interval: 3s
  # repeat_interval: 10m  # resend every ten minutes
  receiver: "live-monitoring"

receivers:
  - name: "live-monitoring"
    # Recipient of the alert mail.
    email_configs:
      - to: "收件人@gmail.com"
# grafana/config.monitoring: GF_* settings passed to the grafana container
# through env_file. Values are placeholders to be replaced before use.
GF_SECURITY_ADMIN_PASSWORD=密碼
GF_USERS_ALLOW_SIGN_UP=false
GF_SMTP_ENABLED=true
GF_SMTP_HOST=smtp.gmail.com:587
# A single address with one "@gmail.com" suffix, the same placeholder
# form as GF_SMTP_FROM_ADDRESS below.
GF_SMTP_USER=自己的@gmail.com
GF_SMTP_PASSWORD=應用程式的密碼,不是你自己的
GF_SMTP_FROM_ADDRESS=自己的@gmail.com
最後就是進到 docker-prometheus 目錄,執行 docker-compose up -d,就大功告成!!
參考資料