Prometheus、Alertmanager、Grafana监控Linux与MySQL [数据库教程]

database

//检查各个端口的放行

//部署各个模块与应用

cd /usr/local/Prometheus_compose

vim docker-compose.yml

# Compose file for the monitoring host. It starts Prometheus, node-exporter,
# Grafana, Alertmanager, cAdvisor and grafana-reporter.
# Port mappings are quoted so YAML never re-types them, and all quotes are
# plain ASCII (typographic quotes would become part of the value).
version: "3"

services:
  # Prometheus server; its configuration directory is bind-mounted from
  # ./prometheus on the host.
  prom:
    image: quay.io/prometheus/prometheus:latest
    container_name: prometheus
    volumes:
      - ./prometheus:/etc/prometheus
    command:
      - "--config.file=/etc/prometheus/prometheus.yml"
      - "--storage.tsdb.path=/prometheus"
    ports:
      - "9090:9090"
    depends_on:
      - exporter
    environment:
      - TZ=Asia/Shanghai

  # node-exporter for the monitoring host itself.
  exporter:
    image: prom/node-exporter:latest
    container_name: node-exporter
    hostname: cicd
    ports:
      - "9100:9100"
    environment:
      - TZ=Asia/Shanghai

  grafana:
    image: grafana/grafana
    container_name: grafana
    ports:
      - "3000:3000"
    environment:
      # NOTE(review): weak, hard-coded admin password; replace it before
      # exposing port 3000 beyond a trusted network.
      - "GF_SECURITY_ADMIN_PASSWORD=123123"
      - "GF_INSTALL_PLUGINS=alexanderzobnin-zabbix-app"
      - TZ=Asia/Shanghai
    restart: "always"
    volumes:
      # The whole ./grafana directory is mounted over /etc/grafana; the
      # entries below mount individual sub-paths on top of it.
      - ./grafana:/etc/grafana/
      - ./grafana/conf/grafana.ini:/etc/grafana/grafana.ini
      - ./grafana/data:/var/lib/grafana:rw
      - ./grafana/plugins:/var/lib/grafana/plugins:rw
      - /etc/localtime:/etc/localtime
    depends_on:
      - prom

  alertmanager:
    image: prom/alertmanager:latest
    container_name: alertmanager
    hostname: alertmanager
    restart: always
    ports:
      - '9093:9093'
    volumes:
      - './alertmanager/config:/etc/alertmanager'
      - './alertmanager/data:/alertmanager/data'
    command:
      - '--config.file=/etc/alertmanager/alertmanager.yml'
    environment:
      - TZ=Asia/Shanghai

  # Container metrics for the monitoring host.
  # NOTE(review): the google/cadvisor Docker Hub image appears to be
  # deprecated in favour of gcr.io/cadvisor/cadvisor; confirm before use.
  cadvisor:
    image: google/cadvisor
    container_name: cadvisors
    restart: always
    volumes:
      - /:/rootfs:ro
      - /var/run:/var/run:rw
      - /sys:/sys:ro
      - /var/lib/docker/:/var/lib/docker:ro
    ports:
      - "8080:8080"
    privileged: true
    environment:
      - TZ=Asia/Shanghai

  grafana-reporter:
    image: izakmarais/grafana-reporter
    container_name: grafana_reporter
    ports:
      - "8686:8686"
    command: "-ip grafana.mitaiot.com"
    environment:
      - TZ=Asia/Shanghai

//编辑报警模块的配置文件

cd /usr/local/Prometheus_compose/alertmanager/config

cat alertmanager.yml

# Alertmanager configuration: sends every alert by e-mail through the SMTP
# relay below, grouped by alert name.
global:
  resolve_timeout: 5m
  smtp_from: '123456789@sina.com'
  smtp_smarthost: 'smtp.sina.com:587'
  smtp_auth_username: '123456789@sina.com'
  # NOTE(review): credential stored in plain text; keep this file out of
  # version control.
  smtp_auth_password: 'aabbccdd'
  smtp_require_tls: false
  smtp_hello: 'sina.com'

route:
  group_by: ['alertname']
  group_wait: 5s
  group_interval: 5s
  repeat_interval: 5m
  receiver: 'email'

receivers:
  - name: 'email'
    email_configs:
      # Recipient and HTML body are rendered from the named templates
      # "email.to" and "email.to.html".
      - to: '{{ template "email.to" . }}'
        html: '{{ template "email.to.html" . }}'
        send_resolved: true

inhibit_rules:
  # Suppress 'warning' alerts while a 'critical' alert with the same
  # alertname/dev/instance labels is firing.
  # NOTE(review): the alert rules shown in this tutorial set `severity: 1`,
  # so this rule will not match them as written; align the label values.
  - source_match:
      severity: 'critical'
    target_match:
      severity: 'warning'
    equal: ['alertname', 'dev', 'instance']

templates:
  - "/etc/alertmanager/alertmanager-tmpl/email.tmpl"

//编辑发送的邮件模板

cd /usr/local/Prometheus_compose/alertmanager/config/alertmanager-tmpl

cat email.tmpl

{{/* Named templates for e-mail notifications. */}}
{{/* "email.from" and "email.to" define the sender and recipient addresses. */}}
{{ define "email.from" }}123456789@sina.com{{ end }}
{{ define "email.to" }}123456789@sina.com{{ end }}
{{/* "email.to.html" renders one HTML section per alert in .Alerts. */}}
{{/* Go time layouts must be written using the reference time 2006-01-02 15:04:05. */}}
{{ define "email.to.html" }}
{{ range .Alerts }}
=========start==========<br>
告警程序: prometheus_alert <br>
告警级别: {{ .Labels.severity }} 级 <br>
告警类型: {{ .Labels.alertname }} <br>
故障主机: {{ .Labels.instance }} <br>
告警主题: {{ .Annotations.summary }} <br>
告警详情: {{ .Annotations.description }} <br>
触发时间: {{ .StartsAt.Format "2006-01-02 15:04:05" }} <br>
=========end==========<br>
{{ end }}
{{ end }}

cd /usr/local/Prometheus_compose/grafana/conf

vim grafana.ini # 配置文件太长,标出修改部分

# Excerpt of grafana.ini: only the sections changed for this setup.

# Let unauthenticated visitors in, with the Viewer role in "Main Org.".
[auth.anonymous]
enabled = true
org_name = Main Org.
org_role = Viewer

# Outgoing mail settings.
[smtp]
enabled = true
host = smtp.sina.com:587
user = 123456789@sina.com
# NOTE(review): credential stored in plain text; keep this file private.
password = dc28ac6ec64af9c1
skip_verify = true
from_address = 123456789@sina.com
from_name = Grafana
ehlo_identity =

# Default time zone. The key belongs to [date_formats], not [smtp], and the
# comment is on its own line so it cannot be read as part of the value.
[date_formats]
default_timezone = Asia/Shanghai

//修改 Prometheus 的配置文件 prometheus.yml

cd /usr/local/Prometheus_compose/prometheus

# Prometheus server configuration.
global:
  # Scrape targets every 5 seconds (the Prometheus default is 1 minute).
  scrape_interval: 5s
  # Evaluate rules every 5 seconds (the Prometheus default is 1 minute).
  evaluation_interval: 5s

scrape_configs:
  # Prometheus scraping its own metrics endpoint.
  - job_name: 'prometheus'
    static_configs:
      - targets: ['172.16.225.154:9090']

  # All exporters share the 'node' job: targets come from the JSON files
  # matched by file_sd_configs plus the static list below.
  - job_name: 'node'
    file_sd_configs:
      - files: ['/etc/prometheus/groups/nodegroups/*.json']
    static_configs:
      - targets:
          - '172.16.225.154:9100'
          - '172.16.225.156:9100'
          - '172.16.225.155:9100'
          - '172.16.225.157:9100'
          - '172.16.225.156:8085'
          - '172.16.225.154:8080'
          - '172.16.225.155:8085'
          - '172.16.225.157:8085'
          - '172.16.225.157:9104'

alerting:
  alertmanagers:
    - static_configs:
        - targets:
            - '172.16.225.154:9093'

rule_files:
  - "/etc/prometheus/rules/*.yml"

//配置报警规则(规则文件需放在该目录下并以 .yml 结尾,才能被 prometheus.yml 中的 rule_files 匹配到)

cd /usr/local/Prometheus_compose/prometheus/rules

# Alerting rules for targets scraped by Prometheus: availability, CPU,
# memory and filesystem usage.
groups:
  - name: node-up
    rules:
      # Fires when a target of the 'node' job has been down for 15 seconds.
      - alert: node-up
        expr: up{job="node"} == 0
        for: 15s
        labels:
          severity: "1"
          team: node
        annotations:
          summary: "{{ $labels.instance }} 已停止运行!"
          description: "{{ $labels.instance }} 检测到异常停止!请重点关注!!!"

  - name: node-cpu
    rules:
      # Fires when CPU usage (100 minus the idle percentage) stays above
      # 90% for 1 minute.
      - alert: node-cpu
        expr: 100 - ((avg by (instance,job,env)(irate(node_cpu_seconds_total{mode="idle"}[30s]))) *100) > 90
        for: 1m
        labels:
          severity: "1"
          team: node
          level: warning
        annotations:
          summary: "{{ $labels.instance }} CPU使用率超过 百分之90!"
          description: "{{ $labels.instance }} 检测CPU连续1分钟占用率超出90%!请重点关注!!!"

  - name: node-mem
    rules:
      # Fires when memory usage exceeds 90%. Used memory is computed as
      # total - free - buffers - cached.
      - alert: node-mem
        expr: ((node_memory_MemTotal_bytes -(node_memory_MemFree_bytes+node_memory_Buffers_bytes+node_memory_Cached_bytes) )/node_memory_MemTotal_bytes ) * 100 > 90
        for: 5s
        labels:
          severity: "1"
          team: node
          level: warning
        annotations:
          summary: "{{ $labels.instance }} MEM使用率超过 百分之90!"
          # Previously copy-pasted from the CPU rule; now describes memory.
          description: "{{ $labels.instance }} 检测内存使用率超出90%!请重点关注!!!"

  - name: node-disk_used
    rules:
      # Fires when an ext3/ext4/xfs filesystem has been more than 90% used
      # for 1 minute.
      - alert: node-disk_used
        expr: 100 - (node_filesystem_free_bytes{fstype=~"ext3|ext4|xfs"} / node_filesystem_size_bytes{fstype=~"ext3|ext4|xfs"} * 100) > 90
        for: 1m
        labels:
          severity: "1"
          team: node
          level: warning
        annotations:
          summary: "{{ $labels.instance }} 挂载分区使用率超过 百分之90!"
          description: "{{ $labels.instance }} 挂载分区使用率超出90%!请重点关注!!!"

# 如需同时监控 MySQL、容器和主机信息,需要在被监控主机上部署 prom/node-exporter、cadvisor 和 prom/mysqld-exporter,docker-compose.yml 如下:


# Compose file for a monitored host. It starts node-exporter, cAdvisor and
# mysqld-exporter so that Prometheus can scrape ports 9100, 8085 and 9104.
version: "3"

services:
  exporter:
    image: prom/node-exporter:latest
    container_name: node-exporter
    hostname: db01
    ports:
      - "9100:9100"

  # cAdvisor listens on 8080 in the container and is published on 8085.
  cadvisor:
    image: google/cadvisor
    container_name: cadvisor
    restart: always
    volumes:
      - /:/rootfs:ro
      - /var/run:/var/run:rw
      - /sys:/sys:ro
      - /var/lib/docker/:/var/lib/docker:ro
    ports:
      - "8085:8080"
    privileged: true

  mysqld-exporter:
    image: prom/mysqld-exporter
    ports:
      - "9104:9104"
    restart: always
    container_name: mysql_exporter
    hostname: db01
    environment:
      # NOTE(review): this embeds the MySQL root password in plain text;
      # prefer a dedicated low-privilege monitoring account.
      # NOTE(review): newer mysqld-exporter releases reportedly no longer
      # read DATA_SOURCE_NAME; the image is untagged, so confirm against
      # the version actually pulled or pin a tag.
      - "DATA_SOURCE_NAME=root:0GXwwchW4rP@(172.16.225.157:3306)/"
      - TZ=Asia/Shanghai

在 Grafana 中导入仪表盘模板,模板 ID 分别是:8919、7362。
更多监控模板可通过链接获取(点击获取监控模块),只需导入对应的 ID 即可。

Prometheus、Alertmanager、Grafana 监控 Linux 与 MySQL

以上是 Prometheus、Alertmanager、Grafana监控Linux与MySQL [数据库教程] 的全部内容, 来源链接: utcz.com/z/535236.html

回到顶部