配置
# Deploy the DingTalk webhook bridge; $dingding_token must hold the DingTalk robot's access token.
docker run -d \
  --name webhook \
  -p 8060:8060 \
  timonwong/prometheus-webhook-dingtalk \
  --ding.profile="webhook1=https://oapi.dingtalk.com/robot/send?access_token=$dingding_token"
修改ops/alert/alertmanager.yml
# Alertmanager configuration: send every alert to the DingTalk webhook receiver.
global:
  resolve_timeout: 5m

# Root route: all alerts go to stos_ops, grouped by alert name.
route:
  receiver: stos_ops
  group_wait: 30s
  group_interval: 5m
  repeat_interval: 5h
  group_by: [alertname]
  routes:
    # Child route; it has no matchers and repeats the root receiver and group_wait.
    - receiver: stos_ops
      group_wait: 30s

receivers:
  - name: stos_ops
    webhook_configs:
      # XXX is a placeholder host; presumably the machine running the
      # webhook container that listens on port 8060 (TODO confirm).
      - url: http://XXX:8060/dingtalk/webhook1/send
        # Also send a notification when the alert resolves.
        send_resolved: true
修改alert/alert_rule.yml告警规则
groups:
- name: hs_f0158133_alert_group
rules:
# Warn when node_load1 on the worker_seal_system_monitor job stays above 80 for 30 minutes.
- alert: CPU_负载告警
  # The selector must stay on one line: a line break inside {...} breaks the YAML scalar.
  expr: 'node_load1{job="worker_seal_system_monitor"} > 80'
  for: 30m
  labels:
    severity: "warning"
  annotations:
    # Template delimiters must be written as an unbroken "{{" ... "}}" pair.
    summary: "{{$labels.instance}}:CPU核数使用率过高"
    description: "设备 {{ $labels.instance }} CPU 使用核数超过80核,current value is {{ $value }} "
# Critical alert when any of the listed monitoring jobs reports up == 0 for 1 minute.
- alert: 设备挂机告警
  # The selector must stay on one line: a line break inside {...} breaks the YAML scalar.
  expr: 'up{job=~"worker_seal_system_monitor|worker_store_system_monitor|miner_system_monitor|lotus_system_monitor"} == 0'
  for: 1m
  labels:
    severity: "critical"
  annotations:
    # Template delimiters must be written as an unbroken "{{" ... "}}" pair.
    summary: "{{$labels.instance}} 挂机了"
- alert: /data1使用率告警
expr: ceil((node_filesystem_size_bytes{mountpoint =~"/rootfs/data1",job!="worker_store_system_monitor"}