Rancher Prometheus Alertmanager 配置

本文详细介绍了Prometheus监控系统的配置,包括告警规则的定义、Prometheus配置文件详解,以及AlertManager的配置与报警通知方式。涵盖邮件、Webhook及企业微信等多种报警渠道,确保监控报警的有效性和及时性。

摘要生成于 C知道 ,由 DeepSeek-R1 满血版支持, 前往体验 >

1、

rules:


# Prometheus alerting rules (rule-file format: a top-level `groups` list).
# Every rule also exposes the triggering sample as a `value` annotation,
# because the mail/WeChat notification templates read `.Annotations.value`.
groups:
- name: example
  rules:
  # Fires when a scrape target has reported up == 0 for one minute.
  - alert: InstanceDown
    expr: up == 0
    for: 1m
    labels:
      severity: page
    annotations:
      summary: "Instance {{ $labels.instance }} down"
      description: "{{ $labels.instance }} of job {{ $labels.job }} has been down for more than 1 minute."
      value: "{{ $value }}"
  # Fires when used memory (total minus free, buffers and cache) stays above
  # 50% for two minutes.
  # NOTE(review): these metric names carry no `_bytes` suffix; newer
  # node_exporter releases renamed them - confirm against the deployed version.
  - alert: NodeMemoryUsage
    expr: ((node_memory_MemTotal) - (node_memory_MemFree+node_memory_Buffers+node_memory_Cached) ) / (node_memory_MemTotal) * 100 > 50
    for: 2m
    labels:
      team: node
    annotations:
      summary: "{{ $labels.instance }}: High Memory usage detected"
      description: "{{ $labels.instance }}: Memory usage is above 50% (current value is: {{ $value }})"
      value: "{{ $value }}"
  # Fires when the summed RSS of a pod's containers stays above 4 GiB for two
  # minutes. The aggregation keeps only `pod_name` and `namespace`, so those
  # are the only labels available to the annotations below.
  - alert: PodMemory
    expr: sum(container_memory_rss{image!=""}) by(pod_name, namespace) / 1024/ 1024 / 1024 > 4
    for: 2m
    labels:
      team: pod
    annotations:
      summary: "{{ $labels.namespace }}/{{ $labels.pod_name }}: High Memory detected"
      description: "{{ $labels.namespace }}/{{ $labels.pod_name }}: Memory is above 4G (current value is: {{ $value }})"
      value: "{{ $value }}"

2、

 prometheus.yml

# Files from which Prometheus loads its rules.
# The key must start at column 0, level with `scrape_configs` and `alerting`;
# a leading space makes the document fail to parse.
rule_files:
- /etc/config/rules
- /etc/config/alerts
scrape_configs:
# JVM application metrics, discovered through Kubernetes endpoints and
# selected by Service annotations.
- job_name: jvm-pods
  # NOTE(review): no `scheme` is set for this job, so the CA file and bearer
  # token below accompany plain-HTTP scrapes of application pods - confirm
  # that this is intended.
  tls_config:
    ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
  bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
  kubernetes_sd_configs:
  - role: endpoints
  relabel_configs:
  # Keep only targets whose Service sets both the scrape and the jvm_scrape
  # annotation to "true" (the two values are joined with ';').
  - action: keep
    regex: true;true
    source_labels:
    - __meta_kubernetes_service_annotation_prometheus_io_scrape
    - __meta_kubernetes_service_annotation_prometheus_io_jvm_scrape
  # Use the app_metrics_path annotation, when present, as the metrics path.
  - action: replace
    source_labels:
    - __meta_kubernetes_service_annotation_prometheus_io_app_metrics_path
    target_label: __metrics_path__
    regex: (.+)
  # Build the scrape address from the pod IP and the annotated metrics port.
  # The explicit replacement is required: the default replacement is `$1`,
  # which would leave only the IP and drop the port.
  - action: replace
    source_labels:
    - __meta_kubernetes_pod_ip
    - __meta_kubernetes_service_annotation_prometheus_io_app_metrics_port
    target_label: __address__
    regex: (.+);(.+)
    replacement: $1:$2
  # Expose namespace and pod name as regular labels on the scraped series.
  - action: replace
    source_labels:
    - __meta_kubernetes_namespace
    target_label: kubernetes_namespace
  - action: replace
    source_labels:
    - __meta_kubernetes_pod_name
    target_label: kubernetes_pod_name

# Prometheus scraping its own metrics endpoint.
- job_name: prometheus
  static_configs:
  - targets: ['localhost:9090']
# Static list of nginx targets: one host, ports 80 through 84.
- job_name: nginx
  static_configs:
  - targets:
    - '10.147.255.12:80'
    - '10.147.255.12:81'
    - '10.147.255.12:82'
    - '10.147.255.12:83'
    - '10.147.255.12:84'
# API server metrics: discover endpoints, then keep only the `https` port of
# the `kubernetes` Service in the `default` namespace.
- job_name: kubernetes-apiservers
  scheme: https
  bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
  tls_config:
    ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
    # Server certificate verification is switched off for this job.
    insecure_skip_verify: true
  kubernetes_sd_configs:
  - role: endpoints
  relabel_configs:
  # The three source values are joined with ';' before matching.
  - source_labels:
    - __meta_kubernetes_namespace
    - __meta_kubernetes_service_name
    - __meta_kubernetes_endpoint_port_name
    regex: 'default;kubernetes;https'
    action: keep
# Node metrics, fetched through the API server's node proxy instead of
# contacting each node directly.
- job_name: kubernetes-nodes
  scheme: https
  bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
  tls_config:
    ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
    # Server certificate verification is switched off for this job.
    insecure_skip_verify: true
  kubernetes_sd_configs:
  - role: node
  relabel_configs:
  # Copy every Kubernetes node label onto the target.
  - action: labelmap
    regex: '__meta_kubernetes_node_label_(.+)'
  # Point every target at the in-cluster API server address.
  - target_label: __address__
    replacement: 'kubernetes.default.svc:443'
  # Route the request to the discovered node via the proxy path.
  - source_labels: [__meta_kubernetes_node_name]
    regex: '(.+)'
    target_label: __metrics_path__
    replacement: '/api/v1/nodes/${1}/proxy/metrics'
# Per-node cAdvisor metrics, fetched through the API server's node proxy
# (`.../proxy/metrics/cadvisor`).
- job_name: kubernetes-nodes-cadvisor
  scheme: https
  bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
  tls_config:
    ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
    # Server certificate verification is switched off for this job.
    insecure_skip_verify: true
  kubernetes_sd_configs:
  - role: node
  relabel_configs:
  # Copy every Kubernetes node label onto the target.
  - action: labelmap
    regex: '__meta_kubernetes_node_label_(.+)'
  # Point every target at the in-cluster API server address.
  - target_label: __address__
    replacement: 'kubernetes.default.svc:443'
  # Route the request to the discovered node's cAdvisor endpoint.
  - source_labels: [__meta_kubernetes_node_name]
    regex: '(.+)'
    target_label: __metrics_path__
    replacement: '/api/v1/nodes/${1}/proxy/metrics/cadvisor'
# Generic endpoint scraping, opted into and tuned through Service annotations
# (scrape, scheme, path, port).
- job_name: kubernetes-service-endpoints
  kubernetes_sd_configs:
  - role: endpoints
  relabel_configs:
  # Opt-in: the scrape annotation must be "true".
  - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scrape]
    action: keep
    regex: true
  # Optional scheme override; only http or https is accepted.
  - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scheme]
    action: replace
    regex: '(https?)'
    target_label: __scheme__
  # Optional metrics path override.
  - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_path]
    action: replace
    regex: '(.+)'
    target_label: __metrics_path__
  # Optional port override: keep the host part of __address__ and append the
  # annotated port.
  - source_labels:
    - __address__
    - __meta_kubernetes_service_annotation_prometheus_io_port
    action: replace
    regex: '([^:]+)(?::\d+)?;(\d+)'
    replacement: '$1:$2'
    target_label: __address__
  # Copy every Service label onto the target.
  - action: labelmap
    regex: '__meta_kubernetes_service_label_(.+)'
  # Expose namespace and Service name as regular labels.
  - source_labels: [__meta_kubernetes_namespace]
    action: replace
    target_label: kubernetes_namespace
  - source_labels: [__meta_kubernetes_service_name]
    action: replace
    target_label: kubernetes_name
# Pushgateway scrape job: keeps only Services whose probe annotation equals
# `pushgateway`. `honor_labels` is enabled for this job.
- job_name: prometheus-pushgateway
  honor_labels: true
  kubernetes_sd_configs:
  - role: service
  relabel_configs:
  - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_probe]
    action: keep
    regex: pushgateway
# Probes annotated Services: each discovered Service address is handed, as
# the `target` parameter, to the `/probe` endpoint of a host named `blackbox`
# with module `http_2xx`.
- job_name: kubernetes-services
  metrics_path: /probe
  params:
    module: [http_2xx]
  kubernetes_sd_configs:
  - role: service
  relabel_configs:
  # Opt-in: the probe annotation must be "true".
  - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_probe]
    action: keep
    regex: true
  # The Service address becomes the ?target= query parameter ...
  - source_labels: [__address__]
    target_label: __param_target
  # ... while the request itself goes to the `blackbox` host.
  - target_label: __address__
    replacement: blackbox
  # Report the probed address as the `instance` label.
  - source_labels: [__param_target]
    target_label: instance
  # Copy every Service label onto the target.
  - action: labelmap
    regex: '__meta_kubernetes_service_label_(.+)'
  # Expose namespace and Service name as regular labels.
  - source_labels: [__meta_kubernetes_namespace]
    target_label: kubernetes_namespace
  - source_labels: [__meta_kubernetes_service_name]
    target_label: kubernetes_name
# Generic pod scraping, opted into and tuned through pod annotations
# (scrape, path, port).
- job_name: kubernetes-pods
  kubernetes_sd_configs:
  - role: pod
  relabel_configs:
  # Opt-in: the scrape annotation must be "true".
  - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scrape]
    action: keep
    regex: true
  # Optional metrics path override.
  - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_path]
    action: replace
    regex: '(.+)'
    target_label: __metrics_path__
  # Optional port override: keep the host part of __address__ and append the
  # annotated port.
  - source_labels:
    - __address__
    - __meta_kubernetes_pod_annotation_prometheus_io_port
    action: replace
    regex: '([^:]+)(?::\d+)?;(\d+)'
    replacement: '$1:$2'
    target_label: __address__
  # Copy every pod label onto the target.
  - action: labelmap
    regex: '__meta_kubernetes_pod_label_(.+)'
  # Expose namespace and pod name as regular labels.
  - source_labels: [__meta_kubernetes_namespace]
    action: replace
    target_label: kubernetes_namespace
  - source_labels: [__meta_kubernetes_pod_name]
    action: replace
    target_label: kubernetes_pod_name

# Alertmanager discovery: candidate pods come from Kubernetes pod discovery
# and are narrowed down by the relabel rules below.
alerting:
  alertmanagers:
  - bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
    tls_config:
      ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
    kubernetes_sd_configs:
    - role: pod
    relabel_configs:
    # Keep only pods in namespace `prometheus` ...
    - action: keep
      source_labels:
      - __meta_kubernetes_namespace
      regex: prometheus
    # ... labelled app=prometheus ...
    - action: keep
      source_labels:
      - __meta_kubernetes_pod_label_app
      regex: prometheus
    # ... and component=alertmanager.
    - action: keep
      source_labels:
      - __meta_kubernetes_pod_label_component
      regex: alertmanager
    # The regex is intentionally left empty: it drops discovered targets that
    # carry no container port number.
    - action: drop
      source_labels:
      - __meta_kubernetes_pod_container_port_number
      regex:

3、
 
 alertmanager.yml

# Alertmanager global settings: resolve timeout plus the SMTP defaults used
# by email receivers. `global` must start at column 0 with its settings
# nested one level below it; otherwise they become siblings of `global`
# instead of its children.
global:
  resolve_timeout: 5m
  smtp_smarthost: 'smtp.qq.com:465'    # SMTP server (host:port) used to send mail
  smtp_from: '******@qq.com'           # sender address
  smtp_auth_username: '******@qq.com'  # SMTP login (the mailbox address)
  smtp_auth_password: '****'           # mailbox authorization code
  smtp_require_tls: false
# Notification template files to load (glob pattern).
templates:
- "*.tmpl"
# Root route: every alert is grouped by alert name and sent to `mail`.
route:
  # Receiver for this route; must match a `name` under `receivers`.
  receiver: 'mail'
  # Labels used to group alerts into one notification.
  group_by:
  - 'alertname'
  # How long to wait before sending the first notification for a new group.
  group_wait: 10s
  # How long to wait before notifying about new alerts in an existing group.
  group_interval: 10s
  # How often a still-firing notification is repeated. For email, do not set
  # this too low: the SMTP server may reject mail that is sent too frequently.
  repeat_interval: 5m
# Notification targets. The route in this file only names 'mail'; 'web.hook'
# and 'wechat' are defined here but not referenced by it.
receivers:
- name: 'web.hook'
  webhook_configs:
  - url: 'http://127.0.0.1:5001/'
- name: 'mail'
  email_configs:
  # Quoted on purpose: an unquoted value starting with `*` is read by YAML as
  # an alias, not as a string.
  - to: '******@qq.com'
    # HTML body, rendered from the "mail.html" template.
    html: '{{ template "mail.html" . }}'
    # Subject line of the notification mail.
    headers:
      Subject: "[WARN] 报警邮件"
- name: 'wechat'
  # WeCom (WeChat Work) notification settings.
  wechat_configs:
  - send_resolved: true
    to_user: '11066879'
    # to_party: '1'            # id of the receiving party (department)
    agent_id: '******'         # WeCom -> custom application -> AgentId
    corp_id: '******'          # company info: My Company -> CorpId (bottom of the page)
    api_secret: '******'       # WeCom -> custom application -> Secret
    # Message body, rendered from the "wechat.html" template.
    message: '{{ template "wechat.html" . }}'
# An inhibition rule mutes alerts matching one set of matchers (the target)
# while an alert matching another set of matchers (the source) exists. Both
# alerts must carry identical values for the labels listed under `equal`.
inhibit_rules:
- source_match:
    severity: 'critical'
  target_match:
    severity: 'warning'
  equal:
  - 'alertname'
  - 'dev'
  - 'instance'


4、   
mail.tmpl

{{/*
  mail.html - HTML body for email notifications.
  Renders one table with a header row followed by one row per alert in
  .Alerts. Columns are filled from the alert labels alertname, instance, job,
  kubernetes_namespace, pod_name and container_name, from the annotations
  description and value, and from the alert's StartsAt timestamp.
  A label or annotation that an alert does not carry renders as an empty cell.
  NOTE(review): the column headed "kubernetes_pod_name" reads the label
  "pod_name" - confirm which label name is intended.
*/}}
{{ define "mail.html" }}
<table border="1">
        <tr>
                <td>报警项</td>
                <td>实例</td>
                <td>JOB</td>
                <td>namespace</td>
                <td>kubernetes_pod_name</td>
        <td>container_name</td>
        <td>描述</td>
                <td>报警阀值</td>
                <td>开始时间</td>
        </tr>
        {{ range $i, $alert := .Alerts }}
                <tr>
                        <td>{{ index $alert.Labels "alertname" }}</td>
                        <td>{{ index $alert.Labels "instance" }}</td>
                        <td>{{ index $alert.Labels "job" }}</td>
                        <td>{{ index $alert.Labels "kubernetes_namespace" }}</td>
                        <td>{{ index $alert.Labels "pod_name" }}</td>
            <td>{{ index $alert.Labels "container_name" }}</td>
            <td>{{ index $alert.Annotations "description" }}</td>
                        <td>{{ index $alert.Annotations "value" }}</td>
                        <td>{{ $alert.StartsAt }}</td>
                </tr>
        {{ end }}
</table>
{{ end }}

5、

wechat.tmpl

{{/*
  wechat.html - plain-text body for WeChat notifications.
  Emits one block of "[field]:value" lines per alert in .Alerts.Firing, using
  the labels alertname, instance, job, kubernetes_namespace, pod_name and
  container_name, the annotations description and value, and the alert's
  StartsAt timestamp. Only firing alerts are iterated, so a notification that
  contains only resolved alerts produces no entries.
  NOTE(review): the line tagged "[kubernetes_pod_name]" reads the label
  "pod_name" - confirm which label name is intended.
*/}}
{{ define "wechat.html" }}
  {{ range $i, $alert := .Alerts.Firing }}
    [报警项]:{{ index $alert.Labels "alertname" }}
    [实例]:{{ index $alert.Labels "instance" }}
    [JOB]:{{ index $alert.Labels "job" }}
    [namespace]:{{ index $alert.Labels "kubernetes_namespace" }}
    [kubernetes_pod_name]:{{ index $alert.Labels "pod_name" }}
    [container_name]:{{ index $alert.Labels "container_name" }}
    [描述]:{{ index $alert.Annotations "description" }}
    [报警阀值]:{{ index $alert.Annotations "value" }}
    [开始时间]:{{ $alert.StartsAt }}
  {{ end }}
{{ end }}

 


  

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值