alert是一个单独的模块,需要我们单独的配置
需要声明邮箱地址,配置是以ConfigMap进行配置
alertmanager也是pod部署,部署在k8s集群
inactive表示告警规则尚未被触发(未激活)
pending表示告警的阈值已经触发,但持续时间(for)尚未满足,还在等待发送邮件
firing表示告警项已经触发并按发送配置发出通知(邮件,短信,电话,钉钉告警)
---
# Alertmanager configuration, published as a ConfigMap named "alertmanager"
# in the monitor-sa namespace. The data key "alertmanager.yml" holds the
# complete Alertmanager config file as a literal block.
apiVersion: v1
kind: ConfigMap
metadata:
  name: alertmanager
  namespace: monitor-sa
data:
  alertmanager.yml: |-
    global:
      # Time after which an alert that is no longer reported is treated as
      # resolved.
      resolve_timeout: 1m
      # SMTP relay and sender identity used for e-mail notifications.
      smtp_smarthost: 'smtp.qq.com:25'
      smtp_from: '2040756255@qq.com'
      smtp_auth_username: '2040756255@qq.com'
      # Previously spelled "smtp_require_password", which is not an
      # Alertmanager option; the SMTP credential goes in smtp_auth_password.
      # NOTE(review): the old key name suggests smtp_require_tls may also have
      # been intended. It is left unset here, so Alertmanager's default applies.
      # NOTE(review): this credential is stored in plain text in a ConfigMap;
      # consider delivering the config through a Secret instead.
      smtp_auth_password: 'dfzodxequrrbjhdj'
    # Routing (dispatch) policy for alerts.
    route:
      # Alerts that share the same alertname are batched into one group.
      group_by: [alertname]
      # After the first alert of a new group fires, wait 10s so that other
      # alerts of the same group are sent in the same notification.
      group_wait: 10s
      # Wait between notifications for a group that has already been notified
      # and has since received new alerts.
      group_interval: 10s
      # Receiver that handles every alert matched by this route.
      receiver: default-receiver
    receivers:
      - name: 'default-receiver'
        email_configs:
          # Recipient address for alert e-mails.
          - to: '2040756255@qq.com'
            # Also send a notification when the alert is resolved.
            send_resolved: true
---
# Single-replica Deployment that runs Prometheus and Alertmanager as two
# containers of the same pod in the monitor-sa namespace.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: prometheus-server
  namespace: monitor-sa
  labels:
    app: prometheus
spec:
  replicas: 1
  selector:
    matchLabels:
      app: prometheus
      component: server
  template:
    metadata:
      labels:
        app: prometheus
        component: server
      annotations:
        prometheus.io/scrape: 'false'
    spec:
      serviceAccountName: monitor
      initContainers:
        # Relaxes permissions on the data directory before the main
        # containers start.
        # NOTE(review): "chmod -R 777 /etc" also reaches the hostPath file
        # mounted at /etc/localtime; confirm that this is intended.
        - name: init-chmod
          image: busybox:latest
          command: ['sh', '-c', 'chmod -R 777 /prometheus;chmod -R 777 /etc']
          volumeMounts:
            - mountPath: /prometheus
              name: prometheus-storage-volume
            - mountPath: /etc/localtime
              name: timezone
      containers:
        - name: prometheus
          image: prom/prometheus:v2.45.0
          command:
            - prometheus
            - --config.file=/etc/prometheus/prometheus.yml
            - --storage.tsdb.path=/prometheus
            # --storage.tsdb.retention is deprecated; the time-based retention
            # flag is --storage.tsdb.retention.time.
            - --storage.tsdb.retention.time=720h
            - --web.enable-lifecycle
          ports:
            - containerPort: 9090
          volumeMounts:
            - name: prometheus-config
              mountPath: /etc/prometheus/
            - name: prometheus-storage-volume
              mountPath: /prometheus/
            - name: timezone
              mountPath: /etc/localtime
            - name: k8s-certs
              mountPath: /var/run/secrets/kubernetes.io/k8s-certs/etcd/
        - name: alertmanager
          image: prom/alertmanager:v0.20.0
          args:
            - "--config.file=/etc/alertmanager/alertmanager.yml"
            - "--log.level=debug"
          ports:
            - containerPort: 9093
              protocol: TCP
              name: alertmanager
          volumeMounts:
            - name: alertmanager-config
              mountPath: /etc/alertmanager
            - name: alertmanager-storage
              mountPath: /alertmanager
            - name: localtime
              mountPath: /etc/localtime
      volumes:
        - name: prometheus-config
          configMap:
            name: prometheus-config
            # Octal file mode. The field is an integer, so the value must stay
            # unquoted.
            defaultMode: 0777
        # Prometheus TSDB data lives on the node; /data must already exist.
        - name: prometheus-storage-volume
          hostPath:
            path: /data
            type: Directory
        - name: k8s-certs
          secret:
            secretName: etcd-certs
        # Zone file mounted over /etc/localtime in the init and prometheus
        # containers.
        - name: timezone
          hostPath:
            path: /usr/share/zoneinfo/Asia/Shanghai
        # Alertmanager config file, taken from the "alertmanager" ConfigMap.
        - name: alertmanager-config
          configMap:
            name: alertmanager
        - name: alertmanager-storage
          hostPath:
            path: /data/alertmanager
            type: DirectoryOrCreate
        # Same host path as the "timezone" volume; mounted by the alertmanager
        # container.
        - name: localtime
          hostPath:
            path: /usr/share/zoneinfo/Asia/Shanghai
kind: ConfigMap
apiVersion: v1
metadata:
labels:
app: prometheus
name: prometheus-config
namespace: monitor-sa
data:
prometheus.yml: |
rule_files:
- /etc/prometheus/rules.yml
alerting:
alertmanagers:
- static_configs:
- targets: ["localhost:9093"]
global:
scrape_interval: 15s
scrape_timeout: 10s
evaluation_interval: 1m
scrape_configs:
- job_name: 'kubernetes-node'
kubernetes_sd_configs:
- role: node
relabel_configs:
- source_labels: [__address__]
regex: '(.*):10250'
replacement: '${1}:9100'
target_label: __address__
action: replace
- action: labelmap
regex: __meta_kubernetes_node_label_(.+)
- job_name: 'kubernetes-node-cadvisor'
kubernetes_sd_configs:
- role: node
scheme: https
tls_config:
ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
relabel_configs:
- action: labelmap
regex: __meta_kubernetes_node_label_(.+)
- target_label: __address__
replacement: kubernetes.default.svc:443
- source_labels: [__meta_kubernetes_node_name]
regex: (.+)
target_label: __metrics_path__
replacement: /api/v1/nodes/${1}/proxy/metrics/cadvisor
- job_name: 'kubernetes-apiserver'
kubernetes_sd_configs:
- role: endpoints
scheme: https
tls_config:
ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
relabel_