监控告警的部署与配置(Prometheus+Alertmanager+Grafana)

本文详细介绍了如何部署和配置监控告警系统,包括Prometheus的安装、Alertmanager的单机部署以及Grafana的单机部署。接着,文章讲解了针对node、mariadb、nginx、zookeeper、rocketmq、redis和process等不同服务的监控配置,以及相应的告警规则设定,帮助运维人员实现全面的系统监控和及时的故障预警。

摘要生成于 C知道 ,由 DeepSeek-R1 满血版支持, 前往体验 >

    • Prometheus部署

# Create a directory to hold the downloaded tarball (-p: no error on re-run).
mkdir -p ~/prometheus
# Download the release tarball INTO that directory. -P names the target
# directory; -O expects an output FILE and fails when handed a directory.
wget https://github.com/prometheus/prometheus/releases/download/v2.29.2/prometheus-2.29.2.linux-amd64.tar.gz -P ~/prometheus
# Unpack into the install root (assumed here to be /apps).
mkdir -p /apps
tar -zxf ~/prometheus/prometheus-2.29.2.linux-amd64.tar.gz -C /apps
# Version-independent path; -fn replaces an existing link instead of
# creating a nested one inside the old target.
ln -sfn /apps/prometheus-2.29.2.linux-amd64 /apps/prometheus
# Move the server binary into bin/ (the systemd unit starts it from there).
mkdir -p /apps/prometheus/bin && mv /apps/prometheus/prometheus /apps/prometheus/bin
 
# Install the systemd unit for Prometheus.
# '>' (not '>>') so that re-running this step rewrites the unit instead of
# appending a second copy; the quoted delimiter keeps the unit text literal.
cat > /usr/lib/systemd/system/prometheus.service << 'EOF'
[Unit]
Description=Prometheus
After=network.target
[Service]
Type=simple
ExecStart=/apps/prometheus/bin/prometheus --config.file=/apps/prometheus/prometheus.yml
Restart=on-failure
[Install]
WantedBy=multi-user.target
EOF

# Make systemd pick up the new unit, enable it at boot and start it now;
# without this nothing is listening for the check below.
systemctl daemon-reload
systemctl enable prometheus
systemctl start prometheus

# Check that the port is open: ss -tunlp | grep 9090   or   lsof -i:9090
 
curl http://192.168.207.38:9090
    • Alertmanager 单机部署

# 1. Install Alertmanager
# The tarball comes from the official download page: https://prometheus.io/download/
tar --extract --verbose --file=alertmanager-0.20.0.linux-amd64.tar.gz
# Rename the unpacked directory to the install location.
mv alertmanager-0.20.0.linux-amd64 /usr/local/alertmanager
 
# cat alertmanager.yml
global:
  resolve_timeout: 10m
  smtp_from: monitor@test.com            # sender address of the alert e-mails
  smtp_hello: 'test.com'                 # hostname announced to the SMTP server
  smtp_smarthost: mail.test.com:587      # SMTP server host:port
  smtp_auth_username: monitor@test.com
  smtp_auth_password: 'Monitor#2021'     # quoted so '#' can never be read as a comment
  smtp_require_tls: false
route:
  group_by: ['alertname']                         # grouping labels
  group_wait: 30s                                 # initial wait before the first notification of a group
  group_interval: 5m                              # wait before notifying about new alerts in an existing group
  repeat_interval: 48h                            # wait before re-sending a notification that was already sent
  receiver: 'web.hook'
receivers:
- name: 'web.hook'
  email_configs:
  - to: 'monitor@test.com'                        # alert recipient; several addresses may be listed, comma-separated
    html: '{{ template "test.html" . }}'          # must stay on one line: a split '{{' is not a template action
    send_resolved: true
#  webhook_configs:
#  - url: 'http://127.0.0.1:5001/'
inhibit_rules:
  - source_match:
      severity: 'critical'
    target_match:
      severity: 'warning'
    equal: ['alertname', 'dev', 'instance']
templates:
- /usr/local/alertmanager/template/*.tmpl    # directory holding the e-mail templates
 
#注意:接收邮件的服务器配置信息要准确,包括地址、端口、密码
 
#告警模板配置说明:
#1、修改alertmanager.yml,配置模板地址,然后在每个receiver引用模板
 ----
templates:
- '/usr/local/alertmanager/template/email.tmpl'   # no trailing space inside the quotes, or the path will not match
...
...
receivers:
- name: 'web.hook'
  email_configs:
  - to: 'monitor@test.com'                        # alert recipient; several addresses may be listed, comma-separated
    html: '{{ template "test.html" . }}'          # references the template defined in the .tmpl file
    send_resolved: true
#  webhook_configs:
 ----
 
#邮件告警模板【存放目录/usr/local/alertmanager/template】
vim  template/email.tmpl
alertmanager参考模板 
{{ define "test.html" }}
{{- /* HTML e-mail body: one ERROR section per firing alert, then one INFO section per resolved alert. Timestamps are shifted by 28800e9 ns (8 hours) before formatting. Each loop ranges over its own subset (.Alerts.Firing / .Alerts.Resolved) so that a notification carrying both kinds does not list every alert twice. */ -}}
{{- if gt (len .Alerts.Firing) 0 -}}
{{- range $index, $alert := .Alerts.Firing -}}
========= ERROR ==========<br>
告警名称:{{ .Labels.alertname }}<br>
告警级别:{{ .Labels.severity }}<br>
告警机器:{{ .Labels.instance }} {{ .Labels.device }}<br>
告警详情:{{ .Annotations.summary }}<br>
告警时间:{{ (.StartsAt.Add 28800e9).Format "2006-01-02 15:04:05" }}<br>
========= END ==========<br>
{{- end }}
{{- end }}
{{- if gt (len .Alerts.Resolved) 0 -}}
{{- range $index, $alert := .Alerts.Resolved -}}
========= INFO ==========<br>
告警名称:{{ .Labels.alertname }}<br>
告警级别:{{ .Labels.severity }}<br>
告警机器:{{ .Labels.instance }}<br>
告警详情:{{ .Annotations.summary }}<br>
告警时间:{{ (.StartsAt.Add 28800e9).Format "2006-01-02 15:04:05" }}<br>
恢复时间:{{ (.EndsAt.Add 28800e9).Format "2006-01-02 15:04:05" }}<br>
========= END ==========<br>
{{- end }}
{{- end }}
{{- end }}
#注:告警模板如果配置有问题,会导致邮件发送失败,注意观察日志。
 
#修改好配置文件后,可以使用amtool工具检查配置
./amtool check-config alertmanager.yml

#Checking 'alertmanager.yml'  SUCCESS
 
# Register Alertmanager as a systemd service: the unit text below is written
# verbatim to the unit file, replacing any previous content.
tee /usr/lib/systemd/system/alertmanager.service >/dev/null <<EOF
[Unit]
Description=alertmanager
 
[Service]
ExecStart=/usr/local/alertmanager/alertmanager --config.file=/usr/local/alertmanager/alertmanager.yml --storage.path=/usr/local/alertmanager/data --web.listen-address=:9093 --data.retention=120h  
Restart=on-failure
 
[Install]
WantedBy=multi-user.target
 
EOF
 
# Enable the service at boot, then (re)start it right away.
systemctl enable alertmanager
systemctl restart alertmanager
 
# Alertmanager listens on port 9093 by default.
ss -tunlp | grep 9093

# Like Prometheus, Alertmanager can hot-reload its configuration via a POST
# to /-/reload. Replace alertmanager_ip with the Alertmanager host: port 9093
# is Alertmanager's own endpoint, not the Prometheus server.
curl -X POST http://alertmanager_ip:9093/-/reload
    • Grafana单机部署

# Deploy Grafana
# 1. Create a directory to hold the tarball (-p: no error on re-run).
mkdir -p ~/grafana
# Download the tarball INTO that directory. -P names the target directory;
# -O expects an output FILE and fails when handed a directory.
wget https://dl.grafana.com/enterprise/release/grafana-enterprise-9.2.3.linux-amd64.tar.gz -P ~/grafana
# Unpack into the install root, creating it if this step is run on its own.
mkdir -p /apps
tar -zxvf ~/grafana/grafana-enterprise-9.2.3.linux-amd64.tar.gz -C /apps
# Version-independent path; -fn replaces an existing link instead of
# creating a nested one inside the old target.
ln -sfn /apps/grafana-9.2.3 /apps/grafana
#2、grafana对接数据库mariadb
mysql -uroot -pMySQL@2022.
MariaDB [(none)]> create database grafana;
MariaDB [(none)]> GRANT ALL PRIVILEGES ON grafana.* TO 'grafana'@'' IDENTIFIED BY "MySQL@2022.aaa" WITH GRANT OPTION;
MariaDB [(none)]> flush privileges;
MariaDB [(none)]> exit;
 
#修改grafana连数据库配置
chmod 755 -R /apps/grafana-9.2.3
vim /apps/grafana/conf/defaults.ini
type = mysql
host = 127.0.0.1:3306
name = grafana
user = grafana
password = MySQL@2022.aaa
url = mysql://grafana:MySQL@2022.aaa@192.168.2.110:3306/grafana

# Start Grafana in the background. --homepath is given explicitly because
# grafana-server looks for conf/defaults.ini relative to the current working
# directory and refuses to start when launched from anywhere else.
/apps/grafana/bin/grafana-server --homepath=/apps/grafana &
# Verify: the web port (3000) is listening and the process exists.
ss -utnlp | grep 3000
ps -ef | grep grafana
# Alternative to the start command above (use one or the other, not both):
# survive logout and write output to a log file. Note that nohup does NOT
# start Grafana at boot.
nohup /apps/grafana/bin/grafana-server --homepath=/apps/grafana >/apps/grafana/grafana.log 2>&1 &
#服务启动配置
cat >> /usr/lib/systemd/system/grafana.service << EOF
[Unit]
Description=Grafana instance
Documentation=http://docs.grafana.org
Wants=network-online.target
After=network-online.target
After=postgresql.service mariadb.service mysqld.service
[Service]
Type=notify
Restart=on-failure
WorkingDirectory=/apps/grafana
RuntimeDirectory=/apps/grafana
#RuntimeDirectoryMode=0750
ExecStart=/usr/sbin/grafana-server  
ExecStart=/apps/grafana/bin/grafana-server --config=/apps/grafana/conf/defaults.ini --pidfile=/var/run/grafana/grafana-server.pid --packaging=tar
cfg:default.paths.logs=/apps/grafana/data/log  
cfg:default.paths.data=/apps/grafana/data 
cfg:default.paths.plugins=/apps/grafana/plugins 
cfg:default.paths.provisioning=/apps/grafana/provisioning  
LimitNOFILE=10000
TimeoutStopSec=20
CapabilityBoundingSet=
DeviceAllow=
LockPersonality=true
MemoryDenyWriteExecute=false
NoNewPrivileges=true
Priva
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值