目录
一、MySQL服务器状态监控脚本树形图
/home/wufei/mysql_status_monitor/ ├── bin │ ├── mysql_monitor_wf.sh ├── conf │ └── mysql_monitor_wf.conf └── log ├── controll_policy_mysql_id.master ├── controll_policy_mysql_id.slave ├── master_status_[IP].log ├── mysql_monitor_wf.log └── slave_status_[IP].log |
1. bin/mysql_monitor_wf.sh:执行脚本。主要实现对MySQL从库状态(主从同步异常、延时)监控、MySQL主库心跳检测、Controll Server 数据库心跳检测,脚本每十分钟执行一次,检测异常或检测信息3秒未获取均会邮件告警。
2. conf/mysql_monitor_wf.conf:全局参数配置文件。
3. log/:各服务器对接监控日志。
二、mysql_monitor_wf.sh
#!/bin/bash
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
# Program : MySQL 备份脚本 #
# Author : wufei@xxxxx.com #
# Description : MySQL status monitoring #
# Date : 2018-09-30 #
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
# 更新时间:2018-10-09;更新人:wufei;更新内容:添加主库心跳检测机制
# 更新时间:2018-10-10;更新人:wufei;更新内容:通过查询获取每个从库的延时阈值
# 更新时间:2018-10-10;更新人:wufei;更新内容:添加邮件抄送人
# 更新时间:2018-10-10;更新人:wufei;更新内容:添加controll server 数据库心跳检测
# 更新时间:2018-11-20;更新人:wufei;更新内容:告警内容添加机器hostname
# 更新时间:2018-11-21;更新人:wufei;更新内容:MySQL主库心跳检测实例名参数获取错误修改
# 更新时间:2018-11-21;更新人:wufei;更新内容:添加通过查询数据库获取各库对应人的抄送邮箱
# 配置文件
conf_file="/home/wufei/mysql_status_monitor/conf/mysql_monitor_wf.conf"
# 读取配置文件中的所有变量,设置为全局变量
# controll server 数据库连接用户
controll_user=`sed '/^controll_user=/!d;s/.*=//' ${conf_file}`
# controll server 数据库连接用户密码
controll_passwd=`sed '/^controll_passwd=/!d;s/.*=//' ${conf_file}`
# controll server 数据库连接主机地址
controll_host=`sed '/^controll_host=/!d;s/.*=//' ${conf_file}`
# controll server 数据库连接端口号
controll_port=`sed '/^controll_port=/!d;s/.*=//' ${conf_file}`
# controll server 数据库
controll_db=`sed '/^controll_db=/!d;s/.*=//' ${conf_file}`
# controll server 数据库信息表
controll_table_policy=`sed '/^controll_table_policy=/!d;s/.*=//' ${conf_file}`
# controll server 数据库状态信息表
controll_table_info=`sed '/^controll_table_info=/!d;s/.*=//' ${conf_file}`
# controll server 数据库告警email信息表
controll_table_email=`sed '/^controll_table_email=/!d;s/.*=//' ${conf_file}`
# 被监控数据库连接用户
monitor_user=`sed '/^monitor_user=/!d;s/.*=//' ${conf_file}`
# 被监控数据库连接用户密码
monitor_passwd=`sed '/^monitor_passwd=/!d;s/.*=//' ${conf_file}`
# 日志文件路径
monitor_log_dir=`sed '/^monitor_log_dir=/!d;s/.*=//' ${conf_file}`
# 主从同步延时告警值
monitor_behind_master=`sed '/^monitor_behind_master=/!d;s/.*=//' ${conf_file}`
# 告警收信人邮箱
monitor_to_email=`sed '/^monitor_to_email=/!d;s/.*=//' ${conf_file}`
# 告警抄送人邮箱
monitor_other_email=`sed '/^monitor_other_email=/!d;s/.*=//' ${conf_file}`
# 监控日期
monitor_date=`date +%F`
monitor_time=`date +%H:%M:%S`
monitor_week=`date +%u`
# controll server 数据库连接
function controll_mysql_conn01(){
mysql -u${controll_user} -p${controll_passwd} -h${controll_host} -P${controll_port} -N -e "${1}"
}
function controll_mysql_conn02(){
mysql -u${controll_user} -p${controll_passwd} -h${controll_host} -P${controll_port}
}
function controll_mysql_conn03(){
mysql -u${controll_user} -p${controll_passwd} -h${controll_host} -P${controll_port} -N
}
# MySQL从库状态(主从同步异常、延时)监控
function monitor_mysql_slave(){
# 获取MySQL从库ID,并循环查询该从库信息
sql01="select mysql_id from ${controll_db}.${controll_table_policy} where mysql_level='slave' and mysql_status=1;"
echo "${sql01}" | controll_mysql_conn03 > ${monitor_log_dir}/controll_policy_mysql_id.slave
while read line
do
mysql_id=${line}
slave_sql="select mysql_id,mysql_hostname,inet_ntoa(mysql_ip) as mysql_ip,mysql_port,mysql_instance,mysql_level,mysql_threshold_slave from ${controll_db}.${controll_table_policy} where mysql_id=${mysql_id}"
read mysql_slave_id monitor_hostname monitor_host monitor_port slave_instance mysql_level monitor_behind_master <<< `controll_mysql_conn01 "${slave_sql}"`
eval $(timeout 5 mysql -h${monitor_host} -P${monitor_port} -u${monitor_user} -p${monitor_passwd} -e "show slave status \G" 2>/dev/null | awk -F'[: ]+' 'NR>1&&$0=$2"="$3' | grep -e "Slave_IO_Running" -e "Slave_SQL_Running" -e "Seconds_Behind_Master")
Slave_IO_Running=${Slave_IO_Running}
Slave_SQL_Running=${Slave_SQL_Running}
Seconds_Behind_Master=${Seconds_Behind_Master}
# 判断Slave_IO_Running、Slave_SQL_Running、Seconds_Behind_Master
if [ ${Slave_IO_Running} != 'Yes' ];then
Slave_IO_Running='no'
else
Slave_IO_Running='yes'
fi
if [ ${Slave_SQL_Running} != 'Yes' ];then
Slave_SQL_Running='no'
else
Slave_SQL_Running='yes'
fi
if [ -z ${Seconds_Behind_Master} ];then
Seconds_Behind_Master=-1
fi
# echo "${monitor_host}:${monitor_port}#${slave_instance}##Slave_IO_Running=${Slave_IO_Running}##Slave_SQL_Running=${Slave_SQL_Running}##Seconds_Behind_Master=${Seconds_Behind_Master}"
# 获取邮件抄送人
cc_sql="select group_concat(email_address) as cc_list from ${controll_db}.${controll_table_email} where mysql_id=${mysql_slave_id} and email_type=2;"
cc_list=`controll_mysql_conn01 "${cc_sql}"`
if [ -z ${cc_list} ]
then
monitor_cc_email=${monitor_other_email}
else
monitor_cc_email=${cc_list}
fi
if [[ ${Slave_IO_Running} == 'no' || ${Slave_SQL_Running} == 'no' || ${Seconds_Behind_Master} -eq -1 ]]
then
# 将错误信息写入日志
echo "${monitor_date} ${monitor_time}#${monitor_hostname}#${monitor_port}::${slave_instance} replication have error;Slave_IO_Running=${Slave_IO_Running} and Slave_SQL_Running=${Slave_SQL_Running}" >> ${monitor_log_dir}/slave_status_${monitor_host}.log
# 发送邮件
email_subject="${slave_instance}主从同步异常"
echo "${monitor_date} ${monitor_time}#${monitor_hostname}#${monitor_host}:${monitor_port}#${slave_instance} replication have error;Slave_IO_Running=${Slave_IO_Running} and Slave_SQL_Running=${Slave_SQL_Running}" | mutt ${monitor_to_email} -s ${email_subject} -c ${monitor_cc_email}
# 记录到数据库MySQL状态信息表
info_sql01="insert into ${controll_db}.${controll_table_info}(mysql_id,info_status,slave_io_running,slave_sql_running,seconds_behind_master) values (${mysql_slave_id},'abnormal','${Slave_IO_Running}','${Slave_SQL_Running}',${Seconds_Behind_Master});"
echo "${info_sql01}" | controll_mysql_conn02
elif [ ${Seconds_Behind_Master} -gt ${monitor_behind_master} ]
then
# 将延时信息写入日志
echo "${monitor_date} ${monitor_time}#${monitor_hostname}#${monitor_port}::${slave_instance} replication have delayed;Seconds_Behind_Master=${Seconds_Behind_Master}" >> ${monitor_log_dir}/slave_status_${monitor_host}.log
# 发送邮件
email_subject="${slave_instance}主从同步延时"
echo "${monitor_date} ${monitor_time}#${monitor_hostname}#${monitor_host}:${monitor_port}#${slave_instance} replication have delayed;Seconds_Behind_Master=${Seconds_Behind_Master}" | mutt ${monitor_to_email} -s ${email_subject} -c ${monitor_cc_email}
# 记录到数据库MySQL状态信息表
info_sql02="insert into ${controll_db}.${controll_table_info}(mysql_id,info_status,slave_io_running,slave_sql_running,seconds_behind_master) values (${mysql_slave_id},'abnormal','${Slave_IO_Running}','${Slave_SQL_Running}',${Seconds_Behind_Master});"
echo "${info_sql02}" | controll_mysql_conn02
else
echo "${monitor_date} ${monitor_time}#${monitor_hostname}#${monitor_port}::${slave_instance} Ok!" >> ${monitor_log_dir}/slave_status_${monitor_host}.log
# 记录到数据库MySQL状态信息表
info_sql03="insert into ${controll_db}.${controll_table_info}(mysql_id,info_status,slave_io_running,slave_sql_running,seconds_behind_master) values (${mysql_slave_id},'normal','${Slave_IO_Running}','${Slave_SQL_Running}',${Seconds_Behind_Master});"
echo "${info_sql03}" | controll_mysql_conn02
fi
done < ${monitor_log_dir}/controll_policy_mysql_id.slave
}
# MySQL主库心跳检测
function monitor_mysql_master(){
# 获取MySQL主库ID,并循环查询该主库信息
sql01="select mysql_id from ${controll_db}.${controll_table_policy} where mysql_level='master' and mysql_master_id=0 and mysql_status=1;"
echo "${sql01}" | controll_mysql_conn03 > ${monitor_log_dir}/controll_policy_mysql_id.master
while read line
do
mysql_id=${line}
master_sql="select mysql_id,mysql_hostname,inet_ntoa(mysql_ip) as mysql_ip,mysql_port,mysql_instance,mysql_level from ${controll_db}.${controll_table_policy} where mysql_id=${mysql_id}"
read mysql_master_id monitor_hostname monitor_host monitor_port master_instance mysql_level <<< `controll_mysql_conn01 "${master_sql}"`
select_user=$(timeout 3 mysql -h${monitor_host} -P${monitor_port} -u${monitor_user} -p${monitor_passwd} -Ne "select user()")
# 获取邮件抄送人
cc_sql="select group_concat(email_address) as cc_list from ${controll_db}.${controll_table_email} where mysql_id=${mysql_master_id} and email_type=2;"
cc_list=`controll_mysql_conn01 "${cc_sql}"`
if [ -z ${cc_list} ]
then
monitor_cc_email=${monitor_other_email}
else
monitor_cc_email=${cc_list}
fi
if [ ${select_user} == "${monitor_user}@${controll_host}" ];then
# 将信息写入日志
echo "${monitor_date} ${monitor_time}#${monitor_hostname}#${monitor_port}::${master_instance} master heartbeat is OK." >> ${monitor_log_dir}/master_status_${monitor_host}.log
# 记录到数据库MySQL状态信息表
info_sql01="insert into ${controll_db}.${controll_table_info}(mysql_id,info_status,info_memo) values (${mysql_master_id},'normal','master heartbeat is OK.');"
echo "${info_sql01}" | controll_mysql_conn02
else
# 将错误信息写入日志
echo "${monitor_date} ${monitor_time}#${monitor_hostname}#${monitor_port}::${master_instance} master heartbeat have error." >> ${monitor_log_dir}/master_status_${monitor_host}.log
# 发送邮件
email_subject="${master_instance}主库心跳检测异常"
echo "${monitor_date} ${monitor_time}#${monitor_hostname}#${monitor_host}:${monitor_port}#${master_instance} master heartbeat have error." | mutt ${monitor_to_email} -s ${email_subject} -c ${monitor_cc_email}
# 记录到数据库MySQL状态信息表
info_sql02="insert into ${controll_db}.${controll_table_info}(mysql_id,info_status,info_memo) values (${mysql_master_id},'abnormal','master heartbeat have error.');"
echo "${info_sql02}" | controll_mysql_conn02
fi
done < ${monitor_log_dir}/controll_policy_mysql_id.master
}
function controll_server_mysql(){
select_user=$(timeout 3 mysql -u${controll_user} -p${controll_passwd} -h${controll_host} -P${controll_port} -N -e "select user()")
controll_hostname='yz-hgdb-gate01.lehe.com'
if [ ${select_user} == "${controll_user}@${controll_host}" ];then
# 将信息写入日志
echo "${monitor_date} ${monitor_time}#${controll_hostname}#${controll_port}::${controll_db} Controll Server MySQL heartbeat is OK." >> ${monitor_log_dir}/master_status_${controll_host}.log
else
# 将错误信息写入日志
echo "${monitor_date} ${monitor_time}#${controll_hostname}#${controll_port}::${controll_db} Controll Server MySQL heartbeat is error." >> ${monitor_log_dir}/master_status_${controll_host}.log
# 发送邮件
email_subject="Controll_Server数据库心跳检测异常"
echo "${monitor_date} ${monitor_time}#${controll_hostname}#${controll_host}${controll_port}#${controll_db} Controll Server MySQL heartbeat is error." | mutt ${monitor_to_email} -s ${email_subject} -c ${monitor_other_email}
fi
}
function main(){
# Controll Server 数据库心跳检测
controll_server_mysql
# MySQL从库状态(主从同步异常、延时)监控
monitor_mysql_slave
# MySQL主库心跳检测
monitor_mysql_master
}
main
三、mysql_monitor_wf.conf
# controll server 数据库连接用户
controll_user=xxxxx
# controll server 数据库连接用户密码
controll_passwd=xxxxx
# controll server 数据库连接主机地址
controll_host=xxxxx
# controll server 数据库连接端口号
controll_port=xxxxx
# controll server 数据库
controll_db=wf_dbbackup
# controll server 数据库信息表
controll_table_policy=db_mysql_policy
# controll server 数据库状态信息表
controll_table_info=db_mysql_info
# controll server 数据库告警email信息表
controll_table_email=db_mysql_email
# 被监控数据库连接用户
monitor_user=xxxxx
# 被监控数据库连接用户密码
monitor_passwd=xxxxx
# 日志文件路径
monitor_log_dir=/home/wufei/mysql_status_monitor/log
# 主从同步延时告警默认值
monitor_behind_master=120
# 添加收信人邮箱
monitor_to_email=wufei@xxxxx.com
# 添加抄送人邮箱
monitor_other_email=xxxxx@xxxxx.com
四、Table:db_mysql_policy
CREATE TABLE `db_mysql_policy` (
`mysql_id` int(10) unsigned NOT NULL AUTO_INCREMENT COMMENT '自增ID',
`mysql_hostname` varchar(50) NOT NULL DEFAULT 'yz-higo' COMMENT '主机名',
`mysql_expire` date NOT NULL DEFAULT '1000-01-01' COMMENT '服务器过保日期',
`mysql_ip` int(10) unsigned NOT NULL DEFAULT '2130706433' COMMENT 'IP',
`mysql_port` mediumint(5) unsigned NOT NULL DEFAULT '3306' COMMENT '端口',
`mysql_instance` varchar(50) NOT NULL DEFAULT 'mysql' COMMENT 'MySQL实例',
`mysql_level` enum('master','slave','free') DEFAULT 'free' COMMENT 'MySQL级别:主库、从库、空闲',
`mysql_master_id` int(10) unsigned NOT NULL DEFAULT '0' COMMENT 'master ID',
`mysql_version` varchar(10) NOT NULL DEFAULT '5.7.16' COMMENT 'MySQL版本',
`mysql_memory` int(5) unsigned NOT NULL DEFAULT '1' COMMENT '分配内存大小:G',
`mysql_status` tinyint(4) NOT NULL DEFAULT '0' COMMENT '状态:0:异常;1:正常;',
`mysql_sql_safe_updates` enum('on','off') DEFAULT 'off' COMMENT 'MySQL限制update/delete范围参数',
`mysql_read_only` enum('on','off') DEFAULT 'off' COMMENT '是否将MySQL从设置为只读',
`mysql_threshold_slave` int(10) DEFAULT '90' COMMENT '主从延时报警阈值',
`mysql_ctime` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间',
`mysql_mtime` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '修改时间',
`mysql_memo` varchar(50) DEFAULT '' COMMENT '备注',
PRIMARY KEY (`mysql_id`),
KEY `idx_master_id` (`mysql_master_id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8 COMMENT='MySQL信息表';
*************************** 1. row ***************************
mysql_id: 69
mysql_hostname: yz-xxbimdb-m01.lehe.com
mysql_expire: 2018-08-30
mysql_ip: 169082948
mysql_port: 3220
mysql_instance: xxxx_bim
mysql_level: master
mysql_master_id: 0
mysql_version: 5.7.16
mysql_memory: 50
mysql_status: 1
mysql_sql_safe_updates: on
mysql_read_only: off
mysql_threshold_slave: 0
mysql_ctime: 2018-09-26 20:29:47
mysql_mtime: 2018-10-10 09:39:13
mysql_memo:
*************************** 2. row ***************************
mysql_id: 237
mysql_hostname: yz-xxbimdb-s01.lehe.com
mysql_expire: 2018-08-28
mysql_ip: 169082950
mysql_port: 3220
mysql_instance: xxxx_bim
mysql_level: slave
mysql_master_id: 69
mysql_version: 5.7.16
mysql_memory: 50
mysql_status: 1
mysql_sql_safe_updates: off
mysql_read_only: on
mysql_threshold_slave: 10
mysql_ctime: 2018-09-29 13:02:51
mysql_mtime: 2019-03-07 12:21:23
mysql_memo:
五、Table:db_mysql_info
CREATE TABLE `db_mysql_info` (
`info_id` int(10) unsigned NOT NULL AUTO_INCREMENT COMMENT '自增ID',
`mysql_id` int(10) unsigned NOT NULL DEFAULT '0' COMMENT 'db_mysql_policy表关联ID',
`info_status` varchar(10) NOT NULL DEFAULT 'normal' COMMENT '监控状态:normal/abnormal',
`slave_io_running` enum('yes','no','non') DEFAULT 'non' COMMENT '从库IO进程是否正常',
`slave_sql_running` enum('yes','no','non') DEFAULT 'non' COMMENT '从库SQL进程是否正常',
`seconds_behind_master` int(10) DEFAULT '0' COMMENT '主从延时时间',
`info_ctime` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间',
`info_memo` varchar(50) DEFAULT '' COMMENT '备注',
PRIMARY KEY (`info_id`),
KEY `idx_master_id` (`mysql_id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8 COMMENT='MySQL状态信息表';
六、Table:db_mysql_email
CREATE TABLE `db_mysql_email` (
`email_id` int(10) unsigned NOT NULL AUTO_INCREMENT COMMENT '自增ID',
`mysql_id` int(10) unsigned NOT NULL COMMENT 'MySQL信息表关联ID',
`email_address` varchar(50) NOT NULL COMMENT '邮箱地址',
`email_type` tinyint(4) NOT NULL DEFAULT '0' COMMENT '状态:0:异常;1:主送;2:抄送;',
`email_ctime` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间',
`email_mtime` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '修改时间',
`mysql_memo` varchar(50) DEFAULT '' COMMENT '备注',
PRIMARY KEY (`email_id`),
KEY `idx_mysql_id` (`mysql_id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8 COMMENT='MySQL告警邮件信息表';