#!/usr/bin/env python
import base64
import datetime
import hashlib
import hmac
import json
import sys
import time
import urllib
import urllib.parse

import jsonpath
import pymysql
import requests
from apscheduler.schedulers.blocking import BlockingScheduler


def get_timestamp_sign(secret=None, timestamp=None):
    """Build the ``(timestamp, sign)`` pair used to sign a DingTalk robot call.

    The signature is the HMAC-SHA256 digest of the text
    ``<timestamp>`` + newline + ``<secret>``, keyed with the secret itself,
    then Base64-encoded and URL-quoted.

    Args:
        secret: Signing secret (the value starting with ``SEC``). When
            omitted, the secret built into this script is used.
        timestamp: Milliseconds since the epoch, as ``int`` or ``str``. When
            omitted, the current time is used.

    Returns:
        A tuple ``(timestamp, sign)`` where both items are ``str``.
    """
    if secret is None:
        # Value starting with "SEC".
        secret = "SEC642fb901f9c3674516ed826f51bc9d8cc9521f3d04f569215ce08de616b01e4f"
    if timestamp is None:
        timestamp = round(time.time() * 1000)
    timestamp = str(timestamp)

    key = secret.encode('utf-8')
    string_to_sign = '{}\n{}'.format(timestamp, secret).encode('utf-8')
    digest = hmac.new(key, string_to_sign, digestmod=hashlib.sha256).digest()
    # quote_plus makes the Base64 text ('+', '/', '=') safe for a query string.
    sign = urllib.parse.quote_plus(base64.b64encode(digest))
    return (timestamp, sign)
def get_data_from_mysql(sql):
    """Run one query against the local ``dolphinscheduler`` MySQL database.

    A new connection is opened for every call and is always closed before
    returning, whether or not the query succeeded.

    Args:
        sql: Complete SQL statement to execute.

    Returns:
        The rows returned by ``cursor.fetchall()``, or an empty tuple when
        the query failed, so callers can always iterate the result.

    Raises:
        pymysql.MySQLError: If the connection itself cannot be established
            (only query errors are handled here).
    """
    connect = pymysql.connect(
        host='127.0.0.1',
        port=3306,
        database='dolphinscheduler',
        user='dolphinscheduler',
        passwd='zp@2021',
        charset='utf8')
    try:
        with connect.cursor() as cursor:
            cursor.execute(sql)
            return cursor.fetchall()
    except pymysql.MySQLError as exc:
        print("Error: unable to fetch data")
        print(exc)
        return ()
    finally:
        # Runs on both the success and the error path, so the connection is
        # never leaked.
        connect.close()
def send_dingding(text):
    """Send a text alert to the DingTalk robot webhook.

    The request is signed with the pair returned by ``get_timestamp_sign()``
    and the message content is prefixed with ``告警:``.

    Args:
        text: Alert text to deliver.

    Returns:
        ``'ok'`` when the response's ``errmsg`` field is ``'ok'``; otherwise
        ``'fail: <response body>'``.

    Raises:
        requests.RequestException: On network failure or when the webhook
            does not answer within the timeout.
    """
    timestamp, sign = get_timestamp_sign()
    # The query string must carry "&timestamp=<ms>&sign=<signature>".
    url = ("https://oapi.dingtalk.com/robot/send?access_token=dskadkefb68f8fe5dc3bd029ffc8dakpdapbefa79f17e6ebd84fb7a85ace"
           + "&timestamp=" + timestamp + "&sign=" + sign)
    headers = {"Content-type": "application/json"}
    payload = {
        'msgtype': 'text',
        'text': {
            'content': '告警:%s' % text
        }
    }
    # Without a timeout a stalled connection would block the caller forever.
    res = requests.post(url, data=json.dumps(payload), headers=headers, timeout=10)
    try:
        errmsg = json.loads(res.text).get('errmsg')
    except (ValueError, AttributeError):
        # Body is not JSON, or not a JSON object: treat as a failure.
        errmsg = None
    if errmsg == 'ok':
        return 'ok'
    return 'fail: %s' % res.text
def _join_matches(matches):
    """Concatenate jsonpath matches into one string ('' when nothing matched)."""
    # jsonpath.jsonpath() yields False instead of a list when the path has no
    # match, which "".join() cannot handle.
    if not matches:
        return ""
    return "".join(str(match) for match in matches)


def _format_time(value):
    """Format a datetime as 'YYYY-mm-dd HH:MM:SS' ('' when the value is None)."""
    if value is None:
        return ""
    return value.strftime('%Y-%m-%d %H:%M:%S')


def _task_failure_text(row):
    """Build the alert text for one row selected from t_ds_task_instance."""
    task_name, task_type, started, task_json, app_link, ended = row
    yarn_app_id = "" if app_link is None else app_link
    # Doubled backslashes are stripped from the stored JSON before parsing.
    task_info = json.loads(task_json.replace('\\\\', ''))
    task_id = _join_matches(jsonpath.jsonpath(task_info, "$..id"))
    params = jsonpath.jsonpath(task_info, "$..params")
    command = "".join(params[0]).replace("\n", " ") if params else ""
    return ("任务实例" + task_name + "执行失败,类型:[" + task_type
            + "]任务开始时间:[" + _format_time(started)
            + "] 任务失败时间:[" + _format_time(ended)
            + "] 任务ID:[" + task_id
            + "] 关联yarn app id:[" + yarn_app_id
            + "] 执行的命令:[" + command + "]")


def _alert_text(row):
    """Build the alert text for one t_ds_alert row, or None for other titles."""
    created, title, alert_log, content = row
    start_time = _format_time(created)
    if title == "Fault tolerance warning":
        # SECURITY: eval() executes whatever expression is stored in the
        # alert content column. Kept as-is to preserve parsing behaviour;
        # replace with a safe parser once the stored format is confirmed.
        alert_info = eval(content)
        typename = _join_matches(jsonpath.jsonpath(alert_info, "$..type"))
        hosts = _join_matches(jsonpath.jsonpath(alert_info, "$..host"))
        event = _join_matches(jsonpath.jsonpath(alert_info, "$..event"))
        level = _join_matches(jsonpath.jsonpath(alert_info, "$..warning level"))
        return ("服务告警时间:[" + start_time + "] 告警类型:[" + title
                + "] 告警服务器:[" + hosts
                + "] 角色:[" + typename + "] 事件:[" + event
                + "] 告警级别:[" + level
                + "] alert告警日志:[" + (alert_log or "") + "]")
    if title == "start process failed":
        # SECURITY: same eval() caveat as above.
        alert_info = eval(content)
        process_instance_name = _join_matches(
            jsonpath.jsonpath(alert_info, "$..process instance name"))
        task_name = _join_matches(jsonpath.jsonpath(alert_info, "$..task name"))
        task_state = _join_matches(jsonpath.jsonpath(alert_info, "$..task state"))
        host = _join_matches(jsonpath.jsonpath(alert_info, "$..host"))
        log_path = _join_matches(jsonpath.jsonpath(alert_info, "$..log path"))
        return ("服务告警时间:[" + start_time + "] 告警类型:[" + title + "]"
                + " 内容:任务实例" + task_name
                + "中的" + process_instance_name
                + "工作流执行状态为:" + task_state
                + ",请查看服务器" + host
                + "上的" + log_path + "日志获取详情。")
    return None


def analysis_processing():
    """Forward the last minute's task failures and alerts to DingTalk.

    Queries ``t_ds_task_instance`` for rows with ``state=6`` and
    ``t_ds_alert`` for rows whose title does not contain ``success``, both
    limited to the last minute. Every task row, and every alert titled
    ``Fault tolerance warning`` or ``start process failed``, is formatted,
    printed and passed to ``send_dingding``. Other alert titles are ignored.
    """
    monitoring_time = (datetime.datetime.now() - datetime.timedelta(minutes=1)).strftime("%Y-%m-%d %H:%M:%S")
    process_instance = "select name,task_type,start_time,task_json,app_link,end_time " \
                       "from t_ds_task_instance where " \
                       "state=6 and end_time>='%s'" % monitoring_time
    alert_instance = "select create_time,title,log,content " \
                     "from t_ds_alert " \
                     "where title not like '%%success%%' and create_time>='%s'" % monitoring_time

    # "or ()" keeps the loops safe if a query yields None instead of rows.
    failed_tasks = get_data_from_mysql(process_instance) or ()
    alerts = get_data_from_mysql(alert_instance) or ()

    for row in failed_tasks:
        text = _task_failure_text(row)
        print(text)
        send_dingding(text)

    for row in alerts:
        text = _alert_text(row)
        if text is None:
            continue
        print(text)
        send_dingding(text)
def main():
    """Schedule ``analysis_processing`` and run the scheduler until stopped.

    The job is registered with an ``interval`` trigger of one minute. On
    ``KeyboardInterrupt`` or ``SystemExit`` the process exits with a
    farewell message.
    """
    interval_args = {'minutes': 1}
    try:
        sched = BlockingScheduler()
        # Register the polling job: it fires once every minute.
        sched.add_job(analysis_processing, 'interval', **interval_args)
        # Hand control to the scheduler.
        sched.start()
    except (KeyboardInterrupt, SystemExit):
        sys.exit("程序退出~")
if __name__ == "__main__":
    # Start the scheduler only when run as a script, not when imported.
    main()
# DolphinScheduler alerts forwarded to DingTalk.
# (Web-page residue: "latest recommended article, published 2025-03-13 15:04:28".)