dolphinscheduler告警到钉钉

该脚本实现了一个自动化监控和告警系统:定时从MySQL数据库中查询任务实例和警告信息,当发现失败的任务实例或特定类型的警告时,发送钉钉消息通知。系统涉及时间戳签名认证、数据库操作、JSON解析以及 DingTalk API 调用等功能。

摘要生成于 C知道 ,由 DeepSeek-R1 满血版支持, 前往体验 >

#!/usr/bin/env python
import ast
import base64
import datetime
import hashlib
import hmac
import json
import sys
import time
import urllib
import urllib.parse

import jsonpath
import pymysql
import requests
from apscheduler.schedulers.blocking import BlockingScheduler


def get_timestamp_sign(
        secret="SEC642fb901f9c3674516ed826f51bc9d8cc9521f3d04f569215ce08de616b01e4f",
        timestamp=None):
    """Build the ``(timestamp, sign)`` pair for a signed DingTalk robot request.

    The signature is the HMAC-SHA256 of "<timestamp>", a newline and
    "<secret>", keyed with the secret itself, then base64-encoded and
    URL-quoted so it can be appended to a query string as-is.

    Args:
        secret: Robot signing secret (the value starting with "SEC").
            Defaults to the secret that was previously hard-coded here.
        timestamp: Millisecond epoch timestamp (str or int). When None the
            current time is used.

    Returns:
        A ``(timestamp, sign)`` tuple of two strings.
    """
    # NOTE(review): the default secret lives in source control; consider
    # loading it from configuration or the environment instead.
    if timestamp is None:
        timestamp = str(round(time.time() * 1000))
    else:
        timestamp = str(timestamp)

    key = secret.encode('utf-8')
    payload = '{}\n{}'.format(timestamp, secret).encode('utf-8')
    digest = hmac.new(key, payload, digestmod=hashlib.sha256).digest()
    # urllib.parse must be imported explicitly; "import urllib" alone does
    # not guarantee the submodule is loaded.
    sign = urllib.parse.quote_plus(base64.b64encode(digest))
    return (timestamp, sign)


def get_data_from_mysql(sql, params=None):
    """Run a query against the dolphinscheduler database and return all rows.

    Args:
        sql: SQL statement to execute.
        params: Optional query arguments passed to ``cursor.execute`` so the
            driver can escape them; None executes ``sql`` verbatim.

    Returns:
        The rows from ``cursor.fetchall()``. If the query fails, the error is
        printed and an empty tuple is returned so callers can still take
        ``len()`` of, or iterate over, the result.

    Raises:
        pymysql.MySQLError: if the connection itself cannot be established
            (connection errors are not swallowed, as before).
    """
    connect = pymysql.connect(
        host='127.0.0.1',
        port=3306,
        database='dolphinscheduler',
        user='dolphinscheduler',
        passwd='zp@2021',
        charset='utf8')
    try:
        cursor = connect.cursor()
        try:
            cursor.execute(sql, params)
            return cursor.fetchall()
        except pymysql.MySQLError as exc:
            print("Error: unable to fetch data", exc)
            return ()
        finally:
            cursor.close()
    finally:
        # Always release the connection; the previous early ``return`` inside
        # ``try`` skipped the close on every successful query.
        connect.close()


def send_dingding(text):
    """Post a text alert to the DingTalk robot webhook.

    Args:
        text: Alert body; it is sent prefixed with "告警:".

    Returns:
        ``'ok'`` when the response's ``errmsg`` field equals ``'ok'``;
        otherwise a string starting with ``'fail: '`` followed by the raw
        response body, or by the transport error when the request itself
        failed.
    """
    timestamp, sign = get_timestamp_sign()
    url = "https://oapi.dingtalk.com/robot/send?access_token=dskadkefb68f8fe5dc3bd029ffc8dakpdapbefa79f17e6ebd84fb7a85ace" + "&timestamp=" + timestamp + "&sign=" + sign
    headers = {"Content-type": "application/json"}
    values = {
        'msgtype': 'text',
        'text': {
            'content': '告警:%s' % text
        }
    }
    try:
        # A timeout keeps a stalled connection from blocking the caller
        # (the scheduler job) indefinitely.
        res = requests.post(url, data=json.dumps(values), headers=headers, timeout=10)
    except requests.RequestException as exc:
        return 'fail: %s' % exc

    try:
        errmsg = json.loads(res.text).get('errmsg')
    except (ValueError, AttributeError):
        # Body is not JSON, or is JSON but not an object: treat as a failure
        # and report the raw body below.
        errmsg = None

    if errmsg == 'ok':
        return 'ok'

    return 'fail: %s' % res.text


def _format_time(value):
    """Format a datetime read from the DB for alert text; None becomes ''."""
    if value is None:
        return ""
    return value.strftime('%Y-%m-%d  %H:%M:%S')


def _join_matches(matches):
    """Concatenate jsonpath results (or any iterable) into one string.

    ``jsonpath.jsonpath`` returns False when nothing matches and matched
    values are not necessarily strings, so both cases are normalised here.
    """
    if not matches:
        return ""
    return "".join(str(item) for item in matches)


def _parse_alert_content(raw):
    """Parse the ``content`` column of an alert row into Python objects.

    This used to be ``eval(raw)``, which executes whatever text is stored in
    the table. JSON is tried first; ``ast.literal_eval`` is the fallback for
    Python-literal syntax and, unlike ``eval``, cannot run code.
    """
    try:
        return json.loads(raw)
    except ValueError:
        return ast.literal_eval(raw)


def _build_task_message(row):
    """Build the alert text for one failed task-instance row."""
    name, task_type, started, task_json, app_link, ended = row
    task = json.loads(task_json.replace('\\\\', ''))
    task_ids = jsonpath.jsonpath(task, "$..id")
    params = jsonpath.jsonpath(task, "$..params")
    command = _join_matches(params[0] if params else "").replace("\n", " ")
    yarn_app_id = "" if app_link is None else app_link
    return ("任务实例{}执行失败,类型:[{}]任务开始时间:[{}] 任务失败时间:[{}"
            "] 任务ID:[{}] 关联yarn app id:[{}"
            "] 执行的命令:[{}]").format(
        name, task_type, _format_time(started), _format_time(ended),
        _join_matches(task_ids), yarn_app_id, command)


def _build_alert_message(row):
    """Build the alert text for one alert row, or None for unhandled titles."""
    created, title, alert_log, content = row
    when = _format_time(created)

    if title == "Fault tolerance warning":
        info = _parse_alert_content(content)
        return ("服务告警时间:[{}] 告警类型:[{}] 告警服务器:[{}"
                "] 角色:[{}] 事件:[{}] 告警级别:[{}"
                "] alert告警日志:[{}]").format(
            when, title,
            _join_matches(jsonpath.jsonpath(info, "$..host")),
            _join_matches(jsonpath.jsonpath(info, "$..type")),
            _join_matches(jsonpath.jsonpath(info, "$..event")),
            _join_matches(jsonpath.jsonpath(info, "$..warning level")),
            "" if alert_log is None else alert_log)

    if title == "start process failed":
        info = _parse_alert_content(content)
        return ("服务告警时间:[{}] 告警类型:[{}]  内容:任务实例{}"
                "中的{}工作流执行状态为:{},请查看服务器"
                "{}上的{}日志获取详情。").format(
            when, title,
            _join_matches(jsonpath.jsonpath(info, "$..task name")),
            _join_matches(jsonpath.jsonpath(info, "$..process instance name")),
            _join_matches(jsonpath.jsonpath(info, "$..task state")),
            _join_matches(jsonpath.jsonpath(info, "$..host")),
            _join_matches(jsonpath.jsonpath(info, "$..log path")))

    return None


def _notify(text):
    """Print the alert, push it to DingTalk and surface a failed delivery."""
    print(text)
    status = send_dingding(text)
    if status != 'ok':
        print(status)


def analysis_processing():
    """Look up the last minute's failures and alerts and push them to DingTalk.

    Two queries are issued against the dolphinscheduler database:

    * task instances in state 6 whose end_time falls within the last minute;
    * rows of t_ds_alert created within the last minute whose title does not
      contain "success".

    Every failed task produces one message. Alert rows produce a message only
    when their title is "Fault tolerance warning" or "start process failed".
    A row that cannot be parsed is reported on stdout and skipped so the
    remaining rows are still delivered.
    """
    monitoring_time = (datetime.datetime.now() - datetime.timedelta(minutes=1)).strftime("%Y-%m-%d %H:%M:%S")

    # monitoring_time is generated locally, so interpolating it is safe here.
    process_instance = "select name,task_type,start_time,task_json,app_link,end_time " \
                       "from t_ds_task_instance where " \
                       "state=6 and end_time>='%s'" % monitoring_time

    alert_instance = "select create_time,title,log,content " \
                     "from t_ds_alert " \
                     "where title not like '%%success%%' and create_time>='%s'" % monitoring_time

    # A failed query may yield None; treat that the same as "no rows".
    failed_tasks = get_data_from_mysql(process_instance) or ()
    alerts = get_data_from_mysql(alert_instance) or ()

    for row in failed_tasks:
        try:
            text = _build_task_message(row)
        except (ValueError, TypeError, AttributeError) as exc:
            print("Error: unable to parse task instance row", exc)
            continue
        _notify(text)

    for row in alerts:
        try:
            text = _build_alert_message(row)
        except (ValueError, SyntaxError, TypeError, AttributeError) as exc:
            print("Error: unable to parse alert row", exc)
            continue
        if text is not None:
            _notify(text)


def main():
    """Schedule analysis_processing at a one-minute interval and run it.

    On KeyboardInterrupt or SystemExit the process exits via ``sys.exit``
    with a farewell message.
    """
    try:
        sched = BlockingScheduler()
        # Register the polling job: one run per minute.
        sched.add_job(analysis_processing, trigger='interval', minutes=1)
        # Hand control to the scheduler.
        sched.start()
    except (KeyboardInterrupt, SystemExit):
        sys.exit("程序退出~")


# Start the scheduler only when this file is executed as a script.
if __name__ == "__main__":
    main()

评论 7
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值