#! /usr/bin/env python
# -*- coding: utf-8 -*-
import sys, time, random, json, argparse
from tornado import web, ioloop
from datetime import datetime
from processor import prometheus_alert, cronjob_extract
from event_log import EventLogger
from sender.settings import EVENT_LOG_FILE, ALERT_TTL, AGG_LOOP_MS
from processor import kibana_alert
# Python 2 only: reload(sys) makes sys.setdefaultencoding available again
# (site.py removes it at interpreter startup).  Setting the default codec to
# UTF-8 lets the handlers below concatenate UTF-8 byte-string literals with
# the unicode values produced by json.loads without raising UnicodeDecodeError.
reload(sys)
sys.setdefaultencoding('utf-8')
class AlertPrometheus(web.RequestHandler):
    """Webhook handler for Alertmanager notifications (POST /alert/prometheus).

    The JSON request body is reduced to a flat dict (type, timestamp, level,
    description, summary, service, sms, aiui_group, namespace, status) and
    handed to ``prometheus_alert``.
    """

    # Summary prefixes; these are emitted verbatim in the outgoing message.
    _PREFIX_FIRING = '[ `告警` ]'
    _PREFIX_FIRING_GROUP = '[` 聚合告警` ]'
    _PREFIX_RESOLVED = '[<font color="info"> 恢复 </font>]'

    def post(self):
        """Parse the webhook body, build the alert record and dispatch it.

        Raises:
            web.HTTPError: 400 when the body is not a JSON object carrying a
                ``status`` field and an ``alerts`` list.
        """
        payload_json = self._parse_payload(self.request.body)
        print("payload_json====> %s" % json.dumps(payload_json, ensure_ascii=False))

        aiui_group, level, service, sms = self._routing_labels(payload_json)
        alert_row = payload_json['alerts']  # list; more than one entry for grouped alerts

        try:
            # "region" distinguishes the cluster the alert came from.
            namespace = alert_row[0]['labels']['region']
        except (KeyError, IndexError, TypeError):
            namespace = "AIUI"

        summary, description = self._summarize(payload_json['status'], alert_row)

        data = {
            'type': "prometheus",
            'timestamp': datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
            'level': level,
            'description': description,
            'summary': summary,
            'service': service,
            'sms': sms,
            'aiui_group': aiui_group,
            # Cluster name; per the original author's note it is not consumed
            # downstream either.
            'namespace': namespace,
            'status': payload_json['status']
        }
        # ensure_ascii=False keeps non-ASCII text human-readable in the log line.
        print("prom-format-data: %s" % json.dumps(data, ensure_ascii=False))
        prometheus_alert(data)

    @staticmethod
    def _parse_payload(body):
        """Decode the request body as JSON and check the fields post() needs.

        NOTE(review): the body used to be passed to eval() after removing all
        spaces and cutting the groupKey value out by string slicing.  The body
        is untrusted network input, so it is parsed with json.loads instead;
        a JSON-escaped groupKey needs no special treatment there.  In Python 2
        json.loads yields unicode strings; mixing them with the byte-string
        literals below relies on the UTF-8 default encoding set at module
        import.
        """
        try:
            payload_json = json.loads(body)
        except ValueError:
            # Covers malformed JSON and undecodable bytes.
            raise web.HTTPError(400)
        if not isinstance(payload_json, dict):
            raise web.HTTPError(400)
        if 'status' not in payload_json or not isinstance(payload_json.get('alerts'), list):
            raise web.HTTPError(400)
        return payload_json

    @staticmethod
    def _routing_labels(payload_json):
        """Return (aiui_group, level, service, sms) from ``commonLabels``.

        The defaults are applied as a set: if any one of the four labels is
        missing, all four fall back, matching the handler's previous behavior.
        """
        try:
            labels = payload_json['commonLabels']
            return (
                labels['aiui_group'],
                labels['level'],
                labels['service'],  # alert category
                labels['sms'],      # 'true' or 'false'
            )
        except (KeyError, TypeError):
            return ('aiui-ops', "high", "xxx告警", 'false')

    @classmethod
    def _summarize(cls, status, alerts):
        """Return (summary, description) for the notification.

        The summary is a status prefix plus the first alert's ``summary``
        annotation.  The description is every alert's ``description``
        annotation joined by newlines, each exactly once.  Missing annotations
        are skipped rather than aborting the whole notification.
        """
        if status == "resolved":
            prefix = cls._PREFIX_RESOLVED
        elif len(alerts) < 2:
            prefix = cls._PREFIX_FIRING
        else:
            prefix = cls._PREFIX_FIRING_GROUP

        try:
            summary = prefix + alerts[0]['annotations']['summary']
        except (KeyError, IndexError, TypeError):
            summary = ""

        descriptions = []
        for alert in alerts:
            try:
                text = alert['annotations']['description']
            except (KeyError, TypeError):
                continue
            descriptions.append(text)
        try:
            description = '\n'.join(descriptions)
        except TypeError:
            # A non-string annotation value; keep the alert flowing without it.
            description = ""
        return summary, description
class KibanaCallbackHandler(web.RequestHandler):
    """Webhook handler for Kibana alert callbacks (POST /alert/kibana).

    Builds a flat alert record from the JSON body, appends it to the event
    log and forwards it to ``kibana_alert``.
    """

    def post(self):
        """Handle one Kibana callback.

        Raises:
            web.HTTPError: 400 when the body is not valid JSON or lacks one of
                the required fields.
        """
        try:
            payload = json.loads(self.request.body)
        except ValueError:
            raise web.HTTPError(400)
        print("payload==> %s" % json.dumps(payload, ensure_ascii=False))

        try:
            message = payload['message']
            subject = payload['subject']
            percentage = payload['match_percentage']
            level = payload['_err_level']
            c_count = payload['match_count']
            h_hits = payload['denominator']
            alert_time = payload['@timestamp']
        except (KeyError, TypeError):
            # A required field is missing, or the body is not a JSON object.
            raise web.HTTPError(400)

        # Fall back to the default operations group when none is supplied.
        xxx_group = payload.get('xxx_group', 'xxx-ops')

        message_v = '`' + level + '`' + '级别 ,' + message + ' ,比例:' + str(percentage) + '% ,匹配数目:' + str(
            c_count) + ' ,总计' + str(h_hits)
        data = {
            'type': 'kibana',
            '@timestamp': int(time.time()),
            'message': message_v,
            'alert_time': alert_time,
            'level': level,
            'subject': subject,
            # The record key stays 'aiui_group'; its value is the group read
            # above (the previous code referenced an undefined name here).
            'aiui_group': xxx_group
        }
        print("data===> %s" % json.dumps(data, ensure_ascii=False))

        now_time = time.time()
        # Random id passed to the event log alongside the record.
        unique_value = str(random.randint(0, 3000000000))
        logger = EventLogger(EVENT_LOG_FILE)
        logger.write(now_time, "alert", "merge", unique_value, ALERT_TTL, json.dumps(data, ensure_ascii=False))
        # NOTE(review): the return value of kibana_alert is handed back to
        # Tornado unchanged, as before - confirm it is None or awaitable.
        return kibana_alert(data)
def make_app():
    """Create the Tornado application exposing the two alert webhook routes."""
    routes = [
        (r"/alert/prometheus", AlertPrometheus),
        (r"/alert/kibana", KibanaCallbackHandler),
    ]
    return web.Application(routes)
if __name__ == "__main__":
    # Command-line entry point: parse the listen address, start the HTTP
    # server, schedule the periodic extraction job and run the IO loop.
    parser = argparse.ArgumentParser()
    parser.add_argument('--port', type=int, help='监听端口', default=8080)
    parser.add_argument('--host', type=str, help='监听地址', default="0.0.0.0")
    args = parser.parse_args()
    app = make_app()
    app.listen(port=args.port, address=args.host)
    # Report the address actually bound instead of a hard-coded port.
    print("start app on %s:%s" % (args.host, args.port))
    # Runs cronjob_extract every AGG_LOOP_MS milliseconds (the original note
    # gives AGG_LOOP_MS = 65000, i.e. once every 65 s; the value is defined
    # in sender.settings).
    ioloop.PeriodicCallback(cronjob_extract, AGG_LOOP_MS).start()
    # ioloop.PeriodicCallback(cronjob_aggregation, AGG_LOOP_MS).start()
    ioloop.IOLoop.current().start()
# Source article: "alertmanager告警groupkey字段去除" (removing the groupKey field from Alertmanager alerts)
# First published 2024-08-24 09:23:30