azkaban多项目之间依赖检查执行
- 编写检测脚本(python3),azkaban_project_auto.py,内容如下:
#!/usr/bin/python3
# coding=UTF-8
# @Desc: 处理Azkaban project执行流水线
import sys
import time
import pymysql.cursors
import requests
import click
import json
# --- Azkaban web server: base URL and login credentials (placeholders) ---
AZKABANURL = "http://ip:port"
USERNAME = "username"
PASSWORD = "password"

# --- MySQL connection settings; `execute` below builds its pymysql config from these ---
mysql_host = "host"
mysql_port = 3306
mysql_user = "user"
mysql_pass = "password"
mysql_db = "azkaban"
def init_door():
    """Read ``list.conf`` and run ``exec_flow`` once per configured line.

    Every non-blank line of ``list.conf`` (looked up in the current working
    directory) must be a Python dict literal with the keys
    ``depend_projects`` and ``target_flows``. Both values are passed
    unchanged to ``exec_flow``.

    Raises:
        OSError: ``list.conf`` cannot be opened.
        ValueError, SyntaxError: a line is not a valid Python literal.
        KeyError: a line lacks ``depend_projects`` or ``target_flows``.
    """
    # Function-scope import so this block does not depend on the file's
    # top-level import list.
    import ast

    with open("list.conf", "r") as f:
        exec_flow_lists = f.readlines()

    for flow in exec_flow_lists:
        flow = flow.strip()
        # A blank (or trailing empty) line is not a flow definition; the
        # previous eval("") raised SyntaxError on it.
        if not flow:
            continue
        # SECURITY NOTE: this used to be eval(flow), which would execute any
        # expression found in the config file. literal_eval only accepts
        # literals (dict/list/tuple/str/number/...), so a line that relied on
        # arbitrary expressions will now raise ValueError instead of running.
        flow_dict = ast.literal_eval(flow)
        print("\033[0;36;40m"+"*" * 10 + "开始执行FLOW" + "*" * 10+"\033[0m")
        exec_flow(depend_projects=flow_dict["depend_projects"], target_flows=flow_dict["target_flows"])
def exec_flow(depend_projects, target_flows):
    """Launch the target flows once every dependency project has finished.

    Args:
        depend_projects: iterable of project names that must exist and for
            which ``check_project_exec_result`` must report success.
        target_flows: iterable of indexable items where index 0 is the
            project name and index 1 is the flow name to launch.

    Exits the process with status 1 when a dependency or target project is
    not found by ``judge_online``, and with status 2 when ``exec_flows``
    returns a falsy execution id. Returns ``None`` without launching anything
    when ``check_project_exec_result`` is falsy.
    """
    # Guard 1: every dependency project has to be known to Azkaban.
    for dependency in depend_projects:
        if judge_online(dependency):
            continue
        print(f"\033[0;31;40m依赖的项目:{dependency}不存在,请核对!!!\033[0m")
        sys.exit(1)

    # Guard 2: every project we are about to launch has to be known as well.
    for target in target_flows:
        if judge_online(target[0]):
            continue
        print(f"\033[0;31;40m要执行的项目:{target[0]}不存在,请核对!!!\033[0m")
        sys.exit(1)

    # Guard 3: nothing to do until the dependencies have run successfully.
    if not check_project_exec_result(depend_projects):
        return

    print("\033[0;32;40m所有依赖项目已执行,开始执行目标flow\033[0m")
    session_id = get_session_id()

    # Launch each target flow that passes the per-target check.
    for target in target_flows:
        # NOTE(review): presumably this is truthy when the target has not yet
        # run today -- confirm against check_target_exec_result.
        if not check_target_exec_result(target[0]):
            continue
        print(f"开始执行project:{target[0]},flow:{target[1]}")
        exec_id = exec_flows(session_id, target[0], target[1])
        if exec_id:
            print(f"执行project:{target[0]},flow:{target[1]}成功!!!")
        else:
            print(f"执行project:{target[0]},flow:{target[1]}失败!!!")
            sys.exit(2)
def execute(sql, args=None):
    """Run one SQL statement on the configured MySQL database and return its rows.

    A new connection is opened for every call, using the module-level
    ``mysql_*`` settings, and is always closed again -- also when the
    statement raises.

    Args:
        sql: statement text. May contain ``%s`` placeholders when ``args``
            is supplied.
        args: optional parameters handed to ``cursor.execute`` so the driver
            does the quoting. ``None`` (the default) sends ``sql`` as-is,
            exactly like the previous one-argument form.

    Returns:
        The result of ``cursor.fetchall()``; rows are dicts because the
        connection uses ``DictCursor``.
    """
    config = {'host': mysql_host, 'port': mysql_port, 'user': mysql_user, 'password': mysql_pass,
              'db': mysql_db,
              'charset': 'utf8', 'cursorclass': pymysql.cursors.DictCursor}
    connection = pymysql.connect(**config)
    try:
        # The with-block closes the cursor; no explicit cursor.close() needed.
        with connection.cursor() as cursor1:
            cursor1.execute(sql, args)
            result = cursor1.fetchall()
        connection.commit()
    finally:
        # Previously the connection leaked whenever execute()/fetchall() raised.
        connection.close()
    return result


def judge_online(project_name):
    """Return True when the ``projects`` table has a row named ``project_name``.

    The name is passed as a bound parameter rather than formatted into the
    SQL text, so names containing quotes neither break nor alter the query.
    """
    sql = "select * from projects where name=%s"
    execute_result = execute(sql, (project_name,))
    return bool(execute_result)
def check_project_exec_result(depend_projects):
"""
检查关联项目是否执行完毕
"""
for depend_project in depend_projects:
print(f"检查{depend_project}是否已经执行............")
sql = """SELECT *
FROM (
SELECT t2.name AS project_name, t1.*
FROM (
SELECT project_id, flow_id, status
, substr(FROM_UNIXTIME(start_time / 1000), 1, 19) AS start_time
, substr(FROM_UNIXTIME(end_time / 1000), 1, 19) AS end_time
, enc_type
FROM azkaban.execution_flows
WHERE status = 50 AND substr(FROM_UNIXTIME(end_time/1000), 1, 19)>=DATE_FORMAT(CURDATE(),'%Y-%m-%d %H:%i:%s')
) t1
INNER JOIN (
SELECT *
FROM projects
WHERE name = '{}&