from databricks import sql
from concurrent.futures import ThreadPoolExecutor
from datetime import time, datetime
var_checkbatchno = dbutils.widgets.get("checkbatchno")
var_dt = dbutils.widgets.get("dt")
# dt = dbutils.widgets.get("dt")
# startdate = dbutils.widgets.get("startdate")
# enddate = dbutils.widgets.get("enddate")
# groupRuleCode = dbutils.widgets.get("groupRuleCode")
print("获取到的checkbatchno为:" + var_checkbatchno)
print("获取到的dt为:" + var_dt)
formatted_date_minus_one = datetime.now().strftime("%Y%m%d%H%M%S")
def execute_query_and_insert(sql_statement):
try:
connection_exet = sql.connect(
server_hostname=dbutils.widgets.get("sql_warehouse_server_hostname"),
http_path=dbutils.widgets.get("sql_warehouse_http_path_dm_urp"),
access_token=dbutils.secrets.get(scope="lakehouse", key="cdlurpdctoken"),
catalog=dbutils.widgets.get("databricks_catalog")
# server_hostname=dbutils.widgets.get("server_hostname"),
# http_path=dbutils.widgets.get("server_hostname"),
# access_token=dbutils.widgets.get("access_token"),
# catalog=dbutils.widgets.get("catalog")
)
cursor_exet = connection_exet.cursor()
cursor_exet.execute(sql_statement)
connection_exet.commit()
cursor_exet.close()
connection_exet.close()
except Exception as e:
raise Exception(f"执行 SQL 时出错: {str(e)},SQL 语句为: {sql_statement}")
def execute_queries_and_insert():
try:
# 连接指定catalog ConnCatalog=lakehouse_uat
connection = sql.connect(
server_hostname=dbutils.widgets.get("sql_warehouse_server_hostname"),
http_path=dbutils.widgets.get("sql_warehouse_http_path_dm_urp"),
access_token=dbutils.secrets.get(scope="lakehouse", key="cdlurpdctoken"),
catalog=dbutils.widgets.get("databricks_catalog")
)
cursor = connection.cursor()
cur_sql = ""
outdt = ""
datesql = ""
# 查询特殊业务场景的清单(用于筛选 rulecodesys 出现次数 >1 的情况)
select_special_query = f"""
select rulecodesys, count(1)
from (
select a.rulesql, tmp.startdate, tmp.enddate, tmp.checkbatchno, tmp.datetype,
a.rulesource, tmp.scenecode, tmp.SCENERULECODE, tmp.dt, tmp.rulecodesys
from (
select t3.startdate, t3.enddate, t3.checkbatchno, t3.datetype, t1.rulesource,
t4.scenecode, t4.SCENERULECODE, '{var_dt}' as dt, t5.rulecodesys
from dm_urp_bui_dgs.ruletypeconfig t1
inner join dm_urp_bui_dgs.rulegrouprelation t2 on t1.rulecode = t2.rulecode
inner join dm_urp_bui_dgs.checkbatchno t3 on t2.groupcode = t3.groupRuleCode
inner join dm_urp_bui_dgs.rulecodetosys t5 on t5.rulecode = t1.rulecode
left join dm_urp_bui_dgs.SPECIALBUSINESS t4 on t5.rulecodesys = t4.RULECODE
where t3.systemCode = 'PRIP'
and t3.exestatus = '01'
and t3.checkbatchno = '{var_checkbatchno}'
) tmp
inner join dm_urp_bui_dgs.ruletypeconfig a on tmp.SCENERULECODE = a.rulecode
where a.rulesource = '04' and a.RULESTATUS = '0'
)
group by rulecodesys
"""
print(f"查询特殊业务场景的清单执行的SQL为: {select_special_query}")
cursor.execute(select_special_query)
query_special_results = cursor.fetchall()
print("完整查询结果:", query_special_results)
print("特殊业务规则select_special_query的结果循环执行开始")
for special_row in query_special_results:
rulecodesys = special_row[0]
rulecodecount = special_row[1]
print(f"rulecodesys的值为:{rulecodesys}")
print(f"rulecodecount的值为:{rulecodecount}")
if rulecodecount > 1:
# 查询当前 rulecodesys 对应的详细规则信息
select_query = f"""
select a.rulesql, tmp.startdate, tmp.enddate, tmp.checkbatchno, tmp.datetype,
a.rulesource, tmp.scenecode, tmp.SCENERULECODE, tmp.dt ,substr(a.rulesql,INSTR(a.rulesql, 'concat(') ,INSTR(a.rulesql, ' as') - INSTR(a.rulesql, 'concat(')) as id, tmp.sceneillustrate
from (
select t3.startdate, t3.enddate, t3.checkbatchno, t3.datetype, t1.rulesource,
t4.scenecode, t4.SCENERULECODE, '{var_dt}' as dt, t5.rulecodesys,t4.sceneillustrate
from dm_urp_bui_dgs.ruletypeconfig t1
inner join dm_urp_bui_dgs.rulegrouprelation t2 on t1.rulecode = t2.rulecode
inner join dm_urp_bui_dgs.checkbatchno t3 on t2.groupcode = t3.groupRuleCode
inner join dm_urp_bui_dgs.rulecodetosys t5 on t5.rulecode = t1.rulecode
left join dm_urp_bui_dgs.SPECIALBUSINESS t4 on t5.rulecodesys = t4.RULECODE
where t3.systemCode = 'PRIP'
and t3.exestatus = '01'
and t3.checkbatchno = '{var_checkbatchno}'
) tmp
inner join dm_urp_bui_dgs.ruletypeconfig a on tmp.SCENERULECODE = a.rulecode
where a.rulesource = '04' and a.RULESTATUS = '0'
and tmp.rulecodesys = '{rulecodesys}'
order by tmp.scenecode asc
"""
print(f"当特殊业务场景rulecodecount>1时执行的SQL为:{select_query}")
cursor.execute(select_query)
query_results = cursor.fetchall()
# 用于收集每次循环的 SCENERULECODE
specialrulecodes = []
for idx, row in enumerate(query_results, start=1):
sql_statement = row[0]
chose1 = row[1]
chose2 = row[2]
checkbatchno = row[3]
datetype = row[4]
rulesource = row[5]
scenecode = row[6]
SCENERULECODE = row[7]
dt = row[8]
outdt = row[8]
error_id = row[9]
sceneillustrate = row[10]
error_id = error_id.replace('a.', '')
print(f"规则编码:{SCENERULECODE}的id为:{error_id}")
# 将当前 SCENERULECODE 加入收集列表
specialrulecodes.append(SCENERULECODE)
print(f"当前specialrulecodes值为:{specialrulecodes}")
# 拼接 in 条件,根据已收集的 specialrulecodes 动态生成
in_condition = ", ".join([f"'{code}'" for code in specialrulecodes])
print(f"当前in_condition值为:{in_condition}")
# 原 SQL 语句中需要替换的部分
if datetype == "tsdate":
# 找到第一个 1=1 ? 的位置
first_occurrence = sql_statement.find("1=1 ?")
if first_occurrence != -1:
# 拆分字符串为前后两部分,只替换第一部分
prefix = sql_statement[:first_occurrence]
print(f"prefix的值为:{prefix}")
suffix = sql_statement[first_occurrence:]
print(f"suffix的值为:{suffix}")
# 对后缀进行替换(仅替换第一个匹配项)
replaced_suffix = suffix.replace("1=1 ?", f"""1=1 and etl_time >='{chose1}' and etl_time <='{chose2}'
and checkbatchno='{checkbatchno}'
and {error_id} not in (
select id from dm_urp_bui_dgs.errordatadetail
where checkbatchno = '{checkbatchno}'
and rulecode in ({in_condition})
)""", 1) # 第三个参数 1 表示仅替换第一个匹配项
replaced_suffix = replaced_suffix.replace("1=1 ?",f"1=1 and etl_time >='{chose1}' and etl_time <='{chose2}' and checkbatchno='{checkbatchno}'")
sql_statement = prefix + replaced_suffix
print(f"拼接完prefix + replaced_suffix后的结果为:{sql_statement}")
else:
first_occurrence = sql_statement.find("1=1 ?")
if first_occurrence != -1:
# 拆分字符串为前后两部分,只替换第一部分
prefix = sql_statement[:first_occurrence]
print(f"prefix的值为:{prefix}")
suffix = sql_statement[first_occurrence:]
print(f"suffix的值为:{suffix}")
# 对后缀进行替换(仅替换第一个匹配项)
replaced_suffix = suffix.replace("1=1 ?", f"""1=1 and transdate >='{chose1}' and transdate <='{chose2}'
and checkbatchno='{checkbatchno}'
and {error_id} not in (
select id from dm_urp_bui_dgs.errordatadetail
where checkbatchno = '{checkbatchno}'
and rulecode in ({in_condition})
)""", 1) # 第三个参数 1 表示仅替换第一个匹配项
replaced_suffix = replaced_suffix.replace("1=1 ?",f"1=1 and transdate >='{chose1}' and transdate <='{chose2}' and checkbatchno='{checkbatchno}'")
sql_statement = prefix + replaced_suffix
print(f"拼接完prefix + replaced_suffix后的结果为:{sql_statement}")
# 如果规则来源是特殊业务场景,替换状态等字段
if rulesource == "04":
sql_statement = sql_statement.replace("'1' as state", f"'4' as state")
sql_statement = sql_statement.replace("'' as problemCode", f"'{scenecode}' as problemCode")
sql_statement = sql_statement.replace("'' as innerdes", f"'{sceneillustrate}' as innerdes")
sql_statement = sql_statement.replace("'' as valueno", f"'xt' as valueno")
# 替换 dt 和批次号
sql_statement = sql_statement.replace("'' as dt", f"'{dt}' as dt")
sql_statement = sql_statement.replace("'checkbatchno'", f"'{checkbatchno}'")
current_sql = sql_statement
execute_query_and_insert(current_sql)
else:
select_query = f"""
select a.rulesql, tmp.startdate, tmp.enddate, tmp.checkbatchno, tmp.datetype,
a.rulesource, tmp.scenecode, tmp.SCENERULECODE, tmp.dt
from (
select t3.startdate, t3.enddate, t3.checkbatchno, t3.datetype, t1.rulesource,
t4.scenecode, t4.SCENERULECODE, '{var_dt}' as dt, t5.rulecodesys
from dm_urp_bui_dgs.ruletypeconfig t1
inner join dm_urp_bui_dgs.rulegrouprelation t2 on t1.rulecode = t2.rulecode
inner join dm_urp_bui_dgs.checkbatchno t3 on t2.groupcode = t3.groupRuleCode
inner join dm_urp_bui_dgs.rulecodetosys t5 on t5.rulecode = t1.rulecode
left join dm_urp_bui_dgs.SPECIALBUSINESS t4 on t5.rulecodesys = t4.RULECODE
where t3.systemCode = 'PRIP'
and t3.exestatus = '01'
and t3.checkbatchno = '{var_checkbatchno}'
) tmp
inner join dm_urp_bui_dgs.ruletypeconfig a on tmp.SCENERULECODE = a.rulecode
where a.rulesource = '04' and a.RULESTATUS = '0'
and tmp.rulecodesys = '{rulecodesys}'
"""
cursor.execute(select_query)
query_results = cursor.fetchall()
for row in query_results:
current_sql = ""
sql_statement = row[0]
chose1 = row[1]
chose2 = row[2]
checkbatchno = row[3]
datetype = row[4]
rulesource = row[5]
scenecode = row[6]
SCENERULECODE=row[7]
dt = row[8]
outdt=row[8]
if datetype == "tsdate":
sql_statement = sql_statement.replace(
"1=1 ?",
f"1=1 and etl_time >='{chose1}' and etl_time <='{chose2}' and checkbatchno='{checkbatchno}'",
)
else:
sql_statement = sql_statement.replace(
"1=1 ?",
f"1=1 and transdate >='{chose1}' and transdate <='{chose2}' and checkbatchno='{checkbatchno}'",
)
# 如果规则来源是特殊业务场景,则插入错误明细表的数据 规则状态为特殊业务场景
if rulesource == "04":
sql_statement = sql_statement.replace(
"'1' as state", f"'4' as state"
)
sql_statement = sql_statement.replace(
"'' as problemCode", f"'{scenecode}' as problemCode"
)
sql_statement = sql_statement.replace("'' as innerdes", f"'{sceneillustrate}' as innerdes"
)
sql_statement = sql_statement.replace(
"'' as valueno", f"'xt' as valueno"
)
# 更改dt与批次号
sql_statement = sql_statement.replace("'' as dt", f"'{dt}' as dt")
sql_statement = sql_statement.replace("'checkbatchno'", f"'{checkbatchno}'")
# print("要运行的sql脚本是:"+sql_statement)
current_sql = sql_statement
cur_sql = current_sql
execute_query_and_insert(current_sql)
print("特殊业务规则for循环执行结束")
# 查询保单登记有效的治理规则清单
select_query = f"select t1.rulesql, t3.startdate, t3.enddate, t3.checkbatchno, t3.datetype, t1.rulesource,t5.rulecodesys,'{var_dt}' as dt from dm_urp_bui_dgs.ruletypeconfig t1 inner join dm_urp_bui_dgs.rulegrouprelation t2 on t1.rulecode = t2.rulecode inner join dm_urp_bui_dgs.checkbatchno t3 on t2.groupcode = t3.groupRuleCode inner join dm_urp_bui_dgs.rulecodetosys t5 on t5.rulecode=t1.rulecode where t3.systemCode = 'PRIP' and t3.exestatus = '01' and t1.RULESTATUS='0' and t3.checkbatchno = '{var_checkbatchno}'"
# and t1.runstates = '1' 治理规则
# and t1.rulestatus = '0'
# 0是生效
cur_sql = select_query
print(f"要执行的 保单登记有效的治理规则清单SQL为 :{cur_sql}")
cursor.execute(cur_sql)
# 从查询结果中获取数据
query_results = cursor.fetchall()
# 创建一个用于并发执行的 ThreadPoolExecutor <根据需要调整 max_workers>
with ThreadPoolExecutor(max_workers=100) as executor:
# 为每个 SQL 语句提交任务
print("治理业务规则for循环执行开始")
for row in query_results:
current_sql = ""
sql_statement = row[0]
chose1 = row[1]
chose2 = row[2]
checkbatchno = row[3]
datetype = row[4]
rulesource = row[5]
rulecodesys= row[6]
dt=row[7]
outdt=row[7]
scenecode=''
# if dt=='' :
# dt=row[7]
# outdt=row[7]
if len(rulecodesys) >0 :
queryscenecode=" select distinct CONCAT_WS(',', COLLECT_LIST(scenecode) OVER ()) AS scenecode from dm_urp_bui_dgs.SPECIALBUSINESS where rulecode= '{rulecodesys}' "
cursor.execute(queryscenecode)
query_results = cursor.fetchall()
for row in query_results:
scenecode=row[0]
if datetype == "tsdate":
sql_statement = sql_statement.replace(
"1=1 ?",
f"1=1 and etl_time >='{chose1}' and etl_time <='{chose2}' and checkbatchno='{checkbatchno}'",
)
else:
if rulesource=='02':
sql_statement = sql_statement.replace(
"1=1 ?",
f"1=1 and transdate >='{chose1}' and transdate <='{chose2}' and checkbatchno='{checkbatchno}'",
)
else :
sql_statement = sql_statement.replace(
"1=1 ?",
f"1=1 and transdate >='{chose1}' and transdate <='{chose2}' ",
)
# 如果规则来源是特殊业务场景,则插入错误明细表的数据 规则状态为特殊业务场景
if rulesource == "04":
sql_statement = sql_statement.replace(
"'1' as state", f"'4' as state"
)
sql_statement = sql_statement.replace(
"'' as problemCode", f"'{scenecode}' as problemCode"
)
sql_statement = sql_statement.replace("'' as innerdes", f"'{sceneillustrate}' as innerdes")
sql_statement = sql_statement.replace(
"'' as valueno", f"'xt' as valueno"
)
# 更改dt与批次号
sql_statement = sql_statement.replace("'' as dt", f"'{dt}' as dt")
sql_statement = sql_statement.replace("'checkbatchno'", f"'{checkbatchno}'")
print("要运行的sql脚本是:"+sql_statement)
task = executor.submit(execute_query_and_insert, sql_statement)
print("治理业务规则for循环执行结束")
getbatchno_query = f"select distinct t1.checkbatchno,t2.dt from dm_urp_bui_dgs.checkbatchno t1 join dm_urp_bui_dgs.errordatadetail t2 on t1.checkbatchno = t2.checkbatchno join dm_urp_bui_dgs.ruletypeconfig t3 on t2.rulecode = t3.rulecode where t1.systemCode = 'PRIP' and t1.exestatus = '01' and t3.rulesource<>'04' and t1.checkbatchno ='{checkbatchno}'group by t1.checkbatchno,t2.rulecode,t3.etable,t2.dt;"
print("getbatchno_query 方法执行的SQL为" + getbatchno_query)
cursor.execute(getbatchno_query)
query_results = cursor.fetchall()
# 为每个 SQL 语句提交任务
for row in query_results:
checkbatchno=row[0]
dt=row[1]
updatestatesql="insert OVERWRITE dm_urp_bui_dgs.ERRORDATADETAIL partition(dt='{}') SELECT id , checkbatchno , module , ruleCode, state, problemcode , problemdessou , detaildata , issueid, innerdes, modifydate , checkDate , remark, datasrc , transno , valueno FROM (SELECT A.ID, A.CHECKBATCHNO, A.module, A.RULECODE, case when c.SCENECODE is not null then '4' else A.state end as STATE, C.SCENECODE as PROBLEMCODE, A.PROBLEMDESSOU, A.DETAILDATA as DETAILDATA , A.ISSUEID, C.sceneillustrate as INNERDES, A.modifydate , A.checkDate , A.remark, A.datasrc , A.transno , A.valueno FROM (select * from dm_urp_bui_dgs.ERRORDATADETAIL where checkbatchno='{}' and dt='{}') A LEFT JOIN dm_urp_bui_dgs.RULETYPECONFIG B ON A.RULECODE = B.RULECODE LEFT JOIN dm_urp_bui_dgs.CHECKBATCHNO D ON A.CHECKBATCHNO = D.CHECKBATCHNO LEFT JOIN dm_urp_bui_dgs.RULECODETOSYS E ON B.RULECODE = E.RULECODE LEFT JOIN (select err.DETAILDATA,sp.rulecode,err.id,err.checkbatchno,sp.SCENECODE,sp.sceneillustrate from dm_urp_bui_dgs.ERRORDATADETAIL err inner join dm_urp_bui_dgs.SPECIALBUSINESS sp on err.rulecode = sp.SCENERULECODE where err.checkbatchno='{}' and err.state='4'and err.dt='{}') C ON E.RULECODESYS = C.rulecode and c.id=a.id and c.checkbatchno=a.checkbatchno) A union all SELECT id ,checkbatchno , module , ruleCode, state, problemcode , problemdessou , detaildata , issueid, innerdes, modifydate , checkDate , remark, datasrc , transno , valueno from dm_urp_bui_dgs.ERRORDATADETAIL where dt='{}' and checkbatchno<>'{}'".format(dt,checkbatchno,dt,checkbatchno,dt,dt,checkbatchno)
cursor.execute(updatestatesql)
print("数据打标签开始")
dbutils.notebook.run("/Workspace/Repos/dm_urp_bui_dgs/dm_urp_bui_dgs/dml/biaoqian", 3600, {"dt":dt,"checkbatchno":checkbatchno})
print("数据打标签结束")
remove_id_is_null = f"INSERT OVERWRITE table dm_urp_bui_dgs.errordatadetail partition(dt='{dt}') select id,checkbatchno,module,ruleCode,state,problemcode,problemdessou,detaildata,issueid,innerdes,modifydate,checkDate,remark,datasrc,transno,valueno from dm_urp_bui_dgs.errordatadetail where id is not null and dt = '{dt}'"
print("剔除错误明细中id为null的数据")
cursor.execute(remove_id_is_null)
print("错误明细开窗去重")
remove_distinct = f"insert overwrite table dm_urp_bui_dgs.errordatadetail partition(dt='{dt}') select id ,checkbatchno ,module ,ruleCode ,state ,problemcode ,problemdessou ,detaildata ,issueid ,innerdes ,modifydate ,checkDate ,remark ,datasrc ,transno ,valueno from (select row_number() over (partition by ID, CHECKBATCHNO, MODULE, RULECODE order by 1) rn,* from dm_urp_bui_dgs.errordatadetail where dt = '{dt}') tt where tt.rn = 1;"
cursor.execute(remove_distinct)
# 从校验批次表获得状态还没改变的 checkbatchno
getbatchno_query = f"select distinct t1.checkbatchno,t2.rulecode,t3.etable,t2.dt,count(1) as cnt from dm_urp_bui_dgs.checkbatchno t1 join dm_urp_bui_dgs.errordatadetail t2 on t1.checkbatchno = t2.checkbatchno join dm_urp_bui_dgs.ruletypeconfig t3 on t2.rulecode = t3.rulecode where t1.systemCode = 'PRIP' and t1.exestatus = '01' and t3.rulesource<>'04' and t3.rulesource ='02' and t1.checkbatchno ='{checkbatchno}' group by t1.checkbatchno,t2.rulecode,t3.etable,t2.dt;"
print("getbatchno_query 方法执行的SQL为:" + getbatchno_query)
cursor.execute(getbatchno_query)
query_results = cursor.fetchall()
with ThreadPoolExecutor(max_workers=20) as executor:
# 为每个 SQL 语句提交任务
for row in query_results:
curr_sql = ""
checkbatchno = row[0]
rulecode = row[1]
etable = row[2]
dt = row[3]
errornum = row[4]
# 获得总数据量
print("获得总数据量")
get_totalnum = "select count(1) from dm_urp_bui_prip."+etable+" WHERE checkbatchno = '"+checkbatchno+"' "
curr_sql = get_totalnum
cur_sql = curr_sql
cursor.execute(curr_sql)
totalnum = cursor.fetchone()[0]
# # 获得错误数据量
print("获得错误数据量")
get_errornum = "SELECT COUNT(1) FROM dm_urp_bui_dgs.errordatadetail WHERE checkbatchno = '{}' AND rulecode = '{}' AND dt = '{}';".format(checkbatchno, rulecode,dt)
curr_sql = get_errornum
cur_sql = curr_sql
cursor.execute(get_errornum)
errornum = cursor.fetchone()[0]
# 获得特殊业务场景数量
get_specialnum = "SELECT COUNT(1) FROM dm_urp_bui_dgs.errordatadetail WHERE checkbatchno = '{}' AND rulecode = '{}' AND dt = '{}' and state='4' ".format(checkbatchno, rulecode, dt)
curr_sql = get_specialnum
cur_sql = curr_sql
cursor.execute(get_specialnum)
query_results = cursor.fetchall()
specialnum = 0
for row in query_results:
specialnum = row[0]
special_alldetail = ""
if specialnum != 0:
# 获得这个批次号这个规则下的特殊业务场景明细
get_specialdetail = "SELECT scenerulecode, sceneillustrate,scenecode FROM dm_urp_bui_dgs.specialbusiness WHERE scenecode IN (select problemcode from dm_urp_bui_dgs.errordatadetail where checkbatchno = '{}' AND rulecode = '{}' AND dt = '{}' and state='4')".format(checkbatchno, rulecode, dt)
curr_sql = get_specialdetail
cur_sql = curr_sql
cursor.execute(get_specialdetail)
specialdetail_list = cursor.fetchall()
for row in specialdetail_list:
specialcode = row[0]
specialdetail = row[1]
scenecode = row[2]
# 查询该特殊业务场景的具体数量
spnum_query = "select count(1) as tscount from dm_urp_bui_dgs.errordatadetail a where a.checkbatchno = '{}' and a.rulecode = '{}' and a.dt = '{}'".format(checkbatchno, specialcode, dt)
curr_sql = spnum_query
cur_sql = curr_sql
cursor.execute(spnum_query)
spnum_result = cursor.fetchall()
# 处理查询结果,获取数量
spnum = 0
if spnum_result:
spnum = spnum_result[0][0]
# 根据spnum的值决定是否拼接字符串
if spnum != 0:
special_alldetail = special_alldetail + specialdetail + "-" + str(spnum) + ";"
if spnum == 0:
print(f"手动标签统计: {rulecode}")
spnum_query = "select count(1) as tscount from dm_urp_bui_dgs.errordatadetail a where a.checkbatchno = '{}' and a.problemcode = '{}' and a.dt = '{}' and rulecode = '{}'".format(checkbatchno, scenecode, dt,rulecode)
curr_sql = spnum_query
cur_sql = curr_sql
print(f"手动标签统计执行SQL: {cur_sql}")
cursor.execute(spnum_query)
spnum_result = cursor.fetchall()
spnum = spnum_result[0][0]
special_alldetail = special_alldetail + specialdetail + "-" + str(spnum) + ";"
# 获得错误数据占比
errorratio = 0
# allerrornum=errornum+specialnum
allerrornum=errornum
if totalnum != 0:
errorratio = allerrornum / totalnum
else:
raise ZeroDivisionError("分母“总数据量”为0")
# 获得待处理数据量
get_waitnum = "select count(1) from dm_urp_bui_dgs.errordatadetail where checkbatchno='{}' and rulecode = '{}' and state='1' and dt='{}'".format(checkbatchno, rulecode,dt)
curr_sql = get_waitnum
cur_sql = curr_sql
cursor.execute(get_waitnum)
waitnum_list=cursor.fetchall()
waitnum=0
for row in waitnum_list:
waitnum = row[0]
print('待处理数据量')
print(waitnum)
# 获得已处理数据量
get_completenum = "select count(1) from dm_urp_bui_dgs.errordatadetail where checkbatchno='{}' and rulecode = '{}' and state='2' and dt='{}'".format(checkbatchno, rulecode,dt)
curr_sql = get_completenum
cur_sql = curr_sql
cursor.execute(get_completenum)
complete_list=cursor.fetchall()
completenum=0
for row in complete_list:
completenum = row[0]
completenum=completenum+specialnum
# 获得挂起数据量
get_Suspendnum = "select count(1) from dm_urp_bui_dgs.errordatadetail where checkbatchno='{}' and rulecode = '{}' and state='3' and dt='{}'".format(checkbatchno, rulecode,dt)
curr_sql = get_Suspendnum
cur_sql = curr_sql
cursor.execute(get_Suspendnum)
Suspendnum_list=cursor.fetchall()
Suspendnum=0
for row in Suspendnum_list:
Suspendnum = row[0]
print('挂起数据量')
print(Suspendnum)
# 获得新增数据量(规则分组相同的相邻批次的数据)
addnum = 0
get_newnum = "select count(1) from dm_urp_bui_dgs.errordatadetail where checkbatchno='{}' and rulecode='{}' and dt='{}' ;".format(checkbatchno, rulecode,dt)
curr_sql = get_newnum
cur_sql = curr_sql
cursor.execute(get_newnum)
newnum = cursor.fetchone()[0]
get_oldcheckbatchno = "SELECT t1.checkbatchno FROM dm_urp_bui_dgs.errordatadetail t1 LEFT JOIN dm_urp_bui_dgs.checkbatchno t2 ON t1.CHECKBATCHNO=t2.CHECKBATCHNO WHERE t2.GROUPRULECODE=(SELECT GROUPRULECODE FROM dm_urp_bui_dgs.checkbatchno WHERE checkbatchno='{}') AND t1.CHECKBATCHNO<>'{}' GROUP BY t1.CHECKBATCHNO,t2.GROUPRULECODE ORDER BY t1.CHECKBATCHNO DESC;".format(checkbatchno, checkbatchno)
curr_sql = get_oldcheckbatchno
cur_sql = curr_sql
cursor.execute(get_oldcheckbatchno)
oldcheckbatchno = cursor.fetchone()
if oldcheckbatchno:
oldbatchno=oldcheckbatchno[0]
get_oldnum = "SELECT COUNT(1) FROM dm_urp_bui_dgs.errordatadetail WHERE CHECKBATCHNO= '{}' and RULECODE= '{}' ;".format(oldbatchno, rulecode)
curr_sql = get_oldnum
cur_sql = curr_sql
cursor.execute(get_oldnum)
oldnum = cursor.fetchone()[0]
addnum = newnum - oldnum
# 获得累计数据量
cumulativenum =allerrornum
print('累计数据量')
print(cumulativenum)
# 插入到"calculation"表中
dataCalculation_insert = "INSERT INTO dm_urp_bui_dgs.dataCalculation VALUES ('{}', '{}', '{}', '{}', '{}', '{}', '{}', '{}', '{}', '{}', '{}', '{}')".format(checkbatchno, rulecode,totalnum,cumulativenum,errorratio,specialnum,special_alldetail,waitnum,completenum,Suspendnum,addnum,cumulativenum)
curr_sql = dataCalculation_insert
cur_sql = curr_sql
cursor.execute(dataCalculation_insert)
# 检核完毕+数据量统计完毕 更改状态
taskstartdate = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
# prip_checkno = f"insert overwrite table dm_urp_bui_dgs.checkbatchno partition(dt='{dt1}') select * from dm_urp_bui_dgs.checkbatchno where systemCode <> 'PRIP' and dt='{dt1}' union all select systemCode,checkbatchno,checkeds,groupRuleCode,startdate,enddate,datetype,isimage,managecom,'02',checkDate,'{taskstartdate}',taskenddate,issendemail,taskowner from dm_urp_bui_dgs.checkbatchno where systemCode = 'PRIP' and exestatus = '01' and dt='{dt1}' union all select * from dm_urp_bui_dgs.checkbatchno where systemCode = 'PRIP' and exestatus = '02' and dt='{dt1}'"
prip_checkno = f"insert overwrite table dm_urp_bui_dgs.checkbatchno select * from dm_urp_bui_dgs.checkbatchno where systemCode <> 'PRIP' union all select systemCode,checkbatchno,checkeds,groupRuleCode,startdate,enddate,datetype,isimage,managecom,'02',checkDate,'{taskstartdate}','{taskstartdate}',issendemail,taskowner from dm_urp_bui_dgs.checkbatchno where systemCode = 'PRIP' and exestatus = '01' union all select * from dm_urp_bui_dgs.checkbatchno where systemCode = 'PRIP' and exestatus = '02'"
cur_sql = prip_checkno
cursor.execute(cur_sql)
# 获取当前时间
current_time = datetime.now()
# 将当前时间转换为字符串类型
current_time_str = current_time.strftime("%Y-%m-%d %H:%M:%S")
outflag="insert into dm_urp_bui_dgs.outlakesign values ('{}', 'Y','PRIP', '{}')".format(outdt,current_time_str)
execute_query_and_insert(outflag)
# 关闭查询用的游标
cursor.close()
# 关闭数据库连接
connection.close()
except Exception as e:
print(f"执行过程中发生错误: {str(e)}")
# 可根据需要添加事务回滚等逻辑
finally:
# 关闭 cursor 和 connection
if 'cursor' in locals():
cursor.close()
if 'connection' in locals():
connection.close()
execute_queries_and_insert()
以上代码报以下错:
获取到的checkbatchno为:C_PRIP_20250812_003
获取到的dt为:2025071401
查询特殊业务场景的清单执行的SQL为:
select rulecodesys, count(1)
from (
select a.rulesql, tmp.startdate, tmp.enddate, tmp.checkbatchno, tmp.datetype,
a.rulesource, tmp.scenecode, tmp.SCENERULECODE, tmp.dt, tmp.rulecodesys
from (
select t3.startdate, t3.enddate, t3.checkbatchno, t3.datetype, t1.rulesource,
t4.scenecode, t4.SCENERULECODE, '2025071401' as dt, t5.rulecodesys
from dm_urp_bui_dgs.ruletypeconfig t1
inner join dm_urp_bui_dgs.rulegrouprelation t2 on t1.rulecode = t2.rulecode
inner join dm_urp_bui_dgs.checkbatchno t3 on t2.groupcode = t3.groupRuleCode
inner join dm_urp_bui_dgs.rulecodetosys t5 on t5.rulecode = t1.rulecode
left join dm_urp_bui_dgs.SPECIALBUSINESS t4 on t5.rulecodesys = t4.RULECODE
where t3.systemCode = 'PRIP'
and t3.exestatus = '01'
and t3.checkbatchno = 'C_PRIP_20250812_003'
) tmp
inner join dm_urp_bui_dgs.ruletypeconfig a on tmp.SCENERULECODE = a.rulecode
where a.rulesource = '04' and a.RULESTATUS = '0'
)
group by rulecodesys
完整查询结果: [Row(rulecodesys='PRP_3rd_L_A_I_000033', count(1)=1), Row(rulecodesys='PRP_3rd_L_A_I_010075', count(1)=1), Row(rulecodesys='PRP_3rd_L_A_I_000028', count(1)=1), Row(rulecodesys='PRP_3rd_L_A_I_000044', count(1)=1), Row(rulecodesys='PRP_3rd_L_A_I_010074', count(1)=1), Row(rulecodesys='PRP_3rd_L_A_I_000040', count(1)=2), Row(rulecodesys='PRP_3rd_L_A_I_010104', count(1)=3), Row(rulecodesys='PRP_3rd_L_A_O_010046', count(1)=2), Row(rulecodesys='PRP_3rd_L_A_I_002768', count(1)=1)]
特殊业务规则select_special_query的结果循环执行开始
rulecodesys的值为:PRP_3rd_L_A_I_000033
rulecodecount的值为:1
执行过程中发生错误: cannot access local variable 'sceneillustrate' where it is not associated with a value