# *****************************************头部注释模块**************************************
#脚本名称:rpt_a_mfin_tax_model_distg_monitor_dtl.py
#功能说明:税易贷模型区分度监控
#更新方式:每月全量
#更新对象:{RPT}.A_MFIN_TAX_MODEL_DISTG_MONITOR_dtl
#创 建 人:gsp
#创建日期:20240410
#运行周期:月
#备 注:
#维护信息:
# *******************************************************************************************
# -*- coding: utf-8 -*-
import atexit
from datetime import datetime
import sys
import os
import subprocess
import re
# -- 1.验证输入参数是否正确-- #
if len(sys.argv) == 2:
ETL_TX_DT = sys.argv[1]
else:
print("usage :python xxx.py YYYYMMDD")
exit(-1)
ETL_HOME = os.path.abspath(os.environ['ETL_HOME'])
if not ETL_HOME:
print("Error:you must define a ETL_HOME before import etl module")
exit(-1)
# -- 2.日期及变量初始化-- #
# -- 自定义模块-- #
from config import db
from log import Log
import etl
MAX_DATE = etl.MAX_DATE
MIN_DATE = etl.MIN_DATE
NULL_DATE = etl.NULL_DATE
LEG_DATE = etl.LEG_DATE
I_MB_DT = etl.I_MB_DT #月初日期
I_ME_DT = etl.I_ME_DT #月末日期
I_QB_DT = etl.I_QB_DT #季初日期
I_QE_DT = etl.I_QE_DT #季末日期
I_HB_DT = etl.I_HB_DT #半年初日期这个
I_HE_DT = etl.I_HE_DT #半年末日期
I_YB_DT = etl.I_YB_DT #年初日期
I_YE_DT = etl.I_YE_DT #年末日期
I_LAST_DT = etl.I_LAST_DT #上日日期
I_NEXT_DT = etl.I_NEXT_DT #下日日期
I_LMB_DT = etl.I_LMB_DT #上月初日期
I_LME_DT = etl.I_LME_DT #上月末日期
I_LQB_DT = etl.I_LQB_DT #上季初日期
I_LQE_DT = etl.I_LQE_DT #上季末日期
I_NMOM_DT = etl.I_NMOM_DT #下月同期日期
I_NYOY_DT = etl.I_NYOY_DT #明年同期日期
I_LYOY_DT = etl.I_LYOY_DT #去年同期日期
I_LMOM_DT = etl.I_LMOM_DT #上月同期日期
I_LQOQ_DT = etl.I_LQOQ_DT #上季同期日期
I_MATTY_DT = etl.I_MATTY_DT #滚动年开始累积日期
I_MATTH_DT = etl.I_MATTH_DT #滚动半年年开始累积日期
I_MQTTQ_DT = etl.I_MQTTQ_DT #滚动季开始累积日期
I_MMTTM_DT = etl.I_MMTTM_DT #滚动月开始累积日期
MMTTM_DAYS = etl.MMTTM_DAYS #滚动月天数
MQTTQ_DAYS = etl.MQTTQ_DAYS #滚动季天数
MATTY_DAYS = etl.MATTY_DAYS #滚动年天数
MATTH_DAYS = etl.MATTH_DAYS #滚动半年天数
dwa_host = etl.dwa_host
dwa_port = etl.dwa_port
dwa_db_rpt = etl.dwa_db_rpt
dwa_db_fdm = etl.dwa_db_fdm
dwa_db_cdm = etl.dwa_db_cdm
dwa_username = etl.dwa_usr_rpt
dwa_password = etl.dwa_pwd_rpt
RPT = etl.dwa_db_rpt
FDM = etl.dwa_db_fdm
CDM = etl.dwa_db_cdm
#-- 3.初始化日志-- #
log = Log('dwa', ETL_TX_DT)
log_filename = log.logname
log.info("BEGIN . ")
"""
函数名称:execute_gbase_sql
参数(2个): 管道句柄 运行sql
功能: 将sql语句发送到gbase去执行
"""
def execute_gbase_sql(bformatsql):
try:
prefix_sql = "\n"
suffix_sql = "\nquit\n"
sql = prefix_sql + bformatsql + suffix_sql
gsql_str_p = f"set -o pipefail; gccli -h {dwa_host} -P {dwa_port} -u {dwa_username} -p\"{dwa_password}\" -D {dwa_db_rpt} -v -v |tee -a {log_filename}"
p = subprocess.Popen(gsql_str_p, shell=True, stdin=subprocess.PIPE, stdout=sys.stdout, stderr=sys.stdout,close_fds=True,universal_newlines=True, bufsize=1)
output = p.communicate(sql)
return p.returncode
except:
log.exception("Execute [%s] failed.%s" % (gsql_str_p,output.decode('utf-8')))
return -1
@atexit.register
def __exit__():
"""print a log when exit"""
log.info("END\n")
# -- 4.执行sql内容-- #
# -- bsql1支持重跑sql及临时表创建-- #
bsql1="""
/*Step1:创建目标表之前先清除目标表当天数据*/
DELETE FROM {RPT}.A_MFIN_TAX_MODEL_DISTG_MONITOR_dtl WHERE data_dt = DATE '{ETL_TX_DT}';
"""
# -生成mapping映射- #
bsqlmapping="""
"""
# -bsql2算法逻辑主体实现- #
bsql2="""
/*step1:创建临时表得到评分卡模型数据*/
CREATE TEMPORARY TABLE {RPT}.VT_A_MFIN_TAX_MODEL_DISTG_MONITOR_01
(DATA_DT DATE not NULL COMMENT '数据日期'
,PROD_NAME VARCHAR(100) COMMENT '产品名称'
,CONTRACT_SEQ_NO VARCHAR(255) DEFAULT NULL COMMENT '合同流水号'
,MODEL_BRCH_NUM VARCHAR(60) DEFAULT NULL COMMENT '模型分数'
,approve_dt varchar(32) DEFAULT NULL COMMENT '申请时间'
,label VARCHAR(12) DEFAULT NULL COMMENT 'label'
)DISTRIBUTED BY ('CONTRACT_SEQ_NO')
comment='得到评分卡模型数据';
/*step2:往临时表插入数据*/
INSERT INTO {RPT}.VT_A_MFIN_TAX_MODEL_DISTG_MONITOR_01
SELECT
data_dt,
PROD_NAME,
CONTRACT_SEQ_NO,
idx_val AS MODEL_BRCH_NUM,
approve_dt,
label
FROM
{RPT}.A_MFIN_TAX_MODEL_DTCT_TAB_DTL a
WHERE
MODEL_NAME LIKE '%评分卡模型%'
AND idx_name LIKE '%得分'
AND length(CONTRACT_SEQ_NO)>0
AND data_dt=DATE '{ETL_TX_DT}'
;
/*step3:向模型区分度监测插入AUC季、月数据*/
INSERT INTO {RPT}.A_MFIN_TAX_MODEL_DISTG_MONITOR_dtl (DATA_DT -- '数据日期'
,PROD_NAME-- '产品名称'
,FLAG -- '标志'
,appl_date -- '申请季度'
,AUC )
SELECT
DATE '{ETL_TX_DT}' AS data_dt-- '数据日期'
,PROD_NAME-- '产品名称'
,'季'AS tag-- '标志'
,appl_quart-- '申请季度'
,SUM(( tpr + pre_tpr )/ 2 *( fpr - pre_fpr )) AS AUC
FROM
(
SELECT
PROD_NAME,
MODEL_BRCH_NUM,
appl_quart,
cum_bad / tot_bad AS tpr,
cum_good / tot_good AS fpr,
lead(cum_bad / tot_bad) OVER(ORDER BY cum_bad / tot_bad DESC) AS pre_tpr,
lead(cum_good / tot_good) OVER(ORDER BY cum_good / tot_good DESC) AS pre_fpr
FROM
(
SELECT
PROD_NAME
,MODEL_BRCH_NUM
,CONCAT(LEFT(approve_dt,4),'Q',quarter(approve_dt))AS appl_quart
,COUNT( CASE WHEN label = 1 THEN CONTRACT_SEQ_NO ELSE NULL END ) OVER(PARTITION BY CONCAT(LEFT(approve_dt,4),'Q',quarter(approve_dt)) ORDER BY MODEL_BRCH_NUM ASC) AS cum_bad
,COUNT( CASE WHEN label = 1 THEN CONTRACT_SEQ_NO ELSE NULL END ) OVER(PARTITION BY CONCAT(LEFT(approve_dt,4),'Q',quarter(approve_dt)) ) AS tot_bad
,COUNT( CASE WHEN label = 0 THEN CONTRACT_SEQ_NO ELSE NULL END ) OVER(PARTITION BY CONCAT(LEFT(approve_dt,4),'Q',quarter(approve_dt)) ORDER BY MODEL_BRCH_NUM ASC) AS cum_good
,COUNT( CASE WHEN label = 0 THEN CONTRACT_SEQ_NO ELSE NULL END ) OVER(PARTITION BY CONCAT(LEFT(approve_dt,4),'Q',quarter(approve_dt)) ) AS tot_good
FROM {RPT}.VT_A_MFIN_TAX_MODEL_DISTG_MONITOR_01 a
WHERE data_dt=DATE '{ETL_TX_DT}'
AND DATE '{ETL_TX_DT}' = DATE '{I_QE_DT}'-- 只需要季度末
) b
) t
GROUP BY
PROD_NAME
,appl_quart
UNION ALL
SELECT
DATE '{ETL_TX_DT}' AS data_dt-- '数据日期'
,PROD_NAME-- '产品名称'
,'月'AS tag-- '标志'
,appl_month-- '申请季度'
,SUM(( tpr + pre_tpr )/ 2 *( fpr - pre_fpr )) AS AUC
FROM
(
SELECT
PROD_NAME,
MODEL_BRCH_NUM,
appl_month,
cum_bad / tot_bad AS tpr,
cum_good / tot_good AS fpr,
lead(cum_bad / tot_bad) OVER(ORDER BY cum_bad / tot_bad DESC) AS pre_tpr,
lead(cum_good / tot_good) OVER(ORDER BY cum_good / tot_good DESC) AS pre_fpr
FROM
(
SELECT
PROD_NAME
,MODEL_BRCH_NUM
,LEFT(approve_dt,7)AS appl_month
,COUNT( CASE WHEN label = 1 THEN CONTRACT_SEQ_NO ELSE NULL END ) OVER(PARTITION BY LEFT(approve_dt,7) ORDER BY MODEL_BRCH_NUM ASC) AS cum_bad
,COUNT( CASE WHEN label = 1 THEN CONTRACT_SEQ_NO ELSE NULL END ) OVER(PARTITION BY LEFT(approve_dt,7) ) AS tot_bad
,COUNT( CASE WHEN label = 0 THEN CONTRACT_SEQ_NO ELSE NULL END ) OVER(PARTITION BY LEFT(approve_dt,7) ORDER BY MODEL_BRCH_NUM ASC) AS cum_good
,COUNT( CASE WHEN label = 0 THEN CONTRACT_SEQ_NO ELSE NULL END ) OVER(PARTITION BY LEFT(approve_dt,7) ) AS tot_good
FROM {RPT}.VT_A_MFIN_TAX_MODEL_DISTG_MONITOR_01 a
WHERE data_dt=DATE '{ETL_TX_DT}'
AND DATE '{ETL_TX_DT}' = DATE '{I_ME_DT}' -- 只需要月末数据
) b
) t
GROUP BY
PROD_NAME
,appl_month;
/*step4:向模型区分度监测插入KS季、月数据*/
INSERT INTO {RPT}.A_MFIN_TAX_MODEL_DISTG_MONITOR_dtl (DATA_DT -- '数据日期'
,PROD_NAME-- '产品名称'
,FLAG -- '标志'
,appl_date -- '申请季度'
,CONTRACT_NUM -- '合同数'
,KS )
SELECT DATE '{ETL_TX_DT}' AS data_dt-- '数据日期'
,PROD_NAME-- '产品名称'
,'季'AS tag-- '标志'
,appl_quart-- '申请季度'
,COUNT(DISTINCT CONTRACT_SEQ_NO)AS 合同数-- '合同数'
,MAX(abs(quarter_cum_bad/quarter_tot_bad -quarter_cum_good/quarter_tot_good))AS KS
from (
SELECT PROD_NAME
,CONTRACT_SEQ_NO
,MODEL_BRCH_NUM
,CONCAT(LEFT(approve_dt,4),'Q',quarter(approve_dt))AS appl_quart
,COUNT(CASE WHEN label = 1 THEN CONTRACT_SEQ_NO ELSE NULL END ) OVER(PARTITION BY CONCAT(LEFT(approve_dt,4),'Q',quarter(approve_dt)) ORDER BY MODEL_BRCH_NUM asc) AS quarter_cum_bad
,COUNT(CASE WHEN label = 1 THEN CONTRACT_SEQ_NO ELSE NULL END ) OVER(PARTITION BY CONCAT(LEFT(approve_dt,4),'Q',quarter(approve_dt))) AS quarter_tot_bad
,COUNT(CASE WHEN label = 0 THEN CONTRACT_SEQ_NO ELSE NULL END ) OVER(PARTITION BY CONCAT(LEFT(approve_dt,4),'Q',quarter(approve_dt))ORDER BY MODEL_BRCH_NUM asc) AS quarter_cum_good
,COUNT(CASE WHEN label = 0 THEN CONTRACT_SEQ_NO ELSE NULL END ) OVER(PARTITION BY CONCAT(LEFT(approve_dt,4),'Q',quarter(approve_dt))) AS quarter_tot_good
FROM {RPT}.VT_A_MFIN_TAX_MODEL_DISTG_MONITOR_01 a
WHERE data_dt=DATE '{ETL_TX_DT}'
AND DATE '{ETL_TX_DT}' = DATE '{I_QE_DT}'-- 只需要季度末
) c
GROUP BY PROD_NAME
,appl_quart
UNION ALL
SELECT DATE '{ETL_TX_DT}' AS data_dt-- '数据日期'
,PROD_NAME-- '产品名称'
,'月'AS tag-- '标志'
,appl_month-- '申请月份'
,COUNT(DISTINCT CONTRACT_SEQ_NO)AS 合同数-- '合同数'
,MAX(abs(month_cum_bad/month_tot_bad -month_cum_good/month_tot_good))AS KS
from (
SELECT PROD_NAME
,CONTRACT_SEQ_NO
,MODEL_BRCH_NUM
,LEFT(approve_dt,7)AS appl_month
,COUNT(CASE WHEN label = 1 THEN CONTRACT_SEQ_NO ELSE NULL END ) OVER(PARTITION BY LEFT(approve_dt,7) ORDER BY MODEL_BRCH_NUM asc) AS month_cum_bad
,COUNT(CASE WHEN label = 1 THEN CONTRACT_SEQ_NO ELSE NULL END ) OVER(PARTITION BY LEFT(approve_dt,7)) AS month_tot_bad
,COUNT(CASE WHEN label = 0 THEN CONTRACT_SEQ_NO ELSE NULL END ) OVER(PARTITION BY LEFT(approve_dt,7) ORDER BY MODEL_BRCH_NUM asc) AS month_cum_good
,COUNT(CASE WHEN label = 0 THEN CONTRACT_SEQ_NO ELSE NULL END ) OVER(PARTITION BY LEFT(approve_dt,7)) AS month_tot_good
FROM {RPT}.VT_A_MFIN_TAX_MODEL_DISTG_MONITOR_01 a
WHERE data_dt=DATE '{ETL_TX_DT}'
AND DATE '{ETL_TX_DT}' = DATE '{I_ME_DT}' -- 只需要月末数据
) c
GROUP BY PROD_NAME
,appl_month;
/*step5:向模型区分度监测插入AUC上线以来数据*/
INSERT INTO {RPT}.A_MFIN_TAX_MODEL_DISTG_MONITOR_dtl (DATA_DT -- '数据日期'
,PROD_NAME-- '产品名称'
,FLAG -- '标志'
,AUC )
SELECT
DATE '{ETL_TX_DT}' AS data_dt-- '数据日期'
,PROD_NAME-- '产品名称'
,'上线以来'AS tag -- '标志'
,SUM(( tpr + pre_tpr )/ 2 *( fpr - pre_fpr )) AS AUC
FROM
(
SELECT
PROD_NAME,
MODEL_BRCH_NUM,
cum_bad / tot_bad AS tpr,
cum_good / tot_good AS fpr,
lead(cum_bad / tot_bad) OVER(ORDER BY cum_bad / tot_bad DESC) AS pre_tpr,
lead(cum_good / tot_good) OVER(ORDER BY cum_good / tot_good DESC) AS pre_fpr
FROM
(
SELECT
PROD_NAME,
MODEL_BRCH_NUM,
COUNT( CASE WHEN label = 1 THEN CONTRACT_SEQ_NO ELSE NULL END ) OVER(ORDER BY MODEL_BRCH_NUM ASC) AS cum_bad,
COUNT( CASE WHEN label = 1 THEN CONTRACT_SEQ_NO ELSE NULL END ) OVER() AS tot_bad,
COUNT( CASE WHEN label = 0 THEN CONTRACT_SEQ_NO ELSE NULL END ) OVER(ORDER BY MODEL_BRCH_NUM ASC) AS cum_good,
COUNT( CASE WHEN label = 0 THEN CONTRACT_SEQ_NO ELSE NULL END ) OVER() AS tot_good
FROM {RPT}.VT_A_MFIN_TAX_MODEL_DISTG_MONITOR_01 a
WHERE data_dt=DATE '{ETL_TX_DT}'
AND DATE '{ETL_TX_DT}' = DATE '{I_ME_DT}' -- 只需要月末数据
) b
) t
GROUP BY
PROD_NAME;
/*step6:向模型区分度监测插入KS上线以来数据*/
INSERT INTO {RPT}.A_MFIN_TAX_MODEL_DISTG_MONITOR_dtl (DATA_DT -- '数据日期'
,PROD_NAME-- '产品名称'
,FLAG -- '标志'
,CONTRACT_NUM -- '合同数'
,KS )
SELECT
DATE '{ETL_TX_DT}' AS data_dt-- '数据日期'
,PROD_NAME-- '产品名称'
,'上线以来'AS tag-- '标志'
,count(DISTINCT CONTRACT_SEQ_NO)AS 合同数-- '合同数'
,max(abs(cum_bad/tot_bad -cum_good/tot_good))AS KS
FROM (
SELECT PROD_NAME
,CONTRACT_SEQ_NO
,MODEL_BRCH_NUM
,COUNT(CASE WHEN label = 1 THEN CONTRACT_SEQ_NO ELSE NULL END ) OVER(ORDER BY MODEL_BRCH_NUM asc) AS cum_bad
,COUNT(CASE WHEN label = 1 THEN CONTRACT_SEQ_NO ELSE NULL END ) OVER() AS tot_bad
,COUNT(CASE WHEN label = 0 THEN CONTRACT_SEQ_NO ELSE NULL END ) OVER(ORDER BY MODEL_BRCH_NUM asc) AS cum_good
,COUNT(CASE WHEN label = 0 THEN CONTRACT_SEQ_NO ELSE NULL END ) OVER() AS tot_good
FROM {RPT}.VT_A_MFIN_TAX_MODEL_DISTG_MONITOR_01 a
WHERE data_dt=DATE '{ETL_TX_DT}'
AND DATE '{ETL_TX_DT}' = DATE '{I_ME_DT}' -- 只需要月末数据
) c
GROUP BY PROD_NAME
;
"""
# -- 主调程序-- #
bsql=bsql1+bsqlmapping+bsql2
bformatsql = bsql.format_map(vars())
ret_code = execute_gbase_sql(bformatsql)
if ret_code :
log.error(" ERROR - ERROR .")
exit(-1)
else:
log.info("SUCESS . ")
对比下这个的auc
最新发布