F. One Occurrence

https://codeforces.com/problemset/problem/1000/F

感觉可以用莫队草过去但是我不会写常数小的莫队+值域分块

看看tle 莫队code

// Problem: F. One Occurrence
// Contest: Codeforces - Educational Codeforces Round 46 (Rated for Div. 2)
// URL: https://codeforces.com/problemset/problem/1000/F
// Memory Limit: 768 MB
// Time Limit: 3000 ms
// 
// Powered by CP Editor (https://cpeditor.org)

#include<iostream>
#include<algorithm>
#include<cmath>
using namespace std;
const int N=5e5+9;
const int M=5e5+1;
int a[N];
int t,tt;
int L[N],R[N],pos[N];
int LL[M],RR[M],PP[M];
int c[M],vis[M];
int ans[N];
struct Q{
    int l,r,id;
    friend bool operator < (const Q &a,const Q &b){
        return pos[a.l]^pos[b.l]?pos[a.l]<pos[b.l]:pos[a.l]&1?a.r<b.r:a.r>b.r;
    }
}que[N];
int query(){
    int ans=0;
	for(int i=1;i<=PP[M-1];i++){
		if(vis[i]){
			for(int j=LL[i];j<=RR[i];j++){
				if(c[j]==1){
					ans=j;
					break;
				}
			}
		}
	}
    return ans;
}
void modify(int pos,int val){
    if(val==1){
        if(c[pos]==1){
            vis[PP[pos]]--;
        }
        if(c[pos]==0){
            vis[PP[pos]]++;
        }
    }else{
        if(c[pos]==1){
            vis[PP[pos]]--;
        }
        if(c[pos]==2){
            vis[PP[pos]]++;
        }
    }
    c[pos]+=val;
}
void add(int pos){
    modify(a[pos],1);
}
void del(int pos){
    modify(a[pos],-1);
}
int main(){
    ios::sync_with_stdio(false);
    cin.tie(0),cout.tie(0);
    int n;
    cin>>n;
    for(int i=1;i<=n;i++){
        cin>>a[i];
    }
    t=sqrt(n);
    for(int i=1;i<=t;i++){
        L[i]=(i-1)*t+1;
        R[i]=i*t;
    }
    if(R[t]<n){
        t++;
        L[t]=R[t-1]+1;
        R[t]=n;
    }
    for(int i=1;i<=t;i++){
        for(int j=L[i];j<=R[i];j++){
            pos[j]=i;
        }
    }
    tt=sqrt(M);
    for(int i=1;i<=tt;i++){
        LL[i]=(i-1)*tt+1;
        RR[i]=i*tt;
    }
    if(RR[tt]<M){
        tt++;
        LL[tt]=RR[tt-1]+1;
        RR[tt]=M;
    }
    for(int i=1;i<=tt;i++){
        for(int j=LL[i];j<=RR[i];j++){
            PP[j]=i;
        }
    }
    int q;
    cin>>q;
    for(int i=1;i<=q;i++){
        cin>>que[i].l>>que[i].r;
        que[i].id=i;
    }
    sort(que+1,que+1+q);
    int l=1,r=0;
    for(int i=1;i<=q;i++){
        while(que[i].l>l)del(l++);
        while(que[i].l<l)add(--l);
        while(que[i].r>r)add(++r);
        while(que[i].r<r)del(r--);
        ans[que[i].id]=query();
    }
    for(int i=1;i<=q;i++){
        cout<<ans[i]<<'\n';
    }
    return 0;
}

 接下来是ac的主席树

很像HH的项链,维护上一个相同的值的位置

我们维护上一个相同值的位置,维护区间最小值,如果最小值小于l,就有说明有[l,r]有一个值的数量是1

为了防止第一个出现的值影响答案,我们还要把第一个出现的值修改成INF(原本是0)

这样就可以在线操作完

因为每个数有可能修改2次,把N要再开大2倍

// Problem: F. One Occurrence
// Contest: Codeforces - Educational Codeforces Round 46 (Rated for Div. 2)
// URL: https://codeforces.com/problemset/problem/1000/F
// Memory Limit: 768 MB
// Time Limit: 3000 ms
// 
// Powered by CP Editor (https://cpeditor.org)

#include<iostream>
using namespace std;
const int N=5e5+9;
int a[N],vis[N],last[N];
//可持久化线段树
struct KCJSEG{
    #define tl(id) seg[id].l
    #define tr(id) seg[id].r
    #define ll long long
    #define INF 1e6
    struct node{
    	int L,R;
        int l,r;
        int mn,index;
    }seg[N<<6];
    void pushup(node &id,node &l,node &r){
    	if(l.mn>r.mn){
    		id.mn=r.mn;
    		id.index=r.index;
    	}else{
    		id.mn=l.mn;
    		id.index=l.index;
    	}
    }
	void pushup(int id){pushup(seg[id],seg[tl(id)],seg[tr(id)]);}
    int root[N],index,mx;
    int inrange(int L,int R,int l,int r){return L>=l && R<=r;}
    int outofrange(int L,int R,int l,int r){return L>r || l>R;}
    void build(int &id,int l,int r){
        id=++index;
        seg[id]={l,r};
        if(l==r){
            seg[id].mn=INF;
            seg[id].index=l;
            return;
        }
        int mid=(l+r)>>1;
        build(tl(id),l,mid);
        build(tr(id),mid+1,r);
        pushup(id);
    }
    void update(int post,int &curr,int l,int r,int pos,int v){
        curr=++index;
        seg[curr]=seg[post];
        if(l==r){
            seg[curr].mn=v;
            return;
        }
        int mid=(l+r)>>1;
        if(mid>=pos){
            update(tl(post),tl(curr),l,mid,pos,v);
        }else{
            update(tr(post),tr(curr),mid+1,r,pos,v);
        }
        pushup(curr);
    }
    node query(int curr,int l,int r){
    	if((seg[curr].L>=l && seg[curr].R<=r)){
    		return seg[curr];
    	}else{
    		int mid=(seg[curr].L+seg[curr].R)>>1;
    		if(mid>=r){
    			return query(tl(curr),l,r);
    		}else if(mid<l){
    			return query(tr(curr),l,r);
    		}else{
    			node res;
    			node left=query(tl(curr),l,r);
    			node right=query(tr(curr),l,r);
    			pushup(res,left,right);
    			return res;
    		}
    	}
    }
}t;
#define node KCJSEG::node
int main(){
	ios::sync_with_stdio(false);
	cin.tie(0),cout.tie(0);
	int n;
	cin>>n;
	t.build(t.root[0],1,n);
	for(int i=1;i<=n;i++){
		cin>>a[i];
		t.root[i]=t.root[i-1];
		if(vis[a[i]]){
			t.update(t.root[i],t.root[i],1,n,vis[a[i]],INF);
			last[a[i]]=vis[a[i]];
			t.update(t.root[i],t.root[i],1,n,i,last[a[i]]);
			vis[a[i]]=i;
		}else{
			vis[a[i]]=i;
			t.update(t.root[i],t.root[i],1,n,i,0);
		}
	}
	int q;
	cin>>q;
	for(int i=1;i<=q;i++){
		int l,r;
		cin>>l>>r;
		node ans=t.query(t.root[r],l,r);
		if(ans.mn>=l){
			cout<<0<<'\n';
		}else{
			cout<<a[ans.index]<<'\n';
		}
	}
	return 0;
}

from databricks import sql from concurrent.futures import ThreadPoolExecutor from datetime import time, datetime var_checkbatchno = dbutils.widgets.get("checkbatchno") var_dt = dbutils.widgets.get("dt") # dt = dbutils.widgets.get("dt") # startdate = dbutils.widgets.get("startdate") # enddate = dbutils.widgets.get("enddate") # groupRuleCode = dbutils.widgets.get("groupRuleCode") print("获取到的checkbatchno为:" + var_checkbatchno) print("获取到的dt为:" + var_dt) formatted_date_minus_one = datetime.now().strftime("%Y%m%d%H%M%S") def execute_query_and_insert(sql_statement): try: connection_exet = sql.connect( server_hostname=dbutils.widgets.get("sql_warehouse_server_hostname"), http_path=dbutils.widgets.get("sql_warehouse_http_path_dm_urp"), access_token=dbutils.secrets.get(scope="lakehouse", key="cdlurpdctoken"), catalog=dbutils.widgets.get("databricks_catalog") # server_hostname=dbutils.widgets.get("server_hostname"), # http_path=dbutils.widgets.get("server_hostname"), # access_token=dbutils.widgets.get("access_token"), # catalog=dbutils.widgets.get("catalog") ) cursor_exet = connection_exet.cursor() cursor_exet.execute(sql_statement) connection_exet.commit() cursor_exet.close() connection_exet.close() except Exception as e: raise Exception(f"执行 SQL 时出错: {str(e)},SQL 语句为: {sql_statement}") def execute_queries_and_insert(): try: # 连接指定catalog ConnCatalog=lakehouse_uat connection = sql.connect( server_hostname=dbutils.widgets.get("sql_warehouse_server_hostname"), http_path=dbutils.widgets.get("sql_warehouse_http_path_dm_urp"), access_token=dbutils.secrets.get(scope="lakehouse", key="cdlurpdctoken"), catalog=dbutils.widgets.get("databricks_catalog") ) cursor = connection.cursor() cur_sql = "" outdt = "" datesql = "" # 查询特殊业务场景的清单(用于筛选 rulecodesys 出现次数 >1 的情况) select_special_query = f""" select rulecodesys, count(1) from ( select a.rulesql, tmp.startdate, tmp.enddate, tmp.checkbatchno, tmp.datetype, a.rulesource, tmp.scenecode, tmp.SCENERULECODE, tmp.dt, tmp.rulecodesys from ( select t3.startdate, t3.enddate, t3.checkbatchno, t3.datetype, t1.rulesource, t4.scenecode, t4.SCENERULECODE, '{var_dt}' as dt, t5.rulecodesys from dm_urp_bui_dgs.ruletypeconfig t1 inner join dm_urp_bui_dgs.rulegrouprelation t2 on t1.rulecode = t2.rulecode inner join dm_urp_bui_dgs.checkbatchno t3 on t2.groupcode = t3.groupRuleCode inner join dm_urp_bui_dgs.rulecodetosys t5 on t5.rulecode = t1.rulecode left join dm_urp_bui_dgs.SPECIALBUSINESS t4 on t5.rulecodesys = t4.RULECODE where t3.systemCode = 'PRIP' and t3.exestatus = '01' and t3.checkbatchno = '{var_checkbatchno}' ) tmp inner join dm_urp_bui_dgs.ruletypeconfig a on tmp.SCENERULECODE = a.rulecode where a.rulesource = '04' and a.RULESTATUS = '0' ) group by rulecodesys """ print(f"查询特殊业务场景的清单执行的SQL为: {select_special_query}") cursor.execute(select_special_query) query_special_results = cursor.fetchall() print("完整查询结果:", query_special_results) print("特殊业务规则select_special_query的结果循环执行开始") for special_row in query_special_results: rulecodesys = special_row[0] rulecodecount = special_row[1] print(f"rulecodesys的值为:{rulecodesys}") print(f"rulecodecount的值为:{rulecodecount}") if rulecodecount > 1: # 查询当前 rulecodesys 对应的详细规则信息 select_query = f""" select a.rulesql, tmp.startdate, tmp.enddate, tmp.checkbatchno, tmp.datetype, a.rulesource, tmp.scenecode, tmp.SCENERULECODE, tmp.dt ,substr(a.rulesql,INSTR(a.rulesql, 'concat(') ,INSTR(a.rulesql, ' as') - INSTR(a.rulesql, 'concat(')) as id, tmp.sceneillustrate from ( select t3.startdate, t3.enddate, t3.checkbatchno, t3.datetype, t1.rulesource, t4.scenecode, t4.SCENERULECODE, '{var_dt}' as dt, t5.rulecodesys,t4.sceneillustrate from dm_urp_bui_dgs.ruletypeconfig t1 inner join dm_urp_bui_dgs.rulegrouprelation t2 on t1.rulecode = t2.rulecode inner join dm_urp_bui_dgs.checkbatchno t3 on t2.groupcode = t3.groupRuleCode inner join dm_urp_bui_dgs.rulecodetosys t5 on t5.rulecode = t1.rulecode left join dm_urp_bui_dgs.SPECIALBUSINESS t4 on t5.rulecodesys = t4.RULECODE where t3.systemCode = 'PRIP' and t3.exestatus = '01' and t3.checkbatchno = '{var_checkbatchno}' ) tmp inner join dm_urp_bui_dgs.ruletypeconfig a on tmp.SCENERULECODE = a.rulecode where a.rulesource = '04' and a.RULESTATUS = '0' and tmp.rulecodesys = '{rulecodesys}' order by tmp.scenecode asc """ print(f"当特殊业务场景rulecodecount>1时执行的SQL为:{select_query}") cursor.execute(select_query) query_results = cursor.fetchall() # 用于收集每次循环的 SCENERULECODE specialrulecodes = [] for idx, row in enumerate(query_results, start=1): sql_statement = row[0] chose1 = row[1] chose2 = row[2] checkbatchno = row[3] datetype = row[4] rulesource = row[5] scenecode = row[6] SCENERULECODE = row[7] dt = row[8] outdt = row[8] error_id = row[9] sceneillustrate = row[10] error_id = error_id.replace('a.', '') print(f"规则编码:{SCENERULECODE}的id为:{error_id}") # 将当前 SCENERULECODE 加入收集列表 specialrulecodes.append(SCENERULECODE) print(f"当前specialrulecodes值为:{specialrulecodes}") # 拼接 in 条件,根据已收集的 specialrulecodes 动态生成 in_condition = ", ".join([f"'{code}'" for code in specialrulecodes]) print(f"当前in_condition值为:{in_condition}") # 原 SQL 语句中需要替换的部分 if datetype == "tsdate": # 找到第一个 1=1 ? 的位置 first_occurrence = sql_statement.find("1=1 ?") if first_occurrence != -1: # 拆分字符串为前后两部分,只替换第一部分 prefix = sql_statement[:first_occurrence] print(f"prefix的值为:{prefix}") suffix = sql_statement[first_occurrence:] print(f"suffix的值为:{suffix}") # 对后缀进行替换(仅替换第一个匹配项) replaced_suffix = suffix.replace("1=1 ?", f"""1=1 and etl_time >='{chose1}' and etl_time <='{chose2}' and checkbatchno='{checkbatchno}' and {error_id} not in ( select id from dm_urp_bui_dgs.errordatadetail where checkbatchno = '{checkbatchno}' and rulecode in ({in_condition}) )""", 1) # 第三个参数 1 表示仅替换第一个匹配项 replaced_suffix = replaced_suffix.replace("1=1 ?",f"1=1 and etl_time >='{chose1}' and etl_time <='{chose2}' and checkbatchno='{checkbatchno}'") sql_statement = prefix + replaced_suffix print(f"拼接完prefix + replaced_suffix后的结果为:{sql_statement}") else: first_occurrence = sql_statement.find("1=1 ?") if first_occurrence != -1: # 拆分字符串为前后两部分,只替换第一部分 prefix = sql_statement[:first_occurrence] print(f"prefix的值为:{prefix}") suffix = sql_statement[first_occurrence:] print(f"suffix的值为:{suffix}") # 对后缀进行替换(仅替换第一个匹配项) replaced_suffix = suffix.replace("1=1 ?", f"""1=1 and transdate >='{chose1}' and transdate <='{chose2}' and checkbatchno='{checkbatchno}' and {error_id} not in ( select id from dm_urp_bui_dgs.errordatadetail where checkbatchno = '{checkbatchno}' and rulecode in ({in_condition}) )""", 1) # 第三个参数 1 表示仅替换第一个匹配项 replaced_suffix = replaced_suffix.replace("1=1 ?",f"1=1 and transdate >='{chose1}' and transdate <='{chose2}' and checkbatchno='{checkbatchno}'") sql_statement = prefix + replaced_suffix print(f"拼接完prefix + replaced_suffix后的结果为:{sql_statement}") # 如果规则来源是特殊业务场景,替换状态等字段 if rulesource == "04": sql_statement = sql_statement.replace("'1' as state", f"'4' as state") sql_statement = sql_statement.replace("'' as problemCode", f"'{scenecode}' as problemCode") sql_statement = sql_statement.replace("'' as innerdes", f"'{sceneillustrate}' as innerdes") sql_statement = sql_statement.replace("'' as valueno", f"'xt' as valueno") # 替换 dt 和批次号 sql_statement = sql_statement.replace("'' as dt", f"'{dt}' as dt") sql_statement = sql_statement.replace("'checkbatchno'", f"'{checkbatchno}'") current_sql = sql_statement execute_query_and_insert(current_sql) else: select_query = f""" select a.rulesql, tmp.startdate, tmp.enddate, tmp.checkbatchno, tmp.datetype, a.rulesource, tmp.scenecode, tmp.SCENERULECODE, tmp.dt from ( select t3.startdate, t3.enddate, t3.checkbatchno, t3.datetype, t1.rulesource, t4.scenecode, t4.SCENERULECODE, '{var_dt}' as dt, t5.rulecodesys from dm_urp_bui_dgs.ruletypeconfig t1 inner join dm_urp_bui_dgs.rulegrouprelation t2 on t1.rulecode = t2.rulecode inner join dm_urp_bui_dgs.checkbatchno t3 on t2.groupcode = t3.groupRuleCode inner join dm_urp_bui_dgs.rulecodetosys t5 on t5.rulecode = t1.rulecode left join dm_urp_bui_dgs.SPECIALBUSINESS t4 on t5.rulecodesys = t4.RULECODE where t3.systemCode = 'PRIP' and t3.exestatus = '01' and t3.checkbatchno = '{var_checkbatchno}' ) tmp inner join dm_urp_bui_dgs.ruletypeconfig a on tmp.SCENERULECODE = a.rulecode where a.rulesource = '04' and a.RULESTATUS = '0' and tmp.rulecodesys = '{rulecodesys}' """ cursor.execute(select_query) query_results = cursor.fetchall() for row in query_results: current_sql = "" sql_statement = row[0] chose1 = row[1] chose2 = row[2] checkbatchno = row[3] datetype = row[4] rulesource = row[5] scenecode = row[6] SCENERULECODE=row[7] dt = row[8] outdt=row[8] if datetype == "tsdate": sql_statement = sql_statement.replace( "1=1 ?", f"1=1 and etl_time >='{chose1}' and etl_time <='{chose2}' and checkbatchno='{checkbatchno}'", ) else: sql_statement = sql_statement.replace( "1=1 ?", f"1=1 and transdate >='{chose1}' and transdate <='{chose2}' and checkbatchno='{checkbatchno}'", ) # 如果规则来源是特殊业务场景,则插入错误明细表的数据 规则状态为特殊业务场景 if rulesource == "04": sql_statement = sql_statement.replace( "'1' as state", f"'4' as state" ) sql_statement = sql_statement.replace( "'' as problemCode", f"'{scenecode}' as problemCode" ) sql_statement = sql_statement.replace("'' as innerdes", f"'{sceneillustrate}' as innerdes" ) sql_statement = sql_statement.replace( "'' as valueno", f"'xt' as valueno" ) # 更改dt与批次号 sql_statement = sql_statement.replace("'' as dt", f"'{dt}' as dt") sql_statement = sql_statement.replace("'checkbatchno'", f"'{checkbatchno}'") # print("要运行的sql脚本是:"+sql_statement) current_sql = sql_statement cur_sql = current_sql execute_query_and_insert(current_sql) print("特殊业务规则for循环执行结束") # 查询保单登记有效的治理规则清单 select_query = f"select t1.rulesql, t3.startdate, t3.enddate, t3.checkbatchno, t3.datetype, t1.rulesource,t5.rulecodesys,'{var_dt}' as dt from dm_urp_bui_dgs.ruletypeconfig t1 inner join dm_urp_bui_dgs.rulegrouprelation t2 on t1.rulecode = t2.rulecode inner join dm_urp_bui_dgs.checkbatchno t3 on t2.groupcode = t3.groupRuleCode inner join dm_urp_bui_dgs.rulecodetosys t5 on t5.rulecode=t1.rulecode where t3.systemCode = 'PRIP' and t3.exestatus = '01' and t1.RULESTATUS='0' and t3.checkbatchno = '{var_checkbatchno}'" # and t1.runstates = '1' 治理规则 # and t1.rulestatus = '0' # 0是生效 cur_sql = select_query print(f"要执行的 保单登记有效的治理规则清单SQL为 :{cur_sql}") cursor.execute(cur_sql) # 从查询结果中获取数据 query_results = cursor.fetchall() # 创建一个用于并发执行的 ThreadPoolExecutor <根据需要调整 max_workers> with ThreadPoolExecutor(max_workers=100) as executor: # 为每个 SQL 语句提交任务 print("治理业务规则for循环执行开始") for row in query_results: current_sql = "" sql_statement = row[0] chose1 = row[1] chose2 = row[2] checkbatchno = row[3] datetype = row[4] rulesource = row[5] rulecodesys= row[6] dt=row[7] outdt=row[7] scenecode='' # if dt=='' : # dt=row[7] # outdt=row[7] if len(rulecodesys) >0 : queryscenecode=" select distinct CONCAT_WS(',', COLLECT_LIST(scenecode) OVER ()) AS scenecode from dm_urp_bui_dgs.SPECIALBUSINESS where rulecode= '{rulecodesys}' " cursor.execute(queryscenecode) query_results = cursor.fetchall() for row in query_results: scenecode=row[0] if datetype == "tsdate": sql_statement = sql_statement.replace( "1=1 ?", f"1=1 and etl_time >='{chose1}' and etl_time <='{chose2}' and checkbatchno='{checkbatchno}'", ) else: if rulesource=='02': sql_statement = sql_statement.replace( "1=1 ?", f"1=1 and transdate >='{chose1}' and transdate <='{chose2}' and checkbatchno='{checkbatchno}'", ) else : sql_statement = sql_statement.replace( "1=1 ?", f"1=1 and transdate >='{chose1}' and transdate <='{chose2}' ", ) # 如果规则来源是特殊业务场景,则插入错误明细表的数据 规则状态为特殊业务场景 if rulesource == "04": sql_statement = sql_statement.replace( "'1' as state", f"'4' as state" ) sql_statement = sql_statement.replace( "'' as problemCode", f"'{scenecode}' as problemCode" ) sql_statement = sql_statement.replace("'' as innerdes", f"'{sceneillustrate}' as innerdes") sql_statement = sql_statement.replace( "'' as valueno", f"'xt' as valueno" ) # 更改dt与批次号 sql_statement = sql_statement.replace("'' as dt", f"'{dt}' as dt") sql_statement = sql_statement.replace("'checkbatchno'", f"'{checkbatchno}'") print("要运行的sql脚本是:"+sql_statement) task = executor.submit(execute_query_and_insert, sql_statement) print("治理业务规则for循环执行结束") getbatchno_query = f"select distinct t1.checkbatchno,t2.dt from dm_urp_bui_dgs.checkbatchno t1 join dm_urp_bui_dgs.errordatadetail t2 on t1.checkbatchno = t2.checkbatchno join dm_urp_bui_dgs.ruletypeconfig t3 on t2.rulecode = t3.rulecode where t1.systemCode = 'PRIP' and t1.exestatus = '01' and t3.rulesource<>'04' and t1.checkbatchno ='{checkbatchno}'group by t1.checkbatchno,t2.rulecode,t3.etable,t2.dt;" print("getbatchno_query 方法执行的SQL为" + getbatchno_query) cursor.execute(getbatchno_query) query_results = cursor.fetchall() # 为每个 SQL 语句提交任务 for row in query_results: checkbatchno=row[0] dt=row[1] updatestatesql="insert OVERWRITE dm_urp_bui_dgs.ERRORDATADETAIL partition(dt='{}') SELECT id , checkbatchno , module , ruleCode, state, problemcode , problemdessou , detaildata , issueid, innerdes, modifydate , checkDate , remark, datasrc , transno , valueno FROM (SELECT A.ID, A.CHECKBATCHNO, A.module, A.RULECODE, case when c.SCENECODE is not null then '4' else A.state end as STATE, C.SCENECODE as PROBLEMCODE, A.PROBLEMDESSOU, A.DETAILDATA as DETAILDATA , A.ISSUEID, C.sceneillustrate as INNERDES, A.modifydate , A.checkDate , A.remark, A.datasrc , A.transno , A.valueno FROM (select * from dm_urp_bui_dgs.ERRORDATADETAIL where checkbatchno='{}' and dt='{}') A LEFT JOIN dm_urp_bui_dgs.RULETYPECONFIG B ON A.RULECODE = B.RULECODE LEFT JOIN dm_urp_bui_dgs.CHECKBATCHNO D ON A.CHECKBATCHNO = D.CHECKBATCHNO LEFT JOIN dm_urp_bui_dgs.RULECODETOSYS E ON B.RULECODE = E.RULECODE LEFT JOIN (select err.DETAILDATA,sp.rulecode,err.id,err.checkbatchno,sp.SCENECODE,sp.sceneillustrate from dm_urp_bui_dgs.ERRORDATADETAIL err inner join dm_urp_bui_dgs.SPECIALBUSINESS sp on err.rulecode = sp.SCENERULECODE where err.checkbatchno='{}' and err.state='4'and err.dt='{}') C ON E.RULECODESYS = C.rulecode and c.id=a.id and c.checkbatchno=a.checkbatchno) A union all SELECT id ,checkbatchno , module , ruleCode, state, problemcode , problemdessou , detaildata , issueid, innerdes, modifydate , checkDate , remark, datasrc , transno , valueno from dm_urp_bui_dgs.ERRORDATADETAIL where dt='{}' and checkbatchno<>'{}'".format(dt,checkbatchno,dt,checkbatchno,dt,dt,checkbatchno) cursor.execute(updatestatesql) print("数据打标签开始") dbutils.notebook.run("/Workspace/Repos/dm_urp_bui_dgs/dm_urp_bui_dgs/dml/biaoqian", 3600, {"dt":dt,"checkbatchno":checkbatchno}) print("数据打标签结束") remove_id_is_null = f"INSERT OVERWRITE table dm_urp_bui_dgs.errordatadetail partition(dt='{dt}') select id,checkbatchno,module,ruleCode,state,problemcode,problemdessou,detaildata,issueid,innerdes,modifydate,checkDate,remark,datasrc,transno,valueno from dm_urp_bui_dgs.errordatadetail where id is not null and dt = '{dt}'" print("剔除错误明细中id为null的数据") cursor.execute(remove_id_is_null) print("错误明细开窗去重") remove_distinct = f"insert overwrite table dm_urp_bui_dgs.errordatadetail partition(dt='{dt}') select id ,checkbatchno ,module ,ruleCode ,state ,problemcode ,problemdessou ,detaildata ,issueid ,innerdes ,modifydate ,checkDate ,remark ,datasrc ,transno ,valueno from (select row_number() over (partition by ID, CHECKBATCHNO, MODULE, RULECODE order by 1) rn,* from dm_urp_bui_dgs.errordatadetail where dt = '{dt}') tt where tt.rn = 1;" cursor.execute(remove_distinct) # 从校验批次表获得状态还没改变的 checkbatchno getbatchno_query = f"select distinct t1.checkbatchno,t2.rulecode,t3.etable,t2.dt,count(1) as cnt from dm_urp_bui_dgs.checkbatchno t1 join dm_urp_bui_dgs.errordatadetail t2 on t1.checkbatchno = t2.checkbatchno join dm_urp_bui_dgs.ruletypeconfig t3 on t2.rulecode = t3.rulecode where t1.systemCode = 'PRIP' and t1.exestatus = '01' and t3.rulesource<>'04' and t3.rulesource ='02' and t1.checkbatchno ='{checkbatchno}' group by t1.checkbatchno,t2.rulecode,t3.etable,t2.dt;" print("getbatchno_query 方法执行的SQL为:" + getbatchno_query) cursor.execute(getbatchno_query) query_results = cursor.fetchall() with ThreadPoolExecutor(max_workers=20) as executor: # 为每个 SQL 语句提交任务 for row in query_results: curr_sql = "" checkbatchno = row[0] rulecode = row[1] etable = row[2] dt = row[3] errornum = row[4] # 获得总数据量 print("获得总数据量") get_totalnum = "select count(1) from dm_urp_bui_prip."+etable+" WHERE checkbatchno = '"+checkbatchno+"' " curr_sql = get_totalnum cur_sql = curr_sql cursor.execute(curr_sql) totalnum = cursor.fetchone()[0] # # 获得错误数据量 print("获得错误数据量") get_errornum = "SELECT COUNT(1) FROM dm_urp_bui_dgs.errordatadetail WHERE checkbatchno = '{}' AND rulecode = '{}' AND dt = '{}';".format(checkbatchno, rulecode,dt) curr_sql = get_errornum cur_sql = curr_sql cursor.execute(get_errornum) errornum = cursor.fetchone()[0] # 获得特殊业务场景数量 get_specialnum = "SELECT COUNT(1) FROM dm_urp_bui_dgs.errordatadetail WHERE checkbatchno = '{}' AND rulecode = '{}' AND dt = '{}' and state='4' ".format(checkbatchno, rulecode, dt) curr_sql = get_specialnum cur_sql = curr_sql cursor.execute(get_specialnum) query_results = cursor.fetchall() specialnum = 0 for row in query_results: specialnum = row[0] special_alldetail = "" if specialnum != 0: # 获得这个批次号这个规则下的特殊业务场景明细 get_specialdetail = "SELECT scenerulecode, sceneillustrate,scenecode FROM dm_urp_bui_dgs.specialbusiness WHERE scenecode IN (select problemcode from dm_urp_bui_dgs.errordatadetail where checkbatchno = '{}' AND rulecode = '{}' AND dt = '{}' and state='4')".format(checkbatchno, rulecode, dt) curr_sql = get_specialdetail cur_sql = curr_sql cursor.execute(get_specialdetail) specialdetail_list = cursor.fetchall() for row in specialdetail_list: specialcode = row[0] specialdetail = row[1] scenecode = row[2] # 查询该特殊业务场景的具体数量 spnum_query = "select count(1) as tscount from dm_urp_bui_dgs.errordatadetail a where a.checkbatchno = '{}' and a.rulecode = '{}' and a.dt = '{}'".format(checkbatchno, specialcode, dt) curr_sql = spnum_query cur_sql = curr_sql cursor.execute(spnum_query) spnum_result = cursor.fetchall() # 处理查询结果,获取数量 spnum = 0 if spnum_result: spnum = spnum_result[0][0] # 根据spnum的值决定是否拼接字符串 if spnum != 0: special_alldetail = special_alldetail + specialdetail + "-" + str(spnum) + ";" if spnum == 0: print(f"手动标签统计: {rulecode}") spnum_query = "select count(1) as tscount from dm_urp_bui_dgs.errordatadetail a where a.checkbatchno = '{}' and a.problemcode = '{}' and a.dt = '{}' and rulecode = '{}'".format(checkbatchno, scenecode, dt,rulecode) curr_sql = spnum_query cur_sql = curr_sql print(f"手动标签统计执行SQL: {cur_sql}") cursor.execute(spnum_query) spnum_result = cursor.fetchall() spnum = spnum_result[0][0] special_alldetail = special_alldetail + specialdetail + "-" + str(spnum) + ";" # 获得错误数据占比 errorratio = 0 # allerrornum=errornum+specialnum allerrornum=errornum if totalnum != 0: errorratio = allerrornum / totalnum else: raise ZeroDivisionError("分母“总数据量”为0") # 获得待处理数据量 get_waitnum = "select count(1) from dm_urp_bui_dgs.errordatadetail where checkbatchno='{}' and rulecode = '{}' and state='1' and dt='{}'".format(checkbatchno, rulecode,dt) curr_sql = get_waitnum cur_sql = curr_sql cursor.execute(get_waitnum) waitnum_list=cursor.fetchall() waitnum=0 for row in waitnum_list: waitnum = row[0] print('待处理数据量') print(waitnum) # 获得已处理数据量 get_completenum = "select count(1) from dm_urp_bui_dgs.errordatadetail where checkbatchno='{}' and rulecode = '{}' and state='2' and dt='{}'".format(checkbatchno, rulecode,dt) curr_sql = get_completenum cur_sql = curr_sql cursor.execute(get_completenum) complete_list=cursor.fetchall() completenum=0 for row in complete_list: completenum = row[0] completenum=completenum+specialnum # 获得挂起数据量 get_Suspendnum = "select count(1) from dm_urp_bui_dgs.errordatadetail where checkbatchno='{}' and rulecode = '{}' and state='3' and dt='{}'".format(checkbatchno, rulecode,dt) curr_sql = get_Suspendnum cur_sql = curr_sql cursor.execute(get_Suspendnum) Suspendnum_list=cursor.fetchall() Suspendnum=0 for row in Suspendnum_list: Suspendnum = row[0] print('挂起数据量') print(Suspendnum) # 获得新增数据量(规则分组相同的相邻批次的数据) addnum = 0 get_newnum = "select count(1) from dm_urp_bui_dgs.errordatadetail where checkbatchno='{}' and rulecode='{}' and dt='{}' ;".format(checkbatchno, rulecode,dt) curr_sql = get_newnum cur_sql = curr_sql cursor.execute(get_newnum) newnum = cursor.fetchone()[0] get_oldcheckbatchno = "SELECT t1.checkbatchno FROM dm_urp_bui_dgs.errordatadetail t1 LEFT JOIN dm_urp_bui_dgs.checkbatchno t2 ON t1.CHECKBATCHNO=t2.CHECKBATCHNO WHERE t2.GROUPRULECODE=(SELECT GROUPRULECODE FROM dm_urp_bui_dgs.checkbatchno WHERE checkbatchno='{}') AND t1.CHECKBATCHNO<>'{}' GROUP BY t1.CHECKBATCHNO,t2.GROUPRULECODE ORDER BY t1.CHECKBATCHNO DESC;".format(checkbatchno, checkbatchno) curr_sql = get_oldcheckbatchno cur_sql = curr_sql cursor.execute(get_oldcheckbatchno) oldcheckbatchno = cursor.fetchone() if oldcheckbatchno: oldbatchno=oldcheckbatchno[0] get_oldnum = "SELECT COUNT(1) FROM dm_urp_bui_dgs.errordatadetail WHERE CHECKBATCHNO= '{}' and RULECODE= '{}' ;".format(oldbatchno, rulecode) curr_sql = get_oldnum cur_sql = curr_sql cursor.execute(get_oldnum) oldnum = cursor.fetchone()[0] addnum = newnum - oldnum # 获得累计数据量 cumulativenum =allerrornum print('累计数据量') print(cumulativenum) # 插入到"calculation"表中 dataCalculation_insert = "INSERT INTO dm_urp_bui_dgs.dataCalculation VALUES ('{}', '{}', '{}', '{}', '{}', '{}', '{}', '{}', '{}', '{}', '{}', '{}')".format(checkbatchno, rulecode,totalnum,cumulativenum,errorratio,specialnum,special_alldetail,waitnum,completenum,Suspendnum,addnum,cumulativenum) curr_sql = dataCalculation_insert cur_sql = curr_sql cursor.execute(dataCalculation_insert) # 检核完毕+数据量统计完毕 更改状态 taskstartdate = datetime.now().strftime("%Y-%m-%d %H:%M:%S") # prip_checkno = f"insert overwrite table dm_urp_bui_dgs.checkbatchno partition(dt='{dt1}') select * from dm_urp_bui_dgs.checkbatchno where systemCode <> 'PRIP' and dt='{dt1}' union all select systemCode,checkbatchno,checkeds,groupRuleCode,startdate,enddate,datetype,isimage,managecom,'02',checkDate,'{taskstartdate}',taskenddate,issendemail,taskowner from dm_urp_bui_dgs.checkbatchno where systemCode = 'PRIP' and exestatus = '01' and dt='{dt1}' union all select * from dm_urp_bui_dgs.checkbatchno where systemCode = 'PRIP' and exestatus = '02' and dt='{dt1}'" prip_checkno = f"insert overwrite table dm_urp_bui_dgs.checkbatchno select * from dm_urp_bui_dgs.checkbatchno where systemCode <> 'PRIP' union all select systemCode,checkbatchno,checkeds,groupRuleCode,startdate,enddate,datetype,isimage,managecom,'02',checkDate,'{taskstartdate}','{taskstartdate}',issendemail,taskowner from dm_urp_bui_dgs.checkbatchno where systemCode = 'PRIP' and exestatus = '01' union all select * from dm_urp_bui_dgs.checkbatchno where systemCode = 'PRIP' and exestatus = '02'" cur_sql = prip_checkno cursor.execute(cur_sql) # 获取当前时间 current_time = datetime.now() # 将当前时间转换为字符串类型 current_time_str = current_time.strftime("%Y-%m-%d %H:%M:%S") outflag="insert into dm_urp_bui_dgs.outlakesign values ('{}', 'Y','PRIP', '{}')".format(outdt,current_time_str) execute_query_and_insert(outflag) # 关闭查询用的游标 cursor.close() # 关闭数据库连接 connection.close() except Exception as e: print(f"执行过程中发生错误: {str(e)}") # 可根据需要添加事务回滚等逻辑 finally: # 关闭 cursor 和 connection if 'cursor' in locals(): cursor.close() if 'connection' in locals(): connection.close() execute_queries_and_insert() 以上代码报以下错: 获取到的checkbatchno为:C_PRIP_20250812_003 获取到的dt为:2025071401 查询特殊业务场景的清单执行的SQL为: select rulecodesys, count(1) from ( select a.rulesql, tmp.startdate, tmp.enddate, tmp.checkbatchno, tmp.datetype, a.rulesource, tmp.scenecode, tmp.SCENERULECODE, tmp.dt, tmp.rulecodesys from ( select t3.startdate, t3.enddate, t3.checkbatchno, t3.datetype, t1.rulesource, t4.scenecode, t4.SCENERULECODE, '2025071401' as dt, t5.rulecodesys from dm_urp_bui_dgs.ruletypeconfig t1 inner join dm_urp_bui_dgs.rulegrouprelation t2 on t1.rulecode = t2.rulecode inner join dm_urp_bui_dgs.checkbatchno t3 on t2.groupcode = t3.groupRuleCode inner join dm_urp_bui_dgs.rulecodetosys t5 on t5.rulecode = t1.rulecode left join dm_urp_bui_dgs.SPECIALBUSINESS t4 on t5.rulecodesys = t4.RULECODE where t3.systemCode = 'PRIP' and t3.exestatus = '01' and t3.checkbatchno = 'C_PRIP_20250812_003' ) tmp inner join dm_urp_bui_dgs.ruletypeconfig a on tmp.SCENERULECODE = a.rulecode where a.rulesource = '04' and a.RULESTATUS = '0' ) group by rulecodesys 完整查询结果: [Row(rulecodesys='PRP_3rd_L_A_I_000033', count(1)=1), Row(rulecodesys='PRP_3rd_L_A_I_010075', count(1)=1), Row(rulecodesys='PRP_3rd_L_A_I_000028', count(1)=1), Row(rulecodesys='PRP_3rd_L_A_I_000044', count(1)=1), Row(rulecodesys='PRP_3rd_L_A_I_010074', count(1)=1), Row(rulecodesys='PRP_3rd_L_A_I_000040', count(1)=2), Row(rulecodesys='PRP_3rd_L_A_I_010104', count(1)=3), Row(rulecodesys='PRP_3rd_L_A_O_010046', count(1)=2), Row(rulecodesys='PRP_3rd_L_A_I_002768', count(1)=1)] 特殊业务规则select_special_query的结果循环执行开始 rulecodesys的值为:PRP_3rd_L_A_I_000033 rulecodecount的值为:1 执行过程中发生错误: cannot access local variable 'sceneillustrate' where it is not associated with a value
08-13
评论
成就一亿技术人!
拼手气红包6.0元
还能输入1000个字符
 
红包 添加红包
表情包 插入表情
 条评论被折叠 查看
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值