def get_qu3(x):
    """Normalize one raw district/county value.

    Args:
        x: A raw cell value from the district column.

    Returns:
        "未识别" when the value is not a string, is empty, is a single
        character, or is longer than five characters; the value with "-"
        removed when it contains a dash; "六合区" when it contains that
        name; otherwise the value unchanged.
    """
    # Missing cells (e.g. NaN floats) have no len(); treat them as unrecognized.
    if not isinstance(x, str):
        return "未识别"

    len_x = len(x)
    if len_x <= 1 or len_x > 5:
        return "未识别"
    if "-" in x:
        # NOTE(review): the original called x.split("-", ""), which raises
        # TypeError (maxsplit must be an int). The (old, new) argument shape
        # matches str.replace, so dash removal is assumed here -- confirm.
        return x.replace("-", "")
    if "六合区" in x:
        return "六合区"
    return x
# Run get_qu3 over every value of the "区县" column and store the results back
# into the same column.
df["区县"]= df["区县"].apply(get_qu3)
# Review and re-check the cleaned data (复盘检查数据)
def qu_jiancha(x):
    """Check whether a district value looks like a plausible place name.

    Args:
        x: A district string.

    Returns:
        1 if the value contains "区", "县" or "镇", equals "未识别", or ends
        with "路", "村", "街" or "道"; otherwise 0.
    """
    if "区" in x or "县" in x or "镇" in x or x == "未识别":
        return 1
    # str.endswith accepts a tuple, so one call covers every accepted suffix.
    if x.endswith(("路", "村", "街", "道")):
        return 1
    return 0
# Add a helper column holding qu_jiancha's 1/0 verdict for each district value.
df["quxian"] = df["区县"].apply(qu_jiancha)
# Select the district values whose verdict is 0, then keep one row per
# distinct value in a single-column DataFrame.
a = df.loc[df["quxian"] == 0, "区县"]
b = a.drop_duplicates().to_frame()
# Write the contents of b to a CSV file, without the index.
b.to_csv("区县_check.csv", index=False)


def quxian_jiancha(quxian, quxian_jiancha):
    """Return the district value if its check flag is 1, else "未识别".

    Args:
        quxian: The district value.
        quxian_jiancha: The check flag for that value. This parameter
            shadows the function's own name inside the body; the name is
            kept so existing callers are unaffected.

    Returns:
        ``quxian`` when the flag equals 1, otherwise "未识别".
    """
    if quxian_jiancha == 1:
        return quxian
    # The original else branch was the bare expression "未识别" with no
    # `return`, so the function returned None here.
    return "未识别"
# Row by row, pass the district value and its check flag to quxian_jiancha
# and store the result back into the "区县" column.
df["区县"] = df.apply(
    lambda row: quxian_jiancha(row["区县"], row["quxian"]),
    axis=1,
)
del df["quxian"]  # drop the helper column
df.to_csv("data.csv", header=True, index=False)