import pymssql
import pandas as pd
import re
###连接sql sever并读取数据到dataframe
conn = pymssql.connect('WIN****', 'sa', '*****', 'jsontest')
cursor = conn.cursor()
sql = "select drugid,pclass,pName,pNameen from importdrugs_plus"
df_import = pd.read_sql(sql, conn)
sql1 = "select en,ch,atccode1 from plus"
df_ATC = pd.read_sql(sql1,conn)
def matchedkeys(x,list_EN):
frontvar = x.lower()
matchedkeys = [i for i in list_EN if all([j in re.split(r"[ ,/,;]",frontvar) for j in i.split()])]
matchedkeys = list(set(matchedkeys))
if matchedkeys:
return ";".join(matchedkeys)
else:
return "未找到关键词匹配"
def matchedatc(frontvar,df2):
m = []
for i in re.split(r"[;]",frontvar):
matcheda = df2[(df2.en_lower == i)].atccode.tolist()
if matcheda:
m = list(set(m+matcheda))
if '' in m:
m.remove("")
if m:
return ";".join(m)
else:
return "无"
def lower(x):
fron = x.lower()
return fron
def matchedch(frontvar,df2):
m = []
for i in re.split(r"[;]",frontvar):
matcheda = df2[(df2.en_lower == i)].ch.tolist()
if matcheda:
m = list(set(m+matcheda))
if '' in m:
m.remove("")
if m:
return ";".join(m)
else:
return "无"
df_ATC = df_ATC.rename(columns={'atccode1':'atccode'})
df_ATC['en_lower'] = df_ATC['en'].apply(lambda x:lower(x))
df_import['matched_en'] = df_import['pNameen'].apply(lambda x:matchedkeys(x,list(df_ATC['en_lower'])))
df_import['matched_ATCcode'] = df_import['matched_en'].apply(lambda x:matchedatc(x,df_ATC))
df_import['matched_ch'] = df_import['matched_en'].apply(lambda x:matchedch(x,df_ATC))