查询匹配(python)

import pymssql
import pandas as pd
import re

###连接sql sever并读取数据到dataframe
conn = pymssql.connect('WIN****', 'sa', '*****', 'jsontest')
cursor = conn.cursor()

sql = "select drugid,pclass,pName,pNameen from importdrugs_plus"
df_import = pd.read_sql(sql, conn)

sql1 = "select en,ch,atccode1 from plus"
df_ATC = pd.read_sql(sql1,conn)


def matchedkeys(x,list_EN):
    frontvar = x.lower()
    matchedkeys = [i for i in list_EN if all([j in re.split(r"[ ,/,;]",frontvar) for j in i.split()])]
    matchedkeys = list(set(matchedkeys))
    if matchedkeys:
        return ";".join(matchedkeys)
    else:
        return "未找到关键词匹配"


def matchedatc(frontvar,df2):
    m = []
    for i in re.split(r"[;]",frontvar):
        matcheda = df2[(df2.en_lower == i)].atccode.tolist()
        if matcheda:
            m = list(set(m+matcheda))
            if '' in m:
                m.remove("")
    if m:
        return ";".join(m)
    else:
        return "无"

def lower(x):
    fron = x.lower()
    return fron

def matchedch(frontvar,df2):
    m = []
    for i in re.split(r"[;]",frontvar):
        matcheda = df2[(df2.en_lower == i)].ch.tolist()
        if matcheda:
            m = list(set(m+matcheda))
            if '' in m:
                m.remove("")
    if m:
        return ";".join(m)
    else:
        return "无"
    
df_ATC = df_ATC.rename(columns={'atccode1':'atccode'})

df_ATC['en_lower'] = df_ATC['en'].apply(lambda x:lower(x))

df_import['matched_en'] = df_import['pNameen'].apply(lambda x:matchedkeys(x,list(df_ATC['en_lower'])))


df_import['matched_ATCcode'] = df_import['matched_en'].apply(lambda x:matchedatc(x,df_ATC))

df_import['matched_ch'] = df_import['matched_en'].apply(lambda x:matchedch(x,df_ATC))

 

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值