笔记:主要掌握根据字典内容同时替换多个字符串

import json
import re
import os
import pandas as pd
from sqlalchemy import create_engine

# 根据字典同时替换多个字符串
class make_xlat:
    def __init__(self, *args, **kwargs):
        self.adict = dict(*args, **kwargs)
        self.rx = self.make_rx()

    def make_rx(self):
        return re.compile('|'.join(map(re.escape, self.adict)))

    def one_xlat(self, match):
        return str(self.adict[match.group(0)])

    def __call__(self, text):
        return self.rx.sub(self.one_xlat, text)


def get_file_path(rootdir):
    for (dirpath, dirnames, filenames) in os.walk(rootdir):
        pathname = [os.path.join(dirpath, filename) for filename in filenames]
    return pathname


def write2sql(df, tableName):
    con = create_engine('mysql+pymysql://user:password@host:port/database?charset=utf8')
    df.index = range(1, len(df) + 1)
    df.to_sql(tableName, con, schema='database', if_exists='append', index=True, index_label='id')  # chunksize=10000


def get_data(filename_list):
    data_li = []
    for filename in filename_list:
        print('filename==>', filename)
        with open(filename, 'r+', encoding='utf-8') as f:
            content = f.read().split('=')[1]
            datas = json.loads(re.match(r'.*data: (.*),font:', content).group(1))
            # print('datas==>', datas)
            if datas:
                font = json.loads(re.match(r'.*font:(.*)}', content).group(1))
                adict = {font_map['code']: font_map['value'] for font_map in font['FontMapping']}
                print('adic===>', adict)
                translate = make_xlat(adict)
                expenses = ['sumasset', 'fixedasset', 'monetaryfund', 'accountrec', 'inventory', 'sumliab',
                            'accountpay', 'advancereceive', 'sumshequity', 'cashanddepositcbank',
                            'loanadvances', 'saleablefasset', 'borrowfromcbank', 'acceptdeposit', 'sellbuybackfasset',
                            'settlementprovision', 'borrowfund', 'agenttradesecurity', 'premiumrec', 'stborrow',
                            'premiumadvance']
                tbs = ['accountrec_tb', 'inventory_tb', 'monetaryfund_tb', 'accountpay_tb', 'advancereceive_tb',
                       'sumshequity_tb', 'tsatz', 'tdetz', 'cashanddepositcbank_tb', 'loanadvances_tb',
                       'saleablefasset_tb', 'borrowfromcbank_tb', 'acceptdeposit_tb', 'sellbuybackfasset_tb',
                       'settlementprovision_tb', 'borrowfund_tb', 'agenttradesecurity_tb', 'premiumrec_tb',
                       'stborrow_tb', 'premiumadvance_tb', 'ld', 'zcfzl']
                for data in datas:
                    expenses_dic = {}
                    expenses_dic['gp_code'] = filename.split('.')[0].split('\\')[-1]
                    expenses_dic['reportdate'] = data['reportdate']
                    expenses_dic['noticedate'] = data['noticedate']
                    for expense in expenses:
                        if data[expense] == '-':
                            expenses_dic[expense] = data[expense]
                        else:
                            # print(translate(data[expense]))
                            expenses_dic[expense] = round(float(translate(data[expense])) / 100000000, 2)  # 单位亿
                    for tb in tbs:
                        if data[tb] == '-' or '无穷大' in data[tb] or '非数字' in data[tb]:
                            expenses_dic[tb] = data[tb]
                        else:
                            expenses_dic[tb] = round(float(translate(data[tb])) * 100, 2)  # 百分比    
                    print('expenses_dic=====>', expenses_dic)
                    data_li.append(expenses_dic)
    df = pd.DataFrame(data_li)
    return df


if __name__ == '__main__':
    rootdir = 'dir'
    filename_list = get_file_path(rootdir)
    df = get_data(filename_list)
    print('=========================df')
    print(df)
    # write2sql(df, 'profit_tb')
    # write2sql(df, 'xjll_tb')

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值