import json
import re
import os
import pandas as pd
from sqlalchemy import create_engine
class make_xlat:
def __init__(self, *args, **kwargs):
self.adict = dict(*args, **kwargs)
self.rx = self.make_rx()
def make_rx(self):
return re.compile('|'.join(map(re.escape, self.adict)))
def one_xlat(self, match):
return str(self.adict[match.group(0)])
def __call__(self, text):
return self.rx.sub(self.one_xlat, text)
def get_file_path(rootdir):
for (dirpath, dirnames, filenames) in os.walk(rootdir):
pathname = [os.path.join(dirpath, filename) for filename in filenames]
return pathname
def write2sql(df, tableName):
con = create_engine('mysql+pymysql://user:password@host:port/database?charset=utf8')
df.index = range(1, len(df) + 1)
df.to_sql(tableName, con, schema='database', if_exists='append', index=True, index_label='id')
def get_data(filename_list):
data_li = []
for filename in filename_list:
print('filename==>', filename)
with open(filename, 'r+', encoding='utf-8') as f:
content = f.read().split('=')[1]
datas = json.loads(re.match(r'.*data: (.*),font:', content).group(1))
if datas:
font = json.loads(re.match(r'.*font:(.*)}', content).group(1))
adict = {font_map['code']: font_map['value'] for font_map in font['FontMapping']}
print('adic===>', adict)
translate = make_xlat(adict)
expenses = ['sumasset', 'fixedasset', 'monetaryfund', 'accountrec', 'inventory', 'sumliab',
'accountpay', 'advancereceive', 'sumshequity', 'cashanddepositcbank',
'loanadvances', 'saleablefasset', 'borrowfromcbank', 'acceptdeposit', 'sellbuybackfasset',
'settlementprovision', 'borrowfund', 'agenttradesecurity', 'premiumrec', 'stborrow',
'premiumadvance']
tbs = ['accountrec_tb', 'inventory_tb', 'monetaryfund_tb', 'accountpay_tb', 'advancereceive_tb',
'sumshequity_tb', 'tsatz', 'tdetz', 'cashanddepositcbank_tb', 'loanadvances_tb',
'saleablefasset_tb', 'borrowfromcbank_tb', 'acceptdeposit_tb', 'sellbuybackfasset_tb',
'settlementprovision_tb', 'borrowfund_tb', 'agenttradesecurity_tb', 'premiumrec_tb',
'stborrow_tb', 'premiumadvance_tb', 'ld', 'zcfzl']
for data in datas:
expenses_dic = {}
expenses_dic['gp_code'] = filename.split('.')[0].split('\\')[-1]
expenses_dic['reportdate'] = data['reportdate']
expenses_dic['noticedate'] = data['noticedate']
for expense in expenses:
if data[expense] == '-':
expenses_dic[expense] = data[expense]
else:
expenses_dic[expense] = round(float(translate(data[expense])) / 100000000, 2)
for tb in tbs:
if data[tb] == '-' or '无穷大' in data[tb] or '非数字' in data[tb]:
expenses_dic[tb] = data[tb]
else:
expenses_dic[tb] = round(float(translate(data[tb])) * 100, 2)
print('expenses_dic=====>', expenses_dic)
data_li.append(expenses_dic)
df = pd.DataFrame(data_li)
return df
if __name__ == '__main__':
rootdir = 'dir'
filename_list = get_file_path(rootdir)
df = get_data(filename_list)
print('=========================df')
print(df)