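# Third-party libraries that need to be installed / imported:
#   pip install pypiwin32
#   openpyxl
#   shutil (part of the standard library, nothing to install)
#   pyinstaller
# Fixed the bug caused by cells whose value was left blank:
# data that is not filled in now falls back to an empty default.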
# coding:utf-8
import os
import win32com.client as win32
import shutil
import openpyxl
# import time
#####part one
import time
# Start of the run-time measurement; the elapsed time is printed at the end of the script.
start = time.perf_counter()
def makedir():
    """Create an empty ``bak`` directory in the current working directory.

    If ``bak`` already exists it is deleted first, so files left over from a
    previous run cannot pollute the new one.
    """
    mydir = 'bak'
    if os.path.isdir(mydir):
        shutil.rmtree(mydir)
    os.makedirs(mydir, exist_ok=True)
def movedir():
    """Fill ``bak`` with an ``.xlsx`` version of every workbook in the cwd.

    ``.xls`` files are opened in Excel through COM and saved into ``bak`` as
    ``<name>.xlsx`` (an ``x`` is appended to the original file name).
    ``.xlsx`` files are appended to the module-level ``file`` list and then
    copied into ``bak`` unchanged.

    Side effect: as soon as one regular file has been seen, the process's
    working directory is switched to ``bak`` and stays there.
    """
    path = os.getcwd()  # directory that holds the source workbooks
    path_bak = os.path.join(path, 'bak')
    for s in os.listdir(path):
        newdir = os.path.join(path, s)
        if not os.path.isfile(newdir):
            continue
        os.chdir(path_bak)
        extension = os.path.splitext(newdir)[1]
        if extension == ".xls":
            target = os.path.join(path_bak, s + 'x')
            if os.path.exists(target):
                # Already converted: skip only this file and keep scanning
                # (a ``break`` here would drop every remaining workbook).
                continue
            excel = win32.gencache.EnsureDispatch('Excel.Application')
            try:
                wb = excel.Workbooks.Open(newdir)
                try:
                    # FileFormat 51 is .xlsx (56 would be .xls).
                    wb.SaveAs(target, FileFormat=51)
                finally:
                    wb.Close()
            finally:
                # Quit even when the conversion fails so Excel is not left
                # running in the background.
                excel.Application.Quit()
        elif extension == ".xlsx":
            file.append(newdir)
    # Copy every collected .xlsx file into the bak folder next to it.
    for source in file:
        folder, name = os.path.split(source)
        shutil.copyfile(source, os.path.join(folder, 'bak', name))
# time.sleep(5) #暂停10秒
#####第二步将文件目录切换至bak文件夹中,,切片u v w
def getbaklist(path_deal):
    """Append the full path of every entry in ``path_deal`` to ``file_deal``.

    ``file_deal`` is a module-level list.  The directory that is listed is
    ``path_deal`` itself, so the joined paths always point at real entries.
    """
    for name in os.listdir(path_deal):
        file_deal.append(os.path.join(path_deal, name))
def getbooksheet(path, filename):
    """Record which report types the workbook ``path``/``filename`` contains.

    Every cell of every worksheet is scanned with spaces removed.  When a
    cell's text contains one of the marker strings below, the module-level
    dicts are updated: ``fn[report] = filename`` and
    ``ws[report] = <sheet title>``.  A later match overwrites an earlier one.
    """
    # (report key, marker substrings).  Checked in this order; the first
    # report that matches a given cell wins for that cell.
    markers = (
        ('资产负债表', ('资产负债表',)),
        ('利润表', ('利润表', '损益表')),
        ('纳税申报表', ('一般纳税人适用', '小规模纳税人适用')),
        ('财务状况表', ('***子表开始',)),
    )
    wb = openpyxl.load_workbook(os.path.join(path, filename))
    try:
        # ``wb.worksheets`` replaces the deprecated get_sheet_names() /
        # get_sheet_by_name() pair and skips chart sheets, which have no cells.
        for sheet in wb.worksheets:
            for row in sheet.iter_rows():
                for cell in row:
                    text = str(cell.value).replace(' ', '')
                    for report, needles in markers:
                        if any(needle in text for needle in needles):
                            fn[report] = filename
                            ws[report] = sheet.title
                            break
    finally:
        wb.close()
def _find_label_cell(sheet, labels, min_column=0, excluded=()):
    """Return the first cell, in row-major order, whose text contains a label.

    Spaces are removed from the cell text before matching.  Cells whose
    column index is not greater than ``min_column`` are skipped, as are cells
    whose text contains any string in ``excluded``.  Returns ``None`` when no
    cell matches.
    """
    for row in sheet.iter_rows():
        for cell in row:
            if cell.column <= min_column:
                continue
            text = str(cell.value).replace(' ', '')
            if any(unwanted in text for unwanted in excluded):
                continue
            if any(label in text for label in labels):
                return cell
    return None


def getdata(path, filename, worksheet, list):
    """Look up several values in one worksheet and return them as a list.

    Args:
        path: Directory containing the workbook.
        filename: Workbook file name.
        worksheet: Title of the sheet to read.
        list: Sequence of ``[row_labels, column_labels]`` entries.  (The name
            shadows the builtin; it is kept so the signature is unchanged.)

    For each entry the row is taken from the first cell containing one of
    ``row_labels``; the column is taken from the first cell containing one of
    ``column_labels`` that lies to the right of that row-label cell.  The
    value at (row, column) is collected, or ``0`` when either label is not
    found or the target cell is empty.

    Returns:
        The collected values, in the order of ``list``.  The list is also
        printed.
    """
    wb = openpyxl.load_workbook(os.path.join(path, filename))
    try:
        sheet = wb[worksheet]
        zichan_result = []
        for entry in list:
            value = None
            row_cell = _find_label_cell(sheet, entry[0])
            if row_cell is not None:
                column_cell = _find_label_cell(
                    sheet, entry[1], min_column=row_cell.column)
                if column_cell is not None:
                    value = sheet.cell(
                        row=row_cell.row, column=column_cell.column).value
            zichan_result.append(0 if value is None else value)
    finally:
        wb.close()
    print(zichan_result)
    return zichan_result
if __name__ == '__main__':
    # Module-level state shared with the functions above.
    file = []       # .xlsx source paths collected by movedir()
    file_deal = []  # paths of the entries in bak, filled by getbaklist()
    # Report type -> workbook file name / sheet title, filled by getbooksheet().
    fn = {'资产负债表': 0, '利润表': 0, '纳税申报表': 0, '财务状况表': 0}
    ws = {'资产负债表': 0, '利润表': 0, '纳税申报表': 0, '财务状况表': 0}
    makedir()
    movedir()
    getbaklist(os.getcwd())
    for pcm_file_deal in file_deal:
        # Bound as module-level names because helper code may read them as
        # globals.
        path_0_deal, path_1_deal = os.path.split(pcm_file_deal)
        getbooksheet(path_0_deal, path_1_deal)
    # Each entry is [row labels, column labels].  The first entry reads the
    # opening balance, all the others read the closing balance.
    zichan_list = [[['存货'], ['年初余额', '年初数', '期初余额']]]
    zichan_list += [
        [[label], ['期末余额', '期末数']]
        for label in (
            '流动资产合计',
            '应收账款',
            '存货',
            '固定资产原价',
            '无形资产',
            '累计折旧',
            '资产总计',
            '流动负债合计',
            '应付账款',
            '负债合计',
            '所有者权益合计',
            '实收资本',
            '个人资本',
        )
    ]
    if fn['资产负债表'] and ws['资产负债表']:
        print(getdata(os.getcwd() + r'/', fn['资产负债表'], ws['资产负债表'], zichan_list))
    else:
        # No workbook contained a balance sheet; the placeholders are still 0
        # and cannot be used as a file name / sheet title.
        print("未找到资产负债表")
    end = time.perf_counter()
    print("运行耗时", end - start)

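# Third-party libraries that need to be installed / imported:
#   pip install pypiwin32
#   openpyxl
#   shutil (part of the standard library, nothing to install)
#   pyinstaller
# Fixed the bug caused by cells whose value was left blank:
# data that is not filled in now falls back to an empty default.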
# coding:utf-8
import os
import win32com.client as win32
import shutil
import openpyxl
import time
# Start of the run-time measurement; the elapsed time is printed at the end of the script.
start = time.perf_counter()
# import time
#####part one
def makedir():
    """Create an empty ``bak`` directory in the current working directory.

    If ``bak`` already exists it is deleted first, so files left over from a
    previous run cannot pollute the new one.
    """
    mydir = 'bak'
    if os.path.isdir(mydir):
        shutil.rmtree(mydir)
    os.makedirs(mydir, exist_ok=True)
def movedir():
    """Fill ``bak`` with an ``.xlsx`` version of every workbook in the cwd.

    ``.xls`` files are opened in Excel through COM and saved into ``bak`` as
    ``<name>.xlsx`` (an ``x`` is appended to the original file name).
    ``.xlsx`` files are appended to the module-level ``file`` list and then
    copied into ``bak`` unchanged.

    Side effect: as soon as one regular file has been seen, the process's
    working directory is switched to ``bak`` and stays there.
    """
    path = os.getcwd()  # directory that holds the source workbooks
    path_bak = os.path.join(path, 'bak')
    for s in os.listdir(path):
        newdir = os.path.join(path, s)
        if not os.path.isfile(newdir):
            continue
        os.chdir(path_bak)
        extension = os.path.splitext(newdir)[1]
        if extension == ".xls":
            target = os.path.join(path_bak, s + 'x')
            if os.path.exists(target):
                # Already converted: skip only this file and keep scanning
                # (a ``break`` here would drop every remaining workbook).
                continue
            excel = win32.gencache.EnsureDispatch('Excel.Application')
            try:
                wb = excel.Workbooks.Open(newdir)
                try:
                    # FileFormat 51 is .xlsx (56 would be .xls).
                    wb.SaveAs(target, FileFormat=51)
                finally:
                    wb.Close()
            finally:
                # Quit even when the conversion fails so Excel is not left
                # running in the background.
                excel.Application.Quit()
        elif extension == ".xlsx":
            file.append(newdir)
    # Copy every collected .xlsx file into the bak folder next to it.
    for source in file:
        folder, name = os.path.split(source)
        shutil.copyfile(source, os.path.join(folder, 'bak', name))
# time.sleep(5) #暂停10秒
#####第二步将文件目录切换至bak文件夹中,,切片u v w
def getbaklist(path_deal):
    """Append the full path of every entry in ``path_deal`` to ``file_deal``.

    ``file_deal`` is a module-level list.  The directory that is listed is
    ``path_deal`` itself, so the joined paths always point at real entries.
    """
    for name in os.listdir(path_deal):
        file_deal.append(os.path.join(path_deal, name))
def getbooksheet(path, filename):
    """Record which report types the workbook ``path``/``filename`` contains.

    Every cell of every worksheet is scanned with spaces removed.  When a
    cell's text contains one of the marker strings below, the module-level
    dicts are updated: ``fn[report] = filename`` and
    ``ws[report] = <sheet title>``.  A later match overwrites an earlier one.
    """
    # (report key, marker substrings).  Checked in this order; the first
    # report that matches a given cell wins for that cell.
    markers = (
        ('资产负债表', ('资产负债表',)),
        ('利润表', ('利润表', '损益表')),
        ('纳税申报表', ('一般纳税人适用', '小规模纳税人适用')),
        ('财务状况表', ('***子表开始',)),
    )
    wb = openpyxl.load_workbook(os.path.join(path, filename))
    try:
        # ``wb.worksheets`` replaces the deprecated get_sheet_names() /
        # get_sheet_by_name() pair and skips chart sheets, which have no cells.
        for sheet in wb.worksheets:
            for row in sheet.iter_rows():
                for cell in row:
                    text = str(cell.value).replace(' ', '')
                    for report, needles in markers:
                        if any(needle in text for needle in needles):
                            fn[report] = filename
                            ws[report] = sheet.title
                            break
    finally:
        wb.close()
def _find_label_cell(sheet, labels, min_column=0, excluded=()):
    """Return the first cell, in row-major order, whose text contains a label.

    Spaces are removed from the cell text before matching.  Cells whose
    column index is not greater than ``min_column`` are skipped, as are cells
    whose text contains any string in ``excluded``.  Returns ``None`` when no
    cell matches.
    """
    for row in sheet.iter_rows():
        for cell in row:
            if cell.column <= min_column:
                continue
            text = str(cell.value).replace(' ', '')
            if any(unwanted in text for unwanted in excluded):
                continue
            if any(label in text for label in labels):
                return cell
    return None


def getdata(path, filename, worksheet, data_row, data_column):
    """Read one value from a worksheet, addressed by row and column labels.

    Args:
        path: Directory containing the workbook.
        filename: Workbook file name.
        worksheet: Title of the sheet to read.
        data_row: Candidate labels; the first cell containing one of them
            fixes the row.
        data_column: Candidate labels; the first cell containing one of them
            to the right of the row-label cell fixes the column.

    Returns:
        The value at (row, column), or ``0`` when either label is not found
        or the target cell is empty.  The row number, the column number and
        the result are printed as they are determined.
    """
    wb = openpyxl.load_workbook(os.path.join(path, filename))
    try:
        sheet = wb[worksheet]
        row_cell = _find_label_cell(sheet, data_row)
        column_temp = 0
        if row_cell is not None:
            print(row_cell.row)
            column_temp = row_cell.column
        column_cell = _find_label_cell(sheet, data_column, min_column=column_temp)
        if column_cell is not None:
            print(column_cell.column)
        getdata_result = 0
        if row_cell is not None and column_cell is not None:
            value = sheet.cell(row=row_cell.row, column=column_cell.column).value
            if value is not None:
                getdata_result = value
    finally:
        wb.close()
    print(getdata_result)
    return getdata_result
def _find_label_cell(sheet, labels, min_column=0, excluded=()):
    """Return the first cell, in row-major order, whose text contains a label.

    Spaces are removed from the cell text before matching.  Cells whose
    column index is not greater than ``min_column`` are skipped, as are cells
    whose text contains any string in ``excluded``.  Returns ``None`` when no
    cell matches.
    """
    for row in sheet.iter_rows():
        for cell in row:
            if cell.column <= min_column:
                continue
            text = str(cell.value).replace(' ', '')
            if any(unwanted in text for unwanted in excluded):
                continue
            if any(label in text for label in labels):
                return cell
    return None


def writedata(path, filename, worksheet, data_row, data_column, data_write):
    """Write ``data_write / 1000`` (rounded) into a label-addressed cell.

    Args:
        path: Directory containing the workbook.
        filename: Workbook file name.
        worksheet: Title of the sheet to modify.
        data_row: Candidate labels; the first cell containing one of them
            fixes the row.
        data_column: Candidate labels; the first cell containing one of them
            to the right of the row-label cell fixes the column.
        data_write: Number to store.  ``None`` and blank strings are treated
            as ``0``.

    The workbook is saved back to the file it was loaded from, and only when
    a cell was actually written.  If the row or the column cannot be located
    a message is printed and the file is left untouched.
    """
    workbook_path = os.path.join(path, filename)
    wb = openpyxl.load_workbook(workbook_path)
    try:
        sheet = wb[worksheet]
        row_cell = _find_label_cell(sheet, data_row)
        column_temp = 0
        if row_cell is not None:
            print(row_cell.row)
            column_temp = row_cell.column
        column_cell = _find_label_cell(sheet, data_column, min_column=column_temp)
        if column_cell is not None:
            print(column_cell.column)
        if row_cell is None or column_cell is None:
            print("未获取需要写入的行或列号")
            return
        print(len(str(data_write)))
        if data_write is None or not str(data_write).strip():
            data_write = 0
        sheet.cell(row=row_cell.row, column=column_cell.column).value = round(
            data_write / 1000, 0)
        wb.save(workbook_path)
    finally:
        # The original ``wb.close`` lacked parentheses and never ran.
        wb.close()
def _find_label_cell(sheet, labels, min_column=0, excluded=()):
    """Return the first cell, in row-major order, whose text contains a label.

    Spaces are removed from the cell text before matching.  Cells whose
    column index is not greater than ``min_column`` are skipped, as are cells
    whose text contains any string in ``excluded``.  Returns ``None`` when no
    cell matches.
    """
    for row in sheet.iter_rows():
        for cell in row:
            if cell.column <= min_column:
                continue
            text = str(cell.value).replace(' ', '')
            if any(unwanted in text for unwanted in excluded):
                continue
            if any(label in text for label in labels):
                return cell
    return None


def writedata_especial(path, filename, worksheet, data_row, data_row_especial, data_column, data_write):
    """Like ``writedata``, but skip row-label cells containing an exclusion.

    Args:
        path: Directory containing the workbook.
        filename: Workbook file name.
        worksheet: Title of the sheet to modify.
        data_row: Candidate labels; the first cell containing one of them
            (and not the exclusion) fixes the row.
        data_row_especial: Exclusion strings.  Only the first element is
            applied; a row-label cell containing it is ignored.
            NOTE(review): any further elements are not consulted - confirm
            whether that is intended.
        data_column: Candidate labels; the first cell containing one of them
            to the right of the row-label cell fixes the column.
        data_write: Number to store.  ``None`` and blank strings are treated
            as ``0``.

    The workbook is saved back to the file it was loaded from, and only when
    a cell was actually written.  If the row or the column cannot be located
    a message is printed and the file is left untouched.
    """
    workbook_path = os.path.join(path, filename)
    wb = openpyxl.load_workbook(workbook_path)
    try:
        sheet = wb[worksheet]
        row_cell = _find_label_cell(
            sheet, data_row, excluded=data_row_especial[:1])
        column_temp = 0
        if row_cell is not None:
            print(row_cell.row)
            column_temp = row_cell.column
        column_cell = _find_label_cell(sheet, data_column, min_column=column_temp)
        if column_cell is not None:
            print(column_cell.column)
        if row_cell is None or column_cell is None:
            print("未获取需要写入的行或列号")
            return
        print(len(str(data_write)))
        if data_write is None or not str(data_write).strip():
            data_write = 0
        sheet.cell(row=row_cell.row, column=column_cell.column).value = round(
            data_write / 1000, 0)
        wb.save(workbook_path)
    finally:
        # The original ``wb.close`` lacked parentheses and never ran.
        wb.close()
if __name__ == '__main__':
    # Module-level state shared with the functions above.
    file = []       # .xlsx source paths collected by movedir()
    file_deal = []  # paths of the entries in bak, filled by getbaklist()
    # Report type -> workbook file name / sheet title, filled by getbooksheet().
    fn = {'资产负债表': 0, '利润表': 0, '纳税申报表': 0, '财务状况表': 0}
    ws = {'资产负债表': 0, '利润表': 0, '纳税申报表': 0, '财务状况表': 0}
    makedir()
    movedir()
    getbaklist(os.getcwd())
    for pcm_file_deal in file_deal:
        # Bound as module-level names because helper code may read them as
        # globals.
        path_0_deal, path_1_deal = os.path.split(pcm_file_deal)
        getbooksheet(path_0_deal, path_1_deal)

    # Balance-sheet items to read, in order.  "Q" = quarterly report,
    # "A" = annual report.
    # Items listed in the original notes but not read here: buildings and
    # structures (A), machinery and equipment (A), depreciation for the year
    # (Q/A), net fixed assets (A), construction in progress (A), intangible
    # assets (A), land-use rights (A), personal capital (A).
    # TODO: write the values into the '财务状况表' sheet with writedata(),
    # e.g. '无形资产' and '个人资本' under the '1-本季' / '本年' column.
    balance_items = [
        (['存货'], ['年初余额', '年初数', '期初余额']),  # Q/A: opening inventory
    ]
    balance_items += [
        ([label], ['期末余额', '期末数'])
        for label in (
            '流动资产合计',    # Q/A: total current assets
            '应收账款',        # Q/A: accounts receivable
            '存货',            # Q/A: closing inventory
            '固定资产原价',    # Q/A: fixed assets at original cost
            '累计折旧',        # Q/A: accumulated depreciation
            '资产总计',        # Q: total assets
            '流动负债合计',    # Q/A: total current liabilities
            '应付账款',        # A: accounts payable
            '负债合计',        # Q/A: total liabilities
            '所有者权益合计',  # A: total owners' equity
            '实收资本',        # A: paid-in capital
        )
    ]
    if fn['资产负债表'] and ws['资产负债表']:
        for row_labels, column_labels in balance_items:
            getdata(os.getcwd() + r'/', fn['资产负债表'], ws['资产负债表'],
                    row_labels, column_labels)
    else:
        # No workbook contained a balance sheet; the placeholders are still 0
        # and cannot be used as a file name / sheet title.
        print("未找到资产负债表")
    end = time.perf_counter()
    print("运行耗时", end - start)

本文介绍了一个自动化处理财务报表的Python程序,该程序能够批量转换.xls文件为.xlsx格式,并从指定的财务报表中提取关键数据,如资产负债表和利润表等。通过对单元格进行精确匹配,实现了对特定数据项的抓取。
6622

被折叠的 条评论
为什么被折叠?



