2021-01-29

本文档展示了如何使用Python脚本自动化处理Excel文件:先将当前目录下的工作簿转换并复制到备份文件夹(bak),再从中筛选关键财务报表(包括资产负债表、利润表和纳税申报表),并进行数据抓取和格式调整。脚本通过openpyxl库操作Excel,实现财务数据的高效管理。

摘要生成于 C知道 ,由 DeepSeek-R1 满血版支持, 前往体验 >

#coding:utf-8
# import os
import win32com.client as win32
import shutil
import openpyxl
import os
import win32com.client as win32
import shutil
import openpyxl

import time

import time

#####part one


# Lookup tables filled in by the sheet scan later in this script:
#   fn: report name -> file name of the workbook that contains the report
#   ws: report name -> worksheet in which the report was recognised
# A value of 0 means "nothing found (yet)".
fn = {'资产负债表':0,'利润表':0,'纳税申报表':0,'财务状况表':0 }
ws = {'资产负债表':0,'利润表':0,'纳税申报表':0,'财务状况表':0 }

# Working folder for the converted / copied workbooks.  Two cases: if the
# folder already exists it is wiped first so stale data from an earlier run
# cannot pollute this one; either way a fresh, empty folder is created.
mydir = 'bak'

if os.path.exists(mydir):
    shutil.rmtree(mydir)
os.mkdir(mydir)



# Absolute paths of the .xlsx workbooks found in the start directory; the
# loop that follows this block copies them into the bak folder.
file = []

path = os.getcwd()  # directory the script was started in (before any chdir)

path_bak = os.getcwd() + r'/bak/'

pathdir = os.listdir(path)
for s in pathdir:
    newdir = os.path.join(path, s)
    if not os.path.isfile(newdir):
        # Sub-directories (bak included) and anything else that is not a
        # regular file are ignored.
        continue

    # NOTE: load-bearing side effect.  Later stages of the script open
    # workbooks by bare file name and call os.getcwd(), so the working
    # directory has to be the bak folder from here on.
    os.chdir(path_bak)

    extension = os.path.splitext(newdir)[1]
    if extension == ".xls":
        converted_name = s + 'x'  # "report.xls" -> "report.xlsx"
        if os.path.exists(converted_name):
            # Already converted: skip only this file.  The previous `break`
            # silently dropped every remaining file in the directory.
            continue

        excel = win32.gencache.EnsureDispatch('Excel.Application')
        try:
            wb = excel.Workbooks.Open(newdir)
            try:
                # FileFormat = 51 is for .xlsx extension (56 would be .xls).
                # os.path.join keeps the separators native for Excel.
                wb.SaveAs(os.path.join(path, 'bak', converted_name), FileFormat=51)
            finally:
                wb.Close()
        finally:
            # Always shut Excel down, even if opening or saving failed,
            # so no orphaned Excel process is left behind.
            excel.Application.Quit()
    elif extension == ".xlsx":
        file.append(newdir)

# Copy every collected .xlsx workbook into the bak folder that sits next to it.
for source_path in file:
    folder, filename = os.path.split(source_path)
    shutil.copyfile(folder + '/' + filename, folder + r'/bak/' + filename)


#time.sleep(5)  #暂停10秒

#####第二步将文件目录切换至bak文件夹中,,切片u v w




# Stage two: list the bak folder and build the paths of the workbooks to scan.
# The current working directory is used as the base; the conversion loop
# earlier in the script may already have switched it to the bak folder.
path_deal = os.getcwd()
print(path_deal)

pathdir_deal = os.listdir(path_deal)
print(pathdir_deal)

pathdeal_bak = os.listdir(path_bak)
for banner_or_listing in ("########", pathdeal_bak, "########"):
    print(banner_or_listing)

file_deal = []
for u in pathdeal_bak:
    file_deal.append(os.path.join(path_deal, u))
    print(file_deal)  # progress trace: the list as built so far
   # print(file_deal)




# Marker substrings that identify each report.  For every cell the entries
# are tried in this order and the first report whose marker occurs in the
# cell text (spaces removed) is recorded.
_REPORT_MARKERS = (
    ('资产负债表', ("资产负债表",)),
    ('利润表', ("利润表", "损益表")),
    ('纳税申报表', ("一般纳税人适用", "小规模纳税人适用")),
    ('财务状况表', ("子表开始",)),
)

# Scan every cell of every worksheet of every workbook and remember, per
# report, the workbook file name (in fn) and the worksheet (in ws) where a
# marker was seen.  Later matches overwrite earlier ones.
for pcm_file_deal in file_deal:
    # path_0_deal: containing directory, path_1_deal: file name with suffix
    path_0_deal, path_1_deal = os.path.split(pcm_file_deal)

    # Opened by bare file name, i.e. relative to the current working directory.
    wb = openpyxl.load_workbook(path_1_deal)
    print(path_1_deal)
    print(wb)

    # wb.worksheets replaces the deprecated get_sheet_names()/get_sheet_by_name().
    for sheet in wb.worksheets:
        for row in sheet.iter_rows():
            for cell in row:
                # Normalise once per cell instead of once per comparison.
                cell_text = str(cell.value).replace(' ', '')
                for report_name, markers in _REPORT_MARKERS:
                    if any(marker in cell_text for marker in markers):
                        fn[report_name] = path_1_deal
                        ws[report_name] = sheet
                        break  # first matching report wins for this cell



#print(fn)
#print(ws)
################part two

def getdate(fn_source,ws_source,date_row, date_column):
    """Read one figure from a report workbook.

    Every cell of the sheet is scanned.  A cell matches when its text (with
    spaces removed) contains any of the labels in ``date_row``; the last
    matching cell wins.  The value returned is the one stored three columns
    to the right of that cell.

    Args:
        fn_source: Path of the workbook to open.
        ws_source: Sheet to read, given either as a sheet title or as a
            worksheet object.  Only the title of a worksheet object is used,
            because the workbook is reopened here.
        date_row: Sequence of alternative row labels to look for.
        date_column: Accepted for interface compatibility but not used; the
            value column is always the label column + 3.

    Returns:
        The value of the target cell, or 0 when no label matched or the
        target cell is empty.
    """
    wb = openpyxl.load_workbook(fn_source)
    try:
        # Callers in this file pass the worksheet object stored in the
        # module-level ws table, so accept both forms.
        sheet_name = ws_source if isinstance(ws_source, str) else ws_source.title
        sheet = wb[sheet_name]

        target = None  # (row, column) of the value cell, once a label is found
        for row in sheet.iter_rows():
            for cell in row:
                text = str(cell.value).replace(' ', '')
                # Test every alternative label; a misplaced `break` used to
                # stop after the first one.
                if any(label in text for label in date_row):
                    target = (cell.row, cell.column + 3)

        if target is None:
            return 0
        value = sheet.cell(row=target[0], column=target[1]).value
        return 0 if value is None else value
    finally:
        # openpyxl spells it close(); the former wb.Close() raised
        # AttributeError on every call.
        wb.close()



def writedate(fn_source,ws_source,date_row, date_column,data_write):
    """Write one figure into a report workbook and save it.

    Every cell of the sheet is scanned.  A cell matches when its text (with
    spaces removed) contains any of the labels in ``date_row``; the last
    matching cell wins.  ``data_write`` is stored three columns to the right
    of that cell and the workbook is saved back to ``fn_source``.

    Args:
        fn_source: Path of the workbook to open and to save.
        ws_source: Sheet to modify, given either as a sheet title or as a
            worksheet object.  Only the title of a worksheet object is used,
            because the workbook is reopened here.
        date_row: Sequence of alternative row labels to look for.
        date_column: Accepted for interface compatibility but not used; the
            value column is always the label column + 3.
        data_write: Value to store in the target cell.

    Raises:
        ValueError: If none of the labels occurs in the sheet.  Nothing is
            written or saved in that case.
    """
    wb = openpyxl.load_workbook(fn_source)
    try:
        # Callers in this file pass the worksheet object stored in the
        # module-level ws table, so accept both forms.
        sheet_name = ws_source if isinstance(ws_source, str) else ws_source.title
        sheet = wb[sheet_name]

        row_result = 0
        column_result = 0
        for row in sheet.iter_rows():
            for cell in row:
                text = str(cell.value).replace(' ', '')
                # Test every alternative label; a misplaced `break` used to
                # stop after the first one.
                if any(label in text for label in date_row):
                    row_result = cell.row
                    print(row_result)
                    column_result = cell.column + 3
                    print(column_result)

        if row_result == 0:
            # Previously this fell through to cell(row=0, column=0), which
            # openpyxl rejects with a far less helpful ValueError.
            raise ValueError(
                "none of the labels %r found in sheet %r of %r"
                % (date_row, sheet_name, fn_source)
            )

        sheet.cell(row=row_result, column=column_result).value = data_write

        print(wb)
        # Save the workbook that was opened, not whatever the global fn
        # table currently lists as the summary report.
        wb.save(fn_source)
    finally:
        # openpyxl spells it close(); the former wb.Close() raised
        # AttributeError on every call.
        wb.close()








##############################part three
# Script entry point.  Only one value is written at the moment: the literal
# 1169777.31 goes into the summary workbook/sheet recorded under '财务状况表',
# located via the row label '年初存货'.
if __name__ == '__main__':

    #print(getdate(fn['资产负债表'],ws['资产负债表'], ['存货'], ['年初余额', '年初数']))



    ### write operations
    # opening inventory (年初存货)
    writedate(fn['财务状况表'],ws['财务状况表'],['年初存货'],['1-本季'],1169777.31)
    # The triple-quoted string below is never executed.  It holds draft calls
    # for the remaining fields plus debug prints; those calls use a different
    # argument list than the current getdate/writedate definitions and would
    # need adapting before being enabled.
    '''
    #流动资产合计
    writedate(ws['财务状况表'],['流动资产合计'],['1-本季'],getdate(ws['资产负债表'],['流动资产合计'],['期末余额','期末数']))
    #应收账款
    writedate(ws['财务状况表'],['应收账款'],['1-本季'],getdate(ws['资产负债表'],['应收账款'],['期末余额','期末数']))
    #存货  ---期末数
    writedate(ws['财务状况表'],['存货'],['1-本季'],getdate(ws['资产负债表'],['存货'],['期末余额','期末数']))
    #固定资产原价
    writedate(ws['财务状况表'],['固定资产原价'],['1-本季'],getdate(ws['资产负债表'],['固定资产原价'],['期末余额','期末数']))
    #房屋和构筑物

    #机器设备

    #无形资产
   # writedate(ws['财务状况表'],['无形资产'],['本年'],getdate(ws['资产负债表'],['无形资产'],['期末余额','期末数']))
    #累计折旧
    writedate(ws['财务状况表'],['累计折旧'],['1-本季'],getdate(ws['资产负债表'],['累计折旧'],['期末余额','期末数']))
    #本年折旧

    #资产总计
    writedate(ws['财务状况表'],['资产总计'],['1-本季'],getdate(ws['资产负债表'],['资产总计'],['期末余额','期末数']))
    #流动负债合计
    writedate(ws['财务状况表'],['流动负债合计'],['本年'],getdate(ws['资产负债表'],['流动负债合计'],['期末余额','期末数']))
    #应付账款
    writedate(ws['财务状况表'],['应付账款'],['本年'],getdate(ws['资产负债表'],['应付账款'],['期末余额','期末数']))

    #负债合计
    writedate(ws['财务状况表'],['负债合计'],['1-本季'],getdate(ws['资产负债表'],['负债合计'],['期末余额','期末数']))
    #所有者权益合计
    writedate(ws['财务状况表'],['所有者权益合计'],['本年'],getdate(ws['资产负债表'],['所有者权益合计'],['期末余额','期末数']))
    #实收资本
    writedate(ws['财务状况表'],['实收资本'],['本年'],getdate(ws['资产负债表'],['实收资本'],['期末余额','期末数']))
    #个人资本
    #writedate(ws['财务状况表'],['个人资本'],['本年'],getdate(ws['资产负债表'],['个人资本'],['期末余额','期末数']))





    #营业收入
    writedate(ws['财务状况表'],['营业收入'],['1-本季'],getdate(ws['利润表'],['营业收入'],['本年累计数','本年累计金额']))
    #营业成本
    writedate(ws['财务状况表'],['营业成本'],['1-本季'],getdate(ws['利润表'],['营业成本'],['本年累计数','本年累计金额']))
    #税金及附加
    writedate(ws['财务状况表'],['税金及附加'],['1-本季'],getdate(ws['利润表'],['税金及附加'],['本年累计数','本年累计金额']))
    #销售费用
    writedate(ws['财务状况表'],['销售费用'],['1-本季'],getdate(ws['利润表'],['销售费用'],['本年累计数','本年累计金额']))
    #管理费用
    writedate(ws['财务状况表'],['管理费用'],['1-本季'],getdate(ws['利润表'],['管理费用'],['本年累计数','本年累计金额']))
    #研发费用
  #  writedate(ws['财务状况表'],['研发费用'],['1-本季'],getdate(ws['利润表'],['研究费用','研发费用'],['本年累计数','本年累计金额']))
    #财务费用
    writedate(ws['财务状况表'],['财务费用'],['1-本季'],getdate(ws['利润表'],['财务费用'],['本年累计数','本年累计金额']))
    #利息收入
    writedate(ws['财务状况表'],['利息收入'],['1-本季'],getdate(ws['利润表'],['利息费用'],['本年累计数','本年累计金额']))
    #利息费用
   # writedate(ws['财务状况表'],['利息费用'],['1-本季'],getdate(ws['利润表'],['利息费用'],['本年累计数','本年累计金额']))
    #营业利润
    writedate(ws['财务状况表'],['营业利润'],['1-本季'],getdate(ws['利润表'],['营业利润'],['本年累计数','本年累计金额']))
    #投资收益
    writedate(ws['财务状况表'],['投资收益'],['本年'],getdate(ws['财务状况表'],['投资收益'],['期末余额','期末数']))


    #营业外收入
    writedate(ws['财务状况表'],['营业外收入'],['1-本季'],getdate(ws['利润表'],['营业外收入'],['本年累计数','本年累计金额']))
    #营业外支出
    writedate(ws['财务状况表'],['营业外支出'],['1-本季'],getdate(ws['利润表'],['营业外支出'],['本年累计数','本年累计金额']))
    #利润总额
    writedate(ws['财务状况表'],['利润总额'],['1-本季'],getdate(ws['利润表'],['利润总额'],['本年累计数','本年累计金额']))
    #所得税费用
    writedate(ws['财务状况表'],['所得税费用'],['1-本季'],getdate(ws['利润表'],['所得税费用'],['本年累计数','本年累计金额']))
    #应交增值税= 销项税额 -(进项税额 - 进项税额转出 - 免、抵、退应退税额)+ 简易计税办法计算的应纳税额 + 按简易计税办法计算的纳税检查应补缴税额 - 应征税额减征额
    writedate(ws['财务状况表'], ['应交增值税'], ['1-本季'], getdate(ws['纳税申报表'], ['销项税额'], ['本年累计', '累计金额']) - ( getdate(ws['纳税申报表'], ['进项税额'], ['本年累计', '累计金额']) - getdate(ws['纳税申报表'], ['进项税额转出'],['本年累计', '累计金额']) - getdate(ws['纳税申报表'],['简易计税办法计算的应纳税额'], ['本年累计', '累计金额'])) + getdate(ws['纳税申报表'], ['按简易计税办法计算的纳税检查应补缴税额'],['本年累计', '累计金额']) - getdate(ws['纳税申报表'],['应征税额减征额'],['本年累计', '累计金额']))

    #从事批发和零售业活动的从业人员平均人数



    # 资产负债表
    print(getdate(ws['资产负债表'], ['存货'], ['年初余额', '年初数']));
    print(getdate(ws['资产负债表'],['流动资产合计'],['期末余额','期末数']));
    print(getdate(ws['资产负债表'],['应收账款'],['期末余额','期末数']));
    print(getdate(ws['资产负债表'],['固定资产原价'],['期末余额','期末数']));
    print(getdate(ws['资产负债表'],['累计折旧'],['期末余额','期末数']));
    print(getdate(ws['资产负债表'],['资产总计'],['期末余额','期末数']));
    print(getdate(ws['资产负债表'],['负债合计'],['期末余额','期末数']));

    #利润表
    print(getdate(ws['利润表'],['营业收入'],['本年累计数','本年累计金额']));
    print(getdate(ws['利润表'],['营业成本'],['本年累计数','本年累计金额']));
    print(getdate(ws['利润表'],['税金及附加'],['本年累计数','本年累计金额']));
    print(getdate(ws['利润表'],['销售费用'],['本年累计数','本年累计金额']));
    print(getdate(ws['利润表'],['管理费用'],['本年累计数','本年累计金额']));
    print(getdate(ws['利润表'],['研究费用','研发费用'],['本年累计数','本年累计金额']));
    print(getdate(ws['利润表'],['财务费用'],['本年累计数','本年累计金额']));
    print(getdate(ws['利润表'],['利息费用'],['本年累计数','本年累计金额']));   ####这个地方有问题,需要更改下,正负不同区别为利息收入、利息费用
    #############利息收入
    print(getdate(ws['利润表'],['营业利润'],['本年累计数','本年累计金额']));
    print(getdate(ws['利润表'],['营业外收入'],['本年累计数','本年累计金额']));
    print(getdate(ws['利润表'],['营业外支出'],['本年累计数','本年累计金额']));
    print(getdate(ws['利润表'],['利润总额'],['本年累计数','本年累计金额']));
    print(getdate(ws['利润表'],['所得税费用'],['本年累计数','本年累计金额']));

    #纳税申报表
    print(getdate(ws['纳税申报表'],['销项税额'],['本年累计','累计金额']));
    print(getdate(ws['纳税申报表'],['进项税额'],['本年累计','累计金额']));
    print(getdate(ws['纳税申报表'],['进项税额转出'],['本年累计','累计金额']));
    print(getdate(ws['纳税申报表'],['免、抵、退应退税额'],['本年累计','累计金额']));
    print(getdate(ws['纳税申报表'],['简易计税办法计算的应纳税额'],['本年累计','累计金额']));
    print(getdate(ws['纳税申报表'],['按简易计税办法计算的纳税检查应补缴税款'],['本年累计','累计金额']));
    print(getdate(ws['纳税申报表'],['应纳税额减征额'],['本年累计','累计金额']));
    '''
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值