一键式统计5.0待完善

Excel自动化处理与数据分析
最新推荐文章于 2025-04-17 21:01:09 发布
原创最新推荐文章于 2025-04-17 21:01:09 发布 · 92 阅读
0 ·
CC 4.0 BY-SA版权
本文介绍了一种使用Python进行Excel文件自动化处理的方法，包括文件格式转换、数据抓取及分析等关键步骤。通过win32com和openpyxl库实现了.xls到.xlsx的批量转换，并从多个工作表中定位特定报表，如资产负债表、利润表等，进一步提取关键财务数据进行分析。
import os
import win32com.client as win32
import shutil
import openpyxl
import os
import win32com.client as win32
import shutil
import openpyxl

import time

import time

#####part one

# Registry of the report worksheets this script works with.
# Every entry starts as the placeholder 0; the workbook scan further down
# replaces it with the worksheet in which the report's marker text was found.
# The original literal listed '纳税申报表' twice (the duplicate is silently
# dropped by Python) and had no entry for '财务状况表', although the scan and
# the final write step both use that key.
ws = {'资产负债表': 0, '利润表': 0, '纳税申报表': 0, '财务状况表': 0}
print(ws)
print(ws['资产负债表'])
print(ws['利润表'])
print(ws['纳税申报表'])
print(ws['财务状况表'])


# Working folder for converted/copied workbooks.  Two cases: if the folder
# already exists it is removed first so that stale data cannot leak into this
# run; in both cases a fresh, empty folder is created.
mydir = 'bak'

if os.path.exists(mydir):
    shutil.rmtree('bak')
os.mkdir(mydir)



# Absolute paths of workbooks that are already .xlsx; they are only copied
# into bak by the loop that follows this block.
file = []

path = os.getcwd()  # directory the script was started from; source workbooks are read here

path_bak = os.getcwd() + r'/bak/'

pathdir = os.listdir(path)
print(pathdir)

# The later steps read os.getcwd() and open workbooks relative to it, so the
# process has to end up inside bak.  Switch once here instead of once per file.
os.chdir(path_bak)

for s in pathdir:
    newdir = os.path.join(path, s)
    if not os.path.isfile(newdir):
        continue  # directories (including bak itself) are not processed

    suffix = os.path.splitext(newdir)[1]
    if suffix == ".xlsx":
        file.append(newdir)
    elif suffix == ".xls":
        # Converted file keeps its name with an extra 'x': report.xls -> report.xlsx
        t = os.path.split(s)[1] + r'x'
        # normpath gives Excel one consistent separator style
        # (the path is otherwise built with a mix of '\\' and '/').
        target = os.path.normpath(os.path.join(path_bak, t))
        if os.path.exists(target):
            # Already converted: skip this one file.  The original used
            # `break`, which abandoned every remaining file in the directory.
            continue

        excel = win32.gencache.EnsureDispatch('Excel.Application')
        try:
            wb = excel.Workbooks.Open(newdir)
            try:
                # FileFormat = 51 is for .xlsx extension (56 would be .xls)
                wb.SaveAs(target, FileFormat=51)
            finally:
                wb.Close()
        finally:
            # Always shut Excel down, even when Open/SaveAs raised, so no
            # Excel process is left behind.
            excel.Application.Quit()

# Snapshot of bak taken after all conversions are done.
pathdir_bak = os.listdir(path_bak)

# Copy every workbook collected in `file` into the bak folder that sits next
# to it, keeping the original file name.
for pcm_file in file:
    path_0, path_1 = os.path.split(pcm_file)
    oldname = path_0 + '/' + path_1
    newname = path_0 + r'/bak/' + path_1
    shutil.copyfile(oldname, newname)


# time.sleep(5)  # disabled: pause for 5 seconds

##### Step 2: continue inside the bak folder (the working directory was switched there above); loop variables u, v, w



# Current working directory; the conversion step above changes into bak, so
# this is the folder holding the .xlsx files to analyse.
path_deal = os.getcwd()

# Fresh listing taken now, after conversion and copying have finished.  The
# original loop iterated `pathdir_bak`, a listing captured during the
# conversion loop, and therefore missed files added to bak afterwards.
pathdir_deal = os.listdir(path_deal)

# Absolute path of every entry in the working directory.
file_deal = [os.path.join(path_deal, u) for u in pathdir_deal]
   # print(file_deal)



# Marker texts that identify each report, as (ws key, markers) pairs.
# Order matters: a cell is tested against the groups from top to bottom and
# only the first group that matches is applied to it.
_REPORT_MARKERS = (
    ('资产负债表', ("资产负债表",)),
    ('利润表', ("利润表", "损益表")),
    ('纳税申报表', ("一般纳税人适用", "小规模纳税人适用")),
    ('财务状况表', ("财务状况表", "***子表开始")),
)

# Scan every workbook listed in file_deal and remember, per report type, the
# worksheet that contains one of its marker texts.  A later match overwrites
# an earlier one, so the last matching sheet of the last workbook wins.
for pcm_file_deal in file_deal:
    path_0_deal, path_1_deal = os.path.split(pcm_file_deal)  # folder, file name with extension
    print(path_1_deal)

    # Open by full path so the result does not depend on the working directory.
    wb = openpyxl.load_workbook(pcm_file_deal)

    # wb.worksheets replaces the deprecated get_sheet_names()/get_sheet_by_name()
    # pair and yields the sheet objects directly.
    for sheet in wb.worksheets:
        for row in sheet.iter_rows():
            for cell in row:
                # Normalise once per cell: stringify and drop spaces so that
                # titles typed as "资 产 负 债 表" still match.
                text = str(cell.value).replace(' ', '')
                for report_key, markers in _REPORT_MARKERS:
                    if any(marker in text for marker in markers):
                        print(sheet)
                        print(path_1_deal)
                        ws[report_key] = sheet
                        break

################part two

def getdate(ws_source, date_row, date_column):
    """Return the value of the cell where a labelled row and a labelled column cross.

    Every cell yielded by ``ws_source.iter_rows()`` is converted with ``str()``
    and stripped of spaces.  A cell whose text equals one of ``date_row`` fixes
    the row number; a cell whose text equals one of ``date_column`` fixes the
    column.  The scan never stops early, so when several cells match, the last
    one visited wins.

    NOTE(review): on a sheet where the same column header appears more than
    once, this picks the last occurrence -- confirm that is what callers want.

    Args:
        ws_source: object offering ``iter_rows()`` and ``cell(row=, column=)``;
            the script passes openpyxl worksheets.
        date_row: accepted spellings of the row label.
        date_column: accepted spellings of the column label.

    Returns:
        The value of the located cell (it is also printed).  ``0`` when either
        label was not found or the located cell is empty (``None``).
    """
    row_result = 0
    column_result = 0

    for cells in ws_source.iter_rows():
        for cell in cells:
            text = str(cell.value).replace(' ', '')
            if any(text == label for label in date_row):
                row_result = cell.row
            if any(text == label for label in date_column):
                column_result = cell.column

    if row_result == 0 or column_result == 0:
        return 0

    value = ws_source.cell(row=row_result, column=column_result).value
    if value is None:
        return 0

    print(value)
    return value



def writedate(ws_source, date_row, date_column, data):
    """Write ``data`` into the cell where a labelled row and a labelled column cross.

    The target is located the same way ``getdate`` does it: every cell from
    ``ws_source.iter_rows()`` is converted with ``str()`` and stripped of
    spaces; a cell whose text equals one of ``date_row`` fixes the row, a cell
    whose text equals one of ``date_column`` fixes the column.  The scan never
    stops early, so the last matching cell wins.

    Args:
        ws_source: object offering ``iter_rows()`` and ``cell(row=, column=)``;
            the script passes openpyxl worksheets.
        date_row: accepted spellings of the row label.
        date_column: accepted spellings of the column label.
        data: value to store in the located cell.

    Raises:
        ValueError: if no cell matches the row labels or none matches the
            column labels.  The original passed row/column 0 to ``cell()`` in
            that case, which is not a valid worksheet coordinate.
    """
    row_result = 0
    column_result = 0

    # One pass is enough: row and column matches are independent of each other.
    for row in ws_source.iter_rows():
        for cell in row:
            text = str(cell.value).replace(' ', '')
            if any(text == label for label in date_row):
                row_result = cell.row
            if any(text == label for label in date_column):
                column_result = cell.column

    if row_result == 0 or column_result == 0:
        missing = []
        if row_result == 0:
            missing.append('row labels {!r}'.format(date_row))
        if column_result == 0:
            missing.append('column labels {!r}'.format(date_column))
        raise ValueError('writedate: no cell matches ' + ' or '.join(missing))

    target = ws_source.cell(row=row_result, column=column_result)
    target.value = data
    print("********")
    print(target.value)
    print("********")


##############################part three
##############################part three
if __name__ == '__main__':

    # Balance sheet: look up (and print) the opening inventory figure.
    balance_sheet = ws['资产负债表']
    print(getdate(balance_sheet, ['存货'], ['年初余额', '年初数']))

    # The string literal below is a disabled batch of further lookups; it is
    # evaluated as a bare expression and has no effect.
    '''
    print(getdate(ws['资产负债表'],['流动资产合计'],['期末余额','期末数']));
    print(getdate(ws['资产负债表'],['应收账款'],['期末余额','期末数']));
    print(getdate(ws['资产负债表'],['存货'],['期末余额','期末数']));
    print(getdate(ws['资产负债表'],['固定资产原价'],['期末余额','期末数']));
    print(getdate(ws['资产负债表'],['累计折旧'],['期末余额','期末数']));
    print(getdate(ws['资产负债表'],['资产总计'],['期末余额','期末数']));
    print(getdate(ws['资产负债表'],['负债合计'],['期末余额','期末数']));

    #利润表
    print(getdate(ws['利润表'],['营业收入'],['本年累计数','本年累计金额']));
    print(getdate(ws['利润表'],['营业成本'],['本年累计数','本年累计金额']));
    print(getdate(ws['利润表'],['税金及附加'],['本年累计数','本年累计金额']));
    print(getdate(ws['利润表'],['销售费用'],['本年累计数','本年累计金额']));
    print(getdate(ws['利润表'],['管理费用'],['本年累计数','本年累计金额']));
    print(getdate(ws['利润表'],['研究费用','研发费用'],['本年累计数','本年累计金额']));
    print(getdate(ws['利润表'],['财务费用'],['本年累计数','本年累计金额']));
    print(getdate(ws['利润表'],['利息费用'],['本年累计数','本年累计金额']));   ####这个地方有问题，需要更改下，正负不同区别为利息收入、利息费用
    #############利息收入
    print(getdate(ws['利润表'],['营业利润'],['本年累计数','本年累计金额']));
    print(getdate(ws['利润表'],['营业外收入'],['本年累计数','本年累计金额']));
    print(getdate(ws['利润表'],['营业外支出'],['本年累计数','本年累计金额']));
    print(getdate(ws['利润表'],['利润总额'],['本年累计数','本年累计金额']));
    print(getdate(ws['利润表'],['所得税费用'],['本年累计数','本年累计金额']));

    #纳税申报表
    print(getdate(ws['纳税申报表'],['销项税额'],['本年累计','累计金额']));
    print(getdate(ws['纳税申报表'],['进项税额'],['本年累计','累计金额']));
    print(getdate(ws['纳税申报表'],['进项税额转出'],['本年累计','累计金额']));
    print(getdate(ws['纳税申报表'],['免、抵、退应退税额'],['本年累计','累计金额']));
    print(getdate(ws['纳税申报表'],['简易计税办法计算的应纳税额'],['本年累计','累计金额']));
    print(getdate(ws['纳税申报表'],['按简易计税办法计算的纳税检查应补缴税款'],['本年累计','累计金额']));
    print(getdate(ws['纳税申报表'],['应纳税额减征额'],['本年累计','累计金额']));

'''
    # Copy the opening inventory from the balance sheet into the
    # "一、年初存货" row / "1-本季" column of the financial-status sheet.
    # The target sheet is resolved first, then the value is looked up again,
    # matching the original argument evaluation order.
    # NOTE(review): no workbook save is visible in this block -- confirm the
    # modified workbook is written back to disk somewhere.
    status_sheet = ws['财务状况表']
    opening_inventory = getdate(ws['资产负债表'], ['存货'], ['年初余额', '年初数'])
    writedate(status_sheet, ['一、年初存货'], ['1-本季'], opening_inventory)