import os
from openpyxl import load_workbook
import pandas as pd
def get_excel_file(directory):
excel_list = []
for root, dirs, files in os.walk(directory):
for file in files:
if file.endswith('.xlsx') or file.endswith('.xls'):
excel_list.append(file)
return excel_list
def merge_sheet(origin_dir,excel_path,key_merge, aim_path):
df_all = pd.DataFrame()
for xls in excel_list:
print(f'{xls}已完成')
xls = origin_dir + '\\' +xls
df = pd.read_excel(xls, header=0)
xls_name = xls.split('\\')[-1]
factory_name = xls_name.split('_')[0]
factory_status = xls_name.split('_')[1]
# 需要提前指定index
df = df.set_index(key_merge)
columns = df.columns.to_list()
new_columns = []
# 多级index,为了输出到文件中有合并单元格
for item in columns:
new_columns.append((factory_name, factory_status, item))
new_columns = pd.MultiIndex.from_tuples(new_columns)
df.columns = new_columns
result = df_all
if df_all.shape[0] != 0 and df.shape[0] != 0:
# 合并方式有"left", "right", "inner", "outer"等,这里采用outer
# 根据index做合并
result = pd.merge(df_all, df, left_index=True,right_index=True, how='outer')
elif df.shape[0] == 0:
print(f'{excel_path}文件的sheet页:无数据')
elif df_all.shape[0] == 0:
result = df
df_all = result
# 转置
df_all.T.to_excel(aim_path, engine='openpyxl')
print(f'{excel_path}已完成')
if __name__ == '__main__':
origin_dir = r'E:\PycharmProjects\test\inner'
aim_dir = r'E:\PycharmProjects\test\out\test.xlsx'
key_merge = '日期'
excel_list = get_excel_file(origin_dir)
# origin_dir:源文件目录;excel_list:遍历源文件目录获取的excel文件列表
# key_merge:转index,为后续合并的依据;aim_dir:输出文件
merge_sheet(origin_dir, excel_list, key_merge, aim_dir)