import pandas as pd import os work_dir = 'F:\data' split_dir = f'{work_dir}/splits' if not os.path.exists(split_dir): os.mkdir(split_dir) # 拆分单元格 # 读取原excel数据到pandas df_source = pd.read_csv(f'{work_dir}/Narrativedata.csv') print(split_dir) # print(df_source.head()) # print(df_source.shape) total_row_count = df_source.shape[0] # 将一个大excel等份拆分成多个excel # 1. 使用df.iloc方法,将一个大的dataframe拆分成多个小的dataframe # 2. 使用dataframe.to_excel保存每个小的excel # 计算拆分后每个excel的行数 # 先计算大小(每个人对应数据的多少) user_names = ['小赵', '小钱', '小孙', '小李', '小周', '小吴'] split_size = total_row_count // len(user_names) if total_row_count % len(user_names) != 0: split_size += 1 # 划分 df_subs = [] for idx, user_names in enumerate(user_names): begin = idx * split_size end = begin + split_size df_sub = df_source.iloc[begin:end] # 划分数据 df_subs.append((idx, user_names, df_sub)) # print(df_subs) # 将每个dataframe存入excel # for idx, user_name, df_sub in df_subs: # file_name = f'{split_dir}/excel_{idx}_{user_name}.xlsx' # df_sub.to_excel(file_name, index=False) # # print('save ok') # 第二部分,合并excel ''' 1. 遍历文件夹,得到要合并的excel文件列表 2. 分别读取dataframe,给每个df新增一列标记来源 3. 使用pd.concat进行批量合并 4. 将合并的dataframe输出到excel ''' # 1. 遍历文件夹,得到每个excel excel_names = [] # print(split_dir) for excel_name in os.listdir(split_dir): excel_names.append(excel_name) # 2.将excel数据分别读取 df_list = [] for excel_name in excel_names: excel_path = f'{split_dir}/{excel_name}' df_split = pd.read_excel(excel_path) # print(df_split) # 得到username # print(excel_name) username0 = excel_name.replace('excel_', ' ')[3:] username = username0.replace('.xlsx', '') print(username) # 给每个df新增一列标记,即用户名字 df_split['username'] = username df_list.append(df_split) print(df_list) # 使用concat进行合并 df_merged = pd.concat(df_list) # print(df_merged['username'].value_counts()) # 将合并后的dataframe输出到excel df_merged.to_excel(f'{work_dir}/merged.xlsx', index=False) print('ok')
pandas对excel进行拆分与合并
最新推荐文章于 2024-03-13 20:07:49 发布