import csv
import os
def split_csv_by_column(file_path, column_index=3, output_dir='chaifen'):
    """Split a CSV file into one output file per distinct value of a column.

    Rows of ``file_path`` are grouped by the text found at ``column_index``.
    Each group is written, in its original row order, to
    ``<output_dir>/sorted_<value>_<base name of file_path>``.  No header row
    is skipped, so a header line is grouped like any other row.

    Args:
        file_path: Path of the UTF-8 encoded CSV file to read.
        column_index: Zero-based index of the column to group by.
        output_dir: Directory that receives the output files; it is created
            when it does not exist yet.

    Raises:
        IndexError: If a non-blank row has no column at ``column_index``.
    """
    # exist_ok=True avoids the check-then-create race of a separate
    # os.path.exists() test.
    os.makedirs(output_dir, exist_ok=True)

    # Map each distinct column value to the list of rows carrying it.
    rows_by_value = {}
    with open(file_path, mode='r', newline='', encoding='utf-8') as source:
        reader = csv.reader(source)
        for row in reader:
            if not row:
                # csv.reader yields [] for a blank line; there is nothing
                # to group, and indexing it would raise.
                continue
            try:
                value = row[column_index]
            except IndexError:
                raise IndexError(
                    f"{file_path}, line {reader.line_num}: row has "
                    f"{len(row)} column(s), no column at index {column_index}"
                ) from None
            rows_by_value.setdefault(value, []).append(row)

    # Only the base name goes into the output file name; embedding a path
    # that contains directories would point outside ``output_dir`` or into
    # a sub-directory that does not exist.
    base_name = os.path.basename(file_path)
    for value, rows in rows_by_value.items():
        output_file_path = os.path.join(output_dir, f'sorted_{value}_{base_name}')
        with open(output_file_path, mode='w', newline='', encoding='utf-8') as target:
            csv.writer(target).writerows(rows)
        print(f"Data with D column value '{value}' has been written to {output_file_path}")
# Run the split: source file, zero-based index of the column to split on,
# and the directory that receives the per-value files.
split_csv_by_column(
    file_path='export_jyt_20241016.csv',
    column_index=3,
    output_dir='chaifen',
)
# 3. Count the rows of all .csv files under the chaifen directory and compare the totals to check that the counts match.
import csv
import os
def count_rows_in_directory(directory_path):
    """Count the CSV rows of every ``.csv`` file directly inside a directory.

    Each regular file whose name ends with ``.csv`` is parsed with
    ``csv.reader`` and every record the reader yields is counted (header
    lines included).  A per-file line is printed, and the sum over all
    files is returned.  Sub-directories are not descended into.

    Args:
        directory_path: Directory whose CSV files are counted.

    Returns:
        The total number of rows across all counted CSV files.
    """
    total_rows = 0
    # os.listdir returns names in arbitrary order; sorting keeps the printed
    # report stable between runs.
    for filename in sorted(os.listdir(directory_path)):
        if not filename.endswith('.csv'):
            continue
        file_path = os.path.join(directory_path, filename)
        if not os.path.isfile(file_path):
            # A directory that happens to be named *.csv cannot be opened
            # as a file.
            continue
        with open(file_path, mode='r', newline='', encoding='utf-8') as source:
            row_count = sum(1 for _ in csv.reader(source))
        total_rows += row_count
        print(f"{filename} contains {row_count} rows.")
    return total_rows
# Count the rows of every CSV file under the chaifen directory and report
# the total.
directory_path = 'chaifen'
total_rows = count_rows_in_directory(directory_path)
print(f"The directory {directory_path} contains a total of {total_rows} rows across all CSV files.")