1.基础python
vi 10csv_reader_sum_average_from_multiple_files.py
#!/usr/bin/env python3
# encoding=utf8
"""Write total and average sales for every ``sales_*`` CSV file in a folder.

Usage:
    python 10csv_reader_sum_average_from_multiple_files.py INPUT_DIR OUTPUT_CSV

Each input file is expected to have one header row followed by data rows
whose fourth column is the sale amount (optionally written like "$1,200.00").
One summary row per input file is appended to OUTPUT_CSV.
"""
import csv
import glob
import os
import sys

OUTPUT_HEADER = ['file_name', 'total_sales', 'average_sales']
SALE_AMOUNT_INDEX = 3  # the fourth column of every data row is the amount


def parse_sale_amount(raw):
    """Convert a raw amount such as "$1,200.00" to a float.

    Leading/trailing dollar signs and thousands separators are removed.
    Raises ValueError if the remaining text is not a number.
    """
    return float(str(raw).strip('$').replace(',', ''))


def summarize_sales_file(input_file):
    """Return ``[file_name, total_sales, average_sales]`` for one CSV file.

    ``total_sales`` is a float; ``average_sales`` is a string formatted with
    two decimals. A file without data rows yields a total of 0.0 and an
    average of "0.00" instead of dividing by zero.
    """
    total_sales = 0.0
    number_of_sales = 0
    # The csv module needs text mode with newline='' on Python 3.
    with open(input_file, 'r', newline='') as csv_in_file:
        filereader = csv.reader(csv_in_file)
        next(filereader, None)  # skip the header row (tolerates empty files)
        for row in filereader:
            if not row:
                continue  # csv.reader yields [] for blank lines
            total_sales += parse_sale_amount(row[SALE_AMOUNT_INDEX])
            number_of_sales += 1
    average = total_sales / number_of_sales if number_of_sales else 0.0
    return [
        os.path.basename(input_file),
        total_sales,
        '{0:.2f}'.format(average),  # average formatted to two decimals
    ]


def main(argv=None):
    """Summarize every ``sales_*`` file in INPUT_DIR into OUTPUT_CSV.

    ``argv`` defaults to ``sys.argv[1:]`` and must hold the input folder and
    the output file path. The output is opened in append mode; the header row
    is only written when the file is new or empty, so repeated runs do not
    insert duplicate header lines.
    """
    args = sys.argv[1:] if argv is None else list(argv)
    if len(args) != 2:
        sys.exit('usage: 10csv_reader_sum_average_from_multiple_files.py '
                 'INPUT_DIR OUTPUT_CSV')
    input_path, output_file = args

    needs_header = (not os.path.exists(output_file)
                    or os.path.getsize(output_file) == 0)
    # The context manager closes the output file even if a row fails to parse.
    with open(output_file, 'a', newline='') as csv_out_file:
        filewriter = csv.writer(csv_out_file)
        if needs_header:
            filewriter.writerow(OUTPUT_HEADER)
        for input_file in glob.glob(os.path.join(input_path, 'sales_*')):
            filewriter.writerow(summarize_sales_file(input_file))


if __name__ == '__main__':
    main()
#结果
[root@mysql51 python_scripts]# python 10csv_reader_sum_average_from_multiple_files.py `pwd` 14output.csv
[root@mysql51 python_scripts]# more 14output.csv
file_name,total_sales,average_sales
sales_march_2014.csv,10139.0,1689.83
sales_jannary_2014.csv,8992.0,1498.67
sales_february_2014.csv,9375.0,1562.50
2.pandas方式实现
vi pandas_sum_average_from_multiple_file.py
#!/usr/bin/env python3
# encoding=utf8
"""Write total and average sales for every ``sales_*`` CSV file using pandas.

Usage:
    python pandas_sum_average_from_multiple_file.py INPUT_DIR OUTPUT_CSV

Each input file must contain a "Sale Amount" column whose values may be
written like "$1,200.00". OUTPUT_CSV is overwritten with one summary row per
input file.
"""
import pandas as pd
import glob
import os
import sys

OUTPUT_COLUMNS = ['file_name', 'total_sales', 'average_sales']


def summarize_sales_file(input_file):
    """Return a one-row DataFrame with the total and mean of "Sale Amount".

    The amounts are cleaned once (dollar signs stripped from both ends,
    thousands separators removed) and converted to float before aggregating.
    """
    data_frame = pd.read_csv(input_file, index_col=None)
    amounts = (
        data_frame.loc[:, 'Sale Amount']
        .astype(str)
        .str.strip('$')
        .str.replace(',', '', regex=False)  # literal comma, not a regex
        .astype(float)
    )
    data = {
        'file_name': [os.path.basename(input_file)],
        'total_sales': [amounts.sum()],
        'average_sales': [amounts.mean()],
    }
    return pd.DataFrame(data, columns=OUTPUT_COLUMNS)


def main(argv=None):
    """Summarize every ``sales_*`` file in INPUT_DIR into OUTPUT_CSV.

    ``argv`` defaults to ``sys.argv[1:]`` and must hold the input folder and
    the output file path. When no input file matches, a header-only CSV is
    written, because ``pd.concat`` raises ValueError on an empty list.
    """
    args = sys.argv[1:] if argv is None else list(argv)
    if len(args) != 2:
        sys.exit('usage: pandas_sum_average_from_multiple_file.py '
                 'INPUT_DIR OUTPUT_CSV')
    input_path, output_file = args

    all_files = glob.glob(os.path.join(input_path, 'sales_*'))
    all_data_frames = [summarize_sales_file(path) for path in all_files]
    if all_data_frames:
        # Stack the per-file rows vertically and renumber the index.
        data_frames_concat = pd.concat(
            all_data_frames, axis=0, ignore_index=True)
    else:
        data_frames_concat = pd.DataFrame(columns=OUTPUT_COLUMNS)
    data_frames_concat.to_csv(output_file, index=False)


if __name__ == '__main__':
    main()
#结果
python C:\Users\4201.HJSC\PycharmProjects\pythonProject\pandas_sum_average_from_multiple_file.py \
C:\Users\4201.HJSC\Desktop\Python_exercise\ \
C:\Users\4201.HJSC\Desktop\Python_exercise\13output.csv
more 13output.csv
file_name,total_sales,average_sales
sales_february_2014.csv,9375.0,1562.5
sales_jannary_2014.csv,8992.0,1498.6666666666667
sales_march_2014.csv,10139.0,1689.8333333333333
3.总结
#源文件编码声明写成 encoding=utf8、utf-8 或 gbk 均可,但必须放在文件的第一行或第二行,并与文件实际保存的编码一致。
#pd.DataFrame 方法接收一个数据字典,并通过 columns 关键字参数指定列的顺序。
#pd.concat(all_data_frames,axis=0,ignore_index=True)将数据垂直连接。
文章展示了两种使用Python处理CSV文件的方法,计算销售数据的总和与平均值。第一种是基础的csv模块,读取多个文件并计算每份文件的销售统计数据。第二种使用Pandas库,更高效地整合数据并进行计算。两种方法均能输出包含文件名、总销售额和平均销售额的结果。
4658

被折叠的 条评论
为什么被折叠?



