Python CSV file comparator

本文介绍了一个使用Python编写的CSV文件对比脚本，该脚本能够读取两个CSV文件，并通过指定的键进行记录匹配，最后输出匹配成功、不匹配及缺失的记录详情。
import os, csv, sys
import argparse

#compare.py -f1 file1.csv -f2 file2.csv -k key1,key2

def open_csv(path):
    """Open *path* in the mode the csv module expects on this interpreter."""
    if sys.version_info[0] < 3:
        # Python 2's csv module wants a binary-mode file object.
        return open(path, 'rb')
    # Python 3's csv module wants text mode with newline translation disabled.
    return open(path, newline='')


def build_alignment_key(row, header_index, keys):
    """Return the upper-cased, '|'-joined values of the *keys* columns of *row*.

    The separator is always emitted, even around empty values, so that
    ("C", "") and ("", "C") yield the distinct keys "C|" and "|C" instead
    of both collapsing to "C".

    row          -- list of cell values for one CSV record
    header_index -- dict mapping column name to column position
    keys         -- ordered list of column names forming the key
    """
    return "|".join(str(row[header_index[name]]).upper() for name in keys)


def compare_files(path1, path2, keys):
    """Compare the records of two CSV files aligned on the *keys* columns.

    Column positions are taken from the header of *path1* and applied to
    both files; the first row of *path2* is skipped as its header. Blank
    lines are ignored. When several rows of *path1* share a key, the last
    one wins. Per-record results are printed as they are found.

    Returns a tuple (matched, mismatched, missed_in_file1, missed_in_file2):
    the number of identical records, then the keys of records that differ,
    records present only in *path2*, and records present only in *path1*.

    Raises ValueError if *path1* has no header row and KeyError if a key
    column is absent from that header.
    """
    records1 = {}
    with open_csv(path1) as handle:
        reader = csv.reader(handle, delimiter=',', quotechar='"',
                            quoting=csv.QUOTE_MINIMAL)
        header = next(reader, None)
        if header is None:
            raise ValueError("%s is empty: no header row" % path1)
        header_index = dict(zip(header, range(len(header))))
        absent = [name for name in keys if name not in header_index]
        if absent:
            raise KeyError("key column(s) %s not found in header of %s"
                           % (absent, path1))
        for row in reader:
            if not row:
                # csv.reader yields [] for a blank line; there is no record.
                continue
            records1[build_alignment_key(row, header_index, keys)] = row

    matched = 0
    mismatched = []
    missed_in_file1 = []
    with open_csv(path2) as handle:
        reader = csv.reader(handle, delimiter=',', quotechar='"',
                            quoting=csv.QUOTE_MINIMAL)
        next(reader, None)  # skip the header row of the second file
        for row2 in reader:
            if not row2:
                continue
            alignment_key = build_alignment_key(row2, header_index, keys)
            # Pop so that whatever remains afterwards is missing from file 2.
            row1 = records1.pop(alignment_key, None)
            if row1 is None:
                print(row2)
                missed_in_file1.append(alignment_key)
            elif row2 == row1:
                print("%s matched" % alignment_key)
                matched += 1
            else:
                print(row2)
                print(row1)
                mismatched.append(alignment_key)

    missed_in_file2 = list(records1)
    return matched, mismatched, missed_in_file1, missed_in_file2


def main(argv=None):
    """Parse the command line, compare the two files and print the summary.

    argv -- argument list to parse; None means use sys.argv[1:].
    """
    parser = argparse.ArgumentParser(description='compare two files to check if they contain same records')
    parser.add_argument('-f1', '--file1', required=True)
    parser.add_argument('-f2', '--file2', required=True)
    parser.add_argument('-k', '--key', required=True, help='used to get the alignment key')
    args = parser.parse_args(argv)

    keys = args.key.split(',')
    matched, mismatched, missed_in_file1, missed_in_file2 = compare_files(
        args.file1, args.file2, keys)

    print("matched = %s" % (matched))
    print("Mismatched trades[%d]:\n%s" % (len(mismatched), mismatched))
    print("Missed in [%s] trades[%d]:\n%s" % (args.file1, len(missed_in_file1), missed_in_file1))
    print("Missed in [%s] trades[%d]:\n%s" % (args.file2, len(missed_in_file2), missed_in_file2))


if __name__ == '__main__':
    main()

转载于:https://my.oschina.net/u/939893/blog/169329

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值