import os, csv, sys
import argparse
#compare.py -f1 file1.csv -f2 file2.csv -k key1,key2
# Records from the two CSV files are aligned on an "alignment key": the
# upper-cased values of the key columns joined by KEY_SEPARATOR.
KEY_SEPARATOR = "|"


def open_csv(path):
    """Open *path* for reading in the mode the csv module expects.

    Python 3's csv module wants a text-mode file opened with ``newline=''``;
    Python 2's wants a binary-mode file.
    """
    if sys.version_info[0] >= 3:
        return open(path, "r", newline="")
    return open(path, "rb")


def build_alignment_key(row, columns):
    """Return the alignment key of *row* for the column indexes *columns*.

    Every component is upper-cased and the separator is always emitted, even
    around empty components, so that ("3", "") -> "3|" and ("", "3") -> "|3"
    stay distinct instead of both collapsing to "3".

    Raises:
        IndexError: if *row* is shorter than one of the requested indexes.
    """
    return KEY_SEPARATOR.join(str(row[index]).upper() for index in columns)


def iter_keyed_rows(path, keys):
    """Yield ``(alignment_key, row)`` for every data row of the CSV at *path*.

    The first row is treated as the header. Key columns are located through
    this file's own header, so the two compared files may order their columns
    differently and still be aligned on the right values. Blank lines (which
    csv.reader reports as empty rows) are skipped.

    Raises:
        ValueError: if the file has no header row, a key column is missing
            from the header, or a data row is too short to hold a key column.
    """
    with open_csv(path) as handle:
        reader = csv.reader(handle, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
        try:
            header = next(reader)
        except StopIteration:
            raise ValueError("%s is empty: no header row found" % path)

        # On duplicate column names the last occurrence wins.
        header_index = dict(zip(header, range(len(header))))
        missing = [name for name in keys if name not in header_index]
        if missing:
            raise ValueError(
                "key column(s) %s not found in header of %s" % (", ".join(missing), path)
            )
        columns = [header_index[name] for name in keys]

        for row in reader:
            if not row:
                continue
            try:
                alignment_key = build_alignment_key(row, columns)
            except IndexError:
                raise ValueError(
                    "%s line %d: row has too few columns for key %s"
                    % (path, reader.line_num, ",".join(keys))
                )
            yield alignment_key, row


def compare_files(path1, path2, keys):
    """Compare the records of two CSV files aligned on the *keys* columns.

    Progress is printed while file 2 is scanned: "<key> matched" for identical
    rows, both rows (file 2 first) for a mismatch, and the file 2 row when its
    key does not exist in file 1. Rows are compared positionally as lists.

    Returns:
        A tuple ``(matched, mismatched, missed_in_file1, missed_in_file2)``:
        the number of identical records, the keys whose rows differ, the keys
        found only in file 2, and the keys found only in file 1.

    Raises:
        ValueError: propagated from iter_keyed_rows for malformed input.
    """
    # If a key occurs more than once in file 1, the last row is kept.
    rows_by_key = {}
    for alignment_key, row in iter_keyed_rows(path1, keys):
        rows_by_key[alignment_key] = row

    matched = 0
    mismatched = []
    missed_in_file1 = []
    for alignment_key, row2 in iter_keyed_rows(path2, keys):
        if alignment_key not in rows_by_key:
            print(row2)
            missed_in_file1.append(alignment_key)
            continue
        # Consume the file 1 record: whatever is left at the end was never
        # seen in file 2, and a repeated key in file 2 counts as missing.
        row1 = rows_by_key.pop(alignment_key)
        if row2 == row1:
            print("%s matched" % alignment_key)
            matched += 1
        else:
            print(row2)
            print(row1)
            mismatched.append(alignment_key)

    missed_in_file2 = list(rows_by_key)
    return matched, mismatched, missed_in_file1, missed_in_file2


def main(argv=None):
    """Parse the command line, compare the two files and print the summary.

    *argv* defaults to ``sys.argv[1:]``. Malformed input files are reported
    through argparse's error exit rather than as a traceback.
    """
    parser = argparse.ArgumentParser(description='compare two files to check if they contain same records')
    parser.add_argument('-f1', '--file1', required=True)
    parser.add_argument('-f2', '--file2', required=True)
    parser.add_argument('-k', '--key', required=True, help='used to get the alignment key')
    args = parser.parse_args(argv)
    keys = args.key.split(',')

    try:
        matched, mismatched, missed_in_file1, missed_in_file2 = compare_files(
            args.file1, args.file2, keys
        )
    except ValueError as err:
        parser.error(str(err))

    # Single parenthesised arguments keep these prints valid on Python 2 and 3.
    print("matched = %s" % (matched))
    print("Mismatched trades[%d]:\n%s" % (len(mismatched), mismatched))
    print("Missed in [%s] trades[%d]:\n%s" % (args.file1, len(missed_in_file1), missed_in_file1))
    print("Missed in [%s] trades[%d]:\n%s" % (args.file2, len(missed_in_file2), missed_in_file2))


if __name__ == "__main__":
    main()
# Reposted from: https://my.oschina.net/u/939893/blog/169329