import xlrd
import json
import os
from pathlib import Path
from openpyxl import load_workbook
from openpyxl.styles import PatternFill
from logger.logs import logger_record as log
def judge_sheet_name(list1, list2):
"""
判断两个表有哪些相同,哪些不同
:param list1:
:param list2:
:return:
"""
log.info("===========开始判断两个表有哪些相同,哪些不同===========")
same_list_values = [x for x in list1 if x in list2]
diff_list = [y for y in (list1 + list2) if y not in same_list_values]
num_1, num_2 = 0, 0
diff_1, diff_2 = [], []
for diff in diff_list:
if diff in list1:
num_1 += 1
diff_1.append(diff)
else:
num_2 += 1
diff_2.append(diff)
log.info("===========判断两个表有哪些相同,哪些不同结束===========")
return same_list_values, diff_1, diff_2
def change_sheet_color(file_object, sheet_object, diff_list, color='CC3204'):
"""
:param file_object: 文件对象
:param sheet_object: 表格对象
:param diff_list: 需要改变颜色的列表
:param color: 颜色默认为红色
:return:
"""
log.info("===========开始更改sheet颜色===========")
for ws in file_object.worksheets:
for diff_value in diff_list:
if diff_value in str(ws):
ws.sheet_properties.tabColor = color
file_object.save(sheet_object)
log.info("===========更改sheet颜色结束===========")
def compare_value(excel_file1, excel_file2):
global sheet_1, sheet_2
log.info("===========比较两个excel文件,找出差异单元格位置===========")
log.debug("不同表的详情字典,存放两张表中不同的表")
diff_dict = {}
work_book_1 = load_workbook(excel_file1)
sheets_names_1 = work_book_1.sheetnames
work_book_2 = load_workbook(excel_file2)
sheets_names_2 = work_book_2.sheetnames
log.debug("判断两Book中都存在的表,和两Book中不同的表")
same_list, diff_1, diff_2 = judge_sheet_name(sheets_names_1, sheets_names_2)
log.debug("获得文件名称")
book_1_name = Path(excel_file1).stem
book_2_name = Path(excel_file2).stem
log.debug("将两张表不一样的子表装入字典")
diff_dict["book1:“{}” 与book2:”{}“不一样的子表".format(book_1_name, book_2_name)] = diff_1
diff_dict["book2:“{}” 与book1:“{}”不一样的子表".format(book_2_name, book_1_name)] = diff_2
"""判断不同的列表1和列表2,若有子表信息不相同,就调用更改sheet颜色的函数,将颜色更改"""
log.debug("判断不同的列表1和列表2,若有子表信息不相同,就调用更改sheet颜色的函数,将颜色更改")
if len(diff_1) > 0:
change_sheet_color(work_book_1, excel_file1, diff_1)
diff_1 = "、".join(diff_1)
log.debug("book1颜色更改完毕!更改的sheet名称为:" + diff_1)
else:
log.debug("book1列表为空(sheet完全一样)")
if len(diff_2) > 0:
change_sheet_color(work_book_2, excel_file2, diff_2)
diff_2 = "、".join(diff_2)
log.debug("book2颜色更改完毕!更改的sheet名称为:" + diff_2)
else:
log.debug("book2列表为空(sheet完全一样)")
log.debug("比较单元格内容")
sheet_dict = {}
sheet_dict_list = []
for li in same_list:
max_rows_1, max_cols_1, max_rows_2, max_cols_2 = 0, 0, 0, 0
count = 0
diff_place = []
for i in range(0, len(sheets_names_1)):
work_sheet_1 = sheets_names_1[i]
if work_sheet_1 == li:
sheet_1 = work_book_1[work_sheet_1]
max_rows_1 = sheet_1.max_row
max_cols_1 = sheet_1.max_column
for j in range(0, len(sheets_names_2)):
work_sheet_2 = sheets_names_2[j]
if work_sheet_2 == li:
sheet_2 = work_book_2[work_sheet_2]
max_rows_2 = sheet_2.max_row
max_cols_2 = sheet_2.max_column
max_row = max_rows_1
if max_rows_1 < max_rows_2:
max_row = max_rows_2
max_col = max_cols_1
if max_cols_1 < max_cols_2:
max_col = max_cols_2
cell_value_1 = 0
cell_value_2 = 0
is_same_list = [None, '']
for r in range(1, max_row + 1):
for c in range(1, max_col + 1):
cell_value_1 = sheet_1.cell(row=r, column=c).value
cell_value_2 = sheet_2.cell(row=r, column=c).value
if cell_value_1 != cell_value_2:
if cell_value_1 in is_same_list and cell_value_2 in is_same_list:
continue
count += 1
diff_place.append((r, c))
sheet_dict[li] = diff_place
log.debug('表名***{}***中,两个表有***{}***处不一样。'.format(li, count))
log.info("===========比较两个excel文件,找出差异单元格位置结束===========")
if sheet_dict:
for diff in sheet_dict.keys():
sheet_dict_list.append(diff)
change_sheet_color(work_book_1, excel_file1, sheet_dict_list, "FFFF00")
change_sheet_color(work_book_2, excel_file2, sheet_dict_list, "FFFF00")
return sheet_dict, same_list, diff_dict
def mkdir(path):
log.info("==================新建文件夹{}开始================".format(path))
folder = os.path.exists(path)
if not folder:
os.makedirs(path)
log.info("==================新建文件夹{}结束================".format(path))
def color_execl(save_path, file_name, sheet_dict, same_list):
log.info("==================文件{}开始上色================".format(file_name))
global wb
book = xlrd.open_workbook(file_name)
sheet_names = []
sheet_index = []
for index, sheet in enumerate(book.sheets()):
if sheet.name in same_list:
sheet_index.append(index)
sheet_names.append(sheet.name)
try:
wb = load_workbook(file_name)
except:
log.error('加载{}文件时错误!'.format(file_name))
save_sheet_list = []
for i in sheet_index:
save_sheet_name = wb._sheets[i]
save_sheet_list.append(save_sheet_name)
common_sheet_names = sheet_dict.keys()
for index, name in enumerate(sheet_names):
sheet_x = book.sheet_by_name(name)
sheet_rows = sheet_x.nrows
sheet_cols = sheet_x.ncols
sht_name = save_sheet_list[index]
if name in common_sheet_names:
diff_list = sheet_dict[name]
for tu in diff_list:
r, c = list(tu)
try:
red_fill = PatternFill(fill_type='solid', fgColor="FFB6C1")
sht_name.cell(row=r, column=c).fill = red_fill
log.debug('给文件《{}》sheet{}写入颜色时成功!'.format(file_name, name))
except:
log.error('给文件《{}》sheet{}写入颜色时错误!!!!'.format(file_name, name))
"""处理新的文件名称以及保存路径"""
log.debug("开始处理新的文件名称以及保存路径...")
file_path = Path(save_path)
new_file_name = Path(file_name).stem + '_new.xlsx'
new_file_name_dir = file_path.joinpath('excel_result')
mkdir(new_file_name_dir)
new_file_name = new_file_name_dir.joinpath(new_file_name)
try:
wb.save(str(new_file_name))
except Exception as err:
log.error('保存写入颜色后的{}文件时错误!{}'.format(file_name, err))
log.warning("单元格上色后的文件文件已保存至“{}”".format(new_file_name))
log.info("==================文件{}上色结束================".format(file_name))
def save_json(result, save_path, dir_files_name):
"""
保存为json文件
:param result: 结果字典
:param save_path: 保存路径
:param dir_files_name: 传进来的文件名称
:return:
"""
log.info("================将结果存为json文件开始==========================")
file_path = Path(dir_files_name)
file_path = Path(save_path).joinpath(file_path).stem
json_name = file_path + ".json"
os.chdir(save_path)
mkdir("json_result")
json_file_save_path = Path(save_path).joinpath("json_result").joinpath(json_name)
with open(json_file_save_path, "w") as fp:
fp.write(json.dumps(result, sort_keys=True, ensure_ascii=False, indent=4, separators=(',', ': ')))
log.warning("json文件已存入{}".format(json_file_save_path))
log.info("================将结果存为json文件结束==========================")
def get_files(dir_all_files):
"""
获得当前目录下的所有指定类型文件
:param dir_all_files:
:return:
"""
"""参数topdown的默认值是“True”表示首先返回顶级目录下的文件,然后再遍历子目录中的文件。
当topdown的值为"False"时,表示先遍历子目录中的文件,然后再返回顶级目录下的文件。"""
all_file = []
for root, dirs, files in os.walk(dir_all_files, topdown=True):
for file in files:
dirs[:] = []
if os.path.splitext(file)[1] == ".xlsx":
all_file.append(os.path.join(root, file))
return all_file
def json_format(value):
"""
格式化输出
:param value:
:return:
"""
return json.dumps(value, sort_keys=True, ensure_ascii=False, indent=4, separators=(',', ': '))
def main(path1, path2):
"""
:param path1: 需要对比的文件路径
:param path2: 标答文件路径
:return:
"""
log.warning("文件比对开始*******************************************************************************")
log.info("对比文件路径为{}".format(path1))
log.info("标答文件路径为{}".format(path2))
table1 = get_files(path1)
log.debug("获得路径{}下所有需要比对的文件".format(path1))
table2 = get_files(path2)
log.debug("获得路径{}下所有标答文件".format(path1))
file_dir, _ = os.path.split(path1)
log.debug("循环所有文件")
for file_path1 in table1:
file_name1 = Path(file_path1).stem
for file_path2 in table2:
result_dict = {}
file_name2 = Path(file_path2).stem
log.debug("判断文件《{}》与文件《{}》名称是否一致".format(file_name1, file_name2))
if str(file_name1) in str(file_name2):
log.debug("文件《{}》 与文件《{}》 名称一致,比对开始..".format(file_name1, file_name2))
sheet_dict, same_list, diff_dict = compare_value(file_path1, file_path2)
color_execl(path1, file_path1, sheet_dict, same_list)
color_execl(path1, file_path2, sheet_dict, same_list)
result_dict['两个book中分别有的不同的表为'] = diff_dict
result_dict['两个book中表名相同但是单元格有差异的表'] = [x for x in sheet_dict.keys()]
result_dict['两个book共同拥有的表'] = same_list
save_json(result_dict, path1, file_name1)
else:
log.debug("文件《{}》 与文件《{}》 名称不一致,开始进行下一份文件比对...".format(file_name1, file_name2))
log.warning("文件比对结束*******************************************************************************")
if __name__ == '__main__':
"""
1.将需要对比的文件和标答文件放在同一目录下
2.传入需要对比的文件夹和标答文件夹
3.生成的json文件在根目录下
"""
files_path1 = r"F:\project\a_testfile\file1"
files_path2 = r"F:\project\a_testfile\file2"
main(files_path1, files_path2)