PS D:\check> & C:/Users/siyuanll2102/AppData/Local/Programs/Python/Python310/python.exe d:/check/check.py
C:\Users\siyuanll2102\AppData\Local\Programs\Python\Python310\lib\site-packages\openpyxl\reader\drawings.py:33: UserWarning: DrawingML support is incomplete and limited to charts and images only. Shapes and drawings will
be lost.
warn(“DrawingML support is incomplete and limited to charts and images only. Shapes and drawings will be lost.”)
import os
import tkinter as tk
from tkinter import filedialog, ttk, messagebox
from openpyxl import load_workbook
from openpyxl.styles import PatternFill
import logging
from datetime import datetime
import difflib
import csv
from collections import defaultdict
import re
class EnhancedExcelComparatorApp:
def init(self, root):
self.root = root
self.root.title(“增强型Excel比较工具”)
self.root.geometry(“1000x700”)
# 初始化相似度阈值 self.similarity_threshold = 0.7 # 创建UI组件 self.create_widgets() # 颜色定义 self.yellow_fill = PatternFill(start_color="FFFFFF00", end_color="FFFFFF00", fill_type="solid") # 新增行 (黄色) self.red_fill = PatternFill(start_color="FFFF0000", end_color="FFFF0000", fill_type="solid") # 删除行 (红色) self.blue_fill = PatternFill(start_color="FF0070C0", end_color="FF0070C0", fill_type="solid") # DE列差异 (蓝色0070C0) # 日志记录器 self.logger = logging.getLogger("ExcelComparator") self.logger.setLevel(logging.INFO) def create_widgets(self): # 文件夹选择区域 frame_folder = ttk.LabelFrame(self.root, text="文件夹选择") frame_folder.pack(padx=10, pady=5, fill="x") self.folder_path = tk.StringVar() ttk.Entry(frame_folder, textvariable=self.folder_path, width=80).pack(side="left", padx=5, pady=5, fill="x", expand=True) ttk.Button(frame_folder, text="浏览", command=self.select_folder).pack(side="left", padx=5, pady=5) # 设置区域 frame_settings = ttk.LabelFrame(self.root, text="比较设置") frame_settings.pack(padx=10, pady=5, fill="x") ttk.Label(frame_settings, text="相似度阈值:").pack(side="left", padx=5) self.similarity_var = tk.DoubleVar(value=self.similarity_threshold) ttk.Scale(frame_settings, from_=0.1, to=1.0, variable=self.similarity_var, orient="horizontal", length=200).pack(side="left", padx=5) ttk.Label(frame_settings, textvariable=self.similarity_var).pack(side="left", padx=5) # 比较按钮 frame_controls = ttk.Frame(self.root) frame_controls.pack(pady=10) ttk.Button(frame_controls, text="开始比较", command=self.start_comparison).pack(side="left", padx=5) ttk.Button(frame_controls, text="导出CSV报告", command=self.export_csv_report).pack(side="left", padx=5) # 结果展示区域 frame_results = ttk.LabelFrame(self.root, text="比较结果") frame_results.pack(padx=10, pady=5, fill="both", expand=True) # 创建带滚动条的文本区域 self.result_text = tk.Text(frame_results, wrap="word") scrollbar = ttk.Scrollbar(frame_results, orient="vertical", command=self.result_text.yview) self.result_text.configure(yscrollcommand=scrollbar.set) scrollbar.pack(side="right", fill="y") self.result_text.pack(fill="both", expand=True, padx=5, pady=5) # 底部按钮 frame_buttons = ttk.Frame(self.root) frame_buttons.pack(pady=10) ttk.Button(frame_buttons, text="打开标记文件夹", command=self.open_marked_folder).pack(side="left", padx=5) ttk.Button(frame_buttons, text="打开报告文件夹", command=self.open_reports_folder).pack(side="left", padx=5) ttk.Button(frame_buttons, text="清空结果", command=self.clear_results).pack(side="left", padx=5) def select_folder(self): folder_selected = filedialog.askdirectory() if folder_selected: self.folder_path.set(folder_selected) def start_comparison(self): folder = self.folder_path.get() if not folder: messagebox.showerror("错误", "请先选择文件夹") return # 更新相似度阈值 self.similarity_threshold = self.similarity_var.get() # 检查文件夹结构 before_dir = os.path.join(folder, "before") after_dir = os.path.join(folder, "after") if not os.path.exists(before_dir) or not os.path.exists(after_dir): messagebox.showerror("错误", "文件夹中必须包含before和after子文件夹") return # 创建目录 before_marked_dir = os.path.join(folder, "before_marked") after_marked_dir = os.path.join(folder, "after_marked") reports_dir = os.path.join(folder, "reports") os.makedirs(before_marked_dir, exist_ok=True) os.makedirs(after_marked_dir, exist_ok=True) os.makedirs(reports_dir, exist_ok=True) # 清空结果区域 self.result_text.delete(1.0, tk.END) self.result_text.insert(tk.END, f"开始比较: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n") self.result_text.insert(tk.END, f"比较目录: {folder}\n") self.result_text.insert(tk.END, f"相似度阈值: {self.similarity_threshold}\n") self.result_text.insert(tk.END, "-" * 80 + "\n") # 文件配对和比较 before_files = [f for f in os.listdir(before_dir) if f.endswith(('.xlsx', '.xls'))] after_files = [f for f in os.listdir(after_dir) if f.endswith(('.xlsx', '.xls'))] # 创建文件映射 file_map = self.create_file_mapping(before_files, after_files) # 比较每个文件对 total_changes = 0 for b_file, a_file in file_map.items(): b_path = os.path.join(before_dir, b_file) a_path = os.path.join(after_dir, a_file) if a_file else None # 处理文件存在性 b_exists = os.path.exists(b_path) a_exists = a_file and os.path.exists(a_path) if not b_exists and not a_exists: continue # 创建文件日志 log_filename = f"{b_file}_vs_{a_file}.log" if a_file else f"{b_file}_deleted.log" log_path = os.path.join(reports_dir, log_filename) self.setup_file_logger(log_path) # 执行比较 status = "完成" try: if b_exists and a_exists: self.logger.info(f"开始比较文件对: {b_file} ↔ {a_file}") self.logger.info(f"相似度阈值: {self.similarity_threshold}") result = self.compare_excel_files( b_path, a_path, os.path.join(before_marked_dir, b_file), os.path.join(after_marked_dir, a_file) ) elif b_exists: self.logger.info(f"处理删除文件: {b_file}") result = self.mark_deleted_file(b_path, os.path.join(before_marked_dir, b_file)) status = "删除" else: self.logger.info(f"处理新增文件: {a_file}") result = self.mark_added_file(a_path, os.path.join(after_marked_dir, a_file)) status = "新增" # 更新UI结果 file_pair = f"{b_file} ↔ {a_file}" if a_file else b_file summary = ( f"文件对: {file_pair}\n" f"状态: {status}\n" f"总变更点: {result['total_changes']}\n" f"新增行: {result['added_rows']}\n" f"删除行: {result['removed_rows']}\n" f"修改行: {result['modified_rows']}\n" f"相似度匹配: {result['link_style_matches']}\n" f"详细报告: {log_path}\n" + "-" * 80 + "\n" ) self.result_text.insert(tk.END, summary) total_changes += result["total_changes"] # 关闭文件日志 self.remove_file_logger() except Exception as e: status = f"错误: {str(e)}" self.logger.error(f"比较失败: {str(e)}") error_msg = f"文件对: {b_file} ↔ {a_file} - 错误: {str(e)}\n" self.result_text.insert(tk.END, error_msg) self.remove_file_logger() self.result_text.insert(tk.END, f"\n比较完成! 共发现 {total_changes} 处变更\n") messagebox.showinfo("完成", f"比较完成! 共发现 {total_changes} 处变更") def setup_file_logger(self, log_path): """为当前文件对设置日志记录器""" # 移除现有处理器 for handler in self.logger.handlers[:]: self.logger.removeHandler(handler) handler.close() # 创建文件处理器 file_handler = logging.FileHandler(log_path, mode='w', encoding='utf-8') file_handler.setLevel(logging.INFO) # 设置日志格式 formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s') file_handler.setFormatter(formatter) # 添加处理器 self.logger.addHandler(file_handler) def remove_file_logger(self): """移除文件日志处理器""" for handler in self.logger.handlers[:]: self.logger.removeHandler(handler) handler.close() def create_file_mapping(self, before_files, after_files): """创建文件映射,处理文件名相似度匹配""" file_map = {} used_after_files = set() # 优先匹配同名文件 for b_file in before_files: if b_file in after_files: file_map[b_file] = b_file used_after_files.add(b_file) # 匹配相似文件名 for b_file in before_files: if b_file in file_map: continue # 寻找最相似的文件 best_match = None best_score = 0 for a_file in after_files: if a_file in used_after_files: continue # 计算文件名相似度 score = difflib.SequenceMatcher(None, b_file, a_file).ratio() if score > best_score and score > 0.6: best_score = score best_match = a_file if best_match: file_map[b_file] = best_match used_after_files.add(best_match) else: file_map[b_file] = None # 处理未匹配的after文件 for a_file in after_files: if a_file not in used_after_files: file_map[a_file] = a_file # 标记为新增文件 return file_map def is_link_style(self, cell_value): """检查单元格值是否表示样式链接""" if cell_value is None: return False return "link" in str(cell_value).lower() def calculate_ij_similarity(self, ws_before, before_row_idx, ws_after, after_row_idx): """计算IJ列的相似度""" # 获取IJ列值 i_before = ws_before.cell(row=before_row_idx, column=9).value j_before = ws_before.cell(row=before_row_idx, column=10).value i_after = ws_after.cell(row=after_row_idx, column=9).value j_after = ws_after.cell(row=after_row_idx, column=10).value # 处理空值 text_before = f"{i_before or ''} {j_before or ''}".strip() text_after = f"{i_after or ''} {j_after or ''}".strip() if not text_before or not text_after: return 0.0 # 标准化文本 text_before = re.sub(r'\s+', ' ', str(text_before).lower()).strip() text_after = re.sub(r'\s+', ' ', str(text_after).lower()).strip() # 计算相似度 return difflib.SequenceMatcher(None, text_before, text_after).ratio() def compare_excel_files(self, before_path, after_path, before_marked_path, after_marked_path): """根据新规则比较Excel文件并记录详细日志""" # 加载工作簿 wb_before = load_workbook(before_path) wb_after = load_workbook(after_path) # 获取第一个工作表 ws_before = wb_before.active ws_after = wb_after.active # 记录工作表信息 self.logger.info(f"工作表名称: {ws_before.title}") self.logger.info(f"Before行数: {ws_before.max_row}, After行数: {ws_after.max_row}") self.logger.info("-" * 100) # 构建before文件的索引 exact_index = defaultdict(list) # 用于严格匹配的索引 (C, D, E) similarity_index = defaultdict(list) # 用于相似度匹配的索引 \(C\) for row_idx in range(1, ws_before.max_row + 1): c_value = ws_before.cell(row=row_idx, column=3).value d_value = ws_before.cell(row=row_idx, column=4).value e_value = ws_before.cell(row=row_idx, column=5).value # 检查D或E列是否为"link" is_link = (self.is_link_style(d_value) or self.is_link_style(e_value)) if is_link: # 链接行:添加到相似度索引 if c_value is not None: similarity_index[c_value].append(row_idx) else: # 非链接行:添加到严格匹配索引 key = (c_value, d_value, e_value) exact_index[key].append(row_idx) # 初始化结果统计 result = { "total_changes": 0, "added_rows": 0, "removed_rows": 0, "modified_rows": 0, "link_style_matches": 0 } # 记录匹配映射 match_map = {} # 记录before行的匹配状态 before_matched = [False] * (ws_before.max_row + 1) # 遍历after的每一行 for after_row_idx in range(1, ws_after.max_row + 1): c_value_after = ws_after.cell(row=after_row_idx, column=3).value d_value_after = ws_after.cell(row=after_row_idx, column=4).value e_value_after = ws_after.cell(row=after_row_idx, column=5).value matched = False matched_row = None match_type = "exact" # 检查是否为链接行(D或E列为"link") is_link = (self.is_link_style(d_value_after) or self.is_link_style(e_value_after)) # 根据行类型选择匹配策略 if is_link: # 链接行:使用相似度匹配策略 if c_value_after in similarity_index: best_match_idx = None best_similarity = 0.0 # 遍历所有具有相同C值的候选行 for before_row_idx in similarity_index[c_value_after]: if before_matched[before_row_idx]: continue # 计算IJ列相似度 similarity = self.calculate_ij_similarity( ws_before, before_row_idx, ws_after, after_row_idx ) if similarity > best_similarity: best_similarity = similarity best_match_idx = before_row_idx # 检查是否达到相似度阈值 if best_match_idx and best_similarity >= self.similarity_threshold: matched = True matched_row = best_match_idx before_matched[best_match_idx] = True match_map[after_row_idx] = best_match_idx match_type = "similarity" result["link_style_matches"] += 1 # 记录相似度匹配 self.logger.info(f"链接行相似度匹配: After行{after_row_idx} ↔ Before行{best_match_idx}") self.logger.info(f" 相似度: {best_similarity:.2f} (阈值: {self.similarity_threshold})") else: # 非链接行:使用严格匹配策略 key = (c_value_after, d_value_after, e_value_after) if key in exact_index: # 查找所有具有相同键值的候选行 for before_row_idx in exact_index[key]: if not before_matched[before_row_idx]: # 找到匹配行 matched = True matched_row = before_row_idx before_matched[before_row_idx] = True match_map[after_row_idx] = before_row_idx match_type = "exact" # 记录匹配 self.logger.info(f"非链接行严格匹配: After行{after_row_idx} ↔ Before行{before_row_idx}") break # 处理匹配结果 if matched: # 初始化变更列列表 changed_columns = [] # 比较IJLMNOP列 col_names = {9: "I", 10: "J", 12: "L", 13: "M", 14: "N", 15: "O", 16: "P"} for col in [9, 10, 12, 13, 14, 15, 16]: col_match, diff = self.compare_cells( ws_before, ws_after, matched_row, after_row_idx, col, col_names[col] ) if not col_match: changed_columns.append(col_names[col]) # 记录差异 self.logger.info(f" 列{col_names[col]}变更: {diff}") # 如果有变更列 if changed_columns: # 标记DE列为蓝色 self.mark_cell(ws_before, matched_row, 4, self.blue_fill) # Before D列 self.mark_cell(ws_before, matched_row, 5, self.blue_fill) # Before E列 self.mark_cell(ws_after, after_row_idx, 4, self.blue_fill) # After D列 self.mark_cell(ws_after, after_row_idx, 5, self.blue_fill) # After E列 # 将有变更的列字母写入R列 changed_str = ",".join(changed_columns) ws_before.cell(row=matched_row, column=18).value = changed_str ws_after.cell(row=after_row_idx, column=18).value = changed_str # 记录日志 self.logger.info(f" 行{after_row_idx}的D、E列标记蓝色,变更列: {changed_str}") result["modified_rows"] += 1 result["total_changes"] += len(changed_columns) # 未匹配处理 for after_row_idx in range(1, ws_after.max_row + 1): if after_row_idx not in match_map: # 未找到匹配行,标记为新增 self.mark_cell(ws_after, after_row_idx, 1, self.yellow_fill) self.mark_cell(ws_after, after_row_idx, 2, self.yellow_fill) self.mark_cell(ws_after, after_row_idx, 3, self.yellow_fill) # 记录新增行 row_type = "链接行" if is_link else "非链接行" c_val = ws_after.cell(row=after_row_idx, column=3).value self.logger.info(f"新增{row_type}: After行{after_row_idx} (C列值: {c_val}), 标记黄色") result["added_rows"] += 1 result["total_changes"] += 1 # 处理未匹配的before行(删除) for before_row_idx in range(1, ws_before.max_row + 1): if not before_matched[before_row_idx]: self.mark_cell(ws_before, before_row_idx, 1, self.red_fill) self.mark_cell(ws_before, before_row_idx, 2, self.red_fill) self.mark_cell(ws_before, before_row_idx, 3, self.red_fill) # 确定行类型 d_val = ws_before.cell(row=before_row_idx, column=4).value e_val = ws_before.cell(row=before_row_idx, column=5).value is_link = (self.is_link_style(d_val) or self.is_link_style(e_val)) row_type = "链接行" if is_link else "非链接行" # 记录删除行 c_val = ws_before.cell(row=before_row_idx, column=3).value self.logger.info(f"删除{row_type}: Before行{before_row_idx} (C列值: {c_val}), 标记黄色") result["removed_rows"] += 1 result["total_changes"] += 1 # 保存匹配映射到日志 self.logger.info("\n行匹配映射:") for after_idx, before_idx in match_map.items(): self.logger.info(f"After行{after_idx} ↔ Before行{before_idx}") # 保存标记后的文件 wb_before.save(before_marked_path) wb_after.save(after_marked_path) # 记录总结 self.logger.info("\n比较总结:") self.logger.info(f"总变更点: {result['total_changes']}") self.logger.info(f"新增行: {result['added_rows']}") self.logger.info(f"删除行: {result['removed_rows']}") self.logger.info(f"修改行: {result['modified_rows']}") self.logger.info(f"相似度匹配次数: {result['link_style_matches']}") return result def compare_cells(self, ws_before, ws_after, before_row, after_row, col, col_name): """比较两个工作表中指定单元格的值并返回差异详情""" val_before = ws_before.cell(row=before_row, column=col).value val_after = ws_after.cell(row=after_row, column=col).value # 处理空值 if val_before is None and val_after is None: return True, "" if val_before is None or val_after is None: return False, f"{col_name}列: 空值差异 (Before: {val_before}, After: {val_after})" # 数值比较 if isinstance(val_before, (int, float)) and isinstance(val_after, (int, float)): if abs(val_before - val_after) < 1e-9: return True, "" # 字符串比较 str_before = str(val_before).strip() str_after = str(val_after).strip() if str_before == str_after: return True, "" # 处理长文本差异 if len(str_before) > 50 or len(str_after) > 50: diff = f"{col_name}列: 内容长度不同 (Before: {len(str_before)}字符, After: {len(str_after)}字符)" else: diff = f"Before行{before_row} {col_name}列: [{str_before}]\nAfter行{after_row} {col_name}列: [{str_after}]" return False, diff def mark_cell(self, worksheet, row, col, fill): """标记单元格背景色""" cell = worksheet.cell(row=row, column=col) cell.fill = fill def mark_deleted_file(self, src_path, dest_path): """标记整个文件为删除状态""" wb = load_workbook(src_path) ws = wb.active # 记录日志 self.logger.info(f"文件被删除: {os.path.basename(src_path)}") self.logger.info(f"总行数: {ws.max_row}") self.logger.info("所有行标记为黄色") for row in range(1, ws.max_row + 1): self.mark_cell(ws, row, 3, self.red_fill) wb.save(dest_path) return { "total_changes": ws.max_row, "added_rows": 0, "removed_rows": ws.max_row, "modified_rows": 0, "link_style_matches": 0 } def mark_added_file(self, src_path, dest_path): """标记整个文件为新增状态""" wb = load_workbook(src_path) ws = wb.active # 记录日志 self.logger.info(f"文件为新增: {os.path.basename(src_path)}") self.logger.info(f"总行数: {ws.max_row}") self.logger.info("所有行标记为黄色") for row in range(1, ws.max_row + 1): self.mark_cell(ws, row, 3, self.yellow_fill) wb.save(dest_path) return { "total_changes": ws.max_row, "added_rows": ws.max_row, "removed_rows": 0, "modified_rows": 0, "link_style_matches": 0 } def open_marked_folder(self): folder = self.folder_path.get() if not folder: messagebox.showerror("错误", "请先选择文件夹") return marked_dir = os.path.join(folder, "before_marked") if not os.path.exists(marked_dir): marked_dir = os.path.join(folder, "after_marked") if os.path.exists(marked_dir): os.startfile(marked_dir) else: messagebox.showinfo("提示", "请先执行比较操作") def open_reports_folder(self): folder = self.folder_path.get() if not folder: messagebox.showerror("错误", "请先选择文件夹") return reports_dir = os.path.join(folder, "reports") if os.path.exists(reports_dir): os.startfile(reports_dir) else: messagebox.showinfo("提示", "报告目录不存在,请先执行比较操作") def clear_results(self): """清空结果区域""" self.result_text.delete(1.0, tk.END) def export_csv_report(self): """导出CSV格式的报告摘要""" folder = self.folder_path.get() if not folder: messagebox.showerror("错误", "请先选择文件夹") return reports_dir = os.path.join(folder, "reports") if not os.path.exists(reports_dir): messagebox.showinfo("提示", "请先执行比较操作") return csv_path = os.path.join(reports_dir, "comparison_summary.csv") try: # 收集日志文件中的摘要信息 summary_data = [] for log_file in os.listdir(reports_dir): if log_file.endswith(".log"): log_path = os.path.join(reports_dir, log_file) file_pair = log_file.replace(".log", "") added = removed = modified = total = link_matches = 0 # 从日志中提取摘要信息 with open(log_path, 'r', encoding='utf-8') as f: for line in f: if "总变更点:" in line: total = int(line.split(":")[1].strip()) elif "新增行:" in line: added = int(line.split(":")[1].strip()) elif "删除行:" in line: removed = int(line.split(":")[1].strip()) elif "修改行:" in line: modified = int(line.split(":")[1].strip()) elif "相似度匹配次数:" in line: link_matches = int(line.split(":")[1].strip()) summary_data.append({ "file_pair": file_pair, "total_changes": total, "added_rows": added, "removed_rows": removed, "modified_rows": modified, "link_style_matches": link_matches, "log_file": log_file }) # 写入CSV文件 with open(csv_path, 'w', newline='', encoding='utf-8') as csvfile: fieldnames = ["file_pair", "total_changes", "added_rows", "removed_rows", "modified_rows", "link_style_matches", "log_file"] writer = csv.DictWriter(csvfile, fieldnames=fieldnames) writer.writeheader() for row in summary_data: writer.writerow(row) messagebox.showinfo("成功", f"CSV报告已导出到: {csv_path}") os.startfile(reports_dir) except Exception as e: messagebox.showerror("错误", f"导出CSV报告失败: {str(e)}")
if name == “main”:
root = tk.Tk()
app = EnhancedExcelComparatorApp(root)
root.mainloop()
这个整不了图片,给我做一版可以处理图片的
比如说原after里面有批注,批注是一张图片,这个时候after_marked的文件夹里面的批注是空的,我希望解决这个问题