import os
import numpy as np
import openpyxl
from openpyxl import load_workbook
from openpyxl.styles import PatternFill, Border, Side
from openpyxl.comments import Comment
from collections import defaultdict
import tkinter as tk
from tkinter import ttk, filedialog, messagebox, scrolledtext, END
import time
import threading
from ttkthemes import ThemedTk
from difflib import SequenceMatcher
import hashlib
import re
# 颜色定义
COLOR_ADDED = 'C6EFCE' # 新增 - 浅绿
COLOR_DELETED = 'FFC7CE' # 删除 - 浅红
COLOR_CHANGED = 'FFEB9C' # 修改 - 浅黄
COLOR_BORDER = '002060' # 边框颜色
COLOR_CONTEXT_MATCH = 'D9E1F2' # 上下文匹配 - 浅蓝
COLOR_IGNORED = 'D9D9D9' # 被忽略的差异 - 灰色
class SmartExcelComparator:
def __init__(self):
self.root = ThemedTk(theme="arc")
self.root.title("智能Excel差异检测工具")
self.root.geometry("1200x800")
# 设置应用图标
try:
self.root.iconbitmap("diff_icon.ico")
except:
pass
# 创建UI
self.create_widgets()
self.root.mainloop()
def create_widgets(self):
# 创建主框架
main_frame = ttk.Frame(self.root)
main_frame.pack(fill="both", expand=True, padx=15, pady=15)
# 创建顶部控制面板
control_frame = ttk.LabelFrame(main_frame, text="文件选择与配置")
control_frame.pack(fill="x", padx=10, pady=10)
# 文件选择区域
file_frame = ttk.Frame(control_frame)
file_frame.pack(fill="x", padx=10, pady=10)
# 文件1
ttk.Button(file_frame, text="选择文件1",
command=lambda: self.select_file(1)).grid(row=0, column=0, padx=5, pady=5, sticky="w")
self.file1_label = ttk.Label(file_frame, text="未选择", width=80, anchor="w")
self.file1_label.grid(row=0, column=1, padx=5, pady=5, sticky="w")
# 文件2
ttk.Button(file_frame, text="选择文件2",
command=lambda: self.select_file(2)).grid(row=1, column=0, padx=5, pady=5, sticky="w")
self.file2_label = ttk.Label(file_frame, text="未选择", width=80, anchor="w")
self.file2_label.grid(row=1, column=1, padx=5, pady=5, sticky="w")
# 配置区域
config_frame = ttk.Frame(control_frame)
config_frame.pack(fill="x", padx=10, pady=10)
# 精确度配置
ttk.Label(config_frame, text="精确度阈值 (0.1-0.9):").grid(row=0, column=0, sticky="w", padx=(0, 5))
self.precision_var = tk.DoubleVar(value=0.75)
ttk.Scale(config_frame, from_=0.1, to=0.9, orient="horizontal",
length=250, variable=self.precision_var).grid(row=0, column=1, padx=5)
# 操作按钮
button_frame = ttk.Frame(control_frame)
button_frame.pack(fill="x", padx=10, pady=10)
ttk.Button(button_frame, text="开始比较", command=self.start_comparison,
style="Accent.TButton").pack(side="left", padx=5)
ttk.Button(button_frame, text="查看标记结果", command=self.view_result).pack(side="left", padx=5)
ttk.Button(button_frame, text="导出报告", command=self.export_report).pack(side="left", padx=5)
ttk.Button(button_frame, text="设置", command=self.show_settings).pack(side="right", padx=5)
# 进度条
self.progress = ttk.Progressbar(control_frame, orient="horizontal",
length=600, mode="determinate")
self.progress.pack(fill="x", padx=10, pady=10)
# 结果展示区域
result_frame = ttk.LabelFrame(main_frame, text="比较结果")
result_frame.pack(fill="both", expand=True, padx=10, pady=5)
# 创建标签页
self.notebook = ttk.Notebook(result_frame)
self.notebook.pack(fill="both", expand=True)
# 文本结果标签页
text_tab = ttk.Frame(self.notebook)
self.notebook.add(text_tab, text="文本报告")
self.result_text = scrolledtext.ScrolledText(text_tab, wrap="word", font=("Consolas", 10))
self.result_text.pack(fill="both", expand=True)
# 统计标签页
stats_tab = ttk.Frame(self.notebook)
self.notebook.add(stats_tab, text="统计概览")
self.stats_text = scrolledtext.ScrolledText(stats_tab, wrap="word", font=("Consolas", 10))
self.stats_text.pack(fill="both", expand=True)
# 状态栏
self.status_var = tk.StringVar(value="就绪")
status_bar = ttk.Frame(self.root, height=25)
status_bar.pack(fill="x", side="bottom")
ttk.Label(status_bar, textvariable=self.status_var,
relief="sunken", anchor="w").pack(fill="x")
# 初始化变量
self.file1_path = None
self.file2_path = None
self.marked_file1_path = None
self.marked_file2_path = None
self.report_path = None
# 设置样式
self.set_styles()
def set_styles(self):
"""设置现代化UI样式"""
style = ttk.Style()
style.theme_use('arc')
style.configure('Accent.TButton', font=('Arial', 10, 'bold'),
foreground='white', background='#4CAF50')
style.map('Accent.TButton', background=[('active', '#45a049')])
def select_file(self, file_num):
path = filedialog.askopenfilename(filetypes=[("Excel文件", "*.xlsx *.xls")])
if path:
if file_num == 1:
self.file1_path = path
self.file1_label.config(text=os.path.basename(path))
else:
self.file2_path = path
self.file2_label.config(text=os.path.basename(path))
def update_status(self, message):
self.status_var.set(message)
self.root.update_idletasks()
def start_comparison(self):
if not self.file1_path or not self.file2_path:
messagebox.showerror("错误", "请选择两个要比较的Excel文件")
return
# 禁用按钮防止重复点击
self.root.after(0, self.disable_buttons)
# 在新线程中执行比较
threading.Thread(target=self.run_comparison, daemon=True).start()
def disable_buttons(self):
for widget in self.root.winfo_children():
if isinstance(widget, ttk.Button):
widget.configure(state="disabled")
def enable_buttons(self):
for widget in self.root.winfo_children():
if isinstance(widget, ttk.Button):
widget.configure(state="normal")
def run_comparison(self):
start_time = time.time()
self.result_text.delete(1.0, END)
self.stats_text.delete(1.0, END)
self.progress['value'] = 0
self.update_status("开始比较...")
try:
threshold = self.precision_var.get()
self.marked_file1_path, self.marked_file2_path, stats = self.compare_files(
self.file1_path, self.file2_path, threshold)
self.show_statistics(stats)
elapsed = time.time() - start_time
self.update_status(f"比较完成! 耗时: {elapsed:.2f}秒")
self.result_text.insert(END, f"\n比较完成! 耗时: {elapsed:.2f}秒\n")
except Exception as e:
messagebox.showerror("错误", f"比较过程中出错: {str(e)}")
self.update_status(f"错误: {str(e)}")
import traceback
traceback.print_exc()
finally:
self.root.after(0, self.enable_buttons)
def normalize_value(self, value):
"""标准化单元格值以进行更准确的比较"""
if value is None:
return ""
# 如果是数字,转换为浮点数进行比较
if isinstance(value, (int, float)):
return float(value)
# 如果是字符串,去除两端空白和特殊空白字符
if isinstance(value, str):
# 标准化空白字符(将各种空白替换为空格)
value = re.sub(r'\s+', ' ', value.strip())
# 移除特殊字符差异(可选)
# value = re.sub(r'[^\w\s]', '', value)
# 转换为小写(可选,根据需求)
# value = value.lower()
return value
return value
def calculate_similarity(self, val1, val2):
"""计算两个值的相似度(0.0-1.0),考虑语义相似度"""
# 标准化值
norm_val1 = self.normalize_value(val1)
norm_val2 = self.normalize_value(val2)
# 如果标准化后值相同,相似度为1.0
if norm_val1 == norm_val2:
return 1.0
# 如果其中一个是None,相似度为0.0
if val1 is None or val2 is None:
return 0.0
# 数值相似度计算
if isinstance(norm_val1, float) and isinstance(norm_val2, float):
max_val = max(abs(norm_val1), abs(norm_val2), 1)
return 1.0 - abs(norm_val1 - norm_val2) / max_val
# 字符串相似度计算(使用改进的SequenceMatcher)
if isinstance(norm_val1, str) and isinstance(norm_val2, str):
# 使用SequenceMatcher计算相似度
matcher = SequenceMatcher(None, norm_val1, norm_val2)
# 考虑常见拼写错误和缩写
common_chars = set(norm_val1) & set(norm_val2)
char_similarity = len(common_chars) / max(len(set(norm_val1)), len(set(norm_val2)), 1)
# 加权计算最终相似度
ratio = matcher.ratio()
return 0.7 * ratio + 0.3 * char_similarity
return 0.0
def get_cell_context(self, sheet, row, col, radius=2):
"""获取单元格周围的上下文内容(行和列方向)"""
row_context = []
col_context = []
# 行上下文(同一行左侧和右侧)
for c in range(max(1, col - radius), min(sheet.max_column, col + radius) + 1):
if c == col:
continue
cell = sheet.cell(row=row, column=c)
if cell.value is not None:
row_context.append(str(cell.value))
# 列上下文(同一列上方和下方)
for r in range(max(1, row - radius), min(sheet.max_row, row + radius) + 1):
if r == row:
continue
cell = sheet.cell(row=r, column=col)
if cell.value is not None:
col_context.append(str(cell.value))
return row_context, col_context
def context_similarity(self, context1, context2):
"""计算两个上下文的相似度(使用改进的Jaccard相似度)"""
if not context1 and not context2:
return 1.0
if not context1 or not context2:
return 0.0
# 使用加权Jaccard相似度
set1 = set(context1)
set2 = set(context2)
intersection = set1 & set2
union = set1 | set2
# 考虑顺序相似性(如果上下文顺序重要)
seq_sim = SequenceMatcher(None, context1, context2).ratio()
# 加权组合
jaccard_sim = len(intersection) / len(union) if union else 0.0
return 0.6 * jaccard_sim + 0.4 * seq_sim
def calculate_position_offset(self, sheet1, sheet2):
"""使用特征点匹配计算位置偏移"""
# 查找特征点(唯一值)
feature_points = {}
# 收集sheet1的特征点
for row in range(1, min(100, sheet1.max_row) + 1):
for col in range(1, min(50, sheet1.max_column) + 1):
val = sheet1.cell(row, col).value
if val and val not in feature_points:
feature_points[val] = [(row, col)]
elif val:
feature_points[val].append((row, col))
# 查找在sheet2中唯一的特征点
unique_features = []
for val, positions in feature_points.items():
if len(positions) == 1: # 在sheet1中唯一
# 检查在sheet2中是否唯一
count_in_sheet2 = 0
for row in range(1, min(100, sheet2.max_row) + 1):
for col in range(1, min(50, sheet2.max_column) + 1):
if sheet2.cell(row, col).value == val:
count_in_sheet2 += 1
if count_in_sheet2 == 1: # 在sheet2中也唯一
unique_features.append((val, positions[0]))
if not unique_features:
return 0, 0 # 没有找到足够的特征点
# 计算位置偏移
row_shifts = []
col_shifts = []
for val, (r1, c1) in unique_features:
# 在sheet2中查找该值
for row in range(1, sheet2.max_row + 1):
for col in range(1, sheet2.max_column + 1):
if sheet2.cell(row, col).value == val:
row_shifts.append(row - r1)
col_shifts.append(col - c1)
break
# 使用中位数作为最终偏移量(对异常值更鲁棒)
avg_row_shift = int(np.median(row_shifts)) if row_shifts else 0
avg_col_shift = int(np.median(col_shifts)) if col_shifts else 0
return avg_row_shift, avg_col_shift
def find_best_match(self, sheet1, row1, col1, sheet2, context_map, matched_positions, row_shift, col_shift, threshold):
"""为单元格在另一个sheet中寻找最佳匹配(使用改进的AI匹配算法)"""
val1 = sheet1.cell(row=row1, column=col1).value
if val1 is None:
return None, -1
# 获取行列上下文
row_context1, col_context1 = self.get_cell_context(sheet1, row1, col1)
best_match = None
best_score = -1
# 计算预期位置
expected_row = row1 + row_shift
expected_col = col1 + col_shift
# 搜索范围(基于偏移量和阈值)
search_radius = max(1, int(10 * (1 - threshold))) # 阈值越低,搜索范围越大
row_range = range(max(1, expected_row - search_radius), min(sheet2.max_row, expected_row + search_radius) + 1)
col_range = range(max(1, expected_col - search_radius), min(sheet2.max_column, expected_col + search_radius) + 1)
for r2 in row_range:
for c2 in col_range:
if (r2, c2) in matched_positions:
continue
val2 = sheet2.cell(row=r2, column=c2).value
if val2 is None:
continue
# 计算距离惩罚因子
row_dist = abs(r2 - expected_row)
col_dist = abs(c2 - expected_col)
distance_penalty = max(0, 1.0 - (row_dist + col_dist) * 0.05) # 每格距离降低5%的分数
# 值相似度
value_sim = self.calculate_similarity(val1, val2)
# 获取上下文
row_context2, col_context2 = context_map[(r2, c2)]
# 计算上下文相似度(行和列分开计算)
row_context_sim = self.context_similarity(row_context1, row_context2)
col_context_sim = self.context_similarity(col_context1, col_context2)
# 组合相似度(值 + 行上下文 + 列上下文)
total_sim = (
0.5 * value_sim +
0.3 * row_context_sim +
0.2 * col_context_sim
) * distance_penalty
if total_sim > best_score:
best_score = total_sim
best_match = (r2, c2)
return best_match, best_score
def compare_files(self, file1, file2, threshold):
self.update_status(f"比较文件: {os.path.basename(file1)} 和 {os.path.basename(file2)}")
self.result_text.insert(END, f"===== 文件比较报告 =====\n")
self.result_text.insert(END, f"文件1: {os.path.basename(file1)}\n")
self.result_text.insert(END, f"文件2: {os.path.basename(file2)}\n")
self.result_text.insert(END, f"相似度阈值: {threshold:.2f}\n\n")
# 加载工作簿
wb1 = load_workbook(file1)
wb2 = load_workbook(file2)
# 创建标记后的工作簿
marked_wb1 = load_workbook(file1)
marked_wb2 = load_workbook(file2)
# 设置样式
thin_border = Border(left=Side(style='thin', color=COLOR_BORDER),
right=Side(style='thin', color=COLOR_BORDER),
top=Side(style='thin', color=COLOR_BORDER),
bottom=Side(style='thin', color=COLOR_BORDER))
added_fill = PatternFill(start_color=COLOR_ADDED, end_color=COLOR_ADDED, fill_type="solid")
deleted_fill = PatternFill(start_color=COLOR_DELETED, end_color=COLOR_DELETED, fill_type="solid")
changed_fill = PatternFill(start_color=COLOR_CHANGED, end_color=COLOR_CHANGED, fill_type="solid")
context_fill = PatternFill(start_color=COLOR_CONTEXT_MATCH, end_color=COLOR_CONTEXT_MATCH, fill_type="solid")
ignored_fill = PatternFill(start_color=COLOR_IGNORED, end_color=COLOR_IGNORED, fill_type="solid")
# 比较每个sheet
common_sheets = set(wb1.sheetnames) & set(wb2.sheetnames)
total_sheets = len(common_sheets)
if total_sheets == 0:
self.result_text.insert(END, "两个文件没有共同的sheet\n")
return None, None, {}
self.result_text.insert(END, f"共同sheet: {', '.join(common_sheets)}\n\n")
# 进度设置
progress_step = 100 / total_sheets
current_progress = 0
# 保存标记结果的文件路径
save_dir = os.path.dirname(file1)
timestamp = time.strftime("%Y%m%d_%H%M%S")
marked_file1 = os.path.join(save_dir, f"标记结果_{timestamp}_{os.path.basename(file1)}")
marked_file2 = os.path.join(save_dir, f"标记结果_{timestamp}_{os.path.basename(file2)}")
# 统计信息
stats = {
'total_sheets': total_sheets,
'sheets': {},
'total_changes': 0,
'added': 0,
'deleted': 0,
'modified': 0,
'matched_by_context': 0,
'ignored': 0
}
# 处理每个sheet
for sheet_idx, sheet_name in enumerate(common_sheets):
self.progress['value'] = current_progress
self.update_status(f"处理sheet: {sheet_name} ({sheet_idx+1}/{total_sheets})")
self.result_text.insert(END, f"\n--- Sheet: {sheet_name} ---\n")
# 获取工作表对象
sheet1 = wb1[sheet_name]
sheet2 = wb2[sheet_name]
marked_ws1 = marked_wb1[sheet_name]
marked_ws2 = marked_wb2[sheet_name]
# 初始化sheet统计
sheet_stats = {
'rows': max(sheet1.max_row, sheet2.max_row),
'cols': max(sheet1.max_column, sheet2.max_column),
'added': 0,
'deleted': 0,
'modified': 0,
'matched_by_context': 0,
'ignored': 0,
'changes': []
}
# 计算整体位置偏移
self.update_status(f"计算 {sheet_name} 的位置偏移...")
row_shift, col_shift = self.calculate_position_offset(sheet1, sheet2)
self.result_text.insert(END, f"检测到位置偏移: 行偏移={row_shift}, 列偏移={col_shift}\n")
# 构建sheet2的上下文映射 (位置 -> (行上下文, 列上下文))
context_map = {}
for row in range(1, sheet2.max_row + 1):
for col in range(1, sheet2.max_column + 1):
context_map[(row, col)] = self.get_cell_context(sheet2, row, col)
# 用于跟踪匹配情况
matched_in_ws1 = {}
matched_in_ws2 = {}
# 第一阶段:精确匹配(相同位置且值相同)
self.update_status(f"执行 {sheet_name} 的精确匹配...")
min_row = min(sheet1.max_row, sheet2.max_row)
min_col = min(sheet1.max_column, sheet2.max_column)
for row in range(1, min_row + 1):
for col in range(1, min_col + 1):
cell1 = sheet1.cell(row=row, column=col)
cell2 = sheet2.cell(row=row, column=col)
# 使用相似度计算而不是直接比较
similarity = self.calculate_similarity(cell1.value, cell2.value)
if similarity > threshold:
matched_in_ws1[(row, col)] = (row, col)
matched_in_ws2[(row, col)] = (row, col)
# 第二阶段:基于上下文的智能匹配
self.update_status(f"执行 {sheet_name} 的上下文匹配...")
for row1 in range(1, sheet1.max_row + 1):
for col1 in range(1, sheet1.max_column + 1):
if (row1, col1) in matched_in_ws1:
continue # 已匹配
cell1 = sheet1.cell(row=row1, column=col1)
if cell1.value is None:
continue
# 寻找最佳匹配
best_match, match_score = self.find_best_match(
sheet1, row1, col1, sheet2, context_map,
matched_in_ws2, row_shift, col_shift, threshold)
if best_match and match_score >= threshold:
matched_in_ws1[(row1, col1)] = best_match
matched_in_ws2[best_match] = (row1, col1)
sheet_stats['matched_by_context'] += 1
stats['matched_by_context'] += 1
# 标记上下文匹配
marked_cell1 = marked_ws1.cell(row=row1, column=col1)
marked_cell1.fill = context_fill
marked_cell1.comment = Comment(
f"上下文匹配到: ({best_match[0]},{best_match[1]}), 分数: {match_score:.2f}", "Diff Tool")
marked_cell2 = marked_ws2.cell(row=best_match[0], column=best_match[1])
marked_cell2.fill = context_fill
marked_cell2.comment = Comment(
f"上下文匹配自: ({row1},{col1}), 分数: {match_score:.2f}", "Diff Tool")
# 第三阶段:处理差异
self.update_status(f"标记 {sheet_name} 的差异...")
# 处理文件1中的单元格
for row1 in range(1, sheet1.max_row + 1):
for col1 in range(1, sheet1.max_column + 1):
cell1 = sheet1.cell(row=row1, column=col1)
if cell1.value is None:
continue
marked_cell1 = marked_ws1.cell(row=row1, column=col1)
if (row1, col1) in matched_in_ws1:
# 已匹配 - 检查值是否相同
row2, col2 = matched_in_ws1[(row1, col1)]
cell2 = sheet2.cell(row=row2, column=col2)
# 使用相似度计算而不是直接比较
similarity = self.calculate_similarity(cell1.value, cell2.value)
if similarity < 1.0:
# 值不同 - 标记为修改
marked_cell1.fill = changed_fill
comment = f"修改为: {cell2.value} (相似度: {similarity:.2f})"
marked_cell1.comment = Comment(comment, "Diff Tool")
marked_cell2 = marked_ws2.cell(row=row2, column=col2)
marked_cell2.fill = changed_fill
marked_cell2.comment = Comment(f"原值: {cell1.value} (相似度: {similarity:.2f})", "Diff Tool")
sheet_stats['modified'] += 1
stats['modified'] += 1
sheet_stats['changes'].append(('修改', row1, col1, cell1.value, cell2.value))
self.result_text.insert(END, f"修改: [{row1},{col1}] '{cell1.value}' -> '{cell2.value}' (相似度: {similarity:.2f})\n")
else:
# 值相同但格式不同 - 标记为忽略
marked_cell1.fill = ignored_fill
marked_cell1.comment = Comment(f"内容相同但格式不同 (相似度: {similarity:.2f})", "Diff Tool")
marked_cell2 = marked_ws2.cell(row=row2, column=col2)
marked_cell2.fill = ignored_fill
marked_cell2.comment = Comment(f"内容相同但格式不同 (相似度: {similarity:.2f})", "Diff Tool")
sheet_stats['ignored'] += 1
stats['ignored'] += 1
else:
# 未匹配 - 标记为删除
marked_cell1.fill = deleted_fill
marked_cell1.comment = Comment(f"在文件2中删除: {cell1.value}", "Diff Tool")
sheet_stats['deleted'] += 1
stats['deleted'] += 1
sheet_stats['changes'].append(('删除', row1, col1, cell1.value, None))
self.result_text.insert(END, f"删除: [{row1},{col1}] {cell1.value}\n")
# 处理文件2中的新增单元格
for row2 in range(1, sheet2.max_row + 1):
for col2 in range(1, sheet2.max_column + 1):
if (row2, col2) in matched_in_ws2:
continue # 已匹配
cell2 = sheet2.cell(row=row2, column=col2)
if cell2.value is None:
continue
# 未匹配 - 标记为新增
marked_cell2 = marked_ws2.cell(row=row2, column=col2)
marked_cell2.fill = added_fill
marked_cell2.comment = Comment(f"在文件2中新增: {cell2.value}", "Diff Tool")
sheet_stats['added'] += 1
stats['added'] += 1
sheet_stats['changes'].append(('新增', row2, col2, None, cell2.value))
self.result_text.insert(END, f"新增: [{row2},{col2}] = '{cell2.value}'\n")
# 更新统计
stats['sheets'][sheet_name] = sheet_stats
stats['total_changes'] += sheet_stats['added'] + sheet_stats['deleted'] + sheet_stats['modified']
self.result_text.insert(END, f"变更统计: 新增={sheet_stats['added']}, 删除={sheet_stats['deleted']}, 修改={sheet_stats['modified']}, 忽略={sheet_stats['ignored']}\n")
self.result_text.insert(END, f"上下文匹配: {sheet_stats['matched_by_context']}\n")
current_progress += progress_step
# 保存标记结果
self.update_status("保存标记结果...")
marked_wb1.save(marked_file1)
marked_wb2.save(marked_file2)
self.result_text.insert(END, f"\n文件1标记结果已保存到: {marked_file1}\n")
self.result_text.insert(END, f"文件2标记结果已保存到: {marked_file2}\n")
self.progress['value'] = 100
self.update_status("比较完成! 点击'查看标记结果'查看差异")
return marked_file1, marked_file2, stats
def show_statistics(self, stats):
"""显示统计信息"""
self.stats_text.delete(1.0, END)
self.stats_text.insert(END, "===== 差异统计概览 =====\n\n")
self.stats_text.insert(END, f"总工作表数量: {stats['total_sheets']}\n")
self.stats_text.insert(END, f"总变更数: {stats['total_changes']}\n")
self.stats_text.insert(END, f"上下文匹配数: {stats.get('matched_by_context', 0)}\n\n")
# 按类型统计
total_added = stats['added']
total_deleted = stats['deleted']
total_modified = stats['modified']
self.stats_text.insert(END, "=== 变更类型分布 ===\n")
self.stats_text.insert(END, f"新增单元格: {total_added} ({(total_added/stats['total_changes'] * 100) if stats['total_changes'] > 0 else 0:.1f}%)\n")
self.stats_text.insert(END, f"删除单元格: {total_deleted} ({(total_deleted/stats['total_changes'] * 100) if stats['total_changes'] > 0 else 0:.1f}%)\n")
self.stats_text.insert(END, f"修改单元格: {total_modified} ({(total_modified/stats['total_changes'] * 100) if stats['total_changes'] > 0 else 0:.1f}%)\n\n")
# 各工作表详细统计
self.stats_text.insert(END, "=== 各工作表统计 ===\n")
for sheet_name, sheet_stats in stats['sheets'].items():
self.stats_text.insert(END, f"\n【{sheet_name}】\n")
self.stats_text.insert(END, f" 尺寸: {sheet_stats['rows']}行 × {sheet_stats['cols']}列\n")
self.stats_text.insert(END, f" 新增: {sheet_stats['added']} | 删除: {sheet_stats['deleted']} | 修改: {sheet_stats['modified']}\n")
self.stats_text.insert(END, f" 上下文匹配: {sheet_stats['matched_by_context']}\n")
# 显示前5个主要变更
if sheet_stats['changes']:
self.stats_text.insert(END, " 主要变更:\n")
top_changes = sheet_stats['changes'][:5]
for change_type, row, col, old_val, new_val in top_changes:
if change_type == '新增':
self.stats_text.insert(END, f" [{row},{col}] + '{new_val}'\n")
elif change_type == '删除':
self.stats_text.insert(END, f" [{row},{col}] - '{old_val}'\n")
else: # 修改
self.stats_text.insert(END, f" [{row},{col}] '{old_val}' → '{new_val}'\n")
# 建议
self.stats_text.insert(END, "\n=== 分析建议 ===\n")
if total_modified > total_added and total_modified > total_deleted:
self.stats_text.insert(END, "• 数据修改是主要变更类型,建议重点审查关键字段\n")
if total_added > total_modified and total_added > total_deleted:
self.stats_text.insert(END, "• 大量新增数据,建议验证数据完整性\n")
if total_deleted > total_modified and total_deleted > total_added:
self.stats_text.insert(END, "• 大量数据删除,建议确认是否为预期操作\n")
if stats.get('matched_by_context', 0) > 0:
self.stats_text.insert(END, f"• 检测到 {stats['matched_by_context']} 个上下文匹配,请确认位置变化是否合理\n")
def view_result(self):
"""查看标记结果"""
files = []
if hasattr(self, 'marked_file1_path') and self.marked_file1_path and os.path.exists(self.marked_file1_path):
files.append(self.marked_file1_path)
if hasattr(self, 'marked_file2_path') and self.marked_file2_path and os.path.exists(self.marked_file2_path):
files.append(self.marked_file2_path)
if not files:
messagebox.showinfo("信息", "没有可用的标记结果,请先进行比较")
return
for file_path in files:
try:
# 在默认应用中打开报告
import os
import sys
if os.name == 'nt': # Windows
os.startfile(file_path)
elif os.name == 'posix': # macOS, Linux
import subprocess
subprocess.run(['open', file_path] if sys.platform == 'darwin' else ['xdg-open', file_path])
self.update_status(f"正在打开: {file_path}")
except Exception as e:
messagebox.showerror("错误", f"无法打开文件: {str(e)}")
def export_report(self):
"""导出HTML报告"""
if not hasattr(self, 'marked_file1_path') or not self.marked_file1_path:
messagebox.showinfo("信息", "请先进行比较")
return
# 获取保存路径
save_path = filedialog.asksaveasfilename(
defaultextension=".html",
filetypes=[("HTML文件", "*.html"), ("所有文件", "*.*")],
title="保存HTML报告"
)
if not save_path:
return
try:
# 生成HTML报告
html_content = self.generate_html_report()
with open(save_path, 'w', encoding='utf-8') as f:
f.write(html_content)
self.report_path = save_path
messagebox.showinfo("成功", f"HTML报告已保存到: {save_path}")
self.update_status(f"报告已导出: {save_path}")
except Exception as e:
messagebox.showerror("错误", f"导出报告失败: {str(e)}")
def generate_html_report(self):
"""生成HTML格式的报告"""
import datetime
# 获取当前时间
current_time = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
# 构建HTML内容
html = f"""
<!DOCTYPE html>
<html lang="zh-CN">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Excel差异检测报告</title>
<style>
* {{
margin: 0;
padding: 0;
box-sizing: border-box;
font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
}}
body {{
background-color: #f5f7fa;
color: #333;
line-height: 1.6;
}}
.container {{
width: 90%;
max-width: 1200px;
margin: 0 auto;
padding: 20px;
}}
header {{
text-align: center;
padding: 30px 0;
margin-bottom: 30px;
background: linear-gradient(135deg, #4a69bd, #1e3799);
color: white;
border-radius: 10px;
box-shadow: 0 4px 15px rgba(0, 0, 0, 0.1);
}}
h1 {{
font-size: 2.5rem;
margin-bottom: 10px;
}}
.header-info {{
display: flex;
justify-content: space-around;
flex-wrap: wrap;
background: rgba(255, 255, 255, 0.1);
padding: 15px;
border-radius: 8px;
margin-top: 20px;
}}
.info-item {{
margin: 10px;
padding: 10px;
min-width: 200px;
text-align: left;
}}
.stats-summary {{
display: grid;
grid-template-columns: repeat(auto-fit, minmax(250px, 1fr));
gap: 20px;
margin-bottom: 30px;
}}
.stat-card {{
background: white;
border-radius: 10px;
padding: 20px;
box-shadow: 0 4px 10px rgba(0, 0, 0, 0.05);
transition: transform 0.3s ease;
}}
.stat-card:hover {{
transform: translateY(-5px);
box-shadow: 0 6px 15px rgba(0, 0, 0, 0.1);
}}
.stat-title {{
font-size: 1.2rem;
font-weight: 600;
margin-bottom: 10px;
color: #4a69bd;
}}
.stat-value {{
font-size: 2.5rem;
font-weight: 700;
margin-bottom: 10px;
}}
.added {{ color: #2ecc71; }}
.deleted {{ color: #e74c3c; }}
.modified {{ color: #f39c12; }}
.matched {{ color: #3498db; }}
.sheet-section {{
background: white;
border-radius: 10px;
padding: 25px;
margin-bottom: 30px;
box-shadow: 0 4px 10px rgba(0, 0, 0, 0.05);
}}
.sheet-header {{
display: flex;
justify-content: space-between;
align-items: center;
flex-wrap: wrap;
margin-bottom: 20px;
padding-bottom: 15px;
border-bottom: 1px solid #eee;
}}
.sheet-name {{
font-size: 1.5rem;
font-weight: 600;
color: #1e3799;
}}
.change-type {{
display: inline-block;
padding: 4px 10px;
border-radius: 20px;
font-size: 0.8rem;
font-weight: 500;
margin-right: 5px;
}}
.change-type-added {{ background: #e6f7ed; color: #2ecc71; }}
.change-type-deleted {{ background: #fce8e6; color: #e74c3c; }}
.change-type-modified {{ background: #fef5e7; color: #f39c12; }}
.change-list {{
margin-top: 15px;
}}
.change-item {{
padding: 10px 15px;
margin: 8px 0;
border-radius: 8px;
background: #f8f9fa;
display: flex;
justify-content: space-between;
align-items: center;
}}
.change-item.added {{ border-left: 4px solid #2ecc71; }}
.change-item.deleted {{ border-left: 4px solid #e74c3c; }}
.change-item.modified {{ border-left: 4px solid #f39c12; }}
.cell-location {{
font-weight: 600;
min-width: 70px;
}}
.change-details {{
flex-grow: 1;
margin: 0 15px;
}}
.old-value {{
text-decoration: line-through;
color: #777;
}}
footer {{
text-align: center;
margin-top: 40px;
padding: 20px;
color: #777;
font-size: 0.9rem;
}}
@media (max-width: 768px) {{
.header-info {{
flex-direction: column;
}}
.stat-card {{
text-align: center;
}}
}}
</style>
</head>
<body>
<div class="container">
<header>
<h1>📊 Excel差异检测报告</h1>
<p>基于上下文感知的智能差异分析</p>
<div class="header-info">
<div class="info-item">
<strong>生成时间:</strong> {current_time}
</div>
<div class="info-item">
<strong>工具版本:</strong> 智能Excel比较工具 v1.0
</div>
<div class="info-item">
<strong>文件1:</strong> {os.path.basename(self.file1_path) if self.file1_path else '未选择'}
</div>
<div class="info-item">
<strong>文件2:</strong> {os.path.basename(self.file2_path) if self.file2_path else '未选择'}
</div>
</div>
</header>
<!-- 总体统计 -->
<div class="stats-summary">
<div class="stat-card">
<div class="stat-title">总变更数</div>
<div class="stat-value">{self.stats['total_changes']}</div>
<div>检测到的所有变更</div>
</div>
<div class="stat-card">
<div class="stat-title">新增单元格</div>
<div class="stat-value added">{self.stats['added']}</div>
<div>文件2中新增的内容</div>
</div>
<div class="stat-card">
<div class="stat-title">删除单元格</div>
<div class="stat-value deleted">{self.stats['deleted']}</div>
<div>文件2中删除的内容</div>
</div>
<div class="stat-card">
<div class="stat-title">修改单元格</div>
<div class="stat-value modified">{self.stats['modified']}</div>
<div>相同位置但值改变的内容</div>
</div>
<div class="stat-card">
<div class="stat-title">上下文匹配</div>
<div class="stat-value matched">{self.stats.get('matched_by_context', 0)}</div>
<div>基于上下文智能匹配的单元格</div>
</div>
</div>
<!-- 各工作表详细变更 -->
<h2 style="margin: 30px 0 20px; color: #1e3799;">工作表详细变更</h2>
"""
# 添加每个工作表的变更详情
for sheet_name, sheet_stats in self.stats['sheets'].items():
html += f"""
<div class="sheet-section">
<div class="sheet-header">
<div class="sheet-name">{sheet_name}</div>
<div>
<span class="change-type change-type-added">+{sheet_stats['added']}</span>
<span class="change-type change-type-deleted">-{sheet_stats['deleted']}</span>
<span class="change-type change-type-modified">~{sheet_stats['modified']}</span>
</div>
</div>
<p>工作表尺寸: {sheet_stats['rows']}行 × {sheet_stats['cols']}列</p>
<p>上下文匹配: {sheet_stats.get('matched_by_context', 0)}个单元格</p>
<div class="change-list">
<h3>主要变更:</h3>
"""
# 添加变更详情
for idx, (change_type, row, col, old_val, new_val) in enumerate(sheet_stats['changes'][:10]):
change_class = ""
change_icon = ""
change_desc = ""
if change_type == '新增':
change_class = "added"
change_icon = "➕"
change_desc = f"<div class='change-details'>新增: <span class='new-value'>{new_val}</span></div>"
elif change_type == '删除':
change_class = "deleted"
change_icon = "➖"
change_desc = f"<div class='change-details'>删除: <span class='old-value'>{old_val}</span></div>"
else: # 修改
change_class = "modified"
change_icon = "🔄"
change_desc = f"""
<div class="change-details">
<span class='old-value'>{old_val}</span> →
<span class='new-value'>{new_val}</span>
</div>
"""
html += f"""
<div class="change-item {change_class}">
<div class="cell-location">{change_icon} [{row},{col}]</div>
{change_desc}
</div>
"""
html += """
</div>
</div>
"""
# 分析建议
total_changes = self.stats['total_changes']
if total_changes > 0:
added_percent = (self.stats['added'] / total_changes) * 100
deleted_percent = (self.stats['deleted'] / total_changes) * 100
modified_percent = (self.stats['modified'] / total_changes) * 100
html += """
<div class="sheet-section">
<h2>分析建议</h2>
<ul style="list-style-type: none; padding: 15px;">
"""
if modified_percent > 50:
html += """
<li style="margin: 10px 0; padding-left: 20px; position: relative;">
<span style="position: absolute; left: 0;">⚠️</span>
<strong>主要变更类型: 数据修改</strong> - 建议重点审查关键字段和公式的修改
</li>
"""
if added_percent > 50:
html += """
<li style="margin: 10px 0; padding-left: 20px; position: relative;">
<span style="position: absolute; left: 0;">⚠️</span>
<strong>主要变更类型: 新增数据</strong> - 建议验证新增数据的完整性和准确性
</li>
"""
if deleted_percent > 50:
html += """
<li style="margin: 10px 0; padding-left: 20px; position: relative;">
<span style="position: absolute; left: 0;">⚠️</span>
<strong>主要变更类型: 数据删除</strong> - 请确认删除操作是否符合预期
</li>
"""
if self.stats.get('matched_by_context', 0) > 0:
html += f"""
<li style="margin: 10px 0; padding-left: 20px; position: relative;">
<span style="position: absolute; left: 0;">ℹ️</span>
<strong>上下文匹配:</strong> 检测到 {self.stats['matched_by_context']} 个上下文匹配项,
表明文件在结构上可能有较大变化
</li>
"""
html += """
</ul>
</div>
"""
# 页脚
html += """
<footer>
<p>报告由智能Excel比较工具生成 | 基于上下文感知的差异检测算法</p>
<p>© 2023 Excel智能分析工具 | 保留所有权利</p>
</footer>
</div>
</body>
</html>
"""
return html
def show_settings(self):
"""显示设置窗口"""
settings_win = tk.Toplevel(self.root)
settings_win.title("设置")
settings_win.geometry("400x300")
settings_win.resizable(False, False)
# 设置窗口内容
ttk.Label(settings_win, text="高级设置", font=("Arial", 12, "bold")).pack(pady=10)
# 上下文半径设置
frame_radius = ttk.Frame(settings_win)
frame_radius.pack(fill="x", padx=20, pady=10)
ttk.Label(frame_radius, text="上下文半径:").pack(side="left")
self.radius_var = tk.IntVar(value=2)
ttk.Spinbox(frame_radius, from_=1, to=5, width=5,
textvariable=self.radius_var).pack(side="left", padx=10)
# 相似度权重设置
frame_weights = ttk.Frame(settings_win)
frame_weights.pack(fill="x", padx=20, pady=10)
ttk.Label(frame_weights, text="值相似度权重:").pack(side="left")
self.value_weight_var = tk.DoubleVar(value=0.6)
ttk.Scale(frame_weights, from_=0.1, to=0.9, orient="horizontal",
length=150, variable=self.value_weight_var).pack(side="left", padx=10)
# 保存按钮
ttk.Button(settings_win, text="保存设置",
command=settings_win.destroy).pack(pady=20)
# 启动应用
if __name__ == "__main__":
app = SmartExcelComparator()
将我这个代码里面生成的比较后的报告文档,生成两份,变为生成一份,将另一个报告的内容如果有差异放到同名sheet的右方,中间空几行用以区别。如果没有变更则不需要放置。
最新发布