import tkinter as tk
from tkinter import filedialog, ttk, messagebox
import openpyxl
from openpyxl.utils import get_column_letter, column_index_from_string
from openpyxl.styles import Font, Border, PatternFill, Alignment, Protection, GradientFill
import threading
import os
import logging
from datetime import datetime
import time
import sys
# 配置日志
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
class ExcelDiffApp:
def __init__(self, root):
self.root = root
self.root.title("Excel差分工具")
self.root.geometry("800x500")
self.root.configure(bg="#f0f0f0")
# 设置网格布局
self.root.grid_rowconfigure(0, weight=1)
self.root.grid_columnconfigure(0, weight=1)
# 创建主框架
self.main_frame = ttk.Frame(self.root, padding=20)
self.main_frame.grid(row=0, column=0, sticky="nsew")
self.main_frame.grid_rowconfigure(0, weight=1)
self.main_frame.grid_columnconfigure(0, weight=1)
# 文件路径变量
self.new_file_path = tk.StringVar()
self.old_file_path = tk.StringVar()
# 创建界面组件
self.create_widgets()
def create_widgets(self):
logging.debug("创建界面组件")
# 标题
title_label = ttk.Label(
self.main_frame,
text="Excel差分工具",
font=("Arial", 16, "bold"),
foreground="#2c3e50"
)
title_label.grid(row=0, column=0, columnspan=3, pady=(0, 20))
# 文件选择区域 - 新文件
file_frame = ttk.LabelFrame(self.main_frame, text="文件选择", padding=10)
file_frame.grid(row=1, column=0, sticky="ew", padx=10, pady=5)
file_frame.columnconfigure(1, weight=1)
ttk.Label(file_frame, text="新版本Excel:").grid(row=0, column=0, sticky="w", padx=5, pady=5)
new_entry = ttk.Entry(file_frame, textvariable=self.new_file_path, width=40)
new_entry.grid(row=0, column=1, sticky="ew", padx=5, pady=5)
ttk.Button(file_frame, text="浏览", command=self.select_new_file, width=8).grid(row=0, column=2, padx=5, pady=5)
# 文件选择区域 - 旧文件
ttk.Label(file_frame, text="旧版本Excel:").grid(row=1, column=0, sticky="w", padx=5, pady=5)
old_entry = ttk.Entry(file_frame, textvariable=self.old_file_path, width=40)
old_entry.grid(row=1, column=1, sticky="ew", padx=5, pady=5)
ttk.Button(file_frame, text="浏览", command=self.select_old_file, width=8).grid(row=1, column=2, padx=5, pady=5)
# 开始按钮
button_frame = ttk.Frame(self.main_frame)
button_frame.grid(row=2, column=0, pady=20)
self.start_btn = ttk.Button(
button_frame,
text="开始差分",
command=self.start_process,
width=20,
style="Accent.TButton"
)
self.start_btn.pack()
# 进度条区域
progress_frame = ttk.LabelFrame(self.main_frame, text="处理进度", padding=10)
progress_frame.grid(row=3, column=0, sticky="ew", padx=10, pady=5)
progress_frame.columnconfigure(0, weight=1)
# 进度条
self.progress = ttk.Progressbar(
progress_frame,
orient="horizontal",
length=600,
mode="determinate"
)
self.progress.grid(row=0, column=0, sticky="ew", padx=5, pady=5)
# 百分比标签
self.progress_percent = ttk.Label(
progress_frame,
text="0%",
font=("Arial", 10),
anchor="center"
)
self.progress_percent.grid(row=1, column=0, pady=(0, 5))
# 状态标签
self.status_label = ttk.Label(
progress_frame,
text="就绪",
font=("Arial", 10),
anchor="center",
foreground="#27ae60"
)
self.status_label.grid(row=2, column=0)
# 详情标签
self.detail_label = ttk.Label(
progress_frame,
text="",
font=("Arial", 9),
anchor="center",
foreground="#7f8c8d",
wraplength=550
)
self.detail_label.grid(row=3, column=0, pady=(5, 0))
# 结果路径标签
self.result_label = ttk.Label(
self.main_frame,
text="",
font=("Arial", 9),
anchor="center",
foreground="#2980b9",
wraplength=550
)
self.result_label.grid(row=4, column=0, pady=(10, 0))
# 添加样式
self.style = ttk.Style()
self.style.configure("Accent.TButton", foreground="white", background="#3498db")
self.style.map("Accent.TButton", background=[("active", "#2980b9"), ("disabled", "#bdc3c7")])
logging.debug("界面组件创建完成")
def select_new_file(self):
logging.debug("选择新版本文件")
file = filedialog.askopenfilename(filetypes=[("Excel files", "*.xlsx *.xls")])
if file:
self.new_file_path.set(file)
logging.debug(f"新版本文件路径: {file}")
def select_old_file(self):
logging.debug("选择旧版本文件")
file = filedialog.askopenfilename(filetypes=[("Excel files", "*.xlsx *.xls")])
if file:
self.old_file_path.set(file)
logging.debug(f"旧版本文件路径: {file}")
def get_last_column(self, ws):
"""获取工作表中最后一列有内容的列号"""
max_col = 0
for row in ws.iter_rows():
for cell in row:
if cell.value is not None:
max_col = max(max_col, cell.column)
return max_col
def get_last_row(self, ws):
"""获取工作表中最后一行有内容的行号"""
max_row = 0
for row in ws.iter_rows():
for cell in row:
if cell.value is not None:
max_row = max(max_row, cell.row)
return max_row
def copy_cell_style(self, source_cell, target_cell):
"""安全复制单元格样式"""
try:
# 复制字体样式
if source_cell.font:
target_cell.font = Font(
name=source_cell.font.name,
size=source_cell.font.size,
bold=source_cell.font.bold,
italic=source_cell.font.italic,
underline=source_cell.font.underline,
strike=source_cell.font.strike,
color=source_cell.font.color
)
# 复制边框样式
if source_cell.border:
target_cell.border = Border(
left=source_cell.border.left,
right=source_cell.border.right,
top=source_cell.border.top,
bottom=source_cell.border.bottom,
diagonal=source_cell.border.diagonal,
diagonal_direction=source_cell.border.diagonal_direction,
outline=source_cell.border.outline,
vertical=source_cell.border.vertical,
horizontal=source_cell.border.horizontal
)
# 复制填充样式
if source_cell.fill:
# 处理 PatternFill
if hasattr(source_cell.fill, 'patternType'):
target_cell.fill = PatternFill(
patternType=source_cell.fill.patternType,
fgColor=source_cell.fill.fgColor,
bgColor=source_cell.fill.bgColor
)
# 处理 GradientFill
elif hasattr(source_cell.fill, 'type'):
target_cell.fill = GradientFill(
degree=source_cell.fill.degree,
stop=source_cell.fill.stop,
type=source_cell.fill.type
)
# 处理简单颜色
elif isinstance(source_cell.fill, openpyxl.styles.colors.Color):
target_cell.fill = PatternFill(patternType="solid", fgColor=source_cell.fill)
# 复制其他样式属性
target_cell.number_format = source_cell.number_format
if source_cell.alignment:
target_cell.alignment = Alignment(
horizontal=source_cell.alignment.horizontal,
vertical=source_cell.alignment.vertical,
textRotation=source_cell.alignment.textRotation,
wrapText=source_cell.alignment.wrapText,
shrinkToFit=source_cell.alignment.shrinkToFit,
indent=source_cell.alignment.indent,
relativeIndent=source_cell.alignment.relativeIndent,
justifyLastLine=source_cell.alignment.justifyLastLine,
readingOrder=source_cell.alignment.readingOrder
)
if source_cell.protection:
target_cell.protection = Protection(
locked=source_cell.protection.locked,
hidden=source_cell.protection.hidden
)
except Exception as e:
logging.warning(f"样式复制失败: {str(e)}")
def get_first_row(self, ws):
"""获取工作表中第一行有内容的行号"""
min_row = None
for row in ws.iter_rows():
for cell in row:
if cell.value is not None:
return cell.row
return 1 # 如果没有找到有内容的行,默认为第1行
def get_first_column(self, ws):
"""获取工作表中第一列有内容的列号"""
min_col = None
for col in ws.iter_cols():
for cell in col:
if cell.value is not None:
return cell.column
return 1 # 如果没有找到有内容的列,默认为第1列
def process_sheet(self, ws_new, ws_old, sheet_name, total_sheets, current_idx):
try:
# 获取行列信息
last_col_new = self.get_last_column(ws_new)
first_col_new = self.get_first_column(ws_new)
last_row_new = self.get_last_row(ws_new)
first_row_new = self.get_first_row(ws_new)
last_col_old = self.get_last_column(ws_old)
first_col_old = self.get_first_column(ws_old)
last_row_old = self.get_last_row(ws_old)
first_row_old = self.get_first_row(ws_old)
# 计算实际数据列数(新增)
data_cols_new = last_col_new - first_col_new + 1
data_cols_old = last_col_old - first_col_old + 1
# 1. 复制旧数据
start_col = last_col_new + 3 # 原始数据后空3列放置旧数据
self.detail_label.config(text=f"正在复制旧数据...")
# 计算总单元格数
total_cells = (last_row_old - first_row_old + 1) * data_cols_old
processed_cells = 0
# 只复制有数据的区域
for row_idx in range(first_row_old, last_row_old + 1):
for col_idx in range(first_col_old, last_col_old + 1):
cell = ws_old.cell(row=row_idx, column=col_idx)
if cell.value is None:
continue
new_col = start_col + (col_idx - first_col_old)
new_cell = ws_new.cell(row=row_idx, column=new_col, value=cell.value)
self.copy_cell_style(cell, new_cell)
# 更新进度
processed_cells += 1
if processed_cells % 100 == 0 or processed_cells == total_cells:
sheet_progress = int((processed_cells / total_cells) * 50)
total_progress = int((current_idx + (sheet_progress / 100.0)) / total_sheets * 100)
self.update_progress(total_progress, f"复制数据: {processed_cells}/{total_cells}单元格")
# 复制列宽
for col in range(first_col_old, last_col_old + 1):
col_letter = get_column_letter(col)
new_col = start_col + (col - first_col_old)
new_col_letter = get_column_letter(new_col)
ws_new.column_dimensions[new_col_letter].width = ws_old.column_dimensions[col_letter].width
# 2. 生成比较公式
compare_start_col = start_col + data_cols_old + 2 # 旧数据后空2列放置比较公式
self.detail_label.config(text=f"正在生成比较公式...")
# 计算总单元格数(只处理有数据的区域)
formula_cells = (last_row_new - first_row_new + 1) * data_cols_new
processed_formulas = 0
# 只处理有数据的区域
for row_idx in range(first_row_new, last_row_new + 1):
for col_idx in range(first_col_new, first_col_new + data_cols_new): # 使用data_cols_new定义范围
# 只处理新文件中有内容的单元格或对应旧文件中有内容的单元格
new_cell = ws_new.cell(row=row_idx, column=col_idx)
old_col = start_col + (col_idx - first_col_new)
old_cell = ws_new.cell(row=row_idx, column=old_col)
# 如果新旧单元格都为空,跳过不处理
if new_cell.value is None and (old_col > ws_new.max_column or old_cell.value is None):
continue
# 计算列字母引用
old_col_letter = get_column_letter(old_col)
new_col_letter = get_column_letter(col_idx)
# 创建公式
formula = (
f'=IF(AND({new_col_letter}{row_idx}<>"",{old_col_letter}{row_idx}<>""),'
f'IF({new_col_letter}{row_idx}={old_col_letter}{row_idx},"无变更","变更"),'
f'IF({new_col_letter}{row_idx}<>"","新增",'
f'IF({old_col_letter}{row_idx}<>"","删除","无变更")))'
)
# 插入公式
target_col = compare_start_col + (col_idx - first_col_new)
target_cell = ws_new.cell(row=row_idx, column=target_col, value=formula)
# 复制格式(使用新文件中的格式)
source_cell = ws_new.cell(row=row_idx, column=col_idx)
self.copy_cell_style(source_cell, target_cell)
# 更新进度
processed_formulas += 1
if processed_formulas % 100 == 0 or processed_formulas == formula_cells:
sheet_progress = int((processed_formulas / formula_cells) * 40) + 50
total_progress = int((current_idx + (sheet_progress / 100.0)) / total_sheets * 100)
self.update_progress(total_progress, f"生成公式: {processed_formulas}/{formula_cells}单元格")
# 3. 生成统计列 - 修复部分
n_col = compare_start_col + data_cols_new # 放在比较公式区域之后
self.detail_label.config(text=f"正在生成统计列...")
# 计算实际需要统计的列数(有内容的列)
actual_count_cols = 0
for col_idx in range(compare_start_col, compare_start_col + data_cols_new):
col_letter = get_column_letter(col_idx)
# 检查该列是否有内容(从第2行开始)
for row_idx in range(first_row_new + 1, last_row_new + 1):
if ws_new.cell(row=row_idx, column=col_idx).value is not None:
actual_count_cols += 1
break
# 生成统计公式
for row_idx in range(first_row_new + 1, last_row_new + 1): # 从第2行开始
# 跳过完全空的行
row_has_data = False
for col_idx in range(first_col_new, last_col_new + 1):
if ws_new.cell(row=row_idx, column=col_idx).value is not None:
row_has_data = True
break
if not row_has_data:
continue
# 动态生成统计公式
formula_parts = []
for col_idx in range(compare_start_col, compare_start_col + data_cols_new):
col_letter = get_column_letter(col_idx)
formula_parts.append(f'{col_letter}{row_idx}="无变更"')
# 只有有内容的列才包含在统计中
if formula_parts:
formula = f'=IF(AND({",".join(formula_parts)}), "无", "有")'
else:
formula = '"无数据"' # 没有可统计的列
target_cell = ws_new.cell(row=row_idx, column=n_col, value=formula)
# 复制格式
if last_col_new > 0:
source_cell = ws_new.cell(row=row_idx, column=last_col_new)
self.copy_cell_style(source_cell, target_cell)
# 更新进度 - 当前工作表完成
progress = int((current_idx + 1) / total_sheets * 100)
self.update_progress(progress, f"工作表 {sheet_name} 处理完成")
except Exception as e:
logging.error(f"处理工作表 {sheet_name} 时出错: {str(e)}", exc_info=True)
self.status_label.config(text="错误", foreground="#e74c3c")
self.detail_label.config(text=f"处理工作表 {sheet_name} 时出错: {str(e)}", foreground="#e74c3c")
messagebox.showerror("处理错误", f"处理工作表 {sheet_name} 时出错:\n{str(e)}")
def update_progress(self, progress, detail=""):
"""更新进度条和状态标签"""
self.progress["value"] = progress
self.progress_percent.config(text=f"{progress}%")
self.detail_label.config(text=detail)
self.root.update_idletasks()
def start_process(self):
logging.debug("开始处理流程")
new_file = self.new_file_path.get()
old_file = self.old_file_path.get()
if not all([new_file, old_file]):
messagebox.showerror("错误", "请先选择新旧版本文件")
return
try:
self.start_btn.config(state=tk.DISABLED)
self.status_label.config(text="正在处理...", foreground="orange")
self.update_progress(0, "准备开始...")
self.result_label.config(text="")
# 加载工作簿
logging.debug(f"加载新文件: {new_file}")
self.update_progress(1, "加载新文件...")
wb_new = openpyxl.load_workbook(new_file)
logging.debug(f"加载旧文件: {old_file}")
self.update_progress(3, "加载旧文件...")
wb_old = openpyxl.load_workbook(old_file)
# 获取共同存在的sheet
self.update_progress(5, "查找共同工作表...")
sheets = [sheet for sheet in wb_new.sheetnames if sheet in wb_old.sheetnames]
total_sheets = len(sheets)
logging.debug(f"找到共同工作表: {sheets}, 数量: {total_sheets}")
if total_sheets == 0:
messagebox.showerror("错误", "新旧Excel文件没有共同的工作表")
self.reset_ui()
return
def worker():
try:
start_time = time.time()
for idx, sheet_name in enumerate(sheets):
logging.debug(f"开始处理工作表 {sheet_name} (第 {idx+1}/{total_sheets} 个)")
ws_new = wb_new[sheet_name]
ws_old = wb_old[sheet_name]
# 更新状态
sheet_progress = int(idx / total_sheets * 100)
self.update_progress(sheet_progress, f"开始处理工作表: {sheet_name}")
self.process_sheet(ws_new, ws_old, sheet_name, total_sheets, idx)
# 保存结果
self.update_progress(95, "正在保存结果...")
output_dir = os.path.dirname(new_file) or os.getcwd()
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
output_path = os.path.join(output_dir, f"diff_result_{timestamp}.xlsx")
logging.debug(f"保存结果到: {output_path}")
wb_new.save(output_path)
# 计算处理时间
elapsed = time.time() - start_time
self.root.after(0, lambda: self.show_completion(output_path, elapsed))
except Exception as e:
logging.error(f"处理过程中出错: {str(e)}", exc_info=True)
self.root.after(0, lambda: messagebox.showerror("处理错误", f"处理过程中出错:\n{str(e)}"))
self.root.after(0, lambda: self.reset_ui())
# 启动处理线程
threading.Thread(target=worker, daemon=True).start()
except Exception as e:
messagebox.showerror("错误", f"加载文件失败: {str(e)}")
self.reset_ui()
logging.debug("处理流程结束")
def reset_ui(self):
logging.debug("重置UI状态")
self.start_btn.config(state=tk.NORMAL)
self.status_label.config(text="就绪", foreground="#27ae60")
self.detail_label.config(text="")
def show_completion(self, output_path, elapsed_time):
logging.debug(f"处理完成,结果保存至: {output_path}")
self.update_progress(100, "处理完成!")
self.status_label.config(text="完成", foreground="#27ae60")
self.detail_label.config(text=f"处理耗时: {elapsed_time:.2f}秒")
self.result_label.config(text=f"结果保存至: {output_path}")
self.start_btn.config(state=tk.NORMAL)
messagebox.showinfo("完成", f"差分处理完成!\n耗时: {elapsed_time:.2f}秒\n结果已保存至:\n{output_path}")
if __name__ == "__main__":
root = tk.Tk()
# Windows系统设置应用感观
if sys.platform.startswith('win'):
from ctypes import windll
windll.shcore.SetProcessDpiAwareness(1) # 启用高DPI支持
app = ExcelDiffApp(root)
root.mainloop()
这个统计公式还是有问题的,1、单元格很多大于100个时,公式很大很乱。2、比较公式的填充因为有一些单元格为空,那么改比较公式单元格就为空,但是统计公式并没有进行优化,仍然判定了,所以需要进行修改
最新发布