import os
import sys
import tempfile
import tkinter as tk
from tkinter import filedialog, messagebox, colorchooser, ttk
from io import BytesIO
import re
from datetime import datetime
import traceback
# 添加缺失的导入
import fitz # PyMuPDF
class PDFDateHighlighter:
    """Tkinter application that highlights selected dates in a PDF and prints it.

    The window shows two consecutive pages side by side.  "处理PDF" writes a
    highlighted copy of the chosen PDF to a temporary file (every date in
    ``specified_dates`` plus every Sunday written as ``YYYY-MM-DD``) and
    displays that copy.  The user can additionally paint strokes on the
    canvases, erase them and undo them; those strokes exist only on screen and
    are not written into the PDF.

    Attributes:
        specified_dates: ``YYYY-MM-DD`` strings that are always highlighted.
        pdf_path: path of the PDF chosen by the user, or None.
        highlighted_pdf_path: temporary file holding the processed copy, or None.
        current_page: zero-based index of the page shown on the left canvas.
        total_pages: page count of the document currently displayed.
        pdf_document: the open PyMuPDF document being displayed, or None.
        highlight_color: highlight colour as an RGB tuple of floats in 0..1.
        highlight_alpha: highlight opacity in percent (50 means half opaque).
        brush_width: stroke width, in canvas pixels, used for manual painting
            and as the eraser radius.
        drawing_items: finished strokes, oldest first; each stroke is a list of
            ``(canvas, item_id)`` pairs.  The canvas is stored because each
            canvas numbers its items independently.
        current_drawing: ``(canvas, item_id)`` pairs of the stroke in progress.
    """

    def __init__(self, root):
        """Store the Tk root, initialise all state and build the widgets."""
        self.root = root
        self.root.title("PDF日期高亮与打印工具")
        self.root.geometry("1200x700")  # wide enough for two pages side by side
        # Dates that are highlighted regardless of their weekday.
        self.specified_dates = [
            "2025-01-01", "2025-01-27", "2025-01-28", "2025-01-29",
            "2025-03-28", "2025-03-29", "2025-03-31", "2025-04-01",
            "2025-04-02", "2025-04-03", "2025-04-04", "2025-04-07",
            "2025-04-18", "2025-04-20", "2025-05-01", "2025-05-12",
            "2025-05-13", "2025-05-29", "2025-05-30", "2025-06-01",
            "2025-06-06", "2025-06-09", "2025-06-27", "2025-08-17",
            "2025-09-05", "2025-12-25", "2025-12-26",
        ]
        self.pdf_path = None
        self.highlighted_pdf_path = None
        self.current_page = 0
        self.total_pages = 0
        self.pdf_document = None
        self.highlight_color = (1.0, 1.0, 0.0)  # fluorescent yellow
        self.highlight_alpha = 50  # percent; kept low so the text stays readable
        self.current_images = []  # keeps the PhotoImages of both pages referenced
        # Manual painting state.
        self.drawing_mode = False
        self.erasing_mode = False
        self.start_x = None
        self.start_y = None
        self.brush_width = 10
        self.drawing_items = []
        self.current_drawing = []
        self.setup_ui()

    # ------------------------------------------------------------------ UI

    def setup_ui(self):
        """Create and lay out the file row, toolbar, page canvases and status bar."""
        fill = (tk.W, tk.E, tk.N, tk.S)
        main_frame = ttk.Frame(self.root, padding="10")
        main_frame.grid(row=0, column=0, sticky=fill)
        self.root.columnconfigure(0, weight=1)
        self.root.rowconfigure(0, weight=1)
        main_frame.columnconfigure(1, weight=1)
        main_frame.rowconfigure(3, weight=1)

        # File selection row.
        ttk.Label(main_frame, text="PDF文件:").grid(row=0, column=0, sticky=tk.W, pady=5)
        self.file_path = ttk.Entry(main_frame, width=80)
        self.file_path.grid(row=0, column=1, sticky=(tk.W, tk.E), pady=5, padx=5)
        ttk.Button(main_frame, text="浏览", command=self.browse_file).grid(row=0, column=2, pady=5)

        # Toolbar; widgets are packed left to right in creation order.
        btn_frame = ttk.Frame(main_frame)
        btn_frame.grid(row=1, column=0, columnspan=3, pady=10)
        for label, handler in (
            ("撤回", self.undo_drawing),
            ("处理PDF", self.process_pdf),
            ("上一页", self.prev_page),
            ("下一页", self.next_page),
            ("打印", self.print_pdf),
            ("选择高亮颜色", self.choose_color),
        ):
            ttk.Button(btn_frame, text=label, command=handler).pack(side=tk.LEFT, padx=5)

        # Clickable swatch showing the current highlight colour.
        self.color_icon = tk.Canvas(btn_frame, width=20, height=20, bg="#ffff00")
        self.color_icon.pack(side=tk.LEFT, padx=5)
        self.color_icon.bind("<Button-1>", lambda e: self.choose_color())

        ttk.Button(btn_frame, text="手动涂改", command=self.toggle_drawing).pack(side=tk.LEFT, padx=5)

        # Brush width.  The box starts at the width that is actually in effect.
        ttk.Label(btn_frame, text="笔宽:").pack(side=tk.LEFT, padx=5)
        self.brush_width_var = tk.StringVar(value=str(self.brush_width))
        brush_width_spin = ttk.Spinbox(
            btn_frame, from_=1, to=20, width=5, textvariable=self.brush_width_var
        )
        brush_width_spin.pack(side=tk.LEFT, padx=5)
        # A binding on the widget itself runs before the widget-class binding
        # that performs the spin, so the value is read once Tk is idle again;
        # reading it immediately would return the previous value.
        for sequence in ("<<Increment>>", "<<Decrement>>"):
            brush_width_spin.bind(
                sequence, lambda e: self.root.after_idle(self.update_brush_width)
            )
        # Values typed by hand are picked up on Enter and when focus leaves.
        brush_width_spin.bind("<Return>", self.update_brush_width)
        brush_width_spin.bind("<FocusOut>", self.update_brush_width)

        ttk.Button(btn_frame, text="橡皮擦", command=self.toggle_erasing).pack(side=tk.LEFT, padx=5)

        self.page_label = ttk.Label(main_frame, text="页面: 0/0")
        self.page_label.grid(row=2, column=0, columnspan=3, pady=5)

        # Two canvases, one per displayed page.
        self.pdf_frame = ttk.Frame(main_frame)
        self.pdf_frame.grid(row=3, column=0, columnspan=3, sticky=fill, pady=10)
        self.pdf_frame.columnconfigure(0, weight=1)
        self.pdf_frame.columnconfigure(1, weight=1)
        self.pdf_frame.rowconfigure(0, weight=1)
        self.canvas1 = tk.Canvas(self.pdf_frame, bg="white")
        self.canvas2 = tk.Canvas(self.pdf_frame, bg="white")

        # The vertical scrollbar flips pages; the horizontal one pans both canvases.
        self.page_scrollbar = ttk.Scrollbar(self.pdf_frame, orient=tk.VERTICAL)
        self.h_scrollbar = ttk.Scrollbar(self.pdf_frame, orient=tk.HORIZONTAL)
        self.canvas1.configure(xscrollcommand=self.h_scrollbar.set)
        self.canvas2.configure(xscrollcommand=self.h_scrollbar.set)
        self.h_scrollbar.configure(command=self.sync_scroll_x)
        self.page_scrollbar.configure(command=self.scroll_pages)

        self.canvas1.grid(row=0, column=0, sticky=fill, padx=(0, 5))
        self.canvas2.grid(row=0, column=1, sticky=fill, padx=(5, 0))
        self.page_scrollbar.grid(row=0, column=2, sticky=(tk.N, tk.S))
        self.h_scrollbar.grid(row=1, column=0, columnspan=2, sticky=(tk.W, tk.E))

        for canvas in (self.canvas1, self.canvas2):
            # <MouseWheel> plus the Button-4/Button-5 pair used for the wheel on Linux.
            for sequence in ("<MouseWheel>", "<Button-4>", "<Button-5>"):
                canvas.bind(sequence, self.on_mousewheel)
            canvas.bind("<ButtonPress-1>", self.start_drawing)
            canvas.bind("<B1-Motion>", self.draw)
            canvas.bind("<ButtonRelease-1>", self.stop_drawing)

        self.status_bar = ttk.Label(self.root, text="就绪", relief=tk.SUNKEN, anchor=tk.W)
        self.status_bar.grid(row=4, column=0, sticky=(tk.W, tk.E))

    # ----------------------------------------------------------- scrolling

    def sync_scroll_x(self, *args):
        """Apply one horizontal scrollbar command to both canvases."""
        self.canvas1.xview(*args)
        self.canvas2.xview(*args)

    def _scroll_target(self, args):
        """Translate a Tk scrollbar command into the left-hand page to show.

        Args:
            args: ``("moveto", fraction)`` or ``("scroll", number, what)``.

        Returns:
            An even page index within the document, or None when the command
            is not recognised or its numbers cannot be parsed.
        """
        last_page = self.total_pages - 1
        if last_page < 0:
            return None
        try:
            if len(args) > 1 and args[0] == "moveto":
                # Dragging can report fractions slightly outside 0..1.
                fraction = min(1.0, max(0.0, float(args[1])))
                target = int(fraction * last_page)
            elif len(args) > 2 and args[0] == "scroll":
                # args[2] is a unit name ("units"/"pages"), not a number; each
                # step moves by one two-page spread.
                target = self.current_page + 2 * int(args[1])
            else:
                return None
        except ValueError:
            return None
        target = min(last_page, max(0, target))
        return target - target % 2  # the left page always has an even index

    def scroll_pages(self, *args):
        """Handle the page scrollbar by jumping to the spread it points at."""
        if self.pdf_document is None or self.total_pages == 0:
            return
        target_page = self._scroll_target(args)
        if target_page is not None and target_page != self.current_page:
            self.current_page = target_page
            self.display_page()

    def update_scrollbar(self):
        """Move the page scrollbar slider to reflect ``current_page``."""
        if self.pdf_document is None or self.total_pages == 0:
            return
        scroll_pos = self.current_page / (self.total_pages - 1) if self.total_pages > 1 else 0
        self.page_scrollbar.set(scroll_pos, scroll_pos + 1 / self.total_pages)

    def on_mousewheel(self, event):
        """Flip one spread back (wheel up) or forward (wheel down)."""
        if event.num == 4 or event.delta > 0:
            self.prev_page()
        elif event.num == 5 or event.delta < 0:
            self.next_page()

    # ------------------------------------------------------ manual painting

    def toggle_drawing(self):
        """Switch manual painting on or off; erasing is always switched off."""
        self.drawing_mode = not self.drawing_mode
        self.erasing_mode = False
        if self.drawing_mode:
            self.status_bar.config(text="手动涂改模式已启用 - 点击并拖动以涂改")
        else:
            self.status_bar.config(text="手动涂改模式已禁用")

    def toggle_erasing(self):
        """Switch the eraser on or off; painting is always switched off."""
        self.erasing_mode = not self.erasing_mode
        self.drawing_mode = False
        if self.erasing_mode:
            self.status_bar.config(text="橡皮擦模式已启用 - 点击并拖动以擦除")
        else:
            self.status_bar.config(text="橡皮擦模式已禁用")

    def update_brush_width(self, event=None):
        """Copy the brush-width box into ``brush_width``.

        Text that is not an integer resets both the attribute and the box to
        5.  Values below 1 are raised to 1.

        Args:
            event: the Tk event when used as a binding; unused.
        """
        try:
            width = int(self.brush_width_var.get())
        except ValueError:
            self.brush_width = 5
            self.brush_width_var.set("5")
            return
        self.brush_width = max(1, width)

    def _highlight_hex(self):
        """Return ``highlight_color`` as a ``#rrggbb`` string for Tk."""
        red, green, blue = (int(channel * 255) for channel in self.highlight_color)
        return "#{:02x}{:02x}{:02x}".format(red, green, blue)

    def start_drawing(self, event):
        """Begin a stroke at the pointer position when a paint/erase mode is on."""
        if not self.drawing_mode and not self.erasing_mode:
            return
        # Pick up a width that was typed but not yet committed.
        self.update_brush_width()
        canvas = event.widget
        # Window coordinates differ from canvas coordinates once the canvas
        # has been scrolled horizontally.
        self.start_x = canvas.canvasx(event.x)
        self.start_y = canvas.canvasy(event.y)
        self.current_drawing = []

    def draw(self, event):
        """Extend the stroke to the pointer, or erase painted items under it."""
        if not (self.drawing_mode or self.erasing_mode):
            return
        if self.start_x is None or self.start_y is None:
            return
        canvas = event.widget
        x = canvas.canvasx(event.x)
        y = canvas.canvasy(event.y)
        if self.drawing_mode:
            # One line segment per motion event, drawn at the brush width.
            # The stipple pattern lets the page show through the paint.
            item_id = canvas.create_line(
                self.start_x, self.start_y, x, y,
                fill=self._highlight_hex(),
                width=self.brush_width,
                capstyle=tk.ROUND,
                stipple="gray50",
            )
            self.current_drawing.append((canvas, item_id))
        else:
            radius = self.brush_width
            # Only items painted on this canvas may be erased; the page image
            # and items of the other canvas (which may share ids) are kept.
            erasable = set(self.get_all_drawing_items(canvas))
            for item_id in canvas.find_overlapping(x - radius, y - radius, x + radius, y + radius):
                if item_id in erasable:
                    canvas.delete(item_id)
                    self.remove_drawing_item(item_id, canvas)
        self.start_x = x
        self.start_y = y

    def get_all_drawing_items(self, canvas=None):
        """Return the item ids of all finished strokes.

        Args:
            canvas: when given, only ids of items painted on that canvas.

        Returns:
            A list of canvas item ids in stroke order.
        """
        item_ids = []
        for stroke in self.drawing_items:
            for owner, item_id in stroke:
                if canvas is None or owner is canvas:
                    item_ids.append(item_id)
        return item_ids

    def remove_drawing_item(self, item_id, canvas=None):
        """Forget the first stored item with this id; drop strokes left empty.

        Args:
            item_id: canvas item id to remove from the bookkeeping.
            canvas: when given, only an item painted on that canvas matches.
        """
        for index, stroke in enumerate(self.drawing_items):
            for entry in stroke:
                owner, stored_id = entry
                if stored_id == item_id and (canvas is None or owner is canvas):
                    stroke.remove(entry)
                    if not stroke:
                        self.drawing_items.pop(index)
                    return

    def stop_drawing(self, event):
        """Finish the stroke; painted strokes are recorded so they can be undone."""
        if self.current_drawing and self.drawing_mode:
            self.drawing_items.append(list(self.current_drawing))
        self.start_x = None
        self.start_y = None
        self.current_drawing = []

    def undo_drawing(self):
        """Delete the most recently finished stroke from its canvas."""
        if not self.drawing_items:
            return
        for canvas, item_id in self.drawing_items.pop():
            canvas.delete(item_id)
        self.status_bar.config(text="已撤回最后一次涂改")

    def choose_color(self):
        """Let the user pick the highlight colour and update the swatch."""
        rgb, hex_code = colorchooser.askcolor(
            title="选择高亮颜色", initialcolor=self._highlight_hex()
        )
        if not rgb:  # dialog cancelled
            return
        red, green, blue = rgb
        self.highlight_color = (red / 255, green / 255, blue / 255)
        self.color_icon.config(bg=hex_code)

    # ------------------------------------------------------- file handling

    @staticmethod
    def _remove_file(path):
        """Delete ``path`` if it exists; failures are ignored.

        Cleanup of temporary files is best effort: a file that is still open
        elsewhere must not turn into a user-facing error.
        """
        if not path or not os.path.exists(path):
            return
        try:
            os.unlink(path)
        except OSError:
            pass

    def _close_document(self):
        """Close the displayed document, if any, and reset the page counters."""
        if self.pdf_document is not None:
            self.pdf_document.close()
        self.pdf_document = None
        self.total_pages = 0
        self.current_page = 0

    def browse_file(self):
        """Ask for a PDF file, show its path in the entry and load it."""
        try:
            file_path = filedialog.askopenfilename(filetypes=[("PDF文件", "*.pdf")])
            if file_path:
                self.file_path.delete(0, tk.END)
                self.file_path.insert(0, file_path)
                self.pdf_path = file_path
                self.load_pdf()
        except Exception as e:
            messagebox.showerror("错误", f"浏览文件时发生错误: {str(e)}")

    def load_pdf(self):
        """Open ``pdf_path`` and display its first spread."""
        if not self.pdf_path:
            return
        try:
            self._close_document()
            self.pdf_document = fitz.open(self.pdf_path)
            self.total_pages = len(self.pdf_document)
            self.current_page = 0
            # A processed copy made earlier belongs to the previously loaded
            # file; discard it so printing cannot pick up the wrong document.
            self._remove_file(self.highlighted_pdf_path)
            self.highlighted_pdf_path = None
            self.display_page()
            self.update_scrollbar()
        except Exception as e:
            messagebox.showerror("错误", f"无法打开PDF文件: {str(e)}")

    def _render_page(self, canvas, page_index):
        """Render one page of the displayed document onto ``canvas``."""
        page = self.pdf_document[page_index]
        pixmap = page.get_pixmap(matrix=fitz.Matrix(1.0, 1.0))
        image = tk.PhotoImage(data=pixmap.tobytes("ppm"))
        self.current_images.append(image)  # Tk shows nothing once the image is collected
        canvas.config(width=min(image.width(), 550), height=min(image.height(), 500))
        canvas.delete("all")
        canvas.create_image(0, 0, anchor=tk.NW, image=image)
        canvas.config(scrollregion=canvas.bbox(tk.ALL))

    def display_page(self):
        """Show ``current_page`` on the left canvas and the next page on the right."""
        if self.pdf_document is None or self.total_pages == 0:
            return
        first_shown = self.current_page + 1
        last_shown = min(self.current_page + 2, self.total_pages)
        self.page_label.config(text=f"页面: {first_shown}-{last_shown}/{self.total_pages}")
        try:
            self.current_images = []
            # Both canvases are wiped below, so every recorded stroke is gone.
            self.drawing_items = []
            self.current_drawing = []
            if self.current_page < self.total_pages:
                self._render_page(self.canvas1, self.current_page)
            if self.current_page + 1 < self.total_pages:
                self._render_page(self.canvas2, self.current_page + 1)
            else:
                # No right-hand page: blank and collapse the second canvas.
                self.canvas2.delete("all")
                self.canvas2.config(width=0, height=0)
            self.update_scrollbar()
        except Exception as e:
            messagebox.showerror("错误", f"显示页面时发生错误: {str(e)}")

    def prev_page(self):
        """Go back one spread, stopping at the first page."""
        if self.current_page > 0:
            self.current_page = max(0, self.current_page - 2)
            self.display_page()

    def next_page(self):
        """Go forward one spread if there is one."""
        if self.pdf_document is not None and self.current_page + 2 < self.total_pages:
            self.current_page += 2
            self.display_page()

    # ---------------------------------------------------------- processing

    def _highlight_opacity(self):
        """Return ``highlight_alpha`` as the 0..1 fraction used for annotations.

        Values above 1 are read as percentages; the result is clamped to 0..1.
        """
        alpha = self.highlight_alpha
        if alpha > 1:
            alpha = alpha / 100
        return max(0.0, min(1.0, alpha))

    def _highlight_page(self, page, page_num):
        """Add a highlight annotation over every qualifying date on ``page``.

        Args:
            page: the PyMuPDF page to annotate.
            page_num: zero-based page index, used only in the debug output.
        """
        # NOTE(review): only the page's extractable text is searched.  Dates
        # that exist solely inside images presumably need OCR to be found;
        # that is not done here.
        text = page.get_text("text")
        dates = re.findall(r"\d{4}-\d{2}-\d{2}", text)
        print(f"在第 {page_num+1} 页找到的日期: {dates}")
        opacity = self._highlight_opacity()
        # search_for() returns every occurrence of a date, so each distinct
        # date is handled once; repeats would stack identical annotations.
        for date_str in dict.fromkeys(dates):
            if not self.should_highlight(date_str):
                continue
            print(f"高亮日期: {date_str}")
            text_instances = page.search_for(date_str)
            print(f"找到 {len(text_instances)} 个匹配项")
            for inst in text_instances:
                highlight = page.add_highlight_annot(inst)
                highlight.set_colors(stroke=self.highlight_color)
                highlight.set_opacity(opacity)
                highlight.update()

    def process_pdf(self):
        """Write a highlighted copy of ``pdf_path`` to a temp file and show it.

        ``highlighted_pdf_path`` is replaced only after the copy was saved, and
        the copy produced by an earlier run is deleted.
        """
        if not self.pdf_path:
            messagebox.showwarning("警告", "请先选择PDF文件")
            return
        self.status_bar.config(text="正在处理PDF...")
        self.root.update()
        output_path = None  # temp file this call still has to clean up
        try:
            temp_file = tempfile.NamedTemporaryFile(suffix='.pdf', delete=False)
            output_path = temp_file.name
            temp_file.close()

            doc = fitz.open(self.pdf_path)
            try:
                for page_num in range(len(doc)):
                    self._highlight_page(doc[page_num], page_num)
                doc.save(output_path)
            finally:
                doc.close()

            # Close the displayed document first: it may be the old copy,
            # which can then be deleted.
            self._close_document()
            previous_output = self.highlighted_pdf_path
            self.highlighted_pdf_path, output_path = output_path, None
            self._remove_file(previous_output)

            self.pdf_document = fitz.open(self.highlighted_pdf_path)
            self.total_pages = len(self.pdf_document)
            self.current_page = 0
            self.display_page()
            self.status_bar.config(text="处理完成")
            messagebox.showinfo("成功", "PDF处理完成,日期已高亮显示")
        except Exception as e:
            self._remove_file(output_path)
            self.status_bar.config(text="处理失败")
            error_msg = f"处理PDF时发生错误: {str(e)}\n\n详细信息:\n{traceback.format_exc()}"
            messagebox.showerror("错误", error_msg)

    def should_highlight(self, date_str):
        """Return True for a date in ``specified_dates`` or falling on a Sunday.

        Args:
            date_str: text in ``YYYY-MM-DD`` form.

        Returns:
            False for text that is not a valid calendar date, unless that
            exact text is listed in ``specified_dates``.
        """
        if date_str in self.specified_dates:
            return True
        try:
            date_obj = datetime.strptime(date_str, "%Y-%m-%d")
        except ValueError:
            return False
        return date_obj.weekday() == 6  # Monday is 0, Sunday is 6

    def print_pdf(self):
        """Hand the processed PDF to the platform's print/open command."""
        if not self.highlighted_pdf_path or not os.path.exists(self.highlighted_pdf_path):
            messagebox.showwarning("警告", "请先处理PDF文件")
            return
        try:
            if sys.platform == "win32":
                os.startfile(self.highlighted_pdf_path, "print")
            elif sys.platform == "darwin":
                os.system(f"lpr '{self.highlighted_pdf_path}'")
            else:
                # Other platforms: open the file with the default application.
                os.system(f"xdg-open '{self.highlighted_pdf_path}'")
            self.status_bar.config(text="打印任务已发送")
        except Exception as e:
            messagebox.showerror("错误", f"打印失败: {str(e)}")

    def __del__(self):
        """Close the displayed document and delete the temporary copy.

        Attributes are read with ``getattr`` because the instance may be only
        partially initialised; nothing may propagate out of a finalizer.
        """
        document = getattr(self, "pdf_document", None)
        if document is not None:
            try:
                document.close()
            except Exception:
                pass
        # Closed first so that the file is no longer held open when deleted.
        try:
            self._remove_file(getattr(self, "highlighted_pdf_path", None))
        except Exception:
            pass
def main():
    """Start the application and show any start-up or runtime failure in a dialog.

    A missing PyMuPDF installation is reported with an installation hint and
    ends the function; any other exception is shown with its traceback.
    """
    try:
        # Make sure PyMuPDF can be imported before a window is created.
        try:
            import fitz
        except ImportError:
            messagebox.showerror("缺少依赖", "请安装PyMuPDF库: pip install PyMuPDF")
            return

        window = tk.Tk()
        # Bound to a name so the application object lives as long as the loop.
        app = PDFDateHighlighter(window)
        window.mainloop()
    except Exception as exc:
        reason = str(exc)
        details = traceback.format_exc()
        messagebox.showerror(
            "严重错误",
            f"程序发生未预期错误: {reason}\n\n详细信息:\n{details}",
        )


# Run the GUI only when this file is executed as a script.
if __name__ == "__main__":
    main()
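# Reported issue 1: when painting manually, the stroke thickness does not get
# thicker or thinner according to the value entered in the brush-width box;
# the brush width defaults to 10 mm.
# Reported issue 2: after clicking the "处理PDF" button, the images in the PDF
# are not scanned automatically to find the specified dates and the Sunday
# dates and colour them; for example, "2025-06-27" is not coloured.
# Latest release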