e576. Drawing with Alpha

// See e575 The Quintessential Drawing Program
    public void paint(Graphics g) {
        Graphics2D g2d = (Graphics2D)g;
    
        // Draw background...
    
        // Set alpha.  0.0f is 100% transparent and 1.0f is 100% opaque.
        float alpha = .3f;
        g2d.setComposite(AlphaComposite.getInstance(AlphaComposite.SRC_OVER, alpha));
    
        // Draw foreground...
    }

 

Related Examples
import os import sys import tempfile import tkinter as tk from tkinter import filedialog, messagebox, colorchooser, ttk from io import BytesIO import re from datetime import datetime import traceback import shutil import subprocess # 尝试导入win32print,如果不可用则提供替代方案 try: import win32print # 用于获取打印机列表 WIN32PRINT_AVAILABLE = True except ImportError: WIN32PRINT_AVAILABLE = False print("win32print模块不可用,将使用默认打印机") # 添加缺失的导入 import fitz # PyMuPDF # 尝试导入OCR相关库 try: from pdf2image import convert_from_path import pytesseract from PIL import Image, ImageDraw, ImageFont # 检查Tesseract OCR是否可用 try: pytesseract.get_tesseract_version() OCR_AVAILABLE = True except: OCR_AVAILABLE = False print("Tesseract OCR未正确安装或配置") except ImportError: OCR_AVAILABLE = False class PDFDateHighlighter: def __init__(self, root): self.root = root self.root.title("PDF日期高亮与打印工具") self.root.geometry("1200x700") # 增大窗口以适应两页显示 # 指定日期列表 - 已更新 self.specified_dates = [ "2025-01-01", "2025-01-27", "2025-01-29", "2025-03-29", "2025-03-31", "2025-04-01", "2025-04-18", "2025-04-20", "2025-05-01", "2025-05-12", "2025-05-29", "2025-06-01", "2025-06-06", "2025-06-27", "2025-08-17", "2025-09-05", "2025-12-25" ] self.pdf_path = None self.highlighted_pdf_path = None self.current_page = 0 self.total_pages = 0 self.pdf_document = None # 使用不同的颜色作为默认值 self.friday_color = (0.0, 1.0, 0.0) # 周五颜色为绿色 self.sunday_color = (1.0, 0.0, 0.0) # 周日颜色为红色 self.other_red_color = (1.0, 0.0, 0.0) # 指定红日颜色为红色 self.highlight_alpha = 50 # 降低不透明度以确保文字可见 self.current_images = [] # 保持对图像的引用(两页) # 手动涂改相关变量 self.drawing_mode = False self.erasing_mode = False # 擦除模式 self.start_x = None self.start_y = None self.brush_width = 15 # 默认笔宽改为15mm self.drawing_items = [] # 存储绘制的项目用于撤回 self.current_drawing = [] # 当前绘制的项目 self.drawing_color = (1.0, 0.0, 0.0) # 涂改颜色,默认为红色 self.setup_ui() def setup_ui(self): # 主框架 main_frame = ttk.Frame(self.root, padding="10") main_frame.grid(row=0, column=0, sticky=(tk.W, tk.E, tk.N, tk.S)) # 配置行列权重 self.root.columnconfigure(0, weight=1) self.root.rowconfigure(0, weight=1) main_frame.columnconfigure(1, weight=1) main_frame.rowconfigure(3, weight=1) # 文件选择区域 ttk.Label(main_frame, text="PDF文件:").grid(row=0, column=0, sticky=tk.W, pady=5) self.file_path = ttk.Entry(main_frame, width=80) self.file_path.grid(row=0, column=1, sticky=(tk.W, tk.E), pady=5, padx=5) ttk.Button(main_frame, text="浏览", command=self.browse_file).grid(row=0, column=2, pady=5) # 按钮区域 btn_frame = ttk.Frame(main_frame) btn_frame.grid(row=1, column=0, columnspan=3, pady=10) # 调整按钮顺序:将PDF转COR按钮放在撤回和处理PDF按钮之间 ttk.Button(btn_frame, text="保存为PDF", command=self.save_as_pdf).pack(side=tk.LEFT, padx=5) ttk.Button(btn_frame, text="撤回", command=self.undo_drawing).pack(side=tk.LEFT, padx=5) ttk.Button(btn_frame, text="PDF转COR", command=self.pdf_to_cor).pack(side=tk.LEFT, padx=5) # 调整位置 ttk.Button(btn_frame, text="处理PDF", command=self.process_pdf).pack(side=tk.LEFT, padx=5) ttk.Button(btn_frame, text="上一页", command=self.prev_page).pack(side=tk.LEFT, padx=5) ttk.Button(btn_frame, text="下一页", command=self.next_page).pack(side=tk.LEFT, padx=5) ttk.Button(btn_frame, text="打印", command=self.show_print_dialog).pack(side=tk.LEFT, padx=5) # 颜色选择按钮和图标 - 修改为三个按钮 ttk.Button(btn_frame, text="周五颜色", command=self.choose_friday_color).pack(side=tk.LEFT, padx=5) self.friday_icon = tk.Canvas(btn_frame, width=20, height=20, bg="#00ff00") # 绿色 self.friday_icon.pack(side=tk.LEFT, padx=5) self.friday_icon.bind("<Button-1>", lambda e: self.choose_friday_color()) ttk.Button(btn_frame, text="周日颜色", command=self.choose_sunday_color).pack(side=tk.LEFT, padx=5) self.sunday_icon = tk.Canvas(btn_frame, width=20, height=20, bg="#ff0000") # 红色 self.sunday_icon.pack(side=tk.LEFT, padx=5) self.sunday_icon.bind("<Button-1>", lambda e: self.choose_sunday_color()) ttk.Button(btn_frame, text="指定红日", command=self.choose_other_red_color).pack(side=tk.LEFT, padx=5) self.other_red_icon = tk.Canvas(btn_frame, width=20, height=20, bg="#ff0000") # 红色 self.other_red_icon.pack(side=tk.LEFT, padx=5) self.other_red_icon.bind("<Button-1>", lambda e: self.choose_other_red_color()) ttk.Button(btn_frame, text="涂改颜色", command=self.choose_drawing_color).pack(side=tk.LEFT, padx=5) self.drawing_icon = tk.Canvas(btn_frame, width=20, height=20, bg="#ff0000") # 红色 self.drawing_icon.pack(side=tk.LEFT, padx=5) self.drawing_icon.bind("<Button-1>", lambda e: self.choose_drawing_color()) # 手动涂改按钮 ttk.Button(btn_frame, text="手动涂改", command=self.toggle_drawing).pack(side=tk.LEFT, padx=5) # 擦除按钮 ttk.Button(btn_frame, text="擦除", command=self.toggle_erasing).pack(side=tk.LEFT, padx=5) # 笔宽设置 ttk.Label(btn_frame, text="笔宽:").pack(side=tk.LEFT, padx=5) self.brush_width_var = tk.StringVar(value="15") brush_width_spin = ttk.Spinbox(btn_frame, from_=1, to=50, width=5, textvariable=self.brush_width_var) brush_width_spin.pack(side=tk.LEFT, padx=5) brush_width_spin.bind("<<Increment>>", self.update_brush_width) brush_width_spin.bind("<<Decrement>>", self.update_brush_width) brush_width_spin.bind("<FocusOut>", self.update_brush_width) # 透明度设置 ttk.Label(btn_frame, text="透明度:").pack(side=tk.LEFT, padx=5) self.alpha_var = tk.StringVar(value="50") alpha_spin = ttk.Spinbox(btn_frame, from_=10, to=90, width=5, textvariable=self.alpha_var) alpha_spin.pack(side=tk.LEFT, padx=5) alpha_spin.bind("<<Increment>>", self.update_alpha) alpha_spin.bind("<<Decrement>>", self.update_alpha) alpha_spin.bind("<FocusOut>", self.update_alpha) # 页面显示区域 self.page_label = ttk.Label(main_frame, text="页面: 0/0") self.page_label.grid(row=2, column=0, columnspan=3, pady=5) # PDF显示区域 - 改为两页平行显示 self.pdf_frame = ttk.Frame(main_frame) self.pdf_frame.grid(row=3, column=0, columnspan=3, sticky=(tk.W, tk.E, tk.N, tk.S), pady=10) self.pdf_frame.columnconfigure(0, weight=1) self.pdf_frame.columnconfigure(1, weight=1) self.pdf_frame.rowconfigure(0, weight=1) # 创建两个Canvas用于显示两页 self.canvas1 = tk.Canvas(self.pdf_frame, bg="white") self.canvas2 = tk.Canvas(self.pdf_frame, bg="white") # 添加滚动条 - 改为快速翻页滚动条 self.page_scrollbar = ttk.Scrollbar(self.pdf_frame, orient=tk.VERTICAL) self.h_scrollbar = ttk.Scrollbar(self.pdf_frame, orient=tk.HORIZONTAL) # 配置Canvas和滚动条 self.canvas1.configure(xscrollcommand=self.h_scrollbar.set) self.canvas2.configure(xscrollcommand=self.h_scrollbar.set) self.h_scrollbar.configure(command=self.sync_scroll_x) # 配置页面滚动条 self.page_scrollbar.configure(command=self.scroll_pages) # 布局 self.canvas1.grid(row=0, column=0, sticky=(tk.W, tk.E, tk.N, tk.S), padx=(0, 5)) self.canvas2.grid(row=0, column=1, sticky=(tk.W, tk.E, tk.N, tk.S), padx=(5, 0)) self.page_scrollbar.grid(row=0, column=2, sticky=(tk.N, tk.S)) self.h_scrollbar.grid(row=1, column=0, columnspan=2, sticky=(tk.W, tk.E)) # 绑定鼠标滚轮事件 self.canvas1.bind("<MouseWheel>", self.on_mousewheel) self.canvas2.bind("<MouseWheel>", self.on_mousewheel) self.canvas1.bind("<Button-4>", self.on_mousewheel) # Linux向上滚动 self.canvas2.bind("<Button-4>", self.on_mousewheel) self.canvas1.bind("<Button-5>", self.on_mousewheel) # Linux向下滚动 self.canvas2.bind("<Button-5>", self.on_mousewheel) # 绑定手动涂改事件 self.canvas1.bind("<ButtonPress-1>", self.start_drawing) self.canvas1.bind("<B1-Motion>", self.draw) self.canvas1.bind("<ButtonRelease-1>", self.stop_drawing) self.canvas2.bind("<ButtonPress-1>", self.start_drawing) self.canvas2.bind("<B1-Motion>", self.draw) self.canvas2.bind("<ButtonRelease-1>", self.stop_drawing) # 状态栏 self.status_bar = ttk.Label(self.root, text="就绪", relief=tk.SUNKEN, anchor=tk.W) self.status_bar.grid(row=4, column=0, sticky=(tk.W, tk.E)) def pdf_to_cor(self): """将PDF转换为可搜索的文本PDF(OCR处理)""" if not self.pdf_path: messagebox.showwarning("警告", "请先选择PDF文件") return if not OCR_AVAILABLE: messagebox.showerror("错误", "OCR功能不可用。请确保已安装:\n1. Tesseract OCR\n2. pdf2image库\n3. Poppler工具") return try: # 获取桌面路径 if os.name == 'nt': # Windows desktop_path = os.path.join(os.path.join(os.environ['USERPROFILE']), 'Desktop') else: # macOS 或 Linux desktop_path = os.path.join(os.path.join(os.expanduser('~')), 'Desktop') # 生成默认文件名 base_name = os.path.basename(self.pdf_path) default_filename = os.path.splitext(base_name)[0] + "_ocr.pdf" default_path = os.path.join(desktop_path, default_filename) # 询问保存路径 save_path = filedialog.asksaveasfilename( defaultextension=".pdf", filetypes=[("PDF文件", "*.pdf")], initialdir=desktop_path, initialfile=default_filename ) if not save_path: return self.status_bar.config(text="正在转换PDF到可搜索文本PDF...") self.root.update() # 创建进度窗口 progress_window = tk.Toplevel(self.root) progress_window.title("OCR处理进度") progress_window.geometry("400x100") progress_window.transient(self.root) progress_window.grab_set() progress_label = ttk.Label(progress_window, text="正在处理第 0 页,共 0 页") progress_label.pack(pady=10) progress_bar = ttk.Progressbar(progress_window, mode='determinate') progress_bar.pack(fill=tk.X, padx=20, pady=10) progress_window.update() # 打开原始PDF doc = fitz.open(self.pdf_path) total_pages = len(doc) # 创建新的PDF文档 output_doc = fitz.open() # 设置进度 progress_bar["maximum"] = total_pages progress_bar["value"] = 0 # 临时目录用于存储图像 with tempfile.TemporaryDirectory() as temp_dir: # 将PDF转换为图像 images = convert_from_path( self.pdf_path, dpi=300, # 高DPI以获得更好的OCR结果 output_folder=temp_dir, fmt='jpeg', thread_count=4, use_pdftocairo=True ) for page_num, image in enumerate(images): # 更新进度 progress_bar["value"] = page_num + 1 progress_label.config(text=f"正在处理第 {page_num + 1} 页,共 {total_pages} 页") progress_window.update() # 使用Tesseract进行OCR pdf_bytes = pytesseract.image_to_pdf_or_hocr( image, extension='pdf', config='--psm 6 -c preserve_interword_spaces=1' ) # 将OCR结果转换为fitz文档 ocr_pdf = fitz.open("pdf", pdf_bytes) # 将OCR页面添加到输出文档 output_doc.insert_pdf(ocr_pdf) # 关闭OCR PDF ocr_pdf.close() # 保存输出文档 output_doc.save(save_path) output_doc.close() doc.close() # 关闭进度窗口 progress_window.destroy() self.status_bar.config(text=f"PDF已成功转换为可搜索文本PDF: {save_path}") messagebox.showinfo("成功", f"PDF文件已成功转换为可搜索文本PDF:\n{save_path}") except Exception as e: error_msg = f"转换PDF到可搜索文本PDF时发生错误: {str(e)}\n\n详细信息:\n{traceback.format_exc()}" messagebox.showerror("错误", error_msg) self.status_bar.config(text="转换失败") def choose_friday_color(self): """选择周五颜色""" color = colorchooser.askcolor(title="选择周五颜色", initialcolor="#00ff00") if color[0]: # 用户选择了颜色 r, g, b = color[0] self.friday_color = (r/255, g/255, b/255) # 转换为0-1范围 # 更新颜色图标 self.friday_icon.config(bg=color[1]) def choose_sunday_color(self): """选择周日颜色""" color = colorchooser.askcolor(title="选择周日颜色", initialcolor="#ff0000") if color[0]: # 用户选择了颜色 r, g, b = color[0] self.sunday_color = (r/255, g/255, b/255) # 转换为0-1范围 # 更新颜色图标 self.sunday_icon.config(bg=color[1]) def choose_other_red_color(self): """选择指定红日颜色""" color = colorchooser.askcolor(title="选择指定红日颜色", initialcolor="#ff0000") if color[0]: # 用户选择了颜色 r, g, b = color[0] self.other_red_color = (r/255, g/255, b/255) # 转换为0-1范围 # 更新颜色图标 self.other_red_icon.config(bg=color[1]) def choose_drawing_color(self): """选择涂改颜色""" color = colorchooser.askcolor(title="选择涂改颜色", initialcolor="#ff0000") if color[0]: # 用户选择了颜色 r, g, b = color[0] self.drawing_color = (r/255, g/255, b/255) # 转换为0-1范围 # 更新颜色图标 self.drawing_icon.config(bg=color[1]) def save_as_pdf(self): """保存PDF文件""" if not self.highlighted_pdf_path or not os.path.exists(self.highlighted_pdf_path): messagebox.showwarning("警告", "请先处理PDF文件") return try: # 获取桌面路径 if os.name == 'nt': # Windows desktop_path = os.path.join(os.path.join(os.environ['USERPROFILE']), 'Desktop') else: # macOS 或 Linux desktop_path = os.path.join(os.path.join(os.expanduser('~')), 'Desktop') # 生成默认文件名 - 修复: 使用os.path.splitext而不是os.path.splicext base_name = os.path.basename(self.pdf_path) default_filename = os.path.splitext(base_name)[0] + "_highlighted.pdf" default_path = os.path.join(desktop_path, default_filename) # 询问保存路径 save_path = filedialog.asksaveasfilename( defaultextension=".pdf", filetypes=[("PDF文件", "*.pdf")], initialdir=desktop_path, initialfile=default_filename ) if save_path: # 复制文件 shutil.copy2(self.highlighted_pdf_path, save_path) self.status_bar.config(text=f"PDF已保存到: {save_path}") messagebox.showinfo("成功", f"PDF文件已保存到:\n{save_path}") except Exception as e: messagebox.showerror("错误", f"保存PDF时发生错误: {str(e)}") def toggle_erasing(self): """切换擦除模式""" self.erasing_mode = not self.erasing_mode self.drawing_mode = False # 确保涂改模式关闭 if self.erasing_mode: self.status_bar.config(text="擦除模式已启用 - 点击并拖动以擦除手动涂改") else: self.status_bar.config(text="就绪") def sync_scroll_x(self, *args): """同步两个Canvas的水平滚动""" self.canvas1.xview(*args) self.canvas2.xview(*args) def scroll_pages(self, *args): """处理页面滚动条事件""" if not self.pdf_document or self.total_pages == 0: return # 获取滚动条位置 if len(args) > 1 and args[0] == "moveto": scroll_pos = float(args[1]) elif len(args) > 2 and args[0] == "scroll": units = int(args[1]) scroll_pos = float(args[2]) else: return # 计算目标页面 target_page = int(scroll_pos * (self.total_pages - 1)) # 确保目标页面是偶数(因为每次显示两页) if target_page % 2 != 0: target_page = max(0, target_page - 1) # 更新当前页面 if target_page != self.current_page: self.current_page = target_page self.display_page() def update_scrollbar(self): """更新滚动条位置""" if not self.pdf_document or self.total_pages == 0: return # 计算滚动条位置 scroll_pos = self.current_page / (self.total_pages - 1) if self.total_pages > 1 else 0 # 更新滚动条 self.page_scrollbar.set(scroll_pos, scroll_pos + 1/(self.total_pages)) def on_mousewheel(self, event): """处理鼠标滚轮事件""" if event.num == 4 or event.delta > 0: # 向上滚动 self.prev_page() elif event.num == 5 or event.delta < 0: # 向下滚动 self.next_page() def toggle_drawing(self): """切换手动涂改模式""" self.drawing_mode = not self.drawing_mode self.erasing_mode = False # 确保擦除模式关闭 if self.drawing_mode: self.status_bar.config(text="手动涂改模式已启用 - 点击并拖动以涂改") else: self.status_bar.config(text="就绪") def update_brush_width(self, event): """更新笔宽""" try: self.brush_width = int(self.brush_width_var.get()) except ValueError: self.brush_width = 15 self.brush_width_var.set("15") def update_alpha(self, event): """更新透明度""" try: self.highlight_alpha = int(self.alpha_var.get()) except ValueError: self.highlight_alpha = 50 self.alpha_var.set("50") def start_drawing(self, event): """开始手动涂改或擦除""" if not (self.drawing_mode or self.erasing_mode): return self.start_x = event.x self.start_y = event.y self.current_drawing = [] def draw(self, event): """手动涂改或擦除""" if (not self.drawing_mode and not self.erasing_mode) or self.start_x is None or self.start_y is None: return # 获取当前Canvas current_canvas = event.widget if self.drawing_mode: # 使用涂改颜色,确保使用透明效果 r, g, b = self.drawing_color # 创建半透明颜色 - 使用alpha混合公式 # 背景色为白色(255,255,255),前景色为涂改颜色 alpha_factor = self.highlight_alpha / 100 r_mixed = int((r * alpha_factor + 1.0 * (1 - alpha_factor)) * 255) g_mixed = int((g * alpha_factor + 1.0 * (1 - alpha_factor)) * 255) b_mixed = int((b * alpha_factor + 1.0 * (1 - alpha_factor)) * 255) color = "#{:02x}{:02x}{:02x}".format(r_mixed, g_mixed, b_mixed) # 使用椭圆形实现圆形笔刷效果 half_width = self.brush_width # 创建荧光笔效果 - 使用点状图案实现透明效果 oval_id = current_canvas.create_oval( event.x - half_width, event.y - half_width, event.x + half_width, event.y + half_width, fill=color, outline="", stipple="gray50" # 使用点状图案实现透明效果 ) self.current_drawing.append(oval_id) # 如果移动距离较大,在两点之间绘制线条填充间隙 distance = ((event.x - self.start_x)**2 + (event.y - self.start_y)**2)**0.5 if distance > self.brush_width: line_id = current_canvas.create_line( self.start_x, self.start_y, event.x, event.y, width=self.brush_width*2, fill=color, capstyle=tk.ROUND, joinstyle=tk.ROUND, stipple="gray50" # 使用点状图案实现透明效果 ) self.current_drawing.append(line_id) elif self.erasing_mode: # 擦除模式 - 查找并删除与当前位置重叠的手动涂改项目 half_width = self.brush_width # 获取当前位置附近的所有项目 items = current_canvas.find_overlapping( event.x - half_width, event.y - half_width, event.x + half_width, event.y + half_width ) # 只删除手动涂改的项目(在drawing_items中的项目) all_drawing_items = self.get_all_drawing_items() for item in items: if item in all_drawing_items: current_canvas.delete(item) # 从drawing_items中移除 for drawing in self.drawing_items: if item in drawing: drawing.remove(item) # 如果移动距离较大,在两点之间查找更多项目 distance = ((event.x - self.start_x)**2 + (event.y - self.start_y)**2)**0.5 if distance > self.brush_width: # 计算线段上的多个点 steps = int(distance / (self.brush_width / 2)) + 1 for i in range(steps): x = self.start_x + (event.x - self.start_x) * i / steps y = self.start_y + (event.y - self.start_y) * i / steps items = current_canvas.find_overlapping( x - half_width, y - half_width, x + half_width, y + half_width ) # 只删除手动涂改的项目 for item in items: if item in all_drawing_items: current_canvas.delete(item) # 从drawing_items中移除 for drawing in self.drawing_items: if item in drawing: drawing.remove(item) self.start_x = event.x self.start_y = event.y def get_all_drawing_items(self): """获取所有绘制项目的ID""" all_items = [] for drawing in self.drawing_items: all_items.extend(drawing) return all_items def stop_drawing(self, event): """停止手动涂改或擦除""" if self.current_drawing and (self.drawing_mode or self.erasing_mode): if self.drawing_mode: # 只在涂改模式下保存绘制项目 self.drawing_items.append(self.current_drawing.copy()) self.start_x = None self.start_y = None self.current_drawing = [] def undo_drawing(self): """撤回最后一次涂改""" if not self.drawing_items: return # 获取最后一次涂改的项目 last_drawing = self.drawing_items.pop() # 从两个Canvas中删除这些项目 for item_id in last_drawing: self.canvas1.delete(item_id) self.canvas2.delete(item_id) self.status_bar.config(text="已撤回最后一次涂改") def browse_file(self): try: file_path = filedialog.askopenfilename(filetypes=[("PDF文件", "*.pdf")]) if file_path: self.file_path.delete(0, tk.END) self.file_path.insert(0, file_path) self.pdf_path = file_path self.load_pdf() except Exception as e: messagebox.showerror("错误", f"浏览文件时发生错误: {str(e)}") def load_pdf(self): if not self.pdf_path: return try: if self.pdf_document: self.pdf_document.close() self.pdf_document = fitz.open(self.pdf_path) self.total_pages = len(self.pdf_document) self.current_page = 0 self.display_page() self.update_scrollbar() except Exception as e: messagebox.showerror("错误", f"无法打开PDF文件: {str(e)}") def display_page(self): if not self.pdf_document or self.total_pages == 0: return self.page_label.config(text=f"页面: {self.current_page+1}-{min(self.current_page+2, self.total_pages)}/{self.total_pages}") try: # 清空当前图像引用 self.current_images = [] # 显示第一页 if self.current_page < self.total_pages: page1 = self.pdf_document[self.current_page] zoom = 1.0 # 缩小一点以适应两页显示 mat = fitz.Matrix(zoom, zoom) pix1 = page1.get_pixmap(matrix=mat) # 转换为PhotoImage img_data1 = pix1.tobytes("ppm") img1 = tk.PhotoImage(data=img_data1) self.current_images.append(img1) # 保持引用 # 更新Canvas self.canvas1.config(width=min(img1.width(), 550), height=min(img1.height(), 500)) self.canvas1.delete("all") self.canvas1.create_image(0, 0, anchor=tk.NW, image=img1) # 设置滚动区域 self.canvas1.config(scrollregion=self.canvas1.bbox(tk.ALL)) # 显示第二页(如果有) if self.current_page + 1 < self.total_pages: page2 = self.pdf_document[self.current_page + 1] zoom = 1.0 # 缩小一点以适应两页显示 mat = fitz.Matrix(zoom, zoom) pix2 = page2.get_pixmap(matrix=mat) # 转换为PhotoImage img_data2 = pix2.tobytes("ppm") img2 = tk.PhotoImage(data=img_data2) self.current_images.append(img2) # 保持引用 # 更新Canvas self.canvas2.config(width=min(img2.width(), 550), height=min(img2.height(), 500)) self.canvas2.delete("all") self.canvas2.create_image(0, 0, anchor=tk.NW, image=img2) # 设置滚动区域 self.canvas2.config(scrollregion=self.canvas2.bbox(tk.ALL)) else: # 如果没有第二页,清空第二个Canvas self.canvas2.delete("all") self.canvas2.config(width=0, height=0) # 更新滚动条位置 self.update_scrollbar() except Exception as e: messagebox.showerror("错误", f"显示页面时发生错误: {str(e)}") def prev_page(self): if self.current_page > 0: self.current_page = max(0, self.current_page - 2) self.display_page() def next_page(self): if self.pdf_document and self.current_page + 2 < self.total_pages: self.current_page += 2 self.display_page() def process_pdf(self): if not self.pdf_path: messagebox.showwarning("警告", "请先选择PDF文件") return self.status_bar.config(text="正在处理PDF...") self.root.update() try: # 创建临时文件保存处理后的PDF temp_file = tempfile.NamedTemporaryFile(suffix='.pdf', delete=False) self.highlighted_pdf_path = temp_file.name temp_file.close() # 打开原始PDF doc = fitz.open(self.pdf_path) # 遍历每一页 for page_num in range(len(doc)): page = doc[page_num] # 获取页面文本 - 使用更精确的搜索方法 text = page.get_text("text") # 查找所有日期格式的文本 date_pattern = r"\d{4}-\d{2}-\d{2}" dates = re.findall(date_pattern, text) print(f"在第 {page_num+1} 页找到的日期: {dates}") # 调试信息 # 对于每个找到的日期,查找其在页面上的位置并高亮 for date_str in dates: highlight_type = self.should_highlight(date_str) if highlight_type > 0: print(f"高亮日期: {date_str}, 类型: {highlight_type}") # 调试信息 # 搜索日期文本的位置 text_instances = page.search_for(date_str) print(f"找到 {len(text_instances)} 个匹配项") # 调试信息 for inst in text_instances: # 扩大高亮区域 - 扩大2倍 expanded_rect = self.expand_rect(inst, 2.0) # 使用高亮注释 highlight = page.add_highlight_annot(expanded_rect) # 根据日期类型设置颜色 if highlight_type == 1: # 周五 highlight.set_colors(stroke=self.friday_color) elif highlight_type == 2: # 周日 highlight.set_colors(stroke=self.sunday_color) elif highlight_type == 3: # 指定红日 highlight.set_colors(stroke=self.other_red_color) highlight.set_opacity(self.highlight_alpha/100) # 修正透明度设置 highlight.update() # 保存处理后的PDF doc.save(self.highlighted_pdf_path) doc.close() # 重新加载处理后的PDF if self.pdf_document: self.pdf_document.close() self.pdf_document = fitz.open(self.highlighted_pdf_path) self.total_pages = len(self.pdf_document) self.current_page = 0 self.display_page() self.status_bar.config(text="处理完成") messagebox.showinfo("成功", "PDF处理完成,日期已高亮显示") except Exception as e: self.status_bar.config(text="处理失败") error_msg = f"处理PDF时发生错误: {str(e)}\n\n详细信息:\n{traceback.format_exc()}" messagebox.showerror("错误", error_msg) def expand_rect(self, rect, factor): """扩大矩形区域""" width = rect.width height = rect.height # 计算扩大后的尺寸 new_width = width * factor new_height = height * factor # 计算中心点 center_x = rect.x0 + width / 2 center_y = rect.y0 + height / 2 # 创建新矩形 new_rect = fitz.Rect( center_x - new_width / 2, center_y - new_height / 2, center_x + new_width / 2, center_y + new_height / 2 ) return new_rect def should_highlight(self, date_str): try: date_obj = datetime.strptime(date_str, "%Y-%m-%d") weekday = date_obj.weekday() # 检查是否为指定日期 is_specified_date = date_str in self.specified_dates # 检查是否为周五(4)或周日(6) if weekday == 4: # 周五 # 如果同时是指定日期,则使用指定红日颜色 if is_specified_date: return 3 # 指定红日 else: return 1 # 周五 elif weekday == 6: # 周日 return 2 # 周日 elif is_specified_date: # 指定指定日期 return 3 # 指定红日 except ValueError: pass return 0 # 不高亮 def show_print_dialog(self): """显示打印设置对话框""" if not self.highlighted_pdf_path or not os.path.exists(self.highlighted_pdf_path): messagebox.showwarning("警告", "请先处理PDF文件") return try: # 创建打印设置对话框 print_dialog = tk.Toplevel(self.root) print_dialog.title("打印设置") print_dialog.geometry("500x600") print_dialog.transient(self.root) print_dialog.grab_set() # 主框架 - 使用grid布局 main_frame = ttk.Frame(print_dialog, padding="10") main_frame.pack(fill=tk.BOTH, expand=True) # 打印机信息区域 printer_frame = ttk.LabelFrame(main_frame, text="打印机", padding="5") printer_frame.grid(row=0, column=0, sticky=(tk.W, tk.E), pady=5) printer_frame.columnconfigure(1, weight=1) # 获取系统打印机列表 printers = self.get_printer_list() # 打印机名称和属性 ttk.Label(printer_frame, text="名称(M):").grid(row=0, column=0, sticky=tk.W, pady=2) # 打印机选择下拉框 self.printer_var = tk.StringVar() if printers: self.printer_var.set(printers[0]) # 默认选择第一个打印机 printer_combo = ttk.Combobox(printer_frame, textvariable=self.printer_var, values=printers, state="readonly") printer_combo.grid(row=0, column=1, sticky=(tk.W, tk.E), padx=5, pady=2) ttk.Button(printer_frame, text="属性(P)...", command=self.show_printer_properties).grid(row=0, column=2, padx=5, pady=2) # 打印选项区域 options_frame = ttk.LabelFrame(main_frame, text="打印选项", padding="5") options_frame.grid(row=1, column=0, sticky=(tk.W, tk.E), pady=5) options_frame.columnconfigure(1, weight=1) # 纸张来源 ttk.Label(options_frame, text="纸张来源(S):").grid(row=0, column=0, sticky=tk.W, pady=2) paper_combo = ttk.ComboBox(options_frame, values=["使用打印机设置", "自动选择", "手动送纸"], width=20, state="readonly") paper_combo.set("使用打印机设置") paper_combo.grid(row=0, column=1, sticky=tk.W, padx=5, pady=2) # 选择纸张 ttk.Label(options_frame, text="选择纸张:").grid(row=1, column=0, sticky=tk.W, pady=5) paper_type_combo = ttk.Combobox(options_frame, values=["A4", "Letter", "Legal", "A3"], width=20, state="readonly") paper_type_combo.set("A4") paper_type_combo.grid(row=1, column=1, sticky=tk.W, padx=5, pady=5) # 打印范围区域 range_frame = ttk.LabelFrame(main_frame, text="页码范围", padding="5") range_frame.grid(row=2, column=0, sticky=(tk.W, tk.E), pady=5) range_frame.columnconfigure(0, weight=1) # 页码范围选择 self.range_var = tk.StringVar(value="全部") ttk.Radiobutton(range_frame, text="全部(A)", variable=self.range_var, value="全部").grid(row=0, column=0, sticky=tk.W) ttk.Radiobutton(range_frame, text="当前页(U)", variable=self.range_var, value="当前页").grid(row=1, column=0, sticky=tk.W) ttk.Radiobutton(range_frame, text="页码范围(G)", variable=self.range_var, value="页码范围").grid(row=2, column=0, sticky=tk.W) # 页码范围输入 self.range_entry = ttk.Entry(range_frame) self.range_entry.grid(row=3, column=0, sticky=(tk.W, tk.E), pady=2) ttk.Label(range_frame, text="请键入页码和/或用逗号分隔的页码范围 (例如:1,3,5-12)").grid(row=4, column=0, sticky=tk.W) # 副本区域 copies_frame = ttk.LabelFrame(main_frame, text="副本", padding="5") copies_frame.grid(row=3, column=0, sticky=(tk.W, tk.E), pady=5) # 份数 self.copies_var = tk.IntVar(value=1) copies_spin = ttk.Spinbox(copies_frame, from_=1, to=999, textvariable=self.copies_var, width=5) copies_spin.grid(row=0, column=0, sticky=tk.W, padx=5) ttk.Label(copies_frame, text="份数(C)").grid(row=0, column=1, sticky=tk.W) # 逐份打印 self.collate_var = tk.BooleanVar(value=True) ttk.Checkbutton(copies_frame, text="逐份打印(T)", variable=self.collate_var).grid(row=0, column=2, sticky=tk.W, padx=20) # 并打和缩放区域 layout_frame = ttk.LabelFrame(main_frame, text="并打和缩放", padding="5") layout_frame.grid(row=4, column=0, sticky=(tk.W, tk.E), pady=5) layout_frame.columnconfigure(1, weight=1) # 每页版数 ttk.Label(layout_frame, text="每页的版数(H):").grid(row=0, column=0, sticky=tk.W, pady=2) self.pages_combo = ttk.Combobox(layout_frame, values=["1", "2", "4", "6", "9", "16"], width=5, state="readonly") self.pages_combo.set("1") self.pages_combo.grid(row=0, column=1, sticky=tk.W, padx=5, pady=2) # 按纸型缩放 ttk.Label(layout_frame, text="按纸型缩放(Z):").grid(row=1, column=0, sticky=tk.W, pady=2) self.scale_combo = ttk.Combobox(layout_frame, values=["无缩放", "缩小至可打印区域", "适合可打印区域", "实际大小"], width=20, state="readonly") self.scale_combo.set("无缩放") self.scale_combo.grid(row=1, column=1, sticky=tk.W, padx=5, pady=2) # 并打顺序 ttk.Label(layout_frame, text="并打顺序:").grid(row=2, column=0, sticky=tk.W, pady=2) self.order_var = tk.StringVar(value="从左到右") ttk.Radiobutton(layout_frame, text="从左到右(F)", variable=self.order_var, value="从左到右").grid(row=2, column=1, sticky=tk.W) ttk.Radiobutton(layout_frame, text="从上到下(B)", variable=self.order_var, value="从上到下").grid(row=2, column=2, sticky=tk.W, padx=5) # 分隔线 self.separator_var = tk.BooleanVar(value=False) ttk.Checkbutton(layout_frame, text="并打时绘制分隔线(D)", variable=self.separator_var).grid(row=3, column=0, columnspan=3, sticky=tk.W, pady=2) # 其他选项 other_frame = ttk.Frame(layout_frame) other_frame.grid(row=4, column=0, columnspan=3, sticky=(tk.W, tk.E), pady=2) self.reverse_var = tk.BooleanVar(value=False) ttk.Checkbutton(other_frame, text="反片打印(I)", variable=self.reverse_var).grid(row=0, column=0, sticky=tk.W) self.to_file_var = tk.BooleanVar(value=False) ttk.Checkbutton(other_frame, text="打印到文件(L)", variable=self.to_file_var).grid(row=0, column=1, sticky=tk.W, padx=5) self.duplex_var = tk.BooleanVar(value=False) ttk.Checkbutton(other_frame, text="双面打印(X)", variable=self.duplex_var).grid(row=0, column=2, sticky=tk.W, padx=5) # 按钮区域 button_frame = ttk.Frame(main_frame) button_frame.grid(row=5, column=0, sticky=tk.E, pady=10) ttk.Button(button_frame, text="打印预览", command=self.print_preview).pack(side=tk.LEFT, padx=5) ttk.Button(button_frame, text="取消", command=print_dialog.destroy).pack(side=tk.LEFT, padx=5) ttk.Button(button_frame, text="开始打印", command=lambda: self.print_pdf(print_dialog)).pack(side=tk.LEFT, padx=5) # 配置权重使组件可以扩展 main_frame.columnconfigure(0, weight=1) for i in range(6): main_frame.rowconfigure(i, weight=0) main_frame.rowconfigure(5, weight=1) except Exception as e: messagebox.showerror("错误", f"打开打印设置时发生错误: {str(e)}") def get_printer_list(self): """获取系统打印机列表""" if not WIN32PRINT_AVAILABLE: # 如果win32print不可用,尝试使用系统命令获取打印机列表 try: if sys.platform == "win32": # Windows系统 result = subprocess.run(['wmic', 'printer', 'get', 'name'], capture_output=True, text=True) if result.returncode == 0: printers = [] lines = result.stdout.split('\n') for line in lines: line = line.strip() if line and not line.startswith('Name'): printers.append(line) return printers if printers else ["默认打印机"] elif sys.platform == "darwin": # macOS系统 result = subprocess.run(['lpstat', '-a'], capture_output=True, text=True) if result.returncode == 0: printers = [] lines = result.stdout.split('\n') for line in lines: if line: parts = line.split() if parts: printers.append(parts[0]) return printers if printers else ["默认打印机"] else: # Linux系统 result = subprocess.run(['lpstat', '-a'], capture_output=True, text=True) if result.returncode == 0: printers = [] lines = result.stdout.split('\n') for line in lines: if line: parts = line.split() if parts: printers.append(parts[0]) return printers if printers else ["默认打印机"] except: pass return ["默认打印机"] try: printers = [] for printer in win32print.EnumPrinters(win32print.PRENER_ENUM_LOCAL | win32print.PRENER_ENUM_CONNECTIONS): printers.append(printer[2]) return printers except: # 如果无法获取打印机列表,返回默认打印机 try: default_printer = win32print.GetDefaultPrinter() return [default_printer] if default_printer else ["默认打印机"] except: return ["默认打印机"] def show_printer_properties(self): """显示打印机属性""" if not WIN32PRINT_AVAILABLE: messagebox.showinfo("打印机属性", "在此系统上无法访问打印机属性。") return try: printer_name = self.printer_var.get() if printer_name and printer_name != "默认打印机": # 打开打印机属性对话框 hprinter = win32print.OpenPrinter(printer_name) win32print.PrinterProperties(self.root.winfo_id(), hprinter) win32print.ClosePrinter(hprinter) else: messagebox.showinfo("打印机属性", "请先选择一个有效的打印机。") except Exception as e: messagebox.showerror("错误", f"无法打开打印机属性: {str(e)}") def print_preview(self): """打印预览功能""" if not self.highlighted_pdf_path or not os.path.exists(self.highlighted_pdf_path): messagebox.showwarning("警告", "请先处理PDF文件") return try: # 创建预览窗口 preview = tk.Toplevel(self.root) preview.title("打印预览") preview.geometry("800x600") # 创建Canvas用于显示预览 preview_canvas = tk.Canvas(preview, bg="white") preview_canvas.pack(fill=tk.BOTH, expand=True) # 添加滚动条 v_scrollbar = ttk.Scrollbar(preview, orient=tk.VERTICAL, command=preview_canvas.yview) h_scrollbar = ttk.Scrollbar(preview, orient=tk.HORIZONTAL, command=preview_canvas.xview) preview_canvas.configure(yscrollcommand=v_scrollbar.set, xscrollcommand=h_scrollbar.set) v_scrollbar.pack(side=tk.RIGHT, fill=tk.Y) h_scrollbar.pack(side=tk.BOTTOM, fill=tk.X) preview_canvas.pack(side=tk.LEFT, fill=tk.BOTH, expand=True) # 加载PDF第一页作为预览 doc = fitz.open(self.highlighted_pdf_path) page = doc[0] zoom = 1.0 mat = fitz.Matrix(zoom, zoom) pix = page.get_pixmap(matrix=mat) # 转换为PhotoImage img_data = pix.tobytes("ppm") img = tk.PhotoImage(data=img_data) # 保持引用 preview.img_ref = img # 显示图像 preview_canvas.create_image(0, 0, anchor=tk.NW, image=img) preview_canvas.config(scrollregion=preview_canvas.bbox(tk.ALL)) doc.close() except Exception as e: messagebox.showerror("错误", f"打印预览时发生错误: {str(e)}") def print_pdf(self, dialog): """实际打印PDF""" dialog.destroy() if not self.highlighted_pdf_path or not os.path.exists(self.highlighted_pdf_path): messagebox.showwarning("警告", "请先处理PDF文件") return try: # 获取打印设置 printer_name = self.printer_var.get() range_type = self.range_var.get() page_range = self.range_entry.get() if range_type == "页码范围" else None copies = self.copies_var.get() # 构建打印命令 if sys.platform == "win32": # 使用系统打印对话框 os.startfile(self.highlighted_pdf_path, "print") elif sys.platform == "darwin": # macOS cmd = ['lp'] if printer_name and printer_name != "默认打印机": cmd.extend(['-d', printer_name]) if copies > 1: cmd.extend(['-n', str(copies)]) if page_range: cmd.extend(['-o', f'page-ranges={page_range}']) cmd.append(self.highlighted_pdf_path) subprocess.run(cmd) else: # Linux cmd = ['lp'] if printer_name and printer_name != "默认打印机": cmd.extend(['-d', printer_name]) if copies > 1: cmd.extend(['-n', str(copies)]) if page_range: cmd.extend(['-o', f'page-ranges={page_range}']) cmd.append(self.highlighted_pdf_path) subprocess.run(cmd) self.status_bar.config(text="打印任务已发送") except Exception as e: messagebox.showerror("错误", f"打印失败: {str(e)}") def __del__(self): # 清理临时文件 if self.highlighted_pdf_path and os.path.exists(self.highlighted_pdf_path): try: os.unlink(self.highlighted_pdf_path) except: pass # 关闭PDF文档 if self.pdf_document: try: self.pdf_document.close() except: pass def main(): try: # 检查是否安装了PyMuPDF try: import fitz except ImportError: messagebox.showerror("缺少依赖", "请安装PyMuPDF库: pip install PyMuPDF") return root = tk.Tk() app = PDFDateHighlighter(root) root.mainloop() except Exception as e: error_msg = f"程序发生未预期错误: {str(e)}\n\n详细信息:\n{traceback.format_exc()}" messagebox.showerror("严重错误", error_msg) if __name__ == "__main__": main() 已安装1. Tesseract OCR,2. pdf2image库,3.Poppler工具,为何点击“PDF转COR”按钮后报错 错误 OCR功能不可用,请确保已安装: 1. Tesseract OCR 2. pdf2image库 3.Poppler工具 修改一套完整代码
08-22
import os import sys import tempfile import tkinter as tk from tkinter import filedialog, messagebox, colorchooser, ttk from io import BytesIO import re from datetime import datetime import traceback import shutil import subprocess # 尝试导入win32print,如果不可用则提供替代方案 try: import win32print # 用于获取打印机列表 WIN32PRINT_AVAILABLE = True except ImportError: WIN32PRINT_AVAILABLE = False print("win32print模块不可用,将使用默认打印机") # 添加缺失的导入 import fitz # PyMuPDF # 尝试导入OCR相关库 OCR_AVAILABLE = False TESSERACT_PATH = None POPPLER_PATH = None try: from pdf2image import convert_from_path # 尝试导入pytesseract,如果失败则尝试安装 try: import pytesseract except ImportError: print("pytesseract未安装,尝试安装...") subprocess.check_call([sys.executable, "-m", "pip", "install", "pytesseract"]) import pytesseract # 检查Tesseract OCR是否可用 try: # 尝试自动查找Tesseract路径 if sys.platform == "win32": # Windows常见安装路径 possible_tesseract_paths = [ r"C:\Program Files\Tesseract-OCR\tesseract.exe", r"C:\Program Files (x86)\Tesseract-OCR\tesseract.exe" ] for path in possible_tesseract_paths: if os.path.exists(path): pytesseract.pytesseract.tesseract_cmd = path TESSERACT_PATH = path break # 如果自动查找失败,尝试在PATH中查找 if TESSERACT_PATH is None: try: result = subprocess.run(['where', 'tesseract'], capture_output=True, text=True) if result.returncode == 0: TESSERACT_PATH = result.stdout.split('\n')[0].strip() pytesseract.pytesseract.tesseract_cmd = TESSERACT_PATH except: pass else: # macOS和Linux try: result = subprocess.run(['which', 'tesseract'], capture_output=True, text=True) if result.returncode == 0: TESSERACT_PATH = result.stdout.strip() pytesseract.pytesseract.tesseract_cmd = TESSERACT_PATH except: pass # 检查Poppler路径 if sys.platform == "win32": # Windows常见安装路径 possible_poppler_paths = [ r"C:\poppler\bin", r"C:\Program Files\poppler\bin", r"C:\Program Files (x86)\poppler\bin", r"C:\Release-25.07.0-0\poppler-25.07.0\Library\bin" # 添加用户提供的路径 ] for path in possible_poppler_paths: if os.path.exists(path): POPPLER_PATH = path break # 最终检查 if TESSERACT_PATH and os.path.exists(TESSERACT_PATH): # 测试Tesseract version = pytesseract.get_tesseract_version() print(f"Tesseract版本: {version}") OCR_AVAILABLE = True else: print("Tesseract OCR未正确安装或配置") except Exception as e: print(f"Tesseract检查错误: {str(e)}") OCR_AVAILABLE = False except ImportError as e: print(f"OCR库导入错误: {str(e)}") OCR_AVAILABLE = False class PDFDateHighlighter: def __init__(self, root): self.root = root self.root.title("PDF日期高亮与打印工具") self.root.geometry("1200x700") # 增大窗口以适应两页显示 # 指定日期列表 - 已更新 self.specified_dates = [ "2025-01-01", "2025-01-27", "2025-01-29", "2025-03-29", "2025-03-31", "2025-04-01", "2025-04-18", "2025-04-20", "2025-05-01", "2025-05-12", "2025-05-29", "2025-06-01", "2025-06-06", "2025-06-27", "2025-08-17", "2025-09-05", "2025-12-25" ] self.pdf_path = None self.highlighted_pdf_path = None self.current_page = 0 self.total_pages = 0 self.pdf_document = None # 使用不同的颜色作为默认值 self.friday_color = (0.0, 1.0, 0.0) # 周五颜色为绿色 self.sunday_color = (1.0, 0.0, 0.0) # 周日颜色为红色 self.other_red_color = (1.0, 0.0, 0.0) # 指定红日颜色为红色 self.highlight_alpha = 50 # 降低不透明度以确保文字可见 self.current_images = [] # 保持对图像的引用(两页) # 手动涂改相关变量 self.drawing_mode = False self.erasing_mode = False # 擦除模式 self.start_x = None self.start_y = None self.brush_width = 15 # 默认笔宽改为15mm self.drawing_items = [] # 存储绘制的项目用于撤回 self.current_drawing = [] # 当前绘制的项目 self.drawing_color = (1.0, 0.0, 0.0) # 涂改颜色,默认为红色 self.setup_ui() def setup_ui(self): # 主框架 main_frame = ttk.Frame(self.root, padding="10") main_frame.grid(row=0, column=0, sticky=(tk.W, tk.E, tk.N, tk.S)) # 配置行列权重 self.root.columnconfigure(0, weight=1) self.root.rowconfigure(0, weight=1) main_frame.columnconfigure(1, weight=1) main_frame.rowconfigure(3, weight=1) # 文件选择区域 ttk.Label(main_frame, text="PDF文件:").grid(row=0, column=0, sticky=tk.W, pady=5) self.file_path = ttk.Entry(main_frame, width=80) self.file_path.grid(row=0, column=1, sticky=(tk.W, tk.E), pady=5, padx=5) ttk.Button(main_frame, text="浏览", command=self.browse_file).grid(row=0, column=2, pady=5) # 按钮区域 btn_frame = ttk.Frame(main_frame) btn_frame.grid(row=1, column=0, columnspan=3, pady=10) # 调整按钮顺序:将PDF转COR按钮放在撤回和处理PDF按钮之间 ttk.Button(btn_frame, text="保存为PDF", command=self.save_as_pdf).pack(side=tk.LEFT, padx=5) ttk.Button(btn_frame, text="撤回", command=self.undo_drawing).pack(side=tk.LEFT, padx=5) ttk.Button(btn_frame, text="PDF转COR", command=self.pdf_to_cor).pack(side=tk.LEFT, padx=5) # 调整位置 ttk.Button(btn_frame, text="处理PDF", command=self.process_pdf).pack(side=tk.LEFT, padx=5) ttk.Button(btn_frame, text="上一页", command=self.prev_page).pack(side=tk.LEFT, padx=5) ttk.Button(btn_frame, text="下一页", command=self.next_page).pack(side=tk.LEFT, padx=5) ttk.Button(btn_frame, text="打印", command=self.show_print_dialog).pack(side=tk.LEFT, padx=5) # 颜色选择按钮和图标 - 修改为三个按钮 ttk.Button(btn_frame, text="周五颜色", command=self.choose_friday_color).pack(side=tk.LEFT, padx=5) self.friday_icon = tk.Canvas(btn_frame, width=20, height=20, bg="#00ff00") # 绿色 self.friday_icon.pack(side=tk.LEFT, padx=5) self.friday_icon.bind("<Button-1>", lambda e: self.choose_friday_color()) ttk.Button(btn_frame, text="周日颜色", command=self.choose_sunday_color).pack(side=tk.LEFT, padx=5) self.sunday_icon = tk.Canvas(btn_frame, width=20, height=20, bg="#ff0000") # 红色 self.sunday_icon.pack(side=tk.LEFT, padx=5) self.sunday_icon.bind("<Button-1>", lambda e: self.choose_sunday_color()) ttk.Button(btn_frame, text="指定红日", command=self.choose_other_red_color).pack(side=tk.LEFT, padx=5) self.other_red_icon = tk.Canvas(btn_frame, width=20, height=20, bg="#ff0000") # 红色 self.other_red_icon.pack(side=tk.LEFT, padx=5) self.other_red_icon.bind("<Button-1>", lambda e: self.choose_other_red_color()) ttk.Button(btn_frame, text="涂改颜色", command=self.choose_drawing_color).pack(side=tk.LEFT, padx=5) self.drawing_icon = tk.Canvas(btn_frame, width=20, height=20, bg="#ff0000") # 红色 self.drawing_icon.pack(side=tk.LEFT, padx=5) self.drawing_icon.bind("<Button-1>", lambda e: self.choose_drawing_color()) # 手动涂改按钮 ttk.Button(btn_frame, text="手动涂改", command=self.toggle_drawing).pack(side=tk.LEFT, padx=5) # 擦除按钮 ttk.Button(btn_frame, text="擦除", command=self.toggle_erasing).pack(side=tk.LEFT, padx=5) # 笔宽设置 ttk.Label(btn_frame, text="笔宽:").pack(side=tk.LEFT, padx=5) self.brush_width_var = tk.StringVar(value="15") brush_width_spin = ttk.Spinbox(btn_frame, from_=1, to=50, width=5, textvariable=self.brush_width_var) brush_width_spin.pack(side=tk.LEFT, padx=5) brush_width_spin.bind("<<Increment>>", self.update_brush_width) brush_width_spin.bind("<<Decrement>>", self.update_brush_width) brush_width_spin.bind("<FocusOut>", self.update_brush_width) # 透明度设置 ttk.Label(btn_frame, text="透明度:").pack(side=tk.LEFT, padx=5) self.alpha_var = tk.StringVar(value="50") alpha_spin = ttk.Spinbox(btn_frame, from_=10, to=90, width=5, textvariable=self.alpha_var) alpha_spin.pack(side=tk.LEFT, padx=5) alpha_spin.bind("<<Increment>>", self.update_alpha) alpha_spin.bind("<<Decrement>>", self.update_alpha) alpha_spin.bind("<FocusOut>", self.update_alpha) # OCR设置按钮 ttk.Button(btn_frame, text="OCR设置", command=self.configure_ocr).pack(side=tk.LEFT, padx=5) # 页面显示区域 self.page_label = ttk.Label(main_frame, text="页面: 0/0") self.page_label.grid(row=2, column=0, columnspan=3, pady=5) # PDF显示区域 - 改为两页平行显示 self.pdf_frame = ttk.Frame(main_frame) self.pdf_frame.grid(row=3, column=0, columnspan=3, sticky=(tk.W, tk.E, tk.N, tk.S), pady=10) self.pdf_frame.columnconfigure(0, weight=1) self.pdf_frame.columnconfigure(1, weight=1) self.pdf_frame.rowconfigure(0, weight=1) # 创建两个Canvas用于显示两页 self.canvas1 = tk.Canvas(self.pdf_frame, bg="white") self.canvas2 = tk.Canvas(self.pdf_frame, bg="white") # 添加滚动条 - 改为快速翻页滚动条 self.page_scrollbar = ttk.Scrollbar(self.pdf_frame, orient=tk.VERTICAL) self.h_scrollbar = ttk.Scrollbar(self.pdf_frame, orient=tk.HORIZONTAL) # 配置Canvas和滚动条 self.canvas1.configure(xscrollcommand=self.h_scrollbar.set) self.canvas2.configure(xscrollcommand=self.h_scrollbar.set) self.h_scrollbar.configure(command=self.sync_scroll_x) # 配置页面滚动条 self.page_scrollbar.configure(command=self.scroll_pages) # 布局 self.canvas1.grid(row=0, column=0, sticky=(tk.W, tk.E, tk.N, tk.S), padx=(0, 5)) self.canvas2.grid(row=0, column=1, sticky=(tk.W, tk.E, tk.N, tk.S), padx=(5, 0)) self.page_scrollbar.grid(row=0, column=2, sticky=(tk.N, tk.S)) self.h_scrollbar.grid(row=1, column=0, columnspan=2, sticky=(tk.W, tk.E)) # 绑定鼠标滚轮事件 self.canvas1.bind("<MouseWheel>", self.on_mousewheel) self.canvas2.bind("<MouseWheel>", self.on_mousewheel) self.canvas1.bind("<Button-4>", self.on_mousewheel) # Linux向上滚动 self.canvas2.bind("<Button-4>", self.on_mousewheel) self.canvas1.bind("<Button-5>", self.on_mousewheel) # Linux向下滚动 self.canvas2.bind("<Button-5>", self.on_mousewheel) # 绑定手动涂改事件 self.canvas1.bind("<ButtonPress-1>", self.start_drawing) self.canvas1.bind("<B1-Motion>", self.draw) self.canvas1.bind("<ButtonRelease-1>", self.stop_drawing) self.canvas2.bind("<ButtonPress-1>", self.start_drawing) self.canvas2.bind("<B1-Motion>", self.draw) self.canvas2.bind("<ButtonRelease-1>", self.stop_drawing) # 状态栏 self.status_bar = ttk.Label(self.root, text="就绪", relief=tk.SUNKEN, anchor=tk.W) self.status_bar.grid(row=4, column=0, sticky=(tk.W, tk.E)) def configure_ocr(self): """配置OCR路径""" config_window = tk.Toplevel(self.root) config_window.title("OCR配置") config_window.geometry("500x300") config_window.transient(self.root) config_window.grab_set() main_frame = ttk.Frame(config_window, padding="10") main_frame.pack(fill=tk.BOTH, expand=True) # Tesseract路径 ttk.Label(main_frame, text="Tesseract路径:").grid(row=0, column=0, sticky=tk.W, pady=5) tesseract_path_var = tk.StringVar(value=TESSERACT_PATH or "") tesseract_entry = ttk.Entry(main_frame, textvariable=tesseract_path_var, width=50) tesseract_entry.grid(row=0, column=1, sticky=(tk.W, tk.E), padx=5, pady=5) ttk.Button(main_frame, text="浏览", command=lambda: self.browse_ocr_path(tesseract_path_var, "tesseract")).grid(row=0, column=2, padx=5, pady=5) # Poppler路径 ttk.Label(main_frame, text="Poppler路径:").grid(row=1, column=0, sticky=tk.W, pady=5) poppler_path_var = tk.StringVar(value=POPPLER_PATH or "") poppler_entry = ttk.Entry(main_frame, textvariable=poppler_path_var, width=50) poppler_entry.grid(row=1, column=1, sticky=(tk.W, tk.E), padx=5, pady=5) ttk.Button(main_frame, text="浏览", command=lambda: self.browse_ocr_path(poppler_path_var, "poppler")).grid(row=1, column=2, padx=5, pady=5) # 测试按钮 ttk.Button(main_frame, text="测试OCR", command=lambda: self.test_ocr(tesseract_path_var.get(), poppler_path_var.get())).grid(row=2, column=0, columnspan=3, pady=10) # 保存按钮 ttk.Button(main_frame, text="保存", command=lambda: self.save_ocr_config(tesseract_path_var.get(), poppler_path_var.get(), config_window)).grid(row=3, column=0, columnspan=3, pady=10) main_frame.columnconfigure(1, weight=1) def browse_ocr_path(self, path_var, ocr_type): """浏览OCR路径""" if ocr_type == "tesseract": if sys.platform == "win32": file_path = filedialog.askopenfilename( title="选择Tesseract可执行文件", filetypes=[("可执行文件", "tesseract.exe"), ("所有文件", "*.*")] ) else: file_path = filedialog.askopenfilename( title="选择Tesseract可执行文件", filetypes=[("所有文件", "*")] ) else: # poppler file_path = filedialog.askdirectory(title="选择Poppler目录") if file_path: path_var.set(file_path) def test_ocr(self, tesseract_path, poppler_path): """测试OCR配置""" try: # 检查pytesseract是否可用 if 'pytesseract' not in sys.modules: # 尝试安装pytesseract try: subprocess.check_call([sys.executable, "-m", "pip", "install", "pytesseract"]) global pytesseract import pytesseract messagebox.showinfo("成功", "已安装pytesseract模块") except Exception as e: messagebox.showerror("错误", f"安装pytesseract失败: {str(e)}") return # 测试Tesseract if tesseract_path and os.path.exists(tesseract_path): pytesseract.pytesseract.tesseract_cmd = tesseract_path version = pytesseract.get_tesseract_version() messagebox.showinfo("测试成功", f"Tesseract版本: {version}") else: messagebox.showerror("测试失败", "Tesseract路径无效或文件不存在") # 测试Poppler (可选) if poppler_path and os.path.exists(poppler_path): messagebox.showinfo("测试成功", "Poppler路径有效") except Exception as e: messagebox.showerror("测试失败", f"OCR测试失败: {str(e)}") def save_ocr_config(self, tesseract_path, poppler_path, window): """保存OCR配置""" global TESSERACT_PATH, POPPLER_PATH, OCR_AVAILABLE try: # 检查pytesseract是否可用 if 'pytesseract' not in sys.modules: # 尝试安装pytesseract try: subprocess.check_call([sys.executable, "-m", "pip", "install", "pytesseract"]) global pytesseract import pytesseract except Exception as e: messagebox.showerror("错误", f"安装pytesseract失败: {str(e)}") return # 验证Tesseract路径 if tesseract_path and os.path.exists(tesseract_path): pytesseract.pytesseract.tesseract_cmd = tesseract_path pytesseract.get_tesseract_version() # 测试是否有效 TESSERACT_PATH = tesseract_path # 设置Poppler路径 if poppler_path and os.path.exists(poppler_path): POPPLER_PATH = poppler_path OCR_AVAILABLE = True messagebox.showinfo("成功", "OCR配置已保存并验证成功") window.destroy() else: messagebox.showerror("错误", "Tesseract路径无效或文件不存在") except Exception as e: messagebox.showerror("错误", f"OCR配置验证失败: {str(e)}") def pdf_to_cor(self): """将PDF转换为可搜索的文本PDF(OCR处理)""" if not self.pdf_path: messagebox.showwarning("警告", "请先选择PDF文件") return if not OCR_AVAILABLE: # 尝试自动配置OCR if not self.auto_configure_ocr(): messagebox.showerror("错误", "OCR功能不可用。请点击'OCR设置'按钮配置路径") return try: # 获取桌面路径 if os.name == 'nt': # Windows desktop_path = os.path.join(os.path.join(os.environ['USERPROFILE']), 'Desktop') else: # macOS 或 Linux desktop_path = os.path.join(os.path.join(os.expanduser('~')), 'Desktop') # 生成默认文件名 base_name = os.path.basename(self.pdf_path) default_filename = os.path.splitext(base_name)[0] + "_ocr.pdf" default_path = os.path.join(desktop_path, default_filename) # 询问保存路径 save_path = filedialog.asksaveasfilename( defaultextension=".pdf", filetypes=[("PDF文件", "*.pdf")], initialdir=desktop_path, initialfile=default_filename ) if not save_path: return self.status_bar.config(text="正在转换PDF到可搜索文本PDF...") self.root.update() # 创建进度窗口 progress_window = tk.Toplevel(self.root) progress_window.title("OCR处理进度") progress_window.geometry("400x100") progress_window.transient(self.root) progress_window.grab_set() progress_label = ttk.Label(progress_window, text="正在处理第 0 页,共 0 页") progress_label.pack(pady=10) progress_bar = ttk.Progressbar(progress_window, mode='determinate') progress_bar.pack(fill=tk.X, padx=20, pady=10) progress_window.update() # 打开原始PDF doc = fitz.open(self.pdf_path) total_pages = len(doc) # 创建新的PDF文档 output_doc = fitz.open() # 设置进度 progress_bar["maximum"] = total_pages progress_bar["value"] = 0 # 临时目录用于存储图像 with tempfile.TemporaryDirectory() as temp_dir: # 设置pdf2image参数 pdf2image_params = { 'dpi': 300, # 高DPI以获得更好的OCR结果 'output_folder': temp_dir, 'fmt': 'jpeg', 'thread_count': 4, 'use_pdftocairo': True } # 如果设置了Poppler路径,添加路径参数 if POPPLER_PATH: pdf2image_params['poppler_path'] = POPPLER_PATH # 将PDF转换为图像 images = convert_from_path(self.pdf_path, **pdf2image_params) for page_num, image in enumerate(images): # 更新进度 progress_bar["value"] = page_num + 1 progress_label.config(text=f"正在处理第 {page_num + 1} 页,共 {total_pages} 页") progress_window.update() # 使用Tesseract进行OCR pdf_bytes = pytesseract.image_to_pdf_or_hocr( image, extension='pdf', config='--psm 6 -c preserve_interword_spaces=1' ) # 将OCR结果转换为fitz文档 ocr_pdf = fitz.open("pdf", pdf_bytes) # 将OCR页面添加到输出文档 output_doc.insert_pdf(ocr_pdf) # 关闭OCR PDF ocr_pdf.close() # 保存输出文档 output_doc.save(save_path) output_doc.close() doc.close() # 关闭进度窗口 progress_window.destroy() self.status_bar.config(text=f"PDF已成功转换为可搜索文本PDF: {save_path}") messagebox.showinfo("成功", f"PDF文件已成功转换为可搜索文本PDF:\n{save_path}") except Exception as e: error_msg = f"转换PDF到可搜索文本PDF时发生错误: {str(e)}\n\n详细信息:\n{traceback.format_exc()}" messagebox.showerror("错误", error_msg) self.status_bar.config(text="转换失败") def auto_configure_ocr(self): """尝试自动配置OCR""" global TESSERACT_PATH, POPPLER_PATH, OCR_AVAILABLE try: # 尝试查找Tesseract if sys.platform == "win32": # Windows常见安装路径 possible_tesseract_paths = [ r"C:\Program Files\Tesseract-OCR\tesseract.exe", r"C:\Program Files (x86)\Tesseract-OCR\tesseract.exe" ] for path in possible_tesseract_paths: if os.path.exists(path): TESSERACT_PATH = path pytesseract.pytesseract.tesseract_cmd = path break # 如果自动查找失败,尝试在PATH中查找 if TESSERACT_PATH is None: try: result = subprocess.run(['where', 'tesseract'], capture_output=True, text=True) if result.returncode == 0: TESSERACT_PATH = result.stdout.split('\n')[0].strip() pytesseract.pytesseract.tesseract_cmd = TESSERACT_PATH except: pass # 尝试查找Poppler possible_poppler_paths = [ r"C:\poppler\bin", r"C:\Program Files\poppler\bin", r"C:\Program Files (x86)\poppler\bin", r"C:\Release-25.07.0-0\poppler-25.07.0\Library\bin" # 添加用户提供的路径 ] for path in possible_poppler_paths: if os.path.exists(path): POPPLER_PATH = path break else: # macOS和Linux try: result = subprocess.run(['which', 'tesseract'], capture_output=True, text=True) if result.returncode == 0: TESSERACT_PATH = result.stdout.strip() pytesseract.pytesseract.tesseract_cmd = TESSERACT_PATH except: pass # 测试Tesseract是否可用 if TESSERACT_PATH and os.path.exists(TESSERACT_PATH): pytesseract.get_tesseract_version() OCR_AVAILABLE = True return True except Exception as e: print(f"自动配置OCR失败: {str(e)}") return False def choose_friday_color(self): """选择周五颜色""" color = colorchooser.askcolor(title="选择周五颜色", initialcolor="#00ff00") if color[0]: # 用户选择了颜色 r, g, b = color[0] self.friday_color = (r/255, g/255, b/255) # 转换为0-1范围 # 更新颜色图标 self.friday_icon.config(bg=color[1]) def choose_sunday_color(self): """选择周日颜色""" color = colorchooser.askcolor(title="选择周日颜色", initialcolor="#ff0000") if color[0]: # 用户选择了颜色 r, g, b = color[0] self.sunday_color = (r/255, g/255, b/255) # 转换为0-1范围 # 更新颜色图标 self.sunday_icon.config(bg=color[1]) def choose_other_red_color(self): """选择指定红日颜色""" color = colorchooser.askcolor(title="选择指定红日颜色", initialcolor="#ff0000") if color[0]: # 用户选择了颜色 r, g, b = color[0] self.other_red_color = (r/255, g/255, b/255) # 转换为0-1范围 # 更新颜色图标 self.other_red_icon.config(bg=color[1]) def choose_drawing_color(self): """选择涂改颜色""" color = colorchooser.askcolor(title="选择涂改颜色", initialcolor="#ff0000") if color[0]: # 用户选择了颜色 r, g, b = color[0] self.drawing_color = (r/255, g/255, b/255) # 转换为0-1范围 # 更新颜色图标 self.drawing_icon.config(bg=color[1]) def save_as_pdf(self): """保存PDF文件""" if not self.highlighted_pdf_path or not os.path.exists(self.highlighted_pdf_path): messagebox.showwarning("警告", "请先处理PDF文件") return try: # 获取桌面路径 if os.name == 'nt': # Windows desktop_path = os.path.join(os.path.join(os.environ['USERPROFILE']), 'Desktop') else: # macOS 或 Linux desktop_path = os.path.join(os.path.join(os.expanduser('~')), 'Desktop') # 生成默认文件名 - 修复: 使用os.path.splitext而不是os.path.splicext base_name = os.path.basename(self.pdf_path) default_filename = os.path.splitext(base_name)[0] + "_highlighted.pdf" default_path = os.path.join(desktop_path, default_filename) # 询问保存路径 save_path = filedialog.asksaveasfilename( defaultextension=".pdf", filetypes=[("PDF文件", "*.pdf")], initialdir=desktop_path, initialfile=default_filename ) if save_path: # 复制文件 shutil.copy2(self.highlighted_pdf_path, save_path) self.status_bar.config(text=f"PDF已保存到: {save_path}") messagebox.showinfo("成功", f"PDF文件已保存到:\n{save_path}") except Exception as e: messagebox.showerror("错误", f"保存PDF时发生错误: {str(e)}") def toggle_erasing(self): """切换擦除模式""" self.erasing_mode = not self.erasing_mode self.drawing_mode = False # 确保涂改模式关闭 if self.erasing_mode: self.status_bar.config(text="擦除模式已启用 - 点击并拖动以擦除手动涂改") else: self.status_bar.config(text="就绪") def sync_scroll_x(self, *args): """同步两个Canvas的水平滚动""" self.canvas1.xview(*args) self.canvas2.xview(*args) def scroll_pages(self, *args): """处理页面滚动条事件""" if not self.pdf_document or self.total_pages == 0: return # 获取滚动条位置 if len(args) > 1 and args[0] == "moveto": scroll_pos = float(args[1]) elif len(args) > 2 and args[0] == "scroll": units = int(args[1]) scroll_pos = float(args[2]) else: return # 计算目标页面 target_page = int(scroll_pos * (self.total_pages - 1)) # 确保目标页面是偶数(因为每次显示两页) if target_page % 2 != 0: target_page = max(0, target_page - 1) # 更新当前页面 if target_page != self.current_page: self.current_page = target_page self.display_page() def update_scrollbar(self): """更新滚动条位置""" if not self.pdf_document or self.total_pages == 0: return # 计算滚动条位置 scroll_pos = self.current_page / (self.total_pages - 1) if self.total_pages > 1 else 0 # 更新滚动条 self.page_scrollbar.set(scroll_pos, scroll_pos + 1/(self.total_pages)) def on_mousewheel(self, event): """处理鼠标滚轮事件""" if event.num == 4 or event.delta > 0: # 向上滚动 self.prev_page() elif event.num == 5 or event.delta < 0: # 向下滚动 self.next_page() def toggle_drawing(self): """切换手动涂改模式""" self.drawing_mode = not self.drawing_mode self.erasing_mode = False # 确保擦除模式关闭 if self.drawing_mode: self.status_bar.config(text="手动涂改模式已启用 - 点击并拖动以涂改") else: self.status_bar.config(text="就绪") def update_brush_width(self, event): """更新笔宽""" try: self.brush_width = int(self.brush_width_var.get()) except ValueError: self.brush_width = 15 self.brush_width_var.set("15") def update_alpha(self, event): """更新透明度""" try: self.highlight_alpha = int(self.alpha_var.get()) except ValueError: self.highlight_alpha = 50 self.alpha_var.set("50") def start_drawing(self, event): """开始手动涂改或擦除""" if not (self.drawing_mode or self.erasing_mode): return self.start_x = event.x self.start_y = event.y self.current_drawing = [] def draw(self, event): """手动涂改或擦除""" if (not self.drawing_mode and not self.erasing_mode) or self.start_x is None or self.start_y is None: return # 获取当前Canvas current_canvas = event.widget if self.drawing_mode: # 使用涂改颜色,确保使用透明效果 r, g, b = self.drawing_color # 创建半透明颜色 - 使用alpha混合公式 # 背景色为白色(255,255,255),前景色为涂改颜色 alpha_factor = self.highlight_alpha / 100 r_mixed = int((r * alpha_factor + 1.0 * (1 - alpha_factor)) * 255) g_mixed = int((g * alpha_factor + 1.0 * (1 - alpha_factor)) * 255) b_mixed = int((b * alpha_factor + 1.0 * (1 - alpha_factor)) * 255) color = "#{:02x}{:02x}{:02x}".format(r_mixed, g_mixed, b_mixed) # 使用椭圆形实现圆形笔刷效果 half_width = self.brush_width # 创建荧光笔效果 - 使用点状图案实现透明效果 oval_id = current_canvas.create_oval( event.x - half_width, event.y - half_width, event.x + half_width, event.y + half_width, fill=color, outline="", stipple="gray50" # 使用点状图案实现透明效果 ) self.current_drawing.append(oval_id) # 如果移动距离较大,在两点之间绘制线条填充间隙 distance = ((event.x - self.start_x)**2 + (event.y - self.start_y)**2)**0.5 if distance > self.brush_width: line_id = current_canvas.create_line( self.start_x, self.start_y, event.x, event.y, width=self.brush_width*2, fill=color, capstyle=tk.ROUND, joinstyle=tk.ROUND, stipple="gray50" # 使用点状图案实现透明效果 ) self.current_drawing.append(line_id) elif self.erasing_mode: # 擦除模式 - 查找并删除与当前位置重叠的手动涂改项目 half_width = self.brush_width # 获取当前位置附近的所有项目 items = current_canvas.find_overlapping( event.x - half_width, event.y - half_width, event.x + half_width, event.y + half_width ) # 只删除手动涂改的项目(在drawing_items中的项目) all_drawing_items = self.get_all_drawing_items() for item in items: if item in all_drawing_items: current_canvas.delete(item) # 从drawing_items中移除 for drawing in self.drawing_items: if item in drawing: drawing.remove(item) # 如果移动距离较大,在两点之间查找更多项目 distance = ((event.x - self.start_x)**2 + (event.y - self.start_y)**2)**0.5 if distance > self.brush_width: # 计算线段上的多个点 steps = int(distance / (self.brush_width / 2)) + 1 for i in range(steps): x = self.start_x + (event.x - self.start_x) * i / steps y = self.start_y + (event.y - self.start_y) * i / steps items = current_canvas.find_overlapping( x - half_width, y - half_width, x + half_width, y + half_width ) # 只删除手动涂改的项目 for item in items: if item in all_drawing_items: current_canvas.delete(item) # 从drawing_items中移除 for drawing in self.drawing_items: if item in drawing: drawing.remove(item) self.start_x = event.x self.start_y = event.y def get_all_drawing_items(self): """获取所有绘制项目的ID""" all_items = [] for drawing in self.drawing_items: all_items.extend(drawing) return all_items def stop_drawing(self, event): """停止手动涂改或擦除""" if self.current_drawing and (self.drawing_mode or self.erasing_mode): if self.drawing_mode: # 只在涂改模式下保存绘制项目 self.drawing_items.append(self.current_drawing.copy()) self.start_x = None self.start_y = None self.current_drawing = [] def undo_drawing(self): """撤回最后一次涂改""" if not self.drawing_items: return # 获取最后一次涂改的项目 last_drawing = self.drawing_items.pop() # 从两个Canvas中删除这些项目 for item_id in last_drawing: self.canvas1.delete(item_id) self.canvas2.delete(item_id) self.status_bar.config(text="已撤回最后一次涂改") def browse_file(self): try: file_path = filedialog.askopenfilename(filetypes=[("PDF文件", "*.pdf")]) if file_path: self.file_path.delete(0, tk.END) self.file_path.insert(0, file_path) self.pdf_path = file_path self.load_pdf() except Exception as e: messagebox.showerror("错误", f"浏览文件时发生错误: {str(e)}") def load_pdf(self): if not self.pdf_path: return try: if self.pdf_document: self.pdf_document.close() self.pdf_document = fitz.open(self.pdf_path) self.total_pages = len(self.pdf_document) self.current_page = 0 self.display_page() self.update_scrollbar() except Exception as e: messagebox.showerror("错误", f"无法打开PDF文件: {str(e)}") def display_page(self): if not self.pdf_document or self.total_pages == 0: return self.page_label.config(text=f"页面: {self.current_page+1}-{min(self.current_page+2, self.total_pages)}/{self.total_pages}") try: # 清空当前图像引用 self.current_images = [] # 显示第一页 if self.current_page < self.total_pages: page1 = self.pdf_document[self.current_page] zoom = 1.0 # 缩小一点以适应两页显示 mat = fitz.Matrix(zoom, zoom) pix1 = page1.get_pixmap(matrix=mat) # 转换为PhotoImage img_data1 = pix1.tobytes("ppm") img1 = tk.PhotoImage(data=img_data1) self.current_images.append(img1) # 保持引用 # 更新Canvas self.canvas1.config(width=min(img1.width(), 550), height=min(img1.height(), 500)) self.canvas1.delete("all") self.canvas1.create_image(0, 0, anchor=tk.NW, image=img1) # 设置滚动区域 self.canvas1.config(scrollregion=self.canvas1.bbox(tk.ALL)) # 显示第二页(如果有) if self.current_page + 1 < self.total_pages: page2 = self.pdf_document[self.current_page + 1] zoom = 1.0 # 缩小一点以适应两页显示 mat = fitz.Matrix(zoom, zoom) pix2 = page2.get_pixmap(matrix=mat) # 转换为PhotoImage img_data2 = pix2.tobytes("ppm") img2 = tk.PhotoImage(data=img_data2) self.current_images.append(img2) # 保持引用 # 更新Canvas self.canvas2.config(width=min(img2.width(), 550), height=min(img2.height(), 500)) self.canvas2.delete("all") self.canvas2.create_image(0, 0, anchor=tk.NW, image=img2) # 设置滚动区域 self.canvas2.config(scrollregion=self.canvas2.bbox(tk.ALL)) else: # 如果没有第二页,清空第二个Canvas self.canvas2.delete("all") self.canvas2.config(width=0, height=0) # 更新滚动条位置 self.update_scrollbar() except Exception as e: messagebox.showerror("错误", f"显示页面时发生错误: {str(e)}") def prev_page(self): if self.current_page > 0: self.current_page = max(0, self.current_page - 2) self.display_page() def next_page(self): if self.pdf_document and self.current_page + 2 < self.total_pages: self.current_page += 2 self.display_page() def process_pdf(self): if not self.pdf_path: messagebox.showwarning("警告", "请先选择PDF文件") return self.status_bar.config(text="正在处理PDF...") self.root.update() try: # 创建临时文件保存处理后的PDF temp_file = tempfile.NamedTemporaryFile(suffix='.pdf', delete=False) self.highlighted_pdf_path = temp_file.name temp_file.close() # 打开原始PDF doc = fitz.open(self.pdf_path) # 遍历每一页 for page_num in range(len(doc)): page = doc[page_num] # 获取页面文本 - 使用更精确的搜索方法 text = page.get_text("text") # 查找所有日期格式的文本 date_pattern = r"\d{4}-\d{2}-\d{2}" dates = re.findall(date_pattern, text) print(f"在第 {page_num+1} 页找到的日期: {dates}") # 调试信息 # 对于每个找到的日期,查找其在页面上的位置并高亮 for date_str in dates: highlight_type = self.should_highlight(date_str) if highlight_type > 0: print(f"高亮日期: {date_str}, 类型: {highlight_type}") # 调试信息 # 搜索日期文本的位置 text_instances = page.search_for(date_str) print(f"找到 {len(text_instances)} 个匹配项") # 调试信息 for inst in text_instances: # 扩大高亮区域 - 扩大2倍 expanded_rect = self.expand_rect(inst, 2.0) # 使用高亮注释 highlight = page.add_highlight_annot(expanded_rect) # 根据日期类型设置颜色 if highlight_type == 1: # 周五 highlight.set_colors(stroke=self.friday_color) elif highlight_type == 2: # 周日 highlight.set_colors(stroke=self.sunday_color) elif highlight_type == 3: # 指定红日 highlight.set_colors(stroke=self.other_red_color) highlight.set_opacity(self.highlight_alpha/100) # 修正透明度设置 highlight.update() # 保存处理后的PDF doc.save(self.highlighted_pdf_path) doc.close() # 重新加载处理后的PDF if self.pdf_document: self.pdf_document.close() self.pdf_document = fitz.open(self.highlighted_pdf_path) self.total_pages = len(self.pdf_document) self.current_page = 0 self.display_page() self.status_bar.config(text="处理完成") messagebox.showinfo("成功", "PDF处理完成,日期已高亮显示") except Exception as e: self.status_bar.config(text="处理失败") error_msg = f"处理PDF时发生错误: {str(e)}\n\n详细信息:\n{traceback.format_exc()}" messagebox.showerror("错误", error_msg) def expand_rect(self, rect, factor): """扩大矩形区域""" width = rect.width height = rect.height # 计算扩大后的尺寸 new_width = width * factor new_height = height * factor # 计算中心点 center_x = rect.x0 + width / 2 center_y = rect.y0 + height / 2 # 创建新矩形 new_rect = fitz.Rect( center_x - new_width / 2, center_y - new_height / 2, center_x + new_width / 2, center_y + new_height / 2 ) return new_rect def should_highlight(self, date_str): try: date_obj = datetime.strptime(date_str, "%Y-%m-%d") weekday = date_obj.weekday() # 检查是否为指定日期 is_specified_date = date_str in self.specified_dates # 检查是否为周五(4)或周日(6) if weekday == 4: # 周五 # 如果同时是指定日期,则使用指定红日颜色 if is_specified_date: return 3 # 指定红日 else: return 1 # 周五 elif weekday == 6: # 周日 return 2 # 周日 elif is_specified_date: # 指定指定日期 return 3 # 指定红日 except ValueError: pass return 0 # 不高亮 def show_print_dialog(self): """显示打印设置对话框""" if not self.highlighted_pdf_path or not os.path.exists(self.highlighted_pdf_path): messagebox.showwarning("警告", "请先处理PDF文件") return try: # 创建打印设置对话框 print_dialog = tk.Toplevel(self.root) print_dialog.title("打印设置") print_dialog.geometry("500x600") print_dialog.transient(self.root) print_dialog.grab_set() # 主框架 - 使用grid布局 main_frame = ttk.Frame(print_dialog, padding="10") main_frame.pack(fill=tk.BOTH, expand=True) # 打印机信息区域 printer_frame = ttk.LabelFrame(main_frame, text="打印机", padding="5") printer_frame.grid(row=0, column=0, sticky=(tk.W, tk.E), pady=5) printer_frame.columnconfigure(1, weight=1) # 获取系统打印机列表 printers = self.get_printer_list() # 打印机名称和属性 ttk.Label(printer_frame, text="名称(M):").grid(row=0, column=0, sticky=tk.W, pady=2) # 打印机选择下拉框 self.printer_var = tk.StringVar() if printers: self.printer_var.set(printers[0]) # 默认选择第一个打印机 printer_combo = ttk.Combobox(printer_frame, textvariable=self.printer_var, values=printers, state="readonly") printer_combo.grid(row=0, column=1, sticky=(tk.W, tk.E), padx=5, pady=2) ttk.Button(printer_frame, text="属性(P)...", command=self.show_printer_properties).grid(row=0, column=2, padx=5, pady=2) # 打印选项区域 options_frame = ttk.LabelFrame(main_frame, text="打印选项", padding="5") options_frame.grid(row=1, column=0, sticky=(tk.W, tk.E), pady=5) options_frame.columnconfigure(1, weight=1) # 纸张来源 ttk.Label(options_frame, text="纸张来源(S):").grid(row=0, column=0, sticky=tk.W, pady=2) paper_combo = ttk.Combobox(options_frame, values=["使用打印机设置", "自动选择", "手动送纸"], width=20, state="readonly") paper_combo.set("使用打印机设置") paper_combo.grid(row=0, column=1, sticky=tk.W, padx=5, pady=2) # 选择纸张 ttk.Label(options_frame, text="选择纸张:").grid(row=1, column=0, sticky=tk.W, pady=5) paper_type_combo = ttk.Combobox(options_frame, values=["A4", "Letter", "Legal", "A3"], width=20, state="readonly") paper_type_combo.set("A4") paper_type_combo.grid(row=1, column=1, sticky=tk.W, padx=5, pady=5) # 打印范围区域 range_frame = ttk.LabelFrame(main_frame, text="页码范围", padding="5") range_frame.grid(row=2, column=0, sticky=(tk.W, tk.E), pady=5) range_frame.columnconfigure(0, weight=1) # 页码范围选择 self.range_var = tk.StringVar(value="全部") ttk.Radiobutton(range_frame, text="全部(A)", variable=self.range_var, value="全部").grid(row=0, column=0, sticky=tk.W) ttk.Radiobutton(range_frame, text="当前页(U)", variable=self.range_var, value="当前页").grid(row=1, column=0, sticky=tk.W) ttk.Radiobutton(range_frame, text="页码范围(G)", variable=self.range_var, value="页码范围").grid(row=2, column=0, sticky=tk.W) # 页码范围输入 self.range_entry = ttk.Entry(range_frame) self.range_entry.grid(row=3, column=0, sticky=(tk.W, tk.E), pady=2) ttk.Label(range_frame, text="请键入页码和/或用逗号分隔的页码范围 (例如:1,3,5-12)").grid(row=4, column=0, sticky=tk.W) # 副本区域 copies_frame = ttk.LabelFrame(main_frame, text="副本", padding="5") copies_frame.grid(row=3, column=0, sticky=(tk.W, tk.E), pady=5) # 份数 self.copies_var = tk.IntVar(value=1) copies_spin = tttk.Spinbox(copies_frame, from_=1, to=999, textvariable=self.copies_var, width=5) copies_spin.grid(row=0, column=0, sticky=tk.W, padx=5) ttk.Label(copies_frame, text="份数(C)").grid(row=0, column=1, sticky=tk.W) # 逐份打印 self.collate_var = tk.BooleanVar(value=True) ttk.Checkbutton(copies_frame, text="逐份打印(T)", variable=self.collate_var).grid(row=0, column=2, sticky=tk.W, padx=20) # 并打和缩放区域 layout_frame = ttk.LabelFrame(main_frame, text="并打和缩放", padding="5") layout_frame.grid(row=4, column=0, sticky=(tk.W, tk.E), pady=5) layout_frame.columnconfigure(1, weight=1) # 每页版数 ttk.Label(layout_frame, text="每页的版数(H):").grid(row=0, column=0, sticky=tk.W, pady=2) self.pages_combo = ttk.Combobox(layout_frame, values=["1", "2", "4", "6", "9", "16"], width=5, state="readonly") self.pages_combo.set("1") self.pages_combo.grid(row=0, column=1, sticky=tk.W, padx=5, pady=2) # 按纸型缩放 ttk.Label(layout_frame, text="按纸型缩放(Z):").grid(row=1, column=0, sticky=tk.W, pady=2) self.scale_combo = ttk.Combobox(layout_frame, values=["无缩放", "缩小至可打印区域", "适合可打印区域", "实际大小"], width=20, state="readonly") self.scale_combo.set("无缩放") self.scale_combo.grid(row=1, column=1, sticky=tk.W, padx=5, pady=2) # 并打顺序 ttk.Label(layout_frame, text="并打顺序:").grid(row=2, column=0, sticky=tk.W, pady=2) self.order_var = tk.StringVar(value="从左到右") ttk.Radiobutton(layout_frame, text="从左到右(F)", variable=self.order_var, value="从左到右").grid(row=2, column=1, sticky=tk.W) ttk.Radiobutton(layout_frame, text="从上到下(B)", variable=self.order_var, value="从上到下").grid(row=2, column=2, sticky=tk.W, padx=5) # 分隔线 self.separator_var = tk.BooleanVar(value=False) ttk.Checkbutton(layout_frame, text="并打时绘制分隔线(D)", variable=self.separator_var).grid(row=3, column=0, columnspan=3, sticky=tk.W, pady=2) # 其他选项 other_frame = ttk.Frame(layout_frame) other_frame.grid(row=4, column=0, columnspan=3, sticky=(tk.W, tk.E), pady=2) self.reverse_var = tk.BooleanVar(value=False) ttk.Checkbutton(other_frame, text="反片打印(I)", variable=self.reverse_var).grid(row=0, column=0, sticky=tk.W) self.to_file_var = tk.BooleanVar(value=False) ttk.Checkbutton(other_frame, text="打印到文件(L)", variable=self.to_file_var).grid(row=0, column=1, sticky=tk.W, padx=5) self.duplex_var = tk.BooleanVar(value=False) ttk.Checkbutton(other_frame, text="双面打印(X)", variable=self.duplex_var).grid(row=0, column=2, sticky=tk.W, padx=5) # 按钮区域 button_frame = ttk.Frame(main_frame) button_frame.grid(row=5, column=0, sticky=tk.E, pady=10) ttk.Button(button_frame, text="打印预览", command=self.print_preview).pack(side=tk.LEFT, padx=5) ttk.Button(button_frame, text="取消", command=print_dialog.destroy).pack(side=tk.LEFT, padx=5) ttk.Button(button_frame, text="开始打印", command=lambda: self.print_pdf(print_dialog)).pack(side=tk.LEFT, padx=5) # 配置权重使组件可以扩展 main_frame.columnconfigure(0, weight=1) for i in range(6): main_frame.rowconfigure(i, weight=0) main_frame.rowconfigure(5, weight=1) except Exception as e: messagebox.showerror("错误", f"打开打印设置时发生错误: {str(e)}") def get_printer_list(self): """获取系统打印机列表""" if not WIN32PRINT_AVAILABLE: # 如果win32print不可用,尝试使用系统命令获取打印机列表 try: if sys.platform == "win32": # Windows系统 result = subprocess.run(['wmic', 'printer', 'get', 'name'], capture_output=True, text=True) if result.returncode == 0: printers = [] lines = result.stdout.split('\n') for line in lines: line = line.strip() if line and not line.startswith('Name'): printers.append(line) return printers if printers else ["默认打印机"] elif sys.platform == "darwin": # macOS系统 result = subprocess.run(['lpstat', '-a'], capture_output=True, text=True) if result.returncode == 0: printers = [] lines = result.stdout.split('\n') for line in lines: if line: parts = line.split() if parts: printers.append(parts[0]) return printers if printers else ["默认打印机"] else: # Linux系统 result = subprocess.run(['lpstat', '-a'], capture_output=True, text=True) if result.returncode == 0: printers = [] lines = result.stdout.split('\n') for line in lines: if line: parts = line.split() if parts: printers.append(parts[0]) return printers if printers else ["默认打印机"] except: pass return ["默认打印机"] try: printers = [] for printer in win32print.EnumPrinters(win32print.PRENER_ENUM_LOCAL | win32print.PRENER_ENUM_CONNECTIONS): printers.append(printer[2]) return printers except: # 如果无法获取打印机列表,返回默认打印机 try: default_printer = win32print.GetDefaultPrinter() return [default_printer] if default_printer else ["默认打印机"] except: return ["默认打印机"] def show_prriter_properties(self): """显示打印机属性""" if not WIN32PRINT_AVAILABLE: messagebox.showinfo("打印机属性", "在此系统上无法访问打印机属性。") return try: printer_name = self.printer_var.get() if printer_name and printer_name != "默认打印机": # 打开打印机属性对话框 hprinter = win32print.OpenPrinter(printer_name) win32print.PrinterProperties(self.root.winfo_id(), hprinter) win32print.ClosePrinter(hprinter) else: messagebox.showinfo("打印机属性", "请先选择一个有效的打印机。") except Exception as e: messagebox.showerror("错误", f"无法打开打印机属性: {str(e)}") def print_preview(self): """打印预览功能""" if not self.highlighted_pdf_path or not os.path.exists(self.highlighted_pdf_path): messagebox.showwarning("警告", "请先处理PDF文件") return try: # 创建预览窗口 preview = tk.Toplevel(self.root) preview.title("打印预览") preview.geometry("800x600") # 创建Canvas用于显示预览 preview_canvas = tk.Canvas(preview, bg="white") preview_canvas.pack(fill=tk.BOTH, expand=True) # 添加滚动条 v_scrollbar = ttk.Scrollbar(preview, orient=tk.VERTICAL, command=preview_canvas.yview) h_scrollbar = ttk.Scrollbar(preview, orient=tk.HORIZONTAL, command=preview_canvas.xview) preview_canvas.configure(yscrollcommand=v_scrollbar.set, xscrollcommand=h_scrollbar.set) v_scrollbar.pack(side=tk.RIGHT, fill=tk.Y) h_scrollbar.pack(side=tk.BOTTOM, fill=tk.X) preview_canvas.pack(side=tk.LEFT, fill=tk.BOTH, expand=True) # 加载PDF第一页作为预览 doc = fitz.open(self.highlighted_pdf_path) page = doc[0] zoom = 1.0 mat = fitz.Matrix(zoom, zoom) pix = page.get_pixmap(matrix=mat) # 转换为PhotoImage img_data = pix.tobytes("ppm") img = tk.PhotoImage(data=img_data) # 保持引用 preview.img_ref = img # 显示图像 preview_canvas.create_image(0, 0, anchor=tk.NW, image=img) preview_canvas.config(scrollregion=preview_canvas.bbox(tk.ALL)) doc.close() except Exception as e: messagebox.showerror("错误", f"打印预览时发生错误: {str(e)}") def print_pdf(self, dialog): """实际打印PDF""" dialog.destroy() if not self.highlighted_pdf_path or not os.path.exists(self.highlighted_pdf_path): messagebox.showwarning("警告", "请先处理PDF文件") return try: # 获取打印设置 printer_name = self.printer_var.get() range_type = self.range_var.get() page_range = self.range_entry.get() if range_type == "页码范围" else None copies = self.copies_var.get() # 构建打印命令 if sys.platform == "win32": # 使用系统打印对话框 os.startfile(self.highlighted_pdf_path, "print") elif sys.platform == "darwin": # macOS cmd = ['lp'] if printer_name and printer_name != "默认打印机": cmd.extend(['-d', printer_name]) if copies > 1: cmd.extend(['-n', str(copies)]) if page_range: cmd.extend(['-o', f'page-ranges={page_range}']) cmd.append(self.highlighted_pdf_path) subprocess.run(cmd) else: # Linux cmd = ['lp'] if printer_name and printer_name != "默认打印机": cmd.extend(['-d', printer_name]) if copies > 1: cmd.extend(['-n', str(copies)]) if page_range: cmd.extend(['-o', f'page-ranges={page_range}']) cmd.append(self.highlighted_pdf_path) subprocess.run(cmd) self.status_bar.config(text="打印任务已发送") except Exception as e: messagebox.showerror("错误", f"打印失败: {str(e)}") def __del__(self): # 清理临时文件 if self.highlighted_pdf_path and os.path.exists(self.highlighted_pdf_path): try: os.unlink(self.highlighted_pdf_path) except: pass # 关闭PDF文档 if self.pdf_document: try: self.pdf_document.close() except: pass def main(): try: # 检查是否安装了PyMuPDF try: import fitz except ImportError: messagebox.showerror("缺少依赖", "请安装PyMuPDF库: pip install PyMuPDF") return root = tk.Tk() app = PDFDateHighlighter(root) root.mainloop() except Exception as e: error_msg = f"程序发生未预期错误: {str(e)}\n\n详细信息:\n{traceback.format_exc()}" messagebox.showerror("严重错误", error_msg) if __name__ == "__main__": main() 转换PDF到可搜索文本PDF时发生错误:name 'convert_from_path'is not defined 详细信息: Traceback (most recent call last): File"C:\Users\卓越生活\Desktop\PDF日期处理工具.py",line 471,in pdf to cor images = convert_from_path(self.pdf_path, *pdf2image_params) AAAAAAAAAAAAAAAAA NameError: name 'convert from path' is not defined
最新发布
08-22
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值