How to convert web pages and word doc to PDF files?

doc2pdf

How to convert the web pages and word document to PDF files with text / image format kept?

Sometimes, I will do some research on the internet and save some pages locally for afterward reviewing. And some problems like the text font, page layout, images are missing, or even could not open this address  and so on, a lots of problems appear.  So the best solution to deal with it is try to save them all at the first time. You may think we could copy the whole content from the web page into the word and save it as a world file. Yes, it works, at least all text will be saved. But something still missing, I tried several times like this before, but the result was not so good. I need to re-arrange the text and images layout in the word, that was a time consuming work for some guys that were not good at text layout with word.

 

There is a software named “CutePDF writer” which run based on the Print service. All most every program have implemented “Print” feature(“File” -> “Print”).  With this kind of software you could convert a word document to PDF file, convert web pages to PDF file. And the same time, the created PDF files will have the same text font, passage layout, images, text style with the original one. I have cleaned the document about how to use CutePDF, you could download the document from the top link.

转载于:https://www.cnblogs.com/open-coder/archive/2012/08/24/2653567.html

报错如下: File <tokenize>:446 except ImportError: ^ IndentationError: unindent does not match any outer indentation level # 尝试12 库版本冲突,NUmpy需要1版本,有时又要2版本,进行分类控制 # 安全的版本检查和兼容性处理 import sys import warnings import logging # 配置日志记录 logging.basicConfig(level=logging.WARNING, format='%(asctime)s - %(levelname)s - %(message)s') logger = logging.getLogger(__name__) def check_numpy_compatibility(): """检查NumPy版本兼容性并处理问题""" try: import numpy as np numpy_version = tuple(map(int, np.version.version.split('.')[:2])) # 检查NumPy是否为2.0+ if numpy_version >= (2, 0): logger.warning("检测到不兼容的NumPy 2.0+版本") # 提供明确的错误信息而不是尝试自动降级 error_msg = """ ️ 严重兼容性问题 ⚠️ 检测到NumPy 2.0+版本,但当前环境需要NumPy 1.x版本。 解决方案: 1. 创建一个新的虚拟环境: python -m venv ocr-env source ocr-env/bin/activate # Linux/Mac .\ocr-env\Scripts\activate # Windows 2. 安装兼容版本的NumPy: pip install "numpy<2" 3. 然后安装其他依赖: pip install pandas opencv-python paddlepaddle paddleocr cnocr pdf2image pdfplumber PyMuPDF 4. 重新运行程序 注意: Jupyter环境中无法直接降级NumPy,必须使用虚拟环境! """ raise ImportError(error_msg) return np except ImportError: # NumPy未安装,尝试安装兼容版本 try: logger.info("NumPy未安装,尝试安装兼容版本...") import subprocess subprocess.check_call([sys.executable, "-m", "pip", "install", "numpy<2"]) import numpy as np return np except Exception as e: logger.error(f"安装NumPy失败: {str(e)}") raise ImportError("无法安装兼容的NumPy版本,请手动安装: pip install \"numpy<2\"") # 尝试获取兼容的NumPy版本 try: np = check_numpy_compatibility() logger.info(f"使用兼容的NumPy版本: {np.version.version}") except ImportError as e: # 在GUI环境中显示错误信息 import tkinter.messagebox as messagebox messagebox.showerror("环境配置错误", str(e)) sys.exit(1) import pandas as pd import pdfplumber # PDF解析 import tkinter as tk from tkinter import filedialog, ttk, messagebox import os import threading from concurrent.futures import ThreadPoolExecutor import time import gc import psutil import openpyxl # 用于高效读取大型Excel import re import warnings import pandas as pd import pdfplumber import re import pandas as pd # 增强的依赖管理和错误处理 import sys import warnings import logging import traceback import os # 配置高级日志记录 logging.basicConfig( level=logging.INFO, format='%(asctime)s [%(levelname)s] %(message)s', handlers=[ logging.FileHandler("ocr_system.log", encoding='utf-8'), logging.StreamHandler() ] ) logger = logging.getLogger("PDF-OCR-System") # 1. 解决NumPy兼容性问题 def resolve_numpy_conflict(): """确保NumPy版本与SciPy兼容""" try: import numpy as np # 检查NumPy版本是否在SciPy要求的范围内 [1.16.5, 1.23.0) numpy_version = tuple(map(int, np.__version__.split('.')[:3])) if numpy_version >= (1, 16, 5) and numpy_version < (1, 23, 0): logger.info(f"NumPy版本兼容: {np.__version__}") return np logger.warning(f"检测到不兼容的NumPy版本: {np.__version__}") # 尝试安装兼容版本 target_version = "1.22.4" # SciPy兼容的稳定版本 logger.info(f"尝试安装NumPy {target_version}...") import subprocess subprocess.run( [sys.executable, "-m", "pip", "install", f"numpy=={target_version}"], check=True ) # 重新加载NumPy import importlib importlib.reload(np) logger.info(f"NumPy成功降级到: {np.__version__}") return np except ImportError: logger.error("NumPy未安装,正在安装兼容版本...") import subprocess subprocess.run( [sys.executable, "-m", "pip", "install", "numpy==1.22.4"], check=True ) import numpy as np return np except Exception as e: logger.error(f"NumPy版本处理失败: {str(e)}") # 回退到系统NumPy import numpy as np return np # 安全初始化NumPy try: np = resolve_numpy_conflict() except Exception as e: logger.critical(f"NumPy初始化失败: {str(e)}") sys.exit(1) # 2. 安装缺失的关键依赖 def install_missing_dependencies(): """安装PDF处理和OCR所需的依赖""" missing_packages = [] # 检查关键依赖 required_packages = { "pandas": "2.1.4", "pdfplumber": "0.10.4", "paddleocr": "3.2.0", "cnocr": "2.3.0.2", "pdf2image": "1.16.3", "PyMuPDF": "1.24.0", # fitz模块所属包 "opencv-python": "4.9.0.80", "openpyxl": "3.1.2", "psutil": "5.9.7", "scipy": "1.13.0" # 与NumPy 1.22.4兼容的版本 } import importlib for pkg, version in required_packages.items(): try: mod = importlib.import_module(pkg) installed_version = getattr(mod, '__version__', "未知") logger.info(f"{pkg}已安装: {installed_version}") except ImportError: missing_packages.append(f"{pkg}=={version}") if missing_packages: logger.warning(f"缺失依赖: {', '.join(missing_packages)}") try: import subprocess install_cmd = [sys.executable, "-m", "pip", "install"] + missing_packages subprocess.run(install_cmd, check=True) logger.info("缺失依赖安装完成") except Exception as e: logger.error(f"依赖安装失败: {str(e)}") return False return True # 安装缺失依赖 install_missing_dependencies() # 先定义 PriceComparator 类 class PriceComparator: def __init__(self): self.history_df = None self.supplier_data = {} self.pdf_files = [] # 存储处理的PDF文件路径 self.ocr_engine = None # OCR引擎实例 def initialize_ocr(self): """初始化OCR引擎(按需初始化)""" if self.ocr_engine is None: # 使用组合OCR策略:PaddleOCR + CnOCR self.ocr_engine = { 'paddle': PaddleOCR(use_angle_cls=True, lang="ch", use_gpu=False), 'cnocr': CnOcr() } print("OCR引擎初始化完成") def load_history_data(self, excel_path): """加载历史订单数据(优化版,使用openpyxl只读模式)""" # ...(保持不变)... def preprocess_image(self, image): """图像预处理:提高OCR识别率""" # 转换为OpenCV格式 img = np.array(image) # 1. 灰度转换 gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY) # 2. 自适应直方图均衡化 clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8)) enhanced = clahe.apply(gray) # 3. 非局部均值去噪 denoised = cv2.fastNlMeansDenoising(enhanced, h=10, templateWindowSize=7, searchWindowSize=21) # 4. 锐化处理 kernel = np.array([[0, -1, 0], [-1, 5, -1], [0, -1, 0]]) sharpened = cv2.filter2D(denoised, -1, kernel) return Image.fromarray(sharpened) def ocr_image(self, image): """使用组合OCR引擎识别图像文本""" self.initialize_ocr() # 预处理图像 processed_img = self.preprocess_image(image) # 使用PaddleOCR识别 paddle_result = self.ocr_engine['paddle'].ocr(np.array(processed_img), cls=True) paddle_text = "" if paddle_result and paddle_result[0]: for line in paddle_result[0]: if line and line[1]: paddle_text += line[1][0] + "\n" # 使用CnOCR识别 cnocr_result = self.ocr_engine['cnocr'].ocr(np.array(processed_img)) cnocr_text = "\n".join([line['text'] for line in cnocr_result]) # 合并结果(优先使用PaddleOCR,补充CnOCR的结果) combined_text = paddle_text for line in cnocr_text.split('\n'): if line.strip() and line not in paddle_text: combined_text += line + "\n" return combined_text def parse_scanned_pdf(self, pdf_path, progress_callback=None): """解析扫描版PDF文件(基于OCR)""" print(f"开始处理扫描版PDF: {pdf_path}") start_time = time.time() # 使用PyMuPDF获取页数 with fitz.open(pdf_path) as doc: total_pages = doc.page_count items = [] # 创建临时目录存储转换后的图像 with tempfile.TemporaryDirectory() as temp_dir: # 将PDF转换为图像列表 images = convert_from_path( pdf_path, dpi=300, # 高分辨率提高识别率 output_folder=temp_dir, fmt='jpeg', thread_count=4 # 多线程加速 ) for i, image in enumerate(images): if progress_callback: progress_callback(i+1, total_pages) print(f"处理第 {i+1}/{len(images)} 页...") page_text = self.ocr_image(image) # 从OCR文本中提取数据 page_items = self.parse_ocr_text(page_text) items.extend(page_items) df = pd.DataFrame(items) print(f"扫描版PDF处理完成: {pdf_path}, 耗时: {time.time()-start_time:.2f}秒") print(f"提取了 {len(df)} 条记录") return df def parse_ocr_text(self, text): """解析OCR识别的文本内容""" # 定义字段别名映射,兼容不同供应商的命名方式 field_mappings = { '名称': ['名称', '品名', '产品名称', '物料名称', '物料', '化合物名称'], '规格': ['规格', '包装规格', '规格型号', '型号', '包装', '规格说明'], '数量': ['数量', '主数量', '订货数量', '订购数量', '采购数量'], '价格': ['价格', '单价', '报价', '含税单价', '不含税单价'], 'CAS号': ['CAS', 'CAS号', 'CAS NO.', 'CAS No', 'CAS No.'], '货号': ['货号', '编号', '产品编号', '编码', '物料编码'], '品牌': ['品牌', '原厂'], '厂家': ['厂家', '生产厂家', '制造商', '原厂'], '证书': ['证书', '资质', '认证'], '货期': ['货期', '交货期', '交付周期', '生产周期'], '备注': ['备注', '说明', '附加信息'] } # 创建反向映射字典 {别名: 标准字段名} reverse_mappings = {} for standard_field, aliases in field_mappings.items(): for alias in aliases: reverse_mappings[alias.lower()] = standard_field items = [] # 分割文本为段落(假设每个段落对应一个产品) paragraphs = re.split(r'\n{2,}', text) for para in paragraphs: # 跳过空段落和页眉页脚 if len(para.strip()) < 20: continue # 尝试匹配键值对 item = {} for alias, std_field in reverse_mappings.items(): # 创建匹配模式 pattern = re.compile(fr"{alias}\s*[::]?\s*([^\n]+)") match = pattern.search(para) if match: item[std_field] = match.group(1).strip() # 如果提取到了关键字段,添加到结果 if '名称' in item and '价格' in item: # 创建物料标识 if '货号' in item and '规格' in item: item['物料标识'] = f"{item['货号']}_{item['规格']}" elif '名称' in item and '规格' in item: item['物料标识'] = f"{item['名称']}_{item['规格']}" # 数值类型转换 try: if '数量' in item: item['数量'] = float(re.sub(r'[^\d.]', '', item['数量'])) if '价格' in item: item['价格'] = float(re.sub(r'[^\d.]', '', item['价格'])) except ValueError: # 保留原始值如果不能转换 pass items.append(item) return items def parse_pdf(self, pdf_path, progress_callback=None): """解析供应商PDF报价单 - 自动检测文本型或扫描型""" print(f"开始解析PDF: {pdf_path}") start_time = time.time() supplier_name = os.path.basename(pdf_path).split('.')[0] self.pdf_files.append(pdf_path) # 首先尝试作为文本型PDF解析 try: with pdfplumber.open(pdf_path) as pdf: # 检查第一页是否有文本 first_page_text = pdf.pages[0].extract_text() if first_page_text and len(first_page_text.strip()) > 50: print("检测为文本型PDF,使用pdfplumber解析") return self.parse_text_pdf(pdf_path, progress_callback) except Exception as e: print(f"pdfplumber解析失败: {str(e)}") # 如果文本型解析失败或不适用,则使用OCR解析 print("检测为扫描版PDF,使用OCR解析") return self.parse_scanned_pdf(pdf_path, progress_callback) def parse_text_pdf(self, pdf_path, progress_callback=None): """解析文本型PDF(原有逻辑)""" # ...(这里是您原有的parse_pdf方法的内容)... # 为了节省空间,我这里省略了重复代码,您可以直接使用原有的代码 # 实际集成时请保留这部分代码 def compare_prices(self): """比较价格并生成结果""" if not self.supplier_data: return pd.DataFrame() # 合并所有供应商数据 all_items = pd.concat(self.supplier_data.values(), keys=self.supplier_data.keys(), names=['供应商']) all_items.reset_index(level=0, inplace=True) # 找出每个物料的最低价格 min_prices = all_items.groupby('物料标识')['价格'].min().reset_index() # 关联历史数据(如果有) if self.history_df is not None: merged_df = pd.merge( min_prices, self.history_df, on='物料标识', how='left', suffixes=('_最低价', '_历史') ) else: merged_df = min_prices.rename(columns={'价格': '最低价'}) # 关联供应商名称 merged_df = pd.merge( merged_df, all_items[['物料标识', '供应商', '价格']], left_on=['物料标识', '最低价'], right_on=['物料标识', '价格'], how='left' ) merged_df.rename(columns={'供应商': '最低价供应商', '价格': '最低价'}, inplace=True) # 清理临时列 merged_df.drop(columns=['价格_最低价'], inplace=True, errors='ignore') print(f"比价分析完成,共分析 {len(merged_df)} 条记录") return merged_df class PDFParser: self.ocr_engine = PaddleOCR( use_angle_cls=True, lang='ch', use_gpu=True, # 自动检测GPU show_log=False ) logger.info("使用PaddleOCR引擎") except ImportError: logger.warning("PaddleOCR未安装,尝试使用CnOCR") try: from cnocr import CnOcr self.ocr_engine = CnOcr() logger.info("使用CnOCR引擎") except ImportError: logger.error("未安装OCR引擎,请安装PaddleOCR或CnOCR") self.ocr_engine = None def _pdf_to_images(self, pdf_path): """将PDF转换为图像列表""" try: from pdf2image import convert_from_path return convert_from_path(pdf_path, dpi=300) except ImportError: logger.error("pdf2image未安装") return None except Exception as e: logger.error(f"PDF转图像失败: {str(e)}") return None def _process_ocr_results(self, results): """处理OCR结果提取表格数据""" # 简化的表格提取逻辑 data = [] for page_result in results: if self.ocr_engine.__class__.__name__ == 'PaddleOCR': # PaddleOCR结果处理 for line in page_result: if line and line[1][0]: # 确保有文本 data.append(line[1][0]) else: # CnOCR结果处理 for line in page_result: if line and line['text']: data.append(line['text']) # TODO: 根据实际需求添加表格解析逻辑 logger.warning("OCR结果提取需要根据实际表格结构增强") return {"text": data} # 然后定义 Application 类 class Application(tk.Tk): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) self.comparator = None # 确保初始化 self.status_var = tk.StringVar(value="就绪") self._setup_ui() self._check_dependencies() self._load_history_data() # 提前加载历史数据 def _check_dependencies(self): """增强的依赖检查,避免兼容性问题""" self.log_message("=== 依赖库版本检查 ===") # 记录核心库版本 self.log_message(f"Python版本: {sys.version.split()[0]}") self.log_message(f"NumPy版本: {np.version.version}") # 安全获取其他模块版本 def safe_get_version(module_name): try: module = __import__(module_name) if hasattr(module, '__version__'): return module.__version__ # 尝试其他方式获取版本 if module_name == "pdf2image": from pdf2image import __version__ as pdf2image_version return pdf2image_version # 使用metadata获取 if sys.version_info >= (3, 8): from importlib.metadata import version return version(module_name) else: import pkg_resources return pkg_resources.get_distribution(module_name).version except: return "未知版本" # 检查关键依赖 deps = ["pandas", "pdfplumber", "paddleocr", "cnocr", "pdf2image", "PyMuPDF", "opencv-python", "openpyxl", "psutil"] for dep in deps: try: __import__(dep) version = safe_get_version(dep) self.log_message(f"{dep}版本: {version}") except ImportError: self.log_message(f"警告: {dep}未安装") except Exception as e: self.log_message(f"{dep}检查错误: {str(e)}") self.log_message("=== 依赖检查完成 ===") def _setup_ui(self): self.title("物料价格比对系统 v3.0") self.geometry("800x600") # 主框架 main_frame = ttk.Frame(self) main_frame.pack(fill=tk.BOTH, expand=True, padx=10, pady=10) # 控制按钮框架 btn_frame = ttk.Frame(main_frame) btn_frame.pack(fill=tk.X, pady=5) self.btn_load_history = ttk.Button(btn_frame, text="加载历史数据", command=self.load_history) self.btn_load_history.pack(side=tk.LEFT, padx=5) self.btn_add_pdf = ttk.Button(btn_frame, text="添加供应商报价", command=self.add_pdf) self.btn_add_pdf.pack(side=tk.LEFT, padx=5) self.btn_generate = ttk.Button(btn_frame, text="生成比对报告", command=self.generate_report) self.btn_generate.pack(side=tk.LEFT, padx=5) self.btn_clear = ttk.Button(btn_frame, text="清除缓存", command=self.clear_cache) self.btn_clear.pack(side=tk.RIGHT, padx=5) # 进度条框架 progress_frame = ttk.LabelFrame(main_frame, text="处理进度") progress_frame.pack(fill=tk.X, padx=5, pady=5) self.progress_label = ttk.Label(progress_frame, text="就绪") self.progress_label.pack(anchor=tk.W, padx=5, pady=2) self.progress_bar = ttk.Progressbar(progress_frame, orient='horizontal', length=500, mode='determinate') self.progress_bar.pack(fill=tk.X, padx=5, pady=5) # 日志框架 log_frame = ttk.LabelFrame(main_frame, text="操作日志") log_frame.pack(fill=tk.BOTH, expand=True, padx=5, pady=5) self.log_text = tk.Text(log_frame, height=15) scrollbar = ttk.Scrollbar(log_frame, command=self.log_text.yview) self.log_text.configure(yscrollcommand=scrollbar.set) scrollbar.pack(side=tk.RIGHT, fill=tk.Y) self.log_text.pack(side=tk.LEFT, fill=tk.BOTH, expand=True) self.log_text.config(state=tk.DISABLED) # 状态栏 self.status_var = tk.StringVar(value="就绪") status_bar = ttk.Label(self, textvariable=self.status_var, relief=tk.SUNKEN, anchor=tk.W) status_bar.pack(side=tk.BOTTOM, fill=tk.X) # 内存监控 self.memory_var = tk.StringVar(value="内存使用: 0 MB") memory_label = ttk.Label(self, textvariable=self.memory_var, padding=2) memory_label.pack(side=tk.BOTTOM, fill=tk.X) # 禁用按钮直到历史数据加载 self.btn_add_pdf.config(state=tk.DISABLED) self.btn_generate.config(state=tk.DISABLED) # 启动内存监控 self.monitor_memory() def log_message(self, message): """在日志框中添加消息""" self.log_text.config(state=tk.NORMAL) self.log_text.insert(tk.END, message + "\n") self.log_text.see(tk.END) # 自动滚动到底部 self.log_text.config(state=tk.DISABLED) def monitor_memory(self): """定期更新内存使用情况""" process = psutil.Process(os.getpid()) mem_usage = process.memory_info().rss / (1024 * 1024) # 转换为MB self.memory_var.set(f"内存使用: {mem_usage:.2f} MB") self.after(5000, self.monitor_memory) # 每5秒更新一次 def clear_cache(self): """清除缓存释放内存""" self.comparator = PriceComparator() gc.collect() self.log_message("缓存已清除,内存已释放") self.status_var.set("就绪") self.progress_bar["value"] = 0 self.progress_label["text"] = "就绪" self.btn_add_pdf.config(state=tk.DISABLED) self.btn_generate.config(state=tk.DISABLED) self.btn_load_history.config(state=tk.NORMAL) def _load_history_data(self): """安全加载历史数据""" try: history_path = "C:/Users/14432/Desktop/自动比价系统/近五年订单汇总.xlsx" self.log_message(f"开始加载历史数据: {history_path}") import pandas as pd self.comparator.history_df = pd.read_excel(history_path) msg = f"历史数据加载完成: {len(self.comparator.history_df)}条记录" self.status_var.set(msg) self.log_message(msg) except Exception as e: error_msg = f"历史数据加载失败: {str(e)}" self.status_var.set(error_msg) self.log_message(error_msg, level="ERROR") self.comparator.history_df = pd.DataFrame() # 创建空DataFrame def parse_pdfs(self): """解析PDF文件并处理错误""" pdf_files = [ "C:/Users/14432/Desktop/自动比价系统/s26c-725082615560.pdf", "C:/Users/14432/Desktop/自动比价系统/对照品询比价2025.08.21-2--广州佳途报价---TO华东医药-更新2.pdf", "C:/Users/14432/Desktop/自动比价系统/广州隽沐报价单20250826-中美华东(3).pdf", "C:/Users/14432/Desktop/自动比价系统/翔龙报价单-华东-20250825.pdf" ] parser = PDFParser() results = [] for pdf_path in pdf_files: result = parser.parse_pdf(pdf_path) if result: results.append(result) self.log_message(f"成功解析: {pdf_path}") else: self.log_message(f"解析失败: {pdf_path}", level="ERROR") self.log_message(f"所有PDF文件解析完成,成功{len(results)}个") def add_pdf(self): """添加PDF文件(多线程处理)""" files = filedialog.askopenfilenames(filetypes=[("PDF文件", "*.pdf")]) if not files: return # 禁用按钮避免重复操作 self.btn_add_pdf.config(state=tk.DISABLED) self.status_var.set(f"正在解析{len(files)}个PDF文件...") self.log_message(f"开始解析{len(files)}个PDF文件") # 重置进度条 self.progress_bar["value"] = 0 # 使用线程池 def process_file(file_path): try: # 为每个文件设置进度回调 def progress_callback(current, total): self.after(0, lambda: self.progress_label.config( text=f"解析 {os.path.basename(file_path)}: {current}/{total}页") ) self.after(0, lambda: self.progress_bar.config(value=(current/total)*100)) self.comparator.parse_pdf(file_path, progress_callback) self.after(0, lambda: self.log_message(f"成功解析: {file_path}")) except Exception as e: error_msg = str(e) self.after(0, lambda: self.log_message(f"错误: 解析 {file_path} 失败 - {error_msg}")) # 启动线程池 def thread_pool_task(): with ThreadPoolExecutor(max_workers=min(4, len(files))) as executor: futures = [executor.submit(process_file, f) for f in files] # 等待所有任务完成 for future in futures: future.result() # 完成后重置界面 self.after(0, lambda: self.progress_bar.config(value=0)) self.after(0, lambda: self.progress_label.config(text="解析完成")) self.after(0, lambda: self.status_var.set(f"PDF解析完成: {len(files)}个文件")) self.after(0, lambda: self.btn_add_pdf.config(state=tk.NORMAL)) self.after(0, lambda: self.log_message(f"所有PDF文件解析完成")) threading.Thread(target=thread_pool_task, daemon=True).start() def generate_report(self): """生成报告(线程安全)""" if self.processing: return if not self.comparator.supplier_data: messagebox.showwarning("警告", "请先添加供应商报价单") return # 默认保存路径的逻辑 if self.comparator.pdf_files: first_pdf = self.comparator.pdf_files[0] report_dir = os.path.dirname(first_pdf) timestamp = time.strftime("%Y%m%d_%H%M%S") default_name = f"比价报告_{timestamp}.xlsx" save_path = os.path.join(report_dir, default_name) else: save_path = filedialog.asksaveasfilename( defaultextension=".xlsx", filetypes=[("Excel文件", "*.xlsx")] ) if not save_path: return # 启动线程生成报告 self.processing = True self.status_var.set("正在生成报告...") self.log_message("开始生成比对报告") self.progress_label.config(text="生成报告中...") # 显示进度条但不更新具体值 self.progress_bar.config(mode='indeterminate') self.progress_bar.start(10) # 开始滚动动画 def thread_task(): try: start_time = time.time() result = self.comparator.compare_prices() # 保存结果 result.to_excel(save_path, index=False) elapsed = time.time() - start_time self.after(0, lambda: self.log_message(f"报告生成成功! 保存路径: {save_path}, 耗时: {elapsed:.2f}秒")) self.after(0, lambda: self.status_var.set(f"报告生成成功: {len(result)}条记录")) self.after(0, lambda: messagebox.showinfo("成功", f"比价报告已保存至:\n{save_path}")) except Exception as ex: # 使用不同的变量名 error_msg = str(ex) # 将错误信息保存到局部变量 self.after(0, lambda: self.log_message(f"错误: 生成报告失败 - {error_msg}")) self.after(0, lambda: messagebox.showerror("错误", f"生成报告失败: {error_msg}")) finally: self.after(0, lambda: self.progress_bar.stop()) self.after(0, lambda: self.progress_bar.config(mode='determinate', value=0)) self.after(0, lambda: self.progress_label.config(text="就绪")) self.processing = False threading.Thread(target=thread_task, daemon=True).start() # 在主入口安全初始化 if __name__ == "__main__": try: # 忽略特定警告 warnings.filterwarnings("ignore", category=UserWarning, module='scipy') warnings.filterwarnings("ignore", category=UserWarning, module='paddle') app = Application() app.mainloop() except Exception as e: logger.critical(f"应用程序崩溃: {str(e)}") traceback.print_exc() # 显示错误对话框 import tkinter.messagebox as messagebox messagebox.showerror("系统错误", f"程序遇到严重错误:\n{str(e)}\n\n详细信息请查看日志文件")
09-15
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值