Python_API_os.path_walk_待填充

本文介绍了 Python 2 标准库 os.path 模块中的目录遍历函数 os.path.walk 的使用方法、工作原理以及实际应用场景,包括参数解释、注意事项和案例分析;该函数已在 Python 3.0 中移除,由 os.walk 取代。
os.path.walk(path, visit, arg)

Calls the function visit with arguments (arg, dirname, names) for each directory in the directory tree rooted at path (including path itself, if it is a directory). The argument dirname specifies the visited directory, the argument names lists the files in the directory (gotten from os.listdir(dirname)). The visit function may modify names to influence the set of directories visited below dirname, e.g. to avoid visiting certain parts of the tree. (The object referred to by names must be modified in place, using del or slice assignment.)

Note

Symbolic links to directories are not treated as subdirectories, and walk() therefore will not visit them. To visit linked directories you must identify them with os.path.islink(file) and os.path.isdir(file), and invoke walk() as necessary.

Note

This function is deprecated and has been removed in 3.0 in favor of os.walk().


import contextlib
import os
import shutil
import tempfile
import threading
import tkinter as tk
from datetime import datetime
from io import BytesIO
from tkinter import filedialog, messagebox, scrolledtext, ttk

import pythoncom
import win32com.client
from docx import Document
from PyPDF2 import PdfMerger, PdfReader, PdfWriter
from reportlab.lib.colors import black, red, white
from reportlab.lib.pagesizes import letter
from reportlab.pdfbase import pdfmetrics
from reportlab.pdfbase.ttfonts import TTFont
from reportlab.pdfgen import canvas
from reportlab.platypus import Table, TableStyle


class PDFConverterApp:
    """Tk GUI that converts Word reports to PDF and merges them behind a Summary page.

    For every selected folder the app collects the Word documents, optionally
    rewrites their SPEC(dB)/Pass-Fail table, optionally archives the document
    together with nearby ``.xlsx``/``.csv`` files, converts each document to
    PDF through Word COM automation, and finally merges all PDFs after a
    generated Summary page that lists the SPEC tables of *every* document.
    """

    DEFAULT_REPORT_NAME = "听筒磁干扰仿真报告"
    IDLE_TITLE = "Word 转 PDF 合并工具"
    REQUIRED_HEADERS = ("SPEC(dB)", "Simulation", "Pass/Fail")

    def __init__(self, root):
        """Store the Tk root, initialise the state variables and build the UI."""
        self.root = root
        self.root.title("audio_data")
        self.root.geometry("800x650")
        self.folders = []
        self.log_messages = []
        self.output_path = ""  # user-selected output directory ("" = automatic)
        self.backup_mode = tk.BooleanVar(value=True)  # archive sources next to the report
        self.point_22_mode = tk.BooleanVar(value=False)  # raises the SPEC limit by 2 dB
        self.output_filename = tk.StringVar(value="听筒磁干扰_Simulation_Result")
        self._worker = None  # background processing thread, if any
        self.create_widgets()

    def create_widgets(self):
        """Create and lay out all widgets of the main window."""
        top_frame = ttk.Frame(self.root, padding=10)
        top_frame.pack(fill=tk.X)

        # --- output settings row -------------------------------------------
        output_frame = ttk.LabelFrame(self.root, text="输出设置", padding=10)
        output_frame.pack(fill=tk.X, padx=10, pady=(0, 5))

        ttk.Label(output_frame, text="文件名:").grid(
            row=0, column=0, sticky=tk.W, padx=(0, 5)
        )
        filename_entry = ttk.Entry(
            output_frame, textvariable=self.output_filename, width=30
        )
        filename_entry.grid(row=0, column=1, sticky=tk.W, padx=5)

        ttk.Label(output_frame, text="输出路径:").grid(
            row=0, column=2, sticky=tk.W, padx=(20, 5)
        )
        self.path_entry = ttk.Entry(output_frame, width=40, state='readonly')
        self.path_entry.grid(row=0, column=3, sticky=tk.EW, padx=5)
        browse_btn = ttk.Button(
            output_frame, text="浏览...", command=self.choose_output_path
        )
        browse_btn.grid(row=0, column=4, padx=(5, 0))
        output_frame.columnconfigure(3, weight=1)

        # The frame holding the two check boxes is created exactly once; the
        # earlier revision built and gridded it twice, leaving an orphan
        # widget stacked in the same grid cell.
        backup_frame = ttk.Frame(output_frame)
        backup_frame.grid(row=0, column=5, sticky=tk.W, padx=(20, 0))
        self.backup_check = ttk.Checkbutton(
            backup_frame, text="报告存档", variable=self.backup_mode
        )
        self.backup_check.pack(side=tk.LEFT)
        self.point_22_check = ttk.Checkbutton(
            backup_frame, text="22号点位", variable=self.point_22_mode
        )
        self.point_22_check.pack(side=tk.LEFT, padx=(10, 0))

        # --- toolbar buttons -----------------------------------------------
        add_btn = ttk.Button(top_frame, text="添加文件夹", command=self.add_folder)
        add_btn.pack(side=tk.LEFT, padx=5)
        remove_btn = ttk.Button(
            top_frame, text="移除选中", command=self.remove_selected
        )
        remove_btn.pack(side=tk.LEFT, padx=5)
        clear_btn = ttk.Button(top_frame, text="清空列表", command=self.clear_list)
        clear_btn.pack(side=tk.LEFT, padx=5)
        process_btn = ttk.Button(
            top_frame, text="开始处理", command=self.start_processing
        )
        process_btn.pack(side=tk.RIGHT, padx=5)

        # --- folder list ---------------------------------------------------
        list_frame = ttk.LabelFrame(self.root, text="待处理文件夹", padding=10)
        list_frame.pack(fill=tk.BOTH, expand=True, padx=10, pady=5)
        scrollbar = ttk.Scrollbar(list_frame)
        scrollbar.pack(side=tk.RIGHT, fill=tk.Y)
        self.folder_list = tk.Listbox(
            list_frame,
            selectmode=tk.EXTENDED,
            yscrollcommand=scrollbar.set,
            height=10,
        )
        self.folder_list.pack(fill=tk.BOTH, expand=True)
        scrollbar.config(command=self.folder_list.yview)

        # --- log area and progress bar -------------------------------------
        log_frame = ttk.LabelFrame(self.root, text="处理日志", padding=10)
        log_frame.pack(fill=tk.BOTH, expand=True, padx=10, pady=5)
        self.log_text = scrolledtext.ScrolledText(
            log_frame, wrap=tk.WORD, state=tk.DISABLED
        )
        self.log_text.pack(fill=tk.BOTH, expand=True)

        self.progress = ttk.Progressbar(
            self.root, orient=tk.HORIZONTAL, mode='determinate'
        )
        self.progress.pack(fill=tk.X, padx=10, pady=5)

    def choose_output_path(self):
        """Ask for the output directory and show it in the read-only entry."""
        path = filedialog.askdirectory(title="选择输出文件夹")
        if not path:
            return
        self.output_path = path
        # The entry is read-only for the user, so unlock it while updating.
        self.path_entry.config(state='normal')
        self.path_entry.delete(0, tk.END)
        self.path_entry.insert(0, path)
        self.path_entry.config(state='readonly')
        self.log(f"已设置输出路径: {path}")

    def add_folder(self):
        """Ask for one folder and append it to the processing list."""
        folder = filedialog.askdirectory(title="选择要处理的文件夹", mustexist=True)
        if folder:
            self.folders.append(folder)
            self.folder_list.insert(tk.END, folder)
            self.log(f"已添加文件夹: {folder}")

    def remove_selected(self):
        """Remove the folders currently selected in the list box."""
        # Delete from the bottom up so the remaining indices stay valid.
        for index in self.folder_list.curselection()[::-1]:
            folder = self.folder_list.get(index)
            self.folder_list.delete(index)
            self.folders.remove(folder)
            self.log(f"已移除文件夹: {folder}")

    def clear_list(self):
        """Remove every folder from the processing list."""
        self.folder_list.delete(0, tk.END)
        self.folders = []
        self.log("已清空文件夹列表")

    def log(self, message):
        """Append a time-stamped message to the log pane and to ``log_messages``."""
        timestamp = datetime.now().strftime("%H:%M:%S")
        log_entry = f"[{timestamp}] {message}"
        self.log_messages.append(log_entry)
        self.log_text.config(state=tk.NORMAL)
        self.log_text.insert(tk.END, log_entry + "\n")
        self.log_text.config(state=tk.DISABLED)
        self.log_text.yview(tk.END)  # keep the newest entry visible
        self.root.update_idletasks()

    def start_processing(self):
        """Validate the folder list and run ``process_folders`` on a worker thread."""
        if not self.folders:
            messagebox.showwarning("警告", "请先添加要处理的文件夹")
            return
        # Refuse to start a second run while one is still active; two runs
        # would share the progress bar and drive Word concurrently.
        if self._worker is not None and self._worker.is_alive():
            self.log("正在处理中,请等待当前任务完成")
            return

        self.root.title("Word 转 PDF 合并工具 - 处理中...")
        self.progress["value"] = 0

        # Run in the background so the window stays responsive.
        self._worker = threading.Thread(target=self.process_folders)
        self._worker.daemon = True
        self._worker.start()

    def backup_excel_files(self, source_folder, backup_dir):
        """Copy every ``Sound_Pres_Cal.xlsx`` below *source_folder* into *backup_dir*.

        The directory structure relative to *source_folder* is recreated
        inside *backup_dir*. Progress is reported with ``print``.
        """
        print(f"开始在文件夹中搜索Sound_Pres_Cal.xlsx: {source_folder}")
        backup_count = 0
        for root, _dirs, files in os.walk(source_folder):
            if "Sound_Pres_Cal.xlsx" not in files:
                continue
            excel_path = os.path.join(root, "Sound_Pres_Cal.xlsx")
            try:
                rel_path = os.path.relpath(excel_path, start=source_folder)
                dest_path = os.path.join(backup_dir, rel_path)
                os.makedirs(os.path.dirname(dest_path), exist_ok=True)
                shutil.copy2(excel_path, dest_path)  # copy2 keeps the metadata
                backup_count += 1
                print(f"备份成功: {excel_path} → {dest_path}")
            except Exception as e:
                print(f"备份失败 {excel_path}: {str(e)}")
        print(f"共找到并备份 {backup_count} 个Sound_Pres_Cal.xlsx文件")

    def backup_data_files(self, folder_path, backup_dir):
        """Copy every ``.xlsx``/``.csv`` file below *folder_path* flat into *backup_dir*.

        Sub-directories are searched recursively but the copies all land
        directly in *backup_dir*, so equally named files overwrite each other.
        """
        self.log(f"开始在文件夹中搜索所有Excel和CSV文件: {folder_path}")
        backup_count = 0
        for root, _dirs, files in os.walk(folder_path):
            for file in files:
                if not file.lower().endswith(('.xlsx', '.csv')):
                    continue
                file_path = os.path.join(root, file)
                dest_path = os.path.join(backup_dir, file)
                try:
                    os.makedirs(backup_dir, exist_ok=True)
                    shutil.copy2(file_path, dest_path)
                    backup_count += 1
                    self.log(f"备份成功: {file_path} → {dest_path}")
                except Exception as e:
                    self.log(f"备份失败 {file_path}: {str(e)}")
        self.log(f"共找到并备份 {backup_count} 个Excel和CSV文件")

    def _resolve_output_folder(self):
        """Return the user-selected output directory or the first existing input folder."""
        if self.output_path:
            return self.output_path
        return next((p for p in self.folders if os.path.isdir(p)), os.getcwd())

    @staticmethod
    def _unique_display_name(word_file, used_names):
        """Return a name for *word_file* that is not yet in *used_names*.

        The plain file stem is preferred. When another document already uses
        it (same file name in a different folder) the parent folder name is
        appended, then a counter, so no document overwrites another one in
        the Summary tables, the archive or the temporary directory.
        """
        stem = os.path.splitext(os.path.basename(word_file))[0]
        if stem not in used_names:
            return stem
        parent = os.path.basename(os.path.dirname(os.path.abspath(word_file)))
        candidate = f"{stem} ({parent})"
        counter = 2
        while candidate in used_names:
            candidate = f"{stem} ({parent} {counter})"
            counter += 1
        return candidate

    def _prepare_working_copy(self, word_file, work_dir):
        """Return the path to convert: a SPEC-adjusted copy or the original file.

        A copy is modified only when the 22-point mode is on or the file name
        contains "GSM". The copy keeps the original base name because
        ``modify_word_spec`` derives the GSM flag from it.
        """
        file_name = os.path.basename(word_file)
        if not (self.point_22_mode.get() or "GSM" in file_name.upper()):
            return word_file
        temp_word_path = os.path.join(work_dir, file_name)
        shutil.copy2(word_file, temp_word_path)
        if self.modify_word_spec(temp_word_path):
            return temp_word_path
        return word_file

    def _backup_document(self, word_file, document_path, dest_dir):
        """Archive *document_path* and the data files next to *word_file* in *dest_dir*."""
        try:
            os.makedirs(dest_dir, exist_ok=True)
            word_dest = os.path.join(dest_dir, os.path.basename(document_path))
            shutil.copy2(document_path, word_dest)
            self.log(f"Word文件备份成功: {word_file} → {word_dest}")
            self.backup_data_files(os.path.dirname(word_file), dest_dir)
        except OSError as e:
            self.log(f"文件备份失败: {e}")
        except Exception as e:
            self.log(f"未知错误: {e}")

    def process_folders(self):
        """Convert every Word document of the selected folders and merge the PDFs.

        Runs on the worker thread started by ``start_processing``.

        NOTE(review): this method updates Tk widgets and opens message boxes
        from a non-GUI thread, exactly as the previous revision did. Tkinter
        is not guaranteed to be thread-safe; consider marshalling UI updates
        through ``root.after``.
        """
        try:
            output_folder = self._resolve_output_folder()
            self.log(f"开始处理 {len(self.folders)} 个文件夹...")

            word_files = self.get_all_word_files(self.folders)
            if not word_files:
                self.log("没有找到任何Word文档")
                return

            self.log(f"共找到 {len(word_files)} 个Word文档")
            self.progress["maximum"] = len(word_files) + 5  # files + merge steps

            backup_root = os.path.join(output_folder, "报告存档")
            if self.backup_mode.get():
                os.makedirs(backup_root, exist_ok=True)

            with tempfile.TemporaryDirectory() as temp_dir:
                pdf_files_with_header = []
                toc_entries = []
                all_tables = {}
                used_names = set()
                current_page = 1

                # One single loop over the documents. The previous revision
                # repeated the ``for`` header, which detached the real
                # processing from the iteration so that only the last document
                # reached the Summary page.
                for i, word_file in enumerate(word_files):
                    self.progress["value"] = i + 1
                    file_name = os.path.splitext(os.path.basename(word_file))[0]
                    display_name = self._unique_display_name(word_file, used_names)
                    used_names.add(display_name)

                    # A private sub-directory per document prevents equally
                    # named files from overwriting each other's temp output.
                    work_dir = os.path.join(temp_dir, f"{i:04d}")
                    os.makedirs(work_dir, exist_ok=True)

                    modified_word_path = self._prepare_working_copy(word_file, work_dir)
                    original_pdf = os.path.join(work_dir, f"{file_name}_original.pdf")
                    pdf_with_header = os.path.join(
                        work_dir, f"{file_name}_with_header.pdf"
                    )

                    if self.backup_mode.get():
                        self._backup_document(
                            word_file,
                            modified_word_path,
                            os.path.join(backup_root, display_name),
                        )

                    tables = self.extract_spec_table(modified_word_path)
                    if tables:
                        all_tables[display_name] = tables
                        self.log(f"已从 {display_name} 中提取 {len(tables)} 个数据表格")

                    if not self.word_to_pdf(modified_word_path, original_pdf):
                        self.log(f"跳过 {display_name},转换失败")
                        continue

                    # Fall back to the plain PDF when the title cannot be added.
                    if self.add_inline_header(original_pdf, display_name, pdf_with_header):
                        final_pdf = pdf_with_header
                    else:
                        final_pdf = original_pdf
                    pdf_files_with_header.append(final_pdf)
                    toc_entries.append((display_name, current_page))
                    current_page += self.get_pdf_page_count(final_pdf)

                self.progress["value"] = len(word_files) + 1

                if not pdf_files_with_header:
                    self.log("没有成功转换的PDF文件,无法进行合并")
                    return

                report_name = self.output_filename.get().strip()
                if not report_name:
                    report_name = self.get_folder_name_parts(self.folders)
                output_pdf = os.path.join(output_folder, f"{report_name}.pdf")

                # The merge must happen while the temporary PDFs still exist.
                self.progress["value"] = len(word_files) + 2
                success = self.merge_pdfs_with_summary(
                    pdf_files_with_header, toc_entries, all_tables, output_pdf
                )
                self.progress["value"] = len(word_files) + 3

            if success:
                self.log(f"处理完成!输出文件: {output_pdf}")
                messagebox.showinfo("完成", f"处理完成!输出文件: {output_pdf}")
            else:
                self.log("处理失败")
                messagebox.showerror("错误", "处理过程中出现错误")
        except Exception as e:
            self.log(f"处理过程中出现错误: {str(e)}")
            messagebox.showerror("错误", f"处理过程中出现错误: {str(e)}")
        finally:
            # Reset the title on every exit path, including the early returns
            # that previously left the window stuck on "处理中...".
            self.root.title(self.IDLE_TITLE)

    def extract_spec_table(self, word_path):
        """Return the SPEC tables of a Word document.

        Each returned table is a list of rows (lists of stripped cell texts)
        whose first row is the header. Only tables whose header row contains
        "SPEC(dB)", "Simulation" and "Pass/Fail" are returned. On any error
        the problem is logged and an empty list is returned.
        """
        try:
            doc = Document(word_path)
            tables = []
            for table in doc.tables:
                if len(table.rows) == 0:
                    continue  # an empty table has no header row to inspect
                headers = [cell.text.strip() for cell in table.rows[0].cells]
                if not all(name in headers for name in self.REQUIRED_HEADERS):
                    continue
                table_data = [headers]
                for row in table.rows[1:]:
                    table_data.append([cell.text.strip() for cell in row.cells])
                tables.append(table_data)
            return tables
        except Exception as e:
            self.log(f"提取 {os.path.basename(word_path)} 中的表格时出错: {str(e)}")
            return []

    def modify_word_spec(self, word_path):
        """Rewrite the SPEC(dB) and Pass/Fail columns of *word_path* in place.

        The SPEC limit is 18, plus 2 when the file name contains "GSM", plus
        2 when the 22-point mode is enabled. Every data row of every SPEC
        table gets that limit, and its Pass/Fail cell becomes "PASS" when the
        Simulation value is below the limit, otherwise "FAIL". Rows whose
        Simulation cell is not numeric keep their Pass/Fail text.

        Returns True when at least one SPEC table was found and the document
        was saved, False otherwise (including on error, which is logged).
        """
        try:
            doc = Document(word_path)
            has_gsm = "GSM" in os.path.basename(word_path).upper()

            if self.point_22_mode.get():
                spec_value = 22 if has_gsm else 20
            else:
                spec_value = 20 if has_gsm else 18

            modified = False
            for table in doc.tables:
                if len(table.rows) == 0:
                    continue
                headers = [cell.text.strip() for cell in table.rows[0].cells]
                try:
                    spec_index = headers.index("SPEC(dB)")
                    sim_index = headers.index("Simulation")
                    pf_index = headers.index("Pass/Fail")
                except ValueError:
                    continue  # not a SPEC table

                modified = True
                for row in table.rows[1:]:
                    cells = row.cells
                    if spec_index < len(cells):
                        cells[spec_index].text = str(spec_value)
                    if sim_index < len(cells) and pf_index < len(cells):
                        try:
                            sim_value = float(cells[sim_index].text)
                        except ValueError:
                            continue  # non-numeric Simulation cell: leave Pass/Fail
                        cells[pf_index].text = (
                            "PASS" if sim_value < spec_value else "FAIL"
                        )

            if modified:
                doc.save(word_path)
                self.log(f"已修改 {os.path.basename(word_path)} 的SPEC值为{spec_value}")
            return modified
        except Exception as e:
            self.log(f"修改 {os.path.basename(word_path)} 失败: {str(e)}")
            return False

    def add_inline_header(self, pdf_path, title, output_path):
        """Stamp *title* in red at the top of the first page of *pdf_path*.

        The result is written to *output_path*. Returns True on success and
        False when the PDF has no pages or an error occurred (logged).
        """
        try:
            reader = PdfReader(pdf_path)
            if len(reader.pages) == 0:
                return False

            packet = BytesIO()
            can = canvas.Canvas(packet, pagesize=letter)
            _width, height = letter

            font_name = "Helvetica-Bold"
            try:
                # SimSun can render Chinese titles; keep Helvetica when the
                # font file cannot be loaded on this machine.
                pdfmetrics.registerFont(TTFont('SimSun', 'simsun.ttc'))
                pdfmetrics.registerFont(TTFont('SimSun-Bold', 'simsun.ttc'))
                font_name = "SimSun-Bold"
            except Exception:
                pass

            can.setFont(font_name, 14)
            can.setFillColor(red)
            can.drawString(50, height - 50, title)
            can.save()
            packet.seek(0)

            first_page = reader.pages[0]
            first_page.merge_page(PdfReader(packet).pages[0])

            writer = PdfWriter()
            writer.add_page(first_page)
            for page in reader.pages[1:]:
                writer.add_page(page)
            with open(output_path, "wb") as f:
                writer.write(f)
            return True
        except Exception as e:
            self.log(f"PDF添加标题失败: {str(e)}")
            return False

    def create_summary_page(self, toc_entries, all_tables, output_path):
        """Render the Summary PDF listing the SPEC tables of every document.

        *all_tables* maps a document display name to its list of tables as
        produced by ``extract_spec_table``. *toc_entries* is accepted for
        interface compatibility and is not rendered. Returns *output_path*
        on success or None on failure (logged).
        """
        try:
            c = canvas.Canvas(output_path, pagesize=letter)
            width, height = letter

            font_name = "Helvetica"
            try:
                pdfmetrics.registerFont(TTFont('SimSun', 'simsun.ttc'))
                font_name = "SimSun"
            except Exception:
                pass  # keep Helvetica when SimSun is unavailable

            c.setFont(font_name, 24)
            c.setFillColor(red)
            c.drawCentredString(width / 2.0, height - 50, "Summary")
            c.setFillColor(black)
            y_position = height - 100

            if all_tables:
                c.setFont(font_name, 16)
                c.drawString(50, y_position, "Data Summary:")
                y_position -= 30
                c.setFont(font_name, 10)
                table_width = width - 100

                style = TableStyle([
                    ('BACKGROUND', (0, 0), (-1, 0), white),
                    ('TEXTCOLOR', (0, 0), (-1, 0), black),
                    ('ALIGN', (0, 0), (-1, -1), 'CENTER'),
                    ('FONTNAME', (0, 0), (-1, 0), font_name),
                    ('FONTNAME', (0, 1), (-1, -1), font_name),
                    ('BOTTOMPADDING', (0, 0), (-1, 0), 12),
                    ('BACKGROUND', (0, 1), (-1, -1), white),
                    ('GRID', (0, 0), (-1, -1), 1, black),
                ])

                for doc_name, tables in all_tables.items():
                    c.setFont(font_name, 12)
                    c.setFillColor(red)
                    c.drawString(60, y_position, f"Document: {doc_name}")
                    y_position -= 20
                    c.setFillColor(black)
                    c.setFont(font_name, 10)

                    for table_data in tables:
                        column_count = len(table_data[0])
                        col_widths = [table_width / column_count] * column_count
                        table = Table(table_data, colWidths=col_widths)
                        table.setStyle(style)

                        table_height = table.wrap(0, 0)[1]
                        if y_position - table_height < 50:
                            # Not enough room left: continue on a new page
                            # that repeats the Summary title.
                            c.showPage()
                            y_position = height - 50
                            c.setFont(font_name, 24)
                            c.setFillColor(red)
                            c.drawCentredString(width / 2.0, y_position, "Summary")
                            y_position -= 50
                            c.setFillColor(black)

                        table.drawOn(c, 50, y_position - table_height)
                        y_position -= (table_height + 20)

            c.save()
            return output_path
        except Exception as e:
            self.log(f"创建Summary页失败: {str(e)}")
            return None

    def word_to_pdf(self, word_path, pdf_path):
        """Convert *word_path* to *pdf_path* through Word COM automation.

        Returns True on success and False on failure (logged). The document
        and the Word instance are always released, so a failed conversion no
        longer leaves a hidden WINWORD process behind.
        """
        pythoncom.CoInitialize()
        word = None
        doc = None
        try:
            word = win32com.client.Dispatch("Word.Application")
            word.Visible = False
            doc = word.Documents.Open(os.path.abspath(word_path))
            doc.SaveAs(os.path.abspath(pdf_path), FileFormat=17)  # 17 = PDF
            self.log(f"已将 {os.path.basename(word_path)} 转换为PDF")
            return True
        except Exception as e:
            self.log(f"转换 {os.path.basename(word_path)} 时出错: {str(e)}")
            return False
        finally:
            # Best-effort cleanup: a failure while closing must not mask the
            # conversion result.
            if doc is not None:
                with contextlib.suppress(Exception):
                    doc.Close()
            if word is not None:
                with contextlib.suppress(Exception):
                    word.Quit()
            pythoncom.CoUninitialize()

    def get_pdf_page_count(self, pdf_path):
        """Return the number of pages of *pdf_path*, or 0 when it cannot be read."""
        try:
            return len(PdfReader(pdf_path).pages)
        except Exception:
            return 0

    def merge_pdfs_with_summary(self, pdf_files, toc_entries, all_tables, output_path):
        """Write *output_path* as Summary page(s) followed by all *pdf_files*.

        One outline (bookmark) entry per document is added, titled with the
        name from the matching *toc_entries* item and pointing at the first
        page of that document. Returns True on success, False on failure
        (logged). The temporary summary file is removed on every exit path.
        """
        summary_path = None
        merger = None
        try:
            with tempfile.NamedTemporaryFile(delete=False, suffix='.pdf') as summary_file:
                summary_path = summary_file.name

            # The summary does not depend on page numbers, so one rendering
            # is enough (the earlier revision rendered it twice).
            if self.create_summary_page(toc_entries, all_tables, summary_path) is None:
                return False
            summary_page_count = self.get_pdf_page_count(summary_path)

            merger = PdfMerger()
            merger.append(summary_path)

            current_page = summary_page_count  # zero-based index of the next page
            for pdf, (title, _page) in zip(pdf_files, toc_entries):
                merger.append(pdf)
                merger.add_outline_item(title, current_page)
                current_page += self.get_pdf_page_count(pdf)

            merger.write(output_path)
            self.log(f"已成功合并 {len(pdf_files)} 个PDF文件")
            return True
        except Exception as e:
            self.log(f"合并PDF时出错: {str(e)}")
            return False
        finally:
            # Close the merger first so the summary file can be deleted.
            if merger is not None:
                with contextlib.suppress(Exception):
                    merger.close()
            if summary_path is not None:
                with contextlib.suppress(OSError):
                    os.remove(summary_path)

    def get_all_word_files(self, folder_paths):
        """Return the ``.docx``/``.doc`` files directly inside each folder.

        Entries that are not directories are skipped. Word's ``~$`` lock
        files are ignored because they are not real documents, and the files
        of each folder are returned in sorted order so the report layout is
        reproducible.
        """
        word_extensions = ('.docx', '.doc')
        word_files = []
        for folder_path in folder_paths:
            if not os.path.isdir(folder_path):
                continue
            for file in sorted(os.listdir(folder_path)):
                if file.startswith('~$'):
                    continue
                if os.path.splitext(file)[1].lower() in word_extensions:
                    word_files.append(os.path.join(folder_path, file))
        return word_files

    def get_folder_name_parts(self, folder_paths):
        """Build a default report name from the last components of a folder path.

        *folder_paths* may be a list of paths (the first one is used) or a
        single path string; the earlier revision indexed a string argument
        and therefore only looked at its first character. Up to the last
        three path components are joined with underscores. An empty argument
        yields the default report name.
        """
        if not folder_paths:
            return self.DEFAULT_REPORT_NAME
        if isinstance(folder_paths, str):
            folder_path = folder_paths
        else:
            folder_path = folder_paths[0]
        parts = [p for p in os.path.normpath(folder_path).split(os.sep) if p]
        if not parts:
            return self.DEFAULT_REPORT_NAME
        return "_".join(parts[-3:])


if __name__ == "__main__":
    root = tk.Tk()
    app = PDFConverterApp(root)
    root.mainloop()  # start the Tk event loop

# Original forum question that followed this listing (translated):
# "Why does the Summary page only contain the SPEC(dB) / Simulation /
# Pass/Fail data of the last processed Word document? I want the data of
# all processed Word documents to be collected on the Summary page."
09-17
import csv
import os


def read_csv(file_path):
    """Read a CSV file and return its rows as a list of lists.

    Returns None (after printing a message) when the file is missing or
    cannot be read.
    """
    try:
        # newline='' lets the csv module handle embedded line breaks itself.
        with open(file_path, 'r', newline='') as file:
            return list(csv.reader(file))
    except FileNotFoundError:
        print(f"文件未找到: {file_path}")
        return None
    except Exception as e:
        print(f"读取文件时发生错误: {e}")
        return None


def write_csv(file_path, data):
    """Write *data* (an iterable of rows) to *file_path* as CSV.

    Returns True on success and False (after printing a message) on failure,
    so callers can tell whether the output really exists.
    """
    try:
        with open(file_path, 'w', newline='') as file:
            csv.writer(file).writerows(data)
    except Exception as e:
        print(f"写入文件时发生错误: {e}")
        return False
    return True


def find_files(directory):
    """Return ``(cgg_files, cgc_files)``: the ``.csv`` paths in *directory*
    whose names contain ``_CGG`` and ``_CGC`` respectively.

    A name containing both markers is counted as a CGG file only.
    """
    cgg_files = []
    cgc_files = []
    for file in os.listdir(directory):
        if not file.endswith('.csv'):
            continue
        if '_CGG' in file:
            cgg_files.append(os.path.join(directory, file))
        elif '_CGC' in file:
            cgc_files.append(os.path.join(directory, file))
    return cgg_files, cgc_files


def pair_files(cgg_files, cgc_files):
    """Pair each CGG file with the CGC file of the same base name.

    Returns a list of ``(cgg_path, cgc_path)`` tuples in the order of
    *cgg_files*; CGG files without a partner are left out.
    """
    # Index the CGC files by base name once instead of rescanning the list
    # for every CGG file; setdefault keeps the first path on duplicates.
    cgc_by_name = {}
    for cgc_file in cgc_files:
        cgc_by_name.setdefault(os.path.basename(cgc_file), cgc_file)

    pairs = []
    for cgg_file in cgg_files:
        wanted = os.path.basename(cgg_file).replace('_CGG', '_CGC')
        if wanted in cgc_by_name:
            pairs.append((cgg_file, cgc_by_name[wanted]))
    return pairs


def process_directory(directory):
    """Merge every CGG/CGC CSV pair found directly in *directory*.

    For each pair the merged file holds all CGC rows followed by the CGG
    rows after its first seven lines. It is written under the CGC name with
    ``_CGC`` removed. The two source files are deleted only after the merged
    file was written successfully.
    """
    cgg_files, cgc_files = find_files(directory)
    if not cgg_files or not cgc_files:
        print(f"在目录 {directory} 中无法找到所有需要的文件。请确保文件名中包含'_CGG'和'_CGC'。")
        return

    file_pairs = pair_files(cgg_files, cgc_files)
    if not file_pairs:
        print(f"在目录 {directory} 中无法找到配对的CGG和CGC文件。")
        return

    for cgg_file, cgc_file in file_pairs:
        cgg_data = read_csv(cgg_file)
        cgc_data = read_csv(cgc_file)
        if cgg_data is None or cgc_data is None:
            continue

        # Skip the first 7 rows of the CGG file when appending.
        # NOTE(review): presumably a header block duplicated in both files — confirm.
        updated_data = cgc_data + cgg_data[7:]

        new_file_name = os.path.basename(cgc_file).replace('_CGC', '')
        new_file_path = os.path.join(directory, new_file_name)

        # Never delete the sources unless the merged file really exists;
        # write_csv reports failure instead of raising.
        if not write_csv(new_file_path, updated_data):
            print(f"写入 {new_file_path} 失败,已保留原始文件。")
            continue

        os.remove(cgg_file)
        os.remove(cgc_file)
        print(f"处理完成,结果已覆盖到{new_file_path},原始文件已删除。")


def main():
    """Process the current working directory and all of its sub-directories."""
    current_directory = os.getcwd()
    for root, _dirs, _files in os.walk(current_directory):
        process_directory(root)


if __name__ == "__main__":
    main()
最新发布
01-07
import os import tempfile import pythoncom import win32com.client import threading import shutil import tkinter as tk from tkinter import filedialog, ttk, messagebox, scrolledtext from docx import Document from PyPDF2 import PdfMerger, PdfReader, PdfWriter from reportlab.pdfgen import canvas from reportlab.lib.pagesizes import letter from reportlab.pdfbase import pdfmetrics from reportlab.pdfbase.ttfonts import TTFont from reportlab.lib.colors import red, black, white from reportlab.platypus import Table, TableStyle from io import BytesIO from datetime import datetime import openpyxl # 用于读取Excel文件 class PDFConverterApp: def __init__(self, root): self.root = root self.root.title("audio_data") self.root.geometry("800x650") # 增加窗口高度以容纳新控件 self.folders = [] self.log_messages = [] self.output_path = "" # 存储自定义输出路径 self.backup_mode = tk.BooleanVar(value=True) # 添加备份模式开关 self.point_22_mode = tk.BooleanVar(value=False) # 新增22号点位开关 self.output_filename = tk.StringVar(value="听筒磁干扰_Simulation_Result") # 默认文件名 self.create_widgets() def create_widgets(self): # 创建顶部框架 top_frame = ttk.Frame(self.root, padding=10) top_frame.pack(fill=tk.X) output_frame = ttk.LabelFrame(self.root, text="输出设置", padding=10) output_frame.pack(fill=tk.X, padx=10, pady=(0, 5)) # 文件名输入框 ttk.Label(output_frame, text="文件名:").grid(row=0, column=0, sticky=tk.W, padx=(0, 5)) filename_entry = ttk.Entry(output_frame, textvariable=self.output_filename, width=30) filename_entry.grid(row=0, column=1, sticky=tk.W, padx=5) # 输出路径选择 ttk.Label(output_frame, text="输出路径:").grid(row=0, column=2, sticky=tk.W, padx=(20, 5)) self.path_entry = ttk.Entry(output_frame, width=40, state='readonly') self.path_entry.grid(row=0, column=3, sticky=tk.EW, padx=5) browse_btn = ttk.Button(output_frame, text="浏览...", command=self.choose_output_path) browse_btn.grid(row=0, column=4, padx=(5, 0)) # 设置网格列权重 output_frame.columnconfigure(3, weight=1) # 添加文件夹按钮 add_btn = ttk.Button(top_frame, text="添加文件夹", command=self.add_folder) 
add_btn.pack(side=tk.LEFT, padx=5) # 移除文件夹按钮 remove_btn = ttk.Button(top_frame, text="移除选中", command=self.remove_selected) remove_btn.pack(side=tk.LEFT, padx=5) # 清空列表按钮 clear_btn = ttk.Button(top_frame, text="清空列表", command=self.clear_list) clear_btn.pack(side=tk.LEFT, padx=5) # 处理按钮 process_btn = ttk.Button(top_frame, text="开始处理", command=self.start_processing) process_btn.pack(side=tk.RIGHT, padx=5) # 创建文件夹列表 list_frame = ttk.LabelFrame(self.root, text="待处理文件夹", padding=10) list_frame.pack(fill=tk.BOTH, expand=True, padx=10, pady=5) # 滚动条 scrollbar = ttk.Scrollbar(list_frame) scrollbar.pack(side=tk.RIGHT, fill=tk.Y) backup_frame = ttk.Frame(output_frame) backup_frame.grid(row=0, column=5, sticky=tk.W, padx=(20, 0)) # 在输出设置区域添加22号点位复选框 backup_frame = ttk.Frame(output_frame) backup_frame.grid(row=0, column=5, sticky=tk.W, padx=(20, 0)) self.backup_check = ttk.Checkbutton( backup_frame, text="报告存档", variable=self.backup_mode ) self.backup_check.pack(side=tk.LEFT) # 新增22号点位复选框 self.point_22_check = ttk.Checkbutton( backup_frame, text="22号点位", variable=self.point_22_mode ) self.point_22_check.pack(side=tk.LEFT, padx=(10, 0)) # 文件夹列表 self.folder_list = tk.Listbox( list_frame, selectmode=tk.EXTENDED, yscrollcommand=scrollbar.set, height=10 ) self.folder_list.pack(fill=tk.BOTH, expand=True) scrollbar.config(command=self.folder_list.yview) # 创建日志区域 log_frame = ttk.LabelFrame(self.root, text="处理日志", padding=10) log_frame.pack(fill=tk.BOTH, expand=True, padx=10, pady=5) # 日志文本框 self.log_text = scrolledtext.ScrolledText( log_frame, wrap=tk.WORD, state=tk.DISABLED ) self.log_text.pack(fill=tk.BOTH, expand=True) # 进度条 self.progress = ttk.Progressbar( self.root, orient=tk.HORIZONTAL, mode='determinate' ) self.progress.pack(fill=tk.X, padx=10, pady=5) def choose_output_path(self): """选择输出文件夹""" path = filedialog.askdirectory(title="选择输出文件夹") if path: self.output_path = path self.path_entry.config(state='normal') self.path_entry.delete(0, tk.END) self.path_entry.insert(0, path) 
self.path_entry.config(state='readonly') self.log(f"已设置输出路径: {path}") def add_folder(self): """添加要处理的文件夹""" folders = filedialog.askdirectory( title="选择要处理的文件夹", mustexist=True ) if folders: self.folders.append(folders) self.folder_list.insert(tk.END, folders) self.log(f"已添加文件夹: {folders}") def remove_selected(self): """移除选中的文件夹""" selected = self.folder_list.curselection() for index in selected[::-1]: folder = self.folder_list.get(index) self.folder_list.delete(index) self.folders.remove(folder) self.log(f"已移除文件夹: {folder}") def clear_list(self): """清空文件夹列表""" self.folder_list.delete(0, tk.END) self.folders = [] self.log("已清空文件夹列表") def log(self, message): """向日志区域添加消息""" timestamp = datetime.now().strftime("%H:%M:%S") log_entry = f"[{timestamp}] {message}" self.log_messages.append(log_entry) self.log_text.config(state=tk.NORMAL) self.log_text.insert(tk.END, log_entry + "\n") self.log_text.config(state=tk.DISABLED) self.log_text.yview(tk.END) # 自动滚动到底部 self.root.update_idletasks() def start_processing(self): """启动处理过程""" if not self.folders: messagebox.showwarning("警告", "请先添加要处理的文件夹") return # 禁用处理按钮 self.root.title("Word 转 PDF 合并工具 - 处理中...") self.progress["value"] = 0 # 在新线程中处理,避免界面冻结 thread = threading.Thread(target=self.process_folders) thread.daemon = True thread.start() # +++ 修改方法:备份时获取校准数据 +++ def backup_data_files(self, folder_path, backup_dir): """递归查找并备份所有.xlsx和.csv文件并返回校准数据""" self.log(f"开始在文件夹中搜索所有Excel和CSV文件: {folder_path}") backup_count = 0 calibration_data = {'j2': None, 'j3': None} # 存储校准数据 for root, dirs, files in os.walk(folder_path): for file in files: if file.lower().endswith(('.xlsx', '.csv')): file_path = os.path.join(root, file) file_name = os.path.basename(file_path) dest_path = os.path.join(backup_dir, file_name) try: os.makedirs(backup_dir, exist_ok=True) shutil.copy2(file_path, dest_path) backup_count += 1 self.log(f"备份成功: {file_path} → {dest_path}") except Exception as e: self.log(f"备份失败 {file_path}: {str(e)}") self.log(f"共找到并备份 
{backup_count} 个Excel和CSV文件") def process_folders(self): """处理多个文件夹中的Word文件""" try: # 提前初始化 output_folder if self.output_path: output_folder = self.output_path else: output_folder = next((p for p in self.folders if os.path.isdir(p)), os.getcwd()) self.log(f"开始处理 {len(self.folders)} 个文件夹...") # 获取所有文件夹中的Word文件 word_files = self.get_all_word_files(self.folders) if not word_files: self.log("没有找到任何Word文档") return self.log(f"共找到 {len(word_files)} 个Word文档") self.progress["maximum"] = len(word_files) + 5 # 文件数 + 合并步骤 backup_root = os.path.join(output_folder, "报告存档") # 统一备份根目录 if self.backup_mode.get(): os.makedirs(backup_root, exist_ok=True) # 创建临时目录存储转换后的PDF with tempfile.TemporaryDirectory() as temp_dir: pdf_files_with_header = [] toc_entries = [] all_tables = {} current_page = 1 # 处理每个Word文件 for i, word_file in enumerate(word_files): self.progress["value"] = i + 1 file_name = os.path.splitext(os.path.basename(word_file))[0] display_name = file_name # 修改Word文档逻辑 modified_word_path = word_file if self.point_22_mode.get() or "GSM" in file_name.upper(): # 创建临时副本进行修改 temp_word_path = os.path.join(temp_dir, os.path.basename(word_file)) shutil.copy2(word_file, temp_word_path) if self.modify_word_spec(temp_word_path): modified_word_path = temp_word_path original_pdf = os.path.join(temp_dir, f"{file_name}_original.pdf") pdf_with_header = os.path.join(temp_dir, f"{file_name}_with_header.pdf") if self.backup_mode.get(): try: # 为每个Word文件创建备份目录 dest_dir = os.path.join(backup_root, file_name) os.makedirs(dest_dir, exist_ok=True) # 备份Word文件 word_dest = os.path.join(dest_dir, os.path.basename(modified_word_path)) shutil.copy2(modified_word_path, word_dest) self.log(f"Word文件备份成功: {word_file} → {word_dest}") # +++ 备份数据文件并获取校准数据 +++ folder_path = os.path.dirname(word_file) except OSError as e: self.log(f"文件备份失败: {e}") except Exception as e: self.log(f"未知错误: {e}") # 提取表格数据 tables = self.extract_spec_table(modified_word_path) if tables: all_tables[display_name] = tables self.log(f"已从 
{display_name} 中提取 {len(tables)} 个数据表格") # 转换为PDF if self.word_to_pdf(modified_word_path, original_pdf): # 添加内联标题 if self.add_inline_header(original_pdf, display_name, pdf_with_header): pdf_files_with_header.append(pdf_with_header) toc_entries.append((display_name, current_page)) current_page += self.get_pdf_page_count(pdf_with_header) else: pdf_files_with_header.append(original_pdf) toc_entries.append((display_name, current_page)) current_page += self.get_pdf_page_count(original_pdf) else: self.log(f"跳过 {display_name},转换失败") # 更新进度条 self.progress["value"] = len(word_files) + 1 if not pdf_files_with_header: self.log("没有成功转换的PDF文件,无法进行合并") return # 获取输出路径 if self.output_path: output_folder = self.output_path else: output_folder = next((p for p in self.folders if os.path.isdir(p)), os.getcwd()) # 获取文件名 report_name = self.output_filename.get().strip() if not report_name: report_name = self.get_folder_name_parts(self.folders[0]) # 使用默认规则 output_pdf = os.path.join(output_folder, f"{report_name}.pdf") # 合并PDF success = self.merge_pdfs_with_summary( pdf_files_with_header, toc_entries, all_tables, output_pdf ) self.progress["value"] = len(word_files) + 3 if success: self.log(f"处理完成!输出文件: {output_pdf}") messagebox.showinfo("完成", f"处理完成!输出文件: {output_pdf}") else: self.log("处理失败") messagebox.showerror("错误", "处理过程中出现错误") self.root.title("Word 转 PDF 合并工具") except Exception as e: self.log(f"处理过程中出现错误: {str(e)}") messagebox.showerror("错误", f"处理过程中出现错误: {str(e)}") self.root.title("Word 转 PDF 合并工具") # 以下是原有的处理函数,保持不变但添加为类方法 def extract_spec_table(self, word_path): """从Word文档中提取SPEC(dB)、Simulation和Pass/Fail数据表格""" try: doc = Document(word_path) tables = [] for table in doc.tables: headers = [cell.text.strip() for cell in table.rows[0].cells] if "SPEC(dB)" in headers and "Simulation" in headers and "Pass/Fail" in headers: table_data = [] table_data.append(headers) for row in table.rows[1:]: row_data = [cell.text.strip() for cell in row.cells] table_data.append(row_data) 
tables.append(table_data) return tables except Exception as e: self.log(f"提取 {os.path.basename(word_path)} 中的表格时出错: {str(e)}") return [] def modify_word_spec(self, word_path): try: doc = Document(word_path) filename = os.path.basename(word_path).upper() has_gsm = "GSM" in filename # 移动到try块内部 # 确定SPEC基准值 if self.point_22_mode.get(): # 22号点位启用 spec_value = 22 if has_gsm else 20 else: # 22号点位未启用 spec_value = 20 if has_gsm else 18 modified = False # 初始化修改标志 # 遍历文档所有表格 for table in doc.tables: headers = [cell.text.strip() for cell in table.rows[0].cells] try: spec_index = headers.index("SPEC(dB)") # 定位SPEC列 sim_index = headers.index("Simulation") # 定位Simulation列 pf_index = headers.index("Pass/Fail") # 定位Pass/Fail列 except ValueError: continue # 跳过不含目标列的表 # 标记已找到可修改表格 modified = True # 修改每行数据 for row in table.rows[1:]: cells = row.cells # 更新SPEC值 if spec_index < len(cells): cells[spec_index].text = str(spec_value) # 更新Pass/Fail状态 if sim_index < len(cells) and pf_index < len(cells): try: sim_value = float(cells[sim_index].text) new_status = "PASS" if sim_value < spec_value else "FAIL" cells[pf_index].text = new_status except ValueError: pass # 忽略格式错误 # 保存修改后的文档 if modified: doc.save(word_path) self.log(f"已修改 {os.path.basename(word_path)} 的SPEC值为{spec_value}") return modified except Exception as e: self.log(f"修改 {os.path.basename(word_path)} 失败: {str(e)}") return False def add_inline_header(self, pdf_path, title, output_path): """在PDF的第一页顶部添加一行红色加粗的标题""" try: reader = PdfReader(pdf_path) writer = PdfWriter() if len(reader.pages) > 0: first_page = reader.pages[0] packet = BytesIO() can = canvas.Canvas(packet, pagesize=letter) width, height = letter font_name = "Helvetica-Bold" try: pdfmetrics.registerFont(TTFont('SimSun', 'simsun.ttc')) pdfmetrics.registerFont(TTFont('SimSun-Bold', 'simsun.ttc')) font_name = "SimSun-Bold" except: pass can.setFont(font_name, 14) can.setFillColor(red) can.drawString(50, height - 50, title) can.save() packet.seek(0) title_reader = 
PdfReader(packet) title_page = title_reader.pages[0] first_page.merge_page(title_page) writer.add_page(first_page) for page in reader.pages[1:]: writer.add_page(page) with open(output_path, "wb") as f: writer.write(f) return True return False except Exception as e: self.log(f"PDF添加标题失败: {str(e)}") return False # +++ 修改方法:创建Summary页(核心修改) +++ def create_summary_page(self, toc_entries, all_tables, calibration_data, output_path): """创建包含四列数据的Summary页(新增Calibration列)""" try: c = canvas.Canvas(output_path, pagesize=letter) width, height = letter font_name = "Helvetica" try: pdfmetrics.registerFont(TTFont('SimSun', 'simsun.ttc')) font_name = "SimSun" except: pass # Summary标题 c.setFont(font_name, 24) c.setFillColor(red) c.drawCentredString(width / 2.0, height - 50, "Summary") c.setFillColor(black) y_position = height - 100 # 添加数据汇总表格 if all_tables: c.setFont(font_name, 16) c.drawString(50, y_position, "Data Summary:") y_position -= 30 c.setFont(font_name, 10) table_width = width - 100 for doc_name, tables in all_tables.items(): c.setFont(font_name, 12) c.setFillColor(red) c.drawString(60, y_position, f"Document: {doc_name}") y_position -= 20 c.setFillColor(black) c.setFont(font_name, 10) # 修复:处理每个表格 for table_data in tables: # 确保表格有数据行 if len(table_data) < 2: # 至少包含表头+1行数据 continue # 新表头格式 headers = ["SPEC(dB)", "Simulation", "Pass/Fail"] # 移除Calibration列 # 提取第一行原始数据(跳过表头) data_row = table_data[1] if len(table_data) > 1 else ["N/A"] * 3 # 确保数据行有足够列 while len(data_row) < 3: data_row.append("N/A") # 获取当前文档的J3校准值 doc_cal_data = calibration_data.get(doc_name, {}) j3_value = doc_cal_data.get('j3', 'N/A') # 创建数据行:前三列使用原始数据,第四列添加J3值 new_row = [ data_row[0], # SPEC(dB)值 data_row[1], # Simulation值 data_row[2], # Pass/Fail值 ] # 表格数据:表头+数据行 modified_table = [headers, new_row] # 设置四列等宽布局 col_widths = [table_width / 3] * 3 table = Table(modified_table, colWidths=col_widths) # 设置表格样式 style = TableStyle([ ('BACKGROUND', (0, 0), (-1, 0), white), ('TEXTCOLOR', (0, 0), (-1, 0), black), 
('ALIGN', (0, 0), (-1, -1), 'CENTER'), ('FONTNAME', (0, 0), (-1, 0), font_name), ('FONTNAME', (0, 1), (-1, -1), font_name), ('BOTTOMPADDING', (0, 0), (-1, 0), 12), ('BACKGROUND', (0, 1), (-1, -1), white), ('GRID', (0, 0), (-1, -1), 1, black) ]) table.setStyle(style) # 计算表格高度并绘制 table_height = table.wrap(0, 0)[1] if y_position - table_height < 50: c.showPage() y_position = height - 50 c.setFont(font_name, 24) c.setFillColor(red) c.drawCentredString(width / 2.0, y_position, "Summary") y_position -= 50 c.setFillColor(black) table.drawOn(c, 50, y_position - table_height) y_position -= (table_height + 20) c.save() return output_path except Exception as e: self.log(f"创建Summary页失败: {str(e)}") return None def word_to_pdf(self, word_path, pdf_path): """将Word文档转换为PDF""" pythoncom.CoInitialize() try: word = win32com.client.Dispatch("Word.Application") word.Visible = False doc = word.Documents.Open(os.path.abspath(word_path)) doc.SaveAs(os.path.abspath(pdf_path), FileFormat=17) doc.Close() word.Quit() self.log(f"已将 {os.path.basename(word_path)} 转换为PDF") return True except Exception as e: self.log(f"转换 {os.path.basename(word_path)} 时出错: {str(e)}") return False finally: pythoncom.CoUninitialize() def get_pdf_page_count(self, pdf_path): """获取PDF文件的页数""" try: reader = PdfReader(pdf_path) return len(reader.pages) except: return 0 def merge_pdfs_with_summary(self, pdf_files, toc_entries, all_tables, output_path): """合并PDF文件并添加Summary页""" try: with tempfile.NamedTemporaryFile(delete=False, suffix='.pdf') as summary_file: summary_path = summary_file.name # 修复:传递calibration_data参数 self.create_summary_page(toc_entries, all_tables, summary_path) summary_page_count = self.get_pdf_page_count(summary_path) updated_toc_entries = [(title, page_num + summary_page_count) for title, page_num in toc_entries] merger = PdfMerger() merger.append(summary_path) current_page = summary_page_count for pdf, (title, _) in zip(pdf_files, updated_toc_entries): merger.append(pdf) 
merger.add_outline_item(title, current_page) current_page += self.get_pdf_page_count(pdf) merger.write(output_path) merger.close() os.remove(summary_path) self.log(f"已成功合并 {len(pdf_files)} 个PDF文件") return True except Exception as e: self.log(f"合并PDF时出错: {str(e)}") return False def get_all_word_files(self, folder_paths): """获取所有Word文件""" word_extensions = ['.docx', '.doc'] word_files = [] for folder_path in folder_paths: if not os.path.isdir(folder_path): continue for file in os.listdir(folder_path): file_ext = os.path.splitext(file)[1].lower() if file_ext in word_extensions: word_path = os.path.join(folder_path, file) word_files.append(word_path) return word_files def get_folder_name_parts(self, folder_paths): """生成报告文件名""" if not folder_paths: return "听筒磁干扰仿真报告" folder_path = folder_paths[0] norm_path = os.path.normpath(folder_path) parts = [p for p in norm_path.split(os.sep) if p] if len(parts) >= 3: return f"{parts[-3]}_{parts[-2]}_{parts[-1]}" elif len(parts) == 2: return f"{parts[-2]}_{parts[-1]}" elif len(parts) == 1: return parts[0] return "听筒磁干扰仿真报告" if __name__ == "__main__": root = tk.Tk() app = PDFConverterApp(root) root.mainloop() # 添加这行启动事件循环 [16:50:22] 已添加文件夹: D:/tes/新建文件夹/1 [16:50:26] 已添加文件夹: D:/tes/新建文件夹/2 [16:50:32] 已设置输出路径: D:/tes/新建文件夹/111 [16:50:35] 开始处理 2 个文件夹... 
[16:50:35] 共找到 2 个Word文档
[16:50:35] Word文件备份成功: D:/tes/新建文件夹/1\Lux-H_PA0701_B41-qet1801_0905_simulation_result.docx → D:/tes/新建文件夹/111\报告存档\Lux-H_PA0701_B41-qet1801_0905_simulation_result\Lux-H_PA0701_B41-qet1801_0905_simulation_result.docx
[16:50:35] 已从 Lux-H_PA0701_B41-qet1801_0905_simulation_result 中提取 1 个数据表格
[16:50:40] 已将 Lux-H_PA0701_B41-qet1801_0905_simulation_result.docx 转换为PDF
[16:50:41] Word文件备份成功: D:/tes/新建文件夹/2\Lux-H_PA0701_B41-qet1802_0905_simulation_result.docx → D:/tes/新建文件夹/111\报告存档\Lux-H_PA0701_B41-qet1802_0905_simulation_result\Lux-H_PA0701_B41-qet1802_0905_simulation_result.docx
[16:50:41] 已从 Lux-H_PA0701_B41-qet1802_0905_simulation_result 中提取 1 个数据表格
[16:50:45] 已将 Lux-H_PA0701_B41-qet1802_0905_simulation_result.docx 转换为PDF
[16:50:45] 合并PDF时出错: PDFConverterApp.create_summary_page() missing 1 required positional argument: 'output_path'
[16:50:45] 处理失败

帮我修改报错和汇总修改部分。
09-23
import os
import shutil
import tempfile
import threading
import tkinter as tk
from datetime import datetime
from io import BytesIO
from tkinter import filedialog, messagebox, scrolledtext, ttk

import pythoncom
import win32com.client
from docx import Document
from PyPDF2 import PdfMerger, PdfReader, PdfWriter
from reportlab.lib.colors import black, red, white
from reportlab.lib.pagesizes import letter
from reportlab.pdfbase import pdfmetrics
from reportlab.pdfbase.ttfonts import TTFont
from reportlab.pdfgen import canvas
from reportlab.platypus import Table, TableStyle


class PDFConverterApp:
    """Tkinter GUI that converts Word reports to PDF and merges them.

    For every Word file found directly inside the selected folders the tool
    optionally rewrites the SPEC(dB) / Pass/Fail columns, optionally archives
    the document together with nearby .xlsx/.csv files, converts it to PDF
    through Word automation, stamps a red title on the first page and finally
    merges everything behind a generated "Summary" page.
    """

    def __init__(self, root):
        """Store the Tk root, initialise state variables and build the UI."""
        self.root = root
        self.root.title("audio_data")
        self.root.geometry("800x650")
        self.folders = []
        self.log_messages = []
        # Custom output directory; empty means "fall back to an input folder".
        self.output_path = ""
        # Archive switch: copy processed documents into a backup tree.
        self.backup_mode = tk.BooleanVar(value=True)
        # "Point 22" switch: raises the SPEC threshold (see modify_word_spec).
        self.point_22_mode = tk.BooleanVar(value=False)
        # Default base name of the merged report.
        self.output_filename = tk.StringVar(value="听筒磁干扰_Simulation_Result")
        self.create_widgets()

    def create_widgets(self):
        """Create and lay out every widget of the main window."""
        top_frame = ttk.Frame(self.root, padding=10)
        top_frame.pack(fill=tk.X)

        output_frame = ttk.LabelFrame(self.root, text="输出设置", padding=10)
        output_frame.pack(fill=tk.X, padx=10, pady=(0, 5))

        # Report file name entry.
        ttk.Label(output_frame, text="文件名:").grid(
            row=0, column=0, sticky=tk.W, padx=(0, 5))
        filename_entry = ttk.Entry(
            output_frame, textvariable=self.output_filename, width=30)
        filename_entry.grid(row=0, column=1, sticky=tk.W, padx=5)

        # Output directory (read-only entry filled in by the browse button).
        ttk.Label(output_frame, text="输出路径:").grid(
            row=0, column=2, sticky=tk.W, padx=(20, 5))
        self.path_entry = ttk.Entry(output_frame, width=40, state='readonly')
        self.path_entry.grid(row=0, column=3, sticky=tk.EW, padx=5)
        browse_btn = ttk.Button(
            output_frame, text="浏览...", command=self.choose_output_path)
        browse_btn.grid(row=0, column=4, padx=(5, 0))

        # Let the path entry absorb extra horizontal space.
        output_frame.columnconfigure(3, weight=1)

        # Folder list management buttons.
        add_btn = ttk.Button(top_frame, text="添加文件夹", command=self.add_folder)
        add_btn.pack(side=tk.LEFT, padx=5)
        remove_btn = ttk.Button(
            top_frame, text="移除选中", command=self.remove_selected)
        remove_btn.pack(side=tk.LEFT, padx=5)
        clear_btn = ttk.Button(top_frame, text="清空列表", command=self.clear_list)
        clear_btn.pack(side=tk.LEFT, padx=5)
        process_btn = ttk.Button(
            top_frame, text="开始处理", command=self.start_processing)
        process_btn.pack(side=tk.RIGHT, padx=5)

        # Folder list with its scrollbar.
        list_frame = ttk.LabelFrame(self.root, text="待处理文件夹", padding=10)
        list_frame.pack(fill=tk.BOTH, expand=True, padx=10, pady=5)
        scrollbar = ttk.Scrollbar(list_frame)
        scrollbar.pack(side=tk.RIGHT, fill=tk.Y)

        # Option checkboxes live in a single frame inside the output settings
        # (the frame used to be created twice and stacked in the same cell).
        backup_frame = ttk.Frame(output_frame)
        backup_frame.grid(row=0, column=5, sticky=tk.W, padx=(20, 0))
        self.backup_check = ttk.Checkbutton(
            backup_frame, text="报告存档", variable=self.backup_mode)
        self.backup_check.pack(side=tk.LEFT)
        self.point_22_check = ttk.Checkbutton(
            backup_frame, text="22号点位", variable=self.point_22_mode)
        self.point_22_check.pack(side=tk.LEFT, padx=(10, 0))

        self.folder_list = tk.Listbox(
            list_frame,
            selectmode=tk.EXTENDED,
            yscrollcommand=scrollbar.set,
            height=10,
        )
        self.folder_list.pack(fill=tk.BOTH, expand=True)
        scrollbar.config(command=self.folder_list.yview)

        # Log area.
        log_frame = ttk.LabelFrame(self.root, text="处理日志", padding=10)
        log_frame.pack(fill=tk.BOTH, expand=True, padx=10, pady=5)
        self.log_text = scrolledtext.ScrolledText(
            log_frame, wrap=tk.WORD, state=tk.DISABLED)
        self.log_text.pack(fill=tk.BOTH, expand=True)

        # Progress bar.
        self.progress = ttk.Progressbar(
            self.root, orient=tk.HORIZONTAL, mode='determinate')
        self.progress.pack(fill=tk.X, padx=10, pady=5)

    def choose_output_path(self):
        """Ask for an output directory and show it in the read-only entry."""
        path = filedialog.askdirectory(title="选择输出文件夹")
        if path:
            self.output_path = path
            # The entry is read-only, so unlock it just for the update.
            self.path_entry.config(state='normal')
            self.path_entry.delete(0, tk.END)
            self.path_entry.insert(0, path)
            self.path_entry.config(state='readonly')
            self.log(f"已设置输出路径: {path}")

    def add_folder(self):
        """Ask for a folder and append it to the list of folders to process."""
        folder = filedialog.askdirectory(
            title="选择要处理的文件夹", mustexist=True)
        if folder:
            self.folders.append(folder)
            self.folder_list.insert(tk.END, folder)
            self.log(f"已添加文件夹: {folder}")

    def remove_selected(self):
        """Remove the folders currently selected in the list box."""
        selected = self.folder_list.curselection()
        # Delete from the end so earlier indices stay valid.
        for index in selected[::-1]:
            folder = self.folder_list.get(index)
            self.folder_list.delete(index)
            self.folders.remove(folder)
            self.log(f"已移除文件夹: {folder}")

    def clear_list(self):
        """Empty the folder list."""
        self.folder_list.delete(0, tk.END)
        self.folders = []
        self.log("已清空文件夹列表")

    def log(self, message):
        """Append a time-stamped message to the history and the log widget."""
        # NOTE(review): this is also called from the worker thread started in
        # start_processing; Tk widgets are touched off the main thread there.
        timestamp = datetime.now().strftime("%H:%M:%S")
        log_entry = f"[{timestamp}] {message}"
        self.log_messages.append(log_entry)
        self.log_text.config(state=tk.NORMAL)
        self.log_text.insert(tk.END, log_entry + "\n")
        self.log_text.config(state=tk.DISABLED)
        self.log_text.yview(tk.END)  # keep the newest entry visible
        self.root.update_idletasks()

    def start_processing(self):
        """Validate the folder list and run process_folders in a thread."""
        if not self.folders:
            messagebox.showwarning("警告", "请先添加要处理的文件夹")
            return

        # Signal "busy" through the window title and reset the progress bar.
        self.root.title("Word 转 PDF 合并工具 - 处理中...")
        self.progress["value"] = 0

        # Run in a daemon thread so the UI does not freeze.
        thread = threading.Thread(target=self.process_folders)
        thread.daemon = True
        thread.start()

    def backup_excel_files(self, source_folder, backup_dir):
        """Copy every Sound_Pres_Cal.xlsx under source_folder to backup_dir.

        The directory layout relative to source_folder is preserved. Progress
        is reported with print(), not through the GUI log.
        """
        print(f"开始在文件夹中搜索Sound_Pres_Cal.xlsx: {source_folder}")
        backup_count = 0

        for root, dirs, files in os.walk(source_folder):
            for file in files:
                if file != "Sound_Pres_Cal.xlsx":
                    continue
                excel_path = os.path.join(root, file)
                try:
                    # Recreate the relative directory structure in the backup.
                    rel_path = os.path.relpath(excel_path, start=source_folder)
                    dest_path = os.path.join(backup_dir, rel_path)
                    os.makedirs(os.path.dirname(dest_path), exist_ok=True)
                    shutil.copy2(excel_path, dest_path)  # keeps metadata
                    backup_count += 1
                    print(f"备份成功: {excel_path} → {dest_path}")
                except Exception as e:
                    print(f"备份失败 {excel_path}: {str(e)}")

        print(f"共找到并备份 {backup_count} 个Sound_Pres_Cal.xlsx文件")

    def backup_data_files(self, folder_path, backup_dir):
        """Copy every .xlsx/.csv under folder_path flat into backup_dir.

        Files are found recursively but all land directly in backup_dir, so
        files sharing a name overwrite each other.
        """
        self.log(f"开始在文件夹中搜索所有Excel和CSV文件: {folder_path}")
        backup_count = 0

        for root, dirs, files in os.walk(folder_path):
            for file in files:
                # Case-insensitive extension match.
                if not file.lower().endswith(('.xlsx', '.csv')):
                    continue
                file_path = os.path.join(root, file)
                dest_path = os.path.join(backup_dir, os.path.basename(file_path))
                try:
                    os.makedirs(backup_dir, exist_ok=True)
                    shutil.copy2(file_path, dest_path)  # keeps metadata
                    backup_count += 1
                    self.log(f"备份成功: {file_path} → {dest_path}")
                except Exception as e:
                    self.log(f"备份失败 {file_path}: {str(e)}")

        self.log(f"共找到并备份 {backup_count} 个Excel和CSV文件")

    def _resolve_output_folder(self):
        """Return the chosen output dir, else the first existing input folder, else cwd."""
        if self.output_path:
            return self.output_path
        return next((p for p in self.folders if os.path.isdir(p)), os.getcwd())

    def process_folders(self):
        """Convert, annotate and merge every Word file of the selected folders.

        Runs in the worker thread started by start_processing. The window
        title is restored on every exit path, including the early returns.
        """
        try:
            output_folder = self._resolve_output_folder()

            self.log(f"开始处理 {len(self.folders)} 个文件夹...")

            word_files = self.get_all_word_files(self.folders)
            if not word_files:
                self.log("没有找到任何Word文档")
                return

            self.log(f"共找到 {len(word_files)} 个Word文档")
            # One step per file plus head-room for the merge steps.
            self.progress["maximum"] = len(word_files) + 5

            # Common root of the archive tree.
            backup_root = os.path.join(output_folder, "报告存档")
            if self.backup_mode.get():
                os.makedirs(backup_root, exist_ok=True)

            # Intermediate PDFs and modified Word copies live in a temp dir.
            with tempfile.TemporaryDirectory() as temp_dir:
                pdf_files_with_header = []
                toc_entries = []
                all_tables = {}
                current_page = 1

                for i, word_file in enumerate(word_files):
                    self.progress["value"] = i + 1
                    file_name = os.path.splitext(os.path.basename(word_file))[0]
                    display_name = file_name

                    # Only touch the document when the SPEC rewrite applies,
                    # and always work on a temporary copy.
                    modified_word_path = word_file
                    if self.point_22_mode.get() or "GSM" in file_name.upper():
                        temp_word_path = os.path.join(
                            temp_dir, os.path.basename(word_file))
                        shutil.copy2(word_file, temp_word_path)
                        if self.modify_word_spec(temp_word_path):
                            modified_word_path = temp_word_path

                    original_pdf = os.path.join(
                        temp_dir, f"{file_name}_original.pdf")
                    pdf_with_header = os.path.join(
                        temp_dir, f"{file_name}_with_header.pdf")

                    if self.backup_mode.get():
                        try:
                            # One archive directory per Word file.
                            dest_dir = os.path.join(backup_root, file_name)
                            os.makedirs(dest_dir, exist_ok=True)

                            word_dest = os.path.join(
                                dest_dir, os.path.basename(modified_word_path))
                            shutil.copy2(modified_word_path, word_dest)
                            self.log(f"Word文件备份成功: {word_file} → {word_dest}")

                            # Archive the spreadsheets next to the document.
                            folder_path = os.path.dirname(word_file)
                            self.backup_data_files(folder_path, dest_dir)
                        except OSError as e:
                            self.log(f"文件备份失败: {e}")
                        except Exception as e:
                            self.log(f"未知错误: {e}")

                    # Collect the SPEC tables for the summary page.
                    tables = self.extract_spec_table(modified_word_path)
                    if tables:
                        all_tables[display_name] = tables
                        self.log(f"已从 {display_name} 中提取 {len(tables)} 个数据表格")

                    if self.word_to_pdf(modified_word_path, original_pdf):
                        # Fall back to the plain PDF if stamping the title fails.
                        if self.add_inline_header(
                                original_pdf, display_name, pdf_with_header):
                            final_pdf = pdf_with_header
                        else:
                            final_pdf = original_pdf
                        pdf_files_with_header.append(final_pdf)
                        toc_entries.append((display_name, current_page))
                        current_page += self.get_pdf_page_count(final_pdf)
                    else:
                        self.log(f"跳过 {display_name},转换失败")

                self.progress["value"] = len(word_files) + 1

                if not pdf_files_with_header:
                    self.log("没有成功转换的PDF文件,无法进行合并")
                    return

                report_name = self.output_filename.get().strip()
                if not report_name:
                    # Derive a name from the first folder's path. The whole
                    # list is passed: passing self.folders[0] made the helper
                    # index into the string and return its first character.
                    report_name = self.get_folder_name_parts(self.folders)

                output_pdf = os.path.join(output_folder, f"{report_name}.pdf")

                self.progress["value"] = len(word_files) + 2
                success = self.merge_pdfs_with_summary(
                    pdf_files_with_header,
                    toc_entries,
                    all_tables,
                    output_pdf,
                )

                self.progress["value"] = len(word_files) + 3

                if success:
                    self.log(f"处理完成!输出文件: {output_pdf}")
                    messagebox.showinfo("完成", f"处理完成!输出文件: {output_pdf}")
                else:
                    self.log("处理失败")
                    messagebox.showerror("错误", "处理过程中出现错误")
        except Exception as e:
            self.log(f"处理过程中出现错误: {str(e)}")
            messagebox.showerror("错误", f"处理过程中出现错误: {str(e)}")
        finally:
            # Leave the "processing..." state on every exit path.
            self.root.title("Word 转 PDF 合并工具")

    def extract_spec_table(self, word_path):
        """Return the tables of a Word file that carry the SPEC columns.

        A table qualifies when its first row contains "SPEC(dB)",
        "Simulation" and "Pass/Fail". Each table is returned as a list of
        rows (header first) of stripped cell texts. Returns [] on any error.
        """
        try:
            doc = Document(word_path)
            tables = []
            for table in doc.tables:
                if len(table.rows) == 0:
                    continue  # nothing to inspect in an empty table
                headers = [cell.text.strip() for cell in table.rows[0].cells]
                if ("SPEC(dB)" in headers and "Simulation" in headers
                        and "Pass/Fail" in headers):
                    table_data = [headers]
                    for row in table.rows[1:]:
                        table_data.append(
                            [cell.text.strip() for cell in row.cells])
                    tables.append(table_data)
            return tables
        except Exception as e:
            self.log(f"提取 {os.path.basename(word_path)} 中的表格时出错: {str(e)}")
            return []

    def modify_word_spec(self, word_path):
        """Rewrite SPEC(dB) and Pass/Fail in place in the given Word file.

        The SPEC value is 18 by default, +2 when the file name contains
        "GSM" and +2 when the point-22 switch is on. A row is "PASS" when its
        Simulation value is strictly below the SPEC value. Returns True when
        at least one matching table was found and the file was saved.
        """
        try:
            doc = Document(word_path)
            has_gsm = "GSM" in os.path.basename(word_path).upper()

            if self.point_22_mode.get():
                spec_value = 22 if has_gsm else 20
            else:
                spec_value = 20 if has_gsm else 18

            modified = False

            for table in doc.tables:
                if len(table.rows) == 0:
                    continue
                headers = [cell.text.strip() for cell in table.rows[0].cells]
                try:
                    spec_index = headers.index("SPEC(dB)")
                    sim_index = headers.index("Simulation")
                    pf_index = headers.index("Pass/Fail")
                except ValueError:
                    continue  # table lacks one of the target columns

                modified = True

                for row in table.rows[1:]:
                    cells = row.cells

                    if spec_index < len(cells):
                        cells[spec_index].text = str(spec_value)

                    if sim_index < len(cells) and pf_index < len(cells):
                        try:
                            sim_value = float(cells[sim_index].text)
                        except ValueError:
                            continue  # non-numeric Simulation cell: keep status
                        cells[pf_index].text = (
                            "PASS" if sim_value < spec_value else "FAIL")

            if modified:
                doc.save(word_path)
                self.log(
                    f"已修改 {os.path.basename(word_path)} 的SPEC值为{spec_value}")

            return modified
        except Exception as e:
            self.log(f"修改 {os.path.basename(word_path)} 失败: {str(e)}")
            return False

    def add_inline_header(self, pdf_path, title, output_path):
        """Stamp a red title near the top of the first page of a PDF.

        Writes the result to output_path. Returns True on success, False when
        the input has no pages or anything fails.
        """
        try:
            reader = PdfReader(pdf_path)
            writer = PdfWriter()

            if len(reader.pages) == 0:
                return False

            first_page = reader.pages[0]

            # Draw the title on an in-memory, letter-sized overlay page.
            packet = BytesIO()
            can = canvas.Canvas(packet, pagesize=letter)
            width, height = letter

            font_name = "Helvetica-Bold"
            try:
                # Prefer SimSun so CJK titles render; keep Helvetica otherwise.
                pdfmetrics.registerFont(TTFont('SimSun', 'simsun.ttc'))
                pdfmetrics.registerFont(TTFont('SimSun-Bold', 'simsun.ttc'))
                font_name = "SimSun-Bold"
            except Exception:
                pass  # font file unavailable: best-effort fallback

            can.setFont(font_name, 14)
            can.setFillColor(red)
            can.drawString(50, height - 50, title)
            can.save()

            packet.seek(0)
            title_page = PdfReader(packet).pages[0]
            first_page.merge_page(title_page)
            writer.add_page(first_page)

            for page in reader.pages[1:]:
                writer.add_page(page)

            with open(output_path, "wb") as f:
                writer.write(f)
            return True
        except Exception as e:
            self.log(f"PDF添加标题失败: {str(e)}")
            return False

    def create_summary_page(self, toc_entries, all_tables, output_path):
        """Render the "Summary" PDF listing every extracted table.

        all_tables maps a document name to its list of tables (rows of cell
        texts, header first). toc_entries is accepted for interface
        compatibility and is not used. Returns output_path, or None on error.
        """
        try:
            c = canvas.Canvas(output_path, pagesize=letter)
            width, height = letter

            font_name = "Helvetica"
            try:
                pdfmetrics.registerFont(TTFont('SimSun', 'simsun.ttc'))
                font_name = "SimSun"
            except Exception:
                pass  # font file unavailable: best-effort fallback

            # Page title.
            c.setFont(font_name, 24)
            c.setFillColor(red)
            c.drawCentredString(width / 2.0, height - 50, "Summary")
            c.setFillColor(black)

            y_position = height - 100

            if all_tables:
                c.setFont(font_name, 16)
                c.drawString(50, y_position, "Data Summary:")
                y_position -= 30

                c.setFont(font_name, 10)
                table_width = width - 100

                for doc_name, tables in all_tables.items():
                    c.setFont(font_name, 12)
                    c.setFillColor(red)
                    c.drawString(60, y_position, f"Document: {doc_name}")
                    y_position -= 20
                    c.setFillColor(black)
                    c.setFont(font_name, 10)

                    for table_data in tables:
                        # Equal-width columns spanning the printable width.
                        column_count = len(table_data[0])
                        col_widths = [table_width / column_count] * column_count
                        table = Table(table_data, colWidths=col_widths)

                        style = TableStyle([
                            ('BACKGROUND', (0, 0), (-1, 0), white),
                            ('TEXTCOLOR', (0, 0), (-1, 0), black),
                            ('ALIGN', (0, 0), (-1, -1), 'CENTER'),
                            ('FONTNAME', (0, 0), (-1, 0), font_name),
                            ('FONTNAME', (0, 1), (-1, -1), font_name),
                            ('BOTTOMPADDING', (0, 0), (-1, 0), 12),
                            ('BACKGROUND', (0, 1), (-1, -1), white),
                            ('GRID', (0, 0), (-1, -1), 1, black),
                        ])
                        table.setStyle(style)

                        table_height = table.wrap(0, 0)[1]

                        # Start a new page (with its title) when the table
                        # would run into the bottom margin.
                        if y_position - table_height < 50:
                            c.showPage()
                            y_position = height - 50
                            c.setFont(font_name, 24)
                            c.setFillColor(red)
                            c.drawCentredString(
                                width / 2.0, y_position, "Summary")
                            y_position -= 50
                            c.setFillColor(black)

                        table.drawOn(c, 50, y_position - table_height)
                        y_position -= (table_height + 20)

            c.save()
            return output_path
        except Exception as e:
            self.log(f"创建Summary页失败: {str(e)}")
            return None

    def word_to_pdf(self, word_path, pdf_path):
        """Convert a Word file to PDF through Word COM automation.

        Returns True on success and False on failure. The document and the
        Word instance are released even when the conversion raises, so a
        failed file does not leave a hidden Word process behind.
        """
        pythoncom.CoInitialize()
        word = None
        doc = None
        try:
            word = win32com.client.Dispatch("Word.Application")
            word.Visible = False
            doc = word.Documents.Open(os.path.abspath(word_path))
            # FileFormat=17 is passed to SaveAs to request PDF output.
            doc.SaveAs(os.path.abspath(pdf_path), FileFormat=17)
            self.log(f"已将 {os.path.basename(word_path)} 转换为PDF")
            return True
        except Exception as e:
            self.log(f"转换 {os.path.basename(word_path)} 时出错: {str(e)}")
            return False
        finally:
            # Best-effort cleanup: a failure here must not mask the result.
            try:
                if doc is not None:
                    doc.Close()
            except Exception:
                pass
            try:
                if word is not None:
                    word.Quit()
            except Exception:
                pass
            pythoncom.CoUninitialize()

    def get_pdf_page_count(self, pdf_path):
        """Return the number of pages of a PDF, or 0 if it cannot be read."""
        try:
            return len(PdfReader(pdf_path).pages)
        except Exception:
            return 0

    def merge_pdfs_with_summary(self, pdf_files, toc_entries, all_tables,
                                output_path):
        """Merge the PDFs behind a generated Summary page.

        An outline entry is added per document, titled from toc_entries
        (paired with pdf_files by position). Returns True on success, False
        otherwise. The temporary summary file and the merger are released on
        every path.
        """
        summary_path = None
        merger = None
        try:
            # Reserve a temp file name; reportlab writes to it afterwards.
            with tempfile.NamedTemporaryFile(
                    delete=False, suffix='.pdf') as summary_file:
                summary_path = summary_file.name

            if self.create_summary_page(
                    toc_entries, all_tables, summary_path) is None:
                # The failure has already been logged by create_summary_page.
                return False

            summary_page_count = self.get_pdf_page_count(summary_path)

            merger = PdfMerger()
            merger.append(summary_path)

            # Outline targets are page indices counted after the summary.
            current_page = summary_page_count
            for pdf, (title, _) in zip(pdf_files, toc_entries):
                merger.append(pdf)
                merger.add_outline_item(title, current_page)
                current_page += self.get_pdf_page_count(pdf)

            merger.write(output_path)

            self.log(f"已成功合并 {len(pdf_files)} 个PDF文件")
            return True
        except Exception as e:
            self.log(f"合并PDF时出错: {str(e)}")
            return False
        finally:
            if merger is not None:
                try:
                    merger.close()
                except Exception:
                    pass  # best-effort release of file handles
            if summary_path and os.path.exists(summary_path):
                try:
                    os.remove(summary_path)
                except OSError:
                    pass  # a leftover temp file is not worth failing for

    def get_all_word_files(self, folder_paths):
        """Return paths of .docx/.doc files directly inside the folders.

        Entries that are not existing directories are skipped; sub-folders
        are not searched.
        """
        word_extensions = ['.docx', '.doc']
        word_files = []

        for folder_path in folder_paths:
            if not os.path.isdir(folder_path):
                continue
            for file in os.listdir(folder_path):
                if os.path.splitext(file)[1].lower() in word_extensions:
                    word_files.append(os.path.join(folder_path, file))

        return word_files

    def get_folder_name_parts(self, folder_paths):
        """Build a report name from the last path components of a folder.

        folder_paths is a list of folder paths (only the first is used); a
        single path string is accepted as well. Up to the last three
        components are joined with underscores. Returns a default name when
        no usable path is given.
        """
        if not folder_paths:
            return "听筒磁干扰仿真报告"

        # Accept a bare path so that indexing never slices into a string.
        if isinstance(folder_paths, str):
            folder_paths = [folder_paths]

        norm_path = os.path.normpath(folder_paths[0])
        parts = [p for p in norm_path.split(os.sep) if p]

        if len(parts) >= 3:
            return f"{parts[-3]}_{parts[-2]}_{parts[-1]}"
        if len(parts) == 2:
            return f"{parts[-2]}_{parts[-1]}"
        if len(parts) == 1:
            return parts[0]
        return "听筒磁干扰仿真报告"


if __name__ == "__main__":
    root = tk.Tk()
    app = PDFConverterApp(root)
    root.mainloop()  # start the Tk event loop

# Follow-up request from the original post (not implemented above): without
# changing the existing behaviour, open the backed-up Sound_pres_cal.xlsx,
# read cells J2 and J3, and show them in the Summary page table as one extra
# column next to SPEC(dB), displayed in two rows -- J2 in the first row, J3
# in the second -- and resize the data cells so four columns fit on one row.
09-17
1.environment_interface.py改完了2.model_manager.py你写一半又卡住了 我没看见完整的 只看见这些”# E:\AI_System\agent\model_manager.py import os import sys import logging import json import hashlib import gc import time from pathlib import Path from typing import Dict, Any, Optional, Tuple, List from utils.path_utils import normalize_path, is_valid_hf_id class ModelManager: """AI模型管理器 - 完整修复版""" MODEL_REGISTRY_FILE = "model_registry.json" DEFAULT_MODEL_PATHS = { "TEXT_BASE": "local_models/text_base", "TEXT_CHAT": "local_models/text_chat", "IMAGE_MODEL": "local_models/image_model" } def __init__(self, config: Dict[str, Any] = None, cache_dir: str = "model_cache", use_gpu: bool = True, max_models_in_memory: int = 3): # 配置日志 self.logger = logging.getLogger("ModelManager") self.logger.setLevel(logging.INFO) if not self.logger.handlers: handler = logging.StreamHandler() formatter = logging.Formatter( '%(asctime)s - %(name)s - %(levelname)s - %(message)s', datefmt='%Y-%m-%d %H:%M:%S' ) handler.setFormatter(formatter) self.logger.addHandler(handler) self.logger.info("🚀 初始化模型管理器...") # 初始化参数 self.config = config or {} self.cache_dir = normalize_path(cache_dir) # 使用路径规范化 self.use_gpu = use_gpu self.max_models_in_memory = max_models_in_memory # 确保缓存目录存在 os.makedirs(self.cache_dir, exist_ok=True) # 加载或创建注册表 self._persistent_registry = self._load_or_create_registry() # 已加载的模型 self.loaded_models: Dict[str, Any] = {} # 自动注册默认模型 self._register_default_models() self.logger.info(f"✅ 模型管理器初始化完成 (GPU: {'启用' if use_gpu else '禁用'})") self.logger.info(f"已注册模型: {list(self._persistent_registry.keys())}") def _load_or_create_registry(self) -> Dict[str, dict]: """加载或创建模型注册表""" try: registry_path = Path(normalize_path(self.MODEL_REGISTRY_FILE)) # 路径规范化 if registry_path.exists(): with open(registry_path, 'r', encoding='utf-8') as f: registry = json.load(f) self.logger.info(f"📋 成功加载模型注册表: {registry_path}") return registry self.logger.warning(f"⚠️ 模型注册表不存在,创建新文件: {registry_path}") with open(registry_path, 'w', 
encoding='utf-8') as f: json.dump({}, f, indent=2) return {} except Exception as e: self.logger.error(f"❌ 处理模型注册表失败: {str(e)}") return {} def _register_default_models(self): """注册配置文件中的默认模型""" model_settings = self.config.get("model_settings", {}) # 合并默认路径和配置路径 default_paths = {**self.DEFAULT_MODEL_PATHS, **{ name: info.get("path", self.DEFAULT_MODEL_PATHS.get(name, "")) for name, info in model_settings.items() }} # 注册模型 for model_name, model_path in default_paths.items(): if model_name not in self._persistent_registry: abs_path = normalize_path(model_path) # 路径规范化 model_type = model_settings.get(model_name, {}).get("type", "text") self.register_model(model_name, abs_path, model_type) def _save_registry(self): """保存模型注册表""" try: registry_path = normalize_path(self.MODEL_REGISTRY_FILE) # 路径规范化 with open(registry_path, 'w', encoding='utf-8') as f: json.dump(self._persistent_registry, f, indent=2, ensure_ascii=False) self.logger.info(f"💾 模型注册表已保存: {registry_path}") return True except Exception as e: self.logger.error(f"❌ 保存模型注册表失败: {str(e)}") return False def register_model(self, model_name: str, model_path: str, model_type: str = "text", adapter_config: Optional[dict] = None) -> bool: """ 注册新模型 """ # 检查模型是否存在 exists, is_local = self._check_model_exists(model_path) if not exists: self.logger.error(f"❌ 模型路径不可访问: {model_path}") return False # 计算校验和 checksum = "unknown" if is_local: try: checksum = self._calculate_checksum(model_path) except Exception as e: self.logger.warning(f"⚠️ 无法计算校验和: {str(e)}") checksum = "error" # 添加到注册表 self._persistent_registry[model_name] = { "path": model_path, "type": model_type, "status": "unloaded", "checksum": checksum, "last_accessed": time.time(), "adapter": adapter_config, "is_local": is_local } self.logger.info(f"✅ 模型注册成功: {model_name} ({model_type})") self._save_registry() return True def _check_model_exists(self, model_path: str) -> Tuple[bool, bool]: """检查模型路径是否有效""" # 如果是HuggingFace模型ID if is_valid_hf_id(model_path): # 使用路径工具验证 
self.logger.info(f"🔍 检测到HuggingFace模型ID: {model_path}") return True, False # 检查本地路径 abs_path = normalize_path(model_path) # 路径规范化 if os.path.exists(abs_path): return True, True # 尝试相对路径 if os.path.exists(model_path): return True, True return False, False def _calculate_checksum(self, model_path: str) -> str: """计算模型校验和""" abs_path = normalize_path(model_path) # 路径规范化 if os.path.isdir(abs_path): sha256 = hashlib.sha256() key_files = ["pytorch_model.bin", "model.safetensors", "config.json"] for root, _, files in os.walk(abs_path): for file in files: if file in key_files: file_path = os.path.join(root, file) with open(file_path, 'rb') as f: while chunk := f.read(8192): sha256.update(chunk) return sha256.hexdigest() # 单个模型文件 with open(abs_path, 'rb') as f: return hashlib.sha256(f.read()).hexdigest() def load_model(self, model_name: str, force_reload: bool = False) -> Tuple[bool, Any]: """ 加载模型到内存 """ if model_name not in self._persistent_registry: self.logger.error(f"❌ 模型未注册: {model_name}") return False, None model_info = self._persistent_registry[model_name] model_path = model_info["path"] abs_path = normalize_path(model_path) if model_info.get("is_local", True) else model_path # 路径规范化 # 如果模型已加载且不需要强制重载 if model_name in self.loaded_models and not force_reload: self.logger.info(f"📦 模型已在内存中: {model_name}") model_info["last_accessed"] = time.time() return True, self.loaded_models[model_name] # 检查内存占用 if len(self.loaded_models) >= self.max_models_in_memory: self._unload_least_recently_used() # 实际加载模型 try: self.logger.info(f"🔄 加载模型: {model_name} ({model_info['type']})") model_type = model_info["type"] if model_type == "text": model = self._load_text_model(model_info, abs_path) elif model_type == "image": model = self._load_image_model(model_info, abs_path) elif model_type == "audio": model = self._load_audio_model(model_info, abs_path) else: self.logger.error(f"❌ 不支持的模型类型: {model_type}") return False, None # 更新状态 self.loaded_models[model_name] = model model_info["status"] 
= "loaded" model_info["last_accessed"] = time.time() self._save_registry() self.logger.info(f"✅ 模型加载成功: {model_name}") return True, model except ImportError as e: self.logger.error(f"❌ 缺失依赖库: {str(e)}") return False, None except Exception as e: self.logger.error(f"❌ 模型加载失败: {model_name}, 路径: {abs_path}, 错误: {str(e)}") model_info["status"] = "error" return False, None def _load_text_model(self, model_info: dict, model_path: str) -> Any: """加载文本模型""" try: from transformers import AutoModelForCausalLM, AutoTokenizer except ImportError: self.logger.error("❌ transformers库未安装") raise RuntimeError("transformers not installed") self.logger.debug(f"🔧 加载文本模型: {model_path}") device = "cuda" if self.use_gpu else "cpu" try: tokenizer = AutoTokenizer.from_pretrained(model_path, cache_dir=self.cache_dir) model = AutoModelForCausalLM.from_pretrained( model_path, cache_dir=self.cache_dir, device_map=device if self.use_gpu else None ) return { "model": model, "tokenizer": tokenizer, "info": model_info } except OSError as e: self.logger.error(f"❌ 加载失败: 请检查路径 '{model_path}' 是否正确") fallback_path = self._try_find_model_path(model_path) if fallback_path: self.logger.warning(f"⚠️ 尝试备用路径: {fallback_path}") return self._load_text_model(model_info, fallback_path) raise except Exception as e: self.logger.error(f"❌ 加载过程中发生意外错误: {str(e)}") raise def _try_find_model_path(self, original_path: str) -> Optional[str]: """尝试找到备用模型路径""" # 1. 检查项目内的模型目录 project_models = os.path.join(os.getcwd(), "local_models", os.path.basename(original_path)) if os.path.exists(project_models): return project_models # 2. 检查缓存目录 cache_path = os.path.join(self.cache_dir, "models", os.path.basename(original_path)) if os.path.exists(cache_path): return cache_path # 3. 
尝试父目录 parent_path = os.path.join(os.path.dirname(os.getcwd()), os.path.basename(original_path)) if os.path.exists(parent_path): return parent_path return None def unload_model(self, model_name: str = None) -> bool: """卸载模型""" if model_name is None: self.logger.info("卸载所有模型") for name in list(self.loaded_models.keys()): if not self._unload_single_model(name“ 3.config.json内容替换成你写的”{ "model_settings": { "TEXT_BASE": { "path": "E:/AI_Models/Qwen2-7B", // 使用完整路径 "type": "text" }, "TEXT_CHAT": { "path": "E:/AI_Models/deepseek-7b-chat", "type": "text" }, "IMAGE_MODEL": { "path": "E:/AI_Models/sdxl", "type": "image" } }, "model_manager": { "cache_dir": "E:/AI_System/model_cache", // 使用完整路径 "use_gpu": true, "max_models_in_memory": 3 }, "cognitive_config": { "auto_reflection": true, "max_concurrent_tasks": 5 } } “了 4.path_utils.py也用你刚刚给我的内容覆盖了 5.我感觉你现在需要给我model_manager.py、cognitive_architecture.py、main.py你觉得呢?
08-31
评论
成就一亿技术人!
拼手气红包6.0元
还能输入1000个字符
 
红包 添加红包
表情包 插入表情
 条评论被折叠 查看
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值