import os
import tempfile
import shutil
import threading
import tkinter as tk
from tkinter import filedialog, ttk, messagebox, scrolledtext
from docx import Document
from PyPDF2 import PdfMerger, PdfReader, PdfWriter
from reportlab.lib.pagesizes import letter, A4
from reportlab.pdfgen import canvas
from datetime import datetime
import pythoncom
import win32com.client
from io import BytesIO
from reportlab.pdfbase import pdfmetrics
from reportlab.pdfbase.ttfonts import TTFont
# 修改Table的导入,使用别名RLTable
from reportlab.platypus import Table as RLTable, TableStyle
from reportlab.lib import colors
class Table:
pass
class PDFConverterApp:
def __init__(self, root):
self.root = root
self.root.title("音频数据处理工具")
self.root.geometry("800x650")
# 初始化变量
self.folders = []
self.output_path = ""
self.backup_mode = tk.BooleanVar(value=True)
self.point_mode = tk.BooleanVar(value=False)
self.output_filename = tk.StringVar(value="听筒磁干扰_结果报告")
# 创建界面
self.create_widgets()
def create_widgets(self):
"""创建GUI界面组件"""
# 输出设置区域
output_frame = ttk.LabelFrame(self.root, text="输出设置", padding=10)
output_frame.pack(fill=tk.X, padx=10, pady=5)
# 文件名设置
ttk.Label(output_frame, text="文件名:").grid(row=0, column=0, sticky=tk.W)
ttk.Entry(output_frame, textvariable=self.output_filename, width=30).grid(row=0, column=1, sticky=tk.W, padx=5)
# 输出路径选择
ttk.Label(output_frame, text="输出路径:").grid(row=0, column=2, sticky=tk.W, padx=(20, 5))
self.path_entry = ttk.Entry(output_frame, width=40, state='readonly')
self.path_entry.grid(row=0, column=3, sticky=tk.EW)
ttk.Button(output_frame, text="浏览...", command=self.choose_output_path).grid(row=0, column=4, padx=5)
output_frame.columnconfigure(3, weight=1)
# 选项区域
options_frame = ttk.Frame(output_frame)
options_frame.grid(row=0, column=5, sticky=tk.W, padx=(20, 0))
ttk.Checkbutton(options_frame, text="报告存档", variable=self.backup_mode).pack(side=tk.LEFT)
ttk.Checkbutton(options_frame, text="2号点位", variable=self.point_mode, padding=(10, 0)).pack(side=tk.LEFT)
# 操作按钮区域
btn_frame = ttk.Frame(self.root, padding=10)
btn_frame.pack(fill=tk.X)
ttk.Button(btn_frame, text="添加文件夹", command=self.add_folder).pack(side=tk.LEFT, padx=5)
ttk.Button(btn_frame, text="移除选中", command=self.remove_selected).pack(side=tk.LEFT, padx=5)
ttk.Button(btn_frame, text="清空列表", command=self.clear_list).pack(side=tk.LEFT, padx=5)
ttk.Button(btn_frame, text="开始处理", command=self.start_processing).pack(side=tk.RIGHT, padx=5)
# 文件夹列表区域
list_frame = ttk.LabelFrame(self.root, text="待处理文件夹", padding=10)
list_frame.pack(fill=tk.BOTH, expand=True, padx=10, pady=5)
scrollbar = ttk.Scrollbar(list_frame)
scrollbar.pack(side=tk.RIGHT, fill=tk.Y)
self.folder_list = tk.Listbox(list_frame, selectmode=tk.EXTENDED, yscrollcommand=scrollbar.set, height=10)
self.folder_list.pack(fill=tk.BOTH, expand=True)
scrollbar.config(command=self.folder_list.yview)
# 日志区域
log_frame = ttk.LabelFrame(self.root, text="处理日志", padding=10)
log_frame.pack(fill=tk.BOTH, expand=True, padx=10, pady=5)
self.log_text = scrolledtext.ScrolledText(log_frame, wrap=tk.WORD, state=tk.DISABLED)
self.log_text.pack(fill=tk.BOTH, expand=True)
# 进度条
self.progress = ttk.Progressbar(self.root, orient=tk.HORIZONTAL, mode='determinate')
self.progress.pack(fill=tk.X, padx=10, pady=5)
# === 文件操作相关方法 ===
def choose_output_path(self):
"""选择输出文件夹"""
path = filedialog.askdirectory(title="选择输出文件夹")
if path:
self.output_path = path
self.path_entry.config(state='normal')
self.path_entry.delete(0, tk.END)
self.path_entry.insert(0, path)
self.path_entry.config(state='readonly')
self.log(f"输出路径设置为: {path}")
def add_folder(self):
"""添加要处理的文件夹"""
folders = filedialog.askdirectory(title="选择文件夹", mustexist=True)
if folders:
self.folders.append(folders)
self.folder_list.insert(tk.END, folders)
self.log(f"已添加文件夹: {folders}")
def remove_selected(self):
"""移除选中的文件夹"""
for index in self.folder_list.curselection()[::-1]:
folder = self.folder_list.get(index)
self.folder_list.delete(index)
self.folders.remove(folder)
self.log(f"已移除文件夹: {folder}")
def clear_list(self):
"""清空文件夹列表"""
self.folder_list.delete(0, tk.END)
self.folders = []
self.log("已清空文件夹列表")
def log(self, message):
"""记录日志消息"""
timestamp = datetime.now().strftime("%H:%M:%S")
log_entry = f"[{timestamp}] {message}"
self.log_text.config(state=tk.NORMAL)
self.log_text.insert(tk.END, log_entry + "\n")
self.log_text.config(state=tk.DISABLED)
self.log_text.yview(tk.END)
self.root.update_idletasks()
# === 核心处理功能 ===
def start_processing(self):
"""启动处理流程"""
if not self.folders:
messagebox.showwarning("警告", "请先添加要处理的文件夹")
return
self.root.title("音频数据处理 - 处理中...")
self.progress["value"] = 0
thread = threading.Thread(target=self.process_folders)
thread.daemon = True
thread.start()
def process_folders(self):
"""处理所有文件夹中的文件"""
try:
# 收集所有Word文件
word_files = self.get_word_files()
if not word_files:
self.log("未找到任何Word文档")
return
self.progress["maximum"] = len(word_files) + 5
# 设置输出路径
output_folder = self.output_path or self.folders[0]
backup_root = os.path.join(output_folder, "报告存档")
with tempfile.TemporaryDirectory() as temp_dir:
pdf_files = []
toc_entries = []
all_tables = {}
current_page = 1
# 处理每个Word文件
for i, word_file in enumerate(word_files):
self.progress["value"] = i + 1
display_name = os.path.splitext(os.path.basename(word_file))[0]
# 备份处理
if self.backup_mode.get():
self.backup_files(word_file, backup_root)
# 修改Word文档
modified_path = self.modify_word(word_file, temp_dir)
# 提取表格数据
tables = self.extract_tables(modified_path)
if tables:
all_tables[display_name] = tables
# 转换为PDF
pdf_path = self.convert_to_pdf(modified_path, temp_dir, display_name)
if pdf_path:
pdf_files.append(pdf_path)
toc_entries.append((display_name, current_page))
current_page += self.get_page_count(pdf_path)
# 合并PDF
if pdf_files:
report_name = self.output_filename.get().strip() or "结果报告"
output_pdf = os.path.join(output_folder, f"{report_name}.pdf")
if self.merge_pdfs(pdf_files, toc_entries, all_tables, output_pdf):
self.log(f"处理完成!输出文件: {output_pdf}")
messagebox.showinfo("完成", f"处理完成!输出文件: {output_pdf}")
self.root.title("音频数据处理")
except Exception as e:
self.log(f"处理出错: {str(e)}")
messagebox.showerror("错误", f"处理出错: {str(e)}")
self.root.title("音频数据处理")
def get_word_files(self):
"""获取所有Word文件路径"""
word_files = []
for folder in self.folders:
for file in os.listdir(folder):
if file.lower().endswith(('.docx', '.doc')):
word_files.append(os.path.join(folder, file))
return word_files
def backup_files(self, word_path, backup_root):
"""备份文件到指定目录"""
try:
file_name = os.path.splitext(os.path.basename(word_path))[0]
backup_dir = os.path.join(backup_root, file_name)
os.makedirs(backup_dir, exist_ok=True)
# 备份Word文件
shutil.copy2(word_path, os.path.join(backup_dir, os.path.basename(word_path)))
self.log(f"已备份Word文件: {file_name}")
except Exception as e:
self.log(f"备份失败: {str(e)}")
def modify_word(self, word_path, temp_dir):
"""修改Word文档内容"""
try:
doc = Document(word_path)
modified = False
# 确定SPEC基准值
spec_value = 22 if ("GSM" in word_path.upper() and self.point_mode.get()) else 20
# 修改表格内容
for table in doc.tables:
headers = [cell.text.strip() for cell in table.rows[0].cells]
if "SPEC(dB)" in headers and "Simulation" in headers and "Pass/Fail" in headers:
# 查找列索引
spec_idx = headers.index("SPEC(dB)")
sim_idx = headers.index("Simulation")
pf_idx = headers.index("Pass/Fail")
# 修改每行数据
for row in table.rows[1:]:
# 更新SPEC值
row.cells[spec_idx].text = str(spec_value)
# 更新Pass/Fail状态
try:
sim_val = float(row.cells[sim_idx].text)
row.cells[pf_idx].text = "PASS" if sim_val < spec_value else "FAIL"
except ValueError:
pass
# 设置单元格居中
for cell in row.cells:
for paragraph in cell.paragraphs:
paragraph.alignment = 1 # 居中
modified = True
if modified:
temp_path = os.path.join(temp_dir, os.path.basename(word_path))
doc.save(temp_path)
return temp_path
return word_path
except Exception as e:
self.log(f"修改Word失败: {str(e)}")
return word_path
def extract_tables(self, word_path):
"""从Word文档中提取表格数据"""
try:
doc = Document(word_path)
tables = []
for table in doc.tables:
headers = [cell.text.strip() for cell in table.rows[0].cells]
if "SPEC(dB)" in headers and "Simulation" in headers and "Pass/Fail" in headers:
table_data = [headers]
for row in table.rows[1:]:
table_data.append([cell.text.strip() for cell in row.cells])
tables.append(table_data)
return tables
except Exception as e:
self.log(f"提取表格失败: {str(e)}")
return []
def convert_to_pdf(self, word_path, temp_dir, title):
"""将Word转换为PDF并添加标题"""
try:
# 转换为PDF
pdf_path = os.path.join(temp_dir, f"{os.path.basename(word_path)}.pdf")
self.word_to_pdf(word_path, pdf_path)
# 添加标题
titled_pdf = os.path.join(temp_dir, f"titled_{os.path.basename(pdf_path)}")
self.add_pdf_title(pdf_path, title, titled_pdf)
return titled_pdf
except Exception as e:
self.log(f"转换失败: {str(e)}")
return None
def word_to_pdf(self, word_path, pdf_path):
"""使用Word COM接口转换为PDF"""
pythoncom.CoInitialize()
try:
word = win32com.client.Dispatch("Word.Application")
word.Visible = False
doc = word.Documents.Open(os.path.abspath(word_path))
doc.SaveAs(os.path.abspath(pdf_path), FileFormat=17)
doc.Close()
word.Quit()
self.log(f"已转换: {os.path.basename(word_path)} → PDF")
return True
except Exception as e:
self.log(f"转换失败: {str(e)}")
return False
finally:
pythoncom.CoUninitialize()
def add_pdf_title(self, pdf_path, title, output_path):
"""在PDF首页顶部添加标题"""
try:
reader = PdfReader(pdf_path)
writer = PdfWriter()
# 创建标题Canvas
packet = BytesIO()
c = canvas.Canvas(packet, pagesize=letter)
width, height = letter
c.setFont("Helvetica-Bold", 14)
c.setFillColorRGB(1, 0, 0) # 红色
c.drawString(50, height - 50, title)
c.save()
# 合并到第一页
packet.seek(0)
title_page = PdfReader(packet).pages[0]
first_page = reader.pages[0]
first_page.merge_page(title_page)
writer.add_page(first_page)
# 添加剩余页面
for page in reader.pages[1:]:
writer.add_page(page)
# 保存结果
with open(output_path, "wb") as f:
writer.write(f)
return True
except Exception as e:
self.log(f"添加标题失败: {str(e)}")
return False
def get_page_count(self, pdf_path):
"""获取PDF页数"""
try:
return len(PdfReader(pdf_path).pages)
except:
return 0
def merge_pdfs(self, pdf_files, toc_entries, all_tables, output_path):
"""合并PDF并添加摘要页(修复空文件问题)"""
try:
with tempfile.NamedTemporaryFile(delete=False, suffix='.pdf') as summary_file:
summary_path = summary_file.name
# 创建摘要页并验证
if not self.create_summary_page(all_tables, summary_path):
raise Exception("摘要页创建失败")
# 验证摘要页文件
if not os.path.exists(summary_path) or os.path.getsize(summary_path) == 0:
raise Exception("摘要页是空文件")
# 获取摘要页页数
summary_pages = self.get_page_count(summary_path)
updated_toc = [(title, page + summary_pages) for title, page in toc_entries]
# 合并PDF
merger = PdfMerger()
# 添加摘要页前验证
self.validate_pdf_file(summary_path)
merger.append(summary_path)
# 添加其他PDF文件
current_page = summary_pages
for pdf, (title, _) in zip(pdf_files, updated_toc):
self.validate_pdf_file(pdf) # 文件验证
merger.append(pdf)
merger.add_outline_item(title, current_page)
current_page += self.get_page_count(pdf)
# 写入最终文件
with open(output_path, 'wb') as f:
merger.write(f)
merger.close()
self.log(f"已合并 {len(pdf_files)} 个PDF文件,总大小: {os.path.getsize(output_path) // 1024}KB")
return True
except Exception as e:
self.log(f"合并失败: {str(e)}")
return False
finally:
# 清理临时文件
if os.path.exists(summary_path):
os.remove(summary_path)
def validate_pdf_file(self, file_path):
"""验证PDF文件是否有效"""
if not os.path.exists(file_path):
raise FileNotFoundError(f"文件不存在: {file_path}")
if os.path.getsize(file_path) == 0:
raise ValueError(f"空文件: {file_path}")
# 尝试读取文件验证完整性
with open(file_path, 'rb') as f:
reader = PdfReader(f)
if len(reader.pages) == 0:
raise ValueError(f"无效PDF文件: 无页面内容")
def create_summary_page(self, all_tables, output_path):
try:
# 注册字体
try:
font_path = r"C:\Windows\Fonts\simsun.ttc"
if os.path.exists(font_path):
pdfmetrics.registerFont(TTFont('SimSun', font_path))
pdfmetrics.registerFont(TTFont('SimSunBold', font_path))
from reportlab.lib.fonts import addMapping
addMapping('SimSun', 0, 0, 'SimSun')
addMapping('SimSun', 1, 0, 'SimSunBold')
font_name = 'SimSun'
else:
font_name = "Helvetica"
except Exception as e:
font_name = "Helvetica"
# 创建PDF
c = canvas.Canvas(output_path, pagesize=A4)
width, height = A4
# 标题
c.setFont("Helvetica-Bold" if font_name == "Helvetica" else "SimSunBold", 24)
c.setFillColorRGB(1, 0, 0)
c.drawCentredString(width / 2, height - 50, "摘要")
c.setFillColorRGB(0, 0, 0)
# 收集表格数据
table_data = []
try:
if all_tables and len(all_tables) > 0:
# 检查第一个表格是否有数据(至少包含表头和数据行)
if len(all_tables[0]) > 1:
# 构建表头:第一列“来源”,其他列为表格的表头(假设所有表格结构相同)
header = ["来源"] + list(all_tables[0][0])
table_data.append(header)
# 遍历每个表格
for idx, table in enumerate(all_tables):
# 从表格的第二行开始(跳过表头)
for row in table[1:]:
# 每行第一列为表格序号(如“表格1”)
new_row = [f"表格{idx + 1}"] + list(row)
table_data.append(new_row)
else:
self.log(f"警告:表格{idx + 1}数据不足,跳过")
else:
self.log("摘要生成:all_tables为空")
except Exception as e:
self.log(f"表格数据处理异常: {str(e)}")
# 创建PDF
c = canvas.Canvas(output_path, pagesize=A4)
width, height = A4
# 标题
c.setFont("Helvetica-Bold" if font_name == "Helvetica" else "SimSunBold", 24)
c.setFillColorRGB(1, 0, 0)
c.drawCentredString(width / 2, height - 50, "摘要")
c.setFillColorRGB(0, 0, 0)
# 如果有表格数据,则创建表格
if table_data:
num_cols = len(table_data[0])
col_widths = [140] + [100] * (num_cols - 1) # 第一列稍宽
t = RLTable(table_data, colWidths=col_widths)
# 表格样式(使用注册的字体名称)
style = TableStyle([
('BACKGROUND', (0, 0), (-1, 0), colors.lightgrey),
('TEXTCOLOR', (0, 0), (-1, 0), colors.black),
('ALIGN', (0, 0), (-1, -1), 'CENTER'),
('FONTNAME', (0, 0), (-1, -1), font_name),
('FONTSIZE', (0, 0), (-1, 0), 12),
('BOTTOMPADDING', (0, 0), (-1, 0), 12),
('BACKGROUND', (0, 1), (-1, -1), colors.white),
('GRID', (0, 0), (-1, -1), 1, colors.black),
('SPAN', (0, 0), (-1, 0)),
('ALIGN', (0, 0), (0, 0), 'LEFT'),
('FONTNAME', (0, 0), (0, 0), font_name),
('ALIGN', (0, 1), (-1, 1), 'CENTER'),
('FONTNAME', (0, 1), (-1, 1), 'SimSunBold' if font_name == 'SimSun' else 'Helvetica-Bold'),
])
t.setStyle(style)
t.wrapOn(c, width - 100, height)
t.drawOn(c, 50, height - 500)
else:
c.setFont(font_name, 14)
c.drawString(100, height - 150, "未提取到表格数据")
c.save()
return True
except Exception as e:
# 回退机制
try:
c = canvas.Canvas(output_path, pagesize=A4)
width, height = A4
c.setFont("Helvetica", 14)
c.drawString(100, height - 150, "摘要生成失败: 字体配置问题")
c.drawString(100, height - 180, f"错误详情: {str(e)}")
c.save()
return True
except:
return False
if __name__ == "__main__":
root = tk.Tk()
app = PDFConverterApp(root)
root.mainloop()
[10:29:36] 输出路径设置为: D:/tes/新建文件夹/111
[10:29:41] 已添加文件夹: D:/tes/新建文件夹/1
[10:29:45] 已添加文件夹: D:/tes/新建文件夹/2
[10:29:46] 已备份Word文件: Lux-H_PA0701_B41-qet1801_0905_simulation_result
[10:29:52] 已转换: Lux-H_PA0701_B41-qet1801_0905_simulation_result.docx → PDF
[10:29:52] 已备份Word文件: Lux-H_PA0701_B41-qet1802_0905_simulation_result
[10:29:56] 已转换: Lux-H_PA0701_B41-qet1802_0905_simulation_result.docx → PDF
[10:29:57] 表格数据处理异常: 0
[10:29:57] 已合并 2 个PDF文件,总大小: 995KB
[10:29:57] 处理完成!输出文件: D:/tes/新建文件夹/111\听筒磁干扰_结果报告.pdf
分析报错并且在原代码修改