import os
import tempfile
import pythoncom
import win32com.client
import threading
import shutil
import tkinter as tk
from tkinter import filedialog, ttk, messagebox, scrolledtext
from docx import Document
from PyPDF2 import PdfMerger, PdfReader, PdfWriter
from reportlab.pdfgen import canvas
from reportlab.lib.pagesizes import letter
from reportlab.pdfbase import pdfmetrics
from reportlab.pdfbase.ttfonts import TTFont
from reportlab.lib.colors import red, black, white
from reportlab.platypus import Table, TableStyle
from io import BytesIO
from datetime import datetime
class PDFConverterApp:
def __init__(self, root):
self.root = root
self.root.title("audio_data")
self.root.geometry("800x650") # 增加窗口高度以容纳新控件
self.folders = []
self.log_messages = []
self.output_path = "" # 存储自定义输出路径
self.backup_mode = tk.BooleanVar(value=True) # 添加备份模式开关
self.point_22_mode = tk.BooleanVar(value=False) # 新增22号点位开关
self.output_filename = tk.StringVar(value="听筒磁干扰_Simulation_Result") # 默认文件名
self.create_widgets()
def create_widgets(self):
# 创建顶部框架
top_frame = ttk.Frame(self.root, padding=10)
top_frame.pack(fill=tk.X)
output_frame = ttk.LabelFrame(self.root, text="输出设置", padding=10)
output_frame.pack(fill=tk.X, padx=10, pady=(0, 5))
# 文件名输入框
ttk.Label(output_frame, text="文件名:").grid(row=0, column=0, sticky=tk.W, padx=(0, 5))
filename_entry = ttk.Entry(output_frame, textvariable=self.output_filename, width=30)
filename_entry.grid(row=0, column=1, sticky=tk.W, padx=5)
# 输出路径选择
ttk.Label(output_frame, text="输出路径:").grid(row=0, column=2, sticky=tk.W, padx=(20, 5))
self.path_entry = ttk.Entry(output_frame, width=40, state='readonly')
self.path_entry.grid(row=0, column=3, sticky=tk.EW, padx=5)
browse_btn = ttk.Button(output_frame, text="浏览...", command=self.choose_output_path)
browse_btn.grid(row=0, column=4, padx=(5, 0))
# 设置网格列权重
output_frame.columnconfigure(3, weight=1)
# 添加文件夹按钮
add_btn = ttk.Button(top_frame, text="添加文件夹", command=self.add_folder)
add_btn.pack(side=tk.LEFT, padx=5)
# 移除文件夹按钮
remove_btn = ttk.Button(top_frame, text="移除选中", command=self.remove_selected)
remove_btn.pack(side=tk.LEFT, padx=5)
# 清空列表按钮
clear_btn = ttk.Button(top_frame, text="清空列表", command=self.clear_list)
clear_btn.pack(side=tk.LEFT, padx=5)
# 处理按钮
process_btn = ttk.Button(top_frame, text="开始处理", command=self.start_processing)
process_btn.pack(side=tk.RIGHT, padx=5)
# 创建文件夹列表
list_frame = ttk.LabelFrame(self.root, text="待处理文件夹", padding=10)
list_frame.pack(fill=tk.BOTH, expand=True, padx=10, pady=5)
# 滚动条
scrollbar = ttk.Scrollbar(list_frame)
scrollbar.pack(side=tk.RIGHT, fill=tk.Y)
backup_frame = ttk.Frame(output_frame)
backup_frame.grid(row=0, column=5, sticky=tk.W, padx=(20, 0))
# 在输出设置区域添加22号点位复选框
backup_frame = ttk.Frame(output_frame)
backup_frame.grid(row=0, column=5, sticky=tk.W, padx=(20, 0))
self.backup_check = ttk.Checkbutton(
backup_frame,
text="报告存档",
variable=self.backup_mode
)
self.backup_check.pack(side=tk.LEFT)
# 新增22号点位复选框
self.point_22_check = ttk.Checkbutton(
backup_frame,
text="22号点位",
variable=self.point_22_mode
)
self.point_22_check.pack(side=tk.LEFT, padx=(10, 0))
# 文件夹列表
self.folder_list = tk.Listbox(
list_frame,
selectmode=tk.EXTENDED,
yscrollcommand=scrollbar.set,
height=10
)
self.folder_list.pack(fill=tk.BOTH, expand=True)
scrollbar.config(command=self.folder_list.yview)
# 创建日志区域
log_frame = ttk.LabelFrame(self.root, text="处理日志", padding=10)
log_frame.pack(fill=tk.BOTH, expand=True, padx=10, pady=5)
# 日志文本框
self.log_text = scrolledtext.ScrolledText(
log_frame,
wrap=tk.WORD,
state=tk.DISABLED
)
self.log_text.pack(fill=tk.BOTH, expand=True)
# 进度条
self.progress = ttk.Progressbar(
self.root,
orient=tk.HORIZONTAL,
mode='determinate'
)
self.progress.pack(fill=tk.X, padx=10, pady=5)
def choose_output_path(self):
"""选择输出文件夹"""
path = filedialog.askdirectory(title="选择输出文件夹")
if path:
self.output_path = path
self.path_entry.config(state='normal')
self.path_entry.delete(0, tk.END)
self.path_entry.insert(0, path)
self.path_entry.config(state='readonly')
self.log(f"已设置输出路径: {path}")
def add_folder(self):
"""添加要处理的文件夹"""
folders = filedialog.askdirectory(
title="选择要处理的文件夹",
mustexist=True
)
if folders:
self.folders.append(folders)
self.folder_list.insert(tk.END, folders)
self.log(f"已添加文件夹: {folders}")
def remove_selected(self):
"""移除选中的文件夹"""
selected = self.folder_list.curselection()
for index in selected[::-1]:
folder = self.folder_list.get(index)
self.folder_list.delete(index)
self.folders.remove(folder)
self.log(f"已移除文件夹: {folder}")
def clear_list(self):
"""清空文件夹列表"""
self.folder_list.delete(0, tk.END)
self.folders = []
self.log("已清空文件夹列表")
def log(self, message):
"""向日志区域添加消息"""
timestamp = datetime.now().strftime("%H:%M:%S")
log_entry = f"[{timestamp}] {message}"
self.log_messages.append(log_entry)
self.log_text.config(state=tk.NORMAL)
self.log_text.insert(tk.END, log_entry + "\n")
self.log_text.config(state=tk.DISABLED)
self.log_text.yview(tk.END) # 自动滚动到底部
self.root.update_idletasks()
def start_processing(self):
"""启动处理过程"""
if not self.folders:
messagebox.showwarning("警告", "请先添加要处理的文件夹")
return
# 禁用处理按钮
self.root.title("Word 转 PDF 合并工具 - 处理中...")
self.progress["value"] = 0
# 在新线程中处理,避免界面冻结
thread = threading.Thread(target=self.process_folders)
thread.daemon = True
thread.start()
def backup_excel_files(self, source_folder, backup_dir):
"""递归查找并备份所有Sound_Pres_Cal.xlsx文件"""
print(f"开始在文件夹中搜索Sound_Pres_Cal.xlsx: {source_folder}")
backup_count = 0
# 使用os.walk递归遍历所有子文件夹
for root, dirs, files in os.walk(source_folder):
for file in files:
if file == "Sound_Pres_Cal.xlsx":
excel_path = os.path.join(root, file)
try:
# 计算相对路径以保持目录结构
rel_path = os.path.relpath(excel_path, start=source_folder)
dest_path = os.path.join(backup_dir, rel_path)
# 创建目标目录结构
os.makedirs(os.path.dirname(dest_path), exist_ok=True)
# 复制文件(保留元数据)
shutil.copy2(excel_path, dest_path)
backup_count += 1
print(f"备份成功: {excel_path} → {dest_path}")
except Exception as e:
print(f"备份失败 {excel_path}: {str(e)}")
print(f"共找到并备份 {backup_count} 个Sound_Pres_Cal.xlsx文件")
def backup_data_files(self, folder_path, backup_dir):
"""递归查找并备份所有.xlsx和.csv文件到指定目录(同一级)"""
self.log(f"开始在文件夹中搜索所有Excel和CSV文件: {folder_path}")
backup_count = 0
# 使用os.walk递归遍历所有子文件夹
for root, dirs, files in os.walk(folder_path):
for file in files:
# 检查文件扩展名是否为.xlsx或.csv(不区分大小写)
if file.lower().endswith(('.xlsx', '.csv')):
file_path = os.path.join(root, file)
file_name = os.path.basename(file_path) # 只获取文件名(不含路径)
dest_path = os.path.join(backup_dir, file_name) # 直接放在备份目录下
try:
# 确保备份目录存在(不需要创建子目录)
os.makedirs(backup_dir, exist_ok=True)
# 复制文件(保留元数据)
shutil.copy2(file_path, dest_path)
backup_count += 1
self.log(f"备份成功: {file_path} → {dest_path}")
except Exception as e:
self.log(f"备份失败 {file_path}: {str(e)}")
self.log(f"共找到并备份 {backup_count} 个Excel和CSV文件")
def process_folders(self):
"""处理多个文件夹中的Word文件"""
try:
# 提前初始化 output_folder
if self.output_path:
output_folder = self.output_path
else:
output_folder = next((p for p in self.folders if os.path.isdir(p)), os.getcwd())
self.log(f"开始处理 {len(self.folders)} 个文件夹...")
# 获取所有文件夹中的Word文件
word_files = self.get_all_word_files(self.folders)
if not word_files:
self.log("没有找到任何Word文档")
return
self.log(f"共找到 {len(word_files)} 个Word文档")
self.progress["maximum"] = len(word_files) + 5 # 文件数 + 合并步骤
backup_root = os.path.join(output_folder, "报告存档") # 统一备份根目录
if self.backup_mode.get():
os.makedirs(backup_root, exist_ok=True)
# 创建临时目录存储转换后的PDF
with tempfile.TemporaryDirectory() as temp_dir:
pdf_files_with_header = []
toc_entries = []
all_tables = {}
current_page = 1
# 处理每个Word文件
for i, word_file in enumerate(word_files):
self.progress["value"] = i + 1
file_name = os.path.splitext(os.path.basename(word_file))[0]
display_name = file_name
# 处理每个Word文件
for i, word_file in enumerate(word_files):
self.progress["value"] = i + 1
file_name = os.path.splitext(os.path.basename(word_file))[0]
display_name = file_name
# 新增:在备份前修改Word文档
modified_word_path = word_file
if self.point_22_mode.get():
# 创建临时副本进行修改
temp_word_path = os.path.join(temp_dir, os.path.basename(word_file))
shutil.copy2(word_file, temp_word_path)
if self.modify_word_spec(temp_word_path):
modified_word_path = temp_word_path
elif "GSM" in file_name.upper():
# 未勾选22号点位但包含GSM的文件也需要修改
temp_word_path = os.path.join(temp_dir, os.path.basename(word_file))
shutil.copy2(word_file, temp_word_path)
if self.modify_word_spec(temp_word_path):
modified_word_path = temp_word_path
original_pdf = os.path.join(temp_dir, f"{file_name}_original.pdf")
pdf_with_header = os.path.join(temp_dir, f"{file_name}_with_header.pdf")
if self.backup_mode.get():
try:
# 为每个Word文件创建同级备份目录(可选:使用Word文件名作为目录名)
# 若要所有文件直接放在backup_root下,可将dest_dir改为backup_root
dest_dir = os.path.join(backup_root, file_name)
os.makedirs(dest_dir, exist_ok=True)
# 备份当前Word文件到同级目录
word_dest = os.path.join(dest_dir, os.path.basename(modified_word_path))
shutil.copy2(modified_word_path, word_dest)
self.log(f"Word文件备份成功: {word_file} → {word_dest}")
# 备份相关的Excel和CSV文件到同一个同级目录
folder_path = os.path.dirname(word_file)
self.backup_data_files(folder_path, dest_dir) # 传入同级目录
except OSError as e:
self.log(f"文件备份失败: {e}")
except Exception as e:
self.log(f"未知错误: {e}")
# 提取表格数据
tables = self.extract_spec_table(modified_word_path)
if tables:
all_tables[display_name] = tables
self.log(f"已从 {display_name} 中提取 {len(tables)} 个数据表格")
# 转换为PDF
if self.word_to_pdf(modified_word_path, original_pdf):
# 添加内联标题
if self.add_inline_header(original_pdf, display_name, pdf_with_header):
pdf_files_with_header.append(pdf_with_header)
toc_entries.append((display_name, current_page))
current_page += self.get_pdf_page_count(pdf_with_header)
else:
pdf_files_with_header.append(original_pdf)
toc_entries.append((display_name, current_page))
current_page += self.get_pdf_page_count(original_pdf)
else:
self.log(f"跳过 {display_name},转换失败")
# 更新进度条
self.progress["value"] = len(word_files) + 1
if not pdf_files_with_header:
self.log("没有成功转换的PDF文件,无法进行合并")
return
# 获取输出路径
if self.output_path:
output_folder = self.output_path
else:
output_folder = next((p for p in self.folders if os.path.isdir(p)), os.getcwd())
# 获取文件名
report_name = self.output_filename.get().strip()
if not report_name:
report_name = self.get_folder_name_parts(self.folders[0]) # 使用默认规则
output_pdf = os.path.join(output_folder, f"{report_name}.pdf")
# 合并PDF
self.progress["value"] = len(word_files) + 2
success = self.merge_pdfs_with_summary(
pdf_files_with_header,
toc_entries,
all_tables,
output_pdf
)
self.progress["value"] = len(word_files) + 3
if success:
self.log(f"处理完成!输出文件: {output_pdf}")
messagebox.showinfo("完成", f"处理完成!输出文件: {output_pdf}")
else:
self.log("处理失败")
messagebox.showerror("错误", "处理过程中出现错误")
self.root.title("Word 转 PDF 合并工具")
except Exception as e:
self.log(f"处理过程中出现错误: {str(e)}")
messagebox.showerror("错误", f"处理过程中出现错误: {str(e)}")
self.root.title("Word 转 PDF 合并工具")
# 以下是原有的处理函数,保持不变但添加为类方法
def extract_spec_table(self, word_path):
"""从Word文档中提取SPEC(dB)、Simulation和Pass/Fail数据表格"""
try:
doc = Document(word_path)
tables = []
for table in doc.tables:
headers = [cell.text.strip() for cell in table.rows[0].cells]
if "SPEC(dB)" in headers and "Simulation" in headers and "Pass/Fail" in headers:
table_data = []
table_data.append(headers)
for row in table.rows[1:]:
row_data = [cell.text.strip() for cell in row.cells]
table_data.append(row_data)
tables.append(table_data)
return tables
except Exception as e:
self.log(f"提取 {os.path.basename(word_path)} 中的表格时出错: {str(e)}")
return []
def modify_word_spec(self, word_path):
"""根据22号点位设置修改Word文档中的SPEC值并更新Pass/Fail"""
try:
doc = Document(word_path)
filename = os.path.basename(word_path).upper()
has_gsm = "GSM" in filename
# 确定SPEC基准值
if self.point_22_mode.get():
spec_value = 22 if has_gsm else 20
else:
spec_value = 20 if has_gsm else 18
modified = False
for table in doc.tables:
# 查找表头确定列索引
headers = [cell.text.strip() for cell in table.rows[0].cells]
try:
spec_index = headers.index("SPEC(dB)")
sim_index = headers.index("Simulation")
pf_index = headers.index("Pass/Fail")
except ValueError:
continue
# 修改每一行数据
for row in table.rows[1:]:
cells = row.cells
# 更新SPEC值
if spec_index < len(cells):
cells[spec_index].text = str(spec_value)
# 更新Pass/Fail状态
if sim_index < len(cells) and pf_index < len(cells):
try:
sim_value = float(cells[sim_index].text)
new_status = "PASS" if sim_value < spec_value else "FAIL"
cells[pf_index].text = new_status
except ValueError:
pass
modified = True
if modified:
doc.save(word_path)
self.log(f"已修改 {os.path.basename(word_path)} 的SPEC值为{spec_value}")
return modified
except Exception as e:
self.log(f"修改 {os.path.basename(word_path)} 失败: {str(e)}")
return False
def add_inline_header(self, pdf_path, title, output_path):
"""在PDF的第一页顶部添加一行红色加粗的标题"""
try:
reader = PdfReader(pdf_path)
writer = PdfWriter()
if len(reader.pages) > 0:
first_page = reader.pages[0]
packet = BytesIO()
can = canvas.Canvas(packet, pagesize=letter)
width, height = letter
font_name = "Helvetica-Bold"
try:
pdfmetrics.registerFont(TTFont('SimSun', 'simsun.ttc'))
pdfmetrics.registerFont(TTFont('SimSun-Bold', 'simsun.ttc'))
font_name = "SimSun-Bold"
except:
pass
can.setFont(font_name, 14)
can.setFillColor(red)
can.drawString(50, height - 50, title)
can.save()
packet.seek(0)
title_reader = PdfReader(packet)
title_page = title_reader.pages[0]
first_page.merge_page(title_page)
writer.add_page(first_page)
for page in reader.pages[1:]:
writer.add_page(page)
with open(output_path, "wb") as f:
writer.write(f)
return True
return False
except Exception as e:
self.log(f"PDF添加标题失败: {str(e)}")
return False
def create_summary_page(self, toc_entries, all_tables, output_path):
"""创建Summary页"""
try:
c = canvas.Canvas(output_path, pagesize=letter)
width, height = letter
font_name = "Helvetica"
try:
pdfmetrics.registerFont(TTFont('SimSun', 'simsun.ttc'))
font_name = "SimSun"
except:
pass
# Summary标题
c.setFont(font_name, 24)
c.setFillColor(red)
c.drawCentredString(width / 2.0, height - 50, "Summary")
c.setFillColor(black)
y_position = height - 100
# 添加数据汇总表格 - 修复:遍历所有文档的所有表格
if all_tables:
c.setFont(font_name, 16)
c.drawString(50, y_position, "Data Summary:")
y_position -= 30
c.setFont(font_name, 10)
table_width = width - 100
# 修复:遍历所有文档
for doc_name, tables in all_tables.items():
c.setFont(font_name, 12)
c.setFillColor(red)
c.drawString(60, y_position, f"Document: {doc_name}")
y_position -= 20
c.setFillColor(black)
c.setFont(font_name, 10)
# 修复:遍历文档中的所有表格
for table_data in tables:
col_widths = [table_width / len(table_data[0])] * len(table_data[0])
table = Table(table_data, colWidths=col_widths)
style = TableStyle([
('BACKGROUND', (0, 0), (-1, 0), white),
('TEXTCOLOR', (0, 0), (-1, 0), black),
('ALIGN', (0, 0), (-1, -1), 'CENTER'),
('FONTNAME', (0, 0), (-1, 0), font_name),
('FONTNAME', (0, 1), (-1, -1), font_name),
('BOTTOMPADDING', (0, 0), (-1, 0), 12),
('BACKGROUND', (0, 1), (-1, -1), white),
('GRID', (0, 0), (-1, -1), 1, black)
])
table.setStyle(style)
table_height = table.wrap(0, 0)[1]
if y_position - table_height < 50:
c.showPage()
y_position = height - 50
c.setFont(font_name, 24)
c.setFillColor(red)
c.drawCentredString(width / 2.0, y_position, "Summary")
y_position -= 50
c.setFillColor(black)
table.drawOn(c, 50, y_position - table_height)
y_position -= (table_height + 20)
c.save()
return output_path
except Exception as e:
self.log(f"创建Summary页失败: {str(e)}")
return None
def word_to_pdf(self, word_path, pdf_path):
"""将Word文档转换为PDF"""
pythoncom.CoInitialize()
try:
word = win32com.client.Dispatch("Word.Application")
word.Visible = False
doc = word.Documents.Open(os.path.abspath(word_path))
doc.SaveAs(os.path.abspath(pdf_path), FileFormat=17)
doc.Close()
word.Quit()
self.log(f"已将 {os.path.basename(word_path)} 转换为PDF")
return True
except Exception as e:
self.log(f"转换 {os.path.basename(word_path)} 时出错: {str(e)}")
return False
finally:
pythoncom.CoUninitialize()
def get_pdf_page_count(self, pdf_path):
"""获取PDF文件的页数"""
try:
reader = PdfReader(pdf_path)
return len(reader.pages)
except:
return 0
def merge_pdfs_with_summary(self, pdf_files, toc_entries, all_tables, output_path):
"""合并PDF文件并添加Summary页"""
try:
with tempfile.NamedTemporaryFile(delete=False, suffix='.pdf') as summary_file:
summary_path = summary_file.name
# 修复:正确传递all_tables参数
self.create_summary_page(toc_entries, all_tables, summary_path) # 添加toc_entries参数
summary_page_count = self.get_pdf_page_count(summary_path)
updated_toc_entries = [(title, page_num + summary_page_count) for title, page_num in toc_entries]
merger = PdfMerger()
merger.append(summary_path)
current_page = summary_page_count
for pdf, (title, _) in zip(pdf_files, updated_toc_entries):
merger.append(pdf)
merger.add_outline_item(title, current_page)
current_page += self.get_pdf_page_count(pdf)
merger.write(output_path)
merger.close()
os.remove(summary_path)
self.log(f"已成功合并 {len(pdf_files)} 个PDF文件")
return True
except Exception as e:
self.log(f"合并PDF时出错: {str(e)}")
return False
def get_all_word_files(self, folder_paths):
"""获取所有Word文件"""
word_extensions = ['.docx', '.doc']
word_files = []
for folder_path in folder_paths:
if not os.path.isdir(folder_path):
continue
for file in os.listdir(folder_path):
file_ext = os.path.splitext(file)[1].lower()
if file_ext in word_extensions:
word_path = os.path.join(folder_path, file)
word_files.append(word_path)
return word_files
def get_folder_name_parts(self, folder_paths):
"""生成报告文件名"""
if not folder_paths:
return "听筒磁干扰仿真报告"
folder_path = folder_paths[0]
norm_path = os.path.normpath(folder_path)
parts = [p for p in norm_path.split(os.sep) if p]
if len(parts) >= 3:
return f"{parts[-3]}_{parts[-2]}_{parts[-1]}"
elif len(parts) == 2:
return f"{parts[-2]}_{parts[-1]}"
elif len(parts) == 1:
return parts[0]
return "听筒磁干扰仿真报告"
if __name__ == "__main__":
root = tk.Tk()
app = PDFConverterApp(root)
root.mainloop() # 添加这行启动事件循环 [11:14:13] 已从 Lux-H_PA1901_n78-qet1801_0905_simulation_result 中提取 1 个数据表格
[11:14:17] 已将 Lux-H_PA1901_n78-qet1801_0905_simulation_result.docx 转换为PDF
[11:14:17] 已修改 Lux-H_PA0701_B41-qet1801_0905_simulation_result.docx 的SPEC值为20
[11:14:18] 已修改 Lux-H_PA0701_B41-qet1802_0905_simulation_result.docx 的SPEC值为20
[11:14:18] 已修改 Lux-H_PA0701_GSM-L0703_0905_simulation_result.docx 的SPEC值为22
[11:14:18] 已修改 Lux-H_PA0701_GSM-qet1801_0905_simulation_result.docx 的SPEC值为22
[11:14:18] 已修改 Lux-H_PA1901_n78-qet1801_0905_simulation_result.docx 的SPEC值为20
[11:14:18] Word文件备份成功: D:/tes/新建文件夹\Lux-H_PA1901_n78-qet1801_0905_simulation_result.docx → D:/tes/新建文件夹/111\报告存档\Lux-H_PA1901_n78-qet1801_0905_simulation_result\Lux-H_PA1901_n78-qet1801_0905_simulation_result.docx
[11:14:18] 开始在文件夹中搜索所有Excel和CSV文件: D:/tes/新建文件夹
[11:14:18] 共找到并备份 0 个Excel和CSV文件
[11:14:18] 已从 Lux-H_PA1901_n78-qet1801_0905_simulation_result 中提取 1 个数据表格
[11:14:21] 已将 Lux-H_PA1901_n78-qet1801_0905_simulation_result.docx 转换为PDF
[11:14:22] 已成功合并 5 个PDF文件
[11:14:22] 处理完成!输出文件: D:/tes/新建文件夹/111\听筒磁干扰_Simulation_Result.pdf 从日志上看有找到五个word文档为什么最后合并的文档中是五个 Lux-H_PA1901_n78-qet1801_0905_simulation_result.docx 的数据没有其他四个的数据