import os
import shutil
import sys
import tempfile
import threading
from datetime import datetime

# ============= PyQt6 Import ===============
from PyQt6.QtCore import Qt, QTimer, pyqtSignal
from PyQt6.QtGui import QPixmap, QBrush, QColor
from PyQt6.QtWidgets import (
    QApplication, QWidget, QVBoxLayout, QHBoxLayout, QLabel,
    QPushButton, QLineEdit, QToolButton, QListWidget, QFileDialog,
    QMessageBox, QTableWidget, QTableWidgetItem, QCheckBox,
    QSpacerItem, QSizePolicy
)

# 初始化 COM(用于调用 Word)
import pythoncom
from win32com.client import Dispatch, constants, gencache
from PyPDF2 import PdfFileReader, PdfFileMerger
# ==================== tools/common.py ====================
def getfilenames(filepath="", filelist_out=None, file_ext='all'):
    """Recursively collect files under *filepath*, ordered by chapter number.

    Args:
        filepath: Directory to scan. Sub-directories are scanned too;
            symbolic links to directories are not followed.
        filelist_out: Optional list that matches are appended to. A new list
            is created when omitted. The list is sorted in place.
        file_ext: ``'all'`` keeps every file. ``'.doc'`` keeps ``.doc`` and
            ``.docx`` files and skips Word's ``~$`` owner/lock files, which
            are not documents. Any other value keeps files with that
            extension, compared case-insensitively.

    Returns:
        ``filelist_out`` holding full paths, sorted by the number found in
        "第X章" in the file name. Files without such a number come last and
        ties are broken by path so the order is deterministic.
    """
    if filelist_out is None:
        filelist_out = []

    def report(err):
        # Unreadable directories are reported and skipped, not fatal.
        print(f"遍历出错: {err}")

    wanted = file_ext.lower()
    for root, _dirs, files in os.walk(filepath, onerror=report):
        for filename in files:
            ext = os.path.splitext(filename)[1].lower()
            if wanted == '.doc':
                keep = ext in ('.doc', '.docx') and not filename.startswith('~$')
            else:
                keep = wanted == 'all' or ext == wanted
            if keep:
                filelist_out.append(os.path.join(root, filename))

    # Sort once, after the whole tree has been collected.
    filelist_out.sort(key=lambda p: (extract_chapter_number(p), p))
    return filelist_out


def extract_chapter_number(path):
    """Return the integer X from "第X章" in the file name of *path*.

    Returns ``float('inf')`` when the file name contains no such marker, so
    that those files sort after every numbered chapter.
    """
    import re
    match = re.search(r"第(\d+)章", os.path.basename(path))
    return int(match.group(1)) if match else float('inf')
# ==================== 核心功能模块 ====================
def word_to_pdf_batch(filelist, target_dir):
    """Convert Word documents to PDF through Word COM automation.

    Args:
        filelist: Paths of the Word documents to convert.
        target_dir: Directory that receives the PDFs (created if missing).
            Each PDF is named after its source file, so two sources with the
            same base name write to the same PDF path.

    Returns:
        The list of PDF paths that were written. Documents that fail are
        reported on stdout and skipped. Returns ``-1`` when COM/Word could
        not be initialised or the batch failed as a whole.
    """
    result = []
    os.makedirs(target_dir, exist_ok=True)
    if not filelist:
        # Nothing to convert: do not start Word at all.
        return result

    try:
        pythoncom.CoInitialize()
    except Exception as e:
        print("Word COM 初始化失败:", e)
        return -1

    word = None
    try:
        gencache.EnsureModule('{00020905-0000-0000-C000-000000000046}', 0, 8, 4)
        word = Dispatch("Word.Application")
        word.Visible = False
        for path in filelist:
            base_name = os.path.splitext(os.path.basename(path))[0]
            pdf_path = os.path.join(target_dir, f"{base_name}.pdf")
            doc = None
            try:
                # Word runs in its own process, so hand it absolute paths
                # instead of paths relative to this process's working dir.
                doc = word.Documents.Open(os.path.abspath(path), ReadOnly=1)
                doc.ExportAsFixedFormat(
                    os.path.abspath(pdf_path),
                    constants.wdExportFormatPDF,
                    Item=constants.wdExportDocumentWithMarkup,
                    CreateBookmarks=constants.wdExportCreateHeadingBookmarks
                )
                result.append(pdf_path)
            except Exception as e:
                print(f"转换失败 {path}: {e}")
            finally:
                # Always close the document, even when the export failed.
                if doc is not None:
                    try:
                        doc.Close(0)  # 0 == wdDoNotSaveChanges
                    except Exception as e:
                        print(f"转换失败 {path}: {e}")
    except Exception as e:
        print("Word COM 初始化失败:", e)
        return -1
    finally:
        # Never leave a hidden WINWORD.EXE behind, whatever happened above.
        if word is not None:
            try:
                word.Quit()
            except Exception as e:
                print("Word COM 初始化失败:", e)
        pythoncom.CoUninitialize()
    return result
def get_pdf_page_count(pdf_path):
    """Return the page count of the PDF at *pdf_path*.

    Any failure (missing file, unreadable PDF, ...) is printed and reported
    as a count of 0.
    """
    try:
        with open(pdf_path, "rb") as pdf_file:
            page_total = PdfFileReader(pdf_file).getNumPages()
    except Exception as err:
        print(f"读取页数失败: {err}")
        return 0
    return page_total
def merge_pdfs(pdf_list, output_path, add_bookmarks=True):
    """Merge several PDFs, in list order, into one file at *output_path*.

    Args:
        pdf_list: Paths of the PDFs to merge.
        output_path: Path of the merged PDF to write.
        add_bookmarks: When true, each source file gets a bookmark named
            after its file name (without extension).

    Returns:
        The number of input files that were merged. Encrypted or unreadable
        files are reported on stdout and skipped. Nothing is written when no
        file could be merged, so a missing output file signals failure.
    """
    if not pdf_list:
        print("没有可合并的PDF文件")
        return 0

    # NOTE(review): PdfFileMerger / PdfFileReader are the legacy PyPDF2
    # names; PyPDF2 3.0 removed them. Confirm the installed version is < 3.
    merged_count = 0
    merger = PdfFileMerger()
    try:
        for pdf in pdf_list:
            try:
                with open(pdf, 'rb') as f:
                    reader = PdfFileReader(f)
                    if reader.isEncrypted:
                        print(f"跳过文件 {pdf}: 文件已加密")
                        continue
                    short_name = os.path.splitext(os.path.basename(pdf))[0]
                    # Append while the source file is still open.
                    merger.append(reader, bookmark=short_name if add_bookmarks else None)
                    merged_count += 1
            except Exception as e:
                print(f"跳过文件 {pdf}: {e}")
        if merged_count:
            merger.write(output_path)
        else:
            print("没有可合并的PDF文件")
    finally:
        # Release the merger's buffers even when writing fails.
        merger.close()
    return merged_count
def _collect_outline_lines(reader, outlines, include_page):
    """Flatten a (possibly nested) PyPDF2 outline into newline-ended lines."""
    lines = []

    def walk(node):
        if isinstance(node, list):
            for child in node:
                walk(child)
        elif isinstance(node, dict) and '/Title' in node:
            # Outline entries are dict-like, so '/Title' is a key, not an
            # attribute.
            title = node['/Title']
            if not include_page:
                lines.append(f"{title}\n")
                return
            try:
                # '/Page' is a reference to the page object, not a number;
                # ask the reader for the 0-based page index instead.
                page_index = reader.getDestinationPageNumber(node)
            except Exception:
                page_index = -1
            page_num = str(page_index + 1) if page_index >= 0 else ""
            lines.append(f"{title}\t\t{page_num}\n")

    walk(outlines)
    return lines


def extract_outline_from_pdf(pdf_path, output_docx, include_page=True):
    """Write the outline (bookmarks) of a PDF into a new Word document.

    Args:
        pdf_path: PDF whose outline is read.
        output_docx: Path of the Word document to create.
        include_page: When true, each title is followed by two tabs and its
            1-based page number.

    Returns:
        ``output_docx`` on success, ``None`` on any failure (the error is
        printed).
    """
    # Read the PDF first, so Word is never started for an unreadable file
    # and the reader is used only while its file is open.
    try:
        with open(pdf_path, "rb") as f:
            reader = PdfFileReader(f)
            items = _collect_outline_lines(reader, reader.getOutlines(), include_page)
    except Exception as e:
        print(f"提取目录失败: {e}")
        return None

    word_app = None
    com_ready = False
    try:
        pythoncom.CoInitialize()
        com_ready = True
        gencache.EnsureModule('{00020905-0000-0000-C000-000000000046}', 0, 8, 4)
        word_app = Dispatch("Word.Application")
        # Hidden like in word_to_pdf_batch: the document is saved and closed
        # right away, so there is nothing for the user to look at.
        word_app.Visible = False
        doc = word_app.Documents.Add()
        try:
            doc.Content.InsertAfter("".join(items))
            doc.SaveAs(os.path.abspath(output_docx))
        finally:
            doc.Close(0)  # 0 == wdDoNotSaveChanges; already saved above
        return output_docx
    except Exception as e:
        print(f"提取目录失败: {e}")
        return None
    finally:
        # Shut Word down and release COM on every path.
        if word_app is not None:
            try:
                word_app.Quit()
            except Exception as e:
                print(f"提取目录失败: {e}")
        if com_ready:
            pythoncom.CoUninitialize()
# ==================== 主窗体 ====================
class MainWindow(QWidget):
    """Launcher window with one button per tool window."""

    def __init__(self):
        super().__init__()
        self.setWindowTitle("Word助手")
        self.resize(600, 400)
        self.setMaximumSize(600, 400)

        root = QVBoxLayout()
        root.addWidget(self._build_tip())
        root.addLayout(self._build_button_row())
        self.setLayout(root)

    def _build_tip(self):
        """Create the centred welcome / usage hint label."""
        hint = QLabel(
            "欢迎使用 Word助手\n\n"
            "本程序会自动调用 Microsoft Word\n"
            "请先保存并关闭所有打开的 Word 文档"
        )
        hint.setAlignment(Qt.AlignmentFlag.AlignCenter)
        hint.setStyleSheet("font-size: 14px; color: #333;")
        return hint

    def _build_button_row(self):
        """Create the three tool buttons and wire them to their openers."""
        self.btn_transform = QPushButton("Word转PDF")
        self.btn_page = QPushButton("统计页码")
        self.btn_list = QPushButton("提取总目录")

        row = QHBoxLayout()
        wiring = (
            (self.btn_transform, self.open_transform),
            (self.btn_page, self.open_page),
            (self.btn_list, self.open_list),
        )
        for button, opener in wiring:
            button.clicked.connect(opener)
            row.addWidget(button)
        return row

    def open_transform(self):
        """Open the Word-to-PDF window (kept on self so it stays alive)."""
        self.tw = TransformWindow()
        self.tw.show()

    def open_page(self):
        """Open the page-count window (kept on self so it stays alive)."""
        self.pw = PageWindow()
        self.pw.show()

    def open_list(self):
        """Open the outline-extraction window (kept on self so it stays alive)."""
        self.lw = ListWindow()
        self.lw.show()
# ==================== 子窗体:Word转PDF ====================
class TransformWindow(QWidget):
    """Window that converts Word documents to PDF, optionally merged.

    The conversion runs in a worker thread. Its result is handed back to the
    GUI thread through signals, because Qt widgets (including QMessageBox)
    must only be used from the GUI thread.
    """

    _batch_done = pyqtSignal(object)  # list of PDF paths, or -1 on failure
    _merge_done = pyqtSignal(str)     # merged PDF path, "" on failure

    def __init__(self):
        super().__init__()
        self.filelist = []
        self.init_ui()
        self._batch_done.connect(self._on_batch_done)
        self._merge_done.connect(self._on_merge_done)

    def init_ui(self):
        """Build the widgets and layout."""
        self.setWindowTitle("Word转PDF")
        self.resize(800, 600)
        layout = QVBoxLayout()

        # Source directory row
        h1 = QHBoxLayout()
        h1.addWidget(QLabel("Word文档所在目录:"))
        self.src_edit = QLineEdit()
        self.src_btn = QToolButton()
        self.src_btn.setText("...")
        self.src_btn.clicked.connect(self.browse_source)
        h1.addWidget(self.src_edit)
        h1.addWidget(self.src_btn)
        layout.addLayout(h1)

        self.list_word = QListWidget()
        layout.addWidget(QLabel("待转换文件:"))
        layout.addWidget(self.list_word)

        # Target directory row
        h2 = QHBoxLayout()
        h2.addWidget(QLabel("PDF保存目录:"))
        self.dst_edit = QLineEdit()
        self.dst_btn = QToolButton()
        self.dst_btn.setText("...")
        self.dst_btn.clicked.connect(self.browse_target)
        h2.addWidget(self.dst_edit)
        h2.addWidget(self.dst_btn)
        layout.addLayout(h2)

        # Action buttons
        op_layout = QHBoxLayout()
        op_layout.addSpacerItem(
            QSpacerItem(40, 20, QSizePolicy.Policy.Expanding, QSizePolicy.Policy.Minimum)
        )
        self.batch_btn = QPushButton("批量转换")
        self.merge_btn = QPushButton("合为一个PDF")
        self.batch_btn.clicked.connect(self.do_batch)
        self.merge_btn.clicked.connect(self.do_merge)
        op_layout.addWidget(self.batch_btn)
        op_layout.addWidget(self.merge_btn)
        layout.addLayout(op_layout)

        self.result_list = QListWidget()
        layout.addWidget(QLabel("转换结果:"))
        layout.addWidget(self.result_list)
        self.setLayout(layout)

    def browse_source(self):
        """Pick the source directory and list the Word files found in it."""
        dir_ = QFileDialog.getExistingDirectory(self, "选择Word文档目录", "")
        if dir_:
            self.src_edit.setText(dir_)
            self.filelist = getfilenames(dir_, [], '.doc')
            self.list_word.clear()
            self.list_word.addItems([os.path.basename(f) for f in self.filelist])

    def browse_target(self):
        """Pick the directory that receives the PDFs."""
        dir_ = QFileDialog.getExistingDirectory(self, "选择PDF保存目录", "")
        if dir_:
            self.dst_edit.setText(dir_)

    def _checked_target(self):
        """Return the target directory, or None after warning the user."""
        if not self.filelist:
            QMessageBox.warning(self, "警告", "未选择任何Word文档!")
            return None
        dst = self.dst_edit.text()
        if not dst or not os.path.exists(dst):
            QMessageBox.warning(self, "警告", "请选择有效的保存路径!")
            return None
        return dst

    def _set_busy(self, busy):
        """Disable the action buttons while a worker thread is running."""
        self.batch_btn.setEnabled(not busy)
        self.merge_btn.setEnabled(not busy)

    def do_batch(self):
        """Convert every listed document to its own PDF in the background."""
        dst = self._checked_target()
        if dst is None:
            return
        files = list(self.filelist)  # snapshot: the GUI may change the list
        self._set_busy(True)

        def task():
            try:
                results = word_to_pdf_batch(files, dst)
            except Exception as e:
                print(f"转换失败: {e}")
                results = -1
            self._batch_done.emit(results)

        threading.Thread(target=task, daemon=True).start()

    def _on_batch_done(self, results):
        """GUI-thread handler: show the outcome of do_batch."""
        self._set_busy(False)
        self.result_list.clear()
        if results != -1:
            self.result_list.addItems(results)
            QMessageBox.information(self, "完成", f"成功生成 {len(results)} 个PDF文件。")
        else:
            QMessageBox.critical(self, "错误", "转换过程中发生COM错误。")

    def do_merge(self):
        """Convert every listed document and merge the PDFs into merged.pdf."""
        dst = self._checked_target()
        if dst is None:
            return
        files = list(self.filelist)
        self._set_busy(True)

        def task():
            merged = ""
            try:
                merged = self._convert_and_merge(files, dst)
            except Exception as e:
                print(f"合并失败: {e}")
            self._merge_done.emit(merged)

        threading.Thread(target=task, daemon=True).start()

    @staticmethod
    def _convert_and_merge(files, dst):
        """Worker-thread body of do_merge; returns the merged path or ""."""
        temp_dir = os.path.join(dst, "temp_pdfs")
        os.makedirs(temp_dir, exist_ok=True)
        merged = os.path.join(dst, "merged.pdf")
        # Merge into a scratch file first, so a failed run is never mistaken
        # for success because of a merged.pdf left by an earlier run.
        scratch = os.path.join(temp_dir, "merged.pdf.part")

        pdfs = word_to_pdf_batch(files, temp_dir)
        produced = [] if pdfs == -1 else list(pdfs)
        try:
            if not produced:
                return ""
            merge_pdfs(produced, scratch)
            if not os.path.isfile(scratch):
                return ""
            os.replace(scratch, merged)
            return merged
        finally:
            # Best-effort cleanup of the intermediate files.
            for leftover in produced + [scratch]:
                try:
                    os.remove(leftover)
                except OSError:
                    pass  # already gone or still locked; nothing to do
            try:
                os.rmdir(temp_dir)
            except OSError:
                pass  # not empty (holds other files); leave it alone

    def _on_merge_done(self, merged):
        """GUI-thread handler: show the outcome of do_merge."""
        self._set_busy(False)
        if merged:
            self.result_list.clear()
            self.result_list.addItem(merged)
            QMessageBox.information(self, "完成", "已合并为 merged.pdf")
        else:
            QMessageBox.critical(self, "错误", "合并PDF失败,请查看控制台输出。")
# ==================== 子窗体:统计页码 ====================
class PageWindow(QWidget):
    """Window that counts the pages of Word documents via PDF export.

    Counting runs in a worker thread. The result is delivered to the GUI
    thread through a signal, because Qt widgets must only be used from the
    GUI thread.
    """

    # list of (file name, page count) tuples, or None when Word/COM failed
    _count_done = pyqtSignal(object)

    def __init__(self):
        super().__init__()
        self.filelist = []
        self.init_ui()
        self._count_done.connect(self._on_count_done)

    def init_ui(self):
        """Build the widgets and layout."""
        self.setWindowTitle("统计Word文档页码")
        self.resize(700, 500)
        layout = QVBoxLayout()

        h = QHBoxLayout()
        h.addWidget(QLabel("Word文档所在目录:"))
        self.src_edit = QLineEdit()
        self.src_btn = QToolButton()
        self.src_btn.setText("...")
        self.src_btn.clicked.connect(self.browse_source)
        h.addWidget(self.src_edit)
        h.addWidget(self.src_btn)
        layout.addLayout(h)

        self.list_word = QListWidget()
        layout.addWidget(QLabel("文档列表:"))
        layout.addWidget(self.list_word)

        self.table = QTableWidget(0, 2)
        self.table.setHorizontalHeaderLabels(["文件名", "页码"])
        self.table.horizontalHeader().setStretchLastSection(True)
        layout.addWidget(self.table)

        foot = QHBoxLayout()
        foot.addWidget(QLabel("合计页码:"))
        self.total_label = QLabel("0")
        foot.addWidget(self.total_label)
        foot.addSpacerItem(
            QSpacerItem(40, 20, QSizePolicy.Policy.Expanding, QSizePolicy.Policy.Minimum)
        )
        self.exec_btn = QPushButton("开始统计")
        self.exec_btn.clicked.connect(self.start_count)
        foot.addWidget(self.exec_btn)
        layout.addLayout(foot)
        self.setLayout(layout)

    def browse_source(self):
        """Pick the source directory and list the Word files found in it."""
        dir_ = QFileDialog.getExistingDirectory(self, "选择Word文档目录", "")
        if dir_:
            self.src_edit.setText(dir_)
            self.filelist = getfilenames(dir_, [], '.doc')
            self.list_word.clear()
            self.list_word.addItems([os.path.basename(f) for f in self.filelist])

    def start_count(self):
        """Count the pages of every listed document in the background."""
        if not self.filelist:
            QMessageBox.warning(self, "警告", "没有可统计的文档!")
            return
        files = list(self.filelist)  # snapshot: the GUI may change the list
        self.exec_btn.setEnabled(False)

        def task():
            rows = None
            try:
                rows = self._count_pages(files)
            except Exception as e:
                print(f"统计失败: {e}")
            self._count_done.emit(rows)

        threading.Thread(target=task, daemon=True).start()

    @staticmethod
    def _count_pages(files):
        """Worker-thread body: return [(name, pages), ...] or None on COM failure."""
        rows = []
        # A private scratch directory keeps the source folder untouched.
        temp_dir = tempfile.mkdtemp(prefix="word_pages_")
        try:
            # One document per call, with the PDF removed right after, so
            # documents sharing a base name cannot overwrite each other.
            for fp in files:
                stem = os.path.splitext(os.path.basename(fp))[0]
                pdf_path = os.path.join(temp_dir, stem + ".pdf")
                converted = word_to_pdf_batch([fp], temp_dir)
                if converted == -1:
                    # -1 means Word/COM itself failed; stop, do not report 0.
                    return None
                if converted and os.path.exists(pdf_path):
                    rows.append((os.path.basename(fp), get_pdf_page_count(pdf_path)))
                    try:
                        os.remove(pdf_path)
                    except OSError as e:
                        print(f"统计失败: {e}")
        finally:
            shutil.rmtree(temp_dir, ignore_errors=True)
        return rows

    def _on_count_done(self, rows):
        """GUI-thread handler: fill the table and the total."""
        self.exec_btn.setEnabled(True)
        if rows is None:
            QMessageBox.critical(self, "错误", "转换过程中发生COM错误。")
            return
        self.table.setRowCount(len(rows))
        total = 0
        for i, (name, pages) in enumerate(rows):
            self.table.setItem(i, 0, QTableWidgetItem(name))
            self.table.setItem(i, 1, QTableWidgetItem(str(pages)))
            total += pages
        self.total_label.setText(str(total))
        QMessageBox.information(self, "完成", "页码统计完成!")
# ==================== 子窗体:提取目录 ====================
class ListWindow(QWidget):
    """Window that builds one table of contents from several Word documents.

    The documents are converted to PDF, merged, and the merged outline is
    written to a new Word document. The work runs in a worker thread and the
    result is delivered to the GUI thread through a signal, because Qt
    widgets must only be used from the GUI thread.
    """

    _extract_done = pyqtSignal(str)  # generated .docx path, "" on failure

    def __init__(self):
        super().__init__()
        self.filelist = []
        self.generated_docx = ""
        self.init_ui()
        self._extract_done.connect(self._on_extract_done)

    def init_ui(self):
        """Build the widgets and layout."""
        self.setWindowTitle("提取总目录")
        self.resize(700, 400)
        layout = QVBoxLayout()

        h = QHBoxLayout()
        h.addWidget(QLabel("Word文档所在目录:"))
        self.src_edit = QLineEdit()
        self.src_btn = QToolButton()
        self.src_btn.setText("...")
        self.src_btn.clicked.connect(self.browse_source)
        h.addWidget(self.src_edit)
        h.addWidget(self.src_btn)
        layout.addLayout(h)

        self.list_word = QListWidget()
        layout.addWidget(QLabel("文档列表:"))
        layout.addWidget(self.list_word)

        self.chk_page = QCheckBox("包含页码")
        self.chk_page.setChecked(True)
        layout.addWidget(self.chk_page)

        btn_h = QHBoxLayout()
        btn_h.addSpacerItem(
            QSpacerItem(40, 20, QSizePolicy.Policy.Expanding, QSizePolicy.Policy.Minimum)
        )
        self.extract_btn = QPushButton("开始提取")
        self.open_btn = QPushButton("打开文件")
        self.extract_btn.clicked.connect(self.extract)
        self.open_btn.clicked.connect(self.open_file)
        btn_h.addWidget(self.extract_btn)
        btn_h.addWidget(self.open_btn)
        layout.addLayout(btn_h)

        self.result_label = QLabel("还未提取...")
        layout.addWidget(QLabel("目录文件保存位置:"))
        layout.addWidget(self.result_label)
        self.setLayout(layout)

    def browse_source(self):
        """Pick the source directory and list the Word files found in it."""
        dir_ = QFileDialog.getExistingDirectory(self, "选择Word文档目录", "")
        if dir_:
            self.src_edit.setText(dir_)
            self.filelist = getfilenames(dir_, [], '.doc')
            self.list_word.clear()
            self.list_word.addItems([os.path.basename(f) for f in self.filelist])

    def extract(self):
        """Build the table-of-contents document in the background."""
        if not self.filelist:
            QMessageBox.warning(self, "警告", "未选择文档!")
            return
        # Read every widget value here, in the GUI thread.
        src = self.src_edit.text()
        include_page = self.chk_page.isChecked()
        files = list(self.filelist)
        self.extract_btn.setEnabled(False)

        def task():
            generated = ""
            try:
                generated = self._build_outline(files, src, include_page)
            except Exception as e:
                print(f"提取目录失败: {e}")
            self._extract_done.emit(generated)

        threading.Thread(target=task, daemon=True).start()

    @staticmethod
    def _build_outline(files, src, include_page):
        """Worker-thread body of extract; returns the .docx path or ""."""
        temp_pdf_dir = os.path.join(src, "temp_merge")
        os.makedirs(temp_pdf_dir, exist_ok=True)
        merged_pdf = os.path.join(temp_pdf_dir, "merged.pdf")

        # Step 1: convert to PDF. -1 (COM failure) is truthy, so it has to be
        # checked explicitly before the value is used as a list.
        pdfs = word_to_pdf_batch(files, temp_pdf_dir)
        produced = [] if pdfs == -1 else list(pdfs)
        try:
            if not produced:
                return ""
            # Step 2: merge
            merge_pdfs(produced, merged_pdf)
            if not os.path.isfile(merged_pdf):
                return ""
            # Step 3: extract the outline
            output_docx = os.path.join(
                src, f"总目录_{datetime.now().strftime('%H%M%S')}.docx"
            )
            return extract_outline_from_pdf(merged_pdf, output_docx, include_page) or ""
        finally:
            # Best-effort cleanup of the intermediate files.
            for leftover in produced + [merged_pdf]:
                try:
                    os.remove(leftover)
                except OSError:
                    pass  # already gone or still locked; nothing to do
            try:
                os.rmdir(temp_pdf_dir)
            except OSError:
                pass  # not empty (holds other files); leave it alone

    def _on_extract_done(self, generated):
        """GUI-thread handler: show the outcome of extract."""
        self.extract_btn.setEnabled(True)
        if generated:
            self.generated_docx = generated
            self.result_label.setText(generated)
            QMessageBox.information(self, "完成", "目录已提取!")
        else:
            QMessageBox.critical(self, "错误", "目录提取失败,请查看控制台输出。")

    def open_file(self):
        """Open the generated document with its associated application."""
        if not self.generated_docx or not os.path.exists(self.generated_docx):
            QMessageBox.warning(self, "警告", "尚未生成目录文件!")
            return
        os.startfile(self.generated_docx)
# ==================== 主程序入口 ====================
def main():
    """Create the Qt application, show the main window, run the event loop.

    Returns the exit code of the event loop.
    """
    qt_app = QApplication(sys.argv)
    main_window = MainWindow()
    main_window.show()
    return qt_app.exec()


if __name__ == "__main__":
    sys.exit(main())
# NOTE: reported issue - "合成一个PDF失败" (merging into a single PDF fails).