import csv
import os
import re
import tkinter as tk
from datetime import datetime
from tkinter import ttk, filedialog, messagebox

import PyPDF2
class PDFRenamerApp:
    """Tkinter application that batch-renames PDF files to an archival scheme.

    Every PDF found in the chosen folder gets a record with four editable
    fields (fonds number, year, retention period, item number).  Once all four
    are filled in, the target name ``<fonds>-WS·<year>-<period>-<number>.pdf``
    is generated.  Records can be edited in place, exported to / imported from
    CSV, pre-filled from the PDF text, and finally applied on disk.

    Attributes:
        root: The Tk root window the widgets are attached to.
        style: The ``ttk.Style`` object used to select the "clam" theme.
        folder_path: ``tk.StringVar`` holding the selected folder path.
        files: List of record dicts (keys ``orig_path``, ``fonds``, ``year``,
            ``period``, ``number``, ``new_name``; all values are ``str``),
            kept in the same order as the rows of ``tree``.
        tree: The ``ttk.Treeview`` showing one row per record.
    """

    # (column id, heading text, width in pixels), in display order.
    _COLUMN_SPECS = (
        ("orig_name", "原文件名", 200),
        ("fonds", "全宗号", 80),
        ("year", "年度", 60),
        ("period", "保管期限", 80),
        ("number", "件号", 80),
        ("new_name", "新文件名", 250),
    )
    _COLUMNS = ("orig_name", "fonds", "year", "period", "number", "new_name")
    # Columns the user may edit; these names are also keys of the record dicts.
    _EDITABLE = ("fonds", "year", "period", "number")
    _PERIODS = ("Y", "C", "D")
    _CSV_HEADER = ("原文件名", "全宗号", "年度", "保管期限", "件号", "新文件名")
    # Upper bound on per-file problem lines shown in a summary dialog.
    _MAX_REPORTED = 10

    # re.ASCII matters: with Unicode semantics CJK characters count as word
    # characters, so "\b" never matches between a digit and e.g. "年" and
    # nothing would be found in Chinese documents.
    _FONDS_RE = re.compile(r"\b(\d{4})\b", re.ASCII)
    _YEAR_RE = re.compile(r"\b(20\d{2})\b", re.ASCII)
    _NUMBER_RE = re.compile(r"\b(\d{5})\b", re.ASCII)

    def __init__(self, root):
        """Configure the root window, initialise state and build the widgets.

        Args:
            root: The ``tk.Tk`` (or ``Toplevel``) window to populate.
        """
        self.root = root
        self.root.title("PDF文件名管理工具")
        self.root.geometry("900x600")
        self.root.resizable(True, True)

        self.style = ttk.Style()
        self.style.theme_use("clam")

        self.folder_path = tk.StringVar()
        self.files = []
        self.create_widgets()

    # ------------------------------------------------------------------
    # Pure helpers (no GUI access)
    # ------------------------------------------------------------------
    @staticmethod
    def _build_new_name(fonds, year, period, number):
        """Return the archival file name, or ``""`` if any part is empty."""
        if fonds and year and period and number:
            return f"{fonds}-WS·{year}-{period}-{number}.pdf"
        return ""

    @staticmethod
    def _validate_field(field, value):
        """Validate and normalise the value of one editable field.

        Args:
            field: One of ``"fonds"``, ``"year"``, ``"period"``, ``"number"``.
                Any other name is accepted unchanged.
            value: The raw text entered by the user or read from CSV.

        Returns:
            ``(normalised_value, error)`` where ``error`` is ``None`` when the
            value is valid and a user-facing message otherwise.  A valid item
            number is zero-padded to five digits.
        """
        value = str(value)
        # [0-9] instead of \d: \d also accepts full-width digits, which would
        # otherwise end up in the generated file names.
        if field == "fonds":
            valid = re.fullmatch(r"[0-9]{4}", value) is not None
            error = "全宗号必须是4位数字"
        elif field == "year":
            valid = re.fullmatch(r"[0-9]{4}", value) is not None
            error = "年度必须是4位数字"
        elif field == "period":
            valid = value in PDFRenamerApp._PERIODS
            error = "保管期限必须是Y(永久), C(长期)或D(短期)"
        elif field == "number":
            valid = re.fullmatch(r"[0-9]{1,5}", value) is not None
            error = "件号必须是1-5位数字"
            if valid:
                value = value.zfill(5)
        else:
            return value, None
        return (value, None) if valid else (value, error)

    @classmethod
    def _extract_fields(cls, text):
        """Guess fonds number, year and item number from extracted PDF text.

        This is a heuristic: the fonds number is the first stand-alone
        four-digit number, the year the first stand-alone ``20xx`` and the item
        number the first stand-alone five-digit number.

        Returns:
            A dict containing only the keys for which a match was found.
        """
        found = {}
        for key, pattern in (
            ("fonds", cls._FONDS_RE),
            ("year", cls._YEAR_RE),
            ("number", cls._NUMBER_RE),
        ):
            match = pattern.search(text)
            if match:
                found[key] = match.group(1)
        return found

    @staticmethod
    def _read_pdf_text(path):
        """Return the text of all pages of the PDF at *path*, space-joined."""
        with open(path, "rb") as pdf_file:
            reader = PyPDF2.PdfReader(pdf_file)
            # extract_text() may return None for pages without a text layer.
            return " ".join((page.extract_text() or "") for page in reader.pages)

    @staticmethod
    def _row_values(record):
        """Return the tuple of Treeview cell values for *record*."""
        return (
            os.path.basename(record["orig_path"]),
            record["fonds"],
            record["year"],
            record["period"],
            record["number"],
            record["new_name"],
        )

    @classmethod
    def _parse_csv_rows(cls, rows, folder):
        """Turn CSV data rows (header already removed) into record dicts.

        Rows with fewer than six columns or with invalid field values are
        skipped.  Item numbers of 1-5 digits are accepted and zero-padded
        (spreadsheet programs tend to drop leading zeros).  File names are
        reduced to their base name so that a CSV cannot point outside
        *folder*; an empty new name is regenerated from the fields.

        Returns:
            ``(records, skipped)``: the accepted records and the number of
            non-blank rows that were rejected.
        """
        records = []
        skipped = 0
        for row in rows:
            if not any(cell.strip() for cell in row):
                continue  # blank line
            if len(row) < 6:
                skipped += 1
                continue
            orig_name = os.path.basename(row[0])
            fields = dict(zip(cls._EDITABLE, row[1:5]))
            valid = bool(orig_name)
            for key in cls._EDITABLE:
                fields[key], error = cls._validate_field(key, fields[key].strip())
                if error:
                    valid = False
            if not valid:
                skipped += 1
                continue
            new_name = os.path.basename(row[5].strip()) or cls._build_new_name(
                fields["fonds"], fields["year"], fields["period"], fields["number"]
            )
            record = {"orig_path": os.path.join(folder, orig_name)}
            record.update(fields)
            record["new_name"] = new_name
            records.append(record)
        return records, skipped

    @staticmethod
    def _rename_records(records, folder):
        """Rename the file of every record to its ``new_name`` inside *folder*.

        An existing target is never overwritten (``os.rename`` would silently
        replace it on POSIX, losing a file when two records share a name).
        On success the record's ``orig_path`` is updated to the new location.

        Returns:
            ``(renamed, problems)``: the number of files that now carry their
            new name and a list of user-facing messages for the others.
        """
        renamed = 0
        problems = []
        for record in records:
            source = record["orig_path"]
            source_name = os.path.basename(source)
            if not record["new_name"]:
                problems.append(f"{source_name}: 未生成新文件名")
                continue
            target = os.path.join(folder, record["new_name"])
            same_file = os.path.normcase(os.path.abspath(source)) == os.path.normcase(
                os.path.abspath(target)
            )
            if same_file:
                renamed += 1  # already carries the target name
                continue
            if os.path.exists(target):
                problems.append(f"{source_name}: 目标文件已存在 ({record['new_name']})")
                continue
            try:
                os.rename(source, target)
            except (OSError, ValueError) as exc:
                problems.append(f"{source_name}: {exc}")
            else:
                record["orig_path"] = target
                renamed += 1
        return renamed, problems

    # ------------------------------------------------------------------
    # GUI helpers
    # ------------------------------------------------------------------
    def _clear_rows(self):
        """Remove every row from the tree (``self.files`` is left untouched)."""
        self.tree.delete(*self.tree.get_children())

    def _refresh_rows(self):
        """Rewrite every tree row from the record at the same position."""
        for row_id, record in zip(self.tree.get_children(), self.files):
            self.tree.item(row_id, values=self._row_values(record))

    def _format_problems(self, problems):
        """Return at most ``_MAX_REPORTED`` problem lines as one string."""
        lines = list(problems[: self._MAX_REPORTED])
        if len(problems) > self._MAX_REPORTED:
            lines.append("…")
        return "\n".join(lines)

    # ------------------------------------------------------------------
    # Widget construction
    # ------------------------------------------------------------------
    def create_widgets(self):
        """Build the folder chooser, the file table and the action buttons."""
        # Top frame: folder selection.
        top_frame = ttk.LabelFrame(self.root, text="文件夹选择")
        top_frame.pack(fill="x", padx=10, pady=5)
        ttk.Label(top_frame, text="目标文件夹:").grid(row=0, column=0, padx=5, pady=5)
        ttk.Entry(top_frame, textvariable=self.folder_path, width=50).grid(
            row=0, column=1, padx=5, pady=5
        )
        ttk.Button(top_frame, text="浏览", command=self.browse_folder).grid(
            row=0, column=2, padx=5, pady=5
        )
        ttk.Button(top_frame, text="扫描PDF文件", command=self.scan_pdfs).grid(
            row=0, column=3, padx=5, pady=5
        )

        # Middle frame: file table with a vertical scrollbar.
        middle_frame = ttk.LabelFrame(self.root, text="PDF文件列表")
        middle_frame.pack(fill="both", expand=True, padx=10, pady=5)
        self.tree = ttk.Treeview(middle_frame, columns=self._COLUMNS, show="headings")
        for column_id, heading, width in self._COLUMN_SPECS:
            self.tree.heading(column_id, text=heading)
            self.tree.column(column_id, width=width)
        scrollbar = ttk.Scrollbar(middle_frame, orient="vertical", command=self.tree.yview)
        self.tree.configure(yscrollcommand=scrollbar.set)
        self.tree.pack(side="left", fill="both", expand=True)
        scrollbar.pack(side="right", fill="y")

        # Bottom frame: action buttons.
        bottom_frame = ttk.Frame(self.root)
        bottom_frame.pack(fill="x", padx=10, pady=10)
        for label, handler in (
            ("批量重命名", self.batch_rename),
            ("导出CSV", self.export_csv),
            ("导入CSV", self.import_csv),
            ("自动提取元数据", self.extract_metadata),
            ("清空列表", self.clear_list),
        ):
            ttk.Button(bottom_frame, text=label, command=handler).pack(side="left", padx=5)
        ttk.Button(bottom_frame, text="退出", command=self.root.quit).pack(
            side="right", padx=5
        )

        # Double-clicking a cell opens an in-place editor.
        self.tree.bind("<Double-1>", self.on_double_click)

    # ------------------------------------------------------------------
    # Actions
    # ------------------------------------------------------------------
    def browse_folder(self):
        """Ask for a directory and store it in ``folder_path`` if one is chosen."""
        folder_selected = filedialog.askdirectory()
        if folder_selected:
            self.folder_path.set(folder_selected)

    def scan_pdfs(self):
        """Replace the list with one record per ``*.pdf`` file in the folder.

        New records default to the current year and retention period ``"Y"``.
        Files are listed in sorted order; sub-directories are ignored.
        """
        folder = self.folder_path.get()
        if not folder or not os.path.isdir(folder):
            messagebox.showerror("错误", "请选择有效的文件夹路径")
            return
        try:
            names = sorted(os.listdir(folder))
        except OSError as exc:
            messagebox.showerror("错误", f"无法读取文件夹: {exc}")
            return

        self._clear_rows()
        self.files = []
        # Stored as str so records have one value type regardless of origin
        # (scan, manual edit, CSV import, metadata extraction).
        current_year = str(datetime.now().year)
        for filename in names:
            file_path = os.path.join(folder, filename)
            if not filename.lower().endswith(".pdf") or not os.path.isfile(file_path):
                continue
            record = {
                "orig_path": file_path,
                "fonds": "",
                "year": current_year,
                "period": "Y",  # default retention period: permanent
                "number": "",
                "new_name": "",
            }
            self.files.append(record)
            self.tree.insert("", "end", values=self._row_values(record))
        messagebox.showinfo("完成", f"找到 {len(self.files)} 个PDF文件")

    def on_double_click(self, event):
        """Open an in-place editor for the double-clicked editable cell.

        ``<Return>`` or moving focus away commits the edit (after validation),
        ``<Escape>`` discards it.  An unchanged value is never re-validated,
        so merely clicking into and out of an empty cell raises no error.

        Args:
            event: The Tk ``<Double-1>`` event; ``x``/``y`` locate the cell.
        """
        if self.tree.identify("region", event.x, event.y) != "cell":
            return
        column = self.tree.identify_column(event.x)
        # Use the row under the pointer rather than the focus item, which is
        # not guaranteed to be the row that was clicked.
        row_id = self.tree.identify_row(event.y)
        if not row_id:
            return
        col_index = int(column[1:]) - 1
        # The original-name and new-name columns are read-only.
        if not 1 <= col_index <= 4:
            return
        index = self.tree.index(row_id)
        if index >= len(self.files):
            return
        bbox = self.tree.bbox(row_id, column)
        if not bbox:  # cell is not currently visible
            return
        x, y, width, height = bbox

        record = self.files[index]
        field = self._COLUMNS[col_index]
        # Read from the record, not from the tree: values read back from Tcl
        # may change type (e.g. "00001" coming back as a number).
        original_text = record[field]

        entry_edit = ttk.Entry(self.tree)
        entry_edit.place(x=x, y=y, width=width, height=height)
        entry_edit.insert(0, original_text)
        entry_edit.select_range(0, tk.END)
        entry_edit.focus()

        closed = False

        def close_editor():
            nonlocal closed
            closed = True
            entry_edit.destroy()

        def save_edit(event=None):
            # Both <Return> and <FocusOut> end up here, and destroying the
            # entry or opening a message box triggers <FocusOut> again, so
            # this must run at most once per editor.
            if closed:
                return
            new_value = entry_edit.get()
            close_editor()
            if new_value == original_text:
                return
            new_value, error = self._validate_field(field, new_value)
            if error:
                messagebox.showerror("错误", error)
                return
            record[field] = new_value
            new_name = self._build_new_name(
                record["fonds"], record["year"], record["period"], record["number"]
            )
            if new_name:
                record["new_name"] = new_name
            # The row may have been removed while the editor was open.
            if self.tree.exists(row_id):
                self.tree.item(row_id, values=self._row_values(record))

        def cancel_edit(event=None):
            if not closed:
                close_editor()

        entry_edit.bind("<Return>", save_edit)
        entry_edit.bind("<Escape>", cancel_edit)
        entry_edit.bind("<FocusOut>", save_edit)

    def batch_rename(self):
        """Rename every listed file to its generated name inside the folder.

        Records without a new name, name conflicts and OS errors are counted
        as failures and listed in the summary dialog.  Existing files are
        never overwritten.
        """
        if not self.files:
            messagebox.showwarning("警告", "没有要重命名的文件")
            return
        folder = self.folder_path.get()
        if not folder or not os.path.isdir(folder):
            messagebox.showerror("错误", "请选择有效的文件夹路径")
            return

        success_count, problems = self._rename_records(self.files, folder)
        error_count = len(problems)
        self._refresh_rows()

        summary = f"成功重命名 {success_count} 个文件\n失败 {error_count} 个文件"
        if problems:
            summary += "\n\n" + self._format_problems(problems)
        messagebox.showinfo("完成", summary)

    def export_csv(self):
        """Write all records to a UTF-8 CSV file chosen by the user.

        The ``csv`` module is used so that file names containing commas or
        quotes are quoted correctly.
        """
        if not self.files:
            messagebox.showwarning("警告", "没有可导出的数据")
            return
        file_path = filedialog.asksaveasfilename(
            defaultextension=".csv",
            filetypes=[("CSV文件", "*.csv")],
        )
        if not file_path:
            return
        try:
            with open(file_path, "w", encoding="utf-8", newline="") as csv_file:
                writer = csv.writer(csv_file, lineterminator="\n")
                writer.writerow(self._CSV_HEADER)
                writer.writerows(self._row_values(record) for record in self.files)
        except (OSError, UnicodeError) as e:
            messagebox.showerror("错误", f"导出失败: {str(e)}")
        else:
            messagebox.showinfo("成功", "数据已成功导出到CSV文件")

    def import_csv(self):
        """Replace the list with the valid records of a CSV file.

        A valid target folder must be selected first because the original
        paths are resolved against it.  The first line is treated as a header
        and skipped.  The current list is only replaced after the file has
        been read and parsed successfully.
        """
        folder = self.folder_path.get()
        if not folder or not os.path.isdir(folder):
            messagebox.showerror("错误", "请选择有效的文件夹路径")
            return
        file_path = filedialog.askopenfilename(filetypes=[("CSV文件", "*.csv")])
        if not file_path:
            return
        try:
            with open(file_path, "r", encoding="utf-8", newline="") as csv_file:
                rows = list(csv.reader(csv_file))
        except (OSError, UnicodeError, csv.Error) as e:
            messagebox.showerror("错误", f"导入失败: {str(e)}")
            return

        records, skipped = self._parse_csv_rows(rows[1:], folder)
        self._clear_rows()
        self.files = records
        for record in records:
            self.tree.insert("", "end", values=self._row_values(record))

        summary = f"成功导入 {len(self.files)} 条记录"
        if skipped:
            summary += f"\n跳过 {skipped} 条无效记录"
        messagebox.showinfo("成功", summary)

    def extract_metadata(self):
        """Pre-fill fonds number, year and item number from each PDF's text.

        Fields for which nothing is found keep their current value.  Files
        that cannot be read are reported in the final dialog; the remaining
        files are still processed.
        """
        if not self.files:
            messagebox.showwarning("警告", "没有可处理的文件")
            return

        problems = []
        for row_id, file_info in zip(self.tree.get_children(), self.files):
            try:
                text = self._read_pdf_text(file_info["orig_path"])
            except Exception as e:  # PyPDF2 raises a wide range of error types
                problems.append(f"{os.path.basename(file_info['orig_path'])}: {e}")
                continue
            file_info.update(self._extract_fields(text))
            new_name = self._build_new_name(
                file_info["fonds"],
                file_info["year"],
                file_info["period"],
                file_info["number"],
            )
            if new_name:
                file_info["new_name"] = new_name
            self.tree.item(row_id, values=self._row_values(file_info))

        if problems:
            messagebox.showwarning(
                "完成",
                "元数据提取完成\n\n" + self._format_problems(problems),
            )
        else:
            messagebox.showinfo("完成", "元数据提取完成")

    def clear_list(self):
        """Remove all rows and records, then confirm to the user."""
        self._clear_rows()
        self.files = []
        messagebox.showinfo("完成", "文件列表已清空")
if __name__ == "__main__":
    # Script entry point: create the Tk root window, attach the application
    # to it, and run the Tk event loop until the window is closed.
    main_window = tk.Tk()
    app = PDFRenamerApp(main_window)
    main_window.mainloop()
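# NOTE(review): the two lines below are stray non-code text left over from the
# original paste; they are kept as comments so that the file parses.
# 这种不行
# 最新发布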