### Excel双字符串处理工具(带GUI界面)
以下是完整的解决方案,支持两个字符串列处理,可自定义目标单元格位置(如C2和H2),并以字符串组合命名新文件:
```python
import openpyxl
import os
import tkinter as tk
from tkinter import filedialog, messagebox, ttk, simpledialog
import re
import sys
from datetime import datetime
from openpyxl.utils import get_column_letter, column_index_from_string
class ExcelBatchProcessor:
def __init__(self, root):
self.root = root
self.root.title("Excel双字符串处理工具")
self.root.geometry("800x600")
self.setup_ui()
# 高DPI支持
if sys.platform == "win32":
from ctypes import windll
windll.shcore.SetProcessDpiAwareness(1)
def setup_ui(self):
"""创建用户界面"""
# 主框架
main_frame = ttk.Frame(self.root, padding=20)
main_frame.pack(fill=tk.BOTH, expand=True)
# 文件选择部分
self.create_file_section(main_frame)
# 列设置部分
self.create_column_section(main_frame)
# 目标单元格设置
self.create_target_section(main_frame)
# 文件名设置
self.create_filename_section(main_frame)
# 日志和进度
self.create_log_section(main_frame)
# 处理按钮
process_btn = ttk.Button(
main_frame,
text="开始处理",
command=self.start_processing,
style="Accent.TButton"
)
process_btn.pack(pady=20)
# 状态栏
self.status_var = tk.StringVar(value="就绪")
status_bar = ttk.Label(
self.root,
textvariable=self.status_var,
relief=tk.SUNKEN,
anchor=tk.W
)
status_bar.pack(side=tk.BOTTOM, fill=tk.X)
# 设置样式
self.set_styles()
def set_styles(self):
"""设置UI样式"""
style = ttk.Style()
style.theme_use("vista")
style.configure("Accent.TButton", foreground="white", background="#0078D7")
style.map("Accent.TButton", background=[("active", "#106EBE")])
style.configure("TFrame", background="#F3F3F3")
def create_file_section(self, parent):
"""创建文件选择区域"""
frame = ttk.LabelFrame(parent, text="文件选择", padding=10)
frame.pack(fill=tk.X, pady=10)
# 母版文件
ttk.Label(frame, text="母版Excel文件:").grid(row=0, column=0, sticky=tk.W, pady=5)
self.template_path = tk.StringVar()
ttk.Entry(frame, textvariable=self.template_path, width=50).grid(row=0, column=1, padx=5)
ttk.Button(frame, text="浏览...", command=self.select_template).grid(row=0, column=2)
# 数据源文件
ttk.Label(frame, text="数据源Excel文件:").grid(row=1, column=0, sticky=tk.W, pady=5)
self.source_path = tk.StringVar()
ttk.Entry(frame, textvariable=self.source_path, width=50).grid(row=1, column=1, padx=5)
ttk.Button(frame, text="浏览...", command=self.select_source).grid(row=1, column=2)
# 输出目录
ttk.Label(frame, text="输出文件夹:").grid(row=2, column=0, sticky=tk.W, pady=5)
self.output_dir = tk.StringVar()
ttk.Entry(frame, textvariable=self.output_dir, width=50).grid(row=2, column=1, padx=5)
ttk.Button(frame, text="浏览...", command=self.select_output).grid(row=2, column=2)
def create_column_section(self, parent):
"""创建数据列设置区域"""
frame = ttk.LabelFrame(parent, text="数据列设置", padding=10)
frame.pack(fill=tk.X, pady=10)
# 字符串1列
ttk.Label(frame, text="字符串1列:").grid(row=0, column=0, sticky=tk.W, pady=5)
self.str1_col = tk.StringVar(value="A")
ttk.Entry(frame, textvariable=self.str1_col, width=5).grid(row=0, column=1, padx=5)
# 字符串2列
ttk.Label(frame, text="字符串2列:").grid(row=0, column=2, sticky=tk.W, padx=10, pady=5)
self.str2_col = tk.StringVar(value="B")
ttk.Entry(frame, textvariable=self.str2_col, width=5).grid(row=0, column=3, padx=5)
# 起始行
ttk.Label(frame, text="数据起始行:").grid(row=0, column=4, sticky=tk.W, padx=10, pady=5)
self.start_row = tk.IntVar(value=2)
ttk.Entry(frame, textvariable=self.start_row, width=5).grid(row=0, column=5, padx=5)
# 预览按钮
ttk.Button(frame, text="预览数据", command=self.preview_data).grid(row=0, column=6, padx=20)
def create_target_section(self, parent):
"""创建目标单元格设置区域"""
frame = ttk.LabelFrame(parent, text="目标单元格设置", padding=10)
frame.pack(fill=tk.X, pady=10)
# 字符串1目标
ttk.Label(frame, text="字符串1目标单元格:").grid(row=0, column=0, sticky=tk.W, pady=5)
self.str1_target = tk.StringVar(value="C2")
ttk.Entry(frame, textvariable=self.str1_target, width=10).grid(row=0, column=1, padx=5)
# 字符串2目标
ttk.Label(frame, text="字符串2目标单元格:").grid(row=0, column=2, sticky=tk.W, padx=10, pady=5)
self.str2_target = tk.StringVar(value="H2")
ttk.Entry(frame, textvariable=self.str2_target, width=10).grid(row=0, column=3, padx=5)
# 单元格选择器
ttk.Button(frame, text="选择单元格", command=self.select_target_cells).grid(row=0, column=4, padx=20)
def create_filename_section(self, parent):
"""创建文件名设置区域"""
frame = ttk.LabelFrame(parent, text="文件名设置", padding=10)
frame.pack(fill=tk.X, pady=10)
# 文件名格式
ttk.Label(frame, text="文件名格式:").grid(row=0, column=0, sticky=tk.W, pady=5)
self.filename_format = tk.StringVar(value="{str1}_{str2}")
ttk.Entry(frame, textvariable=self.filename_format, width=40).grid(row=0, column=1, padx=5)
# 分隔符
ttk.Label(frame, text="分隔符:").grid(row=0, column=2, sticky=tk.W, padx=10, pady=5)
self.delimiter = tk.StringVar(value="_")
ttk.Entry(frame, textvariable=self.delimiter, width=5).grid(row=0, column=3, padx=5)
# 示例
ttk.Label(frame, text="示例: 字符串1_字符串2").grid(row=1, column=1, sticky=tk.W, pady=5)
def create_log_section(self, parent):
"""创建日志和进度区域"""
frame = ttk.LabelFrame(parent, text="处理日志", padding=10)
frame.pack(fill=tk.BOTH, expand=True, pady=10)
# 日志文本框
self.log_text = tk.Text(frame, height=10)
self.log_text.pack(fill=tk.BOTH, expand=True, padx=5, pady=5)
# 滚动条
scrollbar = ttk.Scrollbar(frame, command=self.log_text.yview)
scrollbar.pack(side=tk.RIGHT, fill=tk.Y)
self.log_text.config(yscrollcommand=scrollbar.set)
# 进度条
self.progress_var = tk.DoubleVar()
progress_bar = ttk.Progressbar(
frame,
variable=self.progress_var,
maximum=100,
length=400
)
progress_bar.pack(fill=tk.X, pady=5)
def log_message(self, message):
"""添加日志消息"""
timestamp = datetime.now().strftime("%H:%M:%S")
self.log_text.insert(tk.END, f"[{timestamp}] {message}\n")
self.log_text.see(tk.END) # 自动滚动到底部
self.root.update()
def clean_filename(self, name):
"""清理文件名,移除非法字符"""
# 替换Windows文件名非法字符
return re.sub(r'[\\/*?:"<>|]', "_", str(name)).strip()
def select_template(self):
"""选择母版文件"""
path = filedialog.askopenfilename(
title="选择母版Excel文件",
filetypes=[("Excel文件", "*.xlsx;*.xls")],
initialdir=os.path.expanduser("~\\Documents")
)
if path:
self.template_path.set(path)
def select_source(self):
"""选择数据源文件"""
path = filedialog.askopenfilename(
title="选择数据源Excel文件",
filetypes=[("Excel文件", "*.xlsx;*.xls")],
initialdir=os.path.dirname(self.template_path.get()) if self.template_path.get() else os.path.expanduser("~\\Documents")
)
if path:
self.source_path.set(path)
def select_output(self):
"""选择输出目录"""
path = filedialog.askdirectory(
title="选择输出文件夹",
initialdir=os.path.dirname(self.template_path.get()) if self.template_path.get() else os.path.expanduser("~\\Documents")
)
if path:
self.output_dir.set(path)
def preview_data(self):
"""预览数据源文件内容"""
source_path = self.source_path.get()
if not source_path or not os.path.exists(source_path):
messagebox.showerror("错误", "请先选择有效的数据源文件")
return
try:
# 创建预览窗口
preview_win = tk.Toplevel(self.root)
preview_win.title("数据预览")
preview_win.geometry("800x500")
# 创建表格
tree = ttk.Treeview(preview_win, show="headings")
tree.pack(fill=tk.BOTH, expand=True, padx=10, pady=10)
# 加载工作簿
wb = openpyxl.load_workbook(source_path)
ws = wb.active
# 设置列
columns = ["行号"] + [get_column_letter(i) for i in range(1, min(ws.max_column + 1, 10))]
tree["columns"] = columns
tree.heading("行号", text="行")
# 添加列标题
for col in columns[1:]:
tree.heading(col, text=col)
tree.column(col, width=100)
# 添加行数据(前20行)
start_row = self.start_row.get()
for row_idx in range(start_row, min(start_row + 20, ws.max_row + 1)):
values = [str(row_idx)]
for col_idx in range(1, min(ws.max_column + 1, 10)):
cell_value = ws.cell(row=row_idx, column=col_idx).value
values.append(str(cell_value)[:20] + "..." if cell_value and len(str(cell_value)) > 20 else str(cell_value))
tree.insert("", "end", values=values)
# 状态信息
tk.Label(
preview_win,
text=f"共 {ws.max_row} 行, {ws.max_column} 列 | 当前预览行: {start_row} 到 {min(start_row+20, ws.max_row)}",
relief=tk.SUNKEN
).pack(fill=tk.X, side=tk.BOTTOM)
wb.close()
except Exception as e:
messagebox.showerror("错误", f"无法预览数据:\n{str(e)}")
def select_target_cells(self):
"""从母版中选择目标单元格"""
template_path = self.template_path.get()
if not template_path or not os.path.exists(template_path):
messagebox.showerror("错误", "请先选择有效的母版文件")
return
try:
# 创建选择窗口
select_win = tk.Toplevel(self.root)
select_win.title("选择目标单元格")
select_win.geometry("600x400")
# 创建标签
label = ttk.Label(
select_win,
text="请在下方表格中点击选择两个目标单元格(先选字符串1位置,再选字符串2位置)",
wraplength=500
)
label.pack(pady=10)
# 创建表格
tree = ttk.Treeview(select_win, show="headings")
tree.pack(fill=tk.BOTH, expand=True, padx=10, pady=10)
# 加载工作簿
wb = openpyxl.load_workbook(template_path)
ws = wb.active
# 设置列
columns = ["行号"] + [get_column_letter(i) for i in range(1, min(ws.max_column + 1, 10))]
tree["columns"] = columns
tree.heading("行号", text="行")
# 添加列标题
for col in columns[1:]:
tree.heading(col, text=col)
tree.column(col, width=100)
# 添加行数据(前15行)
for row_idx in range(1, min(16, ws.max_row + 1)):
values = [str(row_idx)]
for col_idx in range(1, min(ws.max_column + 1, 10)):
cell_value = ws.cell(row=row_idx, column=col_idx).value
values.append(str(cell_value)[:15] + "..." if cell_value and len(str(cell_value)) > 15 else str(cell_value))
tree.insert("", "end", values=values)
# 选择处理
self.selected_cells = []
def on_click(event):
region = tree.identify_region(event.x, event.y)
if region == "cell":
row = tree.item(tree.focus())["values"][0]
column = tree.identify_column(event.x)
col_letter = columns[int(column[1:])]
cell_address = f"{col_letter}{row}"
self.selected_cells.append(cell_address)
if len(self.selected_cells) == 1:
self.log_message(f"已选择字符串1位置: {cell_address}")
elif len(self.selected_cells) == 2:
self.str1_target.set(self.selected_cells[0])
self.str2_target.set(self.selected_cells[1])
self.log_message(f"已选择字符串2位置: {cell_address}")
select_win.destroy()
tree.bind("<Button-1>", on_click)
wb.close()
except Exception as e:
messagebox.showerror("错误", f"无法打开母版文件:\n{str(e)}")
def start_processing(self):
"""开始处理数据"""
# 验证输入
if not all([
self.template_path.get(),
self.source_path.get(),
self.output_dir.get()
]):
messagebox.showerror("错误", "请填写所有必填字段")
return
# 获取参数
template_path = self.template_path.get()
source_path = self.source_path.get()
output_dir = self.output_dir.get()
str1_col = self.str1_col.get().upper()
str2_col = self.str2_col.get().upper()
start_row = self.start_row.get()
str1_target = self.str1_target.get().upper()
str2_target = self.str2_target.get().upper()
filename_format = self.filename_format.get()
delimiter = self.delimiter.get()
# 更新状态
self.status_var.set("处理中...请稍候")
self.progress_var.set(0)
self.log_message("=" * 50)
self.log_message("开始处理数据")
self.log_message(f"母版文件: {os.path.basename(template_path)}")
self.log_message(f"数据源文件: {os.path.basename(source_path)}")
self.log_message(f"输出目录: {output_dir}")
self.root.update()
try:
# 加载数据源
source_wb = openpyxl.load_workbook(source_path)
source_ws = source_wb.active
# 获取列索引
str1_col_idx = column_index_from_string(str1_col)
str2_col_idx = column_index_from_string(str2_col)
# 获取数据行数
total_rows = 0
for row in range(start_row, source_ws.max_row + 1):
if source_ws.cell(row=row, column=str1_col_idx).value is not None:
total_rows += 1
else:
break
if total_rows == 0:
self.log_message("错误: 未找到有效数据")
return
self.log_message(f"共发现 {total_rows} 条数据需要处理")
# 加载母版模板
template_wb = openpyxl.load_workbook(template_path)
template_ws = template_wb.active
# 处理每条数据
success_count = 0
for idx, row_idx in enumerate(range(start_row, start_row + total_rows)):
# 获取数据
str1_value = source_ws.cell(row=row_idx, column=str1_col_idx).value
str2_value = source_ws.cell(row=row_idx, column=str2_col_idx).value
if str1_value is None or str2_value is None:
self.log_message(f"跳过第 {row_idx} 行: 数据不完整")
continue
# 创建新工作簿(复制母版)
new_wb = openpyxl.Workbook()
new_ws = new_wb.active
# 复制母版内容
for row in template_ws.iter_rows():
for cell in row:
new_ws[cell.coordinate].value = cell.value
if cell.has_style:
new_ws[cell.coordinate].font = cell.font.copy()
new_ws[cell.coordinate].border = cell.border.copy()
new_ws[cell.coordinate].fill = cell.fill.copy()
# 填充数据
new_ws[str1_target] = str1_value
new_ws[str2_target] = str2_value
# 生成文件名
filename = filename_format.format(
str1=str1_value,
str2=str2_value,
delimiter=delimiter,
row=row_idx
)
clean_filename = self.clean_filename(filename) + ".xlsx"
output_path = os.path.join(output_dir, clean_filename)
# 保存文件
try:
new_wb.save(output_path)
self.log_message(f"已创建: {clean_filename}")
success_count += 1
except Exception as e:
self.log_message(f"错误: 无法保存 {clean_filename} - {str(e)}")
# 更新进度
progress = ((idx + 1) / total_rows) * 100
self.progress_var.set(progress)
self.status_var.set(f"处理中: {idx+1}/{total_rows} ({progress:.1f}%)")
self.root.update()
# 处理完成
self.progress_var.set(100)
self.status_var.set(f"处理完成! 成功创建 {success_count}/{total_rows} 个文件")
self.log_message(f"处理完成! 成功创建 {success_count} 个文件")
self.log_message("=" * 50)
# 打开输出文件夹
os.startfile(output_dir)
except Exception as e:
self.log_message(f"处理出错: {str(e)}")
messagebox.showerror("错误", f"处理过程中出错:\n{str(e)}")
finally:
try:
source_wb.close()
template_wb.close()
except:
pass
# 运行主程序
if __name__ == "__main__":
root = tk.Tk()
app = ExcelBatchProcessor(root)
root.mainloop()
```
### 使用说明
1. **操作流程**:
- 步骤1:选择母版Excel文件(模板)
- 步骤2:选择数据源Excel文件(包含两个字符串列)
- 步骤3:设置输出文件夹
- 步骤4:配置数据列(字符串1和字符串2所在列)
- 步骤5:设置目标单元格(如C2和H2)
- 步骤6:设置文件名格式(如"{str1}_{str2}")
- 步骤7:点击"开始处理"按钮
2. **核心功能**:
- **双字符串处理**:同时处理两个字符串列
- **目标单元格定位**:支持手动输入或图形化选择目标单元格
- **自定义文件名**:使用字符串组合命名文件
- **实时进度显示**:进度条和详细日志
- **数据预览**:查看数据源内容
3. **文件名格式选项**:
```markdown
| 变量名 | 说明 | 示例值 |
|-----------|------------------|--------------|
| {str1} | 第一个字符串值 | "产品名称" |
| {str2} | 第二个字符串值 | "产品编码" |
| {delimiter}| 设置的分隔符 | "_" |
| {row} | 数据行号 | "2" |
```
### 示例场景
**数据源文件(data.xlsx)**:
| A列(产品名称) | B列(产品编码) |
|-----------------|----------------|
| 智能手机 | P1001 |
| 笔记本电脑 | P2002 |
| 平板电脑 | P3003 |
**配置参数**:
- 字符串1列:A
- 字符串2列:B
- 目标单元格:C2(产品名称)和H2(产品编码)
- 文件名格式:`{str1}_{str2}`
**处理结果**:
```
输出目录/
├── 智能手机_P1001.xlsx
├── 笔记本电脑_P2002.xlsx
└── 平板电脑_P3003.xlsx
```
每个文件内容:
- 母版内容完全复制
- C2单元格:产品名称
- H2单元格:产品编码
### 技术特点
1. **目标单元格可视化选择**:
```python
def select_target_cells(self):
# 显示母版内容表格
# 用户点击选择两个单元格位置
self.str1_target.set(self.selected_cells[0])
self.str2_target.set(self.selected_cells[1])
```
2. **灵活的文件命名系统**:
```python
filename = filename_format.format(
str1=str1_value,
str2=str2_value,
delimiter=delimiter,
row=row_idx
)
```
3. **安全文件名处理**:
```python
def clean_filename(self, name):
return re.sub(r'[\\/*?:"<>|]', "_", str(name)).strip()
```
4. **详细日志系统**:
```python
def log_message(self, message):
timestamp = datetime.now().strftime("%H:%M:%S")
self.log_text.insert(tk.END, f"[{timestamp}] {message}\n")
self.log_text.see(tk.END)
```
5. **内存优化**:
```python
finally:
try:
source_wb.close()
template_wb.close()
except:
pass
```
### 安装与运行
1. **安装依赖**:
```bash
pip install openpyxl
```
2. **运行程序**:
- 双击脚本文件运行(Windows)
- 命令行执行:`python excel_processor.py`
3. **系统要求**:
- Windows 7/10/11
- Python 3.6+
- 支持Excel 2007+格式(.xlsx)