import pandas as pd
import re
import numpy as np
import tkinter as tk
from tkinter import filedialog, messagebox, ttk
from urllib.parse import urlparse, parse_qs
from collections import OrderedDict
import os
from openpyxl.styles import Font, Alignment
from openpyxl import Workbook
from openpyxl.utils.dataframe import dataframe_to_rows
import webbrowser
class ExcelProcessorApp:
def __init__(self, root):
self.root = root
self.root.title("Excel表格处理器")
self.root.geometry("700x550") # 增加高度以容纳预览按钮
# 初始化变量
self.source_file_path = tk.StringVar()
self.hyperlink_source_path = tk.StringVar()
self.output_file_path = tk.StringVar()
self.preview_df = None # 存储预览数据
# 创建界面
self.create_widgets()
def create_widgets(self):
# 主框架
main_frame = ttk.Frame(self.root, padding="20")
main_frame.grid(row=0, column=0, sticky=(tk.W, tk.E, tk.N, tk.S))
# 源文件选择区域
ttk.Label(main_frame, text="源表文件:").grid(row=0, column=0, sticky=tk.W, pady=5)
ttk.Entry(main_frame, textvariable=self.source_file_path, width=50).grid(row=0, column=1, padx=5, pady=5)
ttk.Button(main_frame, text="浏览...", command=self.browse_source_file).grid(row=0, column=2, padx=5, pady=5)
# 超链接源表选择区域
ttk.Label(main_frame, text="超链接源表:").grid(row=1, column=0, sticky=tk.W, pady=5)
ttk.Entry(main_frame, textvariable=self.hyperlink_source_path, width=50).grid(row=1, column=1, padx=5, pady=5)
ttk.Button(main_frame, text="浏览...", command=self.browse_hyperlink_source_file).grid(row=1, column=2, padx=5, pady=5)
# 输出文件选择区域
ttk.Label(main_frame, text="输出文件:").grid(row=2, column=0, sticky=tk.W, pady=5)
ttk.Entry(main_frame, textvariable=self.output_file_path, width=50).grid(row=2, column=1, padx=5, pady=5)
ttk.Button(main_frame, text="浏览...", command=self.browse_output_file).grid(row=2, column=2, padx=5, pady=5)
# 按钮框架
button_frame = ttk.Frame(main_frame)
button_frame.grid(row=3, column=0, columnspan=3, pady=10)
# 预览按钮
ttk.Button(button_frame, text="预览结果", command=self.preview_results).pack(side=tk.LEFT, padx=5)
# 处理按钮
ttk.Button(button_frame, text="开始处理", command=self.process_file).pack(side=tk.LEFT, padx=5)
# 进度条
self.progress = ttk.Progressbar(main_frame, mode='indeterminate')
self.progress.grid(row=4, column=0, columnspan=3, sticky=(tk.W, tk.E), pady=10)
# 状态标签
self.status_label = ttk.Label(main_frame, text="请选择源表和输出文件")
self.status_label.grid(row=5, column=0, columnspan=3, pady=10)
# 配置网格权重
main_frame.columnconfigure(1, weight=1)
self.root.columnconfigure(0, weight=1)
self.root.rowconfigure(0, weight=1)
def browse_source_file(self):
file_path = filedialog.askopenfilename(
title="选择源表文件",
filetypes=[("Excel文件", "*.xlsx"), ("所有文件", "*.*")]
)
if file_path:
self.source_file_path.set(file_path)
# 自动设置输出文件路径
dir_name = os.path.dirname(file_path)
base_name = os.path.basename(file_path)
name, ext = os.path.splitext(base_name)
output_path = os.path.join(dir_name, f"{name}_处理结果{ext}")
self.output_file_path.set(output_path)
self.status_label.config(text="已选择源表文件,请选择超链接源表")
def browse_hyperlink_source_file(self):
file_path = filedialog.askopenfilename(
title="选择超链接源表文件",
filetypes=[("Excel文件", "*.xlsx"), ("所有文件", "*.*")]
)
if file_path:
self.hyperlink_source_path.set(file_path)
self.status_label.config(text="已选择超链接源表,请点击开始处理")
def browse_output_file(self):
file_path = filedialog.asksaveasfilename(
title="选择输出文件",
defaultextension=".xlsx",
filetypes=[("Excel文件", "*.xlsx"), ("所有文件", "*.*")]
)
if file_path:
self.output_file_path.set(file_path)
def preview_results(self):
"""预览处理结果"""
if not self.source_file_path.get():
messagebox.showerror("错误", "请先选择源表文件")
return
if not self.hyperlink_source_path.get():
messagebox.showerror("错误", "请先选择超链接源表文件")
return
# 开始处理预览
self.progress.start()
self.status_label.config(text="正在生成预览,请稍候...")
self.root.update()
try:
# 调用处理函数,生成全部预览数据
self.preview_df = self.process_excel_file_preview(
self.source_file_path.get(),
self.hyperlink_source_path.get()
)
# 停止进度条
self.progress.stop()
self.status_label.config(text="预览生成完成")
# 显示预览窗口
self.show_preview(self.preview_df)
except Exception as e:
self.progress.stop()
self.status_label.config(text="预览生成失败")
messagebox.showerror("错误", f"生成预览时发生错误:\n{str(e)}")
def show_preview(self, preview_df):
"""显示预览窗口"""
# 创建预览窗口
preview_window = tk.Toplevel(self.root)
preview_window.title("处理结果预览")
preview_window.geometry("1200x700") # 增加宽度以容纳更多列
# 创建框架
frame = ttk.Frame(preview_window, padding="10")
frame.grid(row=0, column=0, sticky=(tk.W, tk.E, tk.N, tk.S))
# 添加标题
title_label = ttk.Label(frame, text="处理结果预览", font=("Arial", 12, "bold"))
title_label.grid(row=0, column=0, columnspan=2, pady=(0, 10))
# 创建表格框架
table_frame = ttk.Frame(frame)
table_frame.grid(row=1, column=0, columnspan=2, sticky=(tk.W, tk.E, tk.N, tk.S))
# 创建表格
columns = list(preview_df.columns)
tree = ttk.Treeview(table_frame, columns=columns, show="headings", height=20)
# 设置列标题和宽度
col_widths = {
'顺序': 50, '商品名称': 150, '商品链接': 150, '商品ID': 80,
'优惠方式(xx元优惠券/拍立减xx元)': 150, '优惠券链接': 150,
'优惠券提取': 100, '公式放这一列': 100, '优惠券1': 100,
'优惠券2': 100, '优惠券3': 100, '优惠券4': 100, '优惠券5': 100,
'优惠券链接(提取)': 150, '优惠券链接1': 150, '优惠券链接2': 150,
'优惠券链接3': 150, '优惠券链接4': 150, '优惠券链接5': 150
}
for col in columns:
tree.heading(col, text=col)
width = col_widths.get(col, 100)
tree.column(col, width=width, minwidth=80, anchor=tk.CENTER)
# 添加滚动条
vsb = ttk.Scrollbar(table_frame, orient="vertical", command=tree.yview)
hsb = ttk.Scrollbar(table_frame, orient="horizontal", command=tree.xview)
tree.configure(yscrollcommand=vsb.set, xscrollcommand=hsb.set)
# 布局
tree.grid(row=0, column=0, sticky=(tk.W, tk.E, tk.N, tk.S))
vsb.grid(row=0, column=1, sticky=(tk.N, tk.S))
hsb.grid(row=1, column=0, sticky=(tk.W, tk.E))
# 添加数据
for _, row in preview_df.iterrows():
tree.insert("", "end", values=list(row))
# 绑定点击事件到C列(商品链接)
tree.bind("<Button-1>", lambda event: self.on_tree_click(event, tree, preview_window))
# 创建可编辑的E列和F列
self.create_editable_columns(tree, preview_window, preview_df)
# 添加按钮
button_frame = ttk.Frame(frame)
button_frame.grid(row=2, column=0, columnspan=2, pady=10)
ttk.Button(button_frame, text="确认并开始处理",
command=lambda: [preview_window.destroy(), self.process_file()]).pack(side=tk.LEFT, padx=5)
ttk.Button(button_frame, text="关闭预览",
command=preview_window.destroy).pack(side=tk.LEFT, padx=5)
# 配置权重
frame.columnconfigure(0, weight=1)
frame.rowconfigure(1, weight=1)
table_frame.columnconfigure(0, weight=1)
table_frame.rowconfigure(0, weight=1)
preview_window.columnconfigure(0, weight=1)
preview_window.rowconfigure(0, weight=1)
# 存储tree和df引用以便后续使用
self.preview_tree = tree
self.preview_window = preview_window
self.preview_df = preview_df
def create_editable_columns(self, tree, window, df):
"""为E列和F列创建可编辑的Entry小部件"""
# 获取E列和F列的索引
columns = tree["columns"]
# 找到E列和F列在Treeview中的索引
e_col_index = None
f_col_index = None
for i, col in enumerate(columns):
if tree.heading(col)["text"] == "优惠券链接": # E列
e_col_index = i
elif tree.heading(col)["text"] == "优惠券提取": # F列
f_col_index = i
if e_col_index is None or f_col_index is None:
messagebox.showerror("错误", "无法找到可编辑列")
return
# 存储所有Entry小部件的引用
self.entry_widgets_e = {}
self.entry_widgets_f = {}
# 为每一行创建Entry小部件
for i, item_id in enumerate(tree.get_children()):
# 获取行位置和大小
e_bbox = tree.bbox(item_id, column=e_col_index)
f_bbox = tree.bbox(item_id, column=f_col_index)
if e_bbox:
# 创建E列的Entry
entry_e = ttk.Entry(window)
entry_e.place(x=e_bbox[0], y=e_bbox[1], width=e_bbox[2], height=e_bbox[3])
# 设置初始值
current_value = df.iloc[i, e_col_index]
entry_e.insert(0, str(current_value) if not pd.isna(current_value) else "")
# 绑定事件以更新DataFrame
entry_e.bind("<FocusOut>", lambda e, row=i, col=e_col_index:
self.update_dataframe_from_entry(e, row, col))
# 绑定回车键事件
entry_e.bind("<Return>", lambda e: e.widget.master.focus())
self.entry_widgets_e[item_id] = entry_e
if f_bbox:
# 创建F列的Entry
entry_f = ttk.Entry(window)
entry_f.place(x=f_bbox[0], y=f_bbox[1], width=f_bbox[2], height=f_bbox[3])
# 设置初始值
current_value = df.iloc[i, f_col_index]
entry_f.insert(0, str(current_value) if not pd.isna(current_value) else "")
# 绑定事件以更新DataFrame
entry_f.bind("<FocusOut>", lambda e, row=i, col=f_col_index:
self.update_dataframe_from_entry(e, row, col))
# 绑定回车键事件
entry_f.bind("<Return>", lambda e: e.widget.master.focus())
self.entry_widgets_f[item_id] = entry_f
# 绑定Treeview的滚动事件,以便在滚动时更新Entry位置
def update_entry_positions(event=None):
for item_id in tree.get_children():
e_bbox = tree.bbox(item_id, column=e_col_index)
f_bbox = tree.bbox(item_id, column=f_col_index)
if e_bbox and item_id in self.entry_widgets_e:
self.entry_widgets_e[item_id].place(x=e_bbox[0], y=e_bbox[1],
width=e_bbox[2], height=e_bbox[3])
if f_bbox and item_id in self.entry_widgets_f:
self.entry_widgets_f[item_id].place(x=f_bbox[0], y=f_bbox[1],
width=f_bbox[2], height=f_bbox[3])
# 绑定滚动事件
tree.bind("<MouseWheel>", update_entry_positions)
tree.bind("<Button-4>", update_entry_positions) # Linux上滚
tree.bind("<Button-5>", update_entry_positions) # Linux下滚
# 获取滚动条并绑定事件
vsb = tree.master.children.get('!scrollbar')
hsb = tree.master.children.get('!scrollbar2')
if vsb:
vsb.configure(command=lambda *args: [tree.yview(*args), update_entry_positions()])
if hsb:
hsb.configure(command=lambda *args: [tree.xview(*args), update_entry_positions()])
# 初始更新位置
window.after(100, update_entry_positions)
def update_dataframe_from_entry(self, event, row, col):
"""从Entry小部件更新DataFrame"""
widget = event.widget
new_value = widget.get()
if self.preview_df is not None and row < len(self.preview_df):
self.preview_df.iloc[row, col] = new_value
# 同时更新Treeview中的显示
item_id = self.preview_tree.get_children()[row]
values = list(self.preview_tree.item(item_id)['values'])
values[col] = new_value
self.preview_tree.item(item_id, values=values)
def on_tree_click(self, event, tree, window):
"""处理树形视图的点击事件"""
region = tree.identify("region", event.x, event.y)
if region == "cell":
column = tree.identify_column(event.x)
row = tree.identify_row(event.y)
# 检查是否是C列(商品链接列)
if column == "#3": # 列索引从1开始,C列是第3列
item = tree.item(row)
values = item['values']
if len(values) >= 3: # 确保有足够的值
url = values[2] # C列是第3个值(索引2)
if url and isinstance(url, str) and url.startswith(('http://', 'https://')):
webbrowser.open_new(url)
def process_file(self):
if not self.source_file_path.get():
messagebox.showerror("错误", "请先选择源表文件")
return
if not self.hyperlink_source_path.get():
messagebox.showerror("错误", "请先选择超链接源表文件")
return
if not self.output_file_path.get():
messagebox.showerror("错误", "请先选择输出文件")
return
# 开始处理
self.progress.start()
self.status_label.config(text="正在处理文件,请稍候...")
self.root.update()
try:
# 如果有预览数据,使用预览数据
if self.preview_df is not None:
# 保存结果
with pd.ExcelWriter(self.output_file_path.get(), engine='openpyxl') as writer:
# 保存Sheet2
self.preview_df.to_excel(writer, sheet_name='Sheet2', index=False)
# 创建超链接工作表
self.create_hyperlinks_sheet(writer, self.hyperlink_source_path.get(), self.preview_df)
else:
# 调用处理函数
self.process_excel_file(
self.source_file_path.get(),
self.hyperlink_source_path.get(),
self.output_file_path.get()
)
# 处理完成
self.progress.stop()
self.status_label.config(text="处理完成!")
messagebox.showinfo("成功", f"文件处理完成,已保存到:\n{self.output_file_path.get()}")
except Exception as e:
self.progress.stop()
self.status_label.config(text="处理失败")
messagebox.showerror("错误", f"处理文件时发生错误:\n{str(e)}")
# 以下所有其他方法保持不变
def process_excel_file_preview(self, source_path, hyperlink_source_path):
"""处理Excel文件用于预览"""
# 读取源表
source_df = pd.read_excel(source_path, sheet_name='Sheet1')
# 清除C列(优惠方式列)的空格
source_df.iloc[:, 2] = source_df.iloc[:, 2].apply(self.clean_text)
# 创建目标DataFrame
target_columns = [
'顺序', '商品名称', '商品链接', '商品ID', '优惠方式(xx元优惠券/拍立减xx元)',
'优惠券链接', '优惠券提取', '公式放这一列', '优惠券1', '优惠券2', '优惠券3',
'优惠券4', '优惠券5', '优惠券链接(提取)', '优惠券链接1', '优惠券链接2',
'优惠券链接3', '优惠券链接4', '优惠券链接5'
]
target_df = pd.DataFrame(columns=target_columns)
# 填充数据
for idx, row in source_df.iterrows():
# 顺序
target_df.loc[idx, '顺序'] = idx + 1
# 商品名称和链接
target_df.loc[idx, '商品名称'] = row.iloc[0] # A列
target_df.loc[idx, '商品链接'] = row.iloc[1] # B列
# 商品ID
item_id = self.extract_item_id(row.iloc[1])
target_df.loc[idx, '商品ID'] = item_id
# 优惠方式(已清除空格)
discount_info = row.iloc[2] # C列
target_df.loc[idx, '优惠方式(xx元优惠券/拍立减xx元)'] = discount_info
# 优惠券链接
coupon_links = row.iloc[3] # D列
target_df.loc[idx, '优惠券链接'] = coupon_links
# 应用ExtractCouponInfo函数
coupon_extract = self.extract_coupon_info(discount_info)
target_df.loc[idx, '优惠券提取'] = coupon_extract
# 应用FormatCoupon函数
formatted_coupon = self.format_coupon(coupon_extract)
target_df.loc[idx, '公式放这一列'] = formatted_coupon
# 根据H列的内容填充I列到M列
coupon_values = self.fill_coupon_columns(formatted_coupon, discount_info)
for i in range(5):
target_df.loc[idx, f'优惠券{i+1}'] = coupon_values[i]
# 处理优惠券链接
urls, coupon_ids = self.split_coupon_links(coupon_links)
# 优惠券链接提取 (用逗号分隔的URL)
target_df.loc[idx, '优惠券链接(提取)'] = ", ".join(urls) if urls else ""
# 填充优惠券链接1-5列(O列到S列)
for i in range(5):
if i < len(urls):
target_df.loc[idx, f'优惠券链接{i+1}'] = urls[i]
else:
target_df.loc[idx, f'优惠券链接{i+1}'] = ""
return target_df
def clean_text(self, text):
"""清除文本中的多余空格"""
if pd.isna(text) or not isinstance(text, str):
return text
# 去除前后空格
cleaned = text.strip()
# 将多个连续空格替换为单个空格
cleaned = re.sub(r'\s+', ' ', cleaned)
return cleaned
def extract_coupon_info(self, text):
"""模拟VBA中的ExtractCouponInfo函数"""
if pd.isna(text) or not isinstance(text, str):
return ""
# 第一段正则表达式:匹配优惠券相关短语
pattern1 = r"-?[1-9]\d*[\u4e00-\u9fa5][优][惠][券]|[领]-?[1-9]\d*[\u4e00-\u9fa5]{3,6}|[领][\u4e00-\u9fa5]{1,4}-?[1-9]\d*[元]|[领]-?[1-9]\d*[\u4e00-\u9fa5]{1,3}|[优][惠][券]-?[1-9]\d*|[领][券]-?[1-9]\d*|-?[1-9]\d*[优][惠][券]|[拍][立][减]|[直][接][拍]|[达][人][闪][降]|[闪][降]"
# 第二段正则表达式:提取数字部分或特定关键词
pattern2 = r"-?[1-9]\d*[元]|-?[1-9]\d*|[拍][立][减]|[直][接][拍]|[达][人][闪][降]|[闪][降]"
# 正则表达式匹配双破折号分隔的数字格式
pattern_dash = r"\b(?:\d{1,3}--)+\d{1,3}\b"
# 执行第一段匹配
matches1 = re.findall(pattern1, text)
# 执行双破折号分隔数字的匹配
dash_matches = re.findall(pattern_dash, text)
# 使用有序字典来存储唯一值和它们的出现顺序
unique_values = OrderedDict()
counter = 0
# 对每个匹配项执行第二段匹配
for match in matches1:
matches2 = re.findall(pattern2, match)
for value in matches2:
# 添加到字典(自动去重)并记录顺序
if value not in unique_values:
unique_values[value] = counter
counter += 1
# 处理双破折号分隔的数字
for dash_match in dash_matches:
# 分割数字并添加"元"后缀
numbers = dash_match.split("--")
for number in numbers:
number_with_yuan = number + "元"
# 添加到字典(自动去重)并记录顺序
if number_with_yuan not in unique_values:
unique_values[number_with_yuan] = counter
counter += 1
# 按照原始顺序将值连接成字符串
result = "/".join(unique_values.keys())
return result
def format_coupon(self, input_value):
"""模拟VBA中的FormatCoupon函数"""
if pd.isna(input_value) or (isinstance(input_value, str) and input_value.strip() == ""):
return ""
# 确保处理字符串类型
if not isinstance(input_value, str):
input_str = str(input_value)
else:
input_str = input_value
# 检查是否已包含优惠券后缀
if "元优惠券" in input_str:
return input_str
# 处理带斜杠的分段情况
if "/" in input_str:
segments = input_str.split("/")
result_segments = []
unique_segments = set()
# 处理每一段并去重
for segment in segments:
# 递归处理每一段
processed_segment = self.format_coupon(segment)
# 去重
if processed_segment not in unique_segments:
unique_segments.add(processed_segment)
result_segments.append(processed_segment)
return "/".join(result_segments)
# 关键词检测
keywords = ["拍立减", "直接拍", "闪降", "达人闪降"]
for keyword in keywords:
if keyword in input_str:
return "直接拍"
# 处理包含"元"的情况
if "元" in input_str:
return input_str.replace("元", "元优惠券")
# 处理纯数字情况
if input_str.isdigit():
if int(input_str) > 0:
return input_str + "元优惠券"
else:
return input_str
# 默认返回原始值
return input_str
def extract_item_id(self, url):
"""从商品链接中提取商品ID"""
if pd.isna(url) or not isinstance(url, str):
return ""
# 尝试从URL参数中提取id
try:
parsed_url = urlparse(url)
query_params = parse_qs(parsed_url.query)
if 'id' in query_params:
return query_params['id'][0]
elif 'ft.t' in url and 'id=' in url:
# 处理淘宝链接格式
match = re.search(r'id=(\d+)', url)
if match:
return match.group(1)
except:
pass
# 如果上述方法失败,尝试使用正则表达式匹配
match = re.search(r'(?:id=|&id=)(\d+)', url)
if match:
return match.group(1)
return ""
def split_coupon_links(self, links_text):
"""拆分优惠券链接"""
if pd.isna(links_text) or not isinstance(links_text, str):
return [], []
# 使用正则表达式提取所有URL
urls = re.findall(r'https?://[^\s<>"{}|\\^`[\]]+', links_text)
# 提取优惠券ID或关键信息
coupon_ids = []
for url in urls:
# 尝试从URL中提取优惠券ID
match = re.search(r'(?:activityId|activity_id)=([^&]+)', url)
if match:
coupon_ids.append(match.group(1))
else:
# 如果没有activityId,尝试提取其他标识
match = re.search(r'/([^/]+)(?=\?|$)', url)
if match:
coupon_ids.append(match.group(1))
else:
coupon_ids.append("优惠券")
return urls, coupon_ids
def extract_coupon_amounts(self, discount_text):
"""从优惠信息中提取优惠券金额"""
if pd.isna(discount_text) or not isinstance(discount_text, str):
return []
# 匹配优惠券金额模式
patterns = [
r"领(\d+)元优惠券", # 匹配"领XX元优惠券"
r"优惠券(\d+)元", # 匹配"优惠券XX元"
r"(\d+)元优惠券", # 匹配"XX元优惠券"
r"领券(\d+)元", # 匹配"领券XX元"
r"领(\d+)元券", # 匹配"领XX元券"
r"(\d+)元券", # 匹配"XX元券"
r"立减(\d+)元", # 匹配"立减XX元"
r"减(\d+)元", # 匹配"减XX元"
]
amounts = []
for pattern in patterns:
matches = re.findall(pattern, discount_text)
amounts.extend(matches)
# 去重并排序
unique_amounts = list(set(amounts))
unique_amounts.sort(key=lambda x: int(x))
return unique_amounts
def fill_coupon_columns(self, formatted_coupon, discount_text):
"""根据H列的内容填充I列到M列"""
# 如果H列为空,则I列到M列保持留空
if pd.isna(formatted_coupon) or formatted_coupon == "":
return [""] * 5
# 如果H列是"直接拍",则I列填充"直接拍",其他列留空
if formatted_coupon == "直接拍":
return ["直接拍"] + [""] * 4
# 如果H列包含斜杠,则按斜杠分割
if "/" in formatted_coupon:
coupons = formatted_coupon.split("/")
# 只取前5个
return coupons[:5] + [""] * (5 - len(coupons))
# 如果H列是单个优惠券金额,则提取金额并添加"元优惠券"后缀
if formatted_coupon.endswith("元优惠券"):
return [formatted_coupon] + [""] * 4
# 默认情况下,从优惠信息中提取金额并添加"元优惠券"后缀
amounts = self.extract_coupon_amounts(discount_text)
formatted_amounts = [f"{amount}元优惠券" for amount in amounts]
# 只取前5个
return formatted_amounts[:5] + [""] * (5 - len(formatted_amounts))
def create_hyperlinks_sheet(self, writer, hyperlink_source_path, sheet2_df):
"""创建超链接工作表,A列到C列来自源表,D列到H列根据Sheet2相关列创建"""
# 读取超链接源表
hyperlink_df = pd.read_excel(hyperlink_source_path)
# 创建新的工作表
hyperlink_sheet_name = "超链接表"
# 获取工作表对象
workbook = writer.book
worksheet = workbook.create_sheet(hyperlink_sheet_name)
# 写入A列到C列的数据(来自超链接源表)
for r_idx, row in enumerate(dataframe_to_rows(hyperlink_df, index=False, header=True), 1):
for c_idx, value in enumerate(row[:3], 1): # 只取前三列
worksheet.cell(row=r_idx, column=c_idx, value=value)
# 添加D列到H列的标题
headers = ["优惠券1", "优惠券2", "优惠券3", "优惠券4", "优惠券5"]
for c_idx, header in enumerate(headers, 4): # 从第4列开始
worksheet.cell(row=1, column=c_idx, value=header)
# 模拟VBA宏的逻辑
success_count = [0, 0, 0, 0, 0] # D, E, F, G, H列的成功计数
empty_count = [0, 0, 0, 0, 0] # D, E, F, G, H列的空计数
direct_buy_count = [0, 0, 0, 0, 0] # D, E, F, G, H列的直接拍计数
error_count = 0
# 获取最后一行
last_row = min(len(hyperlink_df), len(sheet2_df)) + 1 # +1 因为第一行是标题
# 处理每一行
for i in range(2, last_row + 1): # 从第2行开始(跳过标题行)
# 获取Sheet2中对应行的数据
sheet2_row_idx = i - 2 # 转换为0-based索引
# 处理D列 (Sheet2的I列和O列)
try:
url = sheet2_df.iloc[sheet2_row_idx, 14] if len(sheet2_df.columns) > 14 else "" # O列
text = sheet2_df.iloc[sheet2_row_idx, 8] if len(sheet2_df.columns) > 8 else "" # I列
target_cell = worksheet.cell(row=i, column=4) # D列
self.process_single_hyperlink(target_cell, url, text, success_count, 0, empty_count, 0, direct_buy_count, 0)
except Exception as e:
error_count += 1
print(f"处理D列第{i}行时出错: {e}")
# 处理E列 (Sheet2的J列和P列)
try:
url = sheet2_df.iloc[sheet2_row_idx, 15] if len(sheet2_df.columns) > 15 else "" # P列
text = sheet2_df.iloc[sheet2_row_idx, 9] if len(sheet2_df.columns) > 9 else "" # J列
target_cell = worksheet.cell(row=i, column=5) # E列
self.process_single_hyperlink(target_cell, url, text, success_count, 1, empty_count, 1, direct_buy_count, 1)
except Exception as e:
error_count += 1
print(f"处理E列第{i}行时出错: {e}")
# 处理F列 (Sheet2的K列和Q列)
try:
url = sheet2_df.iloc[sheet2_row_idx, 16] if len(sheet2_df.columns) > 16 else "" # Q列
text = sheet2_df.iloc[sheet2_row_idx, 10] if len(sheet2_df.columns) > 10 else "" # K列
target_cell = worksheet.cell(row=i, column=6) # F列
self.process_single_hyperlink(target_cell, url, text, success_count, 2, empty_count, 2, direct_buy_count, 2)
except Exception as e:
error_count += 1
print(f"处理F列第{i}行时出错: {e}")
# 处理G列 (Sheet2的L列和R列)
try:
url = sheet2_df.iloc[sheet2_row_idx, 17] if len(sheet2_df.columns) > 17 else "" # R列
text = sheet2_df.iloc[sheet2_row_idx, 11] if len(sheet2_df.columns) > 11 else "" # L列
target_cell = worksheet.cell(row=i, column=7) # G列
self.process_single_hyperlink(target_cell, url, text, success_count, 3, empty_count, 3, direct_buy_count, 3)
except Exception as e:
error_count += 1
print(f"处理G列第{i}行时出错: {e}")
# 处理H列 (Sheet2的M列和S列)
try:
url = sheet2_df.iloc[sheet2_row_idx, 18] if len(sheet2_df.columns) > 18 else "" # S列
text = sheet2_df.iloc[sheet2_row_idx, 12] if len(sheet2_df.columns) > 12 else "" # M列
target_cell = worksheet.cell(row=i, column=8) # H列
self.process_single_hyperlink(target_cell, url, text, success_count, 4, empty_count, 4, direct_buy_count, 4)
except Exception as e:
error_count += 1
print(f"处理H列第{i}行时出错: {e}")
# 添加处理结果统计
result_cell = worksheet.cell(row=last_row + 2, column=1)
result_cell.value = "处理结果统计:"
result_cell = worksheet.cell(row=last_row + 3, column=1)
result_cell.value = f"D列成功: {success_count[0]} 个,空白: {empty_count[0]} 个,直接拍: {direct_buy_count[0]} 个"
result_cell = worksheet.cell(row=last_row + 4, column=1)
result_cell.value = f"E列成功: {success_count[1]} 个,空白: {empty_count[1]} 个,直接拍: {direct_buy_count[1]} 个"
result_cell = worksheet.cell(row=last_row + 5, column=1)
result_cell.value = f"F列成功: {success_count[2]} 个,空白: {empty_count[2]} 个,直接拍: {direct_buy_count[2]} 个"
result_cell = worksheet.cell(row=last_row + 6, column=1)
result_cell.value = f"G列成功: {success_count[3]} 个,空白: {empty_count[3]} 个,直接拍: {direct_buy_count[3]} 个"
result_cell = worksheet.cell(row=last_row + 7, column=1)
result_cell.value = f"H列成功: {success_count[4]} 个,空白: {empty_count[4]} 个,直接拍: {direct_buy_count[4]} 个"
result_cell = worksheet.cell(row=last_row + 8, column=1)
result_cell.value = f"失败: {error_count} 个"
def process_single_hyperlink(self, target_cell, url, display_text, success_count, idx1, empty_count, idx2, direct_buy_count, idx3):
"""处理单个超链接,模拟VBA宏中的ProcessSingleHyperlink函数"""
# 清除单元格内容和超链接
target_cell.value = None
if target_cell.hyperlink:
target_cell.hyperlink = None
# 检查URL是否存在且有效
has_url = bool(url and not pd.isna(url) and str(url).strip())
# 检查文本是否存在且有效
has_text = bool(display_text and not pd.isna(display_text) and str(display_text).strip())
# 如果既没有URL也没有文本,保持单元格为空
if not has_url and not has_text:
empty_count[idx2] += 1
return
# 检查是否为"直接拍"文本
if has_text and str(display_text) == "直接拍":
target_cell.value = display_text
# 设置默认字体颜色(黑色)和无下划线
target_cell.font = Font(color="000000", underline="none")
direct_buy_count[idx3] += 1
return
# 如果有URL但无文本,使用URL作为显示文本
if has_url and not has_text:
display_text = "链接"
# 处理URL
if has_url:
# 添加协议前缀(如果缺失)
url_str = str(url)
if "://" not in url_str:
url_str = "http://" + url_str
# 创建超链接
target_cell.hyperlink = url_str
target_cell.value = display_text
# 去除下划线并设置字体颜色
target_cell.font = Font(color="0000FF", underline="none") # 蓝色字体,无下划线
success_count[idx1] += 1
else:
# 只有文本没有URL,只显示文本
target_cell.value = display_text
# 设置红色字体(非超链接文本)
target_cell.font = Font(color="FF0000", underline="none") # 红色,无下划线
empty_count[idx2] += 1
# 设置居中对齐
target_cell.alignment = Alignment(horizontal="center", vertical="center")
def process_excel_file(self, source_path, hyperlink_source_path, output_path):
"""处理Excel文件的主要函数"""
# 读取源表
source_df = pd.read_excel(source_path, sheet_name='Sheet1')
# 清除C列(优惠方式列)的空格
source_df.iloc[:, 2] = source_df.iloc[:, 2].apply(self.clean_text)
# 创建目标DataFrame
target_columns = [
'顺序', '商品名称', '商品链接', '商品ID', '优惠方式(xx元优惠券/拍立减xx元)',
'优惠券链接', '优惠券提取', '公式放这一列', '优惠券1', '优惠券2', '优惠券3',
'优惠券4', '优惠券5', '优惠券链接(提取)', '优惠券链接1', '优惠券链接2',
'优惠券链接3', '优惠券链接4', '优惠券链接5'
]
target_df = pd.DataFrame(columns=target_columns)
# 填充数据
for idx, row in source_df.iterrows():
# 顺序
target_df.loc[idx, '顺序'] = idx + 1
# 商品名称和链接
target_df.loc[idx, '商品名称'] = row.iloc[0] # A列
target_df.loc[idx, '商品链接'] = row.iloc[1] # B列
# 商品ID
item_id = self.extract_item_id(row.iloc[1])
target_df.loc[idx, '商品ID'] = item_id
# 优惠方式(已清除空格)
discount_info = row.iloc[2] # C列
target_df.loc[idx, '优惠方式(xx元优惠券/拍立减xx元)'] = discount_info
# 优惠券链接
coupon_links = row.iloc[3] # D列
target_df.loc[idx, '优惠券链接'] = coupon_links
# 应用ExtractCouponInfo函数
coupon_extract = self.extract_coupon_info(discount_info)
target_df.loc[idx, '优惠券提取'] = coupon_extract
# 应用FormatCoupon函数
formatted_coupon = self.format_coupon(coupon_extract)
target_df.loc[idx, '公式放这一列'] = formatted_coupon
# 根据H列的内容填充I列到M列
coupon_values = self.fill_coupon_columns(formatted_coupon, discount_info)
for i in range(5):
target_df.loc[idx, f'优惠券{i+1}'] = coupon_values[i]
# 处理优惠券链接
urls, coupon_ids = self.split_coupon_links(coupon_links)
# 优惠券链接提取 (用逗号分隔的URL)
target_df.loc[idx, '优惠券链接(提取)'] = ", ".join(urls) if urls else ""
# 填充优惠券链接1-5列(O列到S列)
for i in range(5):
if i < len(urls):
target_df.loc[idx, f'优惠券链接{i+1}'] = urls[i]
else:
target_df.loc[idx, f'优惠券链接{i+1}'] = ""
# 保存结果
with pd.ExcelWriter(output_path, engine='openpyxl') as writer:
# 保存Sheet2
target_df.to_excel(writer, sheet_name='Sheet2', index=False)
# 创建超链接工作表
self.create_hyperlinks_sheet(writer, hyperlink_source_path, target_df)
# 创建主窗口并运行应用
if __name__ == "__main__":
root = tk.Tk()
app = ExcelProcessorApp(root)
root.mainloop()
以上代码变更为C列点击后跳转网页,E列优惠方式(xx元优惠券/拍立减xx元)和F列优惠券链接预览可编辑
最新发布