处理手机点击淘宝优惠链接提示“目标地址不可达”错误

注意:

该案例针对性较强,

如果你的网络环境中没有AdGuard之类的广告过滤器在工作,

可能对你并不适用

最近发现手机上从优惠App访问淘宝的优惠商品链接
频繁会提示“服务竟然出错了 目标地址不可达”的错误
一开始我以为是优惠过期了
后来它出现过于频繁,于是我才着手进行处理

错误表现

点击跳转链接,能够实现App之间的跳转
但是跳转后无法正常浏览内容
在这里插入图片描述
另外的异常现象为
淘宝App内已经可以看到商品及优惠券信息
点击领券提示领券成功
但是无法自动跳转至商品页,手动跳转同样无法完成

排查思路

打开AdGuard日志记录功能,在查询日志功能页设置好过滤器
重新尝试整个错误过程
遇到有问题的步骤即刷新日志,查看哪些规则刚被应用

处理方法

AdGuard中放行被屏蔽链接即可

@@||s.click.taobao.com^$important
@@||s.click.tmall.com^$important
import pandas as pd import re import numpy as np import tkinter as tk from tkinter import filedialog, messagebox, ttk from urllib.parse import urlparse, parse_qs from collections import OrderedDict import os from openpyxl.styles import Font, Alignment from openpyxl import Workbook from openpyxl.utils.dataframe import dataframe_to_rows import webbrowser class ExcelProcessorApp: def __init__(self, root): self.root = root self.root.title("Excel表格处理器") self.root.geometry("700x550") # 增加高度以容纳预览按钮 # 初始化变量 self.source_file_path = tk.StringVar() self.hyperlink_source_path = tk.StringVar() self.output_file_path = tk.StringVar() self.preview_df = None # 存储预览数据 # 创建界面 self.create_widgets() def create_widgets(self): # 主框架 main_frame = ttk.Frame(self.root, padding="20") main_frame.grid(row=0, column=0, sticky=(tk.W, tk.E, tk.N, tk.S)) # 源文件选择区域 ttk.Label(main_frame, text="源表文件:").grid(row=0, column=0, sticky=tk.W, pady=5) ttk.Entry(main_frame, textvariable=self.source_file_path, width=50).grid(row=0, column=1, padx=5, pady=5) ttk.Button(main_frame, text="浏览...", command=self.browse_source_file).grid(row=0, column=2, padx=5, pady=5) # 超链接源表选择区域 ttk.Label(main_frame, text="超链接源表:").grid(row=1, column=0, sticky=tk.W, pady=5) ttk.Entry(main_frame, textvariable=self.hyperlink_source_path, width=50).grid(row=1, column=1, padx=5, pady=5) ttk.Button(main_frame, text="浏览...", command=self.browse_hyperlink_source_file).grid(row=1, column=2, padx=5, pady=5) # 输出文件选择区域 ttk.Label(main_frame, text="输出文件:").grid(row=2, column=0, sticky=tk.W, pady=5) ttk.Entry(main_frame, textvariable=self.output_file_path, width=50).grid(row=2, column=1, padx=5, pady=5) ttk.Button(main_frame, text="浏览...", command=self.browse_output_file).grid(row=2, column=2, padx=5, pady=5) # 按钮框架 button_frame = ttk.Frame(main_frame) button_frame.grid(row=3, column=0, columnspan=3, pady=10) # 预览按钮 ttk.Button(button_frame, text="预览结果", command=self.preview_results).pack(side=tk.LEFT, padx=5) # 处理按钮 ttk.Button(button_frame, text="开始处理", command=self.process_file).pack(side=tk.LEFT, padx=5) # 进度条 self.progress = ttk.Progressbar(main_frame, mode='indeterminate') self.progress.grid(row=4, column=0, columnspan=3, sticky=(tk.W, tk.E), pady=10) # 状态标签 self.status_label = ttk.Label(main_frame, text="请选择源表和输出文件") self.status_label.grid(row=5, column=0, columnspan=3, pady=10) # 配置网格权重 main_frame.columnconfigure(1, weight=1) self.root.columnconfigure(0, weight=1) self.root.rowconfigure(0, weight=1) def browse_source_file(self): file_path = filedialog.askopenfilename( title="选择源表文件", filetypes=[("Excel文件", "*.xlsx"), ("所有文件", "*.*")] ) if file_path: self.source_file_path.set(file_path) # 自动设置输出文件路径 dir_name = os.path.dirname(file_path) base_name = os.path.basename(file_path) name, ext = os.path.splitext(base_name) output_path = os.path.join(dir_name, f"{name}_处理结果{ext}") self.output_file_path.set(output_path) self.status_label.config(text="已选择源表文件,请选择超链接源表") def browse_hyperlink_source_file(self): file_path = filedialog.askopenfilename( title="选择超链接源表文件", filetypes=[("Excel文件", "*.xlsx"), ("所有文件", "*.*")] ) if file_path: self.hyperlink_source_path.set(file_path) self.status_label.config(text="已选择超链接源表,请点击开始处理") def browse_output_file(self): file_path = filedialog.asksaveasfilename( title="选择输出文件", defaultextension=".xlsx", filetypes=[("Excel文件", "*.xlsx"), ("所有文件", "*.*")] ) if file_path: self.output_file_path.set(file_path) def preview_results(self): """预览处理结果""" if not self.source_file_path.get(): messagebox.showerror("错误", "请先选择源表文件") return if not self.hyperlink_source_path.get(): messagebox.showerror("错误", "请先选择超链接源表文件") return # 开始处理预览 self.progress.start() self.status_label.config(text="正在生成预览,请稍候...") self.root.update() try: # 调用处理函数,生成全部预览数据 self.preview_df = self.process_excel_file_preview( self.source_file_path.get(), self.hyperlink_source_path.get() ) # 停止进度条 self.progress.stop() self.status_label.config(text="预览生成完成") # 显示预览窗口 self.show_preview(self.preview_df) except Exception as e: self.progress.stop() self.status_label.config(text="预览生成失败") messagebox.showerror("错误", f"生成预览时发生错误:\n{str(e)}") def show_preview(self, preview_df): """显示预览窗口""" # 创建预览窗口 preview_window = tk.Toplevel(self.root) preview_window.title("处理结果预览") preview_window.geometry("1200x700") # 增加宽度以容纳更多列 # 创建框架 frame = ttk.Frame(preview_window, padding="10") frame.grid(row=0, column=0, sticky=(tk.W, tk.E, tk.N, tk.S)) # 添加标题 title_label = ttk.Label(frame, text="处理结果预览", font=("Arial", 12, "bold")) title_label.grid(row=0, column=0, columnspan=2, pady=(0, 10)) # 创建表格框架 table_frame = ttk.Frame(frame) table_frame.grid(row=1, column=0, columnspan=2, sticky=(tk.W, tk.E, tk.N, tk.S)) # 创建表格 columns = list(preview_df.columns) tree = ttk.Treeview(table_frame, columns=columns, show="headings", height=20) # 设置列标题和宽度 col_widths = { '顺序': 50, '商品名称': 150, '商品链接': 150, '商品ID': 80, '优惠方式(xx元优惠券/拍立减xx元)': 150, '优惠链接': 150, '优惠券提取': 100, '公式放这一列': 100, '优惠券1': 100, '优惠券2': 100, '优惠券3': 100, '优惠券4': 100, '优惠券5': 100, '优惠链接(提取)': 150, '优惠链接1': 150, '优惠链接2': 150, '优惠链接3': 150, '优惠链接4': 150, '优惠链接5': 150 } for col in columns: tree.heading(col, text=col) width = col_widths.get(col, 100) tree.column(col, width=width, minwidth=80, anchor=tk.CENTER) # 添加滚动条 vsb = ttk.Scrollbar(table_frame, orient="vertical", command=tree.yview) hsb = ttk.Scrollbar(table_frame, orient="horizontal", command=tree.xview) tree.configure(yscrollcommand=vsb.set, xscrollcommand=hsb.set) # 布局 tree.grid(row=0, column=0, sticky=(tk.W, tk.E, tk.N, tk.S)) vsb.grid(row=0, column=1, sticky=(tk.N, tk.S)) hsb.grid(row=1, column=0, sticky=(tk.W, tk.E)) # 添加数据 for _, row in preview_df.iterrows(): tree.insert("", "end", values=list(row)) # 绑定点击事件到C列(商品链接) tree.bind("<Button-1>", lambda event: self.on_tree_click(event, tree, preview_window)) # 创建可编辑的E列和F列 self.create_editable_columns(tree, preview_window, preview_df) # 添加按钮 button_frame = ttk.Frame(frame) button_frame.grid(row=2, column=0, columnspan=2, pady=10) ttk.Button(button_frame, text="确认并开始处理", command=lambda: [preview_window.destroy(), self.process_file()]).pack(side=tk.LEFT, padx=5) ttk.Button(button_frame, text="关闭预览", command=preview_window.destroy).pack(side=tk.LEFT, padx=5) # 配置权重 frame.columnconfigure(0, weight=1) frame.rowconfigure(1, weight=1) table_frame.columnconfigure(0, weight=1) table_frame.rowconfigure(0, weight=1) preview_window.columnconfigure(0, weight=1) preview_window.rowconfigure(0, weight=1) # 存储tree和df引用以便后续使用 self.preview_tree = tree self.preview_window = preview_window self.preview_df = preview_df def create_editable_columns(self, tree, window, df): """为E列和F列创建可编辑的Entry小部件""" # 获取E列和F列的索引 columns = tree["columns"] # 找到E列和F列在Treeview中的索引 e_col_index = None f_col_index = None for i, col in enumerate(columns): if tree.heading(col)["text"] == "优惠链接": # E列 e_col_index = i elif tree.heading(col)["text"] == "优惠券提取": # F列 f_col_index = i if e_col_index is None or f_col_index is None: messagebox.showerror("错误", "无法找到可编辑列") return # 存储所有Entry小部件的引用 self.entry_widgets_e = {} self.entry_widgets_f = {} # 为每一行创建Entry小部件 for i, item_id in enumerate(tree.get_children()): # 获取行位置和大小 e_bbox = tree.bbox(item_id, column=e_col_index) f_bbox = tree.bbox(item_id, column=f_col_index) if e_bbox: # 创建E列的Entry entry_e = ttk.Entry(window) entry_e.place(x=e_bbox[0], y=e_bbox[1], width=e_bbox[2], height=e_bbox[3]) # 设置初始值 current_value = df.iloc[i, e_col_index] entry_e.insert(0, str(current_value) if not pd.isna(current_value) else "") # 绑定事件以更新DataFrame entry_e.bind("<FocusOut>", lambda e, row=i, col=e_col_index: self.update_dataframe_from_entry(e, row, col)) # 绑定回车键事件 entry_e.bind("<Return>", lambda e: e.widget.master.focus()) self.entry_widgets_e[item_id] = entry_e if f_bbox: # 创建F列的Entry entry_f = ttk.Entry(window) entry_f.place(x=f_bbox[0], y=f_bbox[1], width=f_bbox[2], height=f_bbox[3]) # 设置初始值 current_value = df.iloc[i, f_col_index] entry_f.insert(0, str(current_value) if not pd.isna(current_value) else "") # 绑定事件以更新DataFrame entry_f.bind("<FocusOut>", lambda e, row=i, col=f_col_index: self.update_dataframe_from_entry(e, row, col)) # 绑定回车键事件 entry_f.bind("<Return>", lambda e: e.widget.master.focus()) self.entry_widgets_f[item_id] = entry_f # 绑定Treeview的滚动事件,以便在滚动时更新Entry位置 def update_entry_positions(event=None): for item_id in tree.get_children(): e_bbox = tree.bbox(item_id, column=e_col_index) f_bbox = tree.bbox(item_id, column=f_col_index) if e_bbox and item_id in self.entry_widgets_e: self.entry_widgets_e[item_id].place(x=e_bbox[0], y=e_bbox[1], width=e_bbox[2], height=e_bbox[3]) if f_bbox and item_id in self.entry_widgets_f: self.entry_widgets_f[item_id].place(x=f_bbox[0], y=f_bbox[1], width=f_bbox[2], height=f_bbox[3]) # 绑定滚动事件 tree.bind("<MouseWheel>", update_entry_positions) tree.bind("<Button-4>", update_entry_positions) # Linux上滚 tree.bind("<Button-5>", update_entry_positions) # Linux下滚 # 获取滚动条并绑定事件 vsb = tree.master.children.get('!scrollbar') hsb = tree.master.children.get('!scrollbar2') if vsb: vsb.configure(command=lambda *args: [tree.yview(*args), update_entry_positions()]) if hsb: hsb.configure(command=lambda *args: [tree.xview(*args), update_entry_positions()]) # 初始更新位置 window.after(100, update_entry_positions) def update_dataframe_from_entry(self, event, row, col): """从Entry小部件更新DataFrame""" widget = event.widget new_value = widget.get() if self.preview_df is not None and row < len(self.preview_df): self.preview_df.iloc[row, col] = new_value # 同时更新Treeview中的显示 item_id = self.preview_tree.get_children()[row] values = list(self.preview_tree.item(item_id)['values']) values[col] = new_value self.preview_tree.item(item_id, values=values) def on_tree_click(self, event, tree, window): """处理树形视图的点击事件""" region = tree.identify("region", event.x, event.y) if region == "cell": column = tree.identify_column(event.x) row = tree.identify_row(event.y) # 检查是否是C列(商品链接列) if column == "#3": # 列索引从1开始,C列是第3列 item = tree.item(row) values = item['values'] if len(values) >= 3: # 确保有足够的值 url = values[2] # C列是第3个值(索引2) if url and isinstance(url, str) and url.startswith(('http://', 'https://')): webbrowser.open_new(url) def process_file(self): if not self.source_file_path.get(): messagebox.showerror("错误", "请先选择源表文件") return if not self.hyperlink_source_path.get(): messagebox.showerror("错误", "请先选择超链接源表文件") return if not self.output_file_path.get(): messagebox.showerror("错误", "请先选择输出文件") return # 开始处理 self.progress.start() self.status_label.config(text="正在处理文件,请稍候...") self.root.update() try: # 如果有预览数据,使用预览数据 if self.preview_df is not None: # 保存结果 with pd.ExcelWriter(self.output_file_path.get(), engine='openpyxl') as writer: # 保存Sheet2 self.preview_df.to_excel(writer, sheet_name='Sheet2', index=False) # 创建超链接工作表 self.create_hyperlinks_sheet(writer, self.hyperlink_source_path.get(), self.preview_df) else: # 调用处理函数 self.process_excel_file( self.source_file_path.get(), self.hyperlink_source_path.get(), self.output_file_path.get() ) # 处理完成 self.progress.stop() self.status_label.config(text="处理完成!") messagebox.showinfo("成功", f"文件处理完成,已保存到:\n{self.output_file_path.get()}") except Exception as e: self.progress.stop() self.status_label.config(text="处理失败") messagebox.showerror("错误", f"处理文件时发生错误:\n{str(e)}") # 以下所有其他方法保持不变 def process_excel_file_preview(self, source_path, hyperlink_source_path): """处理Excel文件用于预览""" # 读取源表 source_df = pd.read_excel(source_path, sheet_name='Sheet1') # 清除C列(优惠方式列)的空格 source_df.iloc[:, 2] = source_df.iloc[:, 2].apply(self.clean_text) # 创建目标DataFrame target_columns = [ '顺序', '商品名称', '商品链接', '商品ID', '优惠方式(xx元优惠券/拍立减xx元)', '优惠链接', '优惠券提取', '公式放这一列', '优惠券1', '优惠券2', '优惠券3', '优惠券4', '优惠券5', '优惠链接(提取)', '优惠链接1', '优惠链接2', '优惠链接3', '优惠链接4', '优惠链接5' ] target_df = pd.DataFrame(columns=target_columns) # 填充数据 for idx, row in source_df.iterrows(): # 顺序 target_df.loc[idx, '顺序'] = idx + 1 # 商品名称和链接 target_df.loc[idx, '商品名称'] = row.iloc[0] # A列 target_df.loc[idx, '商品链接'] = row.iloc[1] # B列 # 商品ID item_id = self.extract_item_id(row.iloc[1]) target_df.loc[idx, '商品ID'] = item_id # 优惠方式(已清除空格) discount_info = row.iloc[2] # C列 target_df.loc[idx, '优惠方式(xx元优惠券/拍立减xx元)'] = discount_info # 优惠链接 coupon_links = row.iloc[3] # D列 target_df.loc[idx, '优惠链接'] = coupon_links # 应用ExtractCouponInfo函数 coupon_extract = self.extract_coupon_info(discount_info) target_df.loc[idx, '优惠券提取'] = coupon_extract # 应用FormatCoupon函数 formatted_coupon = self.format_coupon(coupon_extract) target_df.loc[idx, '公式放这一列'] = formatted_coupon # 根据H列的内容填充I列到M列 coupon_values = self.fill_coupon_columns(formatted_coupon, discount_info) for i in range(5): target_df.loc[idx, f'优惠券{i+1}'] = coupon_values[i] # 处理优惠链接 urls, coupon_ids = self.split_coupon_links(coupon_links) # 优惠链接提取 (用逗号分隔的URL) target_df.loc[idx, '优惠链接(提取)'] = ", ".join(urls) if urls else "" # 填充优惠链接1-5列(O列到S列) for i in range(5): if i < len(urls): target_df.loc[idx, f'优惠链接{i+1}'] = urls[i] else: target_df.loc[idx, f'优惠链接{i+1}'] = "" return target_df def clean_text(self, text): """清除文本中的多余空格""" if pd.isna(text) or not isinstance(text, str): return text # 去除前后空格 cleaned = text.strip() # 将多个连续空格替换为单个空格 cleaned = re.sub(r'\s+', ' ', cleaned) return cleaned def extract_coupon_info(self, text): """模拟VBA中的ExtractCouponInfo函数""" if pd.isna(text) or not isinstance(text, str): return "" # 第一段正则表达式:匹配优惠券相关短语 pattern1 = r"-?[1-9]\d*[\u4e00-\u9fa5][优][惠][券]|[领]-?[1-9]\d*[\u4e00-\u9fa5]{3,6}|[领][\u4e00-\u9fa5]{1,4}-?[1-9]\d*[元]|[领]-?[1-9]\d*[\u4e00-\u9fa5]{1,3}|[优][惠][券]-?[1-9]\d*|[领][券]-?[1-9]\d*|-?[1-9]\d*[优][惠][券]|[拍][立][减]|[直][接][拍]|[达][人][闪][降]|[闪][降]" # 第二段正则表达式:提取数字部分或特定关键词 pattern2 = r"-?[1-9]\d*[元]|-?[1-9]\d*|[拍][立][减]|[直][接][拍]|[达][人][闪][降]|[闪][降]" # 正则表达式匹配双破折号分隔的数字格式 pattern_dash = r"\b(?:\d{1,3}--)+\d{1,3}\b" # 执行第一段匹配 matches1 = re.findall(pattern1, text) # 执行双破折号分隔数字的匹配 dash_matches = re.findall(pattern_dash, text) # 使用有序字典来存储唯一值和它们的出现顺序 unique_values = OrderedDict() counter = 0 # 对每个匹配项执行第二段匹配 for match in matches1: matches2 = re.findall(pattern2, match) for value in matches2: # 添加到字典(自动去重)并记录顺序 if value not in unique_values: unique_values[value] = counter counter += 1 # 处理双破折号分隔的数字 for dash_match in dash_matches: # 分割数字并添加"元"后缀 numbers = dash_match.split("--") for number in numbers: number_with_yuan = number + "元" # 添加到字典(自动去重)并记录顺序 if number_with_yuan not in unique_values: unique_values[number_with_yuan] = counter counter += 1 # 按照原始顺序将值连接成字符串 result = "/".join(unique_values.keys()) return result def format_coupon(self, input_value): """模拟VBA中的FormatCoupon函数""" if pd.isna(input_value) or (isinstance(input_value, str) and input_value.strip() == ""): return "" # 确保处理字符串类型 if not isinstance(input_value, str): input_str = str(input_value) else: input_str = input_value # 检查是否已包含优惠券后缀 if "元优惠券" in input_str: return input_str # 处理带斜杠的分段情况 if "/" in input_str: segments = input_str.split("/") result_segments = [] unique_segments = set() # 处理每一段并去重 for segment in segments: # 递归处理每一段 processed_segment = self.format_coupon(segment) # 去重 if processed_segment not in unique_segments: unique_segments.add(processed_segment) result_segments.append(processed_segment) return "/".join(result_segments) # 关键词检测 keywords = ["拍立减", "直接拍", "闪降", "达人闪降"] for keyword in keywords: if keyword in input_str: return "直接拍" # 处理包含"元"的情况 if "元" in input_str: return input_str.replace("元", "元优惠券") # 处理纯数字情况 if input_str.isdigit(): if int(input_str) > 0: return input_str + "元优惠券" else: return input_str # 默认返回原始值 return input_str def extract_item_id(self, url): """从商品链接中提取商品ID""" if pd.isna(url) or not isinstance(url, str): return "" # 尝试从URL参数中提取id try: parsed_url = urlparse(url) query_params = parse_qs(parsed_url.query) if 'id' in query_params: return query_params['id'][0] elif 'ft.t' in url and 'id=' in url: # 处理淘宝链接格式 match = re.search(r'id=(\d+)', url) if match: return match.group(1) except: pass # 如果上述方法失败,尝试使用正则表达式匹配 match = re.search(r'(?:id=|&id=)(\d+)', url) if match: return match.group(1) return "" def split_coupon_links(self, links_text): """拆分优惠链接""" if pd.isna(links_text) or not isinstance(links_text, str): return [], [] # 使用正则表达式提取所有URL urls = re.findall(r'https?://[^\s<>"{}|\\^`[\]]+', links_text) # 提取优惠券ID或关键信息 coupon_ids = [] for url in urls: # 尝试从URL中提取优惠券ID match = re.search(r'(?:activityId|activity_id)=([^&]+)', url) if match: coupon_ids.append(match.group(1)) else: # 如果没有activityId,尝试提取其他标识 match = re.search(r'/([^/]+)(?=\?|$)', url) if match: coupon_ids.append(match.group(1)) else: coupon_ids.append("优惠券") return urls, coupon_ids def extract_coupon_amounts(self, discount_text): """从优惠信息中提取优惠券金额""" if pd.isna(discount_text) or not isinstance(discount_text, str): return [] # 匹配优惠券金额模式 patterns = [ r"领(\d+)元优惠券", # 匹配"领XX元优惠券" r"优惠券(\d+)元", # 匹配"优惠券XX元" r"(\d+)元优惠券", # 匹配"XX元优惠券" r"领券(\d+)元", # 匹配"领券XX元" r"领(\d+)元券", # 匹配"领XX元券" r"(\d+)元券", # 匹配"XX元券" r"立减(\d+)元", # 匹配"立减XX元" r"减(\d+)元", # 匹配"减XX元" ] amounts = [] for pattern in patterns: matches = re.findall(pattern, discount_text) amounts.extend(matches) # 去重并排序 unique_amounts = list(set(amounts)) unique_amounts.sort(key=lambda x: int(x)) return unique_amounts def fill_coupon_columns(self, formatted_coupon, discount_text): """根据H列的内容填充I列到M列""" # 如果H列为空,则I列到M列保持留空 if pd.isna(formatted_coupon) or formatted_coupon == "": return [""] * 5 # 如果H列是"直接拍",则I列填充"直接拍",其他列留空 if formatted_coupon == "直接拍": return ["直接拍"] + [""] * 4 # 如果H列包含斜杠,则按斜杠分割 if "/" in formatted_coupon: coupons = formatted_coupon.split("/") # 只取前5个 return coupons[:5] + [""] * (5 - len(coupons)) # 如果H列是单个优惠券金额,则提取金额并添加"元优惠券"后缀 if formatted_coupon.endswith("元优惠券"): return [formatted_coupon] + [""] * 4 # 默认情况下,从优惠信息中提取金额并添加"元优惠券"后缀 amounts = self.extract_coupon_amounts(discount_text) formatted_amounts = [f"{amount}元优惠券" for amount in amounts] # 只取前5个 return formatted_amounts[:5] + [""] * (5 - len(formatted_amounts)) def create_hyperlinks_sheet(self, writer, hyperlink_source_path, sheet2_df): """创建超链接工作表,A列到C列来自源表,D列到H列根据Sheet2相关列创建""" # 读取超链接源表 hyperlink_df = pd.read_excel(hyperlink_source_path) # 创建新的工作表 hyperlink_sheet_name = "超链接表" # 获取工作表对象 workbook = writer.book worksheet = workbook.create_sheet(hyperlink_sheet_name) # 写入A列到C列的数据(来自超链接源表) for r_idx, row in enumerate(dataframe_to_rows(hyperlink_df, index=False, header=True), 1): for c_idx, value in enumerate(row[:3], 1): # 只取前三列 worksheet.cell(row=r_idx, column=c_idx, value=value) # 添加D列到H列的标题 headers = ["优惠券1", "优惠券2", "优惠券3", "优惠券4", "优惠券5"] for c_idx, header in enumerate(headers, 4): # 从第4列开始 worksheet.cell(row=1, column=c_idx, value=header) # 模拟VBA宏的逻辑 success_count = [0, 0, 0, 0, 0] # D, E, F, G, H列的成功计数 empty_count = [0, 0, 0, 0, 0] # D, E, F, G, H列的空计数 direct_buy_count = [0, 0, 0, 0, 0] # D, E, F, G, H列的直接拍计数 error_count = 0 # 获取最后一行 last_row = min(len(hyperlink_df), len(sheet2_df)) + 1 # +1 因为第一行是标题 # 处理每一行 for i in range(2, last_row + 1): # 从第2行开始(跳过标题行) # 获取Sheet2中对应行的数据 sheet2_row_idx = i - 2 # 转换为0-based索引 # 处理D列 (Sheet2的I列和O列) try: url = sheet2_df.iloc[sheet2_row_idx, 14] if len(sheet2_df.columns) > 14 else "" # O列 text = sheet2_df.iloc[sheet2_row_idx, 8] if len(sheet2_df.columns) > 8 else "" # I列 target_cell = worksheet.cell(row=i, column=4) # D列 self.process_single_hyperlink(target_cell, url, text, success_count, 0, empty_count, 0, direct_buy_count, 0) except Exception as e: error_count += 1 print(f"处理D列第{i}行时出错: {e}") # 处理E列 (Sheet2的J列和P列) try: url = sheet2_df.iloc[sheet2_row_idx, 15] if len(sheet2_df.columns) > 15 else "" # P列 text = sheet2_df.iloc[sheet2_row_idx, 9] if len(sheet2_df.columns) > 9 else "" # J列 target_cell = worksheet.cell(row=i, column=5) # E列 self.process_single_hyperlink(target_cell, url, text, success_count, 1, empty_count, 1, direct_buy_count, 1) except Exception as e: error_count += 1 print(f"处理E列第{i}行时出错: {e}") # 处理F列 (Sheet2的K列和Q列) try: url = sheet2_df.iloc[sheet2_row_idx, 16] if len(sheet2_df.columns) > 16 else "" # Q列 text = sheet2_df.iloc[sheet2_row_idx, 10] if len(sheet2_df.columns) > 10 else "" # K列 target_cell = worksheet.cell(row=i, column=6) # F列 self.process_single_hyperlink(target_cell, url, text, success_count, 2, empty_count, 2, direct_buy_count, 2) except Exception as e: error_count += 1 print(f"处理F列第{i}行时出错: {e}") # 处理G列 (Sheet2的L列和R列) try: url = sheet2_df.iloc[sheet2_row_idx, 17] if len(sheet2_df.columns) > 17 else "" # R列 text = sheet2_df.iloc[sheet2_row_idx, 11] if len(sheet2_df.columns) > 11 else "" # L列 target_cell = worksheet.cell(row=i, column=7) # G列 self.process_single_hyperlink(target_cell, url, text, success_count, 3, empty_count, 3, direct_buy_count, 3) except Exception as e: error_count += 1 print(f"处理G列第{i}行时出错: {e}") # 处理H列 (Sheet2的M列和S列) try: url = sheet2_df.iloc[sheet2_row_idx, 18] if len(sheet2_df.columns) > 18 else "" # S列 text = sheet2_df.iloc[sheet2_row_idx, 12] if len(sheet2_df.columns) > 12 else "" # M列 target_cell = worksheet.cell(row=i, column=8) # H列 self.process_single_hyperlink(target_cell, url, text, success_count, 4, empty_count, 4, direct_buy_count, 4) except Exception as e: error_count += 1 print(f"处理H列第{i}行时出错: {e}") # 添加处理结果统计 result_cell = worksheet.cell(row=last_row + 2, column=1) result_cell.value = "处理结果统计:" result_cell = worksheet.cell(row=last_row + 3, column=1) result_cell.value = f"D列成功: {success_count[0]} 个,空白: {empty_count[0]} 个,直接拍: {direct_buy_count[0]} 个" result_cell = worksheet.cell(row=last_row + 4, column=1) result_cell.value = f"E列成功: {success_count[1]} 个,空白: {empty_count[1]} 个,直接拍: {direct_buy_count[1]} 个" result_cell = worksheet.cell(row=last_row + 5, column=1) result_cell.value = f"F列成功: {success_count[2]} 个,空白: {empty_count[2]} 个,直接拍: {direct_buy_count[2]} 个" result_cell = worksheet.cell(row=last_row + 6, column=1) result_cell.value = f"G列成功: {success_count[3]} 个,空白: {empty_count[3]} 个,直接拍: {direct_buy_count[3]} 个" result_cell = worksheet.cell(row=last_row + 7, column=1) result_cell.value = f"H列成功: {success_count[4]} 个,空白: {empty_count[4]} 个,直接拍: {direct_buy_count[4]} 个" result_cell = worksheet.cell(row=last_row + 8, column=1) result_cell.value = f"失败: {error_count} 个" def process_single_hyperlink(self, target_cell, url, display_text, success_count, idx1, empty_count, idx2, direct_buy_count, idx3): """处理单个超链接,模拟VBA宏中的ProcessSingleHyperlink函数""" # 清除单元格内容和超链接 target_cell.value = None if target_cell.hyperlink: target_cell.hyperlink = None # 检查URL是否存在且有效 has_url = bool(url and not pd.isna(url) and str(url).strip()) # 检查文本是否存在且有效 has_text = bool(display_text and not pd.isna(display_text) and str(display_text).strip()) # 如果既没有URL也没有文本,保持单元格为空 if not has_url and not has_text: empty_count[idx2] += 1 return # 检查是否为"直接拍"文本 if has_text and str(display_text) == "直接拍": target_cell.value = display_text # 设置默认字体颜色(黑色)和无下划线 target_cell.font = Font(color="000000", underline="none") direct_buy_count[idx3] += 1 return # 如果有URL但无文本,使用URL作为显示文本 if has_url and not has_text: display_text = "链接" # 处理URL if has_url: # 添加协议前缀(如果缺失) url_str = str(url) if "://" not in url_str: url_str = "http://" + url_str # 创建超链接 target_cell.hyperlink = url_str target_cell.value = display_text # 去除下划线并设置字体颜色 target_cell.font = Font(color="0000FF", underline="none") # 蓝色字体,无下划线 success_count[idx1] += 1 else: # 只有文本没有URL,只显示文本 target_cell.value = display_text # 设置红色字体(非超链接文本) target_cell.font = Font(color="FF0000", underline="none") # 红色,无下划线 empty_count[idx2] += 1 # 设置居中对齐 target_cell.alignment = Alignment(horizontal="center", vertical="center") def process_excel_file(self, source_path, hyperlink_source_path, output_path): """处理Excel文件的主要函数""" # 读取源表 source_df = pd.read_excel(source_path, sheet_name='Sheet1') # 清除C列(优惠方式列)的空格 source_df.iloc[:, 2] = source_df.iloc[:, 2].apply(self.clean_text) # 创建目标DataFrame target_columns = [ '顺序', '商品名称', '商品链接', '商品ID', '优惠方式(xx元优惠券/拍立减xx元)', '优惠链接', '优惠券提取', '公式放这一列', '优惠券1', '优惠券2', '优惠券3', '优惠券4', '优惠券5', '优惠链接(提取)', '优惠链接1', '优惠链接2', '优惠链接3', '优惠链接4', '优惠链接5' ] target_df = pd.DataFrame(columns=target_columns) # 填充数据 for idx, row in source_df.iterrows(): # 顺序 target_df.loc[idx, '顺序'] = idx + 1 # 商品名称和链接 target_df.loc[idx, '商品名称'] = row.iloc[0] # A列 target_df.loc[idx, '商品链接'] = row.iloc[1] # B列 # 商品ID item_id = self.extract_item_id(row.iloc[1]) target_df.loc[idx, '商品ID'] = item_id # 优惠方式(已清除空格) discount_info = row.iloc[2] # C列 target_df.loc[idx, '优惠方式(xx元优惠券/拍立减xx元)'] = discount_info # 优惠链接 coupon_links = row.iloc[3] # D列 target_df.loc[idx, '优惠链接'] = coupon_links # 应用ExtractCouponInfo函数 coupon_extract = self.extract_coupon_info(discount_info) target_df.loc[idx, '优惠券提取'] = coupon_extract # 应用FormatCoupon函数 formatted_coupon = self.format_coupon(coupon_extract) target_df.loc[idx, '公式放这一列'] = formatted_coupon # 根据H列的内容填充I列到M列 coupon_values = self.fill_coupon_columns(formatted_coupon, discount_info) for i in range(5): target_df.loc[idx, f'优惠券{i+1}'] = coupon_values[i] # 处理优惠链接 urls, coupon_ids = self.split_coupon_links(coupon_links) # 优惠链接提取 (用逗号分隔的URL) target_df.loc[idx, '优惠链接(提取)'] = ", ".join(urls) if urls else "" # 填充优惠链接1-5列(O列到S列) for i in range(5): if i < len(urls): target_df.loc[idx, f'优惠链接{i+1}'] = urls[i] else: target_df.loc[idx, f'优惠链接{i+1}'] = "" # 保存结果 with pd.ExcelWriter(output_path, engine='openpyxl') as writer: # 保存Sheet2 target_df.to_excel(writer, sheet_name='Sheet2', index=False) # 创建超链接工作表 self.create_hyperlinks_sheet(writer, hyperlink_source_path, target_df) # 创建主窗口并运行应用 if __name__ == "__main__": root = tk.Tk() app = ExcelProcessorApp(root) root.mainloop() 以上代码变更为C列点击后跳转网页,E列优惠方式(xx元优惠券/拍立减xx元)和F列优惠链接预览可编辑
最新发布
09-20
评论
成就一亿技术人!
拼手气红包6.0元
还能输入1000个字符
 
红包 添加红包
表情包 插入表情
 条评论被折叠 查看
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值