#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
淘宝优惠券批量获取工具
支持批量处理多个商品URL获取优惠券信息
支持结果导出为JSON、CSV、TXT格式文件
支持收集多个优惠券链接
"""
from selenium import webdriver
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.options import Options
import time
import re
import sys
import os
import json
import csv
from datetime import datetime
class CouponFinder:
    """Drive a Chrome session to scrape coupon data from Taobao/Tmall product pages.

    For each product URL the finder opens the page in a Selenium-controlled
    browser, pauses until the operator confirms the page has loaded, and then
    scans the DOM for coupon wording and coupon links. Results can be printed
    to the console and exported as JSON, CSV or TXT.
    """

    # Placeholder strings stored in result dicts and shown to the user.
    _NO_COUPON = '无优惠券'
    _NO_LINK = "无优惠券链接"
    _MANUAL_CLAIM_HINT = "请在商品页面手动领取优惠券"

    # CSS selectors tried in order for the product title / price.
    _TITLE_SELECTORS = (
        'h1[data-spm="1000983"]',
        '.tb-detail-hd h1',
        '[class*="ItemTitle"]',
        'h1[class*="title"]',
        '.tb-main-title',
    )
    _PRICE_SELECTORS = (
        '.tb-rmb-num',
        '.notranslate',
        '[class*="price"] [class*="num"]',
        '.tm-price-cur',
        '.tb-price-cur',
    )

    # XPath expressions tried in order for coupon wording.
    _COUPON_TEXT_XPATHS = (
        "//*[contains(@class, 'coupon') and (contains(text(), '减') or contains(text(), '满减') or contains(text(), '折扣'))]",
        "//*[contains(@class, 'Coupon') and (contains(text(), '减') or contains(text(), '满减') or contains(text(), '折扣'))]",
        "//*[contains(text(), '优惠券') and (contains(text(), '减') or contains(text(), '满减') or contains(text(), '折扣'))]",
        "//*[contains(@class, 'promo') and (contains(text(), '减') or contains(text(), '满减') or contains(text(), '折扣'))]",
        "//div[contains(@class, 'coupon')]",
        "//span[contains(@class, 'coupon')]",
    )

    # XPath expressions for elements that may lead to a coupon page; every
    # selector is tried so that as many distinct links as possible are found.
    _COUPON_LINK_XPATHS = (
        "//a[contains(@href, 'coupon')]",
        "//a[contains(@href, 'activity')]",
        "//a[contains(text(), '领券')]",
        "//button[contains(text(), '领券')]",
        "//a[contains(@class, 'coupon')]",
    )

    def __init__(self, username=None, password=None):
        """Create a finder with no browser attached yet.

        Args:
            username: Optional account name. It is only stored on the instance;
                no method of this class performs an automated login.
            password: Optional account password, stored the same way.

        Both parameters are optional so that ``CouponFinder()`` and
        ``CouponFinder(username, password)`` are both valid calls.
        """
        self.username = username
        self.password = password
        self.driver = None
        self.wait = None

    def setup_driver(self):
        """Start Chrome and prepare ``self.driver`` / ``self.wait``.

        Returns:
            True when the browser started, False when any step failed (the
            error is printed).
        """
        try:
            options = Options()
            options.add_argument('--no-sandbox')
            options.add_argument('--disable-dev-shm-usage')
            options.add_argument('--disable-gpu')
            options.add_argument('--window-size=1920,1080')
            # Hide the "controlled by automated test software" indicators.
            options.add_experimental_option("excludeSwitches", ['enable-automation'])
            options.add_experimental_option('useAutomationExtension', False)
            # Present a regular desktop Chrome user agent.
            options.add_argument('--user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36')
            self.driver = webdriver.Chrome(options=options)
            # Make navigator.webdriver read as undefined on every new document.
            self.driver.execute_cdp_cmd("Page.addScriptToEvaluateOnNewDocument", {
                "source": """
                    Object.defineProperty(navigator, 'webdriver', {
                        get: () => undefined
                    })
                """
            })
            self.driver.maximize_window()
            self.wait = WebDriverWait(self.driver, 20)
            print("✅ 浏览器驱动初始化成功")
            return True
        except Exception as e:
            print(f"❌ 浏览器驱动初始化失败: {e}")
            return False

    def _shutdown_driver(self):
        """Quit the browser if one was started and drop the stale handles."""
        if self.driver:
            try:
                self.driver.quit()
            finally:
                self.driver = None
                self.wait = None
            print("🔚 浏览器已关闭")

    def _element_text(self, css_selector):
        """Return the stripped text of the first match, or '' if none is usable."""
        try:
            element = self.driver.find_element(By.CSS_SELECTOR, css_selector)
            return (element.text or '').strip()
        except Exception:
            # A selector that does not match this page layout is expected.
            return ''

    def get_product_info(self, product_url):
        """Open ``product_url`` and read the product title and price.

        The method blocks on ``input()`` until the operator confirms that the
        page has finished loading.

        Args:
            product_url: URL of the product detail page.

        Returns:
            A dict with keys ``title``, ``price`` and ``url``; fields that
            could not be read hold a "failed" placeholder. Returns None when
            the page itself could not be opened.
        """
        try:
            print(f"🌐 正在访问商品页面: {product_url}")
            self.driver.get(product_url)
            time.sleep(3)
            input("⏳ 请确认商品页面完全加载后按回车继续...")
        except Exception as e:
            print(f"❌ 访问商品页面失败: {e}")
            return None

        product_info = {
            'title': '',
            'price': '',
            'url': product_url
        }
        try:
            for selector in self._TITLE_SELECTORS:
                title = self._element_text(selector)
                if title:
                    product_info['title'] = title
                    break
            for selector in self._PRICE_SELECTORS:
                # Keep only the numeric part of the price text.
                digits = re.search(r'[\d.]+', self._element_text(selector))
                if digits:
                    product_info['price'] = f"¥{digits.group()}"
                    break
        except Exception as e:
            print(f"⚠️ 获取商品信息失败: {e}")

        if not product_info['title']:
            product_info['title'] = "商品标题获取失败"
        if not product_info['price']:
            product_info['price'] = "价格获取失败"
        return product_info

    @staticmethod
    def _parse_coupon_text(text):
        """Interpret coupon wording.

        Args:
            text: Visible text of a candidate coupon element.

        Returns:
            ``(amount, coupon_type)`` for the first recognised form, or None.
            Forms are tried from most to least specific: "满X减Y" (spend X,
            save Y), "N折" when the text mentions "折扣" (discount), then a
            plain "减Y" (save Y).
        """
        text = (text or '').strip()
        if not text:
            return None
        number = r'(\d+(?:\.\d+)?)'
        # Try the threshold form first: "满100减10" also contains "减10", and
        # matching the plain form first would lose the spending threshold.
        threshold = re.search(f'满{number}减{number}', text)
        if threshold:
            return f"满{threshold.group(1)}减{threshold.group(2)}元", '满减'
        if '折扣' in text:
            discount = re.search(f'{number}折', text)
            if discount:
                return f"{discount.group(1)}折", '折扣券'
        reduction = re.search(f'减{number}', text)
        if reduction:
            return f"减{reduction.group(1)}元", '减价券'
        return None

    def _find_coupon_offer(self):
        """Return ``(amount, coupon_type, description)`` for the first coupon found, else None."""
        for xpath in self._COUPON_TEXT_XPATHS:
            try:
                for elem in self.driver.find_elements(By.XPATH, xpath):
                    text = (elem.text or '').strip()
                    parsed = self._parse_coupon_text(text)
                    if parsed:
                        return parsed[0], parsed[1], text
            except Exception:
                # Best effort: move on to the next selector.
                continue
        return None

    @staticmethod
    def _link_entry(url, kind, xpath, label):
        """Build the dict that describes one discovered coupon link."""
        return {
            'url': url,
            'type': kind,
            'selector': xpath,
            'element_text': label
        }

    def _click_for_link(self, elem, xpath, label):
        """Click a "领券" element and capture the URL it leads to.

        Args:
            elem: The element to click.
            xpath: Selector that produced ``elem`` (recorded in the result).
            label: Text of ``elem`` read before the click, because the element
                may no longer be readable once the page has changed.

        Returns:
            A link entry dict, or None when the click opened nothing or failed.
        """
        try:
            original_url = self.driver.current_url
            original_handle = self.driver.current_window_handle
            elem.click()
            time.sleep(2)
            opened = [h for h in self.driver.window_handles if h != original_handle]
            if opened:
                # The click opened a new window/tab: read its URL, close it and
                # always return focus to the window we started from.
                self.driver.switch_to.window(opened[-1])
                try:
                    clicked_url = self.driver.current_url
                    self.driver.close()
                finally:
                    self.driver.switch_to.window(original_handle)
                return self._link_entry(clicked_url, 'clicked_link', xpath, label)
            if self.driver.current_url != original_url:
                # Same-window navigation: record the target and go back.
                clicked_url = self.driver.current_url
                self.driver.back()
                return self._link_entry(clicked_url, 'clicked_link', xpath, label)
            return None
        except Exception as e:
            print(f"点击元素时出错: {e}")
            return None

    def _collect_coupon_links(self):
        """Gather coupon link entries from every link selector, in discovery order."""
        found = []
        for xpath in self._COUPON_LINK_XPATHS:
            try:
                for elem in self.driver.find_elements(By.XPATH, xpath):
                    if not (elem.is_displayed() and elem.is_enabled()):
                        continue
                    label = (elem.text or '').strip()
                    href = elem.get_attribute('href')
                    if href and ('coupon' in href.lower() or 'activity' in href.lower()):
                        if not href.startswith('http'):
                            href = 'https:' + href
                        found.append(self._link_entry(href, 'direct_link', xpath, label))
                    elif '领券' in label:
                        # No usable href (e.g. a button): click to discover the target.
                        entry = self._click_for_link(elem, xpath, label)
                        if entry:
                            found.append(entry)
            except Exception as e:
                print(f"使用选择器 {xpath} 时出错: {e}")
                continue
        return found

    def extract_coupon_info(self):
        """Extract coupon wording and coupon links from the current page.

        Returns:
            A dict with keys ``amount``, ``urls`` (list of link entry dicts,
            de-duplicated by URL), ``primary_url``, ``description`` and
            ``type``. When a coupon was recognised but no link was found,
            ``primary_url`` holds a hint to claim it manually on the page.
        """
        try:
            coupon_info = {
                'amount': self._NO_COUPON,
                'urls': [],
                'primary_url': '',
                'description': '',
                'type': '未知'
            }
            offer = self._find_coupon_offer()
            if offer:
                coupon_info['amount'], coupon_info['type'], coupon_info['description'] = offer

            seen_urls = set()
            unique_links = []
            for link in self._collect_coupon_links():
                if link['url'] not in seen_urls:
                    seen_urls.add(link['url'])
                    unique_links.append(link)
            coupon_info['urls'] = unique_links

            if unique_links:
                coupon_info['primary_url'] = unique_links[0]['url']
            elif coupon_info['amount'] != self._NO_COUPON:
                coupon_info['primary_url'] = self._MANUAL_CLAIM_HINT
            else:
                coupon_info['primary_url'] = self._NO_LINK
            return coupon_info
        except Exception as e:
            print(f"❌ 提取优惠券信息失败: {e}")
            return {'amount': '获取失败', 'urls': [], 'primary_url': '', 'description': '', 'type': '未知'}

    def display_result(self, product_info, coupon_info, index=None, total=None):
        """Print one product's coupon result.

        Args:
            product_info: Dict from ``get_product_info`` (falsy means failure).
            coupon_info: Dict from ``extract_coupon_info``.
            index: 1-based position in a batch, or None for a single run.
            total: Batch size, or None for a single run.
        """
        if not product_info:
            print(f"\n❌ 未获取到商品信息")
            return
        if index and total:
            print(f"\n📊 处理进度: {index}/{total}")
        print("\n" + "="*80)
        if index:
            print(f"🎉 第{index}个商品优惠券获取结果")
        else:
            print("🎉 优惠券获取结果")
        print("="*80)
        print(f"\n📦 商品信息")
        print(f" 标题: {product_info['title']}")
        print(f" 价格: {product_info['price']}")
        print(f" 商品链接: {product_info['url']}")
        print("-" * 50)
        print(f"\n🎫 优惠券信息")
        print(f" 优惠券: {coupon_info['amount']}")
        if coupon_info['description']:
            print(f" 描述: {coupon_info['description']}")
        if coupon_info['urls']:
            print(f" 找到 {len(coupon_info['urls'])} 个优惠券链接:")
            for i, link_info in enumerate(coupon_info['urls'], 1):
                print(f" {i}. {link_info['url']} (来源: {link_info['selector']})")
        else:
            print(f" 优惠券链接: {coupon_info['primary_url']}")
        print("="*80)

    def process_single_url(self, product_url):
        """Fetch product and coupon data for one URL.

        Returns:
            ``(product_info, coupon_info)``, or ``(None, None)`` on failure.
        """
        try:
            product_info = self.get_product_info(product_url)
            if not product_info:
                return None, None
            coupon_info = self.extract_coupon_info()
            return product_info, coupon_info
        except Exception as e:
            print(f"❌ 处理商品失败: {e}")
            return None, None

    def run_batch(self, url_list):
        """Process every URL in ``url_list`` with one browser session.

        Prints each result and a summary, and offers to save when at least one
        product succeeded. The browser is always closed before returning.

        Returns:
            True when the batch ran to completion, False when the browser
            could not start or an unexpected error aborted the batch.
        """
        try:
            print(f"🚀 开始批量获取优惠券,共{len(url_list)}个商品...")
            if not self.setup_driver():
                return False
            results = []
            for index, url in enumerate(url_list, 1):
                print(f"\n{'='*80}")
                print(f"🔄 开始处理第{index}个商品...")
                product_info, coupon_info = self.process_single_url(url)
                if product_info and coupon_info:
                    self.display_result(product_info, coupon_info, index, len(url_list))
                    results.append({
                        'product': product_info,
                        'coupon': coupon_info,
                        'success': True
                    })
                else:
                    print(f"❌ 第{index}个商品处理失败")
                    results.append({
                        'url': url,
                        'success': False
                    })
                # Pause between products to reduce the chance of rate limiting.
                if index < len(url_list):
                    print(f"⏳ 等待3秒后继续处理下一个商品...")
                    time.sleep(3)
            self.display_batch_summary(results)
            if any(r.get('success') for r in results):
                self.ask_save_results(results)
            return True
        except Exception as e:
            print(f"❌ 批量处理失败: {e}")
            return False
        finally:
            self._shutdown_driver()

    def display_batch_summary(self, results):
        """Print how many products succeeded and a one-line recap of each success."""
        successful = sum(1 for r in results if r.get('success'))
        total = len(results)
        print("\n" + "="*80)
        print("📊 批量处理完成总结")
        print("="*80)
        print(f"✅ 成功处理: {successful}/{total} 个商品")
        if successful > 0:
            print("\n🎫 优惠券汇总:")
            for i, result in enumerate(results, 1):
                if result.get('success'):
                    product = result['product']
                    coupon = result['coupon']
                    print(f" {i}. {product['title'][:50]}...")
                    print(f" 价格: {product['price']} | 优惠券: {coupon['amount']}")
        print("="*80)

    def save_results(self, results, format_type='json', filename=None):
        """Write the successful results to a file.

        Args:
            results: List of result dicts; only those with a truthy
                ``success`` key are written.
            format_type: 'json', 'csv' or 'txt' (case-insensitive).
            filename: Target path; a timestamped name in the current
                directory is generated when omitted.

        Returns:
            The filename written, or None when nothing was saved.
        """
        try:
            fmt = format_type.lower()
            if not filename:
                timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
                # Use the normalised format so the extension is always lowercase.
                filename = f"coupon_results_{timestamp}.{fmt}"
            successful_results = [r for r in results if r.get('success')]
            if not successful_results:
                print("⚠️ 没有成功获取的优惠券信息可保存")
                return None
            writers = {
                'json': self._save_json,
                'csv': self._save_csv,
                'txt': self._save_txt,
            }
            writer = writers.get(fmt)
            if writer is None:
                print(f"❌ 不支持的格式: {format_type}")
                return None
            return writer(successful_results, filename)
        except Exception as e:
            print(f"❌ 保存文件失败: {e}")
            return None

    def _save_json(self, results, filename):
        """Write ``results`` as a UTF-8 JSON document; return the filename or None."""
        try:
            json_data = {
                'export_time': datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
                'total_items': len(results),
                'results': [
                    {'product': result['product'], 'coupon': result['coupon']}
                    for result in results
                ]
            }
            with open(filename, 'w', encoding='utf-8') as f:
                json.dump(json_data, f, ensure_ascii=False, indent=2)
            print(f"✅ JSON文件已保存: {filename}")
            return filename
        except Exception as e:
            print(f"❌ 保存JSON文件失败: {e}")
            return None

    def _save_csv(self, results, filename):
        """Write ``results`` as CSV (UTF-8 with BOM); return the filename or None."""
        try:
            with open(filename, 'w', encoding='utf-8-sig', newline='') as f:
                writer = csv.writer(f)
                writer.writerow(['序号', '商品标题', '商品价格', '优惠券金额', '优惠券描述', '优惠券链接', '所有优惠券链接', '商品链接'])
                for i, result in enumerate(results, 1):
                    product = result['product']
                    coupon = result['coupon']
                    # Join every link into one cell; fall back to the primary URL text.
                    if coupon['urls']:
                        all_links = "; ".join(link_info['url'] for link_info in coupon['urls'])
                    else:
                        all_links = coupon['primary_url']
                    writer.writerow([
                        i,
                        product['title'],
                        product['price'],
                        coupon['amount'],
                        coupon['description'],
                        coupon['primary_url'],
                        all_links,
                        product['url']
                    ])
            print(f"✅ CSV文件已保存: {filename}")
            return filename
        except Exception as e:
            print(f"❌ 保存CSV文件失败: {e}")
            return None

    def _save_txt(self, results, filename):
        """Write ``results`` as a plain-text report; return the filename or None."""
        try:
            with open(filename, 'w', encoding='utf-8') as f:
                f.write("淘宝优惠券获取结果\n")
                f.write("=" * 80 + "\n")
                f.write(f"导出时间: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n")
                f.write(f"成功获取: {len(results)} 个商品的优惠券信息\n")
                f.write("=" * 80 + "\n\n")
                for i, result in enumerate(results, 1):
                    product = result['product']
                    coupon = result['coupon']
                    f.write(f"【第{i}个商品】\n")
                    f.write(f"商品标题: {product['title']}\n")
                    f.write(f"商品价格: {product['price']}\n")
                    f.write(f"优惠券: {coupon['amount']}\n")
                    if coupon['description']:
                        f.write(f"描述: {coupon['description']}\n")
                    if coupon['urls']:
                        f.write(f"找到 {len(coupon['urls'])} 个优惠券链接:\n")
                        for j, link_info in enumerate(coupon['urls'], 1):
                            f.write(f" {j}. {link_info['url']} (来源: {link_info['selector']})\n")
                    else:
                        f.write(f"优惠券链接: {coupon['primary_url']}\n")
                    f.write(f"商品链接: {product['url']}\n")
                    f.write("-" * 80 + "\n\n")
            print(f"✅ TXT文件已保存: {filename}")
            return filename
        except Exception as e:
            print(f"❌ 保存TXT文件失败: {e}")
            return None

    def run(self, product_url):
        """Process a single product URL, show the result and offer to save it.

        The browser is always closed before returning.

        Returns:
            True when product and coupon data were obtained, otherwise False.
        """
        try:
            print("🚀 开始获取商品优惠券...")
            if not self.setup_driver():
                return False
            product_info, coupon_info = self.process_single_url(product_url)
            if product_info and coupon_info:
                self.display_result(product_info, coupon_info)
                results = [{
                    'product': product_info,
                    'coupon': coupon_info,
                    'success': True
                }]
                self.ask_save_results(results)
                return True
            return False
        except Exception as e:
            print(f"❌ 运行失败: {e}")
            return False
        finally:
            self._shutdown_driver()

    def ask_save_results(self, results):
        """Interactively ask whether, in which format and under which name to save."""
        try:
            successful_results = [r for r in results if r.get('success')]
            if not successful_results:
                return
            print("\n💾 是否保存优惠券结果到文件?")
            choice = input("请输入 (y/n) [默认y]: ").strip().lower()
            if choice == '' or choice == 'y':
                print("\n📁 请选择保存格式:")
                print("1. JSON格式 (推荐)")
                print("2. CSV格式 (Excel兼容)")
                print("3. TXT格式 (纯文本)")
                format_choice = input("请选择格式 (1/2/3) [默认1]: ").strip()
                format_map = {
                    '1': 'json',
                    '2': 'csv',
                    '3': 'txt',
                    '': 'json'
                }
                # Any unrecognised answer falls back to JSON.
                format_type = format_map.get(format_choice, 'json')
                custom_filename = input("请输入文件名(留空使用默认名称): ").strip()
                if custom_filename:
                    # Append the extension unless the user already typed it.
                    if not custom_filename.endswith(f'.{format_type}'):
                        filename = f"{custom_filename}.{format_type}"
                    else:
                        filename = custom_filename
                else:
                    filename = None
                saved_file = self.save_results(results, format_type, filename)
                if saved_file:
                    print(f"✅ 文件已保存到当前目录: {saved_file}")
                    print(f"📂 文件路径: {os.path.abspath(saved_file)}")
                else:
                    print("❌ 保存失败")
            else:
                print("⏭️ 跳过保存")
        except KeyboardInterrupt:
            print("\n⏹️ 用户取消保存")
        except Exception as e:
            print(f"❌ 保存过程出错: {e}")
def validate_url(url):
    """Tell whether ``url`` is a Taobao / Tmall / Tmall HK product link.

    Args:
        url: Candidate URL string; empty or None is rejected.

    Returns:
        True when the URL starts with one of the recognised product-page
        prefixes (http or https), otherwise False.
    """
    if not url:
        return False
    # Host prefixes of the supported product detail pages.
    accepted_patterns = (
        r'https?://item\.taobao\.com/.*',
        r'https?://detail\.tmall\.com/.*',
        r'https?://detail\.tmall\.hk/.*',
    )
    return any(re.match(candidate, url) is not None for candidate in accepted_patterns)
def read_urls_from_file(filename):
    """Read a list of URLs from a text file, one per line.

    Blank lines and comment lines (first non-blank character is ``#``) are
    skipped; surrounding whitespace is removed from every entry. The file is
    decoded as UTF-8, and a leading byte-order mark is tolerated so that it
    does not end up glued to the first URL.

    Args:
        filename: Path of the text file.

    Returns:
        The URLs in file order, or an empty list when the file cannot be read
        (the error is printed).
    """
    try:
        with open(filename, 'r', encoding='utf-8-sig') as f:
            # Strip before testing for '#' so indented comments are skipped too.
            stripped_lines = (line.strip() for line in f)
            return [line for line in stripped_lines if line and not line.startswith('#')]
    except (OSError, UnicodeError, ValueError) as e:
        print(f"❌ 读取文件失败: {e}")
        return []
def get_batch_urls():
    """Ask the user for a batch of product URLs and return the valid ones.

    Three input modes are offered: a single whitespace-separated line, a text
    file with one URL per line, or line-by-line entry ended by an empty line.

    Returns:
        The URLs accepted by ``validate_url``, in input order. An empty list
        is returned for an unknown mode or a missing file.
    """
    print("\n📋 批量URL输入方式:")
    print("1. 直接输入多个URL(用空格分隔)")
    print("2. 从文件读取URL列表")
    print("3. 交互式逐行输入")
    mode = input("\n请选择输入方式 (1/2/3): ").strip()

    if mode == '1':
        # str.split() without arguments already drops empty tokens.
        candidates = input("🔗 请输入多个URL(用空格分隔): ").strip().split()
    elif mode == '2':
        source_path = input("📁 请输入文件名(如 urls.txt): ").strip()
        if not os.path.exists(source_path):
            print("❌ 文件不存在")
            return []
        candidates = read_urls_from_file(source_path)
        print(f"✅ 从文件读取到 {len(candidates)} 个URL")
    elif mode == '3':
        print("🔗 请逐行输入URL(输入空行结束):")
        # Keep prompting until the user submits an empty line.
        candidates = list(iter(lambda: input("URL: ").strip(), ''))
    else:
        print("❌ 无效选择")
        return []

    # Keep only supported product links and report how many were dropped.
    accepted = [candidate for candidate in candidates if validate_url(candidate)]
    rejected_count = len(candidates) - len(accepted)
    if rejected_count:
        print(f"\n⚠️ 发现 {rejected_count} 个无效URL,已自动过滤")
    return accepted
def main():
    """Command-line entry point.

    URLs come from ``sys.argv[1]`` (an existing file path or a single product
    URL) or, without arguments, from an interactive menu. One URL is handled
    with ``CouponFinder.run``; several with ``CouponFinder.run_batch``.
    """
    print("=" * 80)
    print("🎫 淘宝优惠券批量获取工具")
    print("=" * 80)
    print("📋 功能说明:")
    print(" • 支持单个商品URL获取优惠券")
    print(" • 支持批量处理多个商品URL")
    print(" • 支持从文件读取URL列表")
    print(" • 支持淘宝、天猫、天猫国际商品")
    print(" • 支持收集多个优惠券链接")
    print("=" * 80)
    try:
        url_list = []
        if len(sys.argv) > 1:
            # The argument is a file of URLs if such a path exists, else one URL.
            if os.path.exists(sys.argv[1]):
                url_list = read_urls_from_file(sys.argv[1])
                print(f"📁 从文件读取到 {len(url_list)} 个URL")
            else:
                url = sys.argv[1]
                if validate_url(url):
                    url_list = [url]
                else:
                    print("❌ 无效的URL格式")
                    return
        else:
            print("\n🎯 请选择操作模式:")
            print("1. 单个商品处理")
            print("2. 批量商品处理")
            mode = input("\n请选择模式 (1/2): ").strip()
            if mode == '1':
                product_url = input("🔗 请输入商品详情页URL: ").strip()
                if product_url and validate_url(product_url):
                    url_list = [product_url]
                else:
                    print("❌ 请输入有效的淘宝/天猫商品链接")
                    return
            elif mode == '2':
                url_list = get_batch_urls()
                if not url_list:
                    print("❌ 未获取到有效URL")
                    return
            else:
                print("❌ 无效选择")
                return
        if not url_list:
            print("❌ 未提供有效URL")
            return

        # CouponFinder has no automated login, and passing credentials to its
        # constructor raised TypeError on every run. Instead of collecting a
        # password that nothing consumes, tell the user to log in by hand:
        # get_product_info() pauses after opening the page until Enter is pressed.
        login_choice = input("\n🔑 是否需要登录淘宝账号?(y/n) [默认n]: ").strip().lower()
        if login_choice == 'y':
            print("ℹ️ 本工具不支持自动登录,请在浏览器打开商品页面后手动登录,再按回车继续")

        finder = CouponFinder()
        if len(url_list) == 1:
            success = finder.run(url_list[0])
        else:
            success = finder.run_batch(url_list)
        if success:
            print("\n✅ 优惠券获取完成!")
        else:
            print("\n❌ 优惠券获取失败!")
    except KeyboardInterrupt:
        print("\n⏹️ 用户中断程序")
    except Exception as e:
        print(f"\n❌ 程序异常: {e}")
# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
# 最新发布  (stray web-page text, "Latest posts"; not part of the program)