item_search_shop - 获得店铺的所有商品

item_search_shop 接口根据店铺地址(shop_url)和页码(page)分页返回该店铺的全部商品;下面是一次调用返回的 JSON 示例。

在这里插入图片描述

测试网址:点击注册测试

{
	"items": {
		"keyword": "",
		"shop_type": "C",
		"page": "1",
		"total_results": "476",
		"pagecount": "20",
		"page_size": 24,
		"item": [
			{
				"num_iid": "579813460997",
				"pic_url": "//img.alicdn.com/bao/uploaded/i3/478952498/O1CN011UK7q9WzgFGKQw8_!!478952498.jpg",
				"title": "英伦小皮鞋女复古2020秋冬新款网红毛毛鞋女百搭外穿粗跟中跟单鞋",
				"orginal_price": "398.00",
				"price": "148.00",
				"sales": "35",
				"detail_url": "//item.taobao.com/item.htm?id=579813460997"
			},
			{
				"num_iid": "558807078642",
				"pic_url": "//img.alicdn.com/bao/uploaded/i2/TB13b8Mg50TMKJjSZFNYXG_1FXa_M2.SS2",
				"title": "短靴女粗跟韩版百搭复古春秋冬季真皮磨砂内增高靴子马丁靴英伦风",
				"orginal_price": "298.00",
				"price": "168.00",
				"sales": "3",
				"detail_url": "//item.taobao.com/item.htm?id=558807078642"
			},
			{
				"num_iid": "561329714920",
				"pic_url": "//img.alicdn.com/bao/uploaded/i5/TB1li31bgjN8KJjSZFgYXHjbVXa_M2.SS2",
				"title": "靴子女切尔西短靴粗跟2020新款女鞋秋冬季加绒保暖棉鞋高跟马丁靴",
				"orginal_price": "298.00",
				"price": "158.00",
				"sales": "259",
				"detail_url": "//item.taobao.com/item.htm?id=561329714920"
			},
			{
				"num_iid": "563787228848",
				"pic_url": "//img.alicdn.com/bao/uploaded/i6/TB168bmocjI8KJjSsppYXFbyVXa_M2.SS2",
				"title": "软妹英伦小皮鞋2020春秋季仙女的鞋新款单鞋女韩版百搭中跟乐福鞋",
				"orginal_price": "298.00",
				"price": "118.00",
				"sales": "13",
				"detail_url": "//item.taobao.com/item.htm?id=563787228848"
			},
			{
				"num_iid": "574618567399",
				"pic_url": "//img.alicdn.com/bao/uploaded/i4/478952498/O1CN011UK7oz7sQ6Ptiof_!!478952498.jpg",
				"title": "粗跟单鞋女2020秋季新款软妹小皮鞋仙女百搭韩版一脚蹬中跟乐福鞋",
				"orginal_price": "298.00",
				"price": "118.00",
				"sales": "8",
				"detail_url": "//item.taobao.com/item.htm?id=574618567399"
			},
			{
				"num_iid": "576205129128",
				"pic_url": "//img.alicdn.com/bao/uploaded/i3/478952498/O1CN017eCZkQ1UK7qURTtkF_!!478952498.jpg",
				"title": "2020新款马丁靴女英伦瘦瘦靴韩版百搭春秋冬靴子女内增高磨砂短靴",
				"orginal_price": "298.00",
				"price": "168.00",
				"sales": "5",
				"detail_url": "//item.taobao.com/item.htm?id=576205129128"
			},
			{
				"num_iid": "576987455658",
				"pic_url": "//img.alicdn.com/bao/uploaded/i4/478952498/O1CN011UK7pO8U82hr9Cn_!!478952498.jpg",
				"title": "切尔西短靴女2020秋季新款平底粗跟英伦马丁靴女短筒百搭chic靴子",
				"orginal_price": "398.00",
				"price": "158.00",
				"sales": "104",
				"detail_url": "//item.taobao.com/item.htm?id=576987455658"
			},
			{
				"num_iid": "577372500055",
				"pic_url": "//img.alicdn.com/bao/uploaded/i2/478952498/O1CN011UK7phkRLXNtytT_!!478952498.jpg",
				"title": "ins马丁靴女短筒2020秋冬季新款靴子百搭平底粗跟英伦加绒小短靴",
				"orginal_price": "398.00",
				"price": "168.00",
				"sales": "15",
				"detail_url": "//item.taobao.com/item.htm?id=577372500055"
			},
			{
				"num_iid": "578106593770",
				"pic_url": "//img.alicdn.com/bao/uploaded/i1/478952498/O1CN010KdYms1UK7qvVtbG5_!!478952498.png",
				"title": "切尔西短靴女粗跟2019秋冬新款帅气复古靴子女百搭中跟英伦马丁靴",
				"orginal_price": "398.00",
				"price": "168.00",
				"sales": "7",
				"detail_url": "//item.taobao.com/item.htm?id=578106593770"
			},
			{
				"num_iid": "578219766082",
				"pic_url": "//img.alicdn.com/bao/uploaded/i2/478952498/O1CN011UK7qQvh2us7sUw_!!478952498.jpg",
				"title": "加绒懒人乐福鞋女2020秋冬新款小皮鞋女复古外穿中跟粗跟毛毛单鞋",
				"orginal_price": "298.00",
				"price": "148.00",
				"sales": "536",
				"detail_url": "//item.taobao.com/item.htm?id=578219766082"
			},
			{
				"num_iid": "579083541117",
				"pic_url": "//img.alicdn.com/bao/uploaded/i3/478952498/O1CN011UK7pyqNwcjreIt_!!478952498.jpg",
				"title": "粗跟平底黑色小短靴2020新款百搭切尔西靴春秋复古英伦风马丁靴女",
				"orginal_price": "398.00",
				"price": "168.00",
				"sales": "241",
				"detail_url": "//item.taobao.com/item.htm?id=579083541117"
			},
			{
				"num_iid": "583325030150",
				"pic_url": "//img.alicdn.com/bao/uploaded/i1/478952498/O1CN01nyZhG51UK7rExtEZR_!!478952498.jpg",
				"title": "皮毛一体雪地靴女2020新款秋冬短靴百搭学生加厚保暖棉鞋冬季加绒",
				"orginal_price": "498.00",
				"price": "178.00",
				"sales": "2157",
				"detail_url": "//item.taobao.com/item.htm?id=583325030150"
			},
			{
				"num_iid": "584090410715",
				"pic_url": "//img.alicdn.com/bao/uploaded/i1/478952498/O1CN01NG4ayQ1UK7rVp6STi_!!478952498.jpg",
				"title": "鞋子女秋冬2020新款粗跟深口鞋百搭一脚蹬乐福鞋英伦风高跟单鞋女",
				"orginal_price": "398.00",
				"price": "148.00",
				"sales": "65",
				"detail_url": "//item.taobao.com/item.htm?id=584090410715"
			},
			{
				"num_iid": "585977098518",
				"pic_url": "//img.alicdn.com/bao/uploaded/i3/478952498/O1CN01fmemyT1UK7ruXeXf7_!!478952498.jpg",
				"title": "粗跟单鞋女2020新款秋季方头高跟鞋百搭中跟乐福鞋复古英伦小皮鞋",
				"orginal_price": "298.00",
				"price": "138.00",
				"sales": "11",
				"detail_url": "//item.taobao.com/item.htm?id=585977098518"
			},
			{
				"num_iid": "586106739365",
				"pic_url": "//img.alicdn.com/bao/uploaded/i4/478952498/O1CN01uEzIc61UK7rkzcP9z_!!478952498.jpg",
				"title": "粗跟单鞋女2020秋新款网红高跟鞋方头复古小皮鞋女英伦风百搭鞋子",
				"orginal_price": "298.00",
				"price": "138.00",
				"sales": "11",
				"detail_url": "//item.taobao.com/item.htm?id=586106739365"
			},
			{
				"num_iid": "598396628465",
				"pic_url": "//img.alicdn.com/bao/uploaded/i2/478952498/O1CN01mgXmTm1UK7uxFtigU_!!478952498.jpg",
				"title": "方头高跟鞋2020新款秋季网红粗跟单鞋女英伦风小皮鞋百搭乐福鞋",
				"orginal_price": "228.00",
				"price": "148.00",
				"sales": "41",
				"detail_url": "//item.taobao.com/item.htm?id=598396628465"
			},
			{
				"num_iid": "598710008141",
				"pic_url": "//img.alicdn.com/bao/uploaded/i3/478952498/O1CN010WRWRT1UK7vMkXjZl_!!478952498.jpg",
				"title": "方头单鞋女粗跟2020秋季新款复古中跟英伦风小皮鞋女百搭高跟鞋子",
				"orginal_price": "228.00",
				"price": "138.00",
				"sales": "4",
				"detail_url": "//item.taobao.com/item.htm?id=598710008141"
			},
			{
				"num_iid": "598867511004",
				"pic_url": "//img.alicdn.com/bao/uploaded/i4/478952498/O1CN01gv1c4Z1UK7uuI7jMx_!!478952498.jpg",
				"title": "秋季单鞋女中跟2020新款乐福鞋女春款粗跟一脚蹬百搭英伦风小皮鞋",
				"orginal_price": "228.00",
				"price": "138.00",
				"sales": "4",
				"detail_url": "//item.taobao.com/item.htm?id=598867511004"
			},
			{
				"num_iid": "599703498327",
				"pic_url": "//img.alicdn.com/bao/uploaded/i3/478952498/O1CN01EywUqM1UK7vATQldm_!!478952498.jpg",
				"title": "中跟单鞋女粗跟2020新款高跟鞋女网红百搭秋季鞋子浅口复古奶奶鞋",
				"orginal_price": "228.00",
				"price": "138.00",
				"sales": "1",
				"detail_url": "//item.taobao.com/item.htm?id=599703498327"
			},
			{
				"num_iid": "600995687062",
				"pic_url": "//img.alicdn.com/bao/uploaded/i4/478952498/O1CN01OMQaiN1UK7vPt9VTu_!!478952498.jpg",
				"title": "英伦小皮鞋女2020秋季新款鞋子百搭粗跟单鞋女秋款方头中跟乐福鞋",
				"orginal_price": "228.00",
				"price": "138.00",
				"sales": "8",
				"detail_url": "//item.taobao.com/item.htm?id=600995687062"
			},
			{
				"num_iid": "602072749432",
				"pic_url": "//img.alicdn.com/bao/uploaded/i2/478952498/O1CN0136WB4T1UK7vek3lr9_!!478952498.jpg",
				"title": "老爹鞋女ins潮2020春季新款网红超火学生运动鞋女百搭厚底松糕鞋",
				"orginal_price": "228.00",
				"price": "168.00",
				"sales": "2",
				"detail_url": "//item.taobao.com/item.htm?id=602072749432"
			},
			{
				"num_iid": "602231173085",
				"pic_url": "//img.alicdn.com/bao/uploaded/i1/478952498/O1CN01MLiNbq1UK7vqIqMMw_!!478952498.jpg",
				"title": "高跟短靴女2020秋冬新款粗跟马丁靴女英伦风百搭ins网红瘦瘦靴子",
				"orginal_price": "228.00",
				"price": "168.00",
				"sales": "12",
				"detail_url": "//item.taobao.com/item.htm?id=602231173085"
			},
			{
				"num_iid": "602296612188",
				"pic_url": "//img.alicdn.com/bao/uploaded/i3/478952498/O1CN01es2LDc1UK7vyVivSq_!!478952498.jpg",
				"title": "马丁靴女英伦风2020秋冬季新款中跟靴子百搭方头粗跟短靴女潮ins",
				"orginal_price": "228.00",
				"price": "158.00",
				"sales": "2",
				"detail_url": "//item.taobao.com/item.htm?id=602296612188"
			},
			{
				"num_iid": "602308156925",
				"pic_url": "//img.alicdn.com/bao/uploaded/i3/478952498/O1CN01ETEVSw1UK7vmANHjZ_!!478952498.jpg",
				"title": "粗跟单鞋女2020秋季新款鞋子百搭乐福鞋女复古一脚蹬英伦风小皮鞋",
				"orginal_price": "228.00",
				"price": "148.00",
				"sales": "2",
				"detail_url": "//item.taobao.com/item.htm?id=602308156925"
			}
		],
		"user": {
			"seller_id": "478952498",
			"shop_id": "104789036",
			"shop_url": "https://mxjycnx.taobao.com",
			"shop_tel": "",
			"shop_mobile": "",
			"shop_company": "yes"
		},
		"data_from": "pc"
	},
	"secache": "b571db7ab2dc90c71332210ce53f2a0d",
	"secache_time": 1608385655,
	"secache_date": "2020-12-19 21:47:35",
	"translate_status": "",
	"translate_time": 0,
	"language": {
		"default_lang": "cn",
		"current_lang": "cn"
	},
	"error": "",
	"reason": "",
	"error_code": "0000",
	"cache": 1,
	"api_info": "today:1 max:10000",
	"execution_time": 0.135,
	"server_time": "Beijing/2020-12-24 09:13:20",
	"client_ip": "115.151.185.211",
	"call_args": {
		"seller_nick": "https://mxjycnx.taobao.com",
		"page": "1",
		"shop_url": "https://mxjycnx.taobao.com"
	},
	"api_type": "taobao",
	"translate_language": "zh-CN",
	"translate_engine": "google_cn",
	"server_memory": "6.85MB",
	"request_id": "gw-2.5fe3eb309c1a4"
}
import json
import logging
import random
import time
from urllib.parse import urlencode

import pandas as pd
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

# Configure logging once for the whole script.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)


class TaobaoCrawler:
    """Selenium-driven crawler that collects product cards from Taobao search results.

    The crawler runs without logging in ("guest mode"). Every extracted product
    is appended to ``self.products_data`` as a dict with the keys ``title``,
    ``price``, ``shop_name``, ``sales``, ``location``, ``url`` and ``crawl_time``.
    """

    # CSS selector of one product card on the search result page.
    _CARD_SELECTOR = ".CardV2--doubleCard--_OJ1T8j"
    # Fallback selector tried when the primary one matches nothing.
    _CARD_FALLBACK_SELECTOR = ".CardV2--mainPicAndDesc--Eb_BxDU"

    def __init__(self):
        """Create the crawler and start the browser immediately."""
        self.driver = None
        self.products_data = []
        self.setup_browser()

    def setup_browser(self):
        """Start Chrome with options meant to hide automation fingerprints.

        Raises:
            Exception: whatever Selenium raised while starting or configuring
                the browser; the error is logged and then re-raised.
        """
        options = webdriver.ChromeOptions()

        # Anti-detection switches.
        options.add_argument('--disable-blink-features=AutomationControlled')
        options.add_argument('--no-sandbox')
        options.add_argument('--disable-dev-shm-usage')
        options.add_argument('--disable-extensions')
        options.add_argument(
            '--user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/141.0.0.0 Safari/537.36 Edg/141.0.0.0')

        # Hide the automation flags Chrome would otherwise expose.
        options.add_experimental_option('excludeSwitches', ['enable-automation'])
        options.add_experimental_option('useAutomationExtension', False)

        # Optional: headless mode (for production use).
        # options.add_argument('--headless')

        try:
            # Use the system ChromeDriver directly instead of webdriver_manager.
            self.driver = webdriver.Chrome(options=options)

            # Mask navigator.webdriver on the page that is already open ...
            self.driver.execute_script("Object.defineProperty(navigator, 'webdriver', {get: () => undefined})")
            # ... and on every document loaded afterwards. The statements are
            # terminated explicitly so the script does not depend on line breaks.
            self.driver.execute_cdp_cmd('Page.addScriptToEvaluateOnNewDocument', {
                'source': '''
                    Object.defineProperty(navigator, 'webdriver', {get: () => undefined});
                    Object.defineProperty(navigator, 'languages', {get: () => ['zh-CN', 'zh']});
                '''
            })
            logger.info("浏览器初始化成功 - 游客模式")
        except Exception as e:
            logger.error(f"浏览器初始化失败: {str(e)}")
            raise

    def search_products(self, keyword, pages=3, sort_by="sale-desc"):
        """Search for ``keyword`` and scrape up to ``pages`` result pages.

        Args:
            keyword: Search text. It is URL-encoded before being sent, so it may
                contain spaces, ``&``, ``#`` and non-ASCII characters.
            pages: Maximum number of result pages to scrape.
            sort_by: Value of the ``sort`` query parameter.

        Returns:
            True when the crawl loop finished (even if it stopped early because
            no next page could be opened), False when an exception occurred.
        """
        try:
            # Encode the query so reserved characters in the keyword cannot
            # corrupt the URL or inject extra parameters.
            query = urlencode({"q": keyword, "sort": sort_by})
            search_url = f"https://s.taobao.com/search?{query}"
            self.driver.get(search_url)
            logger.info(f"搜索关键词: {keyword}, 排序方式: {sort_by}")

            # Wait until at least one product card is present.
            WebDriverWait(self.driver, 15).until(
                EC.presence_of_element_located((By.CSS_SELECTOR, self._CARD_SELECTOR))
            )

            for page in range(1, pages + 1):
                logger.info(f"正在爬取第 {page} 页")
                self._scrape_current_page()

                # Move on only when more pages were requested.
                if page < pages:
                    if not self._go_to_next_page():
                        logger.warning("无法翻页,停止爬取")
                        break
                    # Random pause between pages to reduce the risk of being blocked.
                    time.sleep(random.uniform(3, 7))

            return True
        except Exception as e:
            logger.error(f"搜索商品失败: {str(e)}")
            return False

    def _go_to_next_page(self):
        """Click the "next page" control, trying several selectors in turn.

        Returns:
            True when a button was clicked and product cards are present
            afterwards, False when no selector worked or an error occurred.
        """
        try:
            next_selectors = [
                ".next-btn.next-next",  # Taobao's "next page" button
                ".next-btn",
                "button[aria-label*='下一页']",
                ".next-btn[aria-label*='下一页']"
            ]

            for selector in next_selectors:
                try:
                    next_btn = WebDriverWait(self.driver, 10).until(
                        EC.element_to_be_clickable((By.CSS_SELECTOR, selector))
                    )
                    # Click through JavaScript so overlays cannot intercept the click.
                    self.driver.execute_script("arguments[0].click();", next_btn)

                    # NOTE(review): cards from the previous page already satisfy
                    # this wait, so it may not prove that the page changed -
                    # confirm against the live site.
                    WebDriverWait(self.driver, 15).until(
                        EC.presence_of_element_located((By.CSS_SELECTOR, self._CARD_SELECTOR))
                    )
                    logger.info("成功翻页到下一页")
                    return True
                except Exception:
                    # Selector did not work; try the next one. Catching Exception
                    # (not a bare except) keeps Ctrl-C able to stop the crawl.
                    continue

            return False
        except Exception as e:
            logger.error(f"翻页失败: {str(e)}")
            return False

    def _scrape_current_page(self):
        """Extract every product card on the current page into ``products_data``.

        Failures on a single card are logged and skipped; a failure affecting
        the whole page is logged and swallowed so the crawl can continue.
        """
        try:
            WebDriverWait(self.driver, 10).until(
                EC.presence_of_element_located((By.CSS_SELECTOR, self._CARD_SELECTOR))
            )

            items = self.driver.find_elements(By.CSS_SELECTOR, self._CARD_SELECTOR)
            if not items:
                # Fallback selector.
                items = self.driver.find_elements(By.CSS_SELECTOR, self._CARD_FALLBACK_SELECTOR)

            logger.info(f"找到 {len(items)} 个商品")

            successful_count = 0
            for index, item in enumerate(items):
                try:
                    # Scroll the card into view before reading it.
                    self.driver.execute_script("arguments[0].scrollIntoView({block: 'center'});", item)
                    time.sleep(0.5)

                    product_data = self._extract_product_data(item)
                    if product_data:
                        self.products_data.append(product_data)
                        successful_count += 1
                        logger.info(f"成功提取商品 {successful_count}: {product_data['title'][:20]}...")

                    # Take a short random break after every fifth card.
                    if (index + 1) % 5 == 0:
                        time.sleep(random.uniform(1, 3))
                except Exception as e:
                    logger.error(f"提取第{index + 1}个商品失败: {str(e)}")
                    continue

            logger.info(f"当前页面爬取完成,成功获取 {successful_count}/{len(items)} 个商品")
        except Exception as e:
            logger.error(f"爬取当前页面失败: {str(e)}")

    def _extract_product_data(self, item):
        """Build the product dict for one card element.

        Args:
            item: Selenium element of a single product card.

        Returns:
            A dict describing the product, or None when extraction failed.
            Missing text fields fall back to "未知" (``sales`` falls back to "0").
        """
        try:
            title = self._safe_find_text(item, ".Title--title--wJY8TeA span")

            # The price is rendered as separate integer and fraction elements.
            price_int = self._safe_find_text(item, ".Price--priceInt--BXYeCOI")
            price_float = self._safe_find_text(item, ".Price--priceFloat--rI_BYho")
            price = f"{price_int}{price_float}" if price_int else "未知"

            sales = self._safe_find_text(item, ".Price--realSales--wnhaqVr")
            shop_name = self._safe_find_text(item, ".ShopInfo--shopNameText--kxQC2cC")
            location = self._safe_find_text(item, ".Price--procity--Na1DQVe")

            # The product link is optional; an absent link yields an empty URL.
            try:
                link_element = item.find_element(By.CSS_SELECTOR, ".Title--title--wJY8TeA a")
                product_url = link_element.get_attribute("href")
            except Exception:
                product_url = ""

            product_data = {
                "title": title or "未知",
                "price": price or "未知",
                "shop_name": shop_name or "未知",
                "sales": sales or "0",
                "location": location or "未知",
                "url": product_url,
                "crawl_time": time.strftime("%Y-%m-%d %H:%M:%S")
            }
            return product_data
        except Exception as e:
            logger.error(f"提取商品数据失败: {str(e)}")
            return None

    def _safe_find_text(self, parent, selector):
        """Return the stripped text of ``selector`` under ``parent``, or "" on any lookup error."""
        try:
            element = parent.find_element(By.CSS_SELECTOR, selector)
            return element.text.strip()
        except Exception:
            # Best-effort lookup: a missing or stale element simply yields "".
            return ""

    def save_to_csv(self, filename="taobao_products.csv"):
        """Write the collected products to ``filename`` as CSV (UTF-8 with BOM).

        Returns:
            True on success, False when there is no data or writing failed.
        """
        if not self.products_data:
            logger.warning("没有数据可保存")
            return False

        try:
            df = pd.DataFrame(self.products_data)
            df.to_csv(filename, index=False, encoding='utf-8-sig')
            logger.info(f"数据已保存到 {filename},共 {len(self.products_data)} 条记录")
            return True
        except Exception as e:
            logger.error(f"保存CSV失败: {str(e)}")
            return False

    def save_to_json(self, filename="taobao_products.json"):
        """Write the collected products to ``filename`` as indented UTF-8 JSON.

        Returns:
            True on success, False when there is no data or writing failed.
        """
        if not self.products_data:
            logger.warning("没有数据可保存")
            return False

        try:
            with open(filename, 'w', encoding='utf-8') as f:
                json.dump(self.products_data, f, ensure_ascii=False, indent=4)
            logger.info(f"数据已保存到 {filename},共 {len(self.products_data)} 条记录")
            return True
        except Exception as e:
            logger.error(f"保存JSON失败: {str(e)}")
            return False

    def get_statistics(self):
        """Return a one-line summary: product count and number of distinct shop names."""
        if not self.products_data:
            return "没有数据"

        total = len(self.products_data)
        shops = len(set(item['shop_name'] for item in self.products_data))
        return f"总计: {total} 个商品, {shops} 个店铺"

    def close(self):
        """Quit the browser; safe to call more than once."""
        driver = getattr(self, 'driver', None)
        if driver:
            # Drop the reference first so a later call (for example from
            # __del__ after an explicit close()) does not quit the driver again.
            self.driver = None
            driver.quit()
            logger.info("浏览器已关闭")

    def __del__(self):
        """Destructor: make sure the browser is closed."""
        try:
            self.close()
        except Exception:
            # Cleanup during garbage collection / interpreter shutdown is best-effort.
            pass


# Usage example.
if __name__ == "__main__":
    crawler = TaobaoCrawler()

    try:
        # Search for products, sorted by sales volume.
        crawler.search_products("女装", pages=60, sort_by="sale-desc")

        # Persist the results.
        crawler.save_to_csv("taobao_products.csv")
        crawler.save_to_json("taobao_products.json")

        # Print the summary.
        print(crawler.get_statistics())
    finally:
        crawler.close()

# Reader request attached to this snippet on the source page:
# "Replace every use of the Google Chrome browser in this code with the Edge browser."
最新发布
11-04
评论
成就一亿技术人!
拼手气红包6.0元
还能输入1000个字符
 
红包 添加红包
表情包 插入表情
 条评论被折叠 查看
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值