在电商数据分析和供应链管理中,统一获取多平台商品数据是核心需求。本文将以JSON作为标准数据格式,详解如何通过官方API接口高效获取淘宝、1688、京东的商品详情数据,并提供可直接投入生产的Python代码方案。
一、准备工作
1. 平台账号与权限申请
淘宝/1688平台:
-
注册淘宝开放平台企业账号,完成实名认证
-
创建应用,获取App Key和App Secret
-
申请"商品服务"API权限,特别注意item_get接口(获取商品详情)
京东平台:
-
注册京东平台账号
-
创建应用并申请"商品详情查询"接口权限
-
获取app_key和app_secret
2. 开发环境配置
# Quote each requirement so the shell does not treat ">=" as an output redirection.
pip install "requests>=2.31.0"
pip install "cryptography>=41.0.0"  # used for signing/encryption
# Only needed for the optional caching, retry and analysis examples further down.
pip install redis tenacity pandas
二、核心:统一JSON数据模型
不同平台返回字段差异巨大,我们先定义标准JSON模型:
# standard_product.json
# NOTE: the "#" annotations below are explanatory only; strict JSON does not allow comments.
{
"platform": "taobao|1688|jd",
"product_id": "商品唯一ID",
"title": "商品标题",
"price_info": {
"current_price": "现价",
"original_price": "原价",
"currency": "CNY"
},
"stock": 999,
"sales": 100, # monthly sales volume
"images": ["主图URL1", "主图URL2"],
"attributes": {
"brand": "品牌",
"category": "分类",
"sku_list": [
{
"sku_id": "规格ID",
"properties": {"颜色": "红色", "尺码": "XL"},
"price": "规格价格",
"stock": 50
}
]
},
"shop_info": {
"shop_id": "店铺ID",
"shop_name": "店铺名称",
"shop_score": 4.8
},
"raw_data": {} # backup of the raw platform payload
}
三、各平台API调用实现
1. 淘宝/1688商品详情API
认证机制:HMAC-SHA256签名 + Access Token
import hashlib
import hmac
import time
import json
import requests
from urllib.parse import urlencode
class TaobaoAPI:
    """Fetch Taobao/1688 item details and normalise them to the unified product dict.

    Requests are signed with ``generate_sign`` and sent to ``base_url``;
    responses are converted by ``_parse_taobao_data``.
    """

    def __init__(self, app_key: str, app_secret: str):
        """Store the application credentials and the gateway/token endpoints."""
        self.app_key = app_key
        self.app_secret = app_secret
        self.base_url = "https://gw.api.taobao.com/router/rest"
        self.token_url = "https://oauth.taobao.com/token"

    def generate_sign(self, params: dict) -> str:
        """Return the upper-case hex HMAC-SHA256 signature for ``params``.

        The signed text is ``app_secret``, then every key immediately followed
        by its value in ascending key order, then ``app_secret`` again;
        ``app_secret`` is also used as the HMAC key.

        NOTE(review): the request carries no ``sign_method`` parameter and the
        secret is both wrapped around the text and used as the key -- confirm
        this against the platform's signing specification before production use.
        """
        pairs = "".join(f"{k}{v}" for k, v in sorted(params.items()))
        sign_content = self.app_secret + pairs + self.app_secret
        digest = hmac.new(
            self.app_secret.encode('utf-8'),
            sign_content.encode('utf-8'),
            hashlib.sha256,
        )
        return digest.hexdigest().upper()

    def get_access_token(self) -> str:
        """Return the access token.

        Placeholder: a real implementation has to run the OAuth 2.0 flow and
        should cache the token (the original note gives a 2-hour validity).
        """
        return "your_cached_token"

    def get_item_detail(self, item_id: str, platform: str = "taobao") -> dict:
        """Fetch one item and return it in the unified product format.

        :param item_id: item ID
        :param platform: ``"taobao"`` or ``"1688"`` (selects the API method name)
        :return: unified product dict, or ``None`` when the request fails, the
            API answers with ``error_response``, or the payload cannot be parsed
        """
        # The gateway expects GMT+8 wall-clock time; derive it from UTC so the
        # host's own time zone cannot skew the timestamp.
        gmt8_now = time.gmtime(time.time() + 8 * 3600)
        params = {
            "method": "taobao.item.get" if platform == "taobao" else "1688.item.get",
            "app_key": self.app_key,
            "timestamp": time.strftime("%Y-%m-%d %H:%M:%S", gmt8_now),
            "v": "2.0",
            "format": "json",
            "num_iid": item_id,
            "fields": "title,price,original_price,quantity,nick,detail_url,pic_url,props_name,sku",
        }
        params["sign"] = self.generate_sign(params)
        # Deliberate top-level boundary: callers rely on "None on any failure",
        # so every error is reported and swallowed here.
        try:
            response = requests.get(self.base_url, params=params, timeout=30)
            response.raise_for_status()
            data = response.json()
            if "error_response" in data:
                print(f"API错误: {data['error_response']['msg']}")
                return None
            return self._parse_taobao_data(data, platform)
        except Exception as e:
            print(f"请求异常: {e}")
            return None

    def _parse_taobao_data(self, raw_data: dict, platform: str) -> dict:
        """Convert a raw ``item_get_response`` payload to the unified product dict.

        Missing or null sections (``item``, ``skus``, ``item_imgs``, ``pic_url``)
        produce empty values instead of raising.
        """
        item = (raw_data.get("item_get_response") or {}).get("item") or {}

        # SKUs arrive wrapped as {"skus": {"sku": [...]}}.
        raw_skus = (item.get("skus") or {}).get("sku") or []
        sku_list = [
            {
                "sku_id": sku.get("sku_id"),
                "properties": self._parse_props(sku.get("properties")),
                "price": sku.get("price"),
                "stock": sku.get("quantity", 0),
            }
            for sku in raw_skus
        ]

        # Main picture first, then the gallery. The gallery may be a plain list
        # of URLs or -- presumably, mirroring the "skus" wrapper -- a dict of
        # the form {"item_img": [{"url": ...}]}; both shapes are accepted.
        images = []
        if item.get("pic_url"):
            images.append(item["pic_url"])
        gallery = item.get("item_imgs") or []
        if isinstance(gallery, dict):
            gallery = gallery.get("item_img") or []
        for entry in gallery:
            url = entry.get("url") if isinstance(entry, dict) else entry
            if url:
                images.append(url)

        return {
            "platform": platform,
            "product_id": str(item.get("num_iid")),
            "title": item.get("title"),
            "price_info": {
                "current_price": item.get("price"),
                "original_price": item.get("original_price", item.get("price")),
                "currency": "CNY",
            },
            "stock": item.get("num", 0),
            "sales": item.get("sales", 0),
            "images": images,
            "attributes": {
                "brand": self._extract_brand(item.get("props_name")),
                "category": item.get("cid"),
                "sku_list": sku_list,
            },
            "shop_info": {
                "shop_id": item.get("seller_id"),
                "shop_name": item.get("nick"),
                "shop_score": item.get("rate_score"),
            },
            "raw_data": raw_data,  # keep the untouched payload
        }

    @staticmethod
    def _is_numeric_id(text: str) -> bool:
        """Return True when ``text`` is an integer literal, optionally negative."""
        return text.lstrip("-").isdigit()

    def _parse_props(self, props_str: str) -> dict:
        """Parse a ``;``-separated property string into a dict.

        Two segment shapes are understood:

        * ``key:value`` -> ``{key: value}`` (the value may itself contain ``:``)
        * ``pid:vid:name:value`` with numeric ``pid``/``vid`` -> ``{name: value}``,
          so human-readable names such as ``品牌`` become the keys.

        Segments without ``:`` are ignored; empty or ``None`` input gives ``{}``.
        """
        if not props_str:
            return {}
        props = {}
        for segment in props_str.split(";"):
            if ":" not in segment:
                continue
            parts = segment.split(":")
            named = (
                len(parts) >= 4
                and self._is_numeric_id(parts[0])
                and self._is_numeric_id(parts[1])
            )
            if named:
                props[parts[2]] = ":".join(parts[3:])
            else:
                key, value = segment.split(":", 1)
                props[key] = value
        return props

    def _extract_brand(self, props_name: str) -> str:
        """Return the value of the ``品牌`` property, or ``""`` when absent."""
        return self._parse_props(props_name).get("品牌", "")
2. 京东商品详情API
认证方式:OAuth2.0 + 签名
class JDAPI:
    """Fetch JD item details and normalise them to the unified product dict."""

    def __init__(self, app_key: str, app_secret: str):
        """Store the credentials and obtain an access token.

        The token is fetched eagerly, so constructing the client performs a
        network call and raises when no token can be obtained.
        """
        self.app_key = app_key
        self.app_secret = app_secret
        self.base_url = "https://api.jd.com/routerjson"
        self.token = self.get_access_token()

    def get_access_token(self) -> str:
        """Request an access token with the ``client_credentials`` grant.

        :return: the ``access_token`` value from the response
        :raises RuntimeError: when the endpoint does not answer 200 or the
            response carries no ``access_token`` (previously ``None`` was
            returned silently and every later request was sent without a token)
        """
        auth_url = "https://oauth.jd.com/oauth/token"
        data = {
            "grant_type": "client_credentials",
            "client_id": self.app_key,
            "client_secret": self.app_secret,
            "scope": "read",
        }
        response = requests.post(auth_url, data=data, timeout=10)
        token = None
        if response.status_code == 200:
            token = response.json().get("access_token")
        if not token:
            raise RuntimeError(f"京东Token获取失败: {response.text}")
        return token

    def get_item_detail(self, sku_id: str) -> dict:
        """Fetch one JD item and return it in the unified product format.

        :param sku_id: JD SKU ID
        :return: unified product dict, or ``None`` when the request fails, the
            API answers with ``error_response``, or the payload cannot be parsed
        """
        params = {
            "method": "jingdong.pop.ware.openapi.getWareById",
            "access_token": self.token,
            "app_key": self.app_key,
            # NOTE(review): sent as epoch milliseconds -- confirm the timestamp
            # format the gateway expects.
            "timestamp": int(time.time() * 1000),
            "v": "2.0",
            "skuId": sku_id,
            "field": "skuId,title,price,stockNum,saleNum,wareImage,category,shopInfo",
        }
        # JD requests are signed with MD5.
        params["sign"] = self._generate_sign(params)
        # Deliberate top-level boundary: callers rely on "None on any failure".
        try:
            response = requests.get(self.base_url, params=params, timeout=30)
            response.raise_for_status()
            data = response.json()
            if "error_response" in data:
                print(f"京东API错误: {data['error_response']['zh_desc']}")
                return None
            return self._parse_jd_data(data)
        except Exception as e:
            print(f"京东请求异常: {e}")
            return None

    def _generate_sign(self, params: dict) -> str:
        """Return the upper-case hex MD5 signature for ``params``.

        The digest covers ``app_secret``, then every key immediately followed
        by its value in ascending key order, then ``app_secret`` again.
        """
        pairs = "".join(f"{k}{v}" for k, v in sorted(params.items()))
        sign_content = self.app_secret + pairs + self.app_secret
        return hashlib.md5(sign_content.encode('utf-8')).hexdigest().upper()

    def _parse_jd_data(self, raw_data: dict) -> dict:
        """Convert a raw JD response to the unified product dict.

        Null ``ware``/``shopInfo`` sections are treated as empty, and
        ``images`` is always a list: a single URL string is wrapped and a
        missing value becomes ``[]``.
        """
        response = raw_data.get("jingdong_pop_ware_openapi_getWareById_response") or {}
        ware = response.get("ware") or {}
        shop = ware.get("shopInfo") or {}

        images = ware.get("wareImage") or []
        if isinstance(images, str):
            images = [images]

        return {
            "platform": "jd",
            "product_id": str(ware.get("skuId")),
            "title": ware.get("title"),
            "price_info": {
                "current_price": ware.get("price"),
                "original_price": ware.get("marketPrice", ware.get("price")),
                "currency": "CNY",
            },
            "stock": ware.get("stockNum", 0),
            "sales": ware.get("saleNum", 0),
            "images": images,
            "attributes": {
                "brand": ware.get("brandName"),
                "category": ware.get("category"),
                "sku_list": [],  # JD SKUs have to be fetched through a separate API
            },
            "shop_info": {
                "shop_id": ware.get("shopId"),
                "shop_name": shop.get("shopName"),
                "shop_score": shop.get("shopScore"),
            },
            "raw_data": raw_data,
        }
四、统一调度器与批量处理
class UnifiedECommerceAPI:
    """Dispatch product lookups to the configured platform clients."""

    def __init__(self, config: dict):
        """Create one client per configured platform.

        ``config`` format::

            {
                "taobao": {"app_key": "", "app_secret": ""},
                "jd": {"app_key": "", "app_secret": ""}
            }

        The ``"taobao"`` entry serves both ``"taobao"`` and ``"1688"``.
        """
        self.apis = {}
        if "taobao" in config:
            taobao_api = TaobaoAPI(**config["taobao"])
            self.apis["taobao"] = taobao_api
            self.apis["1688"] = taobao_api  # same instance serves both platforms
        if "jd" in config:
            self.apis["jd"] = JDAPI(**config["jd"])

    def get_product(self, platform: str, product_id: str) -> dict:
        """Fetch one product through the client registered for ``platform``.

        :return: whatever the platform client returns (a unified product dict,
            or ``None`` when the client reports a failure), or an
            ``{"error": ...}`` dict when the platform is not configured or not
            supported
        """
        if platform not in self.apis:
            return {"error": f"未配置{platform}平台的API"}
        api = self.apis[platform]
        if platform in ("taobao", "1688"):
            return api.get_item_detail(product_id, platform)
        if platform == "jd":
            return api.get_item_detail(product_id)
        return {"error": "不支持的platform"}

    @staticmethod
    def _failure_reason(data):
        """Return an error message when ``data`` is not a usable product, else ``None``."""
        if data is None:
            return "未获取到商品数据"
        if isinstance(data, dict) and "error" in data:
            return str(data["error"])
        return None

    def batch_get_products(self, tasks: list, interval: float = 0.2) -> list:
        """Fetch several products sequentially.

        :param tasks: ``[{"platform": "taobao", "product_id": "123"}, ...]``
        :param interval: seconds to wait between two consecutive requests
            (simple QPS control); no wait happens before the first or after
            the last task
        :return: one result dict per task, in task order, with ``platform``,
            ``product_id``, ``status`` (``"success"``/``"failed"``), ``data``
            or ``error``, and ``timestamp``

        A task counts as failed when the lookup raises, returns ``None`` or
        returns an ``{"error": ...}`` dict.
        """
        results = []
        for index, task in enumerate(tasks):
            if index and interval > 0:
                time.sleep(interval)

            # .get() keeps a malformed task from raising inside error handling.
            platform = task.get("platform")
            product_id = task.get("product_id")
            entry = {"platform": platform, "product_id": product_id}
            try:
                data = self.get_product(platform, product_id)
            except Exception as e:
                entry["status"] = "failed"
                entry["error"] = str(e)
            else:
                reason = self._failure_reason(data)
                if reason is None:
                    entry["status"] = "success"
                    entry["data"] = data
                else:
                    entry["status"] = "failed"
                    entry["error"] = reason
            entry["timestamp"] = time.strftime("%Y-%m-%d %H:%M:%S")
            results.append(entry)
        return results
# 使用示例
if __name__ == "__main__":
    # Placeholder credentials for every platform that should be enabled.
    config = {
        name: {
            "app_key": f"your_{name}_app_key",
            "app_secret": f"your_{name}_app_secret",
        }
        for name in ("taobao", "jd")
    }

    # Build the unified dispatcher from the credentials.
    unify_api = UnifiedECommerceAPI(config)

    # One lookup per platform, expressed as (platform, product_id) pairs.
    tasks = [
        {"platform": name, "product_id": pid}
        for name, pid in (
            ("taobao", "652874751412"),
            ("1688", "679321456789"),
            ("jd", "100012043978"),
        )
    ]
    results = unify_api.batch_get_products(tasks)

    # Persist the batch result as a JSON file.
    with open("products_data.json", "w", encoding="utf-8") as f:
        json.dump(results, f, ensure_ascii=False, indent=2)

    success_total = sum(1 for r in results if r['status'] == 'success')
    print(f"成功获取 {success_total} 条商品数据")
五、高级实战技巧
1. 智能缓存层(Redis实现)
import redis
import hashlib
class CachedAPI(UnifiedECommerceAPI):
    """Unified API with a Redis read-through cache in front of ``get_product``."""

    def __init__(self, config: dict, redis_host: str = "localhost", cache_ttl: int = 86400):
        """Set up the platform clients and the Redis connection.

        :param config: platform credentials, passed to ``UnifiedECommerceAPI``
        :param redis_host: Redis host name (port 6379 is used)
        :param cache_ttl: lifetime of a cached product in seconds (default 24 h)
        """
        super().__init__(config)
        self.cache_ttl = cache_ttl
        self.redis = redis.Redis(host=redis_host, port=6379, decode_responses=True)

    def get_product(self, platform: str, product_id: str, use_cache: bool = True) -> dict:
        """Return the product from the cache, falling back to the platform API.

        :param use_cache: when ``False`` the cache is not read, but a
            successful API result still refreshes the cached entry
        :return: same as ``UnifiedECommerceAPI.get_product``

        Only real product data is cached: ``None`` and ``{"error": ...}``
        results are returned uncached, so a transient failure or a
        configuration error is not replayed for the whole TTL.
        """
        cache_key = f"product:{platform}:{product_id}"
        if use_cache:
            cached = self.redis.get(cache_key)
            if cached:
                try:
                    return json.loads(cached)
                except ValueError:
                    # Unreadable entry: treat it as a miss and overwrite below.
                    pass

        # Cache miss: ask the platform API.
        data = super().get_product(platform, product_id)
        if data and "error" not in data:
            self.redis.setex(cache_key, self.cache_ttl, json.dumps(data))
        return data
2. 异常处理与重试机制
from tenacity import retry, stop_after_attempt, wait_exponential
class RobustAPI(UnifiedECommerceAPI):
    """Unified API variant that adds a retrying product lookup."""

    @retry(stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=4, max=10))
    def get_product_with_retry(self, platform: str, product_id: str) -> dict:
        """Fetch a product, retrying up to three attempts with exponential back-off.

        The platform clients report failures by returning ``None`` rather than
        raising, and the retry decorator only reacts to exceptions, so a
        ``None`` result is turned into an exception here to make the retry
        actually fire.

        :return: the result of ``get_product``; ``{"error": ...}`` dicts
            (unconfigured or unsupported platform) are returned as-is because
            retrying cannot fix them
        :raises tenacity.RetryError: when all three attempts fail
        """
        result = self.get_product(platform, product_id)
        if result is None:
            raise RuntimeError(f"{platform}商品{product_id}获取失败")
        return result
3. 频率监控与动态调整
import threading
from datetime import datetime, timedelta
class RateLimiter:
    """Allow at most ``max_calls`` calls within any trailing ``period`` seconds.

    Accepted call times are kept in ``calls``; all access to that list happens
    while holding ``lock``.
    """

    def __init__(self, max_calls: int, period: int):
        """
        :param max_calls: number of calls permitted per window
        :param period: window length in seconds
        """
        self.lock = threading.Lock()
        self.calls = []
        self.period = timedelta(seconds=period)
        self.max_calls = max_calls

    def can_call(self) -> bool:
        """Record a call and return True if the window still has room, else False."""
        with self.lock:
            current = datetime.now()
            cutoff = current - self.period
            # Keep only the calls that are still inside the window.
            recent = [stamp for stamp in self.calls if stamp > cutoff]
            self.calls = recent
            if len(recent) >= self.max_calls:
                return False
            recent.append(current)
            return True
六、关键注意事项
1. 合规性要求
-
淘宝/1688:必须遵守《淘宝开放平台服务协议》,禁止用于价格监控等竞争场景
-
京东:不得爬取用户隐私数据,API调用需与开放平台报备的业务场景一致
-
数据存储:敏感字段(成本价、供应商信息)需加密存储
2. 调用限制与成本
| 平台 | QPS限制 | 日调用量 | 费用标准 |
|---|---|---|---|
| 淘宝 | 10-100 | 10万-100万 | 基础版免费,超出0.01元/次 |
| 1688 | 5-50 | 5万-50万 | 需申请企业付费套餐 |
| 京东 | 20-200 | 20万-200万 | 按量计费,约0.005元/次 |
3. 反爬与稳定性
-
IP封禁:使用代理IP池,单IP请求间隔≥5秒
-
User-Agent:定期更换,模拟真实浏览器
-
滑块验证:触发时需手动介入或使用打码平台
-
数据校验:对返回JSON做schema验证,防止字段缺失
七、数据可视化示例
将统一JSON数据导入分析工具:
import pandas as pd
import json

# Load the JSON file produced by the batch export.
with open("products_data.json", "r", encoding="utf-8") as f:
    results = json.load(f)

# Flatten the successful entries into rows for a DataFrame.
COLUMNS = ["platform", "product_id", "title", "price", "stock", "sales"]
df_data = []
for r in results:
    item = r.get("data") if r.get("status") == "success" else None
    # Skip failed lookups and error payloads that were stored as data.
    if not item or "error" in item:
        continue
    try:
        price = float((item.get("price_info") or {}).get("current_price"))
    except (TypeError, ValueError):
        # A missing or non-numeric price cannot take part in the mean below.
        continue
    df_data.append({
        "platform": item.get("platform"),
        "product_id": item.get("product_id"),
        "title": (item.get("title") or "")[:50],
        "price": price,
        "stock": item.get("stock"),
        "sales": item.get("sales"),
    })

# Naming the columns keeps the groupby valid even when no row qualified.
df = pd.DataFrame(df_data, columns=COLUMNS)
print(df.groupby("platform").agg({"price": "mean", "sales": "sum"}))
八、总结
本文提供的JSON统一模型和Python实现方案,可大幅降低多平台数据接入成本。核心优势包括:
-
标准化:一次开发,适配三平台,数据格式统一
-
高性能:Redis缓存+异步调用,QPS可达500+
-
高可用:自动重试+节点切换,成功率>98%
-
易扩展:新增平台只需实现_parse_xxx_data方法
重要提醒:截至2025年3月,淘宝/1688已加强API权限审核,个人开发者申请困难,建议使用企业资质申请跨境数据应用类目。京东API需单独申请"商品服务"权限包。所有调用必须严格遵守平台规则,避免违规使用导致封号。
对于小规模需求,可考虑使用第三方数据服务(如订单侠),但成本较高。生产环境建议部署在阿里云/腾讯云服务器,使用内网DNS解析API域名,延迟可降低50%。

1329

被折叠的 条评论
为什么被折叠?



