import re
import sys
import time

import requests
from lxml import html

from urls import urls
# Taobao item pages to scrape; the numeric item id is carried in the `id`
# query parameter of each URL.
# NOTE(review): this assignment rebinds the name `urls` that the file also
# imports from the local `urls` module, so the imported value is not the one
# used by the rest of the script - confirm which list is intended.
urls = [
    'https://item.taobao.com/item.htm?spm=a1z10.3-c-s.w4002-16098873358.11.12811196bDJpsC&id=538816177755',
    'https://item.taobao.com/item.htm?spm=a1z10.3-c-s.w4002-18441682902.15.156b1a4aKCJPxi&id=557184688722',
    'https://item.taobao.com/item.htm?spm=a1z10.3-c-s.w4002-14875785479.17.7d669cc5hW24ot&id=18560097304',
    'https://item.taobao.com/item.htm?spm=a1z10.3-c-s.w4002-16834862113.10.68d5e6b8RM23BW&id=525536903949',
    'https://item.taobao.com/item.htm?spm=a1z10.3-c-s.w4002-17860968276.13.45034af95Mqtrg&id=574634293410',
    'https://item.taobao.com/item.htm?id=559772839020',
    'https://item.taobao.com/item.htm?spm=a1z10.3-c.w4002-15765945305.19.64962492ghdVd6&id=551742703161',
    'https://item.taobao.com/item.htm?spm=a1z10.3-c-s.w4002-16909601097.12.6e372779zv9YQo&id=530114471418',
    'https://item.taobao.com/item.htm?spm=a1z10.3-c-s.w4002-16505135429.16.227c21c6nW3WtO&id=573325276834',
    'https://item.taobao.com/item.htm?spm=a1z10.3-c-s.w4002-1254031097.19.7c55c813t1BULK&id=558817493389',
    'https://item.taobao.com/item.htm?spm=a1z10.1-c-s.w5003-18705208106.5.5ce3df40Rr2Qy8&id=573919813728',
    'https://item.taobao.com/item.htm?spm=a1z10.3-c-s.w4002-14956884862.11.IRLK69&id=21773744751',
]

# Raw `Cookie` header value, written as `name=value` pairs separated by ";".
# NOTE(review): this is a hard-coded session cookie, and the literal ends with
# a "…" character, so it looks truncated. A non-ASCII character may also not
# be encodable in an HTTP header - replace with a complete, current value.
cookie = 'miid=1033223038684955227; t=2cfcf3b7185144930719d0f830934e0c; hng=CN%7Czh-CN%7CCNY%7C156; cna=0qheFP9WgnkCAcpkM1rYgzyz; tg=0; thw=cn; ubn=p; ucn=unsz; enc=tFAyrpuJbf%2B51crEGOB8t7GEL4sw%2FwXOcrfZe%2BT9PgQVCmTx%2FmKVveMg5rIJmnHww2kPptfT%2BrKzDTva4EUVuw%3D%3D; x=e%3D1%26p%3D*%26s%3D0%26c%3D0%26f%3D0%26g%3D0%26t%3D0%26__ll%3D-1%26_ato%3D0; cc=VT5L2FSpdA%3D%3D; mt=ci=0_0; _m_h5_tk=65ca25ff5cf4fe3af44e5bb02ed95860_1547179622973; _m_h5_tk_enc=3f9a8142437455712674da408bcb5e64; cookie2=3271f302e0a5a30c25bd2b863d5a41f1; v=0; tb_token=3e9e903357357; isg=BCgogtR54Pzpg8uWyfYAaBoF-Rb6-el5ZlCFSOJZu6OWPcqnimG16oM8MbVoDUQz; l=aBdlq0dYyHamKgDBSMaOaSS8M702Q_ZPZDAy1MwHeTEhNBrU7RXy1ctb-_adSYAcfrVR_Ihotyw…'


def _parse_cookie_header(raw):
    """Split a raw ``Cookie`` header string into a ``{name: value}`` dict.

    Each ";"-separated segment is stripped of surrounding whitespace and split
    on its FIRST "=" only, so values that themselves contain "=" (for example
    ``mt=ci=0_0``) are kept whole. Segments that are empty or contain no "="
    are skipped instead of raising.
    """
    jar = {}
    for segment in raw.split(';'):
        name, sep, value = segment.strip().partition('=')
        if name and sep:
            jar[name] = value
    return jar


# Cookie dict in the form accepted by the `cookies=` argument of requests.
cookies = _parse_cookie_header(cookie)

user_agent = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36'

# Base request headers shared by every request the script makes.
headers = {
    'User-Agent': user_agent,
}
# Maximum number of times the sales endpoint is polled for one item before
# giving up (the polling used to be unbounded).
_SALES_MAX_ATTEMPTS = 10
# Per-request timeout in seconds, so a stalled connection cannot hang the run.
_REQUEST_TIMEOUT = 10


def _item_id_from_url(url):
    """Return the item id carried by *url*.

    The numeric ``id`` query parameter is used wherever it appears in the
    query string; if there is none, fall back to the text after the last "=".
    """
    found = re.search(r'[?&]id=(\d+)', url)
    if found:
        return found.group(1)
    return url.split('=')[-1]


def _fetch_sold_quantity(item_id, seller_id, referer):
    """Poll the item-detail endpoint and return the sold quantity as a string.

    The endpoint is requested up to ``_SALES_MAX_ATTEMPTS`` times, sleeping
    one second between attempts, until the response contains exactly one
    ``soldQuantity`` match. Returns ``None`` if no attempt succeeds.
    Network errors raised by ``requests`` propagate to the caller.
    """
    js_url = (
        'https://detailskip.taobao.com/service/getData/1/p1/item/detail/sib.htm?itemId='
        + item_id + '&sellerId=' + seller_id
        + '&modules=dynStock,qrcode,viewer,price,duty,xmpPromotion,delivery,activity,fqg,zjys,couponActivity,soldQuantity,originalPrice,tradeContract&callback=onSibRequestSuccess'
    )
    # Per-request copy of the shared headers with this item's page as Referer.
    request_headers = dict(headers, Referer=referer)
    for attempt in range(_SALES_MAX_ATTEMPTS):
        if attempt:
            time.sleep(1)
        # Headers go in by keyword only: the second positional argument of
        # requests.get is `params`, which would append them to the query string.
        reply = requests.get(js_url, headers=request_headers, cookies=cookies,
                             timeout=_REQUEST_TIMEOUT)
        matches = re.findall(r'"soldQuantity":{"confirmGoodsCount":(.+),', reply.text)
        if len(matches) == 1:
            # The greedy group can run past the number; keep the part before
            # the first comma.
            return matches[0].split(',')[0]
    return None


for url in urls:
    try:
        page = requests.get(url, headers=headers, timeout=_REQUEST_TIMEOUT)
        shop_name = re.findall('sellerNick.+:.*,?', page.text)
        shop_name = shop_name[0].split(":")[-1].split("'")[1]
        # The seller's user id.
        user_id = re.findall(r";userid=(\d+);", page.text)[0]
        # Build the detail-endpoint request for this item.
        item_id = _item_id_from_url(url)
        sell_num = _fetch_sold_quantity(item_id, user_id, url)
    except Exception as e:
        # Best-effort scrape: report the failure and move on to the next URL
        # instead of dropping it silently.
        print('failed to scrape', url, '-', repr(e), file=sys.stderr)
        continue
    if sell_num is None:
        print('no sales figure returned for', url, 'after',
              _SALES_MAX_ATTEMPTS, 'attempts', file=sys.stderr)
        continue
    print('店名:', shop_name, '销量:', sell_num)