import requests
from bs4 import BeautifulSoup
from requests.exceptions import ReadTimeout,ConnectionError,RequestException
import re
import json
import csv
# HTTP headers sent with every request made by this script.
headers = {
    # Two adjacent raw-string literals; Python joins them into one value.
    'User-Agent': r'xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx'
                  r'xxxxxxxxxxxxxxxxxxxxxxxxxx',
    'Referer': 'https://ai.taobao.com',
    'Connection': 'keep-alive',
}

# Module-level accumulator of scraped rows; goods() appends to it.
result = []

# Create (or truncate) result.csv and write the column header row.
# newline='' is what the csv module requires of the file object: without it
# the writer's '\r\n' terminator gets translated again on Windows, producing
# blank lines between rows. The encoding is pinned because the header is
# Chinese text and the platform default may not be able to encode it.
with open('result.csv', 'w', newline='', encoding='utf-8') as f:
    writer = csv.writer(f)
    writer.writerow(["链接","图片","销量","商铺名",'价格','发货地','描述'])
def cookie():
    """Parse ``cookie.txt`` into a ``{name: value}`` dict.

    The file is read from the current working directory and is expected to
    hold ``name=value`` pairs separated by ``;``. Each pair is split on its
    first ``=`` only, so values may themselves contain ``=``.

    Blank segments (a trailing ``;``, doubled ``;;``, a trailing newline, or
    an empty file) are skipped instead of raising.

    Returns:
        dict mapping cookie names to cookie values.

    Raises:
        OSError: if ``cookie.txt`` cannot be opened.
        ValueError: if a non-blank segment contains no ``=``.
    """
    cookies = {}
    with open('cookie.txt', 'r') as f:
        for segment in f.read().split(';'):
            segment = segment.strip()
            if not segment:
                # Nothing between two separators (or after the last one).
                continue
            name, value = segment.split('=', 1)
            cookies[name] = value
    return cookies
def Brand(url):
    """Fetch the page at *url* and print the text of each tag link on it.

    The page is requested with the module-level ``headers`` and the cookies
    returned by ``cookie()``, then parsed with BeautifulSoup using the
    ``lxml`` parser. Every ``<a>`` inside the ``<div>`` whose class is
    ``tags-wrap clearfix`` has its text printed (presumably the brand filter
    list, going by the function name -- confirm against the live page).

    If the request fails, or the page has no such ``<div>``, a message is
    printed and the function returns without printing any links.

    Args:
        url: address of the search page to download.

    Returns:
        None.
    """
    try:
        # An explicit timeout is needed for ReadTimeout to ever be raised;
        # without one a stalled connection blocks indefinitely.
        response = requests.get(url, headers=headers, cookies=cookie(), timeout=10)
    except ReadTimeout:
        print('连接超时...')
        return
    except ConnectionError:
        print('连接已断开...')
        return
    except RequestException:
        print('请求异常...')
        return

    soup = BeautifulSoup(response.text, 'lxml')
    tag_box = soup.find('div', 'tags-wrap clearfix')
    if tag_box is None:
        # find() returns None when nothing matches; say so instead of
        # failing with AttributeError on the chained find_all().
        print('未找到品牌列表...')
        return
    for link in tag_box.find_all('a'):
        print(link.text)
def goods(url):
    """Download one page of item JSON from *url* and collect its rows.

    The response body is decoded as JSON and the list under
    ``['result']['auction']`` is read. For every item in that list one row
    ``[link, picture, sales, shop, price, location, description]`` is
    appended to the module-level ``result`` list. All items present are
    processed, so a page with fewer than 60 entries no longer raises
    ``IndexError``; a response with no ``result``/``auction`` data adds
    nothing.

    Args:
        url: address of the JSON endpoint for a single result page.

    Returns:
        The module-level ``result`` list, i.e. every row collected so far
        across all calls, not just the rows of this page.

    Raises:
        requests.exceptions.RequestException: if the HTTP request fails or
            times out.
        ValueError: if the response body is not valid JSON.
        KeyError: if an item lacks one of the expected fields.
    """
    body = requests.get(url, headers=headers, cookies=cookie(), timeout=10).text
    payload = json.loads(body)
    # Tolerate a missing/null 'result' or 'auction' entry.
    auctions = (payload.get('result') or {}).get('auction') or []
    for item in auctions:
        link = item["clickUrl"]          # item link
        picture = item["origPicUrl"]     # picture
        sales = item["saleCount"]        # sales count
        shop = item["nick"]              # shop name
        price = item["realPrice"]        # price
        location = item["itemLocation"]  # shipping location
        # Strip the <span class=H>...</span> markup wrapped around parts of
        # the description, keeping the enclosed text.
        description = (
            item['description']
            .replace('<span class=H>', '')
            .replace('</span>', '')
        )
        result.append([link, picture, sales, shop, price, location, description])
    return result
def save(results, path='result.csv'):
    """Write the header row followed by *results* to a CSV file.

    The file is opened in ``'w'`` mode, so any previous content is replaced
    on every call. Because that truncation would also wipe a header written
    earlier, the header row is written here before the data rows.

    Args:
        results: iterable of rows, each an iterable of cell values.
        path: destination file; defaults to ``result.csv`` in the current
            working directory.

    Returns:
        None.
    """
    header = ["链接", "图片", "销量", "商铺名", '价格', '发货地', '描述']
    # newline='' lets the csv writer control line endings (no blank rows on
    # Windows); the encoding is pinned so non-ASCII text is always writable.
    with open(path, 'w', newline='', encoding='utf-8') as f:
        writer = csv.writer(f)
        writer.writerow(header)
        writer.writerows(results)
# Search page whose tag links are printed by Brand().
Url = 'https://ai.taobao.com/search/index.htm?pid=mm_12351394_2325537_70732358&unid=&source_id=search&key=%E7%94%B5%E7%83%AD%E6%B0%B4%E5%99%A8&b=sousuo_ssk&clk1=&prepvid=200_11.224.194.91_485047_1518681317486&spm=a231o.7712113%2Fb.a3342.1'
Brand(Url)

# The item endpoint URL is identical for every page except for the value of
# the `page` query parameter, which sits between these two fixed pieces.
_ITEM_URL_HEAD = 'https://ai.taobao.com/search/getItem.htm?_tb_token_=73be7033ee5f&__ajax__=1&pid=mm_12351394_2325537_70732358&unid=&clk1=&page='
_ITEM_URL_TAIL = '&pageSize=60&pvid=200_11.224.196.1_3783_1519127011374&squareFlag=&cat=&city=&custAssurance=&dc12=&debug=false&exchange7=&fcat=&fcatName=&from=&itemAssurance=&key=%E7%94%B5%E7%83%AD%E6%B0%B4%E5%99%A8&location=&maxPageSize=200&nick=sunny%5Cu5927%5Cu54E5%5Cu54E5&npx=50&pageNav=true&postFree=&ppath=&ppathName=&price=&sort=&sourceId=search&supportCod=&tmall='

# Walk pages 1 through 99; after each page, hand whatever goods() returned
# to save() so the CSV on disk is refreshed as the crawl progresses.
for page in range(1, 100):
    url = ''.join((_ITEM_URL_HEAD, str(page), _ITEM_URL_TAIL))
    results = goods(url)
    save(results)
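# 爬取淘宝商品 (Scrape Taobao products)
# 最新推荐文章于 2025-07-07 18:28:33 发布 (blog-page footer: "latest recommended article published 2025-07-07 18:28:33")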