# 爬取淘宝商品
#
# 该博客围绕淘宝商品爬取展开,虽未给出具体内容,但可知核心是对淘宝商品数据进行抓取,这在信息技术领域涉及数据采集等相关操作。
#
# 摘要生成于 C知道,由 DeepSeek-R1 满血版支持,前往体验 >

import requests
from bs4 import BeautifulSoup
from requests.exceptions import ReadTimeout,ConnectionError,RequestException
import re
import json
import csv

# HTTP headers attached to every request made by this script.
headers = {}
# Placeholder browser identification string, kept as two adjacent raw
# literals that Python joins into one value.
headers['User-Agent'] = (
    r'xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx'
    r'xxxxxxxxxxxxxxxxxxxxxxxxxx'
)
headers['Referer'] = 'https://ai.taobao.com'
headers['Connection'] = 'keep-alive'

# Module-level accumulator for every scraped row; goods() appends to it.
result = []

# Create result.csv and write the column header row.
# newline='' is what the csv module requires for files it writes (otherwise
# an extra blank line appears between rows on Windows), and the explicit
# UTF-8 encoding keeps the Chinese header text independent of the locale.
with open('result.csv', 'w', newline='', encoding='utf-8') as f:
    writer = csv.writer(f)
    writer.writerow(["链接", "图片", "销量", "商铺名", '价格', '发货地', '描述'])

def cookie():
    """Load request cookies from ``cookie.txt`` in the working directory.

    The file content is split on ``;`` and each fragment on its first ``=``,
    so a value may itself contain ``=``. Fragments without ``=`` (for example
    the empty piece left by a trailing ``;`` or by an empty file) are skipped
    instead of raising ``ValueError``.

    Returns:
        dict: cookie name -> cookie value.

    Raises:
        OSError: if ``cookie.txt`` cannot be opened.
    """
    with open('cookie.txt', 'r') as f:
        raw = f.read()

    cookies = {}
    for fragment in raw.split(';'):
        name, sep, value = fragment.strip().partition('=')
        if not sep:
            # No '=' in this fragment: nothing to store.
            continue
        cookies[name] = value
    return cookies

def Brand(url, timeout=10):
    """Download a search page and print the text of each brand tag link.

    The links are the ``<a>`` elements inside the ``div`` whose class is
    ``tags-wrap clearfix``.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server. requests does not time out
            unless this is given, so without it the ``ReadTimeout`` handler
            below could never run.

    Returns:
        None. A failed request or a page without the tag container prints a
        message and returns, rather than failing later on an unbound or
        ``None`` value.
    """
    try:
        response = requests.get(
            url, headers=headers, cookies=cookie(), timeout=timeout
        )
    except ReadTimeout:
        print('连接超时...')
        return
    except ConnectionError:
        print('连接已断开...')
        return
    except RequestException:
        print('请求异常...')
        return

    soup = BeautifulSoup(response.text, 'lxml')
    container = soup.find('div', 'tags-wrap clearfix')
    if container is None:
        # find() returns None when the page has no such element.
        print('未找到品牌标签...')
        return
    for link in container.find_all('a'):
        print(link.text)

def goods(url, timeout=10):
    """Fetch one page of search results and append its items to ``result``.

    The response body is parsed as JSON and the item list is read from
    ``['result']['auction']``. Every item on the page is processed; the
    previous fixed ``range(0, 60)`` raised ``IndexError`` whenever a page
    held fewer than 60 items.

    Args:
        url: Address of the JSON endpoint for one result page.
        timeout: Seconds to wait for the server before requests raises a
            timeout error instead of blocking indefinitely.

    Returns:
        list: the module-level ``result`` list, which grows across calls.
        Each row is
        ``[link, picture, sales, shop, price, location, description]``.

    Raises:
        requests.exceptions.RequestException: if the request fails.
        ValueError: if the body is not valid JSON.
        KeyError: if the expected keys are missing from the payload.
    """
    body = requests.get(
        url, headers=headers, cookies=cookie(), timeout=timeout
    ).text
    auctions = json.loads(body)['result']['auction']
    for item in auctions:
        link = item["clickUrl"]            # item link
        picture = item["origPicUrl"]       # picture URL
        sales = item["saleCount"]          # sales count
        shop = item["nick"]                # shop name
        price = item["realPrice"]          # price
        location = item["itemLocation"]    # shipping origin
        # Strip the highlight markup wrapped around matched keywords.
        description = (
            item['description']
            .replace('<span class=H>', '')
            .replace('</span>', '')
        )
        result.append(
            [link, picture, sales, shop, price, location, description]
        )
    return result

def save(results):
    """Write the header row followed by ``results`` to ``result.csv``.

    The file is opened in ``'w'`` mode, so each call replaces its previous
    content. The header row is therefore written here as well; otherwise a
    header written earlier would be erased by the first call.

    Args:
        results: Iterable of rows, each an iterable of cell values.

    Returns:
        None.
    """
    column_names = ["链接", "图片", "销量", "商铺名", '价格', '发货地', '描述']
    # newline='' is what the csv module requires for files it writes; the
    # explicit UTF-8 encoding keeps Chinese text independent of the locale.
    with open('result.csv', 'w', newline='', encoding='utf-8') as f:
        writer = csv.writer(f)
        writer.writerow(column_names)
        writer.writerows(results)


# Search page whose brand tag links are printed by Brand().
Url = 'https://ai.taobao.com/search/index.htm?pid=mm_12351394_2325537_70732358&unid=&source_id=search&key=%E7%94%B5%E7%83%AD%E6%B0%B4%E5%99%A8&b=sousuo_ssk&clk1=&prepvid=200_11.224.194.91_485047_1518681317486&spm=a231o.7712113%2Fb.a3342.1'
Brand(Url)

# The item endpoint URL changes only in its `page` query value, so the fixed
# text before and after that value is defined once, outside the loop.
_ITEM_URL_HEAD = 'https://ai.taobao.com/search/getItem.htm?_tb_token_=73be7033ee5f&__ajax__=1&pid=mm_12351394_2325537_70732358&unid=&clk1=&page='
_ITEM_URL_TAIL = '&pageSize=60&pvid=200_11.224.196.1_3783_1519127011374&squareFlag=&cat=&city=&custAssurance=&dc12=&debug=false&exchange7=&fcat=&fcatName=&from=&itemAssurance=&key=%E7%94%B5%E7%83%AD%E6%B0%B4%E5%99%A8&location=&maxPageSize=200&nick=sunny%5Cu5927%5Cu54E5%5Cu54E5&npx=50&pageNav=true&postFree=&ppath=&ppathName=&price=&sort=&sourceId=search&supportCod=&tmall='

# Request pages 1 through 99, saving the rows returned by goods() after
# each page.
for page in range(1, 100):
    url = ''.join((_ITEM_URL_HEAD, str(page), _ITEM_URL_TAIL))
    results = goods(url)
    save(results)
# 评论
# 添加红包
#
# 请填写红包祝福语或标题
#
# 红包个数最小为10个
#
# 红包金额最低5元
#
# 当前余额3.43前往充值 >
# 需支付:10.00
# 成就一亿技术人!
# 领取后你会自动成为博主和红包主的粉丝 规则
# hope_wisdom
# 发出的红包
# 实付
# 使用余额支付
# 点击重新获取
# 扫码支付
# 钱包余额 0
#
# 抵扣说明:
#
# 1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
# 2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。
#
# 余额充值