import json
import re
from time import sleep

import requests

# Ask which product to search for. Per the original note, the product name is
# meant to be added to the search URL as a query parameter.
input_name = input('输入你要查找商品的名字:')


def get_taobao(url):
    """Fetch a Taobao search-result page and return the listings found in it.

    The page embeds its data as a JavaScript assignment
    (``g_page_config = {...}``). That JSON object is cut out with regular
    expressions, decoded, and the entries under
    ``mods -> itemlist -> data -> auctions`` are reduced to a few fields.

    Args:
        url: Address of the search-result page to download.

    Returns:
        A list with one dict per listing, holding the keys ``'title'``,
        ``'image'``, ``'price'``, ``'adress'`` and ``'store'``.

    Raises:
        requests.RequestException: The download failed or timed out.
        IndexError: The ``g_page_config`` block (or the JSON object inside it)
            is not present in the response.
        json.JSONDecodeError: The extracted text is not valid JSON.
        KeyError: The decoded JSON lacks one of the expected keys.
    """
    # A timeout keeps a stalled connection from blocking the scraper forever.
    html = requests.get(url, timeout=10).text

    # Cut out the span from the config assignment up to the next known marker.
    # re.S lets '.' cross newlines because the span may cover several lines.
    config_match = re.search(r'g_page_config = .*?g_srp_loadCss', html, re.S)
    if config_match is None:
        # IndexError matches what indexing an empty findall() result raised.
        raise IndexError('g_page_config block not found in the response')

    # Within that span, take the widest {...} that sits on a single line.
    json_match = re.search(r'{.*}', config_match.group(0))
    if json_match is None:
        raise IndexError('no JSON object found inside the g_page_config block')

    # Decode the JSON text and pull out the list of listings.
    page_config = json.loads(json_match.group(0))
    auctions = page_config['mods']['itemlist']['data']['auctions']

    items = []
    for auction in auctions:
        # Titles carry highlight markup around the matched keyword; strip it.
        title = auction['title'].replace('</span>', '').replace('<span class=H>', '')
        items.append({
            'title': title,
            # pic_url has no scheme in the page data, so one is prepended.
            'image': 'https:' + auction['pic_url'],
            'price': auction['view_price'],
            # The key keeps its original spelling so consumers are unaffected.
            'adress': auction['item_loc'],
            'store': auction['nick'],
        })

    # Pause between page fetches. The collapsed original does not show whether
    # this pause sat inside the per-item loop; it is applied once per call here.
    sleep(2)
    return items
Python 爬虫:爬取淘宝网商品信息
最新推荐文章于 2024-04-24 16:59:57 发布