import requests # 第三方模块
import parsel # 解析数据模块
import csv
# Request headers sent with every page fetch. The values mirror a desktop
# Edge/Chromium 118 browser session (see User-Agent and sec-ch-ua below),
# presumably so the request resembles an ordinary browser page load.
headers = {
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
    # 'br' (Brotli) is deliberately not advertised: requests/urllib3 can only
    # decode Brotli bodies when the optional brotli/brotlicffi package is
    # installed, and an undecoded body would make response.text unusable.
    'Accept-Encoding': 'gzip, deflate',
    'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6',
    'Cache-Control': 'no-cache',
    'Connection': 'keep-alive',
    # Placeholder: replace with a real Cookie string before running. Header
    # values are encoded as Latin-1 when the request is sent, so leaving this
    # non-Latin-1 placeholder in place is expected to fail at send time.
    'Cookie': '相关cookie',
    # Network-information client hints.
    'downlink': '10',
    'ect': '4g',
    'rtt': '200',
    'sec-ch-ua': '"Chromium";v="118", "Microsoft Edge";v="118", "Not=A?Brand";v="99"',
    'sec-ch-ua-mobile': '?0',
    'sec-ch-ua-platform': '"Windows"',
    'Sec-Fetch-Dest': 'document',
    'Sec-Fetch-Mode': 'navigate',
    'Sec-Fetch-Site': 'none',
    'Sec-Fetch-User': '?1',
    'Upgrade-Insecure-Requests': '1',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36 Edg/118.0.2088.69',
}
# Scrape each ranking page and append one CSV row per listed product.
for j in range(1, 2):  # page numbers to fetch; currently just page 1
    url = f'https://www.amazon.com/链接/link={j}'
    try:
        # A timeout keeps a stalled connection from hanging the script.
        response = requests.get(url=url, headers=headers, timeout=10)
        response.raise_for_status()
    except requests.RequestException as e:
        # Best effort: report the failed page and move on to the next one.
        print(e)
        continue

    selector = parsel.Selector(response.text)
    images = selector.xpath('//div[contains(@class,"a-section a-spacing-mini _cDEzb_noop_3Xbw5")]/img/@src').getall()
    ids = selector.xpath('//span[contains(@class, "zg-bdg-text")]//text()').getall()
    titles = selector.xpath('//div[contains(@class, "_cDEzb_p13n-sc-css-line-clamp-3_g3dy1")]//text()').getall()
    nums = selector.xpath('//span[contains(@class, "a-size-small")]//text()').getall()
    # NOTE(review): no price column is written, so no price is extracted. If
    # one is wanted, select the text of span elements whose class contains
    # "p13n-sc-price" and append it to each row.

    # NOTE(review): the four lists are paired purely by position. The
    # "//text()" queries can return several text nodes per element, so the
    # lists may not line up one-to-one -- TODO confirm against a live page.
    columns = (ids, titles, nums, images)
    if len({len(column) for column in columns}) > 1:
        print(f'page {j}: field counts differ (ids={len(ids)}, '
              f'titles={len(titles)}, nums={len(nums)}, '
              f'images={len(images)}); unmatched entries are skipped')

    # zip() stops at the shortest list, so a missing field can no longer
    # raise IndexError and leave the loop stuck on the same index forever.
    # Each row stores the product's own image URL, not the whole image list.
    rows = [list(row) for row in zip(ids, titles, nums, images)]
    if not rows:
        continue

    # Open the file once per page (append mode) rather than once per row.
    with open('亚马逊.csv', mode='a', encoding='utf-8', newline='') as f:
        csv.writer(f).writerows(rows)
# Python亚马逊排行榜爬取
# 于 2023-10-29 13:34:23 首次发布