from selenium import webdriver
from selenium.webdriver import ChromeOptions
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
import time
import csv
from pyquery import PyQuery as pq
# 1. Build the Chrome browser object.
# Strip the "Chrome is being controlled by automated software" switches and
# the automation extension so the session looks less like a bot, and run
# headless (no visible window).
option = ChromeOptions()
option.add_experimental_option('excludeSwitches', ['enable-automation'])
option.add_experimental_option('useAutomationExtension', False)
option.add_argument('--headless')

browser = webdriver.Chrome(options=option)
# Shared explicit wait (up to 10 s) reused for every element lookup below.
wait = WebDriverWait(browser, 10)

# 2. Open the initial search-result page for the user-supplied keyword.
browser.maximize_window()
keyword = input("请输入搜索的商品名称:")
filename = f'{keyword}.csv'
url = f"https://search.jd.com/Search?keyword={keyword}"
browser.get(url)
# 3. Loop over result pages 1..100, scraping each one and appending the
# rows to <keyword>.csv.
for page_num in range(1, 101):
    print("正在获取第" + str(page_num) + "页")
    # 3.1 Scroll to the bottom so lazily loaded product cards render,
    # then give the AJAX content time to arrive.
    browser.execute_script('window.scrollTo(0,document.body.scrollHeight)')
    time.sleep(3)
    # 3.2 Parse the rendered HTML and pull name / price / shop out of
    # every product card on the page.
    doc = pq(browser.page_source)
    lis = doc.find('div#J_goodsList li').items()
    # Open the CSV once per page (the original reopened it for every
    # single row) and force UTF-8 so Chinese product names are written
    # correctly regardless of the platform's default encoding.
    with open(filename, 'a', newline='', encoding='utf-8') as f:
        writer = csv.writer(f)
        for li in lis:
            goods_name = li.find('.p-name').text().replace('\n', '')
            goods_price = li.find('.p-price').text()
            goods_shop = li.find('.curr-shop.hd-shopname').text()
            writer.writerow([goods_name, goods_price, goods_shop])
    # 3.3 Jump to the NEXT page. The original sent the *current*
    # page_num into the page box, so the scraper re-read page 1 on the
    # second iteration and never reached page 100; the box must receive
    # page_num + 1. No jump is needed after the final page.
    if page_num < 100:
        input_page = wait.until(EC.presence_of_element_located(
            (By.XPATH, '//*[@id="J_bottomPage"]/span[2]/input')))
        submit = wait.until(EC.element_to_be_clickable(
            (By.XPATH, '//*[@id="J_bottomPage"]/span[2]/a')))
        input_page.clear()
        input_page.send_keys(str(page_num + 1))
        submit.click()
# 4. Release the Chrome process once scraping finishes (the original
# leaked the headless browser).
browser.quit()
# Source note (stray article text pasted below the code, preserved here as a
# comment so the file stays valid Python): adapted from a CSDN tutorial titled
# "selenium的使用-爬取天猫商品信息" ("Using Selenium to scrape Tmall product
# info"), latest revision posted 2025-05-27 17:35:27 — note the code above
# actually scrapes JD (search.jd.com), not Tmall.