# 使用 selenium 可以打开百度的网页，但是打不开淘宝的网页了。
# Taobao search pages can no longer be opened through selenium with this
# script, while the same driver still loads https://www.baidu.com.
from urllib.parse import quote

from pyquery import PyQuery
from selenium import webdriver
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

KEYWORD = 'iphonex'

# CSS selector for one result card. Shared by crawl_page (which waits for it)
# and get_product (which parses it) so the two can never drift apart.
ITEM_SELECTOR = 'div .item.J_MouserOnverReq'

browser = webdriver.Chrome(r'G:\Program Files (x86)\Google\Chrome\Application\chromedriver.exe')
# Every wait.until(...) below polls for at most 10 seconds and raises
# TimeoutException if the condition is still not met.
wait = WebDriverWait(browser, 10)


def crawl_page(page, max_attempts=3):
    """Load the search results for KEYWORD, jump to ``page`` and parse it.

    Args:
        page: 1-based result page number. For values greater than 1 the
            number is typed into the pager input and the jump button is
            clicked.
        max_attempts: How many times to try loading the page before giving
            up. Each attempt may block for several waits of up to 10 s.

    Returns:
        The list of product dicts produced by ``get_product``, or an empty
        list if the listing never appeared within ``max_attempts`` tries.
    """
    url = 'https://s.taobao.com/search?q=' + quote(KEYWORD)
    for attempt in range(1, max_attempts + 1):
        try:
            browser.get(url)
            # Pagination: only needed when a page other than the first is
            # requested.
            if page > 1:
                page_box = wait.until(
                    EC.presence_of_element_located(
                        (By.CSS_SELECTOR, '.input.J_Input')
                    )
                )
                jump_button = wait.until(
                    EC.element_to_be_clickable(
                        (By.CSS_SELECTOR, '.btn.J_Submit')
                    )
                )
                page_box.clear()
                page_box.send_keys(str(page))
                jump_button.click()
            # Block until at least one result card is present in the DOM.
            wait.until(
                EC.presence_of_element_located(
                    (By.CSS_SELECTOR, ITEM_SELECTOR)
                )
            )
        except TimeoutException:
            # Retry a bounded number of times instead of recursing forever
            # when the listing never shows up.
            print(f'page {page}: timed out (attempt {attempt}/{max_attempts})')
            continue
        return get_product()
    return []


def get_product():
    """Extract the products from the page currently loaded in ``browser``.

    Returns:
        A list with one dict per element matching ``ITEM_SELECTOR``; each
        dict has the keys ``img``, ``price``, ``deal``, ``title``, ``shop``
        and ``location``.
    """
    doc = PyQuery(browser.page_source)
    return [
        {
            'img': item.find('.img').attr('data-src'),
            'price': item.find('.price').text(),
            'deal': item.find('.deal-cnt').text(),
            'title': item.find('.title').text(),
            'shop': item.find('.shop').text(),
            'location': item.find('.location').text(),
        }
        for item in doc(ITEM_SELECTOR).items()
    ]