1. 准备工作
IDE:PyCharm
Python 版本:Python 3.6
库:selenium、pyquery、pymongo 等
浏览器:Chrome 及其对应的 ChromeDriver(浏览器与驱动的版本必须相互匹配才能使用)
数据库:MongoDB
2. 具体操作,代码如下:
#-*-coding:utf-8-*-
import re
import time
from selenium import webdriver
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from pyquery import PyQuery as pq
from config import *
import pymongo
# Module-level setup: a MongoDB handle plus one shared Chrome session and its explicit wait.
# NOTE (original author): adding a request header would probably make this more robust.
# MONGO_URL and MONGO_DB are not defined in this file; presumably they come from
# `from config import *` -- TODO confirm.
client = pymongo.MongoClient(MONGO_URL)  # create the MongoDB client
db = client[MONGO_DB]  # select the database named by MONGO_DB
broswer = webdriver.Chrome()  # launches Chrome at import time; the name (sic) is kept because later code references it
wait = WebDriverWait(broswer,50)  # explicit wait bound to the shared browser, maximum timeout of 50 seconds
def search():
try:
broswer.get('https://www.jd.com/') #淘宝需要cookie 下次爬
input = wait.until(
EC.presence_of_element_located((By.CSS_SELECTOR,'#key'))
) #F12 定位到COPY-CSS-SECECTOR
submit = wait.until(EC.element_to_be_clickab