我们今天来说说怎么用selenium模块爬取
首先我们先来看一下selenium的基础语法

了解完了基础语法,我们现在写几个实例来让我们更了解selenium
#这是利用selenium操作百度的一个实例
from selenium import webdriver
import time
ww = webdriver.Chrome(r'E:\人工智能\第六个月爬虫\chromedriver.exe')
ww.get('https://www.baidu.com/')
# 输入
input = ww.find_element_by_id('kw')
input.send_keys('反转')
# 点击
but = ww.find_element_by_id('su')
but.click()
time.sleep(6)
# 清空
input.clear()
# 页面滑动
js = 'window.scrollTo(0, document.body.scrollHeight)'
ww.execute_script(js)
time.sleep(5)
# 退出浏览器
ww.quit()
#这是用脚本爬取某网站图片的实例
from selenium import webdriver
from lxml import etree
import requests
ww = webdriver.Chrome(executable_path=r'E:\人工智能\第六个月爬虫\chromedriver.exe')
ww.get(
'http://image.baidu.com/search/index?tn=baiduimage&ipn=r&ct=201326592&cl=2&fm=detail&lm=-1&hd=undefined&latest'
'=undefined©right=undefined&st=-1&sf=2&fmq=&fm=detail&pv=&ic=undefined&nc=1&z=0&se=&showtab=0&fb=0&width'
'=undefined&height=undefined&face=0&istype=2&ie=utf-8&hs=2&word=%E6%80%A7%E6%84%9F%E7%BE%8E%E5%A5%B3%E5%9B%BE%E7'
'%89%87')
headers = {
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) '
'Chrome/79.0.3941.4 Safari/537.36 '
}
html = ww.page_source
tree = etree.HTML(html)
li_list = tree.xpath('//ul[@class="imglist clearfix pageNum0"]/li')
for li in li_list:
img = li.xpath('./@data-objurl')
for src in img:
content=requests.get(url=src,headers=headers, verify=False).content
name = src.split('/')[-1]
with open('./img/%s'%name,'wb') as f:
f.write(content)
#这是利用selenium实现的分页爬取进入详情页爬取数据的实例
import requests
from lxml import etree
from selenium import webdriver
borwser = webdriver.Chrome(executable_path&