Selenium

安装

# 安装selenium
pip install -U selenium
# centos安装chrome(如有必要)
yum install -y google-chrome-stable_current_x86_64.rpm

Drivers

Chrome: https://sites.google.com/a/chromium.org/chromedriver/downloads
Edge: https://developer.microsoft.com/en-us/microsoft-edge/tools/webdriver/
Firefox: https://github.com/mozilla/geckodriver/releases
Safari: https://webkit.org/blog/6900/webdriver-support-in-safari-10/

headless

options = webdriver.ChromeOptions()
options.add_argument('headless')
driver = webdriver.Chrome('../cookie/chromedriver', chrome_options=options)

相关操作

获取页面

from selenium import webdriver

browser = webdriver.Firefox()
browser.get('http://seleniumhq.org/')

等待页面加载完成(Waits)

显式等待

  • 等待一定条件发生后再进一步执行你的代码
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

driver = webdriver.Firefox()
driver.get("http://somedomain/url_that_delays_loading")
try:
    element = WebDriverWait(driver, 10).until(
        EC.presence_of_element_located((By.ID, "myDynamicElement"))
    )
finally:
    driver.quit()

上面的代码最多等待10秒:如果在10秒内找到了要查找的元素就立即返回,否则抛出TimeoutException异常。 WebDriverWait 默认情况下每500毫秒调用一次ExpectedCondition,直到结果成功返回。 ExpectedCondition成功的返回结果是布尔类型的true,或是不为null的返回值。

  • 预期条件
from selenium.webdriver.support import expected_conditions as EC

wait = WebDriverWait(driver, 10)
element = wait.until(EC.element_to_be_clickable((By.ID,'someid')))
  • 内置的预期条件: title_is - title_contains - presence_of_element_located - visibility_of_element_located - visibility_of - presence_of_all_elements_located - text_to_be_present_in_element - text_to_be_present_in_element_value - frame_to_be_available_and_switch_to_it - invisibility_of_element_located - element_to_be_clickable(元素可见并可用) - staleness_of - element_to_be_selected - element_located_to_be_selected - element_selection_state_to_be - element_located_selection_state_to_be - alert_is_present
    请参考python selenium expected_conditions使用实例

隐式等待

隐式等待是告诉WebDriver在查找元素时,如果元素没有立即出现,就在一定时间内持续轮询DOM。 默认等待时间是0秒;一旦设置该值,它将在该WebDriver实例的整个生命周期内有效。

from selenium import webdriver

driver = webdriver.Firefox()
driver.implicitly_wait(10) # seconds
driver.get("http://somedomain/url_that_delays_loading")
myDynamicElement = driver.find_element_by_id("myDynamicElement")

获取页面元素

element = driver.find_element_by_id("passwd-id")
element = driver.find_element_by_name("passwd")
element = driver.find_elements_by_tag_name("input")
element = driver.find_element_by_xpath("//input[@id='passwd-id']")
# 根据链接文本获取超链接
element_link = driver.find_element_by_link_text('Continue')
element_link =  driver.find_element_by_partial_link_text('Conti') # 部分文本

获取元素属性

element.get_attribute('')
driver.title
element.text

操作元素

#  输入文本并按下方向键(需先 from selenium.webdriver.common.keys import Keys)
element.send_keys("and some", Keys.ARROW_DOWN)
# 清除叠加的输入文本
element.clear()

填充表单

第一种方式

element = driver.find_element_by_xpath("//select[@name='name']")
all_options = element.find_elements_by_tag_name("option")
for option in all_options:
   print("Value is: %s" % option.get_attribute("value"))
   option.click()

第二种方式Select

from selenium.webdriver.support.ui import Select
select = Select(driver.find_element_by_name('name'))
select.select_by_index(index)  # 从0开始计数
select.select_by_visible_text("text")  # 根据text值
select.select_by_value(value)  # 根据value值

提交表单

driver.find_element_by_id("submit").click()
#  单独提交某个元素
element.submit()

添加头信息

# !/usr/bin/python
# -*- coding: utf-8 -*-

from selenium import webdriver
# 进入浏览器设置
options = webdriver.ChromeOptions()
# 设置中文
options.add_argument('lang=zh_CN.UTF-8')
# 更换头部
options.add_argument('user-agent="Mozilla/5.0 (iPod; U; CPU iPhone OS 2_1 like Mac OS X; ja-jp) AppleWebKit/525.18.1 (KHTML, like Gecko) Version/3.1.1 Mobile/5F137 Safari/525.20"')
browser = webdriver.Chrome(chrome_options=options)
url = "https://httpbin.org/get?show_env=1"
browser.get(url)
browser.quit()

Cookies

添加cookies

# Go to the correct domain
driver.get("http://www.example.com")
 
# Now set the cookie. This one's valid for the entire domain
cookie = {'name' : 'foo', 'value' : 'bar'}
driver.add_cookie(cookie)

获取cookies

# Go to the correct domain
driver.get("http://www.example.com")
 
# And now output all the available cookies for the current URL
driver.get_cookies()

传递cookies

import requests
from selenium import webdriver

driver = webdriver.Firefox()
url = "some_url" #a redirect to a login page occurs
driver.get(url)

#storing the cookies generated by the browser
request_cookies_browser = driver.get_cookies()

#making a persistent connection using the requests library
params = {'os_username':'username', 'os_password':'password'}
s = requests.Session()

#passing the cookies generated from the browser to the session
c = [s.cookies.set(c['name'], c['value']) for c in request_cookies_browser]

resp = s.post(url, params) #I get a 200 status_code

#passing the cookie of the response to the browser
dict_resp_cookies = resp.cookies.get_dict()
response_cookies_browser = [{'name':name, 'value':value} for name, value in dict_resp_cookies.items()]
c = [driver.add_cookie(c) for c in response_cookies_browser]

#the browser now contains the cookies generated from the authentication    
driver.get(url)

截取元素图片

# 截取图片验证码并保存
element = self.driver.find_element_by_id(self.check_code_id)
element.screenshot(png_name)

Centos中部署所遇问题

  • centos中安装
yum update
yum -y install google-chrome-stable
  • centos中运行报错DevToolsActivePort file doesn't exist
    解决:
chrome_options = Options()
chrome_options.add_argument('--headless')
chrome_options.add_argument('--no-sandbox')
chrome_options.add_argument('--disable-dev-shm-usage')
driver = webdriver.Chrome('/path/to/your_chrome_driver_dir/chromedriver',chrome_options=chrome_options)

stackoverflow

  • Chrome version must be between 70 and 73
    解决:
    通过/opt/google/chrome/chrome --version确认安装的chrome版本,检查下载的chromedriver版本,确保两者一致即可解决此问题

参考:
Selenium Client Driver
Python爬虫利器五之Selenium的用法
Selenium Navigating
Selenium passing cookies
Selenium 查找元素
selenium设置Chrome
CentOS 7 快速安装 Chrome 浏览器

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值