Selenium

Selenium

参考崔庆才爬虫

安装: pip install selenium
注意:需要下载Chrome的webdriver(chromedriver)
下载完成后解压到与python.exe可执行文件同一目录下

基本使用

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait

# Open Taobao and locate the search box (id="q") with two equivalent locators.
browser = webdriver.Chrome()
try:
    browser.get("https://www.taobao.com")
    input_first = browser.find_element_by_id('q')
    input_second = browser.find_element_by_css_selector('#q')
    # XPath equivalent (attribute predicates are written in square brackets):
    # input_third = browser.find_element_by_xpath('//*[@id="q"]')
    print(input_first, input_second)
finally:
    # quit() ends the whole WebDriver session even if a step above raised;
    # close() would only close the current window.
    browser.quit()
<selenium.webdriver.remote.webelement.WebElement (session="4432d05b280a89cf54be45c143d95cff", element="0.02248678307764207-1")> <selenium.webdriver.remote.webelement.WebElement (session="4432d05b280a89cf54be45c143d95cff", element="0.02248678307764207-1")>

声明浏览器对象

from selenium import webdriver

# Create the browser object; the commented lines show the same call for other drivers.
browser = webdriver.Chrome()
# browser = webdriver.Firefox()
# browser = webdriver.Edge()

访问页面

from selenium import webdriver

# Load the Zhihu home page and print the first 500 characters of its HTML source.
browser = webdriver.Chrome()
try:
    browser.get("http://www.zhihu.com")
    print(browser.page_source[:500])
finally:
    # Always end the session, even when loading the page fails.
    browser.quit()
<!DOCTYPE html><html xmlns="http://www.w3.org/1999/xhtml" lang="zh" data-hairline="true" data-theme="light"><head><meta charset="utf-8" /><title>知乎 - 有问题上知乎</title><meta name="viewport" content="width=device-width,initial-scale=1,maximum-scale=1" /><meta name="renderer" content="webkit" /><meta name="force-rendering" content="webkit" /><meta http-equiv="X-UA-Compatible" content="IE=edge,chrome=1" /><meta name="google-site-verification" content="FTeR0c8arOPKh8c5DYh_9uu98_zJbaWw53J-Sch9MTg" /><tit

查找元素

单个元素
from selenium import webdriver

# Locate the same element (id="nav_box") through two different locator helpers.
browser = webdriver.Chrome()
try:
    browser.get("http://www.gdsgj.com")
    print(browser.page_source[:500])
    print("...............")
    input_1 = browser.find_element_by_id("nav_box")
    input_2 = browser.find_element_by_css_selector('#nav_box')
    # XPath equivalent (attribute predicates are written in square brackets):
    # input_3 = browser.find_element_by_xpath('//*[@id="nav_box"]')
    print(input_1, input_2)
finally:
    # Always end the session, even when a lookup above fails.
    browser.quit()
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"><html xmlns="http://www.w3.org/1999/xhtml"><head>
 <!--360浏览器兼容模式定义如下一条语句--> 
<meta name="renderer" content="ie-comp" /> 
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
<meta http-equiv="Content-Language" content="zh-cn" />
<meta http-equiv="Content-Type" content="text/html; charset=gb2312" />
<meta name="Keywords" content="广东省高级技工学校,广东省技师学院,惠州博罗广东省高级技工学
...............
<selenium.webdriver.remote.webelement.WebElement (session="fbb5ecf6b543de1ed103c3ac991b3f60", element="0.3298393285200234-1")> <selenium.webdriver.remote.webelement.WebElement (session="fbb5ecf6b543de1ed103c3ac991b3f60", element="0.3298393285200234-1")>
  • find_element_by_name()
  • find_element_by_xpath()
  • find_element_by_id()
  • find_element_by_css_selector()
  • find_element_by_tag_name()
  • find_element_by_class_name()
  • find_element_by_link_text()
  • find_element_by_partial_link_text()
多个元素
from selenium import webdriver

# Collect every <li> element on the Taobao home page and print the first 20.
browser = webdriver.Chrome()
try:
    browser.get("https://www.taobao.com")
    # The plural find_elements_* helpers return a list of all matches.
    li = browser.find_elements_by_css_selector('li')
    print(li[:20])
finally:
    # Always end the session, even when the lookup fails.
    browser.quit()
[<selenium.webdriver.remote.webelement.WebElement (session="47c77f533e7b9de89a67db541307f4d0", element="0.12123117773549641-1")>, <selenium.webdriver.remote.webelement.WebElement (session="47c77f533e7b9de89a67db541307f4d0", element="0.12123117773549641-2")>, <selenium.webdriver.remote.webelement.WebElement (session="47c77f533e7b9de89a67db541307f4d0", element="0.12123117773549641-3")>, <selenium.webdriver.remote.webelement.WebElement (session="47c77f533e7b9de89a67db541307f4d0", element="0.12123117773549641-4")>, <selenium.webdriver.remote.webelement.WebElement (session="47c77f533e7b9de89a67db541307f4d0", element="0.12123117773549641-5")>, <selenium.webdriver.remote.webelement.WebElement (session="47c77f533e7b9de89a67db541307f4d0", element="0.12123117773549641-6")>, <selenium.webdriver.remote.webelement.WebElement (session="47c77f533e7b9de89a67db541307f4d0", element="0.12123117773549641-7")>, <selenium.webdriver.remote.webelement.WebElement (session="47c77f533e7b9de89a67db541307f4d0", element="0.12123117773549641-8")>, <selenium.webdriver.remote.webelement.WebElement (session="47c77f533e7b9de89a67db541307f4d0", element="0.12123117773549641-9")>, <selenium.webdriver.remote.webelement.WebElement (session="47c77f533e7b9de89a67db541307f4d0", element="0.12123117773549641-10")>, <selenium.webdriver.remote.webelement.WebElement (session="47c77f533e7b9de89a67db541307f4d0", element="0.12123117773549641-11")>, <selenium.webdriver.remote.webelement.WebElement (session="47c77f533e7b9de89a67db541307f4d0", element="0.12123117773549641-12")>, <selenium.webdriver.remote.webelement.WebElement (session="47c77f533e7b9de89a67db541307f4d0", element="0.12123117773549641-13")>, <selenium.webdriver.remote.webelement.WebElement (session="47c77f533e7b9de89a67db541307f4d0", element="0.12123117773549641-14")>, <selenium.webdriver.remote.webelement.WebElement (session="47c77f533e7b9de89a67db541307f4d0", element="0.12123117773549641-15")>, <selenium.webdriver.remote.webelement.WebElement 
(session="47c77f533e7b9de89a67db541307f4d0", element="0.12123117773549641-16")>, <selenium.webdriver.remote.webelement.WebElement (session="47c77f533e7b9de89a67db541307f4d0", element="0.12123117773549641-17")>, <selenium.webdriver.remote.webelement.WebElement (session="47c77f533e7b9de89a67db541307f4d0", element="0.12123117773549641-18")>, <selenium.webdriver.remote.webelement.WebElement (session="47c77f533e7b9de89a67db541307f4d0", element="0.12123117773549641-19")>, <selenium.webdriver.remote.webelement.WebElement (session="47c77f533e7b9de89a67db541307f4d0", element="0.12123117773549641-20")>]
  • find_elements_by_name()
  • find_elements_by_xpath()
  • find_elements_by_id()
  • find_elements_by_css_selector()
  • find_elements_by_tag_name()
  • find_elements_by_class_name()
  • find_elements_by_link_text()
  • find_elements_by_partial_link_text()

元素交互动作

对获取的元素调用交互方法
from selenium import webdriver
import time

# Type into Taobao's search box, replace the text, then click the search button.
browser = webdriver.Chrome()
browser.get("https://www.taobao.com")
# Named search_box rather than `input` so the builtin input() is not shadowed.
search_box = browser.find_element_by_id('q')  # search input field
search_box.send_keys('iphone')
time.sleep(1)  # pause so the first keyword is visible before it is cleared
search_box.clear()
search_box.send_keys('ipad')
button = browser.find_element_by_class_name('btn-search')  # search button
button.click()

交互动作

将动作附加到动作链中串行执行
from selenium import webdriver
from selenium.webdriver import ActionChains

# Drag the #draggable element onto the #droppable element in the runoob demo page.
demo_url = "http://www.runoob.com/try/try.php?filename=jqueryui-api-droppable"
browser = webdriver.Chrome()
browser.get(demo_url)
# Both elements are looked up after switching into the 'iframeResult' frame.
browser.switch_to.frame('iframeResult')
drag_source = browser.find_element_by_css_selector('#draggable')  # element to drag
drop_target = browser.find_element_by_css_selector('#droppable')  # destination area
# drag_and_drop() only queues the action on the chain; perform() executes it.
ActionChains(browser).drag_and_drop(drag_source, drop_target).perform()

执行JavaScript

from selenium import webdriver

# Run two JavaScript snippets in the page: scroll to the bottom, then show an alert.
browser = webdriver.Chrome()
browser.get("https://www.zhihu.com/explore")
scripts = (
    'window.scrollTo(0,document.body.scrollHeight)',
    'alert("To Bottom")',
)
for script in scripts:
    browser.execute_script(script)

获取元素

获取属性

from selenium import webdriver

# Look up the logo element by id and print the element and its "class" attribute.
browser = webdriver.Chrome()
browser.get("https://www.zhihu.com/explore")
logo = browser.find_element_by_id('zh-top-link-logo')
print(logo)
class_attr = logo.get_attribute('class')
print(class_attr)
<selenium.webdriver.remote.webelement.WebElement (session="1c8eaf5705221cac70f1bebd5ba4be05", element="0.010884747176694631-1")>
zu-top-link-logo

获取文本值

from selenium import webdriver

# Print the visible text of the element with class "zu-top-add-question".
browser = webdriver.Chrome()
url = "https://www.zhihu.com/explore"
browser.get(url)
# Named ask_button rather than `input` so the builtin input() is not shadowed.
ask_button = browser.find_element_by_class_name('zu-top-add-question')
print(ask_button.text)
提问

获取ID,位置,标签名,大小

from selenium import webdriver

# Print the id, location, tag name and size of one element.
browser = webdriver.Chrome()
browser.get('https://www.zhihu.com/explore')
# Named ask_button rather than `input` so the builtin input() is not shadowed.
ask_button = browser.find_element_by_class_name('zu-top-add-question')
print(ask_button.id)
print(ask_button.location)
print(ask_button.tag_name)
print(ask_button.size)
0.9653522968329347-1
{'x': 759, 'y': 7}
button
{'width': 66, 'height': 32}

Frame

import time
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException

# Show that element lookups are scoped to the current frame: the same class
# lookup is attempted inside the iframe and again after returning to the parent.
browser = webdriver.Chrome()
url = "http://www.runoob.com/try/try.php?filename=jqueryui-api-droppable"
browser.get(url)
browser.switch_to.frame('iframeResult')
source = browser.find_element_by_css_selector('#draggable')
print(source)
try:
    # Use the same class-name locator as the parent-frame lookup below so the
    # two results are comparable (a bare 'logo' CSS selector would match a
    # <logo> tag, not class="logo").
    logo = browser.find_element_by_class_name('logo')
except NoSuchElementException:
    # Catch only the "not found" case; other WebDriver errors should surface.
    print('NO LOGO')
browser.switch_to.parent_frame()
logo = browser.find_element_by_class_name('logo')
print(logo)
print(logo.text)
<selenium.webdriver.remote.webelement.WebElement (session="677c65a0ea4a4f9ceaf0df89eaf6bcde", element="0.6027971659640068-1")>
NO LOGO
<selenium.webdriver.remote.webelement.WebElement (session="677c65a0ea4a4f9ceaf0df89eaf6bcde", element="0.20115529620564865-2")>
RUNOOB.COM

等待

隐式等待

当使用了隐式等待执行测试的时候,如果WebDriver没有在DOM中找到元素,将继续等待,超出设定时间后则抛出找不到元素的异常。换句话说,当要查找的元素没有立即出现的时候,隐式等待将等待一段时间再查找DOM元素,默认时间是0

from selenium import webdriver

# Set a 10-second implicit wait before looking up an element.
browser = webdriver.Chrome()
browser.implicitly_wait(10)
browser.get('https://www.zhihu.com/explore')
# Named ask_button rather than `input` so the builtin input() is not shadowed.
ask_button = browser.find_element_by_class_name('zu-top-add-question')
print(ask_button)

<selenium.webdriver.remote.webelement.WebElement (session="3bcd9a9bf78deee11cee2dcc6e9f358a", element="0.6158222971089322-1")>

显式等待

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

# Explicit wait: block (up to 10 seconds) until specific conditions hold.
browser = webdriver.Chrome()
browser.get("https://www.taobao.com/")
wait = WebDriverWait(browser, 10)
# Named search_box rather than `input` so the builtin input() is not shadowed.
search_box = wait.until(EC.presence_of_element_located((By.ID, 'q')))
# A CSS class selector needs the leading dot. Without it, 'btn-search' is a
# tag selector that never matches, and the wait ends in the TimeoutException
# traceback recorded below this snippet.
button = wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, '.btn-search')))
print(search_box, button)
---------------------------------------------------------------------------

TimeoutException                          Traceback (most recent call last)

<ipython-input-14-88189fce9bb7> in <module>()
      8 wait=WebDriverWait(browser,10)
      9 input=wait.until(EC.presence_of_element_located((By.ID,'q')))
---> 10 button=wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR,'btn-search')))
     11 print(input,button)


C:\Program Files\Anaconda3\lib\site-packages\selenium\webdriver\support\wait.py in until(self, method, message)
     78             if time.time() > end_time:
     79                 break
---> 80         raise TimeoutException(message, screen, stacktrace)
     81 
     82     def until_not(self, method, message=''):


TimeoutException: Message: 

前进后退

from selenium import webdriver
import time

# Visit three pages in sequence, then step back and forward through the history.
browser = webdriver.Chrome()
try:
    browser.get('https://www.taobao.com')
    browser.get('http://www.zhihu.com')
    browser.get('http://www.baidu.com')
    browser.back()     # go back one page
    time.sleep(1)
    browser.forward()  # go forward one page (was misspelled `borwser`, a NameError)
finally:
    # Always end the session, even when a navigation step fails.
    browser.quit()

Cookies

from selenium import webdriver

# Print the cookies after each step: initial load, adding one cookie, deleting all.
browser = webdriver.Chrome()
browser.get('https://www.zhihu.com')
print(browser.get_cookies())
extra_cookie = {'name': 'name', 'domain': 'www.zhihu.com', 'value': '19'}
browser.add_cookie(extra_cookie)
print(browser.get_cookies())
browser.delete_all_cookies()
print(browser.get_cookies())
[{'httpOnly': False, 'value': '8ffa4a0b7ecd9bdb5ad19b8c1037b063', 'domain': 'www.zhihu.com', 'path': '/', 'expiry': 1547781217.917957, 'secure': False, 'name': 'tgw_l7_route'}, {'httpOnly': False, 'value': 'd5e6b7cd-b27f-4ad9-b093-59625fa87e08', 'domain': '.zhihu.com', 'path': '/', 'expiry': 1610852317.918046, 'secure': False, 'name': '_zap'}, {'httpOnly': False, 'value': 'Mg3lICZ9264KnmroQXynie0G5Cyu1p8s', 'domain': '.zhihu.com', 'path': '/', 'expiry': 1625540317.918098, 'secure': False, 'name': '_xsrf'}]
[{'httpOnly': False, 'value': '8ffa4a0b7ecd9bdb5ad19b8c1037b063', 'domain': 'www.zhihu.com', 'path': '/', 'expiry': 1547781217.917957, 'secure': False, 'name': 'tgw_l7_route'}, {'httpOnly': False, 'value': 'd5e6b7cd-b27f-4ad9-b093-59625fa87e08', 'domain': '.zhihu.com', 'path': '/', 'expiry': 1610852317.918046, 'secure': False, 'name': '_zap'}, {'httpOnly': False, 'value': 'Mg3lICZ9264KnmroQXynie0G5Cyu1p8s', 'domain': '.zhihu.com', 'path': '/', 'expiry': 1625540317.918098, 'secure': False, 'name': '_xsrf'}, {'httpOnly': False, 'value': '19', 'domain': 'www.zhihu.com', 'path': '/', 'expiry': 2178500320, 'secure': True, 'name': 'name'}]
[{'httpOnly': False, 'value': 'dylbYgEEdf0gMQDfYcNzi7XcXym3wJlH', 'domain': '.zhihu.com', 'path': '/', 'expiry': 1625540321.166329, 'secure': False, 'name': '_xsrf'}, {'httpOnly': False, 'value': '"AODj0QbN1w6PTv69r6L5VykWXy6ahFXDzko=|1547780320"', 'domain': '.zhihu.com', 'path': '/', 'expiry': 1642388320.980894, 'secure': False, 'name': 'd_c0'}, {'httpOnly': False, 'value': '025a67177706b199591bd562de56e55b', 'domain': 'www.zhihu.com', 'path': '/', 'expiry': 1547781220.166226, 'secure': False, 'name': 'tgw_l7_route'}]

选项卡管理

from selenium import webdriver
import time
# Open a second tab via JavaScript and load a different page in each tab.
browser = webdriver.Chrome()
browser.get('https://www.taobao.com')
browser.execute_script('window.open()')
print(browser.window_handles)
# switch_to.window replaces the deprecated switch_to_window, which is what
# produced the DeprecationWarning lines recorded below this snippet.
browser.switch_to.window(browser.window_handles[1])
browser.get('http://www.baidu.com')
time.sleep(1)
browser.switch_to.window(browser.window_handles[0])
browser.get('https://python.org')
['CDwindow-08C207F70FD501825F3B509EE9951B7E', 'CDwindow-949E824D7CE34A78D5C1843BFDCBF9CE', 'CDwindow-AE3C151DCAC4A78BCCFC94DB976AF680']


C:\Program Files\Anaconda3\lib\site-packages\ipykernel\__main__.py:7: DeprecationWarning: use driver.switch_to.window instead
C:\Program Files\Anaconda3\lib\site-packages\ipykernel\__main__.py:10: DeprecationWarning: use driver.switch_to.window instead

异常处理

from selenium import webdriver

# Deliberately look up an id that is not on the page, without any try/except,
# so the resulting exception is shown in full (see the traceback that follows).
browser=webdriver.Chrome()
browser.get('https://www.baidu.com')
browser.find_element_by_id('hello')        # no element with id="hello" can be found, so this raises
---------------------------------------------------------------------------

NoSuchElementException                    Traceback (most recent call last)

<ipython-input-18-cc86308282e5> in <module>()
      3 browser=webdriver.Chrome()
      4 browser.get('https://www.baidu.com')
----> 5 browser.find_element_by_id('hello')        #无法查找到id="hello"的元素,产生异常


C:\Program Files\Anaconda3\lib\site-packages\selenium\webdriver\remote\webdriver.py in find_element_by_id(self, id_)
    358             element = driver.find_element_by_id('foo')
    359         """
--> 360         return self.find_element(by=By.ID, value=id_)
    361 
    362     def find_elements_by_id(self, id_):


C:\Program Files\Anaconda3\lib\site-packages\selenium\webdriver\remote\webdriver.py in find_element(self, by, value)
    976         return self.execute(Command.FIND_ELEMENT, {
    977             'using': by,
--> 978             'value': value})['value']
    979 
    980     def find_elements(self, by=By.ID, value=None):


C:\Program Files\Anaconda3\lib\site-packages\selenium\webdriver\remote\webdriver.py in execute(self, driver_command, params)
    319         response = self.command_executor.execute(driver_command, params)
    320         if response:
--> 321             self.error_handler.check_response(response)
    322             response['value'] = self._unwrap_value(
    323                 response.get('value', None))


C:\Program Files\Anaconda3\lib\site-packages\selenium\webdriver\remote\errorhandler.py in check_response(self, response)
    240                 alert_text = value['alert'].get('text')
    241             raise exception_class(message, screen, stacktrace, alert_text)
--> 242         raise exception_class(message, screen, stacktrace)
    243 
    244     def _value_or_default(self, obj, key, default):


NoSuchElementException: Message: no such element: Unable to locate element: {"method":"id","selector":"hello"}
  (Session info: chrome=71.0.3578.98)
  (Driver info: chromedriver=2.43.600210 (68dcf5eebde37173d4027fa8635e332711d2874a),platform=Windows NT 10.0.17134 x86_64)
from selenium import webdriver
from selenium.common.exceptions import TimeoutException,NoSuchElementException

# Same failing lookup as the previous snippet, but with the error handled.
browser = webdriver.Chrome()
try:
    browser.get('https://www.baidu.com')
    browser.find_element_by_id('hello')
except (TimeoutException, NoSuchElementException) as e:
    # Catch only the two Selenium errors imported for this example (page-load
    # timeout, element not found); anything else is unexpected and propagates.
    print('Exception', e)
Exception Message: no such element: Unable to locate element: {"method":"id","selector":"hello"}
  (Session info: chrome=71.0.3578.98)
  (Driver info: chromedriver=2.43.600210 (68dcf5eebde37173d4027fa8635e332711d2874a),platform=Windows NT 10.0.17134 x86_64)

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值