selenium库行为链、页面等待以及打开多窗口和切换页面

1、行为链

有时候在页面中的操作可能要有很多步,那么这时候可以使用鼠标行为链类ActionChains来完成。比如现在要将鼠标移动到某个元素上并执行点击事件。

import time

from selenium import webdriver
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.by import By
# Open Baidu and look up the two elements the action chain will act on.
driver = webdriver.Chrome()
driver.get('https://www.baidu.com/')
search_box = driver.find_element(By.ID, 'kw')
search_button = driver.find_element(By.ID, 'su')

# Queue the steps on one chain: hover the search box, type into it, hover the
# search button and left-click at the current mouse position.
# (Swap .click() for .context_click() to right-click instead.)
actions = (
    ActionChains(driver)
    .move_to_element(search_box)
    .send_keys_to_element(search_box, 'python')
    .move_to_element(search_button)
    .click()
)

# Nothing is sent to the browser until the queued actions are performed.
actions.perform()
time.sleep(3)

还有更多的鼠标相关的操作:click_and_hold(element)(点击但不松开鼠标)、context_click(element)(右键点击)、double_click(element)(双击)。

2、Cookie操作

获取所有的cookie

cookies = driver.get_cookies()

根据cookie的name获取cookie

value = driver.get_cookie(key)

删除某个cookie

driver.delete_cookie('key')
from selenium import webdriver
import time
# Load Baidu so the browser session has cookies to inspect.
driver = webdriver.Chrome()
driver.get('https://www.baidu.com/')

divider = "=" * 30
cookie_name = 'BD_HOME'

# Print every cookie of the current session, one per line.
cookies = driver.get_cookies()
for entry in cookies:
    print(entry)

# Look the cookie up by name, delete it, then look it up again to show the
# effect of the deletion.
print(divider)
print(driver.get_cookie(cookie_name))
driver.delete_cookie(cookie_name)
print(divider)
print(driver.get_cookie(cookie_name))

3、页面等待

现在的网页越来越多采用了 Ajax 技术,这样程序便不能确定何时某个元素完全加载出来了。如果实际页面等待时间过长导致某个dom元素还没出来,但是你的代码直接使用了这个WebElement,那么就会抛出NoSuchElementException异常。为了解决这个问题,Selenium 提供了两种等待方式:一种是隐式等待、一种是显式等待。

  • 隐式等待:调用driver.implicitly_wait。那么在获取不可用的元素之前,最多会先等待10秒钟的时间。
driver.implicitly_wait(10)
  • 显式等待:显式等待是表明某个条件成立后才执行获取元素的操作。也可以在等待的时候指定一个最大的时间,如果超过这个时间那么就抛出一个异常。显式等待应该使用
    selenium.webdriver.support.expected_conditions期望的条件和selenium.webdriver.support.ui.WebDriverWait来配合完成。
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

driver = webdriver.Chrome()
driver.get("https://www.baidu.com/")
try:
    # Poll for up to 10 seconds until an element with this id is present;
    # until() raises if the timeout elapses first.
    wait = WebDriverWait(driver, 10)
    locator = (By.ID, "myDynamicElement")
    element = wait.until(EC.presence_of_element_located(locator))
finally:
    # Always shut the browser down, whether or not the wait succeeded.
    driver.quit()
from selenium.webdriver.common.by import By
from selenium import webdriver
import time

# driver = webdriver.Chrome()

# driver.get('https://www.baidu.com/')

# time.sleep(2)
# 隐式等待
# driver.implicitly_wait(10)
# 页面元素没有加载出来会等待10秒
# driver.find_element(By.ID, 'kwsss').send_keys('python')

# 显示等待
# 显示等待是表明某个条件成立后才执行获取元素的操作
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

driver = webdriver.Chrome()
driver.get("https://www.baidu.com/")
# driver.get("https://www.douban.com/")

# Wait up to 10 seconds for the element with id "kw" to be present.
# The condition takes a (By, value) locator tuple, not an already-located
# element, so passing the result of a find_element call here is wrong.
wait = WebDriverWait(driver, 10)
locator = (By.ID, "kw")
element = wait.until(EC.presence_of_element_located(locator))
time.sleep(2)
# No teardown runs here: the driver.quit() call is deliberately left disabled.

一些其他的等待条件:

  • presence_of_element_located:某个元素已经加载完毕了。
  • presence_of_all_elements_located:网页中所有满足条件的元素都加载完毕了。
  • element_to_be_clickable:某个元素是可以点击了。
  • 更多条件请参考:http://selenium-python.readthedocs.io/waits.html

4、打开多窗口和切换页面

有时候窗口中有很多子tab页面。这时候肯定是需要进行切换的。selenium提供了一个叫做switch_to.window的方法来进行切换(旧版本中的写法是switch_to_window),具体切换到哪个页面,可以从driver.window_handles中找到。

# Open a new page via JavaScript ('url' appears to be a placeholder for the
# address to load)
driver.execute_script("window.open('url')")
# Printed before any switch, so this is the URL of the window the driver is
# currently attached to
print(driver.current_url)
# Switch the driver to the newly opened page (second entry in window_handles)
driver.switch_to.window(driver.window_handles[1])
from selenium import webdriver
import time
from selenium.webdriver.common.by import By

driver = webdriver.Chrome()
driver.get('https://www.baidu.com/')

# Have the page's JavaScript open Douban in a second window.
driver.execute_script("window.open('https://www.douban.com/')")

# Print the current URL; the driver has not been switched to the new window yet.
print(driver.current_url)

# Switch to the second window handle and print its URL.
second_window = driver.window_handles[1]
driver.switch_to.window(second_window)
print(driver.current_url)

# Switch back to the first window and type into its search box.
first_window = driver.window_handles[0]
driver.switch_to.window(first_window)
search_box = driver.find_element(By.ID, 'kw')
search_box.send_keys('python')
time.sleep(5)
# Teardown (driver.close() / driver.quit()) is left disabled in this demo.

登录qq邮箱



from selenium import webdriver
from configparser import ConfigParser
from selenium.webdriver.common.by import By
import time

# Read the account password from a local INI file (section "password",
# option "password") so the secret does not live in the source code.
cfg = ConfigParser()
r = cfg.read('password1.ini')
password = cfg.get('password', 'password')

driver = webdriver.Chrome()

driver.get('https://graph.qq.com/oauth2.0/authorize?response_type=code&client_id=102013353&scope=get_user_info%2Cget_app_friends&theme=10&redirect_uri=https%3A%2F%2Fwx.mail.qq.com%2Flist%2Freadtemplate%3Fname%3Dlogin_jump.html%26scene%3D1%26login_type%3Dqq')
driver.set_window_size(width=1366, height=700)

# The login form lives inside an iframe; elements inside it can only be
# located after the driver has been switched into that frame.
login_frame = driver.find_element(By.ID,'ptlogin_iframe')
driver.switch_to.frame(login_frame)

# Click the "switcher_plogin" link before filling in the account fields.
a = driver.find_element(By.XPATH,'//a[@id="switcher_plogin"]')
a.click()

# Fill in the credentials. The password typed here is the value loaded from
# the config file above rather than a literal embedded in the script.
driver.find_element(By.ID,'u').send_keys('849538220')
driver.find_element(By.ID,'p').send_keys(password)
driver.find_element(By.ID,'login_button').click()

# Fixed pause before looking for the next element.
# NOTE(review): an explicit wait on 'readmailbtn_link' would be more reliable
# than a fixed sleep - confirm before changing.
time.sleep(2)
driver.find_element(By.ID,'readmailbtn_link').click()

# Switch into the 'mainFrame' frame before querying the list entries.
mainFrame = driver.find_element(By.ID,'mainFrame')
driver.switch_to.frame(mainFrame)

emails = driver.find_elements(By.CSS_SELECTOR,'.toarea .F, .toarea .M')

# NOTE(review): this prints the WebElement objects themselves; if the visible
# text of each entry is wanted, print email.text instead - confirm intent.
for email in emails:
    print(email)

拉勾网


from selenium import webdriver
import time
from lxml import etree
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC


class LagouSpider(object):
    """Crawl Lagou's Python job listings with a Selenium-driven Chrome browser.

    The list page stays loaded in the original browser window for the whole
    crawl. Each job's detail page is opened in a temporary second window, so
    the pagination controls of the list page are still there when it is time
    to move to the next page.
    """

    def __init__(self):
        self.url = "https://www.lagou.com/jobs/list_python?labelWords=&fromSearch=true&suginput="
        self.driver = webdriver.Chrome()

    def run(self):
        """Walk every list page, parsing each one, until the last page.

        The browser is always shut down when the crawl ends, including when
        an exception (for example a wait timeout) aborts it.
        """
        try:
            self.driver.get(self.url)
            while True:
                # Parse the jobs on the current list page.
                self.parse_list_page(self.driver.page_source)
                time.sleep(1)
                next_btn = WebDriverWait(driver=self.driver, timeout=10).until(
                    EC.presence_of_element_located((By.XPATH, "//span[contains(@class, 'pager_next')]"))
                )

                # On the last page the "next" button carries the extra
                # disabled class; stop there, otherwise go to the next page.
                button_classes = next_btn.get_attribute('class') or ''
                if "pager_next pager_next_disabled" in button_classes:
                    break
                next_btn.click()
        finally:
            self.driver.quit()

    def parse_list_page(self, sourse):
        """Extract every job link from a list page's HTML and visit each one.

        :param sourse: HTML source of a list page.
        """
        html = etree.HTML(sourse)

        links = html.xpath("//a[@class='position_link']/@href")
        for link in links:
            self.requests_detail_page(link)
            time.sleep(1)

    def requests_detail_page(self, link):
        """Load one job detail page in a temporary window and parse it.

        The detail page is opened in a new window instead of navigating the
        current one, so the list page (and its "next page" button) is left
        untouched. The temporary window is closed and the driver is switched
        back to the list window even if loading or parsing fails.

        :param link: URL of the job detail page.
        :raises RuntimeError: if no new window handle appears after opening.
        """
        list_window = self.driver.current_window_handle
        known_handles = set(self.driver.window_handles)

        # Open a blank window first, then navigate it with get(), as the
        # original code did for the detail page.
        self.driver.execute_script("window.open('about:blank')")
        new_handles = [h for h in self.driver.window_handles if h not in known_handles]
        if not new_handles:
            raise RuntimeError("could not open a new window for %s" % link)

        self.driver.switch_to.window(new_handles[0])
        try:
            self.driver.get(link)
            self.parse_detail_page(self.driver.page_source)
        finally:
            self.driver.close()
            self.driver.switch_to.window(list_window)

    def parse_detail_page(self, sourse):
        """Print the job title and description text found in a detail page.

        :param sourse: HTML source of a job detail page.
        """
        html = etree.HTML(sourse)
        names = html.xpath("//div[@class='job-name']//span[@class='position-head-wrap-position-name']/text()")
        # A page without the expected title node yields None instead of an
        # IndexError that would abort the whole crawl.
        job_name = names[0] if names else None
        job_detail = html.xpath("//dl[@class='job_detail']//text()")
        print(job_name, job_detail)

if __name__ == '__main__':
    lg = LagouSpider()
    lg.run()
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值