Python——Selenium爬虫常用函数

懒加载

from selenium import webdriver
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.support.wait import WebDriverWait


def scroll_until_loaded():
    """Scroll to the bottom of the page repeatedly until it stops growing.

    Uses the module-level driver ``dr``. After every scroll the function
    waits up to 10 seconds for ``document.body.scrollHeight`` to exceed the
    previously recorded height; when that wait times out the page is treated
    as fully loaded and the function returns.
    """
    height_script = "return document.body.scrollHeight;"
    waiter = WebDriverWait(dr, 10)
    last_height = dr.execute_script(height_script)

    def page_grew(driver):
        # The argument supplied by WebDriverWait is unused; the global ``dr``
        # is queried, and ``last_height`` is read at call time.
        return dr.execute_script(height_script) > last_height

    while True:
        # Jump to the current bottom of the document.
        dr.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        try:
            waiter.until(page_grew)
            # The page got taller: remember the new height for the next round.
            last_height = dr.execute_script(height_script)
        except TimeoutException:
            # No growth within the timeout, so stop scrolling.
            return

 点击网页元素

def Click(instruction):
    """Click an element via JavaScript, then pause for a short random time.

    Uses the module-level driver ``dr``.

    Args:
        instruction: The object handed to the script as ``arguments[0]``;
            its ``click()`` method is invoked in the browser.
    """
    # Imported here because no module-level import of ``time`` or ``random``
    # is visible next to this function; without them the sleep below would
    # raise NameError.
    import random
    import time

    dr.execute_script("arguments[0].click();", instruction)
    # Sleep for a random duration in [0, 2) seconds after the click.
    time.sleep(random.random() * 2)

 显式等待的运用!

# encoding=utf-8
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC


class WaitUtil(object):
    """Explicit-wait helpers bound to one driver, using a 30-second timeout."""

    def __init__(self, driver):
        # Maps the lower-case locator names accepted by the helper methods
        # to the corresponding selenium ``By`` strategies.
        self.locationTypeDict = dict(
            xpath=By.XPATH,
            id=By.ID,
            name=By.NAME,
            css_selector=By.CSS_SELECTOR,
            class_name=By.CLASS_NAME,
            tag_name=By.TAG_NAME,
            link_text=By.LINK_TEXT,
            partial_link_text=By.PARTIAL_LINK_TEXT,
        )
        # Driver that all waits are issued against.
        self.driver = driver
        # Shared explicit-wait object used by every helper below.
        self.wait = WebDriverWait(driver, 30)

    def presenceOfElementLocated(self, locatorMethod, locatorExpression, *arg):
        """Wait until the element is present in the DOM and return it.

        The element does not have to be visible.

        Args:
            locatorMethod: Locator name; matched case-insensitively against
                the keys of ``locationTypeDict``.
            locatorExpression: Expression handed to the chosen strategy.
            *arg: Accepted but unused.

        Returns:
            Whatever the wait yields for the presence condition.

        Raises:
            TypeError: If ``locatorMethod`` is not a known locator name.
        """
        strategy_name = locatorMethod.lower()
        if strategy_name not in self.locationTypeDict:
            raise TypeError(u"未找到定位方式,请确认定位方法是否写正确")
        locator = (self.locationTypeDict[strategy_name], locatorExpression)
        return self.wait.until(EC.presence_of_element_located(locator))

    def frameToBeAvailableAndSwitchToIt(self, locationType, locatorExpression, *args):
        """Wait for a frame to become available and switch into it.

        Args:
            locationType: Locator name; lower-cased and looked up in
                ``locationTypeDict``.
            locatorExpression: Expression handed to the chosen strategy.
            *args: Accepted but unused.

        Raises:
            KeyError: If ``locationType`` is not a key of
                ``locationTypeDict``. Any error raised by the wait itself
                propagates unchanged to the caller.
        """
        locator = (self.locationTypeDict[locationType.lower()], locatorExpression)
        self.wait.until(EC.frame_to_be_available_and_switch_to_it(locator))

    def visibilityOfElementLocated(self, locationType, locatorExpression, *args):
        """Wait until the element is present in the DOM and visible; return it.

        Args:
            locationType: Locator name; lower-cased and looked up in
                ``locationTypeDict``.
            locatorExpression: Expression handed to the chosen strategy.
            *args: Accepted but unused.

        Returns:
            Whatever the wait yields for the visibility condition.

        Raises:
            KeyError: If ``locationType`` is not a key of
                ``locationTypeDict``. Any error raised by the wait itself
                propagates unchanged to the caller.
        """
        locator = (self.locationTypeDict[locationType.lower()], locatorExpression)
        return self.wait.until(EC.visibility_of_element_located(locator))


if __name__ == '__main__':
    # Demo: open the 126 mail page, switch into the login iframe, and wait
    # for the e-mail input box to become visible.
    driver = webdriver.Chrome()
    try:
        driver.get("http://mail.126.com")
        waitUtil = WaitUtil(driver)
        waitUtil.frameToBeAvailableAndSwitchToIt("xpath", "//iframe[contains(@id,'URS')]")
        waitUtil.visibilityOfElementLocated("xpath", "//input[@name='email']")
    finally:
        # Always shut the browser down, even if navigation or a wait raises;
        # otherwise the browser session is left running.
        driver.quit()


 总结

       文中代码大多借鉴自各位前辈的文章,并在此基础上稍作修改;由于忘了具体引用自哪篇文章,如有冒犯,我会尽数撤回,在此先说声抱歉。放在这里,于私是为了方便自己日后查找,于公也可供他人借鉴。

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值