新的自动化工具 nodriver的使用

FOAF-lambda

已于 2024-10-31 09:44:02 修改

阅读量1.5k

点赞数 4

文章标签：自动化运维

于 2024-08-06 16:35:44 首次发布

本文链接：https://blog.youkuaiyun.com/lwdfzr/article/details/140958590

版权

1. 安装

pip install -i https://pypi.tuna.tsinghua.edu.cn/simple nodriver

nodriver 是 Undetected-Chromedriver Python 包的官方后继者。
不再需要 webdriver，不再需要 selenium，也没有 chromedriver 二进制文件或 Selenium 依赖项。

和 pyppeteer 类似，nodriver 也是异步执行，不过 nodriver 规避了自动化检测。nodriver 可以用 await page.select('') 按 CSS 选择器查找元素，用 await page.find('') 按文本查找元素，用 await page.xpath('') 执行 XPath 查询。

2. 基本使用

# -*- coding: utf-8 -*-
import subprocess
import time
#from pyppeteer import launch
import os
import requests
import json
import asyncio
import random
from undetected_chromedriver import find_chrome_executable # 获取浏览器路径
import nodriver as uc
from nodriver import cdp, loop



def cmd_start_brower(chrome_path, port, user_data_dir=None):
    """Launch Chrome as a separate process with remote debugging enabled.

    Args:
        chrome_path: Path to the Chrome/Chromium executable.
        port: TCP port passed to ``--remote-debugging-port``.
        user_data_dir: Optional profile directory. When given it is passed to
            the browser as ``--user-data-dir``; when omitted the switch is
            left out.

    Raises:
        ValueError: If ``chrome_path`` is empty or ``None``.
    """
    if not chrome_path:
        raise ValueError('chrome_path is required to start the browser')
    # An argument list (no shell) keeps executable paths that contain spaces
    # intact and avoids shell interpretation of any argument.
    start_params = [
        str(chrome_path),
        f'--remote-debugging-port={port}',
        '--no-first-run',
        '--no-sandbox',
        '--disable-infobars',
        '--allow-file-access-from-files',
        '--no-default-browser-check',
        '--profile-directory=Profile1',
        '--disable-features=PrivacySandboxSettings4',
    ]
    if user_data_dir:
        start_params.append(f'--user-data-dir={user_data_dir}')
    # Fire and forget: the browser keeps running independently of this call.
    subprocess.Popen(start_params)


def get_session_url(chrome_path, port, user_data_dir):
    """Probe the local DevTools endpoint and launch the browser if it is down.

    Requests ``http://127.0.0.1:<port>/json/version``. If the endpoint answers
    with a JSON document containing ``webSocketDebuggerUrl``, that URL is
    returned. Otherwise the browser is started through ``cmd_start_brower``
    and the call waits a few seconds for it to come up.

    Args:
        chrome_path: Path to the browser executable, forwarded on launch.
        port: Remote-debugging port to probe and, if needed, to launch on.
        user_data_dir: Profile directory, forwarded on launch.

    Returns:
        The ``webSocketDebuggerUrl`` string when the endpoint responded, or
        ``None`` when the browser had to be launched instead.
    """
    url = f'http://127.0.0.1:{port}/json/version'
    try:
        # Without a timeout a listener that accepts but never answers would
        # block this call forever.
        res = requests.get(url, timeout=5)
        print(res.text)
        return json.loads(res.text)['webSocketDebuggerUrl']
    except (requests.RequestException, ValueError, KeyError, TypeError) as e:
        # Connection failure, non-JSON body, or JSON without the expected key:
        # treat all of them as "no usable browser on this port".
        print('error:', e)
        cmd_start_brower(chrome_path, port, user_data_dir)
        # Give the freshly launched browser a few seconds to start listening.
        time.sleep(random.randint(4, 6))
        return None



async def operate_setup(driver, page):
    """Demonstrate common operations on an already opened page.

    Fetches the page content, prints the current URL obtained three different
    ways, fills and submits the ``#kw`` search box through injected
    JavaScript, and finally prints the browser's tabs, targets, main tab and
    the page's handlers.

    Args:
        driver: Browser object; its ``tabs``, ``targets`` and ``main_tab``
            attributes are printed.
        page: Tab object that all page operations run against.
    """
    # Reading an element's attributes would look like this:
    # form = await page.select('form#sb_form')
    # print('attr:', form.attr)

    # Fetch the current page's content, giving up after 60 seconds.
    # The result itself is not used further.
    await asyncio.wait_for(page.get_content(), timeout=60)
    # A screenshot could be taken here with: await page.save_screenshot()

    # Current URL, three ways: from the frame tree, from the tab object, and
    # from JavaScript running inside the page.
    frame_tree = await page.send(uc.cdp.page.get_frame_tree())
    print('to_json:', frame_tree.to_json())
    url_from_frame = frame_tree.frame.url
    url_from_tab = page.url
    url_from_js = await page.evaluate("window.location.href")
    print('current_url1:', url_from_tab)
    print('current_url:', url_from_frame)
    print('current_url:', url_from_js)

    # Scrolling down would be: await page.scroll_down(150)
    await page.sleep(6)

    print('locationhref:', await page.evaluate('''window.location.href'''))

    # Type into the search box by assigning its value from JavaScript.
    fill_box_js = '''document.querySelector("#kw").value="python"'''
    await page.evaluate(fill_box_js)

    # Existence check: no `return` is needed in the evaluated script. The
    # script yields the input's value when it exists and "0000" otherwise.
    probe_js = '''{var temp = document.querySelector("input[id*='kw']");if(temp){temp.value}else{"0000"}}'''
    probe_result = await page.evaluate(probe_js, await_promise=True)
    print('result:', probe_result)

    # Click the search button if it is present.
    click_js = '''var temp=document.querySelector('input[id="su"]');if(temp){temp.click()}'''
    await page.evaluate(click_js)
    await page.sleep(3.5)

    # Read an element's innerText.
    inner_text = await page.evaluate('document.querySelector("#kw").innerText')
    print(f'Dynamic Content: {inner_text}')

    # Dump the browser-level collections, then the handlers on this page.
    for label, attribute in (('tabs:', 'tabs'), ('targets:', 'targets'), ('main_tab:', 'main_tab')):
        print(label, getattr(driver, attribute))
    print('handlers:', page.handlers)


async def change_handle_async(driver, url_params='blog.youkuaiyun.com'):
    """Switch to the first tab whose URL contains ``url_params``.

    Tabs are visited in the order given by ``driver.tabs``. Every tab seen
    before a match is closed; the matching tab is brought to the front,
    awaited, and returned. Tabs after the match are left untouched.

    Args:
        driver: Browser object exposing ``tabs``.
        url_params: Substring to look for in each tab's URL.

    Returns:
        The matching tab, or ``None`` when no tab matches (in which case
        every visited tab has been closed).
    """
    for tab in driver.tabs:
        tab_title, tab_url = tab.target.title, tab.target.url
        print('title:', tab_title)
        print('url:', tab_url)
        if url_params not in tab_url:
            # Not the tab we are looking for: close it and keep searching.
            await tab.close()
            print('target_id:', tab.target.target_id)
            continue
        await tab.bring_to_front()  # put this tab in front / make it active
        await tab  # awaiting the tab object itself; presumably refreshes its target info
        return tab


async def flash_spans(tab, i):
    """Activate ``tab`` and print its title.

    Args:
        tab: Tab object to activate.
        i: Accepted but not used by this function.
    """
    await tab.activate()  # make this tab the active one
    print('title:', tab.target.title)


async def demo_drag_to_relative_position_in_steps(browser):
    """Open the mouse demo page in a new tab and drag every ``.box`` element.

    Each box is dragged by a relative offset of (500, 500) in 50 steps, one
    box after another.

    Args:
        browser: Browser object used to open the demo page.
    """
    page = await browser.get('https://nowsecure.nl/mouse.html', new_tab=True)
    offset = (500, 500)
    for element in await page.select_all('.box'):
        await element.mouse_drag(offset, relative=True, steps=50)


async def receive_handler(event: cdp.network.ResponseReceived):
    """Response hook: print the request id and the response of ``event``."""
    for field in ('request_id', 'response'):
        print(f'{field}:', getattr(event, field))


async def send_handler(event: cdp.network.RequestWillBeSent):
    """Request hook: format the outgoing request's method, URL and headers.

    The formatted text is only built; the statement that would print it is
    left disabled.
    """
    # print('request_id:', event.request_id)
    request = event.request
    first_line = f"{request.method} {request.url}"
    header_lines = [f"\n\t{name} : {value}" for name, value in request.headers.items()]
    summary = first_line + "".join(header_lines)
    # print('request:', summary)



async def main1():
    """Run the end-to-end nodriver demo against Baidu.

    Obtains a browser through ``uc.start`` with host 127.0.0.1 and port 62805,
    registers the request/response handlers on the main tab, opens
    https://www.baidu.com, types "python" into the search box and clicks the
    search button, prints some element attributes and text, writes the page's
    cookies to ``cookies.json``, runs ``operate_setup`` and finally switches
    tabs with ``change_handle_async``.
    """
    config = uc.Config()
    config.host = "127.0.0.1"
    config.port = 62805
    # config.user_data_dir = user_data_dir  # set the profile/cache directory
    # NOTE(review): confirm that uc.start honours browser_args when an explicit
    # config object is also passed.
    browser_args = ['--no-first-run', '--no-sandbox', '--window-size=1020,1080', '--disable-infobars', f"--proxy-server=http://127.0.0.1:10809"] # browser arguments
    driver = await uc.start(
        config=config,
        browser_args=browser_args
                            )
    # Restore a minimized window to normal and jump to the first window
    await driver.main_tab.medimize()  # restore the page size
    url_params = driver.main_tab.target.url
    # The value returned here is overwritten by the assignment a few lines below.
    main_tab = await change_handle_async(driver, url_params)
    
    # Intercept requests and responses
    main_tab = driver.main_tab
    main_tab.add_handler(cdp.network.RequestWillBeSent, send_handler)
    main_tab.add_handler(cdp.network.ResponseReceived, receive_handler)
    
    # Open a URL; new_window=True opens a new browser window, new_tab=True loads the URL in a new tab
    page = await asyncio.wait_for(driver.get('https://www.baidu.com'), timeout=120)
    
    # page2 = await driver.get('https://twitter.com', new_tab=True)
    # page3 = await driver.get('https://github.com/ultrafunkamsterdam/nodriver', new_window=True)
    await page.reload()    # refresh the page
    await page.bring_to_front() # activate the current page

    # tabs, targets and main_tab are assigned here but not read again in this function.
    tabs = driver.tabs
    targets = driver.targets
    main_tab = driver.main_tab
    print('to_json:', page.to_json())
    input_text = await page.select('input[id="kw"]')
    await input_text.clear_input() # clear the input box
    await input_text.send_keys('python')
    await page.sleep(2)
    
    # Look up an element by its text
    create_account = await page.find("百度一下", best_match=True)
    print('create_account:', create_account)
    # await create_account.click()
    await page.sleep(3.5)
    su = await page.select('input#su')
    await su.click()
    await page.sleep(3.5)

    # Read an element's attributes such as id and class
    input_thing = await page.select("a[href^='http://trust.baidu.com']")
    attrs = input_thing.attrs
    print('attrs:', attrs)

    # Run several operations concurrently
    # await asyncio.gather(*[flash_spans(tab, i) for (i, tab) in enumerate(driver.tabs)])
    await page.sleep(2)
    # await demo_drag_to_relative_position_in_steps(driver)

    content_left = await page.select('div#content_left>div a[href^="http://www.baidu.com/link"]')
    print('text_all:', content_left.text_all)
    
    # Get the cookies of the current page
    frame_data = await page.send(cdp.page.get_frame_tree())
    print('frame_data00:', frame_data.frame.url)
    cookies1 = await page.send(cdp.network.get_cookies([frame_data.frame.url]))
    print('cookies1', cookies1)
    # cookies2 is a one-element list holding a single name -> value mapping.
    cookies2 = [{i.name: i.value for i in cookies1}]
    # cookies3 is built but not used afterwards.
    cookies3 = [i.to_json() for i in cookies1]
    with open('cookies.json', 'w') as f:
        f.write(json.dumps(cookies2))
    # await page.set_cookies(cookies)
    await operate_setup(driver, page)
    
    # Switch to the specified window (uses change_handle_async's default url_params)
    page = await change_handle_async(driver)

# Remote-debugging port; main1 sets the same number on config.port.
port = 62805
chrome_path = find_chrome_executable()
print('chrome_path:', chrome_path)
user_data_dir = './pyppeteer_chrome'
# Check whether the browser is already open; get_session_url launches it when the probe fails
get_session_url(chrome_path, port, user_data_dir)
# Run the demo coroutine to completion on nodriver's event loop.
uc.loop().run_until_complete(main1())