1. 安装
pip install -i https://pypi.tuna.tsinghua.edu.cn/simple nodriver
nodriver 是 Undetected-Chromedriver Python 包的官方后继者:不再需要 webdriver,不再需要 Selenium,也不依赖 chromedriver 二进制文件或任何 Selenium 依赖项。
和 pyppeteer 类似,nodriver 也是异步执行的,不过 nodriver 规避了自动化检测。nodriver 用 await page.select('') 按 CSS 选择器查找元素,用 await page.find('') 按文本查找元素;XPath 查询可使用 await page.xpath('')。
2. 基本使用
# -*- coding: utf-8 -*-
import subprocess
import time
#from pyppeteer import launch
import os
import requests
import json
import asyncio
import random
from undetected_chromedriver import find_chrome_executable # 获取浏览器路径
import nodriver as uc
from nodriver import cdp, loop
def cmd_start_brower(chrome_path, port, user_data_dir=None):
    """Launch Chrome with remote debugging enabled on ``port``.

    Args:
        chrome_path: Path of the Chrome executable to run.
        port: Port number passed to ``--remote-debugging-port``.
        user_data_dir: Optional browser data directory. When given, it is
            passed (as an absolute path) via ``--user-data-dir``; when
            ``None`` the flag is omitted.

    Returns:
        The ``subprocess.Popen`` handle of the started browser process.

    Raises:
        ValueError: If ``chrome_path`` is empty or ``None``.
        OSError: If the executable cannot be started.
    """
    if not chrome_path:
        raise ValueError('chrome_path must point to a Chrome executable')

    # An argument list (instead of one formatted shell string) keeps paths
    # containing spaces intact and avoids going through the shell.
    command = [
        chrome_path,
        f'--remote-debugging-port={port}',
        '--no-first-run',
        '--no-sandbox',
        '--disable-infobars',
        '--allow-file-access-from-files',
        '--no-default-browser-check',
        '--profile-directory=Profile1',
        '--disable-features=PrivacySandboxSettings4',
    ]
    if user_data_dir:
        command.append(f'--user-data-dir={os.path.abspath(user_data_dir)}')
    return subprocess.Popen(command)
def get_session_url(chrome_path, port, user_data_dir):
    """Return the DevTools WebSocket URL of the browser on ``port``.

    Queries ``http://127.0.0.1:<port>/json/version``. If the endpoint cannot
    be reached or its answer cannot be parsed, the browser is started with
    ``cmd_start_brower`` and the function waits a few seconds for it to
    come up.

    Args:
        chrome_path: Chrome executable used when the browser must be started.
        port: Remote debugging port to probe.
        user_data_dir: Data directory forwarded to ``cmd_start_brower``.

    Returns:
        The ``webSocketDebuggerUrl`` string when the browser was already
        running, otherwise ``None`` (the browser has just been launched).
    """
    url = f'http://127.0.0.1:{port}/json/version'
    try:
        # A timeout keeps the probe from hanging forever on a stuck port.
        res = requests.get(url, timeout=5)
        print(res.text)
        return json.loads(res.text)['webSocketDebuggerUrl']
    except (requests.RequestException, ValueError, KeyError, TypeError) as e:
        print('error:', e)
        cmd_start_brower(chrome_path, port, user_data_dir)
        # Give the freshly started browser a moment to open its debug port.
        time.sleep(random.randint(4, 6))
        return None
async def operate_setup(driver, page):
    """Run a sequence of demo operations on an already opened page.

    Fetches the page content, reads the current URL in three different
    ways, drives the search box through JavaScript, and finally prints the
    browser's tabs, targets, main tab and the page's handlers.

    Args:
        driver: Browser object exposing ``tabs``, ``targets`` and ``main_tab``.
        page: Tab object the operations are performed on.
    """
    # JavaScript snippets evaluated in the page further below.
    fill_keyword_js = '''document.querySelector("#kw").value="python"'''
    read_keyword_js = '''{var temp = document.querySelector("input[id*='kw']");if(temp){temp.value}else{"0000"}}'''
    click_search_js = '''var temp=document.querySelector('input[id="su"]');if(temp){temp.click()}'''

    # Fetch the current page content, bounded by a 60 second timeout.
    # (The content itself is not used afterwards.)
    await asyncio.wait_for(page.get_content(), timeout=60)

    # Current URL, obtained from the frame tree, the tab and from JavaScript.
    frame_tree = await page.send(uc.cdp.page.get_frame_tree())
    print('to_json:', frame_tree.to_json())
    url_from_frame = frame_tree.frame.url
    url_from_tab = page.url
    url_from_js = await page.evaluate("window.location.href")
    print('current_url1:', url_from_tab)
    print('current_url:', url_from_frame)
    print('current_url:', url_from_js)

    await page.sleep(6)
    print('locationhref:', await page.evaluate('''window.location.href'''))

    # Type into the search box by assigning its value through JavaScript.
    await page.evaluate(fill_keyword_js)

    # Check whether the input exists. The script needs no `return`: when the
    # element is present its value is the result, otherwise "0000".
    keyword_value = await page.evaluate(read_keyword_js, await_promise=True)
    print('result:', keyword_value)

    # Click the search button if it is present.
    await page.evaluate(click_search_js)
    await page.sleep(3.5)

    # Read the innerText of the #kw element.
    inner_text = await page.evaluate('document.querySelector("#kw").innerText')
    print(f'Dynamic Content: {inner_text}')

    # Browser-level overview: all tabs, all targets, the main tab.
    print('tabs:', driver.tabs)
    print('targets:', driver.targets)
    print('main_tab:', driver.main_tab)
    print('handlers:', page.handlers)
async def change_handle_async(driver, url_params='blog.youkuaiyun.com'):
    """Switch to the first tab whose URL contains ``url_params``.

    Tabs are visited in the order given by ``driver.tabs``. Every tab
    visited before a match is closed; the matching tab is brought to the
    front and returned.

    Args:
        driver: Browser object exposing ``tabs``.
        url_params: Substring looked for in each tab's URL.

    Returns:
        The matching tab, or ``None`` when no tab matched (in that case all
        visited tabs have been closed).
    """
    for tab in driver.tabs:
        tab_url = tab.target.url
        print('title:', tab.target.title)
        print('url:', tab_url)

        if url_params not in tab_url:
            # Not the tab we are looking for: close it and move on.
            await tab.close()
            print('target_id:', tab.target.target_id)
            continue

        # Found it: bring the tab to the front and return it.
        await tab.bring_to_front()
        await tab
        return tab
async def flash_spans(tab, i):
    """Activate ``tab`` and print its title.

    Args:
        tab: Tab object to activate.
        i: Index of the tab; accepted but not used by the body.
    """
    await tab.activate()
    print('title:', tab.target.title)
async def demo_drag_to_relative_position_in_steps(browser):
    """Open the mouse demo page in a new tab and drag every ``.box`` element.

    Each box is dragged by ``(500, 500)`` relative to its own position,
    using 50 intermediate steps.

    Args:
        browser: Browser object used to open the demo page.
    """
    drag_offset = (500, 500)
    demo_tab = await browser.get('https://nowsecure.nl/mouse.html', new_tab=True)
    for draggable in await demo_tab.select_all('.box'):
        await draggable.mouse_drag(drag_offset, relative=True, steps=50)
async def receive_handler(event: cdp.network.ResponseReceived):
    """Print the request id and the response of a received-response event.

    Args:
        event: The ``ResponseReceived`` network event passed to the handler.
    """
    for field in ('request_id', 'response'):
        print(f'{field}:', getattr(event, field))
async def send_handler(event: cdp.network.RequestWillBeSent):
    """Build a text summary of an outgoing request.

    The summary is the request line (``METHOD URL``) followed by one
    tab-indented ``name : value`` line per header. It is only assembled,
    not printed or returned.

    Args:
        event: The ``RequestWillBeSent`` network event passed to the handler.
    """
    request = event.request
    request_line = f"{request.method} {request.url}"
    header_block = "".join(
        f"\n\t{name} : {value}" for name, value in request.headers.items()
    )
    summary = request_line + header_block
    # To log every outgoing request, print the summary here:
    # print('request:', summary)
async def main1():
    """Connect to the browser on 127.0.0.1:62805 and run the Baidu demo.

    Steps: restore the main tab's window, register request/response
    handlers, open https://www.baidu.com, search for "python", print a few
    element attributes, save the page cookies to ``cookies.json``, run
    ``operate_setup`` and finally switch tabs with ``change_handle_async``.
    """
    config = uc.Config()
    config.host = "127.0.0.1"
    config.port = 62805
    # A data directory could be set here via config.user_data_dir.

    # Browser command-line arguments.
    # NOTE(review): a ready-made config is passed together with browser_args;
    # confirm that nodriver's start() still honors browser_args in that case.
    browser_args = [
        '--no-first-run',
        '--no-sandbox',
        '--window-size=1020,1080',
        '--disable-infobars',
        "--proxy-server=http://127.0.0.1:10809",
    ]
    driver = await uc.start(config=config, browser_args=browser_args)

    # Restore a minimized window to normal size, then switch to the tab
    # whose URL matches the main tab's URL.
    await driver.main_tab.medimize()
    await change_handle_async(driver, driver.main_tab.target.url)

    # Intercept requests and responses on the main tab.
    main_tab = driver.main_tab
    for event_type, handler in (
        (cdp.network.RequestWillBeSent, send_handler),
        (cdp.network.ResponseReceived, receive_handler),
    ):
        main_tab.add_handler(event_type, handler)

    # Open the URL. driver.get also accepts new_window=True (open a new
    # browser window) and new_tab=True (open the URL in a new tab).
    page = await asyncio.wait_for(driver.get('https://www.baidu.com'), timeout=120)
    await page.reload()  # refresh the page
    await page.bring_to_front()  # activate the page

    # Browser-level views, read for inspection only; not used below.
    open_tabs = driver.tabs
    all_targets = driver.targets
    main_tab = driver.main_tab
    print('to_json:', page.to_json())

    # Type the query into the search box.
    search_box = await page.select('input[id="kw"]')
    await search_box.clear_input()  # empty the input first
    await search_box.send_keys('python')
    await page.sleep(2)

    # Look an element up by its text (found, but not clicked).
    search_by_text = await page.find("百度一下", best_match=True)
    print('create_account:', search_by_text)
    await page.sleep(3.5)

    # Click the search button.
    search_button = await page.select('input#su')
    await search_button.click()
    await page.sleep(3.5)

    # Read an element's attributes (id, class, ...).
    trust_link = await page.select("a[href^='http://trust.baidu.com']")
    print('attrs:', trust_link.attrs)
    await page.sleep(2)

    # Print the text of the first result link.
    first_result = await page.select('div#content_left>div a[href^="http://www.baidu.com/link"]')
    print('text_all:', first_result.text_all)

    # Fetch the cookies of the current page and store them as JSON.
    frame_tree = await page.send(cdp.page.get_frame_tree())
    frame_url = frame_tree.frame.url
    print('frame_data00:', frame_url)
    page_cookies = await page.send(cdp.network.get_cookies([frame_url]))
    print('cookies1', page_cookies)
    name_to_value = [{cookie.name: cookie.value for cookie in page_cookies}]
    cookies_as_json = [cookie.to_json() for cookie in page_cookies]  # not used below
    with open('cookies.json', 'w') as cookie_file:
        cookie_file.write(json.dumps(name_to_value))

    await operate_setup(driver, page)

    # Switch to the tab selected by change_handle_async's default URL filter.
    page = await change_handle_async(driver)
# Remote debugging port of the browser; main1() connects to the same port.
port = 62805
# Browser data directory handed to get_session_url().
user_data_dir = './pyppeteer_chrome'

# Only launch the browser and run the demo when executed as a script, so that
# importing this module has no side effects.
if __name__ == '__main__':
    chrome_path = find_chrome_executable()
    print('chrome_path:', chrome_path)
    # Check whether the browser is already listening on the debugging port;
    # get_session_url() starts it when it is not.
    get_session_url(chrome_path, port, user_data_dir)
    uc.loop().run_until_complete(main1())