selenium.webdriver中添加代理

本文介绍如何使用Selenium为Firefox及PhantomJS浏览器设置代理IP,包括静态配置和动态切换的方法。具体步骤涉及使用FirefoxProfile设置Firefox代理,通过Proxy类为PhantomJS配置代理,并演示了如何更改会话中的代理IP实现动态切换。

Firefox中设置代理ip
  method_1
# Configure a manual HTTP proxy for Firefox through a FirefoxProfile.
from selenium import webdriver

profile = webdriver.FirefoxProfile()
# 1 = manual proxy configuration (Firefox "network.proxy.type" preference).
profile.set_preference('network.proxy.type', 1)
profile.set_preference('network.proxy.http', '127.0.0.1')
profile.set_preference('network.proxy.http_port', 17890)  # port must be an int
profile.update_preferences()
driver = webdriver.Firefox(firefox_profile=profile)
# Load an IP-echo page to check which address the request came from.
driver.get('http://httpbin.org/ip')
  method_2
# Configure the Firefox proxy by handing a Proxy object to the driver.
from selenium import webdriver
from selenium.webdriver.common.proxy import Proxy
from selenium.webdriver.common.proxy import ProxyType

# NOTE(review): get_proxy_ip_port() is not defined in this snippet; presumably
# a reader-supplied helper that returns an 'ip:port' string -- confirm.
proxy = Proxy(
    {
        'proxyType': ProxyType.MANUAL,  # optional according to the author
        'httpProxy': get_proxy_ip_port(),
    }
)
driver = webdriver.Firefox(proxy=proxy)
# Load an IP-echo page to check which address the request came from.
driver.get('http://httpbin.org/ip')
  phantomjs中设置代理ip
  方法一:太不优雅(还是看方法二吧)
  在phantomjs中不能像上面的Firefox的method2一样传入proxy
  phantomjs和Firefox均继承自WebDriver,父类WebDriver可以传入proxy
  phantomjs在初始化WebDriver时就没有留proxy参数
  所以可以改一下phantomjs类的源码(原文此处有一张截图,未随文本保留;大意是在PhantomJS的__init__中增加proxy参数,并在初始化父类WebDriver时把它传进去),这样就可以在phantomjs中传入proxy参数了

  下面才是示例
# Pass a Proxy object straight to PhantomJS.
# NOTE(review): per the surrounding text, the stock PhantomJS driver class does
# not accept a ``proxy`` argument; this only works after patching its source.
from selenium import webdriver
from selenium.webdriver.common.proxy import Proxy
from selenium.webdriver.common.proxy import ProxyType

# NOTE(review): get_proxy_ip_port() is not defined in this snippet; presumably
# a reader-supplied helper that returns an 'ip:port' string -- confirm.
proxy = Proxy(
    {
        'proxyType': ProxyType.MANUAL,
        'httpProxy': get_proxy_ip_port(),
    }
)
driver = webdriver.PhantomJS(
    executable_path="/path/of/phantomjs",
    proxy=proxy,
)
driver.get('http://httpbin.org/ip')
# Function-call form of print works on both Python 2 and Python 3.
print(driver.page_source)
# quit() ends the session and stops the PhantomJS process; close() would only
# close the current window and leave the process running.
driver.quit()
  方法二:
# Configure the PhantomJS proxy through desired capabilities.
from selenium import webdriver
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
from selenium.webdriver.common.proxy import Proxy
from selenium.webdriver.common.proxy import ProxyType

proxy = Proxy(
    {
        'proxyType': ProxyType.MANUAL,
        'httpProxy': 'ip:port',  # proxy IP and port
    }
)

# Start from a copy so the shared PHANTOMJS capability dict is not mutated.
desired_capabilities = DesiredCapabilities.PHANTOMJS.copy()

# Add the proxy settings to the capabilities.
proxy.add_to_capabilities(desired_capabilities)
driver = webdriver.PhantomJS(
    executable_path="/path/of/phantomjs",
    desired_capabilities=desired_capabilities,
)
driver.get('http://httpbin.org/ip')
# Function-call form of print works on both Python 2 and Python 3.
print(driver.page_source)
# quit() ends the session and stops the PhantomJS process; close() would only
# close the current window and leave the process running.
driver.quit()
  方法三(动态切换ip):
# Switch the PhantomJS proxy at runtime by starting a new session on the
# same driver with fresh capabilities.
from selenium import webdriver
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
from selenium.webdriver.common.proxy import Proxy
from selenium.webdriver.common.proxy import ProxyType

proxy = Proxy(
    {
        'proxyType': ProxyType.MANUAL,
        'httpProxy': 'ip:port',  # proxy IP and port
    }
)

# Start from a copy so the shared PHANTOMJS capability dict is not mutated.
desired_capabilities = DesiredCapabilities.PHANTOMJS.copy()

# Add the proxy settings to the capabilities.
proxy.add_to_capabilities(desired_capabilities)
driver = webdriver.PhantomJS(
    executable_path="/path/of/phantomjs",
    desired_capabilities=desired_capabilities,
)

# Check the address seen through the first proxy.
driver.get('http://httpbin.org/ip')
print(driver.page_source)

# Now switch the proxy: build a Proxy object for the new address.
proxy = Proxy(
    {
        'proxyType': ProxyType.MANUAL,
        'httpProxy': 'ip:port',  # proxy IP and port
    }
)

# Build another fresh set of capabilities.
desired_capabilities = DesiredCapabilities.PHANTOMJS.copy()

# Add the new proxy settings to the capabilities.
proxy.add_to_capabilities(desired_capabilities)

# Start a new session on the existing driver with the new capabilities.
driver.start_session(desired_capabilities)
driver.get('http://httpbin.org/ip')
print(driver.page_source)
# quit() ends the session and stops the PhantomJS process; close() would only
# close the current window and leave the process running.
driver.quit()

# # -*- coding: utf-8 -*- # 数据爬取文件 import scrapy import pymysql import pymssql from ..items import ErshoufanginfoItem import time from datetime import datetime,timedelta import datetime as formattime import re import random import platform import json import os import urllib from urllib.parse import urlparse import requests import emoji import numpy as np from DrissionPage import Chromium import pandas as pd from sqlalchemy import create_engine from selenium.webdriver import ChromeOptions, ActionChains from scrapy.http import TextResponse from selenium import webdriver from selenium.webdriver.common.by import By from selenium.webdriver.support import expected_conditions as EC from selenium.webdriver.support.wait import WebDriverWait import pandas as pd from sqlalchemy import create_engine from selenium.webdriver import ChromeOptions, ActionChains from scrapy.http import TextResponse from selenium import webdriver from selenium.webdriver.common.by import By from selenium.webdriver.support import expected_conditions as EC from selenium.webdriver.support.wait import WebDriverWait # 二手房信息 class ErshoufanginfoSpider(scrapy.Spider): name = 'ershoufanginfoSpider' spiderUrl = 'https://gy.anjuke.com/sale/p{}/' start_urls = spiderUrl.split(";") protocol = '' hostname = '' realtime = False def __init__(self,realtime=False,*args, **kwargs): super().__init__(*args, **kwargs) self.realtime = realtime=='true' def start_requests(self): plat = platform.system().lower() if not self.realtime and (plat == 'linux' or plat == 'windows'): connect = self.db_connect() cursor = connect.cursor() if self.table_exists(cursor, 's6036zs0_ershoufanginfo') == 1: cursor.close() connect.close() self.temp_data() return pageNum = 1 + 1 for url in self.start_urls: if '{}' in url: for page in range(1, pageNum): next_lin用什么库来解析
03-25
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.webdriver import WebDriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
from selenium.common.exceptions import TimeoutException, WebDriverException
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC


def init_driver():
    """Create a headless Chrome driver with automation-detection tweaks.

    Returns:
        The started Chrome ``WebDriver``; the caller is responsible for
        calling ``quit()`` on it.
    """
    chrome_options = Options()
    # Basic configuration
    chrome_options.add_argument("--no-sandbox")
    chrome_options.add_argument("--disable-dev-shm-usage")
    chrome_options.add_argument("--headless=new")
    chrome_options.add_argument("--disable-gpu")
    chrome_options.add_argument("--single-process")

    # Hardened anti-detection configuration
    chrome_options.add_argument("--disable-blink-features=AutomationControlled")
    chrome_options.add_experimental_option("excludeSwitches", ["enable-automation"])
    chrome_options.add_experimental_option('useAutomationExtension', False)

    # NOTE(review): Service(executable_path=...) is normally given the
    # chromedriver binary; confirm that this path is the intended driver.
    service = Service(executable_path="/usr/bin/sensible-browser")
    # The options must be passed explicitly; otherwise every setting
    # configured above is silently ignored.
    driver: WebDriver = webdriver.Chrome(service=service, options=chrome_options)

    # Override navigator.webdriver in every new document before page scripts run.
    driver.execute_cdp_cmd('Page.addScriptToEvaluateOnNewDocument', {
        'source': '''
            Object.defineProperty(navigator, 'webdriver', {
                get: () => undefined
            });
        '''
    })
    return driver


# Keep the name bound even if init_driver() raises, so ``finally`` can test it.
driver = None
try:
    driver = init_driver()
    driver.get('http://portal.sx.cmcc/home')

    # Explicitly wait for the element to be visible (not merely present).
    wait = WebDriverWait(driver, 15)
    main_content = wait.until(
        EC.visibility_of_element_located((By.ID, "main-content"))
    )
    print("成功加载目标元素!")

except TimeoutException as e:
    print(f"页面元素加载超时: {str(e)}")
except WebDriverException as e:
    print(f"浏览器驱动异常: {str(e)}")
    if "net::ERR_NAME_NOT_RESOLVED" in str(e):
        print("提示:请检查网址是否正确或网络连接是否正常")
except Exception as e:
    # Top-level boundary of the script: report anything unexpected.
    print(f"未知错误: {str(e)}")
finally:
    if driver is not None:
        driver.quit()
    else:
        print("警告:Driver未成功初始化,无需退出")
03-13
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值