package com.example.nettydemo.threadpool;
import cn.hutool.core.io.file.FileReader;
import cn.hutool.core.map.MapBuilder;
import org.openqa.selenium.By;
import org.openqa.selenium.Keys;
import org.openqa.selenium.Proxy;
import org.openqa.selenium.WebElement;
import org.openqa.selenium.chrome.ChromeDriver;
import org.openqa.selenium.chrome.ChromeOptions;
import org.openqa.selenium.devtools.DevTools;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.Map;
/**
 * Stand-alone demo that drives Chrome through Selenium with a set of anti-bot-detection
 * tweaks, injects a "stealth" JavaScript file into every new document, and then walks through
 * a search form on the target site (origin, destination, search).
 *
 * <p>All paths, the target URL and the XPath locators are hard-coded for the author's machine
 * and target page. The browser is intentionally left open after the search is triggered so the
 * result can be inspected; it is closed when {@code main} leaves through an exception or an
 * interrupt.
 *
 * @author 周超
 * @date 2022/9/9 13:49
 */
public class Test {

    /** Location of the chromedriver binary handed to Selenium via a system property. */
    private static final String CHROME_DRIVER_PATH =
            "C:\\Program Files\\Google\\Chrome\\Application\\chromedriver.exe";

    /** Script that is evaluated in every new document before the page's own scripts. */
    private static final String STEALTH_SCRIPT_PATH = "C:\\Users\\zc\\Desktop\\stealth.min.js";

    /** Page that hosts the search form. */
    private static final String TARGET_URL = "https://WWW.XXX.COM";

    /** Control that opens the search dialog. */
    private static final String XPATH_OPEN_SEARCH =
            "/html/body/main/div[3]/div/div[1]/div[2]/section[1]/div/div/div/div[2]/div[1]/div/div[2]/form/div/div/div[1]/a[1]/div[1]";

    /** Origin input inside the search dialog. */
    private static final String XPATH_ORIGIN_INPUT =
            "/html/body/main/div[3]/div/div[2]/div[1]/div/div/div/div[2]/div/div[1]/div/div[1]/div[2]/input";

    /** Destination input inside the search dialog. */
    private static final String XPATH_DESTINATION_INPUT =
            "/html/body/main/div[3]/div/div[2]/div[1]/div/div/div/div[2]/div/div[1]/div/div[2]/div[2]/input";

    /** Close button of the search dialog. */
    private static final String XPATH_CLOSE_DIALOG =
            "/html/body/main/div[3]/div/div[2]/div[1]/div/div/div/div[1]/a";

    /** Search (submit) control of the form. */
    private static final String XPATH_SEARCH_BUTTON =
            "/html/body/main/div[3]/div/div[1]/div[2]/section[1]/div/div/div/div[2]/div[1]/div/div[2]/form/div/div/div[3]";

    /** Upper bound for waiting until an element shows up in the DOM. */
    private static final long ELEMENT_TIMEOUT_MILLIS = 10_000L;

    /** Pause between two DOM look-ups while waiting for an element. */
    private static final long POLL_INTERVAL_MILLIS = 250L;

    /**
     * Launches Chrome, installs the stealth script, performs the search and then keeps the
     * browser open until the thread is interrupted.
     *
     * @param args ignored
     * @throws InterruptedException if the thread is interrupted during one of the pauses
     */
    public static void main(String[] args) throws InterruptedException {
        System.setProperty("webdriver.chrome.driver", CHROME_DRIVER_PATH);

        ChromeOptions options = buildOptions();

        // Read the script before a browser is started so a missing file fails fast
        // without launching Chrome.
        String stealthJs = new FileReader(STEALTH_SCRIPT_PATH).readString();

        ChromeDriver driver = new ChromeDriver(options);
        try {
            installStealthScript(driver, stealthJs);
            driver.manage().window().maximize();
            driver.get(TARGET_URL);

            runSearch(driver);

            // Keep the browser window open for manual inspection of the result page.
            while (true) {
                Thread.sleep(1000);
            }
        } finally {
            // Without this, any failure above (e.g. an element that cannot be found) or an
            // interrupt would leave the Chrome and chromedriver processes running.
            driver.quit();
        }
    }

    /**
     * Builds the Chrome options used for the session: general tuning switches plus the
     * switches and experimental options that hide typical automation markers.
     *
     * @return fully configured options
     */
    private static ChromeOptions buildOptions() {
        ChromeOptions options = new ChromeOptions();

        // Lets Chrome start when running as root.
        options.addArguments("--no-sandbox");

        // General tuning switches.
        options.addArguments("blink-settings=imagesEnabled=true");
        options.addArguments("--ignore-certificate-errors");
        options.addArguments("--start-maximized");
        options.addArguments("lang=zh-CN,zh,zh-TW,en-US,en");
        options.addArguments("user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/105.0.0.0 Safari/537.36");
        options.addArguments("--disable-browser-side-navigation");
        options.addArguments("--disable-dev-shm-usage");

        // Selenium exposes marker values such as window.navigator.webdriver (true under
        // automation, undefined otherwise). The settings below remove those markers.
        options.addArguments("--disable-blink-features=AutomationControlled");
        options.setExperimentalOption("excludeSwitches", new String[]{"enable-automation"});
        options.addArguments("disable-infobars");
        // Must be false to match the intent of suppressing the automation info bar; the
        // previous value (true) contradicted that intent.
        // NOTE(review): recent ChromeDriver releases may ignore this option - confirm.
        options.setExperimentalOption("useAutomationExtension", false);

        // Turn off the credential service and the password manager prompt.
        Map<String, Object> prefs = new HashMap<>();
        prefs.put("credentials_enable_service", false);
        prefs.put("profile.password_manager_enabled", false);
        options.setExperimentalOption("prefs", prefs);

        return options;
    }

    /**
     * Registers {@code script} through the CDP command
     * {@code Page.addScriptToEvaluateOnNewDocument}.
     *
     * @param driver running Chrome session
     * @param script JavaScript source to register
     */
    private static void installStealthScript(ChromeDriver driver, String script) {
        Map<String, Object> commandMap = MapBuilder.create(new LinkedHashMap<String, Object>())
                .put("source", script)
                .build();
        driver.executeCdpCommand("Page.addScriptToEvaluateOnNewDocument", commandMap);
    }

    /**
     * Fills in the search form: opens the dialog, enters origin and destination, closes the
     * dialog and clicks the search control. The fixed pauses are kept from the original script.
     *
     * @param driver running Chrome session that has already loaded the target page
     * @throws InterruptedException if the thread is interrupted during one of the pauses
     */
    private static void runSearch(ChromeDriver driver) throws InterruptedException {
        // Open the search dialog.
        WebElement searchOpener = waitForElement(driver, XPATH_OPEN_SEARCH);
        Thread.sleep(2000);
        searchOpener.click();

        // Origin.
        WebElement originInput = waitForElement(driver, XPATH_ORIGIN_INPUT);
        Thread.sleep(1000);
        originInput.sendKeys("xxx");
        Thread.sleep(2000);
        originInput.sendKeys(Keys.ENTER);

        // Destination.
        WebElement destinationInput = waitForElement(driver, XPATH_DESTINATION_INPUT);
        destinationInput.sendKeys("xxx");
        Thread.sleep(3000);
        destinationInput.sendKeys(Keys.ENTER);

        // Close the dialog.
        WebElement closeButton = waitForElement(driver, XPATH_CLOSE_DIALOG);
        closeButton.click();
        Thread.sleep(1000);

        // Trigger the search.
        WebElement searchButton = waitForElement(driver, XPATH_SEARCH_BUTTON);
        Thread.sleep(1000);
        searchButton.click();
        System.out.println(searchButton);
    }

    /**
     * Polls the DOM until an element matching {@code xpath} exists or the timeout elapses, then
     * returns it. If the element is already present this behaves like a plain
     * {@code findElement}; if it never appears, the final {@code findElement} call raises the
     * same exception an immediate look-up would have raised.
     *
     * @param driver running Chrome session
     * @param xpath  XPath expression locating the element
     * @return the located element
     * @throws InterruptedException if the thread is interrupted while waiting
     */
    private static WebElement waitForElement(ChromeDriver driver, String xpath)
            throws InterruptedException {
        By locator = By.xpath(xpath);
        long startNanos = System.nanoTime();
        long timeoutNanos = ELEMENT_TIMEOUT_MILLIS * 1_000_000L;
        while (driver.findElements(locator).isEmpty()
                && System.nanoTime() - startNanos < timeoutNanos) {
            Thread.sleep(POLL_INTERVAL_MILLIS);
        }
        return driver.findElement(locator);
    }
}
// Source article title: "Selenium anti-crawler" (selenium反爬虫)
// Latest recommended article published on 2025-02-28 11:06:43