我们需要创建一个 Selenium WebDriver 实例来加载需要截取的页面,再借助 AShot 滚动拼接出整页长截图。
直接展示截取效果:
效果1:
网址:https://googlechromelabs.github.io/chrome-for-testing/
效果2:
网址:https://baike.baidu.com/item/%E7%A0%A5%E6%9F%B1%E4%B8%AD%E6%B5%81/89330?fr=aladdin
直接贴代码
package com.frame.html2pdf;
import java.awt.image.BufferedImage;
import java.io.File;
import java.io.IOException;
import java.util.Locale;
import javax.imageio.ImageIO;
import org.openqa.selenium.WebDriver;
import org.openqa.selenium.chrome.ChromeDriver;
import org.openqa.selenium.chrome.ChromeOptions;
import ru.yandex.qatools.ashot.AShot;
import ru.yandex.qatools.ashot.shooting.ShootingStrategies;
/**
 * Captures a full-page screenshot of a web page with headless Chrome and
 * writes it to disk as a PNG.
 *
 * <p>The page is loaded through Selenium's {@link ChromeDriver}; AShot's
 * viewport-pasting strategy scrolls the page and stitches the visible
 * viewports into a single image.
 */
public class HtmlToPdfConverter
{
    // ChromeDriver location on Windows (adjust to your own installation).
    private static final String CHROME_DRIVER_WINDOWS_PATH = "D:\\Programs\\chromedriver-win64\\chromedriver.exe";
    // ChromeDriver location on Linux (commonly /usr/local/bin/chromedriver).
    private static final String CHROME_DRIVER_LINUX_PATH = "/usr/local/bin/chromedriver";

    // System property Selenium reads to locate the chromedriver executable.
    private static final String CHROME_DRIVER_PROPERTY = "webdriver.chrome.driver";

    // Used by main when no command-line arguments are given.
    // Alternative sample page:
    // https://baike.baidu.com/item/%E7%A0%A5%E6%9F%B1%E4%B8%AD%E6%B5%81/89330?fr=aladdin
    private static final String DEFAULT_URL = "https://googlechromelabs.github.io/chrome-for-testing/";
    private static final String DEFAULT_OUTPUT_PATH = "screenshot.png";

    // Value handed to ShootingStrategies.viewportPasting: the pause between
    // scroll steps, giving the page time to render before each viewport is shot.
    private static final int SCROLL_TIMEOUT_MS = 1000;

    /**
     * Command-line entry point.
     *
     * @param args optional; {@code args[0]} is the page URL and {@code args[1]}
     *             the output PNG path. Missing values fall back to the defaults.
     */
    public static void main(String[] args) {
        String url = (args != null && args.length > 0) ? args[0] : DEFAULT_URL;
        String outputPath = (args != null && args.length > 1) ? args[1] : DEFAULT_OUTPUT_PATH;
        captureWebPage(url, outputPath);
    }

    /**
     * Loads {@code url} in headless Chrome and saves a full-page screenshot.
     *
     * <p>Failures while loading, capturing or writing are reported on
     * {@code System.err} and not rethrown; the browser session is always
     * closed. A failure to start Chrome itself still propagates to the caller.
     *
     * @param url        address of the page to capture
     * @param outputPath file the PNG is written to; missing parent directories
     *                   are created
     */
    public static void captureWebPage(String url, String outputPath) {
        configureDriverPath();
        WebDriver driver = new ChromeDriver(buildChromeOptions());
        try {
            driver.get(url);
            BufferedImage image = new AShot()
                    .shootingStrategy(ShootingStrategies.viewportPasting(SCROLL_TIMEOUT_MS))
                    .takeScreenshot(driver)
                    .getImage();
            writePng(image, outputPath);
        }
        catch (IOException | RuntimeException e) {
            // Best-effort by design: report with context, do not propagate.
            System.err.println("Failed to capture " + url + " to " + outputPath);
            e.printStackTrace();
        }
        finally {
            // Always release the browser process, even after a failure.
            driver.quit();
        }
    }

    /**
     * Points Selenium at a chromedriver binary without clobbering an explicit
     * user setting.
     *
     * <p>An already-set {@code webdriver.chrome.driver} property wins. Otherwise
     * the OS-specific default path is used, but only when that file exists;
     * if it does not, the property is left unset so Selenium can fall back to
     * its own driver discovery (Selenium Manager, available in Selenium 4.6+)
     * instead of failing on a stale hard-coded path.
     */
    private static void configureDriverPath() {
        String configured = System.getProperty(CHROME_DRIVER_PROPERTY);
        if (configured != null && !configured.isEmpty()) {
            return;
        }
        String defaultPath = isWindows() ? CHROME_DRIVER_WINDOWS_PATH : CHROME_DRIVER_LINUX_PATH;
        if (new File(defaultPath).isFile()) {
            System.setProperty(CHROME_DRIVER_PROPERTY, defaultPath);
        }
    }

    /** Builds the Chrome launch options used for capturing. */
    private static ChromeOptions buildChromeOptions() {
        ChromeOptions options = new ChromeOptions();
        options.addArguments("--headless"); // run without a visible window
        options.addArguments("--disable-gpu");
        options.addArguments("--window-size=1920,1080"); // viewport used for each stitched tile
        // NOTE(review): --allowed-ips looks like a ChromeDriver *server* flag rather
        // than a Chrome browser switch, and "138.0.7204.92" resembles a Chrome version
        // number, not an IP address. Kept unchanged here; confirm whether it has any
        // effect and remove it if not.
        options.addArguments("--allowed-ips=127.0.0.1,138.0.7204.92");
        return options;
    }

    /**
     * Writes {@code image} as PNG to {@code outputPath}, creating missing
     * parent directories first.
     *
     * @throws IOException if the directory cannot be created, no PNG writer is
     *                     available, or the write itself fails
     */
    private static void writePng(BufferedImage image, String outputPath) throws IOException {
        File target = new File(outputPath).getAbsoluteFile();
        File parent = target.getParentFile();
        if (parent != null && !parent.isDirectory() && !parent.mkdirs() && !parent.isDirectory()) {
            throw new IOException("Cannot create output directory: " + parent);
        }
        // ImageIO.write signals "no suitable writer" by returning false rather than
        // throwing, so the result has to be checked explicitly.
        if (!ImageIO.write(image, "PNG", target)) {
            throw new IOException("No PNG writer available for: " + target);
        }
    }

    /** Returns whether the JVM reports a Windows operating system. */
    private static boolean isWindows() {
        // Locale.ROOT keeps the lower-casing independent of the default locale.
        return System.getProperty("os.name", "").toLowerCase(Locale.ROOT).contains("windows");
    }
}
POM引用
<!-- Jsoup: web page scraping / HTML parsing -->
<!-- NOTE(review): the HtmlToPdfConverter class shown with this POM does not import Jsoup; confirm it is needed elsewhere. -->
<dependency>
<groupId>org.jsoup</groupId>
<artifactId>jsoup</artifactId>
<version>1.15.3</version>
</dependency>
<!-- Selenium: WebDriver API and the Chrome driver bindings used to load the page -->
<dependency>
<groupId>org.seleniumhq.selenium</groupId>
<artifactId>selenium-java</artifactId>
<version>4.10.0</version>
</dependency>
<!-- WebDriverManager: automatic management of browser driver binaries -->
<!-- NOTE(review): the HtmlToPdfConverter class shown with this POM sets the driver path by hand and does not import WebDriverManager; confirm it is needed. -->
<dependency>
<groupId>io.github.bonigarcia</groupId>
<artifactId>webdrivermanager</artifactId>
<version>5.4.1</version>
</dependency>
<!-- AShot: full-page screenshots by scrolling and stitching viewports -->
<dependency>
<groupId>ru.yandex.qatools.ashot</groupId>
<artifactId>ashot</artifactId>
<version>1.5.4</version>
</dependency>
<!-- Apache Commons IO: file handling utilities -->
<dependency>
<groupId>commons-io</groupId>
<artifactId>commons-io</artifactId>
<version>2.11.0</version>
</dependency>
<!-- NOTE(review): selenium-java presumably brings in a much newer Guava transitively; this direct declaration would override it with 22.0. Confirm the pin is intentional and does not break Selenium at runtime. -->
<dependency>
<groupId>com.google.guava</groupId>
<artifactId>guava</artifactId>
<version>22.0</version> <!-- pinned Guava version -->
</dependency>
注意事项
注意:服务器需要安装谷歌浏览器,并下载与浏览器版本一致的 ChromeDriver 驱动包(版本对照及下载地址:https://googlechromelabs.github.io/chrome-for-testing/),服务器最好配备显卡。