pdfbox-pdf转img

原创已于 2022-03-25 16:16:56 修改 · 578 阅读

0 ·

CC 4.0 BY-SA版权

文章标签：

#java

于 2022-03-25 16:06:21 首次发布

工具类专栏收录该内容

1 篇文章

订阅专栏

这篇博客介绍了如何利用PDFBox Java库将PDF文件转换为图像。首先，详细说明了如何添加PDFBox依赖到项目中，无论是通过Maven还是Gradle。接着，提供了转换的工具代码段，展示如何实现转换功能。最后，给出了测试代码，以验证转换过程的正确性。

pdfbox-pdf转img

1、依赖导入

maven导入

<!-- https://mvnrepository.com/artifact/org.apache.pdfbox/pdfbox -->
<dependency>
    <groupId>org.apache.pdfbox</groupId>
    <artifactId>pdfbox</artifactId>
    <version>2.0.22</version>
</dependency>

gradle导入

// https://mvnrepository.com/artifact/org.apache.pdfbox/pdfbox
implementation group: 'org.apache.pdfbox', name: 'pdfbox', version: '2.0.22'

2、工具代码（以下方法需放在第 3 节的 PdfToImgUtil 类内部：代码中用到的 log 在该类中定义，main 方法也是直接调用 pdfToImgCut）

/**
     * Renders every page of a PDF file to a JPEG image in the given folder.
     *
     * <p>Each page is written as {@code <pdf base name>_<page index>.jpg}, where the page index
     * starts at 0. The render scale is chosen per page so that the page's long side maps to at
     * most 1500 pixels and its short side to at most 1000 pixels, rounded down to one decimal.
     *
     * @param file     the PDF file to convert
     * @param imgStore path of the folder that receives the images; created if missing
     * @throws Exception if the output folder cannot be created, or if loading, rendering or
     *                   encoding a page fails
     */
    public static void pdfToImgCut(File file, String imgStore) throws Exception {
        // Pixel budget for the long and short side of each rendered page.
        final float maxLongSidePx = 1500f;
        final float maxShortSidePx = 1000f;
        // PDF user space is 72 units per inch, so (scale * 72) DPI yields `scale` pixels per unit.
        final float pointsPerInch = 72f;

        File outputDir = new File(imgStore);
        // Re-check isDirectory() after mkdirs() so a concurrently created folder is not an error.
        if (!outputDir.isDirectory() && !outputDir.mkdirs() && !outputDir.isDirectory()) {
            log.error("创建转图片存贮文件夹失败 ，文件夹路径：{}", imgStore);
            // Continuing would render every page only to fail on each write.
            throw new IOException("Cannot create image output folder: " + imgStore);
        }

        String fileName = file.getName();
        int dotIndex = fileName.lastIndexOf('.');
        // A name without an extension is used as-is instead of failing in substring().
        String baseName = dotIndex >= 0 ? fileName.substring(0, dotIndex) : fileName;

        // try-with-resources releases the document even when a page fails to render.
        try (PDDocument src = PDDocument.load(file)) {
            int totalPages = src.getNumberOfPages();
            for (int i = 0; i < totalPages; i++) {
                PDPage page = src.getPage(i);
                PDRectangle cropBox = page.getCropBox();
                float pageWidth = cropBox.getWidth();
                float pageHeight = cropBox.getHeight();
                float longSide = Math.max(pageHeight, pageWidth);
                float shortSide = Math.min(pageHeight, pageWidth);
                float fitScale = Math.min(maxLongSidePx / longSide, maxShortSidePx / shortSide);
                // Round down to one decimal; never let a very large page floor to 0,
                // which would request a zero-sized image.
                float scale = Math.max(0.1f, (float) Math.floor(fitScale * 10) / 10);
                BufferedImage bufferedImage = pageConvertToImage(page, scale * pointsPerInch, ImageType.RGB);
                byte[] jpgs = imgToBytes(bufferedImage, "jpg");
                // Resolve against the folder so the result does not depend on a trailing separator.
                String imgSavePath = new File(outputDir, baseName + "_" + i + ".jpg").getPath();
                cutImg(jpgs, imgSavePath);
            }
        }
    }

    /**
     * Writes a byte array to the given file path, overwriting any existing file.
     *
     * <p>This method never throws: any failure (including a {@code null} argument) is reported
     * on standard error together with the target path, and the method returns normally. Callers
     * therefore get no signal that the write failed.
     *
     * @param bs       the bytes to write
     * @param filePath path of the file to create or overwrite
     */
    public static void cutImg(byte[] bs, String filePath) {
        // try-with-resources flushes and closes both streams on every path.
        try (FileOutputStream fos = new FileOutputStream(filePath);
             BufferedOutputStream bos = new BufferedOutputStream(fos)) {
            bos.write(bs);
        } catch (Exception e) {
            // Best-effort contract kept from the original: report and continue.
            System.err.println("Failed to write file: " + filePath);
            e.printStackTrace();
        }
    }

    /**
     * Renders one page to an image by adding it to a temporary single-page document.
     *
     * <p>The temporary document is closed before this method returns.
     *
     * @param page      the page to render
     * @param dpi       the resolution passed to the renderer
     * @param imageType the image type passed to the renderer
     * @return the rendered page
     * @throws IOException if rendering or closing the temporary document fails
     */
    public static BufferedImage pageConvertToImage(PDPage page, float dpi, ImageType imageType) throws IOException {
        // The temporary document holds exactly one page, so it is always at index 0.
        final int onlyPageIndex = 0;
        try (PDDocument singlePageDoc = new PDDocument()) {
            singlePageDoc.addPage(page);
            BufferedImage rendered =
                    new PDFRenderer(singlePageDoc).renderImageWithDPI(onlyPageIndex, dpi, imageType);
            return rendered;
        }
    }

    /**
     * Encodes an image into a byte array using the named ImageIO format.
     *
     * @param bufferedImage the image to encode
     * @param format        informal format name understood by ImageIO, e.g. {@code "jpg"} or {@code "png"}
     * @return the encoded image bytes
     * @throws IOException if encoding fails, or if no ImageIO writer can encode this image in
     *                     the requested format
     */
    public static byte[] imgToBytes(BufferedImage bufferedImage, String format) throws IOException {
        try (ByteArrayOutputStream outputStream = new ByteArrayOutputStream()) {
            // ImageIO.write returns false (and writes nothing) when no suitable writer exists;
            // surface that instead of handing back an empty array.
            if (!ImageIO.write(bufferedImage, format, outputStream)) {
                throw new IOException("No ImageIO writer available for format \"" + format
                        + "\" and image type " + bufferedImage.getType());
            }
            return outputStream.toByteArray();
        }
    }

3、测试代码

/**
 * Demo entry point that converts every PDF file in a source folder to images.
 *
 * <p>{@code main} calls {@code pdfToImgCut} unqualified, so that method must be a member of
 * this class; it is not declared in the class body shown here.
 */
public class PdfToImgUtil {
    private static final Logger log = LogManager.getLogger(PdfToImgUtil.class);

    /**
     * Converts each {@code .pdf} file directly inside the source folder and logs every other
     * entry as skipped.
     *
     * @param args unused
     * @throws Exception if converting a PDF fails
     */
    public static void main(String[] args) throws Exception {
        // Folder containing the PDF files
        String path = "D:\\aaaaa\\file";
        // Folder receiving the converted images. The trailing separator keeps the path usable
        // by code that appends the file name directly to this string.
        String imgStore = "D:\\aaaaa\\img" + File.separator;
        File fileSrcStore = new File(path);
        File[] tmpFiles = fileSrcStore.listFiles();
        // listFiles() returns null when the path is not a readable directory; an assert would
        // be skipped unless the JVM runs with -ea.
        if (tmpFiles == null) {
            log.error("pdf文件夹不存在或无法读取，文件夹路径：{}", path);
            return;
        }
        for (File file : tmpFiles) {
            String fileName = file.getName();
            // isFile() keeps directories whose name ends in ".pdf" away from the converter.
            if (file.isFile() && fileName.toLowerCase().endsWith(".pdf")) {
                pdfToImgCut(file, imgStore);
            } else {
                log.info("该文件不是pdf文件，文件名：{}", fileName);
            }
        }
    }
}

*转载请附加本文地址