按标题批量分割PDF文件为多个小PDF文件代码示例

本文链接：https://blog.youkuaiyun.com/weixin_45654227/article/details/130562619

POM依赖（注意：代码中还使用了 org.springframework.util.CollectionUtils，项目需另外引入 spring-core 依赖）

        <dependency>
            <groupId>org.apache.pdfbox</groupId>
            <artifactId>pdfbox</artifactId>
            <version>2.0.4</version>
        </dependency>

代码


import org.apache.pdfbox.io.RandomAccessBuffer;
import org.apache.pdfbox.multipdf.PDFMergerUtility;
import org.apache.pdfbox.multipdf.Splitter;
import org.apache.pdfbox.pdfparser.PDFParser;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDDocumentCatalog;
import org.apache.pdfbox.pdmodel.interactive.action.PDActionGoTo;
import org.apache.pdfbox.pdmodel.interactive.documentnavigation.destination.PDPageDestination;
import org.apache.pdfbox.pdmodel.interactive.documentnavigation.outline.PDDocumentOutline;
import org.apache.pdfbox.pdmodel.interactive.documentnavigation.outline.PDOutlineItem;
import org.apache.pdfbox.pdmodel.interactive.documentnavigation.outline.PDOutlineNode;
import org.springframework.util.CollectionUtils;

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Objects;

/**
 * Cuts a PDF into one file per bookmark (outline entry).
 *
 * <p>The outline is walked depth-first. Each time a bookmark with a resolvable page is
 * reached, the pages between the previous bookmark and this one are written to
 * {@code savePath + previousName + ".pdf"}. Pages before the first bookmark go to
 * {@code start.pdf}, and the pages after the last bookmark are written when the walk ends.
 *
 * <p>The traversal cursor is kept in static fields, so this class is not thread-safe and
 * only one {@link #startCut(String, String)} call may run at a time.
 */
public class PDFUtil {
    /** Source PDF currently being cut; assigned by {@link #startCut(String, String)}. */
    private static File file;
    /** Output prefix; section file names are appended to it directly. */
    private static String savePath;
    /** 1-based page on which the pending (not yet written) section starts. */
    private static int lastPage = 1;
    /** File name (without extension) of the pending section. */
    private static String lastPageName = "start";

    /**
     * Command-line entry point.
     *
     * @param args optional: {@code args[0]} is the source PDF, {@code args[1]} the output
     *             prefix; the original hard-coded sample paths are used when absent
     */
    public static void main(String[] args) {
        String source = args.length > 0 ? args[0] : "E:\\9800 告警-21.1.pdf";
        String target = args.length > 1 ? args[1] : "E:\\test\\";
        try {
            startCut(source, target);
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Splits the PDF at {@code filePath} into one file per bookmark.
     *
     * @param filePath full path of the source PDF
     * @param savePath output prefix; it is concatenated with the section name, so it must
     *                 end with a path separator to denote a directory
     * @throws Exception if the source PDF cannot be loaded or its outline cannot be read
     */
    public static void startCut(String filePath, String savePath) throws Exception {
        PDFUtil.file = new File(filePath);
        PDFUtil.savePath = savePath;
        // Reset the cursor so that a second call does not continue from the previous run.
        lastPage = 1;
        lastPageName = "start";

        // try-with-resources: the document (and the file handle behind it) is always closed.
        try (PDDocument doc = PDDocument.load(file)) {
            PDDocumentCatalog catalog = doc.getDocumentCatalog();
            PDDocumentOutline outline = catalog.getDocumentOutline();
            if (outline != null) {
                new PDFUtil().printBookmarks(outline, "");
                // The walk only writes a section when the NEXT bookmark is reached, so the
                // section after the last bookmark is still pending here. end = 0 means
                // "up to the last page" for partitionPdfFile.
                saveSection(lastPageName, lastPage, 0);
            }
        }
    }

    /**
     * Walks the children of {@code bookmark} depth-first, printing each title and writing the
     * section that ends just before each bookmark that has a resolvable page.
     *
     * @param bookmark    outline node whose children are visited
     * @param indentation prefix printed before each title (grows by four spaces per level)
     * @throws IOException if a bookmark's destination or action cannot be read
     */
    private void printBookmarks(PDOutlineNode bookmark, String indentation) throws IOException {
        for (PDOutlineItem current = bookmark.getFirstChild();
             current != null;
             current = current.getNextSibling()) {
            int pages = resolvePageNumber(current);
            String title = current.getTitle();

            if (pages == 0) {
                // No resolvable page: the bookmark cannot delimit a section. Passing it on
                // would be read as "up to the last page" and would corrupt the cursor.
                System.out.println(indentation + title);
            } else {
                // Only write when the pending section has at least one page. Bookmarks that
                // share a page (or a first bookmark on page 1) leave nothing to write.
                if (pages - 1 >= lastPage) {
                    saveSection(lastPageName, lastPage, pages - 1);
                }
                System.out.println(indentation + title + "  " + pages);
                lastPage = pages;
                lastPageName = sectionName(title);
            }
            printBookmarks(current, indentation + "    ");
        }
    }

    /**
     * Returns the 1-based page a bookmark points to, or 0 when it cannot be determined.
     * A GoTo action, when present, takes precedence over the direct destination.
     */
    private static int resolvePageNumber(PDOutlineItem item) throws IOException {
        int page = 0;
        if (item.getDestination() instanceof PDPageDestination) {
            PDPageDestination destination = (PDPageDestination) item.getDestination();
            page = destination.retrievePageNumber() + 1;
        }
        if (item.getAction() instanceof PDActionGoTo) {
            PDActionGoTo action = (PDActionGoTo) item.getAction();
            if (action.getDestination() instanceof PDPageDestination) {
                PDPageDestination destination = (PDPageDestination) action.getDestination();
                page = destination.retrievePageNumber() + 1;
            }
        }
        return page;
    }

    /**
     * Derives the output file name from a bookmark title: the second space-separated token
     * (titles are expected to look like "1.2 Name"), falling back to the whole trimmed title
     * and finally to "untitled" so that a one-word or missing title never throws.
     */
    private static String sectionName(String title) {
        if (title == null) {
            return "untitled";
        }
        String[] tokens = title.split(" ");
        if (tokens.length > 1 && !tokens[1].isEmpty()) {
            return tokens[1];
        }
        String trimmed = title.trim();
        return trimmed.isEmpty() ? "untitled" : trimmed;
    }

    /**
     * Writes pages {@code from..end} of the source PDF to {@code savePath + sectionName + ".pdf"}
     * by splitting them into single-page files and merging those back together.
     */
    private static void saveSection(String sectionName, int from, int end) {
        String target = savePath + sectionName + ".pdf";
        List<String> parts = partitionPdfFile(file.getAbsolutePath(), target, from, end);
        if (CollectionUtils.isEmpty(parts)) {
            return;
        }
        if (!mergePdfFile(parts, target)) {
            // Keep the per-page files: they are the only output that exists for this section.
            System.err.println("Merge failed, keeping page files for " + target);
            return;
        }
        for (String part : parts) {
            if (!new File(part).delete()) {
                System.err.println("Could not delete temporary file " + part);
            }
        }
    }

    /**
     * Splits a range of pages of a PDF into single-page PDF files named
     * {@code <newFile without extension>-<n>.pdf}, where {@code n} starts at 1.
     *
     * @param pdfFile full path of the source PDF
     * @param newFile full path (directory and file name) used as the base name of the page
     *                files; when null or empty, the source path is used
     * @param from    1-based first page; 0, a negative value, or a value beyond the last page
     *                means the first page
     * @param end     1-based last page (inclusive); 0, a negative value, or a value beyond the
     *                last page means the last page
     * @return the paths of the page files written, or {@code null} when the range is invalid,
     *         the output directory does not exist, or reading/writing fails (the cause is
     *         reported on {@code System.err})
     * @throws IllegalArgumentException if {@code pdfFile} is null or a given path does not end
     *                                  in {@code .pdf}/{@code .PDF}
     */
    public static List<String> partitionPdfFile(String pdfFile, String newFile, int from, int end) {
        if (Objects.isNull(pdfFile)) {
            throw new IllegalArgumentException("pdfFile 不能为空");
        }
        if (!hasPdfSuffix(pdfFile)) {
            throw new IllegalArgumentException("pdfFile 必须为pdf文件");
        }
        boolean useSourceName = newFile == null || newFile.isEmpty();
        if (!useSourceName && !hasPdfSuffix(newFile)) {
            throw new IllegalArgumentException("newFile 必须为pdf文件");
        }
        if (end > 0 && from > end) {
            System.err.println("参数from、end均为正整数时，from不能大于end");
            return null;
        }

        // Strip the ".pdf"/".PDF" extension; the page index is appended to this base name.
        String target = useSourceName ? pdfFile : newFile;
        String base = target.substring(0, target.length() - ".pdf".length());
        // Let java.io.File find the parent directory so both separators work on Windows and
        // the check is not tied to "\\" on other platforms.
        File parent = new File(base).getParentFile();
        if (parent == null || !parent.isDirectory()) {
            System.err.println("参数newFile:" + base + ",格式不正确");
            return null;
        }

        PDDocument document = null;
        List<PDDocument> pages = new ArrayList<>();
        try {
            document = PDDocument.load(new File(pdfFile));
            pages = new Splitter().split(document);

            // Normalise the requested range to list indexes [fromIndex, toIndex).
            int total = pages.size();
            int fromIndex = (from <= 0 || from > total) ? 0 : from - 1;
            int toIndex = (end <= 0 || end > total) ? total : end;

            List<String> names = new ArrayList<>();
            for (int i = fromIndex; i < toIndex; i++) {
                String fileName = base + "-" + (i - fromIndex + 1) + ".pdf";
                pages.get(i).save(fileName);
                names.add(fileName);
            }
            return names;
        } catch (Exception e) {
            System.err.println("Failed to split " + pdfFile);
            e.printStackTrace();
            return null;
        } finally {
            // Close every split page (not only the saved ones) before the source document.
            for (PDDocument page : pages) {
                closeQuietly(page);
            }
            closeQuietly(document);
        }
    }

    /**
     * Merges several PDF files, in list order, into one new PDF file.
     *
     * @param pdfFiles paths of the PDF files to merge
     * @param newFile  full path of the merged file
     * @return {@code true} when the merge succeeded; {@code false} when a source path is not a
     *         PDF, a source file does not exist, or the merge itself fails (the cause is
     *         printed to {@code System.err})
     * @throws IllegalArgumentException if {@code pdfFiles} or {@code newFile} is null, or
     *                                  {@code newFile} does not end in {@code .pdf}/{@code .PDF}
     */
    public static boolean mergePdfFile(List<String> pdfFiles, String newFile) {
        if (Objects.isNull(pdfFiles)) {
            throw new IllegalArgumentException("pdfFiles 不能为空");
        }
        if (Objects.isNull(newFile)) {
            throw new IllegalArgumentException("newFile 不能为空");
        }
        if (!hasPdfSuffix(newFile)) {
            throw new IllegalArgumentException("newFile 必须为pdf文件");
        }
        try {
            PDFMergerUtility merger = new PDFMergerUtility();
            merger.setDestinationFileName(newFile);
            for (String path : pdfFiles) {
                if (!hasPdfSuffix(path)) {
                    throw new IllegalArgumentException(path + ",文件格式不是pdf");
                }
                File source = new File(path);
                if (!source.exists()) {
                    throw new IllegalArgumentException(source.getPath() + ",不存在");
                }
                merger.addSource(source);
            }
            // Nothing is written until every source has been validated and registered.
            merger.mergeDocuments();
            return true;
        } catch (Exception e) {
            e.printStackTrace();
            return false;
        }
    }

    /** Returns whether {@code path} ends in {@code .pdf} or {@code .PDF}. */
    private static boolean hasPdfSuffix(String path) {
        return path.endsWith(".pdf") || path.endsWith(".PDF");
    }

    /** Closes a document, ignoring close failures; a null argument is a no-op. */
    private static void closeQuietly(PDDocument document) {
        if (document == null) {
            return;
        }
        try {
            document.close();
        } catch (IOException ignored) {
            // Nothing useful can be done if closing fails during cleanup.
        }
    }

}

pdfbox JAR包

链接：https://pan.baidu.com/s/1NwY2Hgif5ylFTu68TpGkWg 
提取码：t2sc

按标题批量分割PDF文件为多个小PDF文件 代码示例

按标题批量分割PDF文件为多个小PDF文件代码示例