Spring Boot + iText 处理富文本转 PDF

该博客介绍了如何在SpringBoot项目中利用iText库将包含HTML的富文本转换为PDF文件。文章详细展示了所需的pom.xml依赖,并提供了处理中文字符、避免换行问题的方法。此外,还给出了一个名为Breaker.java的类,用于处理文本的断行逻辑。

摘要生成于 C知道 ,由 DeepSeek-R1 满血版支持, 前往体验 >

pom文件引入依赖

 <!-- itext5 start -->
        <dependency>
            <groupId>com.itextpdf</groupId>
            <artifactId>itextpdf</artifactId>
            <version>5.5.13.2</version>
        </dependency>
        <dependency>
            <groupId>com.itextpdf</groupId>
            <artifactId>itext-asian</artifactId>
            <version>5.2.0</version>
        </dependency>
        <dependency>
            <groupId>com.itextpdf.tool</groupId>
            <artifactId>xmlworker</artifactId>
            <version>5.5.11</version>
        </dependency>
        <dependency>
            <groupId>org.xhtmlrenderer</groupId>
            <artifactId>flying-saucer-pdf-itext5</artifactId>
            <version>9.0.3</version>
        </dependency>
        <dependency>
            <groupId>org.xhtmlrenderer</groupId>
            <artifactId>core-renderer</artifactId>
            <version>R8</version>
        </dependency>
<!-- itext5 end -->

html转PDF方法

import com.itextpdf.text.pdf.BaseFont;
import com.lowagie.text.DocumentException;
import org.xhtmlrenderer.pdf.ITextFontResolver;
import org.xhtmlrenderer.pdf.ITextRenderer;

import java.io.*;

/**
 * Renders XHTML strings to PDF with Flying Saucer's {@link ITextRenderer}, registering the
 * SimSun font so that Chinese text can be drawn.
 *
 * <p>Both entry points first strip a few named HTML entities from the markup before it is
 * handed to the renderer.
 * NOTE(review): stripping {@code &ldquo;}/{@code &rdquo;}/{@code &nbsp;} drops the quotes and
 * spaces from the output; presumably done because the entities are not declared for the XML
 * parser. Confirm whether replacing them with the literal characters would be preferable.
 *
 * @author zhang
 */
public class HtmlToPdf {

    /** SimSun location used when the {@code os.name} system property contains "Window". */
    private static final String WINDOWS_FONT_PATH = "C:/Windows/Fonts/simsun.ttc";

    /** SimSun location used on every other operating system. */
    private static final String DEFAULT_FONT_PATH = "/usr/share/fonts/win/simsun.ttc";

    /** Matches any inline {@code font-family: ...;} declaration (the trailing semicolon is required). */
    private static final java.util.regex.Pattern FONT_FAMILY_DECLARATION =
            java.util.regex.Pattern.compile("font-family:(.*?);");

    /**
     * Renders the HTML to PDF and returns the result as an in-memory stream.
     *
     * <p>The document is rendered into an internal buffer, so {@code out} may be any kind of
     * stream (the previous implementation cast it to {@link ByteArrayOutputStream} and failed
     * for everything else). Every {@code font-family} declaration in the markup is rewritten
     * to SimSun before rendering.
     *
     * @param out  stream that additionally receives a copy of the PDF bytes; it is flushed but
     *             not closed. May be {@code null} when only the returned stream is needed.
     * @param html XHTML markup to render; must not be {@code null}
     * @return a stream over the generated PDF bytes
     * @throws IOException       if the font cannot be loaded, rendering fails, or writing to
     *                           {@code out} fails
     * @throws DocumentException declared for backward compatibility; not thrown by this
     *                           implementation
     */
    public static InputStream createPDF(OutputStream out, String html) throws IOException, DocumentException {
        ByteArrayOutputStream buffer = new ByteArrayOutputStream();
        render(buffer, forceSimSunFont(stripUnsupportedEntities(html)));
        if (out != null) {
            buffer.writeTo(out);
            out.flush();
        }
        return new ByteArrayInputStream(buffer.toByteArray());
    }

    /**
     * Renders the HTML to PDF and writes it directly to the given stream.
     *
     * <p>Unlike {@link #createPDF(OutputStream, String)} this method does not rewrite
     * {@code font-family} declarations; callers are expected to reference SimSun themselves.
     *
     * @param out  destination stream for the PDF
     * @param html XHTML markup to render; must not be {@code null}
     * @throws IOException       if the font cannot be loaded or rendering fails
     * @throws DocumentException declared for backward compatibility; not thrown by this
     *                           implementation
     */
    public static void createPDFByLocal(OutputStream out, String html) throws IOException, DocumentException {
        render(out, stripUnsupportedEntities(html));
    }

    /**
     * Demo entry point: renders a small sample document to a timestamped file.
     *
     * @param args unused
     * @throws IOException       if the file cannot be created or rendering fails
     * @throws DocumentException never thrown; declared because the called method declares it
     */
    public static void main(String[] args) throws IOException, DocumentException {
        String fileName="D:\\text\\"+"测试"+System.currentTimeMillis()+".pdf";
        File file = new File(fileName);
        String customHtml="<p style=\"text-align: center; margin: 0cm; font-size: 10.5pt; font-family: 'Times New Roman', serif;\" align=\"center\"><strong><span style=\"font-size: 22pt; font-family: 宋体; color: #c00000;\">&shy;&shy;&shy;&shy;&shy;Xx</span></strong><strong><span style=\"font-size: 22pt; font-family: 宋体; color: #c00000;\">测试一下</span></strong></p>\n" +
                "<p style=\"margin: 0cm 0cm 15.6pt; text-align: center; font-size: 10.5pt; font-family: 'Times New Roman', serif;\" align=\"center\"><strong><span style=\"font-size: 22pt; font-family: 宋体; color: #c00000;\">20xx</span></strong><strong><span style=\"font-size: 22pt; font-family: 宋体; color: black;\">XXXXXXXXXXXXXXXXXX书</span></strong></p>\n";
        // createPDFByLocal does not rewrite fonts, so do it here for the user-supplied fragment.
        customHtml = forceSimSunFont(customHtml);
        String html = "<?xml version=\"1.0\" encoding=\"utf-8\"?>\n" +
                "<!DOCTYPE html[\n" +
                "<!ENTITY nbsp \" \">\n" +
                "]><html>\n" +
                        "<head>\n" +
                        "<style type=\"text/css\">\n" +
                        "body {\n" +
                        "\tfont-family: SimSun;\n" +
                        "}\n" +
                        "</style>\n" +
                        "</head>\n" +
                        "<body>\n" +
                        customHtml
                        +
                        "</body>\n" +
                        "</html>\n";

        // try-with-resources guarantees the file handle is released even if rendering fails.
        try (FileOutputStream outputStream = new FileOutputStream(file)) {
            createPDFByLocal(outputStream, html);
        }
    }

    /** Removes the named entities that the original code stripped before parsing. */
    private static String stripUnsupportedEntities(String html) {
        if (html == null) {
            throw new NullPointerException("html must not be null");
        }
        return html.replace("&nbsp;", "")
                .replace("&shy;", "")
                .replace("&ldquo;", "")
                .replace("&rdquo;", "");
    }

    /** Rewrites every {@code font-family: ...;} declaration to use SimSun. */
    private static String forceSimSunFont(String html) {
        return FONT_FAMILY_DECLARATION.matcher(html).replaceAll("font-family: SimSun;");
    }

    /** Parses the markup, registers the SimSun font, lays the document out and writes the PDF. */
    private static void render(OutputStream out, String html) throws IOException {
        ITextRenderer renderer = new ITextRenderer();
        renderer.setDocumentFromString(html);
        registerChineseFont(renderer.getFontResolver());
        renderer.layout();
        try {
            renderer.createPDF(out);
        } catch (com.itextpdf.text.DocumentException e) {
            // Previously swallowed, which left callers with an empty or truncated PDF.
            throw new IOException("Failed to render HTML to PDF", e);
        }
    }

    /** Registers the platform-specific SimSun font file with the renderer's font resolver. */
    private static void registerChineseFont(ITextFontResolver fontResolver) throws IOException {
        String fontPath = System.getProperty("os.name").contains("Window")
                ? WINDOWS_FONT_PATH
                : DEFAULT_FONT_PATH;
        try {
            fontResolver.addFont(fontPath, BaseFont.IDENTITY_H, BaseFont.NOT_EMBEDDED);
        } catch (com.itextpdf.text.DocumentException e) {
            throw new IOException("Failed to register font " + fontPath, e);
        }
    }
}

处理中文内容不换行的问题

在项目源码目录下创建包 org.xhtmlrenderer.layout

在该包中新增 Breaker.java 类(包名和类名与 Flying Saucer 自带的类保持一致,用于替换其默认的断行逻辑)

package org.xhtmlrenderer.layout;

/*
 * Breaker.java
 * Copyright (c) 2004, 2005 Torbjörn Gannholm,
 * Copyright (c) 2005 Wisconsin Court System
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public License
 * as published by the Free Software Foundation; either version 2.1
 * of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 *
 */

import org.xhtmlrenderer.css.constants.IdentValue;
import org.xhtmlrenderer.css.style.CalculatedStyle;
import org.xhtmlrenderer.render.FSFont;

/**
 * A utility class that scans the text of a single inline box, looking for the
 * next break point.
 *
 * <p>In this variant a break candidate is any position where a space or a character
 * accepted by {@code isChinese} starts, rather than spaces only, so runs of CJK text
 * can wrap between characters.
 *
 * @author Torbjörn Gannholm
 */
public class Breaker {

    /**
     * Sets the context to cover the first letter of the text (plus any leading punctuation)
     * and records its rendered width.
     *
     * @param c       layout context supplying the text renderer and font context
     * @param context line-break state to update (end, width, new-line/unbreakable flags)
     * @param avail   available width; if the first letter is wider, the context is flagged as
     *                needing a new line and as unbreakable
     * @param style   style from which the font is resolved
     */
    public static void breakFirstLetter(LayoutContext c, LineBreakContext context,
                                        int avail, CalculatedStyle style) {
        FSFont font = style.getFSFont(c);
        context.setEnd(getFirstLetterEnd(context.getMaster(), context.getStart()));
        context.setWidth(textWidth(c, font, context.getCalculatedSubstring()));

        if (context.getWidth() > avail) {
            context.setNeedsNewLine(true);
            context.setUnbreakable(true);
        }
    }

    /**
     * Returns the index just past the first letter at or after {@code start}: leading
     * punctuation characters are skipped, then one further character is included if present.
     */
    private static int getFirstLetterEnd(String text, int start) {
        int i = start;
        while (i < text.length() && isPunctuation(text.charAt(i))) {
            i++;
        }
        if (i < text.length()) {
            i++;
        }
        return i;
    }

    /** Tells whether the character belongs to one of the punctuation general categories. */
    private static boolean isPunctuation(char ch) {
        int type = Character.getType(ch);
        return type == Character.START_PUNCTUATION
                || type == Character.END_PUNCTUATION
                || type == Character.INITIAL_QUOTE_PUNCTUATION
                || type == Character.FINAL_QUOTE_PUNCTUATION
                || type == Character.OTHER_PUNCTUATION;
    }

    /**
     * Finds how much of the remaining text fits into {@code avail} and stores the result
     * (end offset, width and flags) in {@code context}.
     *
     * @param c       layout context supplying the text renderer and font context
     * @param context line-break state to read from and update
     * @param avail   available width for the current line
     * @param style   style from which the font and the white-space mode are resolved
     */
    public static void breakText(LayoutContext c,
                                 LineBreakContext context, int avail, CalculatedStyle style) {
        FSFont font = style.getFSFont(c);
        IdentValue whitespace = style.getWhitespace();

        // nowrap: take everything up to the context's last position, regardless of width.
        if (whitespace == IdentValue.NOWRAP) {
            context.setEnd(context.getLast());
            context.setWidth(textWidth(c, font, context.getCalculatedSubstring()));
            return;
        }

        // pre / pre-wrap / pre-line: break on the next explicit newline, if there is one.
        if (whitespace == IdentValue.PRE ||
                whitespace == IdentValue.PRE_WRAP ||
                whitespace == IdentValue.PRE_LINE) {
            int n = context.getStartSubstring().indexOf(WhitespaceStripper.EOL);
            if (n > -1) {
                context.setEnd(context.getStart() + n + 1);
                context.setWidth(textWidth(c, font, context.getCalculatedSubstring()));
                context.setNeedsNewLine(true);
                context.setEndsOnNL(true);
            } else if (whitespace == IdentValue.PRE) {
                context.setEnd(context.getLast());
                context.setWidth(textWidth(c, font, context.getCalculatedSubstring()));
            }
        }

        // pre never wraps; otherwise stop here when the newline-terminated piece already fits.
        if (whitespace == IdentValue.PRE ||
                (context.isNeedsNewLine() && context.getWidth() <= avail)) {
            return;
        }

        context.setEndsOnNL(false);

        String currentString = context.getStartSubstring();
        int left = 0;
        int right = getStrRight(currentString, left);
        int lastWrap = 0;
        int graphicsLength = 0;
        int lastGraphicsLength = 0;

        // Accumulate segment widths between consecutive break candidates until the line overflows.
        while (right > 0 && graphicsLength <= avail) {
            lastGraphicsLength = graphicsLength;
            graphicsLength += textWidth(c, font, currentString.substring(left, right));
            lastWrap = left;
            left = right;
            right = getStrRight(currentString, left + 1);
        }

        if (graphicsLength <= avail) {
            // No more candidates and still room: try the trailing remainder too.
            lastWrap = left;
            lastGraphicsLength = graphicsLength;
            graphicsLength += textWidth(c, font, currentString.substring(left));
        }

        if (graphicsLength <= avail) {
            // Everything fit.
            context.setWidth(graphicsLength);
            context.setEnd(context.getMaster().length());
            return;
        }

        context.setNeedsNewLine(true);

        if (lastWrap != 0) {
            // Found a place to wrap: end at the last candidate that still fit.
            context.setEnd(context.getStart() + lastWrap);
            context.setWidth(lastGraphicsLength);
        } else {
            // Unbreakable string.
            if (left == 0) {
                left = currentString.length();
            }

            context.setEnd(context.getStart() + left);
            context.setUnbreakable(true);

            if (left == currentString.length()) {
                context.setWidth(textWidth(c, font, context.getCalculatedSubstring()));
            } else {
                context.setWidth(graphicsLength);
            }
        }
    }

    /** Measures the rendered width of {@code text} in the given font. */
    private static int textWidth(LayoutContext c, FSFont font, String text) {
        return c.getTextRenderer().getWidth(c.getFontContext(), font, text);
    }

    /**
     * Tells whether the character lies in one of the Unicode blocks this class treats as
     * breakable CJK text (ideographs, CJK/general punctuation, half- and full-width forms).
     */
    private static boolean isChinese(char c) {
        Character.UnicodeBlock ub = Character.UnicodeBlock.of(c);
        return ub == Character.UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS
                || ub == Character.UnicodeBlock.CJK_COMPATIBILITY_IDEOGRAPHS
                || ub == Character.UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A
                || ub == Character.UnicodeBlock.GENERAL_PUNCTUATION
                || ub == Character.UnicodeBlock.CJK_SYMBOLS_AND_PUNCTUATION
                || ub == Character.UnicodeBlock.HALFWIDTH_AND_FULLWIDTH_FORMS;
    }

    /**
     * Returns the index of the next break candidate (a space or a character accepted by
     * {@code isChinese}) at or after {@code left}, or -1 if there is none. A candidate at
     * index 0 is reported as 1 because the caller treats a non-positive result as "none".
     *
     * <p>Reads characters in place instead of copying the string into a new array on every
     * call; this method runs once per candidate, so the copy made scanning quadratic.
     */
    private static int getStrRight(String s, int left) {
        for (int i = left; i < s.length(); i++) {
            char ch = s.charAt(i);
            if (isChinese(ch) || ' ' == ch) {
                return i == 0 ? i + 1 : i;
            }
        }
        return -1;
    }

}

PS:如果因标签大小写等问题导致生成失败,可以在 HTML 的开头添加如下 DOCTYPE 声明:

<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">

### 使用 Apache POI 和 iTextPDF 实现 Word 转 PDF

要将 Word 文档转换为 PDF 文件,可以通过以下方式实现:首先利用 Apache POI 解析 Word 文档的内容,然后通过 iTextPDF 创建对应的 PDF 文件并将内容写入其中。

#### 主要流程说明

Apache POI 是用于处理 Microsoft Office 文件的强大工具库,而 iTextPDF 则专注于创建和操作 PDF 文件。两者的结合能够完成从 Word 到 PDF 的转换过程[^1]。

以下是具体实现代码:

```java
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.apache.poi.xwpf.usermodel.XWPFParagraph;
import com.itextpdf.kernel.pdf.PdfDocument;
import com.itextpdf.kernel.pdf.PdfWriter;
import com.itextpdf.layout.Document;
import com.itextpdf.layout.element.Paragraph;

import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.util.List;

public class WordToPdfConverter {
    public static byte[] convertWordToPdf(String wordFilePath, String pdfFilePath) throws Exception {
        // Step 1: Load the Word Document using Apache POI
        try (FileInputStream fis = new FileInputStream(wordFilePath);
             XWPFDocument document = new XWPFDocument(fis)) {
            List<XWPFParagraph> paragraphs = document.getParagraphs();

            // Step 2: Create a PDF Writer and Document
            try (PdfWriter writer = new PdfWriter(pdfFilePath);
                 PdfDocument pdfDoc = new PdfDocument(writer);
                 Document layoutDocument = new Document(pdfDoc)) {
                // Step 3: Convert each paragraph from Word to PDF format
                for (XWPFParagraph para : paragraphs) {
                    Paragraph p = new Paragraph(para.getText());
                    layoutDocument.add(p); // Add the paragraph content into the PDF
                }
                // Return the generated PDF as bytes array if needed
                return writer.getBytes();
            }
        }
    }

    public static void main(String[] args) {
        try {
            String inputPath = "example.docx";
            String outputPath = "output.pdf";
            convertWordToPdf(inputPath, outputPath);
            System.out.println("Conversion completed successfully.");
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
```

上述代码展示了如何加载 `.docx` 格式的 Word 文件,并将其段落逐一提取出来,再通过 iTextPDF 库生成 PDF 文件[^2]。

---

#### 常见问题及解决方案

1. **不同版本的 Word 文件支持**
   - 如果需要兼容旧版(`.doc`),则需额外引入 `POIFSFileSystem` 或者使用 `HWPFDocument` 来解析文件。
2. **字体样式丢失**
   - 当前示例仅简单复制文本内容至 PDF 中,未涉及复杂样式的迁移。如果需要保留原 Word 文件中的字体、颜色等属性,则需要进一步分析每一段的具体格式并映射到 iTextPDF 支持的对象上。
3. **性能优化**
   - 处理大体积文档时可能会遇到内存不足的情况。建议分批读取数据或者采用流式传输的方式减少资源占用。

---
评论 2
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值