java 实现word转pdf

最新推荐文章于 2025-10-26 16:22:39 发布

原创最新推荐文章于 2025-10-26 16:22:39 发布 · 4.5w 阅读

67 ·

CC 4.0 BY-SA版权

文章标签：

#java #word #pdf #poi

java后台;前后端分离专栏收录该内容

2 篇文章

订阅专栏

本文介绍了一种将Word文档转换为PDF的方法，通过使用Apache POI和相关库实现了两种类型的Word文档（.doc和.docx）到PDF的转换，并且特别针对中文支持进行了详细说明。

将DOCX文档转化为PDF是项目中常见的需求之一，目前主流的方法可以分为两大类：一类是利用各种Office应用进行转换，譬如Microsoft Office、WPS以及LibreOffice；另一类是利用各种语言提供的Office文档读取接口（譬如Apache POI）读取文档，然后使用专门的PDF生成库（譬如iText）构建PDF。总的来说，利用Office应用转换可以保证较好的样式，不过效率相对较低。其中Microsoft Office涉及版权，不可轻易使用（笔者所在公司就被抓包了）；WPS目前使用比较广泛，不过存在超链接截断问题，即超过256个字符的超链接会被截断；LibreOffice的样式排版相对比较随意。而利用POI接口进行读取与生成的方式性能较好，适用于对格式要求不是很高的情况。另外还有一些封装好的在线工具或者命令行工具，譬如docx2pdf与OfficeToPDF。

以下是基于Apache POI（配合xdocreport处理.docx）和docx4j（处理.doc）实现word转pdf的方法

1.maven jar

<!-- args4j is a command-line argument parser; none of the classes shown in this article reference it. -->
<!-- NOTE(review): possibly left over from the original command-line tool; confirm before keeping. -->
<dependency>
<groupId>args4j</groupId>
<artifactId>args4j</artifactId>
<version>2.32</version>
</dependency>
<!-- docx4j: supplies the org.docx4j.* classes imported by DocToPDFConverter (.doc to PDF). -->
<dependency>
<groupId>org.docx4j</groupId>
<artifactId>docx4j</artifactId>
<version>3.2.1</version>
</dependency>
<!-- xdocreport XWPF converter: supplies PdfConverter / PdfOptions imported by DocxToPDFConverter (.docx to PDF). -->
<dependency>
<groupId>fr.opensagres.xdocreport</groupId>
<artifactId>org.apache.poi.xwpf.converter.pdf</artifactId>
<version>1.0.6</version>
</dependency>

<!-- xdocreport ODF converter: not referenced by the classes shown in this article. -->
<!-- NOTE(review): confirm whether ODF conversion is actually needed. -->
<dependency>
<groupId>fr.opensagres.xdocreport</groupId>
<artifactId>org.odftoolkit.odfdom.converter.pdf</artifactId>
<version>1.0.6</version>
</dependency>
<!-- JAXB namespace-prefix-mapper interfaces, runtime scope only. -->
<!-- NOTE(review): presumably required by docx4j's XML binding; confirm. -->
<dependency>
            <groupId>com.googlecode.jaxb-namespaceprefixmapper-interfaces</groupId>
            <artifactId>JAXBNamespacePrefixMapper</artifactId>
            <version>2.2.4</version>
            <scope>runtime</scope>
        </dependency>


<!-- JAXB reference implementation (impl + core), both pinned to 2.2.11. -->
<dependency>
<groupId>com.sun.xml.bind</groupId>
<artifactId>jaxb-impl</artifactId>
<version>2.2.11</version>
</dependency>
<dependency>
<groupId>com.sun.xml.bind</groupId>
<artifactId>jaxb-core</artifactId>
<version>2.2.11</version>
</dependency>
       
<!-- https://mvnrepository.com/artifact/org.apache.xmlbeans/xmlbeans -->
<dependency>
   <groupId>org.apache.xmlbeans</groupId>
   <artifactId>xmlbeans</artifactId>
   <version>2.6.0</version>
</dependency>
<!-- Apache POI modules, all pinned to the same version (3.14). -->
<!-- poi-ooxml supplies XWPFDocument, which DocxToPDFConverter uses to read .docx input. -->
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi</artifactId>
<version>3.14</version><!--$NO-MVN-MAN-VER$-->
</dependency>
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-scratchpad</artifactId>
<version>3.14</version><!--$NO-MVN-MAN-VER$-->
</dependency>
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-ooxml</artifactId>
<version>3.14</version><!--$NO-MVN-MAN-VER$-->
</dependency>

2.实现类

Converter

import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;


/**
 * Base class for converters that read a document from an input stream and
 * write the converted result to an output stream.
 *
 * <p>Subclasses implement {@link #convert()} and are expected to call
 * {@link #loading()}, {@link #processing()} and {@link #finished()} at the
 * matching stages. Those hooks record timings, optionally emit progress
 * messages, and (in {@code finished()}) optionally close both streams.
 */
public abstract class Converter {

    private static final String LOADING_FORMAT = "\nLoading stream\n\n";
    private static final String PROCESSING_FORMAT = "Load completed in %1$dms, now converting...\n\n";
    private static final String SAVING_FORMAT = "Conversion took %1$dms.\n\nTotal: %2$dms\n";

    /** Wall-clock time (ms) at which {@link #loading()} was called. */
    private long startTime;

    /** Wall-clock time (ms) at which the current stage started. */
    private long startOfProcessTime;

    protected InputStream inStream;
    protected OutputStream outStream;

    protected boolean showOutputMessages = false;
    protected boolean closeStreamsWhenComplete = true;

    /**
     * @param inStream                 stream the source document is read from
     * @param outStream                stream the converted document is written to
     * @param showMessages             whether progress messages are passed to
     *                                 {@link #actuallySendToOutput(String)}
     * @param closeStreamsWhenComplete whether {@link #finished()} closes both streams
     */
    public Converter(InputStream inStream, OutputStream outStream, boolean showMessages,
            boolean closeStreamsWhenComplete) {
        this.inStream = inStream;
        this.outStream = outStream;
        this.showOutputMessages = showMessages;
        this.closeStreamsWhenComplete = closeStreamsWhenComplete;
    }

    /**
     * Performs the conversion from {@code inStream} to {@code outStream}.
     *
     * @throws Exception if the conversion fails
     */
    public abstract void convert() throws Exception;

    /** Resets both timers to the current time. */
    private void startTime() {
        startTime = System.currentTimeMillis();
        startOfProcessTime = startTime;
    }

    /** Marks the start of the loading stage: emits the loading message and starts the timers. */
    protected void loading() {
        sendToOutputOrNot(LOADING_FORMAT);
        startTime();
    }

    /** Marks the end of loading and the start of conversion; reports how long loading took. */
    protected void processing() {
        long prevProcessTook = System.currentTimeMillis() - startOfProcessTime;

        sendToOutputOrNot(String.format(PROCESSING_FORMAT, prevProcessTook));

        startOfProcessTime = System.currentTimeMillis();
    }

    /**
     * Marks the end of the conversion: closes the streams when
     * {@code closeStreamsWhenComplete} is set, then reports the conversion and
     * total times.
     */
    protected void finished() {
        long currentTime = System.currentTimeMillis();
        long timeTaken = currentTime - startTime;
        long prevProcessTook = currentTime - startOfProcessTime;

        startOfProcessTime = System.currentTimeMillis();

        if (closeStreamsWhenComplete) {
            // Close each stream independently so that a failure on the input
            // stream can never leave the output stream open.
            closeQuietly(inStream);
            closeQuietly(outStream);
        }

        sendToOutputOrNot(String.format(SAVING_FORMAT, prevProcessTook, timeTaken));
    }

    /** Best-effort close: tolerates {@code null} and ignores close failures. */
    private static void closeQuietly(java.io.Closeable stream) {
        if (stream == null) {
            return;
        }
        try {
            stream.close();
        } catch (IOException ignored) {
            // Deliberately ignored: closing is best-effort cleanup after the
            // conversion, and there is nothing useful the caller could do here.
        }
    }

    /** Forwards the message to {@link #actuallySendToOutput(String)} only when messages are enabled. */
    private void sendToOutputOrNot(String toBePrinted) {
        if (showOutputMessages) {
            actuallySendToOutput(toBePrinted);
        }
    }

    /**
     * Hook that receives progress messages when messages are enabled. The
     * default implementation does nothing; override it to print or log.
     *
     * @param toBePrinted the already-formatted progress message
     */
    protected void actuallySendToOutput(String toBePrinted) {
    }

}

DocToPDFConverter：

import java.io.InputStream;
import java.io.OutputStream;
import java.io.PrintStream;
import java.net.URL;


import org.apache.commons.io.IOUtils;
import org.docx4j.Docx4J;
import org.docx4j.convert.in.Doc;
import org.docx4j.convert.out.FOSettings;
import org.docx4j.fonts.IdentityPlusMapper;
import org.docx4j.fonts.Mapper;
import org.docx4j.fonts.PhysicalFont;
import org.docx4j.fonts.PhysicalFonts;
import org.docx4j.jaxb.Context;
import org.docx4j.openpackaging.packages.WordprocessingMLPackage;
import org.docx4j.wml.RFonts;
import org.springframework.core.io.ClassPathResource;
import org.springframework.core.io.Resource;


/**
 * Converts a binary Word document ({@code .doc}) to PDF with docx4j.
 *
 * <p>The document is loaded through {@link Doc#convert(InputStream)}, the
 * SimSun font shipped on the classpath is registered so Chinese text can be
 * rendered, and the result is written to the output stream through docx4j's
 * FO export.
 */
public class DocToPDFConverter extends Converter {

    /** Font family mapped for Chinese text and used as the document default. */
    private static final String FONT_FAMILY = "SimSun";

    /** Classpath resource holding the TrueType file for {@link #FONT_FAMILY}. */
    private static final String FONT_RESOURCE = "simsun.ttf";

    /**
     * @param inStream                 stream the {@code .doc} document is read from
     * @param outStream                stream the PDF is written to
     * @param showMessages             whether progress messages are emitted
     * @param closeStreamsWhenComplete whether both streams are closed when the
     *                                 conversion ends
     */
    public DocToPDFConverter(InputStream inStream, OutputStream outStream, boolean showMessages,
            boolean closeStreamsWhenComplete) {
        super(inStream, outStream, showMessages, closeStreamsWhenComplete);
    }

    /**
     * Loads the document, installs the font mapping and writes the PDF.
     *
     * @throws Exception if loading, font registration or export fails; the
     *                   failure is propagated so callers never mistake a
     *                   failed conversion for a successful one
     */
    @Override
    public void convert() throws Exception {
        loading();
        try {
            WordprocessingMLPackage wordMLPackage = getMLPackage(inStream);
            processing();

            Mapper fontMapper = new IdentityPlusMapper();
            registerChineseFont(fontMapper);

            // Set the document's default font.
            RFonts rfonts = Context.getWmlObjectFactory().createRFonts();
            rfonts.setAsciiTheme(null);
            rfonts.setAscii(FONT_FAMILY);
            wordMLPackage.getMainDocumentPart().getPropertyResolver().getDocumentDefaultRPr().setRFonts(rfonts);
            wordMLPackage.setFontMapper(fontMapper);

            FOSettings foSettings = Docx4J.createFOSettings();
            foSettings.setWmlPackage(wordMLPackage);
            Docx4J.toFO(foSettings, outStream, Docx4J.FLAG_EXPORT_PREFER_XSL);
        } finally {
            // finished() honours closeStreamsWhenComplete, so the streams are
            // released on failure too and stay open when the caller asked for that.
            finished();
        }
    }

    /**
     * Registers the bundled font file with docx4j and maps {@link #FONT_FAMILY} to it.
     *
     * @param fontMapper mapper that receives the family-to-font mapping
     * @throws Exception if the font resource cannot be resolved to a file
     */
    private void registerChineseFont(Mapper fontMapper) throws Exception {
        Resource fontResource = new ClassPathResource(FONT_RESOURCE);
        // File.toURI() escapes spaces and other special characters that the
        // naive "file:" + path concatenation would leave in the URL.
        URL fontUrl = fontResource.getFile().toURI().toURL();
        PhysicalFonts.addPhysicalFont(fontUrl);

        PhysicalFont simsunFont = PhysicalFonts.get(FONT_FAMILY);
        if (simsunFont != null) {
            fontMapper.put(FONT_FAMILY, simsunFont);
        }
    }

    /**
     * Loads the {@code .doc} stream into a docx4j package.
     *
     * <p>{@code System.out} is redirected to a discarding stream while
     * {@link Doc#convert(InputStream)} runs and is restored afterwards.
     * NOTE(review): presumably this hides console output produced during the
     * conversion; the redirect is JVM-wide, so output from other threads is
     * dropped for its duration as well.
     *
     * @param iStream stream containing the {@code .doc} document
     * @return the loaded package
     * @throws Exception if the document cannot be converted
     */
    protected WordprocessingMLPackage getMLPackage(InputStream iStream) throws Exception {
        PrintStream originalStdout = System.out;
        System.setOut(new PrintStream(new OutputStream() {
            @Override
            public void write(int b) {
                // Discard everything.
            }
        }));
        try {
            return Doc.convert(iStream);
        } finally {
            System.setOut(originalStdout);
        }
    }

}

DocxToPDFConverter：

import java.awt.Color;
import java.io.InputStream;
import java.io.OutputStream;


import org.apache.poi.xwpf.converter.pdf.PdfConverter;
import org.apache.poi.xwpf.converter.pdf.PdfOptions;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.springframework.core.io.ClassPathResource;
import org.springframework.core.io.Resource;


import com.lowagie.text.Font;
import com.lowagie.text.pdf.BaseFont;


import fr.opensagres.xdocreport.itext.extension.font.ITextFontRegistry;


/**
 * Converts an OOXML Word document ({@code .docx}) to PDF with Apache POI and
 * the xdocreport PDF converter.
 *
 * <p>A font provider backed by the SimSun font on the classpath is installed
 * so Chinese text can be rendered.
 */
public class DocxToPDFConverter extends Converter {

    private static final java.util.logging.Logger LOG =
            java.util.logging.Logger.getLogger(DocxToPDFConverter.class.getName());

    /** Classpath resource holding the TrueType font used for Chinese text. */
    private static final String FONT_RESOURCE = "simsun.ttf";

    /**
     * @param inStream                 stream the {@code .docx} document is read from
     * @param outStream                stream the PDF is written to
     * @param showMessages             whether progress messages are emitted
     * @param closeStreamsWhenComplete whether both streams are closed when the
     *                                 conversion ends
     */
    public DocxToPDFConverter(InputStream inStream, OutputStream outStream, boolean showMessages,
            boolean closeStreamsWhenComplete) {
        super(inStream, outStream, showMessages, closeStreamsWhenComplete);
    }

    /**
     * Reads the document from the input stream and writes it as PDF to the output stream.
     *
     * @throws Exception if the document cannot be read or converted
     */
    @Override
    public void convert() throws Exception {
        loading();
        try {
            PdfOptions options = PdfOptions.create();
            XWPFDocument document = new XWPFDocument(inStream);

            // Support Chinese fonts: load the base font once instead of
            // resolving the resource on every font request.
            final BaseFont chineseBaseFont = loadChineseBaseFont();
            options.fontProvider(new ITextFontRegistry() {
                @Override
                public Font getFont(String familyName, String encoding, float size, int style, Color color) {
                    if (chineseBaseFont == null) {
                        // Font file unavailable: fall back to the default registry.
                        return ITextFontRegistry.getRegistry().getFont(familyName, encoding, size, style, color);
                    }
                    Font fontChinese = new Font(chineseBaseFont, size, style, color);
                    if (familyName != null) {
                        fontChinese.setFamily(familyName);
                    }
                    return fontChinese;
                }
            });

            processing();
            PdfConverter.getInstance().convert(document, outStream, options);
        } finally {
            // Run the completion hook on failure too, so the streams are
            // released whenever closeStreamsWhenComplete is set.
            finished();
        }
    }

    /**
     * Loads the bundled font as an embedded Identity-H base font.
     *
     * @return the base font, or {@code null} when the font file cannot be
     *         loaded (the caller then falls back to the default registry)
     */
    private static BaseFont loadChineseBaseFont() {
        try {
            Resource fontResource = new ClassPathResource(FONT_RESOURCE);
            String path = fontResource.getFile().getAbsolutePath();
            return BaseFont.createFont(path, BaseFont.IDENTITY_H, BaseFont.EMBEDDED);
        } catch (Exception e) {
            LOG.log(java.util.logging.Level.WARNING,
                    "Could not load " + FONT_RESOURCE + "; falling back to the default font registry", e);
            return null;
        }
    }

}

调用示例（节选自Web请求处理方法，并非完整的main方法）

// Usage excerpt: converts an attachment fetched by URL into a preview PDF under the web application root.
// `path`, `inputStream`, `fileInputStream`, `request` and `attachmentEntity` are declared outside this excerpt.
Converter converter;

// Build the target path under the servlet context root, normalising backslashes to forward slashes.
// NOTE(review): every request writes to the same preview.pdf, so concurrent conversions would overwrite each other - confirm.
path = request.getSession().getServletContext().getRealPath("").replaceAll("\\\\", "/") + "/flyingsauser/preview.pdf";
File file = new File(path);
// Opening a FileOutputStream creates the file when it does not exist yet.
OutputStream outputStream = new FileOutputStream(file);
String url = attachmentEntity.getUrl();
inputStream = OSSClientUtil.getFileObject(url);

// NOTE(review): the stream above has already created the file, so this check looks redundant - confirm.
if(!file.exists()){
file.createNewFile();
}
// Pick the converter from the file extension; both are created with messages enabled and stream closing enabled.
if(url.endsWith(".docx")) {
converter = new DocxToPDFConverter(inputStream, outputStream, true, true);
converter.convert();
// Re-open the generated PDF for reading.
fileInputStream = new FileInputStream(file);
} else if(url.endsWith(".doc")){
converter = new DocToPDFConverter(inputStream, outputStream, true, true);
converter.convert();
fileInputStream = new FileInputStream(file);

以上就是word转pdf的实现。其中加入了对中文的支持，因此需要在classpath下放置simsun.ttf字体文件。

具体源码实现参照了下方的github的代码

https://github.com/yeokm1/docs-to-pdf-converter