java 实现word转pdf

本文介绍了一种将Word文档转换为PDF的方法,通过使用Apache POI和相关库实现了两种类型的Word文档(.doc和.docx)到PDF的转换,并且特别针对中文支持进行了详细说明。

摘要生成于 C知道 ,由 DeepSeek-R1 满血版支持, 前往体验 >

将DOCX文档转化为PDF是项目中常见的需求之一,目前主流的方法可以分为两大类:一类是利用各种Office应用进行转换,譬如Microsoft Office、WPS以及LibreOffice;另一类是利用各种语言提供的Office文档读取接口(譬如Apache POI),再配合专门的PDF生成库(譬如iText)构建PDF。总的来说,利用Office应用可以保证较好的样式还原,不过效率相对较低。其中Microsoft Office涉及版权,不可轻易使用(笔者所在公司就被抓包了);WPS目前使用比较广泛,不过存在超链接截断问题,即超过256个字符的超链接会被截断;LibreOffice的样式排版相对比较随意。而利用POI接口进行读取与生成的方式性能较好,适用于对格式要求不是很高的情况。另外还有一些封装好的在线工具或者命令行工具,譬如docx2pdf与OfficeToPDF。

以下是基于Apache POI(处理.docx)与docx4j(处理.doc)实现word转pdf的方式

 

1.maven jar

<!-- Command-line argument parsing -->
<dependency>
    <groupId>args4j</groupId>
    <artifactId>args4j</artifactId>
    <version>2.32</version>
</dependency>

<!-- docx4j: imported by DocToPDFConverter (org.docx4j.*) -->
<dependency>
    <groupId>org.docx4j</groupId>
    <artifactId>docx4j</artifactId>
    <version>3.2.1</version>
</dependency>

<!-- xdocreport converters: PdfConverter / PdfOptions are imported by DocxToPDFConverter -->
<dependency>
    <groupId>fr.opensagres.xdocreport</groupId>
    <artifactId>org.apache.poi.xwpf.converter.pdf</artifactId>
    <version>1.0.6</version>
</dependency>
<dependency>
    <groupId>fr.opensagres.xdocreport</groupId>
    <artifactId>org.odftoolkit.odfdom.converter.pdf</artifactId>
    <version>1.0.6</version>
</dependency>

<!-- JAXB runtime pieces -->
<dependency>
    <groupId>com.googlecode.jaxb-namespaceprefixmapper-interfaces</groupId>
    <artifactId>JAXBNamespacePrefixMapper</artifactId>
    <version>2.2.4</version>
    <scope>runtime</scope>
</dependency>
<dependency>
    <groupId>com.sun.xml.bind</groupId>
    <artifactId>jaxb-impl</artifactId>
    <version>2.2.11</version>
</dependency>
<dependency>
    <groupId>com.sun.xml.bind</groupId>
    <artifactId>jaxb-core</artifactId>
    <version>2.2.11</version>
</dependency>

<!-- https://mvnrepository.com/artifact/org.apache.xmlbeans/xmlbeans -->
<dependency>
    <groupId>org.apache.xmlbeans</groupId>
    <artifactId>xmlbeans</artifactId>
    <version>2.6.0</version>
</dependency>

<!-- Apache POI: all three artifacts are pinned to the same version -->
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi</artifactId>
    <version>3.14</version><!--$NO-MVN-MAN-VER$-->
</dependency>
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi-scratchpad</artifactId>
    <version>3.14</version><!--$NO-MVN-MAN-VER$-->
</dependency>
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi-ooxml</artifactId>
    <version>3.14</version><!--$NO-MVN-MAN-VER$-->
</dependency>

 

2.实现类

Converter

 

import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;


/**
 * Base class for converters that read a document from an input stream and
 * write the converted result to an output stream.
 *
 * <p>Subclasses implement {@link #convert()} and are expected to call
 * {@link #loading()}, {@link #processing()} and {@link #finished()} around the
 * corresponding phases so that timing messages are produced and the streams
 * are closed when requested.
 */
public abstract class Converter {

    private static final String LOADING_FORMAT = "\nLoading stream\n\n";
    private static final String PROCESSING_FORMAT = "Load completed in %1$dms, now converting...\n\n";
    private static final String SAVING_FORMAT = "Conversion took %1$dms.\n\nTotal: %2$dms\n";

    /** Wall-clock time (ms) at which {@link #loading()} was called. */
    private long startTime;

    /** Wall-clock time (ms) at which the current phase started. */
    private long startOfProcessTime;

    protected InputStream inStream;
    protected OutputStream outStream;

    protected boolean showOutputMessages = false;
    protected boolean closeStreamsWhenComplete = true;

    /**
     * @param inStream                 source document
     * @param outStream                destination for the converted document
     * @param showMessages             whether progress messages are forwarded to
     *                                 {@link #actuallySendToOutput(String)}
     * @param closeStreamsWhenComplete whether {@link #finished()} closes both streams
     */
    public Converter(InputStream inStream, OutputStream outStream, boolean showMessages,
            boolean closeStreamsWhenComplete) {
        this.inStream = inStream;
        this.outStream = outStream;
        this.showOutputMessages = showMessages;
        this.closeStreamsWhenComplete = closeStreamsWhenComplete;
    }

    /**
     * Performs the conversion from {@link #inStream} to {@link #outStream}.
     *
     * @throws Exception if the conversion fails
     */
    public abstract void convert() throws Exception;

    /** Records the start of the whole run and of the first phase. */
    private void startTime() {
        startTime = System.currentTimeMillis();
        startOfProcessTime = startTime;
    }

    /** Announces the loading phase and starts the timers. */
    protected void loading() {
        sendToOutputOrNot(String.format(LOADING_FORMAT));
        startTime();
    }

    /** Reports how long loading took and starts timing the conversion phase. */
    protected void processing() {
        long currentTime = System.currentTimeMillis();
        long prevProcessTook = currentTime - startOfProcessTime;

        sendToOutputOrNot(String.format(PROCESSING_FORMAT, prevProcessTook));

        startOfProcessTime = System.currentTimeMillis();
    }

    /**
     * Reports the conversion and total times and, when
     * {@link #closeStreamsWhenComplete} is set, closes both streams.
     *
     * <p>Each stream is closed independently, so a failure while closing the
     * input stream can no longer leave the output stream open.
     */
    protected void finished() {
        long currentTime = System.currentTimeMillis();
        long timeTaken = currentTime - startTime;
        long prevProcessTook = currentTime - startOfProcessTime;

        startOfProcessTime = System.currentTimeMillis();

        if (closeStreamsWhenComplete) {
            closeQuietly(inStream);
            closeQuietly(outStream);
        }

        sendToOutputOrNot(String.format(SAVING_FORMAT, prevProcessTook, timeTaken));
    }

    /** Closes {@code stream} if it is non-null; close failures are deliberately ignored (best effort). */
    private static void closeQuietly(java.io.Closeable stream) {
        if (stream == null) {
            return;
        }
        try {
            stream.close();
        } catch (IOException ignored) {
            // Best-effort cleanup: nothing useful can be done if close() fails.
        }
    }

    /** Forwards a message to {@link #actuallySendToOutput(String)} only when messages are enabled. */
    private void sendToOutputOrNot(String toBePrinted) {
        if (showOutputMessages) {
            actuallySendToOutput(toBePrinted);
        }
    }

    /**
     * Hook that receives progress messages. The default implementation does
     * nothing; subclasses may override it to print or log the message.
     *
     * @param toBePrinted the formatted progress message
     */
    protected void actuallySendToOutput(String toBePrinted) {
    }

}

 

 

 

DocToPDFConverter:

 

import java.io.InputStream;
import java.io.OutputStream;
import java.io.PrintStream;
import java.net.URL;


import org.apache.commons.io.IOUtils;
import org.docx4j.Docx4J;
import org.docx4j.convert.in.Doc;
import org.docx4j.convert.out.FOSettings;
import org.docx4j.fonts.IdentityPlusMapper;
import org.docx4j.fonts.Mapper;
import org.docx4j.fonts.PhysicalFont;
import org.docx4j.fonts.PhysicalFonts;
import org.docx4j.jaxb.Context;
import org.docx4j.openpackaging.packages.WordprocessingMLPackage;
import org.docx4j.wml.RFonts;
import org.springframework.core.io.ClassPathResource;
import org.springframework.core.io.Resource;


/**
 * Converts a binary Word document ({@code .doc}) read from the input stream
 * into the output stream via docx4j's XSL-FO export, registering the SimSun
 * font from the classpath so that Chinese text can be rendered.
 */
public class DocToPDFConverter extends Converter {

    /** Font family used as the document default and mapped to the bundled font file. */
    private static final String FONT_FAMILY = "SimSun";

    /** Classpath resource holding the TrueType file for {@link #FONT_FAMILY}. */
    private static final String FONT_RESOURCE = "simsun.ttf";

    /**
     * @param inStream                 source {@code .doc} document
     * @param outStream                destination for the converted document
     * @param showMessages             whether progress messages are emitted
     * @param closeStreamsWhenComplete whether both streams are closed when conversion ends
     */
    public DocToPDFConverter(InputStream inStream, OutputStream outStream, boolean showMessages,
            boolean closeStreamsWhenComplete) {
        super(inStream, outStream, showMessages, closeStreamsWhenComplete);
    }

    /**
     * Loads the document, applies the font configuration and writes the result
     * to {@code outStream}.
     *
     * <p>Failures are propagated to the caller instead of being printed and
     * swallowed. {@code finished()} always runs, so the streams are closed
     * exactly as requested by {@code closeStreamsWhenComplete}, also on failure.
     *
     * @throws Exception if loading, font registration or export fails
     */
    @Override
    public void convert() throws Exception {
        loading();
        try {
            WordprocessingMLPackage wordMLPackage = getMLPackage(inStream);
            processing();

            Mapper fontMapper = createFontMapper();
            applyDefaultFont(wordMLPackage);
            wordMLPackage.setFontMapper(fontMapper);

            FOSettings foSettings = Docx4J.createFOSettings();
            foSettings.setWmlPackage(wordMLPackage);
            Docx4J.toFO(foSettings, outStream, Docx4J.FLAG_EXPORT_PREFER_XSL);
        } finally {
            finished();
        }
    }

    /** Registers the bundled font file with docx4j and returns a mapper that maps the family to it. */
    private Mapper createFontMapper() throws Exception {
        Resource fontResource = new ClassPathResource(FONT_RESOURCE);
        // File#toURI escapes spaces and other special characters, unlike "file:" + path.
        URL fontUrl = fontResource.getFile().toURI().toURL();
        PhysicalFonts.addPhysicalFont(fontUrl);

        Mapper fontMapper = new IdentityPlusMapper();
        PhysicalFont simsunFont = PhysicalFonts.get(FONT_FAMILY);
        if (simsunFont != null) {
            // Only override the mapping when the font was actually registered.
            fontMapper.put(FONT_FAMILY, simsunFont);
        }
        return fontMapper;
    }

    /** Sets the document's default run font (ASCII) to {@link #FONT_FAMILY}. */
    private void applyDefaultFont(WordprocessingMLPackage wordMLPackage) {
        RFonts rfonts = Context.getWmlObjectFactory().createRFonts();
        rfonts.setAsciiTheme(null);
        rfonts.setAscii(FONT_FAMILY);
        wordMLPackage.getMainDocumentPart().getPropertyResolver().getDocumentDefaultRPr().setRFonts(rfonts);
    }

    /**
     * Converts the binary {@code .doc} stream into a docx4j package.
     *
     * <p>{@code System.out} is redirected to a discarding stream while the
     * conversion runs (presumably to hide console noise from the converter)
     * and is restored afterwards, so the rest of the application keeps its
     * standard output. NOTE(review): {@code System.setOut} is JVM-wide, so
     * concurrent conversions can still interleave the swap/restore.
     *
     * @param iStream the {@code .doc} input
     * @return the loaded package
     * @throws Exception if the document cannot be converted
     */
    protected WordprocessingMLPackage getMLPackage(InputStream iStream) throws Exception {
        PrintStream originalStdout = System.out;
        System.setOut(new PrintStream(new OutputStream() {
            @Override
            public void write(int b) {
                // Discard everything written during the conversion.
            }
        }));
        try {
            return Doc.convert(iStream);
        } finally {
            System.setOut(originalStdout);
        }
    }

}

 

 

 

 

DocxToPDFConverter:

import java.awt.Color;
import java.io.InputStream;
import java.io.OutputStream;


import org.apache.poi.xwpf.converter.pdf.PdfConverter;
import org.apache.poi.xwpf.converter.pdf.PdfOptions;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.springframework.core.io.ClassPathResource;
import org.springframework.core.io.Resource;


import com.lowagie.text.Font;
import com.lowagie.text.pdf.BaseFont;


import fr.opensagres.xdocreport.itext.extension.font.ITextFontRegistry;


/**
 * Converts an OOXML Word document ({@code .docx}) read from the input stream
 * into a PDF written to the output stream, using Apache POI to read the
 * document and the xdocreport {@code PdfConverter} to render it. A font
 * provider backed by {@code simsun.ttf} on the classpath is installed so that
 * Chinese text is rendered.
 */
public class DocxToPDFConverter extends Converter {

    /** Classpath resource holding the TrueType font used for every requested font. */
    private static final String FONT_RESOURCE = "simsun.ttf";

    /**
     * @param inStream                 source {@code .docx} document
     * @param outStream                destination for the generated PDF
     * @param showMessages             whether progress messages are emitted
     * @param closeStreamsWhenComplete whether both streams are closed when conversion ends
     */
    public DocxToPDFConverter(InputStream inStream, OutputStream outStream, boolean showMessages,
            boolean closeStreamsWhenComplete) {
        super(inStream, outStream, showMessages, closeStreamsWhenComplete);
    }

    /**
     * Loads the document and renders it to PDF.
     *
     * <p>{@code finished()} runs in a {@code finally} block so that the streams
     * are closed (when requested) even if loading or conversion throws.
     *
     * @throws Exception if the document cannot be read or converted
     */
    @Override
    public void convert() throws Exception {
        loading();
        try {
            PdfOptions options = PdfOptions.create();
            XWPFDocument document = new XWPFDocument(inStream);

            // Chinese font support: serve every font request from the bundled font file.
            options.fontProvider(new ITextFontRegistry() {
                @Override
                public Font getFont(String familyName, String encoding, float size, int style, Color color) {
                    try {
                        Resource fontResource = new ClassPathResource(FONT_RESOURCE);
                        String fontPath = fontResource.getFile().getAbsolutePath();

                        BaseFont bfChinese = BaseFont.createFont(fontPath, BaseFont.IDENTITY_H, BaseFont.EMBEDDED);
                        Font fontChinese = new Font(bfChinese, size, style, color);
                        if (familyName != null) {
                            fontChinese.setFamily(familyName);
                        }
                        return fontChinese;
                    } catch (Exception e) {
                        // Fall back to the default registry when the bundled font cannot be loaded.
                        // Errors (e.g. OutOfMemoryError) are intentionally not caught here.
                        e.printStackTrace();
                        return ITextFontRegistry.getRegistry().getFont(familyName, encoding, size, style, color);
                    }
                }
            });

            processing();
            PdfConverter.getInstance().convert(document, outStream, options);
        } finally {
            finished();
        }
    }

}

 

main 方法的实现代码

// Converter instance; assigned below according to the attachment's file extension.
Converter converter;

 

// Build the target PDF path under the web application's real path; backslashes are normalized to "/".
path = request.getSession().getServletContext().getRealPath("").replaceAll("\\\\", "/") + "/flyingsauser/preview.pdf";
File file = new File(path);
// NOTE(review): opening a FileOutputStream normally creates the file already, so the
// exists()/createNewFile() check further down probably never fires - confirm and consider removing it.
OutputStream outputStream = new FileOutputStream(file);
String url = attachmentEntity.getUrl();
// Fetch the source attachment as a stream (OSSClientUtil is defined outside this snippet).
inputStream = OSSClientUtil.getFileObject(url);

if(!file.exists()){
file.createNewFile();
}
// Pick the converter by file extension. Both converters are created with showMessages=true and
// closeStreamsWhenComplete=true, then the generated PDF is reopened for reading.
if(url.endsWith(".docx")) {
converter = new DocxToPDFConverter(inputStream, outputStream, true, true);
converter.convert();
fileInputStream = new FileInputStream(file);
} else if(url.endsWith(".doc")){
converter = new DocToPDFConverter(inputStream, outputStream, true, true);
converter.convert();
fileInputStream = new FileInputStream(file);
// NOTE(review): the closing brace of this else-if block is missing from the snippet.

 

 

 

以上就是word转pdf的实现。为了支持中文,代码会从classpath加载simsun.ttf字体,因此需要把simsun.ttf放到项目的资源目录下。

具体源码实现参照了下方的github的代码

https://github.com/yeokm1/docs-to-pdf-converter

 

### 如何将Word文档转换为PDF格式

在企业和个人办公场景中,Word和PDF是两种最常见的文档格式。为了在分享和发布文档时保证文档格式不变,通常需要将Word文档转换为PDF格式[^1]。以下是几种常见的实现方式:

#### 使用Java实现Word转PDF

在Java环境中,可以使用多种库来实现Word文档到PDF的转换。例如,通过Apache POI处理Word文档,并结合iText或Flying Saucer等工具生成PDF文件。此外,还可以调用第三方API服务完成转换任务。

以下是一个简单的示例代码,展示如何使用Aspose.Words for Java实现Word转PDF的功能:

```java
import com.aspose.words.*;

public class WordToPdfConverter {
    public static void main(String[] args) throws Exception {
        // 加载Word文档
        Document doc = new Document("input.docx");
        // 将文档保存为PDF格式
        doc.save("output.pdf", SaveFormat.PDF);
        System.out.println("转换成功!");
    }
}
```

#### 使用Python实现Word转PDF

如果更倾向于使用Python语言,则可以通过`python-docx`库读取Word文档,并借助`pywin32`库调用Microsoft Word应用程序完成转换操作[^3]。需要注意的是,此方法要求系统已安装Microsoft Word。

以下是一个Python脚本示例:

```python
import win32com.client

def word_to_pdf(input_file, output_file):
    # 创建Word应用程序对象
    word_app = win32com.client.Dispatch("Word.Application")
    # 后台运行Word,不显示界面
    word_app.Visible = False
    try:
        # 打开Word文档
        doc = word_app.Documents.Open(input_file)
        # 保存为PDF格式
        doc.SaveAs(output_file, FileFormat=17)  # FileFormat=17表示PDF格式
        # 关闭文档
        doc.Close()
    finally:
        # 退出Word应用程序
        word_app.Quit()

# 调用函数进行转换
word_to_pdf("input.docx", "output.pdf")
print("转换成功!")
```

#### 实现批量转换功能

对于需要批量将Word文档转换为PDF文件的场景,可以编写一个脚本来遍历指定目录下的所有Word文档并逐一进行转换[^2]。以下是一个基于Python的批量转换示例代码:

```python
import os
import win32com.client

def get_filenames(directory, extension=".docx"):
    return [os.path.join(directory, f) for f in os.listdir(directory) if f.endswith(extension)]

def batch_word_to_pdf(source_dir, target_dir):
    word_app = win32com.client.Dispatch("Word.Application")
    word_app.Visible = False
    word_files = get_filenames(source_dir, ".docx")
    for word_file in word_files:
        base_name = os.path.splitext(os.path.basename(word_file))[0]
        pdf_file = os.path.join(target_dir, f"{base_name}.pdf")
        try:
            doc = word_app.Documents.Open(word_file)
            doc.SaveAs(pdf_file, FileFormat=17)
            doc.Close()
            print(f"转换成功: {word_file} -> {pdf_file}")
        except Exception as e:
            print(f"转换失败: {word_file}, 错误信息: {e}")
    word_app.Quit()

# 调用批量转换函数
batch_word_to_pdf("source_path", "target_path")
```

### 注意事项

- 在使用上述方法时,请确保已正确安装相关依赖库。
- 如果选择调用Microsoft Word应用程序的方式,则需确认目标系统已安装Microsoft Word软件。
评论 34
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值