docx4j word转pdf

使用docx4j转换为PDF的代码示例

最新推荐文章于 2025-07-05 09:05:12 发布

原创最新推荐文章于 2025-07-05 09:05:12 发布 · 1.4k 阅读

1 ·

CC 4.0 BY-SA版权

文章标签：

#java

本文提供了一个使用docx4j将Word文档转换为PDF的示例代码，包括模板加载、数据替换、PDF配置和输出过程。代码中详细展示了如何处理模板中的表格，并使用正则表达式指定允许的字体范围。

使用docx4j转pdf,代码及模板在下面。模板在附件中，字体使用的是宋体。

import java.io.OutputStream;
import java.util.HashMap;
import java.util.List;

import org.docx4j.Docx4J;
import org.docx4j.TraversalUtil;
import org.docx4j.XmlUtils;
import org.docx4j.convert.out.FOSettings;
import org.docx4j.finders.ClassFinder;
import org.docx4j.fonts.IdentityPlusMapper;
import org.docx4j.fonts.Mapper;
import org.docx4j.fonts.PhysicalFont;
import org.docx4j.fonts.PhysicalFonts;
import org.docx4j.model.datastorage.migration.VariablePrepare;
import org.docx4j.openpackaging.packages.WordprocessingMLPackage;
import org.docx4j.openpackaging.parts.WordprocessingML.MainDocumentPart;
import org.docx4j.wml.Tbl;
import org.docx4j.wml.Tr;

/**
 * 
 * 将模板内容替换后转化成PDF输出
 *
 */
public class MyexapmleConvertToPDF {

private static final  String templetate_docx  = "\\myexamples\\replace_text_PDF_templetate.docx";
	
	private static final  String output_pdf  = "\\myexamples\\replace_text_PDF.pdf";
	
	public static void main(String[] args) throws Exception {
		//加载模板
				WordprocessingMLPackage wordMLPackage = WordprocessingMLPackage
						.load(new java.io.File(System.getProperty("user.dir")+templetate_docx));

				//准备数据
				HashMap<String, String> mappings = new HashMap<String, String>();
				mappings.put("username", "张三");
				mappings.put("party_date", "2014年10月25日");
				mappings.put("numberCount", "150");
				mappings.put("pay_acount", "99.50");
				mappings.put("now_date", "2014年09月25日");
				
				//进行数据合并
				MainDocumentPart documentPart = wordMLPackage.getMainDocumentPart();
		 
				ClassFinder finder = new ClassFinder(Tbl.class); // <----- change this to suit
				new TraversalUtil(documentPart.getContent(), finder);
				//查找模板表格（第i个表格 ）
				int seleTableIndex = 0;
				Tbl table_selected =  (Tbl) finder.results.get(seleTableIndex);
				List trs = table_selected.getContent(); 
				//模板行，第2行为模板行
				int table_templetate_row_index = 2;
				org.docx4j.wml.Tr templetate_row = (Tr)trs.get(table_templetate_row_index);
				String templetate_row_string = XmlUtils.marshaltoString(templetate_row,true,true);
				//System.out.println(templetate_row_string);
				//替换第二行的数据
				List<Object>tds = templetate_row.getContent();
				HashMap datamap = new HashMap();
				datamap.put("table_index", "1");
				datamap.put("product_name", "唐代陶瓷");
			 
				
				HashMap datamap2 = new HashMap();
				datamap2.put("table_index", "2");
				datamap2.put("product_name", "明代王冠"); 
				
				//合并表格数据1
				Object newTr = XmlUtils.unmarshallFromTemplate(templetate_row_string,datamap);
				table_selected.getContent().add(newTr);
				//合并表格数据2
				newTr = XmlUtils.unmarshallFromTemplate(templetate_row_string,datamap2);
				table_selected.getContent().add(newTr);
				//移除第2行
				table_selected.getContent().remove(table_templetate_row_index);
					
				//数据替换预处理，调用API包
				//在表格替换后调用这个方法
				VariablePrepare.prepare(wordMLPackage);
				documentPart.variableReplace(mappings);
	 
		//pdf准备工作
		// Font regex (optional)
		// Set regex if you want to restrict to some defined subset of fonts
		// Here we have to do this before calling createContent,
		// since that discovers fonts
		String regex = null;
		// Windows:
		// String  字体类型
		// regex=".*(calibri|camb|cour|arial|symb|times|Times|zapf).*";
		regex=".*(simsunb|simsun|calibri|camb|cour|arial|times|comic|georgia|impact|LSANS|pala|tahoma|trebuc|verdana|symbol|webdings|wingding).*";
		// Mac
		// String
		// regex=".*(Courier New|Arial|Times New Roman|Comic Sans|Georgia|Impact|Lucida Console|Lucida Sans Unicode|Palatino Linotype|Tahoma|Trebuchet|Verdana|Symbol|Webdings|Wingdings|MS Sans Serif|MS Serif).*";
		PhysicalFonts.setRegex(regex);
		
		// Set up font mapper (optional)
		Mapper fontMapper = new IdentityPlusMapper();
		
		// .. example of mapping font Times New Roman which doesn't have certain Arabic glyphs
		// eg Glyph "ي" (0x64a, afii57450) not available in font "TimesNewRomanPS-ItalicMT".
		// eg Glyph "ج" (0x62c, afii57420) not available in font "TimesNewRomanPS-ItalicMT".
		// to a font which does
		PhysicalFont font = PhysicalFonts.getPhysicalFonts().get("calibri"); 
			// make sure this is in your regex (if any)!!!
		if (font!=null) {
			fontMapper.getFontMappings().put("Times New Roman", font);
		}
		fontMapper.getFontMappings().put("Libian SC Regular", PhysicalFonts.getPhysicalFonts().get("SimSun"));
		wordMLPackage.setFontMapper(fontMapper);
		// FO exporter setup (required)
		// .. the FOSettings object
    	FOSettings foSettings = Docx4J.createFOSettings();
    	 
    	foSettings.setFoDumpFile(new java.io.File(System.getProperty("user.dir")+templetate_docx + ".fo"));
    	
		foSettings.setWmlPackage(wordMLPackage);
		
		// Document format: 
		// The default implementation of the FORenderer that uses Apache Fop will output
		// a PDF document if nothing is passed via 
		// foSettings.setApacheFopMime(apacheFopMime)
		// apacheFopMime can be any of the output formats defined in org.apache.fop.apps.MimeConstants eg org.apache.fop.apps.MimeConstants.MIME_FOP_IF or
		// FOSettings.INTERNAL_FO_MIME if you want the fo document as the result.
		//foSettings.setApacheFopMime(FOSettings.INTERNAL_FO_MIME);
		
		// exporter writes to an OutputStream.		
		OutputStream os = new java.io.FileOutputStream(System.getProperty("user.dir")+output_pdf);
    	
		// Specify whether PDF export uses XSLT or not to create the FO
		// (XSLT takes longer, but is more complete).
		
		// Don't care what type of exporter you use
		//Docx4J.toFO(foSettings, os, Docx4J.FLAG_EXPORT_PREFER_XSL);
		
		// Prefer the exporter, that uses a xsl transformation
		Docx4J.toFO(foSettings, os, Docx4J.FLAG_EXPORT_PREFER_XSL);
		
		// Prefer the exporter, that doesn't use a xsl transformation (= uses a visitor)
		// .. faster, but not yet at feature parity
		// Docx4J.toFO(foSettings, os, Docx4J.FLAG_EXPORT_PREFER_NONXSL);
		System.out.println("创建完成 " );

	}

}

其中代码字体需要注意了。

其中的名称获取方式是在c://windows/fonts目录下找到需要的文字。右键-属性查看其名称。Linux没有试过。自己测试。

如下截图：