package com.baiyang.lc.util;
import java.io.ByteArrayOutputStream;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import org.apache.poi.hwpf.HWPFDocumentCore;
import org.apache.poi.hwpf.converter.WordToHtmlConverter;
import org.apache.poi.hwpf.converter.WordToHtmlUtils;
import org.w3c.dom.Document;
public class Word {
/**
* word2003转Html
*
* @param fileName
* @return
* @throws FileNotFoundException
* @throws IOException
* @throws ParserConfigurationException
* @throws TransformerException
* @author baiyang
*/
public static String Word2003ToHtml(String fileName) throws FileNotFoundException, IOException, ParserConfigurationException, TransformerException {
WordToHtmlConverter wordToHtmlConverter = new WordToHtmlConverter(DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument());
HWPFDocumentCore wordDocument = WordToHtmlUtils.loadDoc(new FileInputStream(fileName));
wordToHtmlConverter.processDocument(wordDocument);
Document htmlDocument = wordToHtmlConverter.getDocument();
ByteArrayOutputStream out = new ByteArrayOutputStream();
DOMSource domSource = new DOMSource(htmlDocument);
StreamResult streamResult = new StreamResult(out);
TransformerFactory tf = TransformerFactory.newInstance();
Transformer serializer = tf.newTransformer();
serializer.setOutputProperty(OutputKeys.ENCODING, "UTF-8");
serializer.setOutputProperty(OutputKeys.INDENT, "yes");
serializer.setOutputProperty(OutputKeys.METHOD, "html");
serializer.transform(domSource, streamResult);
out.flush();
out.close();
String result = new String(out.toByteArray());
return result;
}
}
所需jar包
Word转换成Html
最新推荐文章于 2025-07-29 17:33:45 发布
本文详细介绍了如何使用Apache POI库将Word文档转换为HTML格式,包括代码实现及注意事项。
267

被折叠的 条评论
为什么被折叠?



