word转html

最新推荐文章于 2025-10-12 09:06:44 发布

原创 · 最新推荐文章于 2025-10-12 09:06:44 发布 · 246 阅读

0 ·

CC 4.0 BY-SA版权

文章标签：

#html #word #word转html

本文介绍了如何将Word文档转换成HTML格式，包括在线工具和软件方法，以便于在网络上传播和分享。

在线打开

// Open a file in the preview dialog.
// Issues a GET request to `filepath` first; only when that request succeeds is
// the dialog opened and the file loaded into the #fileContainer element.
// If the request fails, a system alert is shown instead.
function getfilepath(filepath){
	// Success path: open the dialog, then point the container at the file.
	var showInDialog = function(){
		$("#fileDialog").dialogPlugin("open");
		$("#fileContainer").attr("src",filepath);
	};
	// Failure path: tell the user the file could not be retrieved.
	var reportFailure = function(){
		$.msgAlert("系统提示","文件出现问题,请联系技术人员","info");
	};
	$.ajax({
		type:"GET",
		url:filepath,
		success:showInDialog,
		error:reportFailure
	});
}

/**
 * Project Name:bph_sp<br>
 * File Name:PoiWordToHtmlUtil.java<br>
 * Package Name:com.msunsoft.common.utils.file<br>
 * Date:2018-2-7上午11:00:24<br>
 * Copyright (c) 2018, MSunSoft All Rights Reserved.
 *
*/

package com.msunsoft.common.utils.file;

import java.io.BufferedWriter;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.OutputStreamWriter;

import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.converter.PicturesManager;
import org.apache.poi.hwpf.converter.WordToHtmlConverter;
import org.apache.poi.hwpf.usermodel.PictureType;
import org.apache.poi.xwpf.converter.core.BasicURIResolver;
import org.apache.poi.xwpf.converter.core.FileImageExtractor;
import org.apache.poi.xwpf.converter.xhtml.XHTMLConverter;
import org.apache.poi.xwpf.converter.xhtml.XHTMLOptions;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.w3c.dom.Document;

/**
 * Utility that converts Microsoft Word documents to HTML with Apache POI.
 * <p>
 * Binary {@code .doc} files are converted through {@link HWPFDocument} and
 * {@link WordToHtmlConverter}; {@code .docx} files are converted through
 * {@link XWPFDocument} and {@link XHTMLConverter}. The generated markup is
 * written to a target file and also returned to the caller.
 * <p>
 * Created 2016-01-12.
 *
 * @author liang
 */
public class PoiWordToHtmlUtil {
	
	private final static Log logger = LogFactory.getLog(PoiWordToHtmlUtil.class);
	
	/** Charset used when writing the generated HTML file to disk. */
	public static final String ENCODING="UTF-8";

	/**
	 * Converts the Word document at {@code docFile} to HTML, writes the markup to
	 * {@code htmlPath} and returns it.
	 * <p>
	 * The conversion path is chosen from the extension of {@code saveName}
	 * ({@code doc} or {@code docx}, case-insensitive). Any failure during conversion
	 * is caught and logged, and {@code null} is returned; the checked exceptions in
	 * the signature are retained so existing callers keep compiling.
	 *
	 * @param docFile  path of the Word document to read
	 * @param saveName file name whose extension selects the conversion path
	 * @param htmlPath path of the HTML file to write
	 * @return the generated HTML, or {@code null} if the extension is missing or
	 *         unsupported, or if the conversion failed
	 * @throws TransformerException declared for backward compatibility
	 * @throws IOException declared for backward compatibility
	 * @throws ParserConfigurationException declared for backward compatibility
	 */
	public static String wordToHtml(String docFile, final String saveName,final String htmlPath) throws TransformerException, IOException, ParserConfigurationException {
		String ext = GetFileExt(saveName);

		String content = null;
		try {
			// Constant-first comparison: ext is null when saveName has no usable extension.
			if ("doc".equals(ext)) {
				logger.info("*****doc转html 正在转换...*****");
				content = convertDoc(docFile);
				writeFile(content, htmlPath);
				logger.info("*****doc转html 转换结束...*****");
			} else if ("docx".equals(ext)) {
				logger.info("*****docx转html 正在转换...*****");
				content = convertDocx(docFile);
				writeFile(content, htmlPath);
				logger.info("*****docx转html 转换结束...*****");
			} else {
				logger.warn("Unsupported or missing file extension, nothing converted: " + saveName);
			}
		} catch (Exception e) {
			// Best-effort contract: report the failure through the logger and return null.
			logger.error("Failed to convert word document to html: " + docFile, e);
		}
		return content;
	}

	/**
	 * Converts a binary {@code .doc} file to an HTML string.
	 *
	 * @param docFile path of the {@code .doc} file
	 * @return the serialized HTML document
	 * @throws IOException if the file cannot be read
	 * @throws ParserConfigurationException if no DOM builder can be created
	 * @throws TransformerException if the DOM cannot be serialized
	 */
	private static String convertDoc(String docFile) throws IOException, ParserConfigurationException, TransformerException {
		InputStream in = new FileInputStream(docFile);
		try {
			HWPFDocument wordDocument = new HWPFDocument(in);
			WordToHtmlConverter wordToHtmlConverter = new WordToHtmlConverter(
					DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument());
			wordToHtmlConverter.processDocument(wordDocument);
			Document htmlDocument = wordToHtmlConverter.getDocument();

			// Serialize the DOM as indented HTML into memory.
			ByteArrayOutputStream out = new ByteArrayOutputStream();
			Transformer serializer = TransformerFactory.newInstance().newTransformer();
			serializer.setOutputProperty(OutputKeys.ENCODING, "utf8");
			serializer.setOutputProperty(OutputKeys.INDENT, "yes");
			serializer.setOutputProperty(OutputKeys.METHOD, "html");
			serializer.transform(new DOMSource(htmlDocument), new StreamResult(out));

			// Decode with the same charset the serializer was told to emit.
			return out.toString("utf8");
		} finally {
			// The source stream was previously left open; always release it.
			closeQuietly(in);
		}
	}

	/**
	 * Converts a {@code .docx} file to an XHTML string.
	 * <p>
	 * No image extractor or URI resolver is configured on the options.
	 *
	 * @param docFile path of the {@code .docx} file
	 * @return the generated XHTML
	 * @throws Exception if the document cannot be read or converted; the caller
	 *         catches and logs every failure
	 */
	private static String convertDocx(String docFile) throws Exception {
		// 1) Load the Word document into an XWPFDocument.
		InputStream in = new FileInputStream(new File(docFile));
		try {
			XWPFDocument document = new XWPFDocument(in);
			// 2) Create the XHTML conversion options.
			XHTMLOptions options = XHTMLOptions.create();
			// 3) Convert the XWPFDocument to XHTML in memory.
			ByteArrayOutputStream baos = new ByteArrayOutputStream();
			XHTMLConverter.getInstance().convert(document, baos, options);
			// NOTE(review): decoded with the platform default charset, unchanged from the
			// previous behaviour. Confirm which charset XHTMLConverter writes before
			// switching this to an explicit one.
			return baos.toString();
		} finally {
			// The source stream was previously left open; always release it.
			closeQuietly(in);
		}
	}

	/**
	 * Returns the lower-cased extension of a file name.
	 *
	 * @param name file name, may be {@code null}
	 * @return the text after the last {@code '.'} in lower case, or {@code null} when
	 *         {@code name} is {@code null}, contains no dot, starts with its only dot,
	 *         or ends with a dot
	 */
	public  static String GetFileExt(String name) {
		if (name == null) {
			return null;
		}
		int dot = name.lastIndexOf('.');
		if (dot > 0 && dot < name.length() - 1) {
			// Fixed locale so the result does not depend on the JVM default locale.
			return name.substring(dot + 1).toLowerCase(java.util.Locale.ENGLISH);
		}
		return null;
	}

	/**
	 * Writes {@code content} to the file at {@code path} using {@link #ENCODING}.
	 * I/O failures are logged and not propagated.
	 *
	 * @param content text to write
	 * @param path    destination file path
	 */
	private static void writeFile(String content, String path) {
		OutputStream os = null;
		BufferedWriter bw = null;
		try {
			os = new FileOutputStream(new File(path));
			bw = new BufferedWriter(new OutputStreamWriter(os, ENCODING));
			bw.write(content);
		} catch (IOException ioe) {
			// Also covers FileNotFoundException, which is an IOException.
			logger.error("Failed to write html file: " + path, ioe);
		} finally {
			// Close each resource independently so a failure closing the writer
			// cannot leave the underlying stream open.
			try {
				if (bw != null) {
					bw.close();
				}
			} catch (IOException ie) {
				logger.error("Failed to close writer for: " + path, ie);
			}
			try {
				if (os != null) {
					os.close();
				}
			} catch (IOException ie) {
				logger.error("Failed to close output stream for: " + path, ie);
			}
		}
	}

	/**
	 * Closes the given stream, logging instead of propagating any failure.
	 *
	 * @param in stream to close, may be {@code null}
	 */
	private static void closeQuietly(InputStream in) {
		if (in == null) {
			return;
		}
		try {
			in.close();
		} catch (IOException e) {
			logger.warn("Failed to close input stream", e);
		}
	}
	
}