java POI实现word转html(doc、docx)

word内容读取为html

背景: 需要将 Word 文档内容导入到富文本编辑器中,因此本工具类同时支持读取 .doc 和 .docx 两种格式的文件。

工具类 WordToHtml.java:

import fr.opensagres.poi.xwpf.converter.core.ImageManager;
import fr.opensagres.poi.xwpf.converter.xhtml.XHTMLConverter;
import fr.opensagres.poi.xwpf.converter.xhtml.XHTMLOptions;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.converter.PicturesManager;
import org.apache.poi.hwpf.converter.WordToHtmlConverter;
import org.apache.poi.hwpf.usermodel.Picture;
import org.apache.poi.hwpf.usermodel.PictureType;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.stereotype.Component;
import org.springframework.web.multipart.MultipartFile;
import org.w3c.dom.Document;

import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import java.io.*;
import java.util.List;

/**
 * Converts uploaded Word documents ({@code .doc} and {@code .docx}) into HTML strings.
 *
 * <p>Pictures embedded in the document are written below the directory configured by
 * {@code word.pic.save.path}, and picture references in the generated HTML are prefixed
 * with {@link #PIC_VIEW_PATH}.
 */
@Component
public class WordToHtml {

    /**
     * URL prefix placed in front of every picture reference in the generated HTML.
     * NOTE(review): hard-coded to a local address; presumably this should come from
     * configuration like {@code picPath} does - confirm with the deployment setup.
     */
    private static final String PIC_VIEW_PATH = "http://127.0.0.1:8761/server/dietc/source/view/word/pic/";

    /** Message returned when the upload is neither a .doc nor a .docx file. */
    private static final String UNSUPPORTED_TYPE_MESSAGE = "请上传.doc或者.docx文件";

    /** Directory that extracted pictures are saved into. */
    @Value("${word.pic.save.path}")
    private String picPath;

    /**
     * Converts the uploaded Word file into an HTML string.
     *
     * @param file the uploaded file to convert; may be {@code null}
     * @return the generated HTML; an empty string when {@code file} is {@code null};
     *         a "please upload .doc or .docx" message when the file name is missing or has
     *         another extension; a "bad file format" message when conversion fails
     */
    public String readeWordToHtml(MultipartFile file) {
        if (file == null) {
            return "";
        }
        // getOriginalFilename() may be null; without this guard the substring call
        // below would throw a NullPointerException outside of the try block.
        String filename = file.getOriginalFilename();
        if (filename == null) {
            return UNSUPPORTED_TYPE_MESSAGE;
        }
        String suffix = filename.substring(filename.lastIndexOf('.') + 1);
        try {
            // Case-insensitive so that mixed-case extensions such as "Docx" are accepted too.
            if ("doc".equalsIgnoreCase(suffix)) {
                return convertDoc(file);
            }
            if ("docx".equalsIgnoreCase(suffix)) {
                return convertDocx(file);
            }
            return UNSUPPORTED_TYPE_MESSAGE;
        } catch (Exception e) {
            e.printStackTrace();
            System.out.println("文件格式错误!");
            return "文件格式错误!";
        }
    }

    /** Converts a binary {@code .doc} upload to HTML and saves its embedded pictures. */
    private String convertDoc(MultipartFile file) throws Exception {
        try (InputStream in = file.getInputStream();
             HWPFDocument wordDocument = new HWPFDocument(in)) {
            WordToHtmlConverter converter = new WordToHtmlConverter(
                    DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument());
            // Point every picture reference in the HTML at the public view URL.
            converter.setPicturesManager(new PicturesManager() {
                @Override
                public String savePicture(byte[] content,
                                          PictureType pictureType, String suggestedName,
                                          float widthInches, float heightInches) {
                    return PIC_VIEW_PATH + suggestedName;
                }
            });
            converter.processDocument(wordDocument);
            savePictures(wordDocument.getPicturesTable().getAllPictures());

            Document htmlDocument = converter.getDocument();
            ByteArrayOutputStream out = new ByteArrayOutputStream();
            Transformer serializer = TransformerFactory.newInstance().newTransformer();
            serializer.setOutputProperty(OutputKeys.ENCODING, "UTF-8");
            serializer.setOutputProperty(OutputKeys.INDENT, "yes");
            serializer.setOutputProperty(OutputKeys.METHOD, "html");
            serializer.transform(new DOMSource(htmlDocument), new StreamResult(out));
            // Decode with the same charset the serializer was told to write; the platform
            // default charset would corrupt non-ASCII text on non-UTF-8 systems.
            return out.toString("UTF-8").replace("↵", "");
        }
    }

    /** Writes each picture into the picture directory, closing every output stream. */
    private void savePictures(List<Picture> pictures) throws IOException {
        if (pictures == null) {
            return;
        }
        for (Picture picture : pictures) {
            // File(parent, child) inserts the separator when picPath lacks a trailing one.
            File target = new File(picPath, picture.suggestFullFileName());
            try (OutputStream imageOut = new FileOutputStream(target)) {
                picture.writeImageContent(imageOut);
            } catch (FileNotFoundException e) {
                // Best effort: a picture that cannot be created does not abort the conversion.
                e.printStackTrace();
            }
        }
    }

    /** Converts an OOXML {@code .docx} upload to an XHTML fragment, extracting its pictures. */
    private String convertDocx(MultipartFile file) throws Exception {
        try (InputStream in = file.getInputStream();
             XWPFDocument document = new XWPFDocument(in)) {
            XHTMLOptions options = XHTMLOptions.create();
            options.setIgnoreStylesIfUnused(false);
            options.setFragment(true);
            // Pictures are extracted below picPath by the converter's image manager.
            options.setImageManager(new ImageManager(new File(picPath), ""));

            ByteArrayOutputStream out = new ByteArrayOutputStream();
            XHTMLConverter.getInstance().convert(document, out, options);
            // NOTE(review): still decoded with the platform default charset, as before; the
            // converter's output encoding is not visible here - confirm before changing.
            String result = new String(out.toByteArray());
            // Literal replacement: prefix every image source with the public view URL.
            return result.replace("<img src=\"", "<img src=\"" + PIC_VIEW_PATH);
        }
    }
}

感谢阅读,有问题欢迎留言,看到第一时间回复!(*^_^*)

评论 2
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

Lovme_du

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值