word转html,图片转为base64,预览
public class InlineImageWordToHtmlConverter extends WordToHtmlConverter {
public InlineImageWordToHtmlConverter(Document document) {
super(document);
}
@Override
protected void processImageWithoutPicturesManager(Element currentBlock,
boolean inlined, Picture picture)
{
Element imgNode = currentBlock.getOwnerDocument().createElement("img");
StringBuilder sb = new StringBuilder();
sb.append(Base64.getMimeEncoder().encodeToString(picture.getRawContent()));
sb.insert(0, "data:"+picture.getMimeType()+";base64,");
imgNode.setAttribute("src", sb.toString());
currentBlock.appendChild(imgNode);
}
}
//docx转换html
public static String word2007ToHtmlStr(InputStream inputStream){
ByteArrayOutputStream baos = null;
String content = "";
try{
XWPFDocument docxDocument = new XWPFDocument(inputStream);
// 配置
XHTMLOptions options = XHTMLOptions.create();
// 设置图片存储路径
String path = System.getProperty("java.io.tmpdir");
String firstImagePathStr = path + "/" + String.valueOf(System.currentTimeMillis());
options.setExtractor(new FileImageExtractor(new File(firstImagePathStr)));
options.URIResolver(new BasicURIResolver(firstImagePathStr));
// 转换html
baos = new ByteArrayOutputStream();
XHTMLConverter.getInstance().convert(docxDocument, baos, options);
content = baos.toString();
// 将image文件转换为base64并替换到html字符串里
String middleImageDirStr = "/word/media";
String imageDirStr = firstImagePathStr + middleImageDirStr;
File imageDir = new File(imageDirStr);
String[] imageList = imageDir.list();
if (imageList != null) {
for (int i = 0; i < imageList.length; i++) {
String oneImagePathStr = imageDirStr + "/" + imageList[i];
File oneImageFile = new File(oneImagePathStr);
String imageBase64Str = new String(Base64.encodeBase64(FileUtils.readFileToByteArray(oneImageFile)), "UTF-8");
content = content.replace(oneImagePathStr, "data:image/png;base64," + imageBase64Str);
}
}
//删除图片路径
File firstImagePath = new File(firstImagePathStr);
FileUtils.deleteDirectory(firstImagePath);
if(baos!=null)baos.close();
if(inputStream!=null)inputStream.close();
} catch (Exception e){
try{
if(baos!=null)baos.close();
if(inputStream!=null)inputStream.close();
} catch (IOException e1) {
log.error("wordutil.docxToHtml IOException:"+e1.getMessage());
}
log.error("wordutil.docxToHtml Exception:"+e.getMessage());
}
return content;
}
private static String word2003ToHtmlStr(InputStream inputStream){
ByteArrayOutputStream baos = null;
String content = "";
try{
HWPFDocument wordDocument = new HWPFDocument(inputStream);
WordToHtmlConverter wordToHtmlConverter = new InlineImageWordToHtmlConverter(
DocumentBuilderFactory.newInstance().newDocumentBuilder()
.newDocument());
// 解析word文档
wordToHtmlConverter.processDocument(wordDocument);
org.w3c.dom.Document htmlDocument = wordToHtmlConverter.getDocument();
// 也可以使用字符数组流获取解析的内容
baos = new ByteArrayOutputStream();
DOMSource domSource = new DOMSource(htmlDocument);
StreamResult streamResult = new StreamResult(baos);
TransformerFactory factory = TransformerFactory.newInstance();
Transformer serializer = factory.newTransformer();
serializer.setOutputProperty(OutputKeys.ENCODING, "utf-8");
serializer.setOutputProperty(OutputKeys.INDENT, "yes");
serializer.setOutputProperty(OutputKeys.METHOD, "html");
serializer.transform(domSource, streamResult);
// 也可以使用字符数组流获取解析的内容
content = new String(baos.toByteArray());
baos.close();
inputStream.close();
} catch (Exception e) {
try{
if(baos !=null)baos.close();
if(inputStream !=null)inputStream.close();
} catch (IOException e1) {
log.error("wordutil.word2003ToHtmlStr IOException:"+e1.getMessage());
}
log.error("wordutil.word2003ToHtmlStr Exception:"+e.getMessage());
}
return content;
}