public static void main(String[] args) throws Exception { docxToHtml(); } public static void docxToHtml() throws Exception { //D:\zpdtolly\工作总结文档\zpd使用文档\v4\用户使用手册\客户端使用手册 String sourceFileName = "C:\\Users\\luoguoqing\\Desktop\\20230920客户端v使用手册.doc"; String targetFileName = "C:\\Users\\luoguoqing\\Desktop\\20230920客户端v使用手册.html"; //下载图片位置 String imagePathStr = "D:/doc2htmltest/image/"; String style = "body{background:#f0eeee;text-align:center;}\n" + "div{width:85%;margin:0 auto;background:#ecdfdf;text-align:left;}\n" + "* {\n" + " padding: 0;\n" + " margin: 0;\n" + " box-sizing: border-box;\n" + "}\n" + ".aside {\n" + " width: 240px;\n" + " height: 100%;\n" + " position: fixed;\n" + " left: -240px;\n" + " top: 0px;\n" + " border-right: 1px solid #ccc;\n" + " -ms-transition: all 0.3s linear;\n" + " -moz-transition: all 0.3s linear;\n" + " -webkit-transition: all 0.3s linear;\n" + " transition: all 0.3s linear;\n" + "}\n" + ".aside:hover{\n" + " left: 0;\n" + "}\n" + ".aside:hover + .article{\n" + " padding-left: 260px;\n" + "}\n" + ".nav-list{\n" + " width: 100%;\n" + " height: 100%;\n" + " overflow: auto;\n" + " padding: 10px 0px;\n" + "}\n" + ".nav-mark{\n" + " position: absolute;\n" + " right: -20px;\n" + " top: 50%;\n" + " z-index: 2;\n" + " height: 80px;\n" + " width: 20px;\n" + " margin-top: -40px;\n" + " background-color: #44a7ff;\n" + " box-shadow: 2px 0px 3px #eee;\n" + " border-radius: 0 40px 40px 0;\n" + " font-size: 12px;\n" + " text-align: center;\n" + " line-height: 24px;\n" + " padding-top: 16px;\n" + " color: #fff;\n" + "}\n" + ".nav {\n" + " display: block;\n" + " width: 100%;\n" + " height: 32px;\n" + " line-height: 32px;\n" + " font-size: 16px;\n" + " color: #333;\n" + " text-decoration: none;\n" + " padding-left: 20px;\n" + "}\n" + ".nav:hover {\n" + " background-color: #44a7ff;\n" + " color: #fff;\n" + "}\n" + ".grade2 {\n" + " text-indent: 1em;\n" + "}\n" + ".grade3 {\n" + " text-indent: 2em;\n" + "}"; HWPFDocument wordDocument = new HWPFDocument(new FileInputStream(sourceFileName)); WordToHtmlConverter wordToHtmlConverter = new WordToHtmlConverter(DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument()); wordToHtmlConverter.setPicturesManager((a, b, suggestedName, d, e) -> { // convertFileToBase64() out.println(suggestedName); //返回图片路径 //return "image" + File.separator + suggestedName; //返回图片base64值 return "data:image/"+b.getExtension().toLowerCase()+";base64,"+new String(Base64.encodeBase64(a)); }); wordToHtmlConverter.processDocument(wordDocument); List<Picture> pics = wordDocument.getPicturesTable().getAllPictures(); out.println(pics); Iterator var8 = pics.iterator(); //转换图片 imgToBase64 imgToBase64pl=new imgToBase64(); int i=1; while(var8.hasNext()) { Picture pic = (Picture)var8.next(); //下载图片 // pic.writeImageContent(new FileOutputStream(imagePathStr + pic.suggestFullFileName())); //System.out.println("第"+i++ +":"+imgToBase64pl.convertFileToBase64(imagePathStr + pic.suggestFullFileName())); } Document htmlDocument = wordToHtmlConverter.getDocument(); //生成空文档 DOMSource domSource = new DOMSource(htmlDocument); //获取路径下html StreamResult streamResult = new StreamResult(new File(targetFileName)); //设置html文件规范 TransformerFactory tf = TransformerFactory.newInstance(); Transformer serializer = tf.newTransformer(); serializer.setOutputProperty("encoding", "utf-8"); serializer.setOutputProperty("indent", "yes"); serializer.setOutputProperty("method", "html"); serializer.transform(domSource, streamResult); out.println("doc转换完毕!"+streamResult.getSystemId()); addStyleToHtml a=new addStyleToHtml(); a.addstyle(targetFileName,style); }
package com.lly.demo.util; import org.jsoup.Jsoup; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; import org.jsoup.nodes.Node; import org.jsoup.select.Elements; import java.io.File; import java.io.FileWriter; import java.io.IOException; import java.util.ArrayList; import java.util.List; public class addStyleToHtml { public void addstyle(String targetFileName, String style) throws IOException { File htmlFile = new File(targetFileName); Document doc = Jsoup.parse(htmlFile, "UTF-8"); Element head = doc.head(); head.append("<style>" + style + "</style>"); Element spanA = doc.select("span:contains(TOC)").first(); // 获取第一个 span 元素 Element spanB = doc.select("span:contains(第1章 前言)").last().parent().parent(); // 获取最后一个 span 元素 // 创建新的<aside>元素 Element aside = doc.createElement("aside"); aside.addClass("aside"); // 创建包含目录的<div>元素 Element navList = doc.createElement("div"); navList.addClass("nav-list"); // 创建导航元素 Element navMark = doc.createElement("div"); navMark.addClass("nav-mark").text("导航"); Element currentElement = spanA; while (currentElement != null) { System.out.println(currentElement.outerHtml()); if (spanA != null && spanB != null) { navList.appendChild(currentElement); // 将<div>元素和导航元素添加到<aside>元素中 aside.appendChild(navList); aside.appendChild(navMark); // 在目录后面的元素之前插入<aside>元素 spanB.before(aside); } else { System.out.println("空的"); } if (currentElement == spanB) { break; } currentElement = currentElement.nextElementSibling(); } Element span = doc.select("span:contains(TOC)").first(); //span.remove(); // 删除span元素 // 输出更新后的HTML //System.out.println(doc.html()); FileWriter writer = new FileWriter(htmlFile); writer.write(doc.outerHtml()); writer.close(); } }