使用 docx4j 解析 Word(docx)文档:段落、表格与图片
自己瞎整的,供参考
网上的文档太少了,只能自己一边琢磨、一边借助 AI 来弄,最后整出了一个还算比较满意的版本,尤其是对 Word 中表格(合并单元格)的处理这块。
代码
直接贴代码
pom.xml
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-ooxml</artifactId>
<version>4.1.2</version>
</dependency>
注意:下面的代码实际使用的是 docx4j、hutool 和 fastjson2(见 import 部分),这些依赖也需要自行引入。
这是第一次上传的版本,存在一些问题,比如文档里的图片如果是 emf 格式就无法正常处理。
package com.ruoyi.project.project.util;
import cn.hutool.core.map.MapUtil;
import cn.hutool.core.util.RandomUtil;
import com.alibaba.fastjson2.JSONArray;
import com.ruoyi.framework.config.RuoYiConfig;
import com.ruoyi.framework.config.ServerConfig;
import org.docx4j.dml.wordprocessingDrawing.Inline;
import org.docx4j.openpackaging.exceptions.Docx4JException;
import org.docx4j.openpackaging.packages.WordprocessingMLPackage;
import org.docx4j.openpackaging.parts.Part;
import org.docx4j.openpackaging.parts.WordprocessingML.BinaryPart;
import org.docx4j.openpackaging.parts.WordprocessingML.MainDocumentPart;
import org.docx4j.openpackaging.parts.relationships.RelationshipsPart;
import org.docx4j.wml.*;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Component;
import javax.annotation.PostConstruct;
import javax.xml.bind.JAXBElement;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.math.BigInteger;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
@Component
public class DocUtil {
// Chinese numeral characters accepted inside a heading number (one through ten).
private static final String CHINESE_NUMBERS = "一二三四五六七八九十";
// Regex source for a chapter heading: text starting with "第", one or more numerals, then "章".
private static final String PATTERN_STRING = "^第[" + CHINESE_NUMBERS + "]+章";
// Regex source for a section heading: same shape, ending in "节".
private static final String PATTERN_STRING1 = "^第[" + CHINESE_NUMBERS + "]+节";
// Static handle to the server configuration; assigned from tempServerConfig in init().
static ServerConfig serverConfig;
// Instance field injected by Spring; init() copies it into the static field above.
@Autowired
ServerConfig tempServerConfig;
/** Compiled once instead of on every call; matches a leading "第…章" heading marker. */
private static final Pattern CHAPTER_PATTERN = Pattern.compile(PATTERN_STRING);

/**
 * Tells whether the text starts with a first-level (chapter) heading marker such as "第一章".
 *
 * @param input text to test; {@code null} is treated as "not a heading"
 * @return {@code true} when the text begins with "第", one or more Chinese numerals, then "章"
 */
public static boolean isPara(String input) {
    return input != null && CHAPTER_PATTERN.matcher(input).find();
}
/** Compiled once instead of on every call; matches a leading "第…节" heading marker. */
private static final Pattern SECTION_PATTERN = Pattern.compile(PATTERN_STRING1);

/**
 * Tells whether the text starts with a second-level (section) heading marker such as "第一节".
 *
 * @param input text to test; {@code null} is treated as "not a heading"
 * @return {@code true} when the text begins with "第", one or more Chinese numerals, then "节"
 */
public static boolean isPara1(String input) {
    return input != null && SECTION_PATTERN.matcher(input).find();
}
/**
 * Command-line smoke test: loads a .docx file, runs parseObject, formatDocumentObject and
 * formatDocumentTbl on its body, and prints every resulting top-level element.
 *
 * @param args optional; {@code args[0]} is the path of the .docx file to parse. When no argument
 *             is given the built-in sample path is used.
 * @throws Docx4JException if the document cannot be loaded
 * @throws IOException declared for compatibility with existing callers
 */
public static void main(String[] args) throws Docx4JException, IOException {
    String docxPath = (args != null && args.length > 0)
            ? args[0]
            : "/Users/xxxx/Desktop/workspace/表格测试.docx";
    WordprocessingMLPackage wordprocessingMLPackage = WordprocessingMLPackage.load(new File(docxPath));
    try {
        MainDocumentPart part = wordprocessingMLPackage.getMainDocumentPart();
        List<Object> list = part.getContent();
        // The relationships part is needed to resolve embedded images by relationship id.
        List<Map<String, Object>> maps = parseObject(list, part.getRelationshipsPart());
        List<Map<String, Object>> targetList = DocUtil.formatDocumentObject(maps, null);
        targetList = DocUtil.formatDocumentTbl(targetList);
        System.out.println("最后的结果:---------------------");
        for (Map<String, Object> stringObjectMap : targetList) {
            System.out.println(stringObjectMap);
        }
    } catch (Exception e) {
        e.printStackTrace();
    }
}
/**
 * Post-processes every table in an already formatted element list.
 *
 * <p>Elements whose "type" is not "TBL" are left untouched. For a table, each row map's "content"
 * is its list of cell maps. A cell whose "vMerge" reads "restart" gets the integer 1; a cell whose
 * "vMerge" reads "continue" bumps the counter of the matching cell in an earlier row (see
 * searchAndChangePreviousData) and loses its own "vMerge" key. The grid column advances by
 * "hMerge" (a BigInteger) when present, otherwise by one. Afterwards cells that are empty maps, or
 * that hold nothing but "hMerge", are removed and the table's "content" is replaced by the JSON
 * string of its rows.
 *
 * @param maps formatted elements; mutated in place
 * @return the same list instance that was passed in
 */
public static List<Map<String, Object>> formatDocumentTbl(List<Map<String, Object>> maps) {
    for (Map<String, Object> element : maps) {
        boolean isTable = element.containsKey("type") && element.get("type").equals("TBL");
        if (!isTable) {
            continue;
        }
        ArrayList<Map<String, Object>> rows = (ArrayList<Map<String, Object>>) element.get("content");

        // Pass 1: resolve vertical merges, walking the rows from top to bottom.
        int rowNo = 0;
        for (Map<String, Object> row : rows) {
            ArrayList<Map<String, Object>> cells = (ArrayList<Map<String, Object>>) row.get("content");
            int gridColumn = 0;
            for (Map<String, Object> cell : cells) {
                if (cell.containsKey("vMerge")) {
                    String mergeState = cell.get("vMerge").toString();
                    if (mergeState.equals("restart")) {
                        // First cell of a vertical merge: start its row-span counter.
                        cell.put("vMerge", 1);
                    } else if (mergeState.equals("continue")) {
                        // Continuation cell: credit the originating cell above, then drop the marker.
                        searchAndChangePreviousData(rowNo, gridColumn, rows);
                        cell.remove("vMerge");
                    }
                }
                gridColumn += cell.containsKey("hMerge")
                        ? ((BigInteger) cell.get("hMerge")).intValue()
                        : 1;
            }
            rowNo++;
        }

        // Pass 2: drop placeholder cells that carry no content of their own.
        for (Map<String, Object> row : rows) {
            List<Map<String, Object>> cells = (List<Map<String, Object>>) row.get("content");
            cells.removeIf(cell -> cell.isEmpty() || (cell.containsKey("hMerge") && cell.size() == 1));
        }

        element.put("content", JSONArray.toJSONString(rows));
    }
    return maps;
}
/**
 * Walks upward from the row above {@code rowIndex} looking for the cell that sits at grid column
 * {@code index} and holds a numeric "vMerge" counter, then increments that counter by one.
 *
 * <p>A cell at the target column whose "vMerge" is missing, "" or "continue" does not stop the
 * search; scanning simply carries on (rest of that row, then the rows above). Nothing happens when
 * no matching cell exists.
 *
 * @param rowIndex   index of the row containing the continuation cell
 * @param index      grid column of the continuation cell (horizontal spans counted via "hMerge")
 * @param tblContent table rows; each row map's "content" is the list of its cell maps
 */
private static void searchAndChangePreviousData(int rowIndex, int index, ArrayList<Map<String, Object>> tblContent) {
    int candidateRow = rowIndex - 1;
    while (candidateRow >= 0) {
        ArrayList<Map<String, Object>> cells =
                (ArrayList<Map<String, Object>>) tblContent.get(candidateRow).get("content");
        int gridColumn = 0;
        for (Map<String, Object> cell : cells) {
            if (gridColumn == index && cell.containsKey("vMerge")) {
                Object marker = cell.get("vMerge");
                if (!marker.equals("") && !marker.equals("continue")) {
                    // The marker is the Integer counter stored for a "restart" cell.
                    cell.put("vMerge", (int) marker + 1);
                    return;
                }
            }
            gridColumn += cell.containsKey("hMerge")
                    ? ((BigInteger) cell.get("hMerge")).intValue()
                    : 1;
        }
        candidateRow--;
    }
}
/**
 * Recursively converts docx4j content objects into a tree of plain maps.
 *
 * <p>Produced element types: "P" and "R" (content = parsed children), "TBL" (content = one parsed
 * cell list per table row), "tc" (content = parsed children, plus "hMerge" from the grid span and
 * "vMerge" from the vertical-merge setting; a vertical merge without a value is recorded as
 * "continue"), "IMG" (pixel width/height and the saved file URL) and "TEXT". Objects of any other
 * kind are ignored.
 *
 * @param list docx4j content objects (body, paragraph, run, row or cell content)
 * @param part relationships part used to resolve embedded images by relationship id
 * @return the parsed elements, in document order
 */
public static List<Map<String, Object>> parseObject(List<Object> list, RelationshipsPart part) {
    List<Map<String, Object>> resultList = new ArrayList<>();
    for (Object obj1 : list) {
        if (obj1 instanceof P) {
            List<Object> content = ((P) obj1).getContent();
            resultList.add(MapUtil.builder(new HashMap<String, Object>()).put("type", "P").put("content", parseObject(content, part)).build());
        } else if (obj1 instanceof R) {
            List<Object> content = ((R) obj1).getContent();
            resultList.add(MapUtil.builder(new HashMap<String, Object>()).put("type", "R").put("content", parseObject(content, part)).build());
        } else if (obj1 instanceof JAXBElement) {
            Object value = ((JAXBElement<?>) obj1).getValue();
            if (value instanceof Tbl) {
                List<Object> tableList = new ArrayList<>();
                for (Object obj : ((Tbl) value).getContent()) {
                    if (obj instanceof Tr) {
                        tableList.add(parseObject(((Tr) obj).getContent(), part));
                    }
                }
                resultList.add(MapUtil.builder(new HashMap<String, Object>()).put("type", "TBL").put("content", tableList).build());
            } else if (value instanceof Tc) {
                Tc tc = (Tc) value;
                Map<String, Object> build = MapUtil.builder(new HashMap<String, Object>()).put("type", "tc").put("content", parseObject(tc.getContent(), part)).build();
                // Cell properties are optional; a cell without them is simply not merged.
                TcPr tcPr = tc.getTcPr();
                if (tcPr != null) {
                    if (tcPr.getGridSpan() != null && tcPr.getGridSpan().getVal() != null) {
                        build.put("hMerge", tcPr.getGridSpan().getVal());
                    }
                    if (tcPr.getVMerge() != null) {
                        Object mergeValue = tcPr.getVMerge().getVal();
                        // A vertical merge without a value marks a continuation cell.
                        build.put("vMerge", mergeValue == null ? "continue" : mergeValue);
                    }
                }
                resultList.add(build);
            } else if (value instanceof Drawing) {
                Map<String, Object> imageElement = parseInlineImage((Drawing) value, part);
                if (imageElement != null) {
                    resultList.add(imageElement);
                }
            } else if (value instanceof Text) {
                String textValue = ((Text) value).getValue();
                // Texts starting with "HYPERLINK" are skipped (presumably field instructions — TODO confirm).
                if (textValue != null && !textValue.isEmpty() && !textValue.startsWith("HYPERLINK")) {
                    resultList.add(MapUtil.builder(new HashMap<String, Object>()).put("type", "TEXT").put("content", textValue).build());
                }
            }
        }
    }
    return resultList;
}

/**
 * Builds the "IMG" element for an inline picture, saving the embedded bytes through saveFile.
 *
 * @return the element, or {@code null} when the drawing is empty, is not inline, or does not wrap
 *         a picture
 */
private static Map<String, Object> parseInlineImage(Drawing drawing, RelationshipsPart part) {
    if (drawing.getAnchorOrInline().isEmpty()) {
        return null;
    }
    Object o = drawing.getAnchorOrInline().get(0);
    if (!(o instanceof Inline)) {
        // Only inline drawings are handled; anything else is skipped, as before.
        return null;
    }
    Inline inline = (Inline) o;
    if (inline.getGraphic() == null
            || inline.getGraphic().getGraphicData() == null
            || inline.getGraphic().getGraphicData().getPic() == null) {
        // The graphic carries no picture, so there is nothing to export.
        return null;
    }
    String relId = inline.getGraphic().getGraphicData().getPic().getBlipFill().getBlip().getEmbed();
    // Picture extent is stored in EMU; convert to pixels at 96 DPI.
    long cx = inline.getGraphic().getGraphicData().getPic().getSpPr().getXfrm().getExt().getCx();
    long cy = inline.getGraphic().getGraphicData().getPic().getSpPr().getXfrm().getExt().getCy();
    int widthPx = emuToPixels((cx), 96);
    int heightPx = emuToPixels((cy), 96);
    // Without an embed id there is no part to look up; the "1" placeholder is kept.
    Part imagePart = relId == null ? null : part.getPart(relId);
    String fileurl = "1";
    String filename = "image_" + System.currentTimeMillis() + RandomUtil.randomNumbers(6) + ".png";
    if (imagePart instanceof BinaryPart) {
        byte[] imageData = ((BinaryPart) imagePart).getBytes();
        fileurl = saveFile(imageData, filename);
    }
    return MapUtil.builder(new HashMap<String, Object>()).put("type", "IMG")
            .put("width", widthPx).put("height", heightPx).put("content", fileurl).build();
}
/**
 * Converts a length in EMU (English Metric Units) to inches.
 *
 * @param emu length in EMU
 * @return the same length in inches
 */
private static double emuToInches(long emu) {
    final double emuPerInch = 914400.0;
    return emu / emuPerInch;
}
/**
 * Converts a length in EMU to whole pixels at the given resolution, truncating toward zero.
 *
 * <p>Uses integer arithmetic (914400 EMU per inch) so that exact multiples of a pixel never come
 * out one short, which can happen when a floating-point product such as 6.999999… is truncated.
 *
 * @param emu length in EMU
 * @param dpi resolution in dots per inch
 * @return the length in pixels, clamped to the {@code int} range
 */
private static int emuToPixels(long emu, int dpi) {
    long pixels = emu * dpi / 914400L;
    return (int) Math.max(Integer.MIN_VALUE, Math.min(Integer.MAX_VALUE, pixels));
}
/**
 * Writes the image bytes into the "newFolder" directory under the upload path and returns the URL
 * path under which the file is exposed.
 *
 * <p>Adapt this to your own storage: the only contract is "store the bytes, return the path". The
 * upload path below is a placeholder and must be replaced with a real local directory.
 *
 * @param imageData raw image bytes
 * @param filename  target file name
 * @return "/profile/upload/newFolder/" followed by the file name
 * @throws RuntimeException wrapping the I/O error when the file cannot be written
 */
private static String saveFile(byte[] imageData, String filename) {
    String filePath = "xxxxxxxxxxx";
    File folder = new File(filePath + "/newFolder/");
    if (!folder.exists()) {
        folder.mkdirs();
    }
    // try-with-resources closes the stream even when the write fails.
    try (FileOutputStream fos = new FileOutputStream(filePath + "/newFolder/" + filename)) {
        fos.write(imageData);
        System.out.println("Image saved successfully.");
    } catch (IOException e) {
        // Also covers FileNotFoundException, which is an IOException.
        System.out.println("Image saved error.");
        throw new RuntimeException(e);
    }
    return "/profile/upload/newFolder/" + filename;
}
/**
 * Flattens the raw parse tree produced by parseObject into display-level elements.
 *
 * Dispatches on each map's "type":
 * - "R": replaced by its formatted children.
 * - "tc": children are formatted, runs of TEXT children are joined with '\n' into one TEXT
 *   element, the cell's "hMerge"/"vMerge" values are copied onto the resulting elements, and
 *   more than one resulting element is wrapped in a single "ARRAY" element.
 * - "P": an empty paragraph becomes an empty TEXT element (or nothing when the parent is a
 *   vMerge "continue" cell); a single child is passed through; several children become one
 *   concatenated TEXT element when all are TEXT, otherwise an "ARRAY" element.
 * - "TBL": rebuilt with its content processed by formatDocumentObjectList.
 * - anything else is passed through unchanged.
 *
 * @param maps      elements to format; each must carry a non-null "type"
 * @param parentMap the element whose content is being formatted, or null at the top level
 * @return a new list holding the formatted elements
 */
public static List<Map<String, Object>> formatDocumentObject(List<Map<String, Object>> maps, Map<String, Object> parentMap) {
List<Map<String, Object>> resultList = new ArrayList<>();
for (int i = 0; i < maps.size(); i++) {
Map<String, Object> map = maps.get(i);
if (map.get("type").equals("R")) {
// A run contributes its children directly; the run itself is not kept.
List<Map<String, Object>> content = formatDocumentObject((List<Map<String, Object>>) map.get("content"), map);
resultList.addAll(content);
} else if (map.get("type").equals("tc")) {
List<Map<String, Object>> content = formatDocumentObject((List<Map<String, Object>>) map.get("content"), map);
// Accumulates consecutive TEXT children, one per line.
StringBuffer sb = new StringBuffer();
List<Map<String, Object>> tempList = new ArrayList<>();
if (content.size() > 0) {
for (Map<String, Object> stringObjectMap : content) {
if (stringObjectMap.containsKey("type") && stringObjectMap.get("type").equals("TEXT")) {
sb.append(stringObjectMap.get("content"));
sb.append("\n");
} else {
// A non-text child ends the current text run: flush it (without the trailing '\n') first.
if (sb.length() > 0) {
Map<String, Object> tempMap = new HashMap<>();
tempMap.put("type", "TEXT");
tempMap.put("content", sb.toString().substring(0, sb.toString().length() - 1));
tempList.add(tempMap);
}
tempList.add(stringObjectMap);
sb = new StringBuffer();
}
}
}
// Flush the text run left over at the end of the cell.
if (sb.length() != 0) {
Map<String, Object> tempMap = new HashMap<>();
tempMap.put("type", "TEXT");
tempMap.put("content", sb.toString().substring(0, sb.toString().length() - 1));
tempList.add(tempMap);
sb = new StringBuffer();
}
content = tempList;
if (map.containsKey("hMerge") || map.containsKey("vMerge")) {
if (content.isEmpty() && map.get("vMerge") != null && map.get("vMerge").equals("continue")) {
// Empty continuation cell: keep a placeholder that carries only the merge attributes.
Map<String, Object> tempMap = new HashMap<>();
tempMap.put("vMerge", map.get("vMerge"));
if (map.containsKey("hMerge")) {
tempMap.put("hMerge", map.get("hMerge"));
}
content.add(tempMap);
} else {
// Otherwise copy the cell's merge attributes onto every resulting element.
if (map.containsKey("hMerge")) {
for (Map<String, Object> stringObjectMap : content) {
stringObjectMap.put("hMerge", map.get("hMerge"));
}
}
if (map.containsKey("vMerge")) {
for (Map<String, Object> stringObjectMap : content) {
stringObjectMap.put("vMerge", map.get("vMerge"));
}
}
}
}
// NOTE(review): the ARRAY wrapper built here gets only "type" and "content"; the merge
// attributes stay on the inner elements. formatDocumentTbl reads "hMerge"/"vMerge" from the
// row-level cell map, so a merged cell with several elements looks unmerged there — confirm
// whether the attributes should also be set on the wrapper.
if (content.size() > 1) {
Map<String, Object> tempMap = new HashMap<>();
tempMap.put("type", "ARRAY");
tempMap.put("content", content);
content = new ArrayList<>();
content.add(tempMap);
}
resultList.addAll(content);
} else if (map.get("type").equals("P")) {
// A paragraph usually holds one sentence, but may contain other structures as well.
List<Map<String, Object>> content = formatDocumentObject((List<Map<String, Object>>) map.get("content"), map);
if (content.isEmpty()) {
if (parentMap == null) {
// Empty top-level paragraph: keep it as an empty TEXT element.
resultList.add(MapUtil.builder(new HashMap<String, Object>()).put("type", "TEXT").put("content", "").build());
} else if (parentMap.get("type").equals("tc") && parentMap.containsKey("vMerge") && parentMap.get("vMerge").equals("continue")) {
// Empty paragraph inside a vertical-merge continuation cell: emit nothing, so the cell
// stays empty and is turned into a merge placeholder by the "tc" branch.
} else {
// Empty paragraph anywhere else: keep it as an empty TEXT element.
resultList.add(MapUtil.builder(new HashMap<String, Object>()).put("type", "TEXT").put("content", "").build());
}
} else if (content.size() == 1) {
resultList.addAll(content);
} else {
// Several children: concatenate when all are text, otherwise keep them together as an ARRAY.
boolean isAllText = true;
StringBuffer sb = new StringBuffer();
for (Map<String, Object> stringObjectMap : content) {
if (stringObjectMap.containsKey("type") && !stringObjectMap.get("type").equals("TEXT"))
isAllText = false;
sb.append(stringObjectMap.get("content"));
}
if (isAllText) {
resultList.add(MapUtil.builder(new HashMap<String, Object>()).put("type", "TEXT").put("content", sb.toString()).build());
} else {
resultList.add(MapUtil.builder(new HashMap<String, Object>()).put("type", "ARRAY").put("content", content).build());
}
}
} else if (map.get("type").equals("TBL")) {
// Tables are rebuilt from their row lists by formatDocumentObjectList.
Map<String, Object> newMap = new HashMap<>();
newMap.put("type", "TBL");
newMap.put("content", formatDocumentObjectList((List<Object>) map.get("content")));
resultList.add(newMap);
} else {
// TEXT, IMG and any other element pass through unchanged.
resultList.add(map);
}
}
return resultList;
}
/**
 * Converts the raw content of a "TBL" element into row maps.
 *
 * <p>Each list of cell maps becomes one map of type "TD" whose "content" is the formatted cell
 * list (despite the name, one "TD" map stands for a whole table row). A list of lists is
 * flattened recursively, and a bare map is passed through. An empty list yields an empty "TD" row
 * instead of failing, so that row positions stay aligned for the later vertical-merge pass.
 *
 * @param content raw table content as produced by parseObject
 * @return the row maps, in order
 */
@SuppressWarnings("unchecked")
private static List<Map<String, Object>> formatDocumentObjectList(List<Object> content) {
    List<Map<String, Object>> resultList = new ArrayList<>();
    for (Object o : content) {
        if (o instanceof ArrayList) {
            ArrayList<Object> oList = (ArrayList<Object>) o;
            if (oList.isEmpty()) {
                // Row without any parsed cell: previously get(0) threw IndexOutOfBoundsException.
                Map<String, Object> emptyRow = new HashMap<>();
                emptyRow.put("type", "TD");
                emptyRow.put("content", new ArrayList<Map<String, Object>>());
                resultList.add(emptyRow);
                continue;
            }
            Object first = oList.get(0);
            if (first instanceof ArrayList) {
                resultList.addAll(formatDocumentObjectList(oList));
            } else if (first instanceof HashMap) {
                Map<String, Object> newMap = new HashMap<>();
                newMap.put("type", "TD");
                // The row map doubles as the parent passed down to the formatter.
                newMap.put("content", formatDocumentObject((List<Map<String, Object>>) (List<?>) oList, newMap));
                resultList.add(newMap);
            }
        } else if (o instanceof HashMap) {
            resultList.add((Map<String, Object>) o);
        }
    }
    return resultList;
}
/**
 * Copies the injected {@code tempServerConfig} into the static {@code serverConfig} field once
 * the bean has been constructed.
 */
@PostConstruct
public void init() {
    serverConfig = tempServerConfig;
}
}
历时一个星期搞出来的,总算是符合了客户的要求。给自己记录一下。
项目是基于 RuoYi(若依)做的,里面很多用的都是若依的东西,感谢若依大佬。
第二次更改
拿去交付发现有很多图片解析不出来,打开发现是emf格式的文件。不得不说,word真坑人啊。
尝试加了一下处理,最后还是没法解决,改成把问题抛给前端去处理,开心.jpg
package com.ruoyi.project.project.util;
import cn.hutool.core.map.MapUtil;
import cn.hutool.core.util.RandomUtil;
import com.alibaba.fastjson2.JSONArray;
import com.ruoyi.framework.config.RuoYiConfig;
import com.ruoyi.framework.config.ServerConfig;
import net.arnx.wmf2svg.gdi.svg.SvgGdi;
import net.arnx.wmf2svg.gdi.wmf.WmfParser;
import org.apache.batik.transcoder.TranscoderInput;
import org.apache.batik.transcoder.TranscoderOutput;
import org.apache.batik.transcoder.image.PNGTranscoder;
import org.docx4j.dml.wordprocessingDrawing.Inline;
import org.docx4j.openpackaging.exceptions.Docx4JException;
import org.docx4j.openpackaging.packages.WordprocessingMLPackage;
import org.docx4j.openpackaging.parts.Part;
import org.docx4j.openpackaging.parts.WordprocessingML.BinaryPart;
import org.docx4j.openpackaging.parts.WordprocessingML.MainDocumentPart;
import org.docx4j.openpackaging.parts.relationships.RelationshipsPart;
import org.docx4j.wml.*;
import org.freehep.graphicsio.emf.EMFInputStream;
import org.freehep.graphicsio.emf.EMFRenderer;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Component;
import javax.annotation.PostConstruct;
import javax.imageio.ImageIO;
import javax.xml.bind.JAXBElement;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import java.awt.Color;
import java.awt.*;
import java.awt.image.BufferedImage;
import java.io.*;
import java.math.BigInteger;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
@Component
public class DocUtil {
// Chinese numeral characters accepted inside a heading number (one through ten).
private static final String CHINESE_NUMBERS = "一二三四五六七八九十";
// Regex source for a chapter heading: text starting with "第", one or more numerals, then "章".
private static final String PATTERN_STRING = "^第[" + CHINESE_NUMBERS + "]+章";
// Regex source for a section heading: same shape, ending in "节".
private static final String PATTERN_STRING1 = "^第[" + CHINESE_NUMBERS + "]+节";
// Static handle to the server configuration; assigned from tempServerConfig in init().
static ServerConfig serverConfig;
// Printed as a prefix of the error message in saveFileNew; never reassigned in this class.
private static Integer i = 0;
// Instance field injected by Spring; init() copies it into the static field above.
@Autowired
ServerConfig tempServerConfig;
/** Compiled once instead of on every call; matches a leading "第…章" heading marker. */
private static final Pattern CHAPTER_PATTERN = Pattern.compile(PATTERN_STRING);

/**
 * Tells whether the text starts with a first-level (chapter) heading marker such as "第一章".
 *
 * @param input text to test; {@code null} is treated as "not a heading"
 * @return {@code true} when the text begins with "第", one or more Chinese numerals, then "章"
 */
public static boolean isPara(String input) {
    return input != null && CHAPTER_PATTERN.matcher(input).find();
}
/** Compiled once instead of on every call; matches a leading "第…节" heading marker. */
private static final Pattern SECTION_PATTERN = Pattern.compile(PATTERN_STRING1);

/**
 * Tells whether the text starts with a second-level (section) heading marker such as "第一节".
 *
 * @param input text to test; {@code null} is treated as "not a heading"
 * @return {@code true} when the text begins with "第", one or more Chinese numerals, then "节"
 */
public static boolean isPara1(String input) {
    return input != null && SECTION_PATTERN.matcher(input).find();
}
/**
 * Command-line smoke test: loads a .docx file and runs parseObject, formatDocumentObject and
 * formatDocumentTbl on its body. The result is not printed; any failure is reported through the
 * stack trace.
 *
 * @param args optional; {@code args[0]} is the path of the .docx file to parse. When no argument
 *             is given the built-in sample path is used.
 * @throws Docx4JException if the document cannot be loaded
 * @throws IOException declared for compatibility with existing callers
 */
public static void main(String[] args) throws Docx4JException, IOException {
    String docxPath = (args != null && args.length > 0)
            ? args[0]
            : "/Users/hudongdong/Desktop/workspace/hongming/ebook-online/部署软件包/教研室拍摄数字教材2025.6.6/王昱蘅/202408 GJF210A型工程抢险救援箱组操作与使用.docx";
    WordprocessingMLPackage wordprocessingMLPackage = WordprocessingMLPackage.load(new File(docxPath));
    try {
        MainDocumentPart part = wordprocessingMLPackage.getMainDocumentPart();
        List<Object> list = part.getContent();
        // The relationships part is needed to resolve embedded images by relationship id.
        List<Map<String, Object>> maps = parseObject(list, part.getRelationshipsPart());
        List<Map<String, Object>> targetList = DocUtil.formatDocumentObject(maps, null);
        targetList = DocUtil.formatDocumentTbl(targetList);
    } catch (Exception e) {
        e.printStackTrace();
    }
}
/**
 * Post-processes every table in an already formatted element list.
 *
 * <p>Elements whose "type" is not "TBL" are left untouched. For a table, each row map's "content"
 * is its list of cell maps. A cell whose "vMerge" reads "restart" gets the integer 1; a cell whose
 * "vMerge" reads "continue" bumps the counter of the matching cell in an earlier row (see
 * searchAndChangePreviousData) and loses its own "vMerge" key. The grid column advances by
 * "hMerge" (a BigInteger) when present, otherwise by one. Afterwards cells that are empty maps, or
 * that hold nothing but "hMerge", are removed and the table's "content" is replaced by the JSON
 * string of its rows.
 *
 * @param maps formatted elements; mutated in place
 * @return the same list instance that was passed in
 */
public static List<Map<String, Object>> formatDocumentTbl(List<Map<String, Object>> maps) {
    for (Map<String, Object> element : maps) {
        boolean isTable = element.containsKey("type") && element.get("type").equals("TBL");
        if (!isTable) {
            continue;
        }
        ArrayList<Map<String, Object>> rows = (ArrayList<Map<String, Object>>) element.get("content");

        // Pass 1: resolve vertical merges, walking the rows from top to bottom.
        int rowNo = 0;
        for (Map<String, Object> row : rows) {
            ArrayList<Map<String, Object>> cells = (ArrayList<Map<String, Object>>) row.get("content");
            int gridColumn = 0;
            for (Map<String, Object> cell : cells) {
                if (cell.containsKey("vMerge")) {
                    String mergeState = cell.get("vMerge").toString();
                    if (mergeState.equals("restart")) {
                        // First cell of a vertical merge: start its row-span counter.
                        cell.put("vMerge", 1);
                    } else if (mergeState.equals("continue")) {
                        // Continuation cell: credit the originating cell above, then drop the marker.
                        searchAndChangePreviousData(rowNo, gridColumn, rows);
                        cell.remove("vMerge");
                    }
                }
                gridColumn += cell.containsKey("hMerge")
                        ? ((BigInteger) cell.get("hMerge")).intValue()
                        : 1;
            }
            rowNo++;
        }

        // Pass 2: drop placeholder cells that carry no content of their own.
        for (Map<String, Object> row : rows) {
            List<Map<String, Object>> cells = (List<Map<String, Object>>) row.get("content");
            cells.removeIf(cell -> cell.isEmpty() || (cell.containsKey("hMerge") && cell.size() == 1));
        }

        element.put("content", JSONArray.toJSONString(rows));
    }
    return maps;
}
/**
 * Walks upward from the row above {@code rowIndex} looking for the cell that sits at grid column
 * {@code index} and holds a numeric "vMerge" counter, then increments that counter by one.
 *
 * <p>A cell at the target column whose "vMerge" is missing, "" or "continue" does not stop the
 * search; scanning simply carries on (rest of that row, then the rows above). Nothing happens when
 * no matching cell exists.
 *
 * @param rowIndex   index of the row containing the continuation cell
 * @param index      grid column of the continuation cell (horizontal spans counted via "hMerge")
 * @param tblContent table rows; each row map's "content" is the list of its cell maps
 */
private static void searchAndChangePreviousData(int rowIndex, int index, ArrayList<Map<String, Object>> tblContent) {
    int candidateRow = rowIndex - 1;
    while (candidateRow >= 0) {
        ArrayList<Map<String, Object>> cells =
                (ArrayList<Map<String, Object>>) tblContent.get(candidateRow).get("content");
        int gridColumn = 0;
        for (Map<String, Object> cell : cells) {
            if (gridColumn == index && cell.containsKey("vMerge")) {
                Object marker = cell.get("vMerge");
                if (!marker.equals("") && !marker.equals("continue")) {
                    // The marker is the Integer counter stored for a "restart" cell.
                    cell.put("vMerge", (int) marker + 1);
                    return;
                }
            }
            gridColumn += cell.containsKey("hMerge")
                    ? ((BigInteger) cell.get("hMerge")).intValue()
                    : 1;
        }
        candidateRow--;
    }
}
/**
 * Recursively converts docx4j content objects into a tree of plain maps.
 *
 * <p>Produced element types: "P" and "R" (content = parsed children), "TBL" (content = one parsed
 * cell list per table row), "tc" (content = parsed children, plus "hMerge" from the grid span and
 * "vMerge" from the vertical-merge setting; a vertical merge without a value is recorded as
 * "continue"), "IMG" (pixel width/height and the saved file URL) and "TEXT". Objects of any other
 * kind are ignored.
 *
 * @param list docx4j content objects (body, paragraph, run, row or cell content)
 * @param part relationships part used to resolve embedded images by relationship id
 * @return the parsed elements, in document order
 */
public static List<Map<String, Object>> parseObject(List<Object> list, RelationshipsPart part) {
    List<Map<String, Object>> resultList = new ArrayList<>();
    for (Object obj1 : list) {
        if (obj1 instanceof P) {
            List<Object> content = ((P) obj1).getContent();
            resultList.add(MapUtil.builder(new HashMap<String, Object>()).put("type", "P").put("content", parseObject(content, part)).build());
        } else if (obj1 instanceof R) {
            List<Object> content = ((R) obj1).getContent();
            resultList.add(MapUtil.builder(new HashMap<String, Object>()).put("type", "R").put("content", parseObject(content, part)).build());
        } else if (obj1 instanceof JAXBElement) {
            Object value = ((JAXBElement<?>) obj1).getValue();
            if (value instanceof Tbl) {
                List<Object> tableList = new ArrayList<>();
                for (Object obj : ((Tbl) value).getContent()) {
                    if (obj instanceof Tr) {
                        tableList.add(parseObject(((Tr) obj).getContent(), part));
                    }
                }
                resultList.add(MapUtil.builder(new HashMap<String, Object>()).put("type", "TBL").put("content", tableList).build());
            } else if (value instanceof Tc) {
                Tc tc = (Tc) value;
                Map<String, Object> build = MapUtil.builder(new HashMap<String, Object>()).put("type", "tc").put("content", parseObject(tc.getContent(), part)).build();
                // Cell properties are optional; a cell without them is simply not merged.
                TcPr tcPr = tc.getTcPr();
                if (tcPr != null) {
                    if (tcPr.getGridSpan() != null && tcPr.getGridSpan().getVal() != null) {
                        build.put("hMerge", tcPr.getGridSpan().getVal());
                    }
                    if (tcPr.getVMerge() != null) {
                        Object mergeValue = tcPr.getVMerge().getVal();
                        // A vertical merge without a value marks a continuation cell.
                        build.put("vMerge", mergeValue == null ? "continue" : mergeValue);
                    }
                }
                resultList.add(build);
            } else if (value instanceof Drawing) {
                Map<String, Object> imageElement = parseInlineImage((Drawing) value, part);
                if (imageElement != null) {
                    resultList.add(imageElement);
                }
            } else if (value instanceof Text) {
                String textValue = ((Text) value).getValue();
                // Texts starting with "HYPERLINK" are skipped (presumably field instructions — TODO confirm).
                if (textValue != null && !textValue.isEmpty() && !textValue.startsWith("HYPERLINK")) {
                    resultList.add(MapUtil.builder(new HashMap<String, Object>()).put("type", "TEXT").put("content", textValue).build());
                }
            }
        }
    }
    return resultList;
}

/**
 * Builds the "IMG" element for an inline picture, saving the embedded bytes through saveFileNew.
 *
 * <p>The part's content type is passed on as the image type, with "image/x-emf" and "image/x-wmf"
 * shortened to "emf" and "wmf" so that saveFileNew can convert them.
 *
 * @return the element, or {@code null} when the drawing is empty, is not inline, or does not wrap
 *         a picture
 */
private static Map<String, Object> parseInlineImage(Drawing drawing, RelationshipsPart part) {
    if (drawing.getAnchorOrInline().isEmpty()) {
        return null;
    }
    Object o = drawing.getAnchorOrInline().get(0);
    if (!(o instanceof Inline)) {
        // Only inline drawings are handled; anything else is skipped, as before.
        return null;
    }
    Inline inline = (Inline) o;
    if (inline.getGraphic() == null
            || inline.getGraphic().getGraphicData() == null
            || inline.getGraphic().getGraphicData().getPic() == null) {
        // The graphic carries no picture, so there is nothing to export.
        return null;
    }
    String relId = inline.getGraphic().getGraphicData().getPic().getBlipFill().getBlip().getEmbed();
    // Picture extent is stored in EMU; convert to pixels at 96 DPI.
    long cx = inline.getGraphic().getGraphicData().getPic().getSpPr().getXfrm().getExt().getCx();
    long cy = inline.getGraphic().getGraphicData().getPic().getSpPr().getXfrm().getExt().getCy();
    int widthPx = emuToPixels((cx), 96);
    int heightPx = emuToPixels((cy), 96);
    // Without an embed id there is no part to look up; the "1" placeholder is kept.
    Part imagePart = relId == null ? null : part.getPart(relId);
    String fileurl = "1";
    String filename = "image_" + System.currentTimeMillis() + RandomUtil.randomNumbers(6) + ".png";
    if (imagePart instanceof BinaryPart) {
        String contentType = imagePart.getContentType();
        // Constant-first comparison keeps a missing content type from throwing.
        if ("image/x-emf".equals(contentType)) {
            contentType = "emf";
        } else if ("image/x-wmf".equals(contentType)) {
            contentType = "wmf";
        }
        byte[] imageData = ((BinaryPart) imagePart).getBytes();
        fileurl = saveFileNew(imageData, filename, contentType);
    }
    return MapUtil.builder(new HashMap<String, Object>()).put("type", "IMG")
            .put("width", widthPx).put("height", heightPx).put("content", fileurl).build();
}
/**
 * Converts a length in EMU (English Metric Units) to inches.
 *
 * @param emu length in EMU
 * @return the same length in inches
 */
private static double emuToInches(long emu) {
    final double emuPerInch = 914400.0;
    return emu / emuPerInch;
}
/**
 * Converts a length in EMU to whole pixels at the given resolution, truncating toward zero.
 *
 * <p>Uses integer arithmetic (914400 EMU per inch) so that exact multiples of a pixel never come
 * out one short, which can happen when a floating-point product such as 6.999999… is truncated.
 *
 * @param emu length in EMU
 * @param dpi resolution in dots per inch
 * @return the length in pixels, clamped to the {@code int} range
 */
private static int emuToPixels(long emu, int dpi) {
    long pixels = emu * dpi / 914400L;
    return (int) Math.max(Integer.MIN_VALUE, Math.min(Integer.MAX_VALUE, pixels));
}
/**
 * Decodes the image (rendering EMF data through FreeHEP) and stores it as a PNG file in the
 * "documentImages" directory under the upload path.
 *
 * @param imageData raw image bytes
 * @param filename  target file name
 * @param type      "emf" for EMF data; any other value is decoded with ImageIO
 * @return "/profile/upload/documentImages/" followed by the file name
 * @throws RuntimeException wrapping the I/O error when decoding or writing fails, including the
 *                          case where ImageIO has no reader for the data
 */
private static String saveFile(byte[] imageData, String filename, String type) {
    String filePath = RuoYiConfig.getUploadPath();
    try {
        File folder = new File(filePath + "/documentImages/");
        if (!folder.exists()) {
            folder.mkdirs();
        }
        BufferedImage result;
        if ("emf".equals(type)) {
            EMFInputStream inputStream = new EMFInputStream(new ByteArrayInputStream(imageData));
            System.out.println("height:" + inputStream.readHeader().getBounds().getHeight());
            System.out.println("width:" + inputStream.readHeader().getBounds().getWidth());
            EMFRenderer emfRenderer = new EMFRenderer(inputStream);
            // The canvas is sized from the bounds recorded in the EMF header.
            final int width = (int) inputStream.readHeader().getBounds().getWidth();
            final int height = (int) inputStream.readHeader().getBounds().getHeight();
            System.out.println("widht = " + width + " and height = " + height);
            result = new BufferedImage(width, height, BufferedImage.TYPE_INT_RGB);
            Graphics2D g2 = result.createGraphics();
            try {
                emfRenderer.paint(g2);
            } finally {
                // Release the graphics context even when rendering fails.
                g2.dispose();
            }
        } else {
            result = ImageIO.read(new ByteArrayInputStream(imageData));
        }
        if (result == null) {
            // ImageIO.read returns null when no registered reader understands the data.
            throw new IOException("Unsupported image data for " + filename + " (type " + type + ")");
        }
        ImageIO.write(result, "png", new File(filePath + "/documentImages/" + filename));
        System.out.println("Image saved successfully.");
    } catch (IOException e) {
        // Also covers FileNotFoundException, which is an IOException.
        System.out.println("Image saved error.");
        throw new RuntimeException(e);
    }
    return "/profile/upload/documentImages/" + filename;
}
/**
 * Stores an image in the "documentImages" directory under the upload path.
 *
 * <ul>
 *   <li>Any type other than "emf"/"wmf" (PNG, JPG, ...): the bytes are written unchanged.</li>
 *   <li>"emf": rendered to PNG with FreeHEP; if that fails a JPEG rendering is attempted, and if
 *       that fails too the original bytes are saved with an ".emf" extension.</li>
 *   <li>"wmf": converted to SVG with wmf2svg, then to PNG with Batik.</li>
 * </ul>
 *
 * <p>Failures are printed, not thrown, and the URL is returned even when nothing could be written,
 * so callers cannot tell success from failure by the return value.
 *
 * @param imageData raw image bytes
 * @param filename  target file name; its extension is replaced when a fallback format is used
 * @param type      "emf", "wmf", or anything else for "store as is"
 * @return "/profile/upload/documentImages/" followed by the final file name
 */
public static String saveFileNew(byte[] imageData, String filename, String type) {
    String filePath = RuoYiConfig.getUploadPath();
    String saveDir = filePath + "/documentImages/";
    try {
        File dir = new File(saveDir);
        if (!dir.exists()) {
            dir.mkdirs();
        }
        byte[] bytesToWrite = imageData;
        if ("emf".equals(type)) {
            System.out.println("Detected EMF, converting to PNG...");
            try {
                bytesToWrite = renderEmfToPng(imageData);
            } catch (RuntimeException | IOException e) {
                // Any rendering failure, not only a NullPointerException, triggers the fallback chain.
                System.out.println("EMF文件解析失败,尝试保存jpeg");
                try {
                    byte[] jpeg = saveJpeg(imageData);
                    if (jpeg == null || jpeg.length == 0) {
                        // An empty result would only produce a useless zero-byte file.
                        throw new IOException("JPEG fallback produced no data");
                    }
                    bytesToWrite = jpeg;
                    filename = withExtension(filename, ".jpeg");
                } catch (Exception exception) {
                    System.out.println("尝试保存jpeg失败,元文件保存");
                    bytesToWrite = imageData;
                    filename = withExtension(filename, ".emf");
                }
            }
        }
        if ("wmf".equals(type)) {
            System.out.println("Detected WMF, converting to PNG via SVG...");
            bytesToWrite = convertWmfToPng(imageData);
        }
        // try-with-resources closes the stream even when the write fails.
        try (FileOutputStream fos = new FileOutputStream(saveDir + filename)) {
            fos.write(bytesToWrite);
        }
        System.out.println("Image saved successfully: " + filename);
    } catch (Exception e) {
        e.printStackTrace();
        System.out.println(i + ":Image saved error.");
    }
    return "/profile/upload/documentImages/" + filename;
}

/** Renders EMF data onto a transparent canvas sized from the EMF header and encodes it as PNG. */
private static byte[] renderEmfToPng(byte[] imageData) throws IOException {
    InputStream bais = new ByteArrayInputStream(imageData);
    EMFInputStream emfIn = new EMFInputStream(bais, EMFInputStream.DEFAULT_VERSION);
    EMFRenderer renderer = new EMFRenderer(emfIn);
    Rectangle bounds = emfIn.readHeader().getBounds();
    BufferedImage bufferedImage = new BufferedImage(bounds.width, bounds.height, BufferedImage.TYPE_INT_ARGB);
    Graphics2D g2 = bufferedImage.createGraphics();
    try {
        g2.setRenderingHint(RenderingHints.KEY_ANTIALIASING, RenderingHints.VALUE_ANTIALIAS_ON);
        renderer.paint(g2);
    } finally {
        g2.dispose();
    }
    ByteArrayOutputStream baos = new ByteArrayOutputStream();
    ImageIO.write(bufferedImage, "png", baos);
    return baos.toByteArray();
}

/** Converts WMF data to PNG by way of an intermediate SVG document. */
private static byte[] convertWmfToPng(byte[] imageData) throws Exception {
    org.w3c.dom.Document svgDoc;
    try (InputStream bais = new ByteArrayInputStream(imageData)) {
        WmfParser parser = new WmfParser();
        SvgGdi gdi = new SvgGdi(false);
        parser.parse(bais, gdi);
        svgDoc = gdi.getDocument();
    }
    // Serialise the DOM to text with the standard Transformer so that Batik can read it.
    StringWriter writer = new StringWriter();
    Transformer transformer = TransformerFactory.newInstance().newTransformer();
    transformer.transform(new DOMSource(svgDoc), new StreamResult(writer));
    String svgXml = writer.toString();
    ByteArrayOutputStream baos = new ByteArrayOutputStream();
    PNGTranscoder transcoder = new PNGTranscoder();
    TranscoderInput input = new TranscoderInput(new StringReader(svgXml));
    TranscoderOutput output = new TranscoderOutput(baos);
    transcoder.transcode(input, output);
    baos.flush();
    return baos.toByteArray();
}

/** Replaces the extension of a file name, or appends it when the name has no dot. */
private static String withExtension(String filename, String extension) {
    int dot = filename.lastIndexOf('.');
    return (dot < 0 ? filename : filename.substring(0, dot)) + extension;
}
/**
 * Fallback EMF conversion: renders the EMF data onto a fixed 800x600 opaque white canvas and
 * encodes the result as JPEG.
 *
 * <p>The encoding call used to be commented out, so the method always returned an empty array;
 * it now returns the encoded image.
 *
 * @param imageData raw EMF bytes
 * @return the JPEG-encoded rendering
 * @throws IOException if the EMF data cannot be read or no JPEG writer is available
 */
private static byte[] saveJpeg(byte[] imageData) throws IOException {
    // Opaque RGB canvas of fixed size: JPEG has no alpha channel.
    BufferedImage image = new BufferedImage(800, 600, BufferedImage.TYPE_INT_RGB);
    Graphics2D g2d = image.createGraphics();
    try {
        // White background instead of the default black.
        g2d.setColor(Color.WHITE);
        g2d.fillRect(0, 0, image.getWidth(), image.getHeight());
        EMFInputStream emfIn = new EMFInputStream(new ByteArrayInputStream(imageData));
        new EMFRenderer(emfIn).paint(g2d);
    } finally {
        // Release the graphics context even when rendering throws.
        g2d.dispose();
    }
    ByteArrayOutputStream baos = new ByteArrayOutputStream();
    if (!ImageIO.write(image, "JPEG", baos)) {
        throw new IOException("No JPEG writer available");
    }
    return baos.toByteArray();
}
/**
 * Flattens the raw parse tree produced by parseObject into display-level elements.
 *
 * Dispatches on each map's "type":
 * - "R": replaced by its formatted children.
 * - "tc": children are formatted, runs of TEXT children are joined with '\n' into one TEXT
 *   element, the cell's "hMerge"/"vMerge" values are copied onto the resulting elements, and
 *   more than one resulting element is wrapped in a single "ARRAY" element.
 * - "P": an empty paragraph becomes an empty TEXT element (or nothing when the parent is a
 *   vMerge "continue" cell); a single child is passed through; several children become one
 *   concatenated TEXT element when all are TEXT, otherwise an "ARRAY" element.
 * - "TBL": rebuilt with its content processed by formatDocumentObjectList.
 * - anything else is passed through unchanged.
 *
 * @param maps      elements to format; each must carry a non-null "type"
 * @param parentMap the element whose content is being formatted, or null at the top level
 * @return a new list holding the formatted elements
 */
public static List<Map<String, Object>> formatDocumentObject(List<Map<String, Object>> maps, Map<String, Object> parentMap) {
List<Map<String, Object>> resultList = new ArrayList<>();
for (int i = 0; i < maps.size(); i++) {
Map<String, Object> map = maps.get(i);
if (map.get("type").equals("R")) {
// A run contributes its children directly; the run itself is not kept.
List<Map<String, Object>> content = formatDocumentObject((List<Map<String, Object>>) map.get("content"), map);
resultList.addAll(content);
} else if (map.get("type").equals("tc")) {
List<Map<String, Object>> content = formatDocumentObject((List<Map<String, Object>>) map.get("content"), map);
// Accumulates consecutive TEXT children, one per line.
StringBuffer sb = new StringBuffer();
List<Map<String, Object>> tempList = new ArrayList<>();
if (content.size() > 0) {
for (Map<String, Object> stringObjectMap : content) {
if (stringObjectMap.containsKey("type") && stringObjectMap.get("type").equals("TEXT")) {
sb.append(stringObjectMap.get("content"));
sb.append("\n");
} else {
// A non-text child ends the current text run: flush it (without the trailing '\n') first.
if (sb.length() > 0) {
Map<String, Object> tempMap = new HashMap<>();
tempMap.put("type", "TEXT");
tempMap.put("content", sb.toString().substring(0, sb.toString().length() - 1));
tempList.add(tempMap);
}
tempList.add(stringObjectMap);
sb = new StringBuffer();
}
}
}
// Flush the text run left over at the end of the cell.
if (sb.length() != 0) {
Map<String, Object> tempMap = new HashMap<>();
tempMap.put("type", "TEXT");
tempMap.put("content", sb.toString().substring(0, sb.toString().length() - 1));
tempList.add(tempMap);
sb = new StringBuffer();
}
content = tempList;
if (map.containsKey("hMerge") || map.containsKey("vMerge")) {
if (content.isEmpty() && map.get("vMerge") != null && map.get("vMerge").equals("continue")) {
// Empty continuation cell: keep a placeholder that carries only the merge attributes.
Map<String, Object> tempMap = new HashMap<>();
tempMap.put("vMerge", map.get("vMerge"));
if (map.containsKey("hMerge")) {
tempMap.put("hMerge", map.get("hMerge"));
}
content.add(tempMap);
} else {
// Otherwise copy the cell's merge attributes onto every resulting element.
if (map.containsKey("hMerge")) {
for (Map<String, Object> stringObjectMap : content) {
stringObjectMap.put("hMerge", map.get("hMerge"));
}
}
if (map.containsKey("vMerge")) {
for (Map<String, Object> stringObjectMap : content) {
stringObjectMap.put("vMerge", map.get("vMerge"));
}
}
}
}
// NOTE(review): the ARRAY wrapper built here gets only "type" and "content"; the merge
// attributes stay on the inner elements. formatDocumentTbl reads "hMerge"/"vMerge" from the
// row-level cell map, so a merged cell with several elements looks unmerged there — confirm
// whether the attributes should also be set on the wrapper.
if (content.size() > 1) {
Map<String, Object> tempMap = new HashMap<>();
tempMap.put("type", "ARRAY");
tempMap.put("content", content);
content = new ArrayList<>();
content.add(tempMap);
}
resultList.addAll(content);
} else if (map.get("type").equals("P")) {
// A paragraph usually holds one sentence, but may contain other structures as well.
List<Map<String, Object>> content = formatDocumentObject((List<Map<String, Object>>) map.get("content"), map);
if (content.isEmpty()) {
if (parentMap == null) {
// Empty top-level paragraph: keep it as an empty TEXT element.
resultList.add(MapUtil.builder(new HashMap<String, Object>()).put("type", "TEXT").put("content", "").build());
} else if (parentMap.get("type").equals("tc") && parentMap.containsKey("vMerge") && parentMap.get("vMerge").equals("continue")) {
// Empty paragraph inside a vertical-merge continuation cell: emit nothing, so the cell
// stays empty and is turned into a merge placeholder by the "tc" branch.
} else {
// Empty paragraph anywhere else: keep it as an empty TEXT element.
resultList.add(MapUtil.builder(new HashMap<String, Object>()).put("type", "TEXT").put("content", "").build());
}
} else if (content.size() == 1) {
resultList.addAll(content);
} else {
// Several children: concatenate when all are text, otherwise keep them together as an ARRAY.
boolean isAllText = true;
StringBuffer sb = new StringBuffer();
for (Map<String, Object> stringObjectMap : content) {
if (stringObjectMap.containsKey("type") && !stringObjectMap.get("type").equals("TEXT"))
isAllText = false;
sb.append(stringObjectMap.get("content"));
}
if (isAllText) {
resultList.add(MapUtil.builder(new HashMap<String, Object>()).put("type", "TEXT").put("content", sb.toString()).build());
} else {
resultList.add(MapUtil.builder(new HashMap<String, Object>()).put("type", "ARRAY").put("content", content).build());
}
}
// Disabled earlier variant of the paragraph merge, left in place for reference:
// StringBuilder sb = new StringBuilder();
// for (Map<String, Object> temp : content) {
// if (temp.get("type").equals("TEXT")) {
// sb.append(temp.get("content"));
// } else {
// if (sb.toString().length() > 0) {
// resultList.add(MapUtil.builder(new HashMap<String, Object>()).put("type", "TEXT").put("content", sb.toString()).build());
// sb = new StringBuilder();
// } else {
// resultList.add(temp);
// }
// }
// }
// resultList.add(MapUtil.builder(new HashMap<String, Object>()).put("type", "TEXT").put("content", sb.toString()).build());
} else if (map.get("type").equals("TBL")) {
// Tables are rebuilt from their row lists by formatDocumentObjectList.
Map<String, Object> newMap = new HashMap<>();
newMap.put("type", "TBL");
newMap.put("content", formatDocumentObjectList((List<Object>) map.get("content")));
resultList.add(newMap);
} else {
// TEXT, IMG and any other element pass through unchanged.
resultList.add(map);
}
}
return resultList;
}
/**
 * Converts the raw content of a "TBL" element into row maps.
 *
 * <p>Each list of cell maps becomes one map of type "TD" whose "content" is the formatted cell
 * list (despite the name, one "TD" map stands for a whole table row). A list of lists is
 * flattened recursively, and a bare map is passed through. An empty list yields an empty "TD" row
 * instead of failing, so that row positions stay aligned for the later vertical-merge pass.
 *
 * @param content raw table content as produced by parseObject
 * @return the row maps, in order
 */
@SuppressWarnings("unchecked")
private static List<Map<String, Object>> formatDocumentObjectList(List<Object> content) {
    List<Map<String, Object>> resultList = new ArrayList<>();
    for (Object o : content) {
        if (o instanceof ArrayList) {
            ArrayList<Object> oList = (ArrayList<Object>) o;
            if (oList.isEmpty()) {
                // Row without any parsed cell: previously get(0) threw IndexOutOfBoundsException.
                Map<String, Object> emptyRow = new HashMap<>();
                emptyRow.put("type", "TD");
                emptyRow.put("content", new ArrayList<Map<String, Object>>());
                resultList.add(emptyRow);
                continue;
            }
            Object first = oList.get(0);
            if (first instanceof ArrayList) {
                resultList.addAll(formatDocumentObjectList(oList));
            } else if (first instanceof HashMap) {
                Map<String, Object> newMap = new HashMap<>();
                newMap.put("type", "TD");
                // The row map doubles as the parent passed down to the formatter.
                newMap.put("content", formatDocumentObject((List<Map<String, Object>>) (List<?>) oList, newMap));
                resultList.add(newMap);
            }
        } else if (o instanceof HashMap) {
            resultList.add((Map<String, Object>) o);
        }
    }
    return resultList;
}
/**
 * Copies the injected {@code tempServerConfig} into the static {@code serverConfig} field once
 * the bean has been constructed.
 */
@PostConstruct
public void init() {
    serverConfig = tempServerConfig;
}
}
下面是第二版新增的 Maven 依赖:
<!-- EMF 渲染 -->
<dependency>
<groupId>org.freehep</groupId>
<artifactId>freehep-graphicsio-emf</artifactId>
<version>2.4</version>
</dependency>
<!-- Batik,用于 SVG -> PNG -->
<dependency>
<groupId>org.apache.xmlgraphics</groupId>
<artifactId>batik-transcoder</artifactId>
<version>1.14</version>
</dependency>
<dependency>
<groupId>org.apache.xmlgraphics</groupId>
<artifactId>batik-codec</artifactId>
<version>1.14</version>
</dependency>
<!-- WMF -> SVG 转换库 -->
<dependency>
<groupId>net.arnx</groupId>
<artifactId>wmf2svg</artifactId>
<version>0.9.8</version>
</dependency>