【无标题】poi-ooxml解析word

自己瞎整的,供参考

网上的文档太少了,只能自己一边琢磨、一边借助AI来整,最后整出了一个还算比较满意的版本,尤其是对word中表格的处理这块。

代码

直接贴代码

pom.xml

<dependency>
	<groupId>org.apache.poi</groupId>
	<artifactId>poi-ooxml</artifactId>
	<version>4.1.2</version>
</dependency>

这是第一次上传的版本,还有一些问题,比如无法正确处理emf格式的图片。注意:下面的代码实际使用的是docx4j(另外还用到了hutool、fastjson2),上面的pom只列出了poi-ooxml,这些依赖需要自行引入。

package com.ruoyi.project.project.util;

import cn.hutool.core.map.MapUtil;
import cn.hutool.core.util.RandomUtil;
import com.alibaba.fastjson2.JSONArray;
import com.ruoyi.framework.config.RuoYiConfig;
import com.ruoyi.framework.config.ServerConfig;
import org.docx4j.dml.wordprocessingDrawing.Inline;
import org.docx4j.openpackaging.exceptions.Docx4JException;
import org.docx4j.openpackaging.packages.WordprocessingMLPackage;
import org.docx4j.openpackaging.parts.Part;
import org.docx4j.openpackaging.parts.WordprocessingML.BinaryPart;
import org.docx4j.openpackaging.parts.WordprocessingML.MainDocumentPart;
import org.docx4j.openpackaging.parts.relationships.RelationshipsPart;
import org.docx4j.wml.*;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Component;

import javax.annotation.PostConstruct;
import javax.xml.bind.JAXBElement;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.math.BigInteger;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Converts the body of a .docx document (loaded with docx4j) into a nested
 * list-of-maps structure and post-processes tables.
 *
 * <p>Every node is a {@code Map} with a {@code "type"} key ({@code P}, {@code R},
 * {@code tc}, {@code TBL}, {@code TD}, {@code IMG}, {@code TEXT}, {@code ARRAY}) and a
 * {@code "content"} key. Table cells may additionally carry {@code "hMerge"} (the
 * cell's grid span) and {@code "vMerge"} (vertical merge state, later replaced by a
 * row-span count).
 *
 * <p>Typical pipeline, as used by {@link #main(String[])}:
 * {@link #parseObject} then {@link #formatDocumentObject} then {@link #formatDocumentTbl}.
 */
@Component
public class DocUtil {

    // Chinese numerals accepted inside chapter/section headings.
    private static final String CHINESE_NUMBERS = "一二三四五六七八九十";
    // Heading patterns: "第<numerals>章" (chapter) and "第<numerals>节" (section).
    private static final String PATTERN_STRING = "^第[" + CHINESE_NUMBERS + "]+章";
    private static final String PATTERN_STRING1 = "^第[" + CHINESE_NUMBERS + "]+节";
    // Compiled once; Pattern instances are immutable and safe to share.
    private static final Pattern CHAPTER_PATTERN = Pattern.compile(PATTERN_STRING);
    private static final Pattern SECTION_PATTERN = Pattern.compile(PATTERN_STRING1);
    static ServerConfig serverConfig;
    @Autowired
    ServerConfig tempServerConfig;

    /**
     * Tells whether the text starts with a first-level heading such as "第一章".
     *
     * @param input text to test; {@code null} yields {@code false}
     * @return {@code true} if the text begins with "第" + Chinese numerals + "章"
     */
    public static boolean isPara(String input) {
        return input != null && CHAPTER_PATTERN.matcher(input).find();
    }

    /**
     * Tells whether the text starts with a second-level heading such as "第一节".
     *
     * @param input text to test; {@code null} yields {@code false}
     * @return {@code true} if the text begins with "第" + Chinese numerals + "节"
     */
    public static boolean isPara1(String input) {
        return input != null && SECTION_PATTERN.matcher(input).find();
    }

    /**
     * Demo entry point: parses a .docx file and prints the resulting structure.
     *
     * @param args optional; {@code args[0]} overrides the built-in sample path
     */
    public static void main(String[] args) throws Docx4JException, IOException {
        String path = args.length > 0 ? args[0] : "/Users/xxxx/Desktop/workspace/表格测试.docx";
        // Load the docx file.
        WordprocessingMLPackage wordprocessingMLPackage = WordprocessingMLPackage.load(new File(path));
        try {
            MainDocumentPart part = wordprocessingMLPackage.getMainDocumentPart();
            List<Object> list = part.getContent();
            List<Map<String, Object>> maps = parseObject(list, part.getRelationshipsPart());
            List<Map<String, Object>> targetList = DocUtil.formatDocumentObject(maps, null);
            targetList = DocUtil.formatDocumentTbl(targetList);
            System.out.println("最后的结果:---------------------");
            for (Map<String, Object> stringObjectMap : targetList) {
                System.out.println(stringObjectMap);
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Resolves vertical merges in every top-level {@code TBL} node and serializes the
     * table content to a JSON string.
     *
     * <p>For each table: a cell whose {@code vMerge} is {@code "restart"} gets
     * {@code vMerge = 1}; each {@code "continue"} cell below it increments that counter
     * and loses its own {@code vMerge}. Placeholder cells that end up empty (or holding
     * only {@code hMerge}) are removed. Finally {@code content} is replaced by its JSON
     * representation.
     *
     * @param maps nodes produced by {@link #formatDocumentObject}; modified in place
     * @return the same list instance
     */
    @SuppressWarnings("unchecked")
    public static List<Map<String, Object>> formatDocumentTbl(List<Map<String, Object>> maps) {
        for (Map<String, Object> map : maps) {
            if (!"TBL".equals(map.get("type"))) {
                continue;
            }
            Object rawContent = map.get("content");
            if (!(rawContent instanceof List)) {
                // Content was already serialized (or is malformed): nothing left to resolve.
                continue;
            }
            List<Map<String, Object>> rows = (List<Map<String, Object>>) rawContent;
            for (int rowIndex = 0; rowIndex < rows.size(); rowIndex++) {
                List<Map<String, Object>> cells = (List<Map<String, Object>>) rows.get(rowIndex).get("content");
                // Grid column at which the current cell starts (spans taken into account).
                int column = 0;
                for (Map<String, Object> cell : cells) {
                    Object vMerge = cell.get("vMerge");
                    if (vMerge != null) {
                        String mergeState = vMerge.toString();
                        if (mergeState.equals("restart")) {
                            cell.put("vMerge", 1);
                        } else if (mergeState.equals("continue")) {
                            searchAndChangePreviousData(rowIndex, column, rows);
                            cell.remove("vMerge");
                        }
                    }
                    column += columnSpan(cell);
                }
            }
            for (Map<String, Object> row : rows) {
                List<Map<String, Object>> cells = (List<Map<String, Object>>) row.get("content");
                // Walk backwards so removals do not disturb the remaining indices.
                for (int c = cells.size() - 1; c >= 0; c--) {
                    Map<String, Object> cell = cells.get(c);
                    if (cell.isEmpty() || (cell.size() == 1 && cell.containsKey("hMerge"))) {
                        cells.remove(c);
                    }
                }
            }
            map.put("content", JSONArray.toJSONString(rows));
        }
        return maps;
    }

    /** Returns the number of grid columns a cell occupies: its {@code hMerge} value, or 1. */
    private static int columnSpan(Map<String, Object> cell) {
        Object span = cell.get("hMerge");
        return span instanceof BigInteger ? ((BigInteger) span).intValue() : 1;
    }

    /**
     * Walks upwards from {@code rowIndex - 1} and increments the numeric {@code vMerge}
     * of the first cell found that starts at grid column {@code index}.
     */
    @SuppressWarnings("unchecked")
    private static void searchAndChangePreviousData(int rowIndex, int index, List<Map<String, Object>> tblContent) {
        for (int r = rowIndex - 1; r >= 0; r--) {
            List<Map<String, Object>> cells = (List<Map<String, Object>>) tblContent.get(r).get("content");
            int column = 0;
            for (Map<String, Object> cell : cells) {
                if (column > index) {
                    // Past the target column; no later cell in this row can match.
                    break;
                }
                if (column == index) {
                    Object vMerge = cell.get("vMerge");
                    // Only an already-resolved counter is incremented; anything else is skipped.
                    if (vMerge instanceof Integer) {
                        cell.put("vMerge", (Integer) vMerge + 1);
                        return;
                    }
                }
                column += columnSpan(cell);
            }
        }
    }

    /**
     * Recursively converts docx4j content objects into maps.
     *
     * <p>Handles paragraphs ({@code P}), runs ({@code R}) and, when wrapped in a
     * {@code JAXBElement}, tables, table cells, inline drawings and text. A table's
     * {@code content} is a list holding one list of cell nodes per row. Other objects
     * are ignored.
     *
     * @param list content objects of a document part, paragraph, run, row or cell
     * @param part relationships part used to resolve embedded images
     * @return the converted nodes, in document order
     */
    @SuppressWarnings("rawtypes")
    public static List<Map<String, Object>> parseObject(List<Object> list, RelationshipsPart part) {
        List<Map<String, Object>> resultList = new ArrayList<>();
        for (Object obj1 : list) {
            if (obj1 instanceof P) {
                resultList.add(node("P", parseObject(((P) obj1).getContent(), part)));
            } else if (obj1 instanceof R) {
                resultList.add(node("R", parseObject(((R) obj1).getContent(), part)));
            } else if (obj1 instanceof JAXBElement) {
                Object value = ((JAXBElement) obj1).getValue();
                if (value instanceof Tbl) {
                    List<Object> tableList = new ArrayList<>();
                    for (Object obj : ((Tbl) value).getContent()) {
                        if (obj instanceof Tr) {
                            tableList.add(parseObject(((Tr) obj).getContent(), part));
                        }
                    }
                    resultList.add(node("TBL", tableList));
                } else if (value instanceof Tc) {
                    resultList.add(parseCell((Tc) value, part));
                } else if (value instanceof Drawing) {
                    Map<String, Object> image = parseInlineImage((Drawing) value, part);
                    if (image != null) {
                        resultList.add(image);
                    }
                } else if (value instanceof Text) {
                    String textValue = ((Text) value).getValue();
                    // Skip empty text and hyperlink field instructions.
                    if (textValue != null && !textValue.isEmpty() && !textValue.startsWith("HYPERLINK")) {
                        resultList.add(node("TEXT", textValue));
                    }
                }
            }
        }
        return resultList;
    }

    /** Converts a table cell, recording grid span ({@code hMerge}) and vertical merge ({@code vMerge}). */
    private static Map<String, Object> parseCell(Tc tc, RelationshipsPart part) {
        Map<String, Object> cell = node("tc", parseObject(tc.getContent(), part));
        TcPr tcPr = tc.getTcPr();
        if (tcPr == null) {
            // A cell without properties is simply not merged.
            return cell;
        }
        if (tcPr.getGridSpan() != null && tcPr.getGridSpan().getVal() != null) {
            cell.put("hMerge", tcPr.getGridSpan().getVal());
        }
        if (tcPr.getVMerge() != null) {
            Object val = tcPr.getVMerge().getVal();
            // A vMerge element without a value is treated as "continue".
            cell.put("vMerge", val == null ? "continue" : val);
        }
        return cell;
    }

    /**
     * Converts the first element of a drawing into an {@code IMG} node when it is an
     * inline picture; returns {@code null} for anything else.
     */
    private static Map<String, Object> parseInlineImage(Drawing drawing, RelationshipsPart part) {
        List<Object> anchors = drawing.getAnchorOrInline();
        if (anchors.isEmpty() || !(anchors.get(0) instanceof Inline)) {
            return null;
        }
        Inline inline = (Inline) anchors.get(0);
        if (inline.getGraphic() == null
                || inline.getGraphic().getGraphicData() == null
                || inline.getGraphic().getGraphicData().getPic() == null) {
            // Inline graphic that is not a picture: nothing to extract.
            return null;
        }
        String relId = inline.getGraphic().getGraphicData().getPic().getBlipFill().getBlip().getEmbed();
        // Picture extent in EMU, converted to pixels at 96 DPI.
        long cx = inline.getGraphic().getGraphicData().getPic().getSpPr().getXfrm().getExt().getCx();
        long cy = inline.getGraphic().getGraphicData().getPic().getSpPr().getXfrm().getExt().getCy();
        int widthPx = emuToPixels(cx, 96);
        int heightPx = emuToPixels(cy, 96);
        Part imagePart = part.getPart(relId);
        // "1" is the placeholder kept when the image bytes cannot be resolved.
        String fileurl = "1";
        String filename = "image_" + System.currentTimeMillis() + RandomUtil.randomNumbers(6) + ".png";
        if (imagePart instanceof BinaryPart) {
            byte[] imageData = ((BinaryPart) imagePart).getBytes();
            fileurl = saveFile(imageData, filename);
        }
        Map<String, Object> image = node("IMG", fileurl);
        image.put("width", widthPx);
        image.put("height", heightPx);
        return image;
    }

    /** Creates a mutable node with the given {@code type} and {@code content}. */
    private static Map<String, Object> node(String type, Object content) {
        Map<String, Object> result = new HashMap<>();
        result.put("type", type);
        result.put("content", content);
        return result;
    }

    // EMU to inches (914400 EMU per inch).
    private static double emuToInches(long emu) {
        return emu / 914400.0;
    }

    // EMU to pixels at the given DPI; the fractional part is truncated.
    private static int emuToPixels(long emu, int dpi) {
        return (int) (emuToInches(emu) * dpi);
    }

    /**
     * Writes image bytes to {@code <filePath>/newFolder/<filename>} and returns the
     * path under which the file is referenced.
     *
     * <p>Adjust {@code filePath} (or replace this method with your own storage
     * routine); only the returned path is used by the caller.
     *
     * @throws RuntimeException wrapping the {@link IOException} if the write fails
     */
    private static String saveFile(byte[] imageData, String filename) {
        // Local directory the images are stored in.
        String filePath = "xxxxxxxxxxx";
        File dir = new File(filePath + "/newFolder/");
        if (!dir.exists()) dir.mkdirs();
        // try-with-resources guarantees the stream is closed even when write() fails.
        try (FileOutputStream fos = new FileOutputStream(filePath + "/newFolder/" + filename)) {
            fos.write(imageData);
            System.out.println("Image saved successfully.");
        } catch (IOException e) {
            System.out.println("Image saved error.");
            throw new RuntimeException(e);
        }
        return "/profile/upload/newFolder/" + filename;
    }

    /**
     * Flattens the raw tree produced by {@link #parseObject}.
     *
     * <ul>
     *   <li>{@code R}: replaced by its formatted children.</li>
     *   <li>{@code P}: collapsed to one {@code TEXT} node when all children are text,
     *       to the single child when there is only one, or to an {@code ARRAY} node
     *       otherwise; an empty paragraph becomes an empty {@code TEXT} unless it sits
     *       in a vertically continued cell.</li>
     *   <li>{@code tc}: see {@code formatCell}.</li>
     *   <li>{@code TBL}: rows become {@code TD} nodes holding the formatted cells.</li>
     *   <li>anything else: kept as is.</li>
     * </ul>
     *
     * @param maps      nodes to format
     * @param parentMap node that owns {@code maps}, or {@code null} at the top level
     * @return the formatted nodes
     */
    @SuppressWarnings("unchecked")
    public static List<Map<String, Object>> formatDocumentObject(List<Map<String, Object>> maps, Map<String, Object> parentMap) {
        List<Map<String, Object>> resultList = new ArrayList<>();
        for (Map<String, Object> map : maps) {
            Object type = map.get("type");
            if ("R".equals(type)) {
                resultList.addAll(formatDocumentObject((List<Map<String, Object>>) map.get("content"), map));
            } else if ("tc".equals(type)) {
                resultList.addAll(formatCell(map));
            } else if ("P".equals(type)) {
                resultList.addAll(formatParagraph(map, parentMap));
            } else if ("TBL".equals(type)) {
                resultList.add(node("TBL", formatDocumentObjectList((List<Object>) map.get("content"))));
            } else {
                resultList.add(map);
            }
        }
        return resultList;
    }

    /**
     * Formats one table cell: consecutive text pieces are joined with newlines, merge
     * attributes are copied onto the resulting node(s), and several resulting nodes are
     * wrapped into one {@code ARRAY} node.
     */
    @SuppressWarnings("unchecked")
    private static List<Map<String, Object>> formatCell(Map<String, Object> cell) {
        List<Map<String, Object>> parts = formatDocumentObject((List<Map<String, Object>>) cell.get("content"), cell);
        List<Map<String, Object>> content = new ArrayList<>();
        StringBuilder text = new StringBuilder();
        for (Map<String, Object> part : parts) {
            if ("TEXT".equals(part.get("type"))) {
                text.append(part.get("content")).append("\n");
            } else {
                flushText(text, content);
                content.add(part);
            }
        }
        flushText(text, content);

        boolean hasHMerge = cell.containsKey("hMerge");
        boolean hasVMerge = cell.containsKey("vMerge");
        if (hasHMerge || hasVMerge) {
            if (content.isEmpty() && "continue".equals(cell.get("vMerge"))) {
                // Placeholder so the continued cell still occupies its column until
                // formatDocumentTbl has resolved the merge.
                Map<String, Object> placeholder = new HashMap<>();
                placeholder.put("vMerge", cell.get("vMerge"));
                if (hasHMerge) {
                    placeholder.put("hMerge", cell.get("hMerge"));
                }
                content.add(placeholder);
            } else {
                for (Map<String, Object> item : content) {
                    copyMergeAttributes(cell, item);
                }
            }
        }
        if (content.size() > 1) {
            Map<String, Object> wrapper = node("ARRAY", content);
            // The wrapper is what formatDocumentTbl sees as the cell, so it must carry
            // the merge attributes too; otherwise column positions and row spans of
            // merged multi-part cells are miscounted.
            copyMergeAttributes(cell, wrapper);
            List<Map<String, Object>> wrapped = new ArrayList<>();
            wrapped.add(wrapper);
            return wrapped;
        }
        return content;
    }

    /** Appends the buffered text (minus its trailing newline) as a TEXT node and clears the buffer. */
    private static void flushText(StringBuilder text, List<Map<String, Object>> target) {
        if (text.length() > 0) {
            target.add(node("TEXT", text.substring(0, text.length() - 1)));
            text.setLength(0);
        }
    }

    /** Copies {@code hMerge}/{@code vMerge} from a cell onto a node, when the cell has them. */
    private static void copyMergeAttributes(Map<String, Object> cell, Map<String, Object> target) {
        if (cell.containsKey("hMerge")) {
            target.put("hMerge", cell.get("hMerge"));
        }
        if (cell.containsKey("vMerge")) {
            target.put("vMerge", cell.get("vMerge"));
        }
    }

    /** Formats one paragraph; see {@link #formatDocumentObject} for the rules. */
    @SuppressWarnings("unchecked")
    private static List<Map<String, Object>> formatParagraph(Map<String, Object> paragraph, Map<String, Object> parentMap) {
        List<Map<String, Object>> content = formatDocumentObject((List<Map<String, Object>>) paragraph.get("content"), paragraph);
        if (content.size() == 1) {
            return content;
        }
        List<Map<String, Object>> result = new ArrayList<>();
        if (content.isEmpty()) {
            // An empty paragraph inside a vertically continued cell yields nothing, so
            // the cell can be recognized as a pure merge placeholder later on.
            boolean insideContinuedCell = parentMap != null
                    && "tc".equals(parentMap.get("type"))
                    && "continue".equals(parentMap.get("vMerge"));
            if (!insideContinuedCell) {
                result.add(node("TEXT", ""));
            }
            return result;
        }
        boolean isAllText = true;
        StringBuilder joined = new StringBuilder();
        for (Map<String, Object> item : content) {
            Object itemType = item.get("type");
            if (itemType != null && !"TEXT".equals(itemType)) {
                isAllText = false;
            }
            joined.append(item.get("content"));
        }
        result.add(isAllText ? node("TEXT", joined.toString()) : node("ARRAY", content));
        return result;
    }

    /**
     * Converts the raw table content (one list of cell nodes per row) into {@code TD}
     * nodes whose {@code content} holds the formatted cells of that row. Nested lists
     * are flattened; map elements are passed through unchanged.
     */
    @SuppressWarnings({"unchecked", "rawtypes"})
    private static List<Map<String, Object>> formatDocumentObjectList(List<Object> content) {
        List<Map<String, Object>> resultList = new ArrayList<>();
        for (Object o : content) {
            if (o instanceof List) {
                List oList = (List) o;
                if (!oList.isEmpty() && oList.get(0) instanceof List) {
                    resultList.addAll(formatDocumentObjectList(oList));
                } else if (oList.isEmpty() || oList.get(0) instanceof Map) {
                    // A row without cells still becomes an (empty) TD so row positions are kept.
                    Map<String, Object> newMap = new HashMap<>();
                    newMap.put("type", "TD");
                    newMap.put("content", formatDocumentObject(oList, newMap));
                    resultList.add(newMap);
                }
            } else if (o instanceof Map) {
                resultList.add((Map<String, Object>) o);
            }
        }
        return resultList;
    }

    /** Publishes the injected {@code ServerConfig} through the static field. */
    @PostConstruct
    public void init() {
        serverConfig = this.tempServerConfig;
    }
}

历时一个星期搞出来的,总算是符合了客户的要求。给自己记录一下。
项目是基于ruoyi做的,里面很多用的都是ruoyi的东西,感谢若依大佬。

第二次更改

拿去交付发现有很多图片解析不出来,打开发现是emf格式的文件。不得不说,word真坑人啊。
尝试加了一下处理,最后还是没法解决,改成把问题抛给前端去处理,开心.jpg

package com.ruoyi.project.project.util;

import cn.hutool.core.map.MapUtil;
import cn.hutool.core.util.RandomUtil;
import com.alibaba.fastjson2.JSONArray;
import com.ruoyi.framework.config.RuoYiConfig;
import com.ruoyi.framework.config.ServerConfig;
import net.arnx.wmf2svg.gdi.svg.SvgGdi;
import net.arnx.wmf2svg.gdi.wmf.WmfParser;
import org.apache.batik.transcoder.TranscoderInput;
import org.apache.batik.transcoder.TranscoderOutput;
import org.apache.batik.transcoder.image.PNGTranscoder;
import org.docx4j.dml.wordprocessingDrawing.Inline;
import org.docx4j.openpackaging.exceptions.Docx4JException;
import org.docx4j.openpackaging.packages.WordprocessingMLPackage;
import org.docx4j.openpackaging.parts.Part;
import org.docx4j.openpackaging.parts.WordprocessingML.BinaryPart;
import org.docx4j.openpackaging.parts.WordprocessingML.MainDocumentPart;
import org.docx4j.openpackaging.parts.relationships.RelationshipsPart;
import org.docx4j.wml.*;
import org.freehep.graphicsio.emf.EMFInputStream;
import org.freehep.graphicsio.emf.EMFRenderer;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Component;

import javax.annotation.PostConstruct;
import javax.imageio.ImageIO;
import javax.xml.bind.JAXBElement;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import java.awt.Color;
import java.awt.*;
import java.awt.image.BufferedImage;
import java.io.*;
import java.math.BigInteger;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

@Component
public class DocUtil {

    // 定义汉字数字
    private static final String CHINESE_NUMBERS = "一二三四五六七八九十";
    // 定义正则表达式模式
    private static final String PATTERN_STRING = "^第[" + CHINESE_NUMBERS + "]+章";
    private static final String PATTERN_STRING1 = "^第[" + CHINESE_NUMBERS + "]+节";
    static ServerConfig serverConfig;
    private static Integer i = 0;
    @Autowired
    ServerConfig tempServerConfig;

    /**
     * Tells whether the text starts with a first-level heading, i.e. "第" followed by
     * one or more Chinese numerals and "章".
     *
     * @param input text to test
     * @return {@code true} when the heading pattern is found
     */
    public static boolean isPara(String input) {
        return Pattern.compile(PATTERN_STRING).matcher(input).find();
    }

    /**
     * Tells whether the text starts with a second-level heading, i.e. "第" followed by
     * one or more Chinese numerals and "节".
     *
     * @param input text to test
     * @return {@code true} when the heading pattern is found
     */
    public static boolean isPara1(String input) {
        return Pattern.compile(PATTERN_STRING1).matcher(input).find();
    }

    /**
     * Demo entry point: loads a .docx file and runs the full conversion pipeline
     * (parse, flatten, resolve table merges). The result is not printed.
     *
     * @param args optional; {@code args[0]} overrides the built-in sample path
     */
    public static void main(String[] args) throws Docx4JException, IOException {
        // Default sample document; pass a path as the first argument to use another file.
        String path = args.length > 0
                ? args[0]
                : "/Users/hudongdong/Desktop/workspace/hongming/ebook-online/部署软件包/教研室拍摄数字教材2025.6.6/王昱蘅/202408  GJF210A型工程抢险救援箱组操作与使用.docx";
        WordprocessingMLPackage wordprocessingMLPackage = WordprocessingMLPackage.load(new File(path));
        try {
            MainDocumentPart part = wordprocessingMLPackage.getMainDocumentPart();
            List<Object> list = part.getContent();
            List<Map<String, Object>> maps = parseObject(list, part.getRelationshipsPart());
            List<Map<String, Object>> targetList = DocUtil.formatDocumentObject(maps, null);
            // Iterate over the returned list to inspect the converted nodes when debugging.
            DocUtil.formatDocumentTbl(targetList);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Resolves vertical merges in every top-level {@code TBL} node and serializes the
     * table content to a JSON string.
     *
     * <p>For each table: a cell whose {@code vMerge} is {@code "restart"} gets
     * {@code vMerge = 1}; each {@code "continue"} cell below it increments that counter
     * (via {@code searchAndChangePreviousData}) and loses its own {@code vMerge}.
     * Cells that end up empty, or holding only {@code hMerge}, are removed. Finally
     * {@code content} is replaced by its JSON representation.
     *
     * <p>Tables whose {@code content} is no longer a list (for example because this
     * method already processed them) are left untouched.
     *
     * @param maps nodes produced by {@code formatDocumentObject}; modified in place
     * @return the same list instance
     */
    @SuppressWarnings("unchecked")
    public static List<Map<String, Object>> formatDocumentTbl(List<Map<String, Object>> maps) {
        for (Map<String, Object> map : maps) {
            if (!"TBL".equals(map.get("type"))) {
                continue;
            }
            Object rawContent = map.get("content");
            if (!(rawContent instanceof ArrayList)) {
                // Already serialized or malformed: nothing left to resolve.
                continue;
            }
            ArrayList<Map<String, Object>> tblContent = (ArrayList<Map<String, Object>>) rawContent;
            for (int rowIndex = 0; rowIndex < tblContent.size(); rowIndex++) {
                List<Map<String, Object>> rowData = (List<Map<String, Object>>) tblContent.get(rowIndex).get("content");
                // Grid column at which the current cell starts (spans taken into account).
                int column = 0;
                for (Map<String, Object> cell : rowData) {
                    Object vMerge = cell.get("vMerge");
                    if (vMerge != null) {
                        String mergeState = vMerge.toString();
                        if (mergeState.equals("restart")) {
                            cell.put("vMerge", 1);
                        } else if (mergeState.equals("continue")) {
                            searchAndChangePreviousData(rowIndex, column, tblContent);
                            cell.remove("vMerge");
                        }
                    }
                    Object span = cell.get("hMerge");
                    column += span instanceof BigInteger ? ((BigInteger) span).intValue() : 1;
                }
            }
            for (Map<String, Object> row : tblContent) {
                List<Map<String, Object>> cells = (List<Map<String, Object>>) row.get("content");
                // Walk backwards so removals do not disturb the remaining indices.
                for (int c = cells.size() - 1; c >= 0; c--) {
                    Map<String, Object> cell = cells.get(c);
                    if (cell.isEmpty() || (cell.size() == 1 && cell.containsKey("hMerge"))) {
                        cells.remove(c);
                    }
                }
            }
            map.put("content", JSONArray.toJSONString(tblContent));
        }
        return maps;
    }

    /**
     * Walks upwards from {@code rowIndex - 1} and increments the numeric {@code vMerge}
     * counter of the first cell found that starts at grid column {@code index}.
     *
     * <p>Cells whose {@code vMerge} is absent or not an {@code Integer} are skipped
     * and the search continues in the row above.
     *
     * @param rowIndex   index of the row containing the continued cell
     * @param index      grid column of the continued cell ({@code hMerge} spans counted)
     * @param tblContent table rows; each row map holds its cells under {@code "content"}
     */
    @SuppressWarnings("unchecked")
    private static void searchAndChangePreviousData(int rowIndex, int index, ArrayList<Map<String, Object>> tblContent) {
        for (int r = rowIndex - 1; r >= 0; r--) {
            List<Map<String, Object>> rowData = (List<Map<String, Object>>) tblContent.get(r).get("content");
            int column = 0;
            for (Map<String, Object> cell : rowData) {
                if (column > index) {
                    // Past the target column; no later cell in this row can match.
                    break;
                }
                if (column == index) {
                    Object vMerge = cell.get("vMerge");
                    if (vMerge instanceof Integer) {
                        cell.put("vMerge", (Integer) vMerge + 1);
                        return;
                    }
                }
                Object span = cell.get("hMerge");
                column += span instanceof BigInteger ? ((BigInteger) span).intValue() : 1;
            }
        }
    }

    /**
     * Recursively converts docx4j content objects into maps with a {@code "type"} and a
     * {@code "content"} entry.
     *
     * <p>Handles paragraphs ({@code P}), runs ({@code R}) and, when wrapped in a
     * {@code JAXBElement}, tables ({@code TBL}), table cells ({@code tc}), inline
     * drawings ({@code IMG}) and text ({@code TEXT}). A table's {@code content} is a
     * list holding one list of cell nodes per row. Cells additionally carry
     * {@code hMerge} (grid span) and {@code vMerge} (vertical merge state) when the
     * cell properties define them. Other objects are ignored.
     *
     * @param list content objects of a document part, paragraph, run, row or cell
     * @param part relationships part used to resolve embedded images
     * @return the converted nodes, in document order
     */
    public static List<Map<String, Object>> parseObject(List<Object> list, RelationshipsPart part) {
        List<Map<String, Object>> resultList = new ArrayList<>();
        for (Object obj1 : list) {
            if (obj1 instanceof P) {
                List<Object> content = ((P) obj1).getContent();
                resultList.add(MapUtil.builder(new HashMap<String, Object>()).put("type", "P").put("content", parseObject(content, part)).build());
            } else if (obj1 instanceof R) {
                List<Object> content = ((R) obj1).getContent();
                resultList.add(MapUtil.builder(new HashMap<String, Object>()).put("type", "R").put("content", parseObject(content, part)).build());
            } else if (obj1 instanceof JAXBElement) {
                Object value = ((JAXBElement<?>) obj1).getValue();
                if (value instanceof Tbl) {
                    List<Object> tableList = new ArrayList<>();
                    for (Object obj : ((Tbl) value).getContent()) {
                        if (obj instanceof Tr) {
                            tableList.add(parseObject(((Tr) obj).getContent(), part));
                        }
                    }
                    resultList.add(MapUtil.builder(new HashMap<String, Object>()).put("type", "TBL").put("content", tableList).build());
                } else if (value instanceof Tc) {
                    Tc tc = (Tc) value;
                    Map<String, Object> build = MapUtil.builder(new HashMap<String, Object>()).put("type", "tc").put("content", parseObject(tc.getContent(), part)).build();
                    TcPr tcPr = tc.getTcPr();
                    // A cell without properties is simply not merged.
                    if (tcPr != null) {
                        if (tcPr.getGridSpan() != null && tcPr.getGridSpan().getVal() != null) {
                            build.put("hMerge", tcPr.getGridSpan().getVal());
                        }
                        if (tcPr.getVMerge() != null) {
                            Object mergeVal = tcPr.getVMerge().getVal();
                            // A vMerge element without a value is treated as "continue".
                            build.put("vMerge", mergeVal == null ? "continue" : mergeVal);
                        }
                    }
                    resultList.add(build);
                } else if (value instanceof Drawing) {
                    List<Object> anchors = ((Drawing) value).getAnchorOrInline();
                    if (anchors.isEmpty() || !(anchors.get(0) instanceof Inline)) {
                        continue;
                    }
                    Inline inline = (Inline) anchors.get(0);
                    if (inline.getGraphic() == null
                            || inline.getGraphic().getGraphicData() == null
                            || inline.getGraphic().getGraphicData().getPic() == null) {
                        // Inline graphic that is not a picture: nothing to extract.
                        continue;
                    }
                    String relId = inline.getGraphic().getGraphicData().getPic().getBlipFill().getBlip().getEmbed();
                    // Picture extent in EMU, converted to pixels at 96 DPI.
                    long cx = inline.getGraphic().getGraphicData().getPic().getSpPr().getXfrm().getExt().getCx();
                    long cy = inline.getGraphic().getGraphicData().getPic().getSpPr().getXfrm().getExt().getCy();
                    int widthPx = emuToPixels(cx, 96);
                    int heightPx = emuToPixels(cy, 96);
                    Part imagePart = part.getPart(relId);
                    // "1" is the placeholder kept when the image bytes cannot be resolved.
                    String fileurl = "1";
                    String filename = "image_" + System.currentTimeMillis() + RandomUtil.randomNumbers(6) + ".png";
                    if (imagePart instanceof BinaryPart) {
                        // Metafile content types are shortened to "emf"/"wmf"; any other
                        // content type is passed on unchanged.
                        String contentType = imagePart.getContentType();
                        if ("image/x-emf".equals(contentType)) {
                            contentType = "emf";
                        } else if ("image/x-wmf".equals(contentType)) {
                            contentType = "wmf";
                        }
                        byte[] imageData = ((BinaryPart) imagePart).getBytes();
                        fileurl = saveFileNew(imageData, filename, contentType);
                    }
                    resultList.add(MapUtil.builder(new HashMap<String, Object>()).put("type", "IMG")
                            .put("width", widthPx).put("height", heightPx).put("content", fileurl).build());
                } else if (value instanceof Text) {
                    String textValue = ((Text) value).getValue();
                    // Skip empty text and hyperlink field instructions.
                    if (textValue != null && !textValue.isEmpty() && !textValue.startsWith("HYPERLINK")) {
                        resultList.add(MapUtil.builder(new HashMap<String, Object>()).put("type", "TEXT").put("content", textValue).build());
                    }
                }
            }
        }
        return resultList;
    }

    /**
     * Converts a length in EMU (English Metric Units) to inches.
     *
     * @param emu length in EMU
     * @return the same length in inches (914400 EMU per inch)
     */
    private static double emuToInches(long emu) {
        final double emuPerInch = 914400.0;
        return emu / emuPerInch;
    }

    /**
     * Converts a length in EMU to whole pixels at the given resolution.
     *
     * @param emu length in EMU
     * @param dpi resolution in dots per inch
     * @return pixel count; the fractional part is discarded by the int cast
     */
    private static int emuToPixels(long emu, int dpi) {
        double inches = emuToInches(emu);
        double pixels = inches * dpi;
        return (int) pixels;
    }

    /**
     * Writes an embedded image as a PNG file under {@code <uploadPath>/documentImages/}.
     *
     * <p>EMF data is rasterised with the EMF renderer at the size given by the EMF header bounds;
     * any other type is decoded with {@link ImageIO}.
     *
     * @param imageData raw bytes of the embedded image
     * @param filename  target file name inside the {@code documentImages} directory
     * @param type      {@code "emf"} selects the EMF renderer; anything else is decoded by ImageIO
     * @return the path {@code "/profile/upload/documentImages/" + filename}
     * @throws RuntimeException wrapping the {@link IOException} when the data cannot be decoded
     *                          or the file cannot be written
     */
    private static String saveFile(byte[] imageData, String filename, String type) {
        // Upload root directory
        String saveDir = RuoYiConfig.getUploadPath() + "/documentImages/";
        try {
            File dir = new File(saveDir);
            if (!dir.exists()) dir.mkdirs();

            BufferedImage result;
            if ("emf".equals(type)) {
                EMFInputStream inputStream = new EMFInputStream(new ByteArrayInputStream(imageData));
                System.out.println("height:" + inputStream.readHeader().getBounds().getHeight());
                System.out.println("width:" + inputStream.readHeader().getBounds().getWidth());
                EMFRenderer emfRenderer = new EMFRenderer(inputStream);

                // Create the target bitmap from the EMF header bounds
                final int width = (int) inputStream.readHeader().getBounds().getWidth();
                final int height = (int) inputStream.readHeader().getBounds().getHeight();
                System.out.println("widht = " + width + " and height = " + height);
                result = new BufferedImage(width, height, BufferedImage.TYPE_INT_RGB);
                Graphics2D g2 = (Graphics2D) result.createGraphics();
                try {
                    emfRenderer.paint(g2);
                } finally {
                    // Release the graphics context even when rendering fails
                    g2.dispose();
                }
            } else {
                result = ImageIO.read(new ByteArrayInputStream(imageData));
                if (result == null) {
                    // ImageIO.read returns null when no registered reader understands the data;
                    // passing that null on to ImageIO.write would fail with an opaque error.
                    throw new IOException("Unsupported or corrupt image data, type=" + type);
                }
            }
            ImageIO.write(result, "png", new File(saveDir + filename));
            System.out.println("Image saved successfully.");
        } catch (IOException e) {
            // FileNotFoundException is an IOException, so one handler covers both cases
            System.out.println("Image saved error.");
            throw new RuntimeException(e);
        }
        return "/profile/upload/documentImages/" + filename;
    }

    /**
     * Saves an embedded image under {@code <uploadPath>/documentImages/}, converting metafiles first.
     *
     * <ul>
     *   <li>Any type other than {@code "emf"} / {@code "wmf"}: the bytes are written unchanged.</li>
     *   <li>{@code "emf"}: rendered to PNG with the EMF renderer. If that fails, a JPEG rendering is
     *       attempted via {@code saveJpeg}; if that fails too (or yields no bytes) the original
     *       bytes are saved with an {@code .emf} extension.</li>
     *   <li>{@code "wmf"}: converted to SVG with wmf2svg, then transcoded to PNG with Batik.</li>
     * </ul>
     *
     * <p>This method is best-effort: any failure is logged and swallowed, and the returned path is
     * produced even when no file could be written.
     *
     * @param imageData raw bytes of the embedded image
     * @param filename  target file name; its extension is replaced when a fallback format is used
     * @param type      {@code "emf"}, {@code "wmf"}, or any other value for "store as-is"
     * @return {@code "/profile/upload/documentImages/"} followed by the final file name
     */
    public static String saveFileNew(byte[] imageData, String filename, String type) {
        String saveDir = RuoYiConfig.getUploadPath() + "/documentImages/";

        try {
            File dir = new File(saveDir);
            if (!dir.exists()) dir.mkdirs();

            byte[] bytesToWrite = imageData;

            if ("emf".equals(type)) {
                System.out.println("Detected EMF, converting to PNG...");
                try {
                    EMFInputStream emfIn = new EMFInputStream(
                            new ByteArrayInputStream(imageData), EMFInputStream.DEFAULT_VERSION);
                    EMFRenderer renderer = new EMFRenderer(emfIn);
                    Rectangle bounds = emfIn.readHeader().getBounds();
                    BufferedImage bufferedImage =
                            new BufferedImage(bounds.width, bounds.height, BufferedImage.TYPE_INT_ARGB);
                    Graphics2D g2 = bufferedImage.createGraphics();
                    try {
                        g2.setRenderingHint(RenderingHints.KEY_ANTIALIASING, RenderingHints.VALUE_ANTIALIAS_ON);
                        renderer.paint(g2);
                    } finally {
                        g2.dispose();
                    }
                    // Encode as PNG bytes
                    ByteArrayOutputStream baos = new ByteArrayOutputStream();
                    ImageIO.write(bufferedImage, "png", baos);
                    bytesToWrite = baos.toByteArray();
                } catch (RuntimeException | IOException e) {
                    // Any parse/render failure (not only a NullPointerException) triggers the fallback chain.
                    System.out.println("EMF文件解析失败,尝试保存jpeg");
                    int dot = filename.lastIndexOf('.');
                    String baseName = dot >= 0 ? filename.substring(0, dot) : filename;
                    try {
                        byte[] jpeg = saveJpeg(imageData);
                        if (jpeg.length == 0) {
                            // Nothing was encoded; treat it as a failure instead of writing a 0-byte file.
                            throw new IOException("JPEG fallback produced no data");
                        }
                        bytesToWrite = jpeg;
                        filename = baseName + ".jpeg";
                    } catch (Exception exception) {
                        System.out.println("尝试保存jpeg失败,元文件保存");
                        bytesToWrite = imageData;
                        filename = baseName + ".emf";
                    }
                }
            } else if ("wmf".equals(type)) {
                System.out.println("Detected WMF, converting to PNG via SVG...");
                org.w3c.dom.Document svgDoc;
                try (InputStream bais = new ByteArrayInputStream(imageData)) {
                    WmfParser parser = new WmfParser();
                    SvgGdi gdi = new SvgGdi(false);
                    parser.parse(bais, gdi);
                    svgDoc = gdi.getDocument();
                }
                // Serialise the DOM to a string with the standard Transformer
                StringWriter writer = new StringWriter();
                Transformer transformer = TransformerFactory.newInstance().newTransformer();
                transformer.transform(new DOMSource(svgDoc), new StreamResult(writer));
                String svgXml = writer.toString();

                // Transcode the SVG text to PNG with Batik
                ByteArrayOutputStream baos = new ByteArrayOutputStream();
                PNGTranscoder transcoder = new PNGTranscoder();
                TranscoderInput input = new TranscoderInput(new StringReader(svgXml));
                TranscoderOutput output = new TranscoderOutput(baos);
                transcoder.transcode(input, output);
                baos.flush();
                bytesToWrite = baos.toByteArray();
            }

            // Final write; try-with-resources closes the stream even if write() throws
            try (FileOutputStream fos = new FileOutputStream(saveDir + filename)) {
                fos.write(bytesToWrite);
            }
            System.out.println("Image saved successfully: " + filename);
        } catch (Exception e) {
            // Deliberately best-effort: log and still return the path
            e.printStackTrace();
            System.out.println("Image saved error: " + filename);
        }
        return "/profile/upload/documentImages/" + filename;
    }

    /**
     * Renders EMF data onto a fixed 800x600 white canvas and encodes the canvas as JPEG.
     *
     * <p>An opaque RGB bitmap with a white fill is used so that the JPEG (which has no alpha
     * channel) does not end up with a black background.
     *
     * @param imageData raw EMF bytes
     * @return the JPEG-encoded bytes of the rendered canvas
     * @throws IOException if the EMF stream cannot be read or no JPEG writer is available;
     *                     runtime exceptions raised by the renderer propagate to the caller
     */
    private static byte[] saveJpeg(byte[] imageData) throws IOException {
        // 1. Opaque RGB bitmap of fixed size (avoids transparency issues)
        BufferedImage image = new BufferedImage(
                800, 600, BufferedImage.TYPE_INT_RGB);

        Graphics2D g2d = image.createGraphics();
        try {
            // 2. White background instead of the default black
            g2d.setColor(Color.WHITE);
            g2d.fillRect(0, 0, image.getWidth(), image.getHeight());

            // 3. Render the EMF; failures propagate so the caller can pick another fallback
            EMFInputStream emfIn = new EMFInputStream(new ByteArrayInputStream(imageData));
            new EMFRenderer(emfIn).paint(g2d);
        } finally {
            g2d.dispose();
        }

        // 4. Encode as JPEG; without this call the method would return an empty array
        ByteArrayOutputStream baos = new ByteArrayOutputStream();
        if (!ImageIO.write(image, "JPEG", baos)) {
            throw new IOException("No JPEG writer available");
        }
        return baos.toByteArray();
    }

    /**
     * Flattens a list of parsed document nodes into a simpler list of output nodes.
     *
     * <p>Each node is a map whose {@code "type"} entry selects the handling:
     * <ul>
     *   <li>{@code "R"}: replaced by its formatted children.</li>
     *   <li>{@code "tc"}: consecutive TEXT children are joined with {@code "\n"} into one TEXT
     *       node, {@code hMerge}/{@code vMerge} flags of the cell are copied onto the resulting
     *       children, and more than one resulting child is wrapped in a single ARRAY node.</li>
     *   <li>{@code "P"}: no children gives an empty TEXT node (nothing inside a {@code tc} whose
     *       {@code vMerge} is {@code "continue"}); one child is passed through; several children
     *       become one concatenated TEXT node when all are TEXT, otherwise one ARRAY node.</li>
     *   <li>{@code "TBL"}: a new TBL node whose content is the formatted nested row lists.</li>
     *   <li>anything else, including nodes without a {@code "type"}: passed through unchanged.</li>
     * </ul>
     *
     * @param maps      nodes to format; {@code null} is treated as an empty list
     * @param parentMap the node that contains {@code maps}, or {@code null} at the top level
     * @return a new list with the formatted nodes (pass-through nodes are the input instances)
     */
    public static List<Map<String, Object>> formatDocumentObject(List<Map<String, Object>> maps, Map<String, Object> parentMap) {
        List<Map<String, Object>> resultList = new ArrayList<>();
        if (maps == null) {
            return resultList;
        }
        for (Map<String, Object> map : maps) {
            // Constant-first equals: nodes without a "type" (e.g. merged-cell placeholders) must not throw
            Object type = map.get("type");
            if ("R".equals(type)) {
                resultList.addAll(formatDocumentObject(childNodesOf(map), map));
            } else if ("tc".equals(type)) {
                resultList.addAll(formatCellNode(map));
            } else if ("P".equals(type)) {
                resultList.addAll(formatParagraphNode(map, parentMap));
            } else if ("TBL".equals(type)) {
                @SuppressWarnings("unchecked")
                List<Object> rows = (List<Object>) map.get("content");
                resultList.add(docNode("TBL", formatDocumentObjectList(rows)));
            } else {
                resultList.add(map);
            }
        }
        return resultList;
    }

    /** Builds a new mutable node with the given {@code "type"} and {@code "content"}. */
    private static Map<String, Object> docNode(String type, Object content) {
        Map<String, Object> node = new HashMap<>();
        node.put("type", type);
        node.put("content", content);
        return node;
    }

    /** Returns the {@code "content"} entry of a node viewed as its child node list (may be null). */
    @SuppressWarnings("unchecked")
    private static List<Map<String, Object>> childNodesOf(Map<String, Object> node) {
        return (List<Map<String, Object>>) node.get("content");
    }

    /** Emits the buffered cell text (minus its trailing newline) as a TEXT node and clears the buffer. */
    private static void flushCellText(StringBuilder text, List<Map<String, Object>> target) {
        if (text.length() > 0) {
            target.add(docNode("TEXT", text.substring(0, text.length() - 1)));
            text.setLength(0);
        }
    }

    /** Formats one {@code "tc"} node: joins text runs, applies merge flags, wraps multiples in ARRAY. */
    private static List<Map<String, Object>> formatCellNode(Map<String, Object> cell) {
        List<Map<String, Object>> content = new ArrayList<>();
        StringBuilder text = new StringBuilder();
        for (Map<String, Object> child : formatDocumentObject(childNodesOf(cell), cell)) {
            if ("TEXT".equals(child.get("type"))) {
                text.append(child.get("content")).append('\n');
            } else {
                flushCellText(text, content);
                content.add(child);
            }
        }
        flushCellText(text, content);

        boolean hasHMerge = cell.containsKey("hMerge");
        boolean hasVMerge = cell.containsKey("vMerge");
        if (hasHMerge || hasVMerge) {
            if (content.isEmpty() && "continue".equals(cell.get("vMerge"))) {
                // Empty continuation cell: keep a type-less placeholder that carries only the merge flags
                Map<String, Object> placeholder = new HashMap<>();
                placeholder.put("vMerge", cell.get("vMerge"));
                if (hasHMerge) {
                    placeholder.put("hMerge", cell.get("hMerge"));
                }
                content.add(placeholder);
            } else {
                for (Map<String, Object> child : content) {
                    if (hasHMerge) {
                        child.put("hMerge", cell.get("hMerge"));
                    }
                    if (hasVMerge) {
                        child.put("vMerge", cell.get("vMerge"));
                    }
                }
            }
        }

        if (content.size() > 1) {
            List<Map<String, Object>> wrapped = new ArrayList<>();
            wrapped.add(docNode("ARRAY", content));
            return wrapped;
        }
        return content;
    }

    /** Formats one {@code "P"} node; see {@link #formatDocumentObject} for the rules. */
    private static List<Map<String, Object>> formatParagraphNode(Map<String, Object> paragraph, Map<String, Object> parentMap) {
        List<Map<String, Object>> content = formatDocumentObject(childNodesOf(paragraph), paragraph);
        if (content.size() == 1) {
            return content;
        }

        List<Map<String, Object>> out = new ArrayList<>();
        if (content.isEmpty()) {
            // An empty paragraph inside a vertically merged continuation cell produces nothing,
            // so that the cell stays empty; everywhere else it becomes an empty TEXT node.
            boolean inMergedContinuation = parentMap != null
                    && "tc".equals(parentMap.get("type"))
                    && "continue".equals(parentMap.get("vMerge"));
            if (!inMergedContinuation) {
                out.add(docNode("TEXT", ""));
            }
            return out;
        }

        boolean isAllText = true;
        StringBuilder sb = new StringBuilder();
        for (Map<String, Object> child : content) {
            if (child.containsKey("type") && !"TEXT".equals(child.get("type"))) {
                isAllText = false;
            }
            sb.append(child.get("content"));
        }
        out.add(isAllText ? docNode("TEXT", sb.toString()) : docNode("ARRAY", content));
        return out;
    }

    /**
     * Formats the nested list structure stored as the content of a TBL node.
     *
     * <p>A list whose first element is itself a list is descended into; a list whose first element
     * is a map becomes one {@code "TD"} node holding the formatted maps; a bare map is passed
     * through. Empty lists are skipped, as are elements that are neither a list nor a map.
     *
     * @param content nested lists/maps; {@code null} is treated as empty
     * @return the formatted nodes in encounter order
     */
    private static List<Map<String, Object>> formatDocumentObjectList(List<Object> content) {
        List<Map<String, Object>> resultList = new ArrayList<>();
        if (content == null) {
            return resultList;
        }
        for (Object o : content) {
            if (o instanceof List) {
                List<?> items = (List<?>) o;
                if (items.isEmpty()) {
                    // Nothing to inspect; get(0) would throw on an empty list
                    continue;
                }
                Object first = items.get(0);
                if (first instanceof List) {
                    @SuppressWarnings("unchecked")
                    List<Object> nested = (List<Object>) items;
                    resultList.addAll(formatDocumentObjectList(nested));
                } else if (first instanceof Map) {
                    @SuppressWarnings("unchecked")
                    List<Map<String, Object>> cells = (List<Map<String, Object>>) items;
                    Map<String, Object> group = new HashMap<>();
                    group.put("type", "TD");
                    group.put("content", formatDocumentObject(cells, group));
                    resultList.add(group);
                }
            } else if (o instanceof Map) {
                @SuppressWarnings("unchecked")
                Map<String, Object> node = (Map<String, Object>) o;
                resultList.add(node);
            }
        }
        return resultList;
    }

    /**
     * Copies the injected {@code tempServerConfig} into the static {@code serverConfig} field
     * so that static methods of this class can read it.
     */
    @PostConstruct
    public void init() {
        serverConfig = tempServerConfig;
    }

}

下面是需要在 pom.xml 中添加的 Maven 依赖:

<!-- EMF 渲染 -->
        <dependency>
            <groupId>org.freehep</groupId>
            <artifactId>freehep-graphicsio-emf</artifactId>
            <version>2.4</version>
        </dependency>

        <!-- Batik,用于 SVG -> PNG -->
        <dependency>
            <groupId>org.apache.xmlgraphics</groupId>
            <artifactId>batik-transcoder</artifactId>
            <version>1.14</version>
        </dependency>
        <dependency>
            <groupId>org.apache.xmlgraphics</groupId>
            <artifactId>batik-codec</artifactId>
            <version>1.14</version>
        </dependency>

        <!-- WMF -> SVG 转换库 -->
        <dependency>
            <groupId>net.arnx</groupId>
            <artifactId>wmf2svg</artifactId>
            <version>0.9.8</version>
        </dependency>
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值