java:excel文件读写四、XSSF and SAX (Event API)

java:excel文件读写四、XSSF and SAX (Event API)

1 前言

参考apache poi官网的事件api:https://poi.apache.org/components/spreadsheet/how-to.html
XSSF and SAX (Event API)为xlsx文件操作的事件API。

其中如下提到了一种处理方式:

An example is also provided showing how to combine the user API and the SAX API by doing a streaming parse of larger worksheets and a traditional user-model parse of the rest of a workbook.

重写SheetContentsHandler的startRow、endRow、cell方法来获取excel的数据:

private static SheetContentsHandler createSheetContentsHandler() {
    return new SheetContentsHandler() {

        @Override
        public void startRow(int rowNum) {
        }

        @Override
        public void endRow(int rowNum) {
        }

        @Override
        public void cell(String cellReference, String formattedValue, XSSFComment comment) {
        }
    };
}

在处理excel的sheet表数据时,执行顺序是,先执行startRow,rowNum从0开始(表格的第一行);然后执行多次cell(每行有多个cell数据),第一个参数cellReference,指的是该cell的列的字母,尾巴后带上数字,formattedValue是cell的格式化内容;最后执行endRow,亦是从0开始(表格的第一行)。

依赖配置如下:

<parent>
    <groupId>org.springframework.boot</groupId>
    <artifactId>spring-boot-starter-parent</artifactId>
    <version>2.5.4</version>
</parent>

<dependencies>

    <dependency>
        <groupId>org.springframework.boot</groupId>
        <artifactId>spring-boot-starter-web</artifactId>
    </dependency>
    
	<!--  xls  -->
    <dependency>
        <groupId>org.apache.poi</groupId>
        <artifactId>poi</artifactId>
        <version>5.2.1</version>
    </dependency>

	<!--  xlsx  -->
    <dependency>
        <groupId>org.apache.poi</groupId>
        <artifactId>poi-ooxml</artifactId>
        <version>5.2.1</version>
    </dependency>
    
	<!--  读取xml  -->
    <dependency>
        <groupId>dom4j</groupId>
        <artifactId>dom4j</artifactId>
        <version>1.6.1</version>
    </dependency>

</dependencies>

2 使用

官方文档中获取某个sheet表的inputStream流操作如下:

public void processOneSheet(String filename) throws Exception {
    OPCPackage pkg = OPCPackage.open(filename);
    XSSFReader r = new XSSFReader( pkg );
    SharedStringsTable sst = r.getSharedStringsTable();
    XMLReader parser = fetchSheetParser(sst);
    // To look up the Sheet Name / Sheet Order / rID,
    //  you need to process the core Workbook stream.
    // Normally it's of the form rId# or rSheet#
    InputStream sheet2 = r.getSheet("rId2");
    InputSource sheetSource = new InputSource(sheet2);
    parser.parse(sheetSource);
    sheet2.close();
}

如上,是根据sheet表的rId来获取inputStream流,如下方式可以获取sheet表对应的rId:

(1)新建一个test456.xlsx,3个sheet表
(2)修改该excel文件的后缀名为.zip
(3)解压后在xl下找到workbook.xml并打开
在这里插入图片描述
可以发现,excel的rId在workbook.xml中下的sheets标签下,多个子sheet标签中具有该值:
在这里插入图片描述
故而先创建xml读取的工具类,代码如下:

package com.xiaoxu.utils.xml;

import org.dom4j.Attribute;
import org.dom4j.Document;
import org.dom4j.DocumentException;
import org.dom4j.Element;
import org.dom4j.io.SAXReader;

import java.io.InputStream;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

/**
 * @author xiaoxu
 * @date 2022-04-20
 * spring_boot:com.xiaoxu.utils.xml.XmlUtil
 */
public class XmlUtil {
    @SuppressWarnings(value = {"unchecked"})
    public static List<Map<String, String>> parseWorkBookXml(InputStream inputStream){
        // xml读取对象        
        SAXReader saxReader = new SAXReader();
        // 获得文档对象        
        Document doc;
        try {
            doc = saxReader.read(inputStream);
        } catch (DocumentException e) {
            e.printStackTrace();
            return new ArrayList<>();
        }
        // 通过文档获取根元素
        Element rootElement = doc.getRootElement();
        // 开始解析元素
        System.out.println("开始解析:"+rootElement.getName());
        // 获取所有子元素
        List<Element> elements = rootElement.elements();
        List<Map<String, String>> sheetElementAttrs = new ArrayList<>();
        for (int i = 0; i < elements.size(); i++) {
            Element element = elements.get(i);
            if(element.getName().equals("sheets")){
                sheetElementAttrs = getSheetElementAttrs(element);
                System.out.println(sheetElementAttrs);
            }
        }
        return sheetElementAttrs;
    }

    /**
     * @param e workbook xml  表的根sheets元素
     * @return 数据
     */
    @SuppressWarnings(value = {"unchecked"})
    public static List<Map<String,String>> getSheetElementAttrs(Element e){
        List<Map<String,String>> attrs = new ArrayList<>();
        List<Element> elements = e.elements();
        Element element;
        Map<String,String> perElement;
        List<Attribute> attributes;
        for (Element value : elements) {
            perElement = new HashMap<>();
            element = value;
            attributes = element.attributes();
            for (Attribute attribute : attributes) {
                perElement.put(attribute.getName(), attribute.getValue());
            }
            attrs.add(perElement);
        }
        return attrs;
    }

}

然后创建XSSF事件API工具类:

package com.xiaoxu.utils.Excel;

import com.xiaoxu.utils.ExcelUtil;
import com.xiaoxu.utils.xml.XmlUtil;
import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
import org.apache.poi.util.XMLHelper;
import org.apache.poi.xssf.eventusermodel.ReadOnlySharedStringsTable;
import org.apache.poi.xssf.eventusermodel.XSSFReader;
import org.apache.poi.xssf.eventusermodel.XSSFSheetXMLHandler;
import org.apache.poi.xssf.usermodel.XSSFComment;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
import org.springframework.lang.NonNull;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;

import javax.xml.parsers.ParserConfigurationException;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * @author xiaoxu
 * @date 2022-04-18
 * spring_boot:com.xiaoxu.utils.Excel.XSSFEventApiUtil
 */
public class XSSFEventApiUtil {

    public static void processOnSheet(@NonNull String fileName, String sheetName, XSSFSheetXMLHandler.SheetContentsHandler sheetContentsHandler){
        try {
            String excelFileName = ExcelUtil.getExcelFileName(fileName);

            FileInputStream fileInputStream = new FileInputStream(excelFileName);
            XSSFWorkbook wb = new XSSFWorkbook(fileInputStream);

            ReadOnlySharedStringsTable sst = new ReadOnlySharedStringsTable(wb.getPackage());
            XSSFSheetXMLHandler xssfSheetXMLHandler;
            if(sheetContentsHandler==null){
                xssfSheetXMLHandler = new XSSFSheetXMLHandler(wb.getStylesSource(), sst, getSheetContentsHandler(), false);
            }else{
                xssfSheetXMLHandler = new XSSFSheetXMLHandler(wb.getStylesSource(), sst, sheetContentsHandler, false);
            }

            XMLReader parser = XMLHelper.newXMLReader();
            parser.setContentHandler(xssfSheetXMLHandler);

            XSSFReader x = new XSSFReader(wb.getPackage());

            // getWorkbookData可以获取workbook.xml的内容     
            InputStream workbookData = x.getWorkbookData();

            List<Map<String, String>> maps = XmlUtil.parseWorkBookXml(workbookData);
            String rId = "";
            String s = Optional.ofNullable(sheetName).orElse("rId1");
            if(s.equals("rId1")){
                rId = s;
            }else{
                for (Map<String, String> map : maps) {
                    if(map.get("name").equals(s)){
                        rId = map.get("id");
                    }
                }
                if(rId.equals("")){
                    throw new RuntimeException("rId根据workbook的xml没有获取到,请检查");
                }
            }
            System.out.println("最终rId:"+rId);
            InputStream input = x.getSheet(rId);

            InputSource inputSource1 = new InputSource(input);
            parser.parse(inputSource1);
            input.close();
            wb.close();
        } catch (IOException | SAXException | ParserConfigurationException | OpenXML4JException e ) {
            e.printStackTrace();
        }
    }

    public static XSSFSheetXMLHandler.SheetContentsHandler getSheetContentsHandler(){
        return new XSSFSheetXMLHandler.SheetContentsHandler() {
            @Override
            public void startRow(int i) {
                System.out.println("开始的rowNum:"+i);
            }

            @Override
            public void endRow(int i) {
                System.out.println("结束的rowNum:"+i);
            }

            //s1,比如某列是AO127,那么s1将会打印出AO127;s是cell的String类型的value(内容)            
            @Override
            public void cell(String s1, String s, XSSFComment xssfComment) {
                System.out.println("cell的序列号:"+s1);
                System.out.println("cell的内容:"+s);
            }
        };
    }

    // 获取excel中对应的列明的属性,形如A\B\C\..AO...
    public static String getColumnName(String str){
        String res = "";
        String pattern  = "[A-Z]{1,3}";
        Pattern p = Pattern.compile(pattern);
        Matcher m = p.matcher(str);
        if(m.find()){
            res = m.group(0);
        }
        return res;
    }
}

ExcelUtil中的getExcelFileName方法:

public static String getExcelFileName(@NonNull String fileName){
    File deskUrl = FileSystemView.getFileSystemView().getHomeDirectory();
    List<String> splitArr = ExcelUtil.splitFileName(fileName);

    if(splitArr.size()==1){
        fileName = deskUrl+ separator + splitArr.get(0) +xlsSeparator;
    }else if(splitArr.size()==2){
        if(!(xlsxSeparator.equals(splitArr.get(1))||xlsSeparator.equals(splitArr.get(1)))){
            throw new RuntimeException("文件名后缀必须为.xls或者.xlsx");
        }
        StringBuilder sb = new StringBuilder();
        sb.append(deskUrl).append(separator);
        for (String s : splitArr) {
            sb.append(s);
        }
        fileName = sb.toString();
    }else{
        throw new RuntimeException("文件名不正确");
    }
    return fileName;
}

SheetContentsHandler抽象类:

package com.xiaoxu.utils.Excel;

import lombok.Data;
import org.apache.poi.xssf.eventusermodel.XSSFSheetXMLHandler;
import org.apache.poi.xssf.usermodel.XSSFComment;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

/**
 * @author xiaoxu
 * @date 2022-04-20
 * spring_boot:com.xiaoxu.utils.Excel.AbstractSheetContentHandler
 */
@Data
public abstract class AbstractSheetContentHandler implements XSSFSheetXMLHandler.SheetContentsHandler {
    // 每行数据存储
    private Map<String,String> cols =new HashMap<>();

    // 全部的excel数据存储
    private List<Map<String,String>> totalExcelData = new ArrayList<>();

    // 1.每行数据开始前,清空每行的数据
    @Override
    public void startRow(int i) {
        cols = new HashMap<>();
    }

    @Override
    public void endRow(int i) {
        processPerRowData(i,cols,totalExcelData);
    }

    //2.每行数据遍历每个cell时
    @Override
    public void cell(String s, String s1, XSSFComment xssfComment) {
        cols.put(XSSFEventApiUtil.getColumnName(s),s1);
    }

    // 重写该方法决定筛选的数据
    abstract public void processPerRowData(int rowNum,Map<String,String> m,List<Map<String,String>> total);

}

编写demo类测试:

package com.xiaoxu.tool.demo;

import com.xiaoxu.utils.Excel.AbstractSheetContentHandler;
import com.xiaoxu.utils.Excel.XSSFEventApiUtil;

import java.util.List;
import java.util.Map;

/**
 * @author xiaoxu
 * @date 2022-04-24
 * spring_boot:com.xiaoxu.tool.demo.TestXmlUtil
 */
public class TestXmlUtil {
    public static void main(String[] args) {
        DemoAbstractSheetContentHandler demoAbstractSheetContentHandler = new DemoAbstractSheetContentHandler();
        XSSFEventApiUtil.processOnSheet("test456.xlsx","Sheet1",demoAbstractSheetContentHandler);
        List<Map<String, String>> totalExcelData = demoAbstractSheetContentHandler.getTotalExcelData();
        totalExcelData.forEach(System.out::println);
    }
}

class DemoAbstractSheetContentHandler extends AbstractSheetContentHandler{
    @Override
    public void processPerRowData(int rowNum, Map<String, String> m, List<Map<String, String>> total) {
        // 将第0行开始的数据都写入(因为rowNum是从0开始的)    
        if(rowNum>=0){
            total.add(m);
        }
    }
}

excel的Sheet1数据如下:
在这里插入图片描述

执行结果如下,每个map代表excel中一行的数据:

14:14:58.938 [main] DEBUG org.apache.poi.openxml4j.opc.PackageRelationshipCollection - Parsing relationship: /xl/_rels/workbook.xml.rels
14:14:58.956 [main] DEBUG org.apache.poi.openxml4j.opc.PackageRelationshipCollection - Parsing relationship: /xl/worksheets/_rels/sheet2.xml.rels
14:14:58.958 [main] DEBUG org.apache.poi.openxml4j.opc.PackageRelationshipCollection - Parsing relationship: /_rels/.rels
14:14:59.467 [main] DEBUG org.apache.poi.ooxml.POIXMLFactory - using default POIXMLDocumentPart for http://schemas.openxmlformats.org/officeDocument/2006/relationships/printerSettings
开始解析:workbook
[{name=Sheet1, sheetId=1, id=rId1}, {name=Sheet2, sheetId=2, id=rId2}, {name=Sheet3, sheetId=3, id=rId3}]
最终rId:rId1
{A=31232321, C=你好, D=hai, E=TRUE, F=4/5/22}
{D=中国来啦, J=2}
{C=345, G=哈哈}
{H=1}
评论
成就一亿技术人!
拼手气红包6.0元
还能输入1000个字符
 
红包 添加红包
表情包 插入表情
 条评论被折叠 查看
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值