java:excel文件读写四、XSSF and SAX (Event API)
1 前言
参考apache poi官网的事件api:https://poi.apache.org/components/spreadsheet/how-to.html
XSSF and SAX (Event API)为xlsx文件操作的事件API。
其中如下提到了一种处理方式:
An example is also provided showing how to combine the user API and the SAX API by doing a streaming parse of larger worksheets and a traditional user-model parse of the rest of a workbook.
重写SheetContentsHandler的startRow、endRow、cell方法来获取excel的数据:
private static SheetContentsHandler createSheetContentsHandler() {
return new SheetContentsHandler() {
@Override
public void startRow(int rowNum) {
}
@Override
public void endRow(int rowNum) {
}
@Override
public void cell(String cellReference, String formattedValue, XSSFComment comment) {
}
};
}
在处理excel的sheet表数据时,执行顺序是,先执行startRow,rowNum从0开始(表格的第一行);然后执行多次cell(每行有多个cell数据),第一个参数cellReference,指的是该cell的列的字母,尾巴后带上数字,formattedValue是cell的格式化内容;最后执行endRow,亦是从0开始(表格的第一行)。
依赖配置如下:
<parent>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-parent</artifactId>
<version>2.5.4</version>
</parent>
<dependencies>
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-web</artifactId>
</dependency>
<!-- xls -->
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi</artifactId>
<version>5.2.1</version>
</dependency>
<!-- xlsx -->
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-ooxml</artifactId>
<version>5.2.1</version>
</dependency>
<!-- 读取xml -->
<dependency>
<groupId>dom4j</groupId>
<artifactId>dom4j</artifactId>
<version>1.6.1</version>
</dependency>
</dependencies>
2 使用
官方文档中获取某个sheet表的inputStream流操作如下:
public void processOneSheet(String filename) throws Exception {
OPCPackage pkg = OPCPackage.open(filename);
XSSFReader r = new XSSFReader( pkg );
SharedStringsTable sst = r.getSharedStringsTable();
XMLReader parser = fetchSheetParser(sst);
// To look up the Sheet Name / Sheet Order / rID,
// you need to process the core Workbook stream.
// Normally it's of the form rId# or rSheet#
InputStream sheet2 = r.getSheet("rId2");
InputSource sheetSource = new InputSource(sheet2);
parser.parse(sheetSource);
sheet2.close();
}
如上,是根据sheet表的rId来获取inputStream流,如下方式可以获取sheet表对应的rId:
(1)新建一个test456.xlsx,3个sheet表
(2)修改该excel文件的后缀名为.zip
(3)解压后在xl下找到workbook.xml并打开

可以发现,excel的rId在workbook.xml中下的sheets标签下,多个子sheet标签中具有该值:

故而先创建xml读取的工具类,代码如下:
package com.xiaoxu.utils.xml;
import org.dom4j.Attribute;
import org.dom4j.Document;
import org.dom4j.DocumentException;
import org.dom4j.Element;
import org.dom4j.io.SAXReader;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
/**
* @author xiaoxu
* @date 2022-04-20
* spring_boot:com.xiaoxu.utils.xml.XmlUtil
*/
public class XmlUtil {
@SuppressWarnings(value = {"unchecked"})
public static List<Map<String, String>> parseWorkBookXml(InputStream inputStream){
// xml读取对象
SAXReader saxReader = new SAXReader();
// 获得文档对象
Document doc;
try {
doc = saxReader.read(inputStream);
} catch (DocumentException e) {
e.printStackTrace();
return new ArrayList<>();
}
// 通过文档获取根元素
Element rootElement = doc.getRootElement();
// 开始解析元素
System.out.println("开始解析:"+rootElement.getName());
// 获取所有子元素
List<Element> elements = rootElement.elements();
List<Map<String, String>> sheetElementAttrs = new ArrayList<>();
for (int i = 0; i < elements.size(); i++) {
Element element = elements.get(i);
if(element.getName().equals("sheets")){
sheetElementAttrs = getSheetElementAttrs(element);
System.out.println(sheetElementAttrs);
}
}
return sheetElementAttrs;
}
/**
* @param e workbook xml 表的根sheets元素
* @return 数据
*/
@SuppressWarnings(value = {"unchecked"})
public static List<Map<String,String>> getSheetElementAttrs(Element e){
List<Map<String,String>> attrs = new ArrayList<>();
List<Element> elements = e.elements();
Element element;
Map<String,String> perElement;
List<Attribute> attributes;
for (Element value : elements) {
perElement = new HashMap<>();
element = value;
attributes = element.attributes();
for (Attribute attribute : attributes) {
perElement.put(attribute.getName(), attribute.getValue());
}
attrs.add(perElement);
}
return attrs;
}
}
然后创建XSSF事件API工具类:
package com.xiaoxu.utils.Excel;
import com.xiaoxu.utils.ExcelUtil;
import com.xiaoxu.utils.xml.XmlUtil;
import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
import org.apache.poi.util.XMLHelper;
import org.apache.poi.xssf.eventusermodel.ReadOnlySharedStringsTable;
import org.apache.poi.xssf.eventusermodel.XSSFReader;
import org.apache.poi.xssf.eventusermodel.XSSFSheetXMLHandler;
import org.apache.poi.xssf.usermodel.XSSFComment;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
import org.springframework.lang.NonNull;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;
import javax.xml.parsers.ParserConfigurationException;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* @author xiaoxu
* @date 2022-04-18
* spring_boot:com.xiaoxu.utils.Excel.XSSFEventApiUtil
*/
public class XSSFEventApiUtil {
public static void processOnSheet(@NonNull String fileName, String sheetName, XSSFSheetXMLHandler.SheetContentsHandler sheetContentsHandler){
try {
String excelFileName = ExcelUtil.getExcelFileName(fileName);
FileInputStream fileInputStream = new FileInputStream(excelFileName);
XSSFWorkbook wb = new XSSFWorkbook(fileInputStream);
ReadOnlySharedStringsTable sst = new ReadOnlySharedStringsTable(wb.getPackage());
XSSFSheetXMLHandler xssfSheetXMLHandler;
if(sheetContentsHandler==null){
xssfSheetXMLHandler = new XSSFSheetXMLHandler(wb.getStylesSource(), sst, getSheetContentsHandler(), false);
}else{
xssfSheetXMLHandler = new XSSFSheetXMLHandler(wb.getStylesSource(), sst, sheetContentsHandler, false);
}
XMLReader parser = XMLHelper.newXMLReader();
parser.setContentHandler(xssfSheetXMLHandler);
XSSFReader x = new XSSFReader(wb.getPackage());
// getWorkbookData可以获取workbook.xml的内容
InputStream workbookData = x.getWorkbookData();
List<Map<String, String>> maps = XmlUtil.parseWorkBookXml(workbookData);
String rId = "";
String s = Optional.ofNullable(sheetName).orElse("rId1");
if(s.equals("rId1")){
rId = s;
}else{
for (Map<String, String> map : maps) {
if(map.get("name").equals(s)){
rId = map.get("id");
}
}
if(rId.equals("")){
throw new RuntimeException("rId根据workbook的xml没有获取到,请检查");
}
}
System.out.println("最终rId:"+rId);
InputStream input = x.getSheet(rId);
InputSource inputSource1 = new InputSource(input);
parser.parse(inputSource1);
input.close();
wb.close();
} catch (IOException | SAXException | ParserConfigurationException | OpenXML4JException e ) {
e.printStackTrace();
}
}
public static XSSFSheetXMLHandler.SheetContentsHandler getSheetContentsHandler(){
return new XSSFSheetXMLHandler.SheetContentsHandler() {
@Override
public void startRow(int i) {
System.out.println("开始的rowNum:"+i);
}
@Override
public void endRow(int i) {
System.out.println("结束的rowNum:"+i);
}
//s1,比如某列是AO127,那么s1将会打印出AO127;s是cell的String类型的value(内容)
@Override
public void cell(String s1, String s, XSSFComment xssfComment) {
System.out.println("cell的序列号:"+s1);
System.out.println("cell的内容:"+s);
}
};
}
// 获取excel中对应的列明的属性,形如A\B\C\..AO...
public static String getColumnName(String str){
String res = "";
String pattern = "[A-Z]{1,3}";
Pattern p = Pattern.compile(pattern);
Matcher m = p.matcher(str);
if(m.find()){
res = m.group(0);
}
return res;
}
}
ExcelUtil中的getExcelFileName方法:
public static String getExcelFileName(@NonNull String fileName){
File deskUrl = FileSystemView.getFileSystemView().getHomeDirectory();
List<String> splitArr = ExcelUtil.splitFileName(fileName);
if(splitArr.size()==1){
fileName = deskUrl+ separator + splitArr.get(0) +xlsSeparator;
}else if(splitArr.size()==2){
if(!(xlsxSeparator.equals(splitArr.get(1))||xlsSeparator.equals(splitArr.get(1)))){
throw new RuntimeException("文件名后缀必须为.xls或者.xlsx");
}
StringBuilder sb = new StringBuilder();
sb.append(deskUrl).append(separator);
for (String s : splitArr) {
sb.append(s);
}
fileName = sb.toString();
}else{
throw new RuntimeException("文件名不正确");
}
return fileName;
}
SheetContentsHandler抽象类:
package com.xiaoxu.utils.Excel;
import lombok.Data;
import org.apache.poi.xssf.eventusermodel.XSSFSheetXMLHandler;
import org.apache.poi.xssf.usermodel.XSSFComment;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
/**
* @author xiaoxu
* @date 2022-04-20
* spring_boot:com.xiaoxu.utils.Excel.AbstractSheetContentHandler
*/
@Data
public abstract class AbstractSheetContentHandler implements XSSFSheetXMLHandler.SheetContentsHandler {
// 每行数据存储
private Map<String,String> cols =new HashMap<>();
// 全部的excel数据存储
private List<Map<String,String>> totalExcelData = new ArrayList<>();
// 1.每行数据开始前,清空每行的数据
@Override
public void startRow(int i) {
cols = new HashMap<>();
}
@Override
public void endRow(int i) {
processPerRowData(i,cols,totalExcelData);
}
//2.每行数据遍历每个cell时
@Override
public void cell(String s, String s1, XSSFComment xssfComment) {
cols.put(XSSFEventApiUtil.getColumnName(s),s1);
}
// 重写该方法决定筛选的数据
abstract public void processPerRowData(int rowNum,Map<String,String> m,List<Map<String,String>> total);
}
编写demo类测试:
package com.xiaoxu.tool.demo;
import com.xiaoxu.utils.Excel.AbstractSheetContentHandler;
import com.xiaoxu.utils.Excel.XSSFEventApiUtil;
import java.util.List;
import java.util.Map;
/**
* @author xiaoxu
* @date 2022-04-24
* spring_boot:com.xiaoxu.tool.demo.TestXmlUtil
*/
public class TestXmlUtil {
public static void main(String[] args) {
DemoAbstractSheetContentHandler demoAbstractSheetContentHandler = new DemoAbstractSheetContentHandler();
XSSFEventApiUtil.processOnSheet("test456.xlsx","Sheet1",demoAbstractSheetContentHandler);
List<Map<String, String>> totalExcelData = demoAbstractSheetContentHandler.getTotalExcelData();
totalExcelData.forEach(System.out::println);
}
}
class DemoAbstractSheetContentHandler extends AbstractSheetContentHandler{
@Override
public void processPerRowData(int rowNum, Map<String, String> m, List<Map<String, String>> total) {
// 将第0行开始的数据都写入(因为rowNum是从0开始的)
if(rowNum>=0){
total.add(m);
}
}
}
excel的Sheet1数据如下:

执行结果如下,每个map代表excel中一行的数据:
14:14:58.938 [main] DEBUG org.apache.poi.openxml4j.opc.PackageRelationshipCollection - Parsing relationship: /xl/_rels/workbook.xml.rels
14:14:58.956 [main] DEBUG org.apache.poi.openxml4j.opc.PackageRelationshipCollection - Parsing relationship: /xl/worksheets/_rels/sheet2.xml.rels
14:14:58.958 [main] DEBUG org.apache.poi.openxml4j.opc.PackageRelationshipCollection - Parsing relationship: /_rels/.rels
14:14:59.467 [main] DEBUG org.apache.poi.ooxml.POIXMLFactory - using default POIXMLDocumentPart for http://schemas.openxmlformats.org/officeDocument/2006/relationships/printerSettings
开始解析:workbook
[{name=Sheet1, sheetId=1, id=rId1}, {name=Sheet2, sheetId=2, id=rId2}, {name=Sheet3, sheetId=3, id=rId3}]
最终rId:rId1
{A=31232321, C=你好, D=hai, E=TRUE, F=4/5/22}
{D=中国来啦, J=2}
{C=345, G=哈哈}
{H=1}
7347

被折叠的 条评论
为什么被折叠?



