个人用途,不擅长描述,用于将文件解析成json数据
import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;
import com.gta.edu.sdk.util.StringUtil;
import org.apache.poi.hssf.eventusermodel.EventWorkbookBuilder;
import org.apache.poi.hssf.eventusermodel.FormatTrackingHSSFListener;
import org.apache.poi.hssf.eventusermodel.HSSFEventFactory;
import org.apache.poi.hssf.eventusermodel.HSSFListener;
import org.apache.poi.hssf.eventusermodel.HSSFRequest;
import org.apache.poi.hssf.eventusermodel.MissingRecordAwareHSSFListener;
import org.apache.poi.hssf.eventusermodel.dummyrecord.LastCellOfRowDummyRecord;
import org.apache.poi.hssf.eventusermodel.dummyrecord.MissingCellDummyRecord;
import org.apache.poi.hssf.model.HSSFFormulaParser;
import org.apache.poi.hssf.record.*;
import org.apache.poi.hssf.usermodel.HSSFDataFormatter;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
import java.io.File;
import java.io.FileInputStream;
import java.util.ArrayList;
import java.util.List;
/**
* @author y
* @create 2018-01-19 14:18
* @desc 用于解决.xls2003版本大数据量问题
**/
public class ExcelXlsReader implements HSSFListener {
private int minColums = -1;
private POIFSFileSystem fs;
/**
* 总行数
*/
private int totalRows = 0;
/**
* 上一行row的序号
*/
private int lastRowNumber;
/**
* 上一单元格的序号
*/
private int lastColumnNumber;
/**
* 是否输出formula,还是它对应的值
*/
private boolean outputFormulaValues = true;
/**
* 用于转换formulas
*/
private EventWorkbookBuilder.SheetRecordCollectingListener workbookBuildingListener;
//excel2003工作簿
private HSSFWorkbook stubWorkbook;
private SSTRecord sstRecord;
private FormatTrackingHSSFListener formatListener;
private final HSSFDataFormatter formatter = new HSSFDataFormatter();
/**
* 文件的绝对路径
*/
private String filePath = "";
//表索引
private int sheetIndex = 0;
private BoundSheetRecord[] orderedBSRs;
@SuppressWarnings("unchecked")
private ArrayList boundSheetRecords = new ArrayList();
private int nextRow;
private int nextColumn;
private int curSize;
private int column = 0;
private boolean outputNextStringRecord;
//当前行
private int curRow = 0;
//存储一行记录所有单元格的容器
private List<String> cellList = new ArrayList<>();
// 单个sheet表的列头集合
private List<String> columnList;
//列的类型
private List<String> columnType = new ArrayList<>();
//存储sheet表每行
private JSONObject objSheet;
//单个sheet表的数据
private JSONArray arrSheet = new JSONArray();
//保存所有sheet名称
public List<String> sheetNames = new ArrayList<>();
/**
* 判断整行是否为空行的标记
*/
private boolean flag = false;
@SuppressWarnings("unused")
private String sheetName;
/**
* 遍历excel下所有的sheet
*
* @param file
* @throws Exception
*/
public JSONArray process(File file) throws Exception {
//filePath = fileName;
sheetNames.clear();
this.fs = new POIFSFileSystem(new FileInputStream(file));
MissingRecordAwareHSSFListener listener = new MissingRecordAwareHSSFListener(this);
formatListener = new FormatTrackingHSSFListener(listener);
HSSFEventFactory factory = new HSSFEventFactory();
HSSFRequest request = new HSSFRequest();
if (outputFormulaValues) {
request.addListenerForAllRecords(formatListener);
} else {
workbookBuildingListener = new EventWorkbookBuilder.SheetRecordCollectingListener(formatListener);
request.addListenerForAllRecords(workbookBuildingListener);
}
factory.processWorkbookEvents(request, fs);
return arrSheet; //返回该excel文件的总行数,不包括首列和空行
}
/**
* HSSFListener 监听方法,处理Record
* 处理每个单元格
*
* @param record
*/
@Override
@SuppressWarnings("unchecked")
public void processRecord(Record record) {
int thisRow = -1;
int thisColumn = -1;
String thisStr = null;
String value = null;
switch (record.getSid()) {
case BoundSheetRecord.sid:
boundSheetRecords.add(record);
break;
case BOFRecord.sid: //开始处理每个sheet
BOFRecord br = (BOFRecord) record;
if (br.getType() == BOFRecord.TYPE_WORKSHEET) {
//如果有需要,则建立子工作簿
if (workbookBuildingListener != null && stubWorkbook == null) {
stubWorkbook = workbookBuildingListener.getStubHSSFWorkbook();
}
if (orderedBSRs == null) {
orderedBSRs = BoundSheetRecord.orderByBofPosition(boundSheetRecords);
}
sheetName = orderedBSRs[sheetIndex].getSheetname();
if (columnList != null) {
curSize = 0;
columnList.clear();
}
sheetNames.add(sheetName);
sheetIndex++;
}
break;
case SSTRecord.sid:
sstRecord = (SSTRecord) record;
break;
case BlankRecord.sid: //单元格为空白
BlankRecord brec = (BlankRecord) record;
thisRow = brec.getRow();
thisColumn = brec.getColumn();
thisStr = "null";
cellList.add(thisColumn, thisStr);
break;
case BoolErrRecord.sid: //单元格为布尔类型
BoolErrRecord berec = (BoolErrRecord) record;
thisRow = berec.getRow();
thisColumn = berec.getColumn();
thisStr = berec.getBooleanValue() + "";
cellList.add(thisColumn, thisStr);
checkRowIsNull(thisStr); //如果里面某个单元格含有值,则标识该行不为空行
break;
case FormulaRecord.sid://单元格为公式类型
FormulaRecord frec = (FormulaRecord) record;
thisRow = frec.getRow();
thisColumn = frec.getColumn();
if (outputFormulaValues) {
if (Double.isNaN(frec.getValue())) {
outputNextStringRecord = true;
nextRow = frec.getRow();
nextColumn = frec.getColumn();
} else {
thisStr = HSSFFormulaParser.toFormulaString(stubWorkbook, frec.getParsedExpression());
}
} else {
thisStr = HSSFFormulaParser.toFormulaString(stubWorkbook, frec.getParsedExpression());
}
cellList.add(thisColumn, thisStr);
checkRowIsNull(thisStr); //如果里面某个单元格含有值,则标识该行不为空行
break;
case StringRecord.sid: //单元格中公式的字符串
if (outputNextStringRecord) {
StringRecord srec = (StringRecord) record;
thisStr = srec.getString();
thisRow = nextRow;
thisColumn = nextColumn;
outputNextStringRecord = false;
}
break;
case LabelRecord.sid:
LabelRecord lrec = (LabelRecord) record;
curRow = thisRow = lrec.getRow();
thisColumn = lrec.getColumn();
value = lrec.getValue().trim();
value = value.equals("") ? "null" : value;
cellList.add(thisColumn, value);
checkRowIsNull(value); //如果里面某个单元格含有值,则标识该行不为空行
break;
case LabelSSTRecord.sid: //单元格为字符串类型
LabelSSTRecord lsrec = (LabelSSTRecord) record;
curRow = thisRow = lsrec.getRow();
thisColumn = lsrec.getColumn();
if (sstRecord == null) {
cellList.add(thisColumn, "null");
} else {
value = sstRecord.getString(lsrec.getSSTIndex()).toString().trim();
value = value.equals("") ? "null" : value;
cellList.add(thisColumn, value);
checkRowIsNull(value); //如果里面某个单元格含有值,则标识该行不为空行
}
break;
case NumberRecord.sid: //单元格为数字类型
NumberRecord numrec = (NumberRecord) record;
curRow = thisRow = numrec.getRow();
thisColumn = numrec.getColumn();
//第一种方式
//value = formatListener.formatNumberDateCell(numrec).trim();//这个被写死,采用的m/d/yy h:mm格式,不符合要求
//第二种方式,参照formatNumberDateCell里面的实现方法编写
Double valueDouble = ((NumberRecord) numrec).getValue();
int formatIndex = formatListener.getFormatIndex(numrec);
String formatString = formatListener.getFormatString(numrec);
if(formatString.indexOf("%") == -1){
if (formatIndex == 185 || formatIndex == 186 || formatIndex == 190 || formatIndex == 191
|| formatIndex == 22 || formatIndex == 177 || formatIndex == 180 || formatIndex == 181) {
formatString = "yyyy-MM-dd HH:mm:ss";
if (valueDouble.toString().indexOf("0.") == 0) {
formatString = "HH:mm:ss";
}
} else if (formatIndex == 14 || formatIndex == 31 || formatIndex == 57 || formatIndex == 58
|| (176 <= formatIndex && formatIndex <= 179) || (182 <= formatIndex && formatIndex <= 196)
|| (210 <= formatIndex && formatIndex <= 213) || (208 == formatIndex)) { // 日期
formatString = "yyyy-MM-dd";
if (valueDouble.toString().indexOf("0.") == 0) {
formatString = "HH:mm:ss";
}
} else if (formatIndex == 20 || formatIndex == 32 || formatIndex == 183 || (200 <= formatIndex && formatIndex <= 209)) { // 时间
formatString = "HH:mm:ss";
}
}else{
formatString = "General";
}
value = formatter.formatRawCellContents(valueDouble, formatIndex, formatString).trim();
value = value.equals("") ? "null" : value;
//向容器加入列值
cellList.add(thisColumn, value);
checkRowIsNull(value); //如果里面某个单元格含有值,则标识该行不为空行
break;
}
//遇到新行的操作
if (thisRow != -1 && thisRow != lastRowNumber) {
lastColumnNumber = -1;
}
//空值的操作
if (record instanceof MissingCellDummyRecord) {
MissingCellDummyRecord mc = (MissingCellDummyRecord) record;
curRow = thisRow = mc.getRow();
thisColumn = mc.getColumn();
cellList.add(thisColumn, "null");
}
//更新行和列的值
if (thisRow > -1) {
lastRowNumber = thisRow;
}
if (thisColumn > -1) {
lastColumnNumber = thisColumn;
}
//行结束时的操作
if (record instanceof LastCellOfRowDummyRecord) {
if (minColums > 0) {
//列值重新置空
if (lastColumnNumber == -1) {
lastColumnNumber = 0;
}
}
lastColumnNumber = -1;
//获取列头
//以下为自己的逻辑代码,可自行编写
//获取列头做列名 json格式: 表名.列名+类型:值
if (curRow == 0) {
int size = cellList.size();
if (cellList.size() > 0) {
for (int i = 0; i < size; i++) {
if (!StringUtil.isNullOrEmpty(cellList.get(i))) {
curSize++;
}
}
if (curSize > 1) {
columnList = new ArrayList<>();
for (int i = 0; i < size; i++) {
columnList.add(cellList.get(i));
System.out.print(cellList.get(i) + " ");
}
column = 0;
System.out.println();
}
} else {
return;
}
}
//判断列头是否有合并单元格
if (curRow == 1 && curSize < 2) {
columnList = new ArrayList<>();
int size = cellList.size();
for (int i = 0; i < size; i++) {
columnList.add(cellList.get(i));
System.out.print(cellList.get(i) + " ");
curSize++;
}
column = 1;
System.out.println();
}
//数据的获取与添加
if (curRow > column && curSize > 1) { //该行不为空行且该行不是第一行,发送(第一行为列名,不需要)
int columnSize = 0;
int size = columnList.size();
if (column < 1) {
columnSize = 1;
} else {
columnSize = 2;
}
if (curRow == columnSize) {
String headName = null;
for (int i = 0; i < size; i++) {
headName = columnList.get(i).trim();
if (headName.length() > 0) {
if (headName.equals("null") || !DataFileTypeUtil.isLetterDigitOrChinese(headName)) {
columnList.set(i, "第" + (i + 1) + "列");
}
} else {
columnList.set(i, "第" + (i + 1) + "列");
}
}
theColumnType(cellList, size);
}
objSheet = new JSONObject(true);
int cellSize = cellList.size() - 1;
for (int i = 0; i < size; i++) {
if (cellSize >= i) {
objSheet.put(sheetName + "." + columnList.get(i) + columnType.get(i), cellList.get(i).trim().length() > 0 ? cellList.get(i) : "null");
} else {
objSheet.put(sheetName + "." + columnList.get(i) + columnType.get(i), "null");
}
}
arrSheet.add(objSheet);
//ExcelReaderUtil.sendRows(sheetName, sheetIndex, curRow + 1, cellList, objSheet); //每行结束时,调用sendRows()方法
totalRows++;
}
//清空容器
cellList.clear();
flag = false;
}
}
/**
* 判断列的类型
*
* @return
*/
public List<String> theColumnType(List<String> cellList, Integer cellize) {
int size = cellList.size() - 1;
for (int i = 0; i < cellize; i++) {
if (size >= i) {
if (DataFileTypeUtil.isNumeric01(cellList.get(i))) {
columnType.add("01");
} else if (DataFileTypeUtil.isDateTime02(cellList.get(i))) {
columnType.add("02");
} else if (DataFileTypeUtil.estimateAddressType(cellList.get(i))) {
columnType.add("04");
} else {
columnType.add("00");
}
} else {
columnType.add("00");
}
}
return columnType;
}
/**
* 如果里面某个单元格含有值,则标识该行不为空行
*
* @param value
*/
public void checkRowIsNull(String value) {
if (value != null && !"".equals(value)) {
flag = true;
}
}
以上为所需代码,部分工具类未添加,去掉就好,不影响使用。以下为所需特殊的jar包
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi</artifactId>
<version>3.17</version>
</dependency>
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-ooxml</artifactId>
<version>3.17</version>
</dependency>
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-ooxml-schemas</artifactId>
<version>3.17</version>
</dependency>