java 文件解析成JSON数据(其四)

该博客主要介绍用于将文件解析成JSON数据的代码,适用于个人用途。还给出了所需特殊的jar包,如org.apache.poi下的poi、poi - ooxml、poi - ooxml - schemas等,部分工具类去掉不影响使用。

摘要生成于 C知道 ,由 DeepSeek-R1 满血版支持, 前往体验 >

个人用途,不擅长描述,用于将文件解析成json数据

import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;
import com.gta.edu.sdk.util.StringUtil;
import org.apache.poi.hssf.eventusermodel.EventWorkbookBuilder;
import org.apache.poi.hssf.eventusermodel.FormatTrackingHSSFListener;
import org.apache.poi.hssf.eventusermodel.HSSFEventFactory;
import org.apache.poi.hssf.eventusermodel.HSSFListener;
import org.apache.poi.hssf.eventusermodel.HSSFRequest;
import org.apache.poi.hssf.eventusermodel.MissingRecordAwareHSSFListener;
import org.apache.poi.hssf.eventusermodel.dummyrecord.LastCellOfRowDummyRecord;
import org.apache.poi.hssf.eventusermodel.dummyrecord.MissingCellDummyRecord;
import org.apache.poi.hssf.model.HSSFFormulaParser;
import org.apache.poi.hssf.record.*;
import org.apache.poi.hssf.usermodel.HSSFDataFormatter;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;

import java.io.File;
import java.io.FileInputStream;
import java.util.ArrayList;
import java.util.List;

/**
 * @author y
 * @create 2018-01-19 14:18
 * @desc 用于解决.xls2003版本大数据量问题
 **/
public class ExcelXlsReader implements HSSFListener {

    private int minColums = -1;

    private POIFSFileSystem fs;

    /**
     * 总行数
     */
    private int totalRows = 0;

    /**
     * 上一行row的序号
     */
    private int lastRowNumber;

    /**
     * 上一单元格的序号
     */
    private int lastColumnNumber;
    
    /**
     * 是否输出formula,还是它对应的值
     */
    private boolean outputFormulaValues = true;

    /**
     * 用于转换formulas
     */
    private EventWorkbookBuilder.SheetRecordCollectingListener workbookBuildingListener;

    //excel2003工作簿
    private HSSFWorkbook stubWorkbook;

    private SSTRecord sstRecord;

    private FormatTrackingHSSFListener formatListener;

    private final HSSFDataFormatter formatter = new HSSFDataFormatter();

    /**
     * 文件的绝对路径
     */
    private String filePath = "";

    //表索引
    private int sheetIndex = 0;

    private BoundSheetRecord[] orderedBSRs;

    @SuppressWarnings("unchecked")
    private ArrayList boundSheetRecords = new ArrayList();

    private int nextRow;

    private int nextColumn;

    private int curSize;

    private int column = 0;

    private boolean outputNextStringRecord;

    //当前行
    private int curRow = 0;
    //存储一行记录所有单元格的容器
    private List<String> cellList = new ArrayList<>();

    // 单个sheet表的列头集合
    private List<String> columnList;

    //列的类型
    private List<String> columnType = new ArrayList<>();

    //存储sheet表每行
    private JSONObject objSheet;
    //单个sheet表的数据
    private JSONArray arrSheet = new JSONArray();

    //保存所有sheet名称
    public List<String> sheetNames = new ArrayList<>();

    /**
     * 判断整行是否为空行的标记
     */
    private boolean flag = false;

    @SuppressWarnings("unused")
    private String sheetName;

    /**
     * 遍历excel下所有的sheet
     *
     * @param file
     * @throws Exception
     */
    public JSONArray process(File file) throws Exception {
        //filePath = fileName;
        sheetNames.clear();
        this.fs = new POIFSFileSystem(new FileInputStream(file));
        MissingRecordAwareHSSFListener listener = new MissingRecordAwareHSSFListener(this);
        formatListener = new FormatTrackingHSSFListener(listener);
        HSSFEventFactory factory = new HSSFEventFactory();
        HSSFRequest request = new HSSFRequest();
        if (outputFormulaValues) {
            request.addListenerForAllRecords(formatListener);
        } else {
            workbookBuildingListener = new EventWorkbookBuilder.SheetRecordCollectingListener(formatListener);
            request.addListenerForAllRecords(workbookBuildingListener);
        }
        factory.processWorkbookEvents(request, fs);

        return arrSheet; //返回该excel文件的总行数,不包括首列和空行
    }
    
    /**
     * HSSFListener 监听方法,处理Record
     * 处理每个单元格
     *
     * @param record
     */
    @Override
    @SuppressWarnings("unchecked")
    public void processRecord(Record record) {
        int thisRow = -1;
        int thisColumn = -1;
        String thisStr = null;
        String value = null;
        switch (record.getSid()) {
            case BoundSheetRecord.sid:
                boundSheetRecords.add(record);
                break;
            case BOFRecord.sid: //开始处理每个sheet
                BOFRecord br = (BOFRecord) record;
                if (br.getType() == BOFRecord.TYPE_WORKSHEET) {
                    //如果有需要,则建立子工作簿
                    if (workbookBuildingListener != null && stubWorkbook == null) {
                        stubWorkbook = workbookBuildingListener.getStubHSSFWorkbook();
                    }
                    if (orderedBSRs == null) {
                        orderedBSRs = BoundSheetRecord.orderByBofPosition(boundSheetRecords);
                    }
                    sheetName = orderedBSRs[sheetIndex].getSheetname();
                    if (columnList != null) {
                        curSize = 0;
                        columnList.clear();
                    }
                    sheetNames.add(sheetName);
                    sheetIndex++;
                }
                break;
            case SSTRecord.sid:
                sstRecord = (SSTRecord) record;
                break;
            case BlankRecord.sid: //单元格为空白
                BlankRecord brec = (BlankRecord) record;
                thisRow = brec.getRow();
                thisColumn = brec.getColumn();
                thisStr = "null";
                cellList.add(thisColumn, thisStr);
                break;
            case BoolErrRecord.sid: //单元格为布尔类型
                BoolErrRecord berec = (BoolErrRecord) record;
                thisRow = berec.getRow();
                thisColumn = berec.getColumn();
                thisStr = berec.getBooleanValue() + "";
                cellList.add(thisColumn, thisStr);
                checkRowIsNull(thisStr);  //如果里面某个单元格含有值,则标识该行不为空行
                break;
            case FormulaRecord.sid://单元格为公式类型
                FormulaRecord frec = (FormulaRecord) record;
                thisRow = frec.getRow();
                thisColumn = frec.getColumn();
                if (outputFormulaValues) {
                    if (Double.isNaN(frec.getValue())) {
                        outputNextStringRecord = true;
                        nextRow = frec.getRow();
                        nextColumn = frec.getColumn();
                    } else {
                        thisStr = HSSFFormulaParser.toFormulaString(stubWorkbook, frec.getParsedExpression());
                    }
                } else {
                    thisStr = HSSFFormulaParser.toFormulaString(stubWorkbook, frec.getParsedExpression());
                }
                cellList.add(thisColumn, thisStr);
                checkRowIsNull(thisStr);  //如果里面某个单元格含有值,则标识该行不为空行
                break;
           case StringRecord.sid: //单元格中公式的字符串
                if (outputNextStringRecord) {
                    StringRecord srec = (StringRecord) record;
                    thisStr = srec.getString();
                    thisRow = nextRow;
                    thisColumn = nextColumn;
                    outputNextStringRecord = false;
                }
                break;
            case LabelRecord.sid:
                LabelRecord lrec = (LabelRecord) record;
                curRow = thisRow = lrec.getRow();
                thisColumn = lrec.getColumn();
                value = lrec.getValue().trim();
                value = value.equals("") ? "null" : value;
                cellList.add(thisColumn, value);
                checkRowIsNull(value);  //如果里面某个单元格含有值,则标识该行不为空行
                break;
            case LabelSSTRecord.sid: //单元格为字符串类型
                LabelSSTRecord lsrec = (LabelSSTRecord) record;
                curRow = thisRow = lsrec.getRow();
                thisColumn = lsrec.getColumn();
                if (sstRecord == null) {
                    cellList.add(thisColumn, "null");
                } else {
                    value = sstRecord.getString(lsrec.getSSTIndex()).toString().trim();
                    value = value.equals("") ? "null" : value;
                    cellList.add(thisColumn, value);
                    checkRowIsNull(value);  //如果里面某个单元格含有值,则标识该行不为空行
                }
                break;
            case NumberRecord.sid: //单元格为数字类型
                NumberRecord numrec = (NumberRecord) record;
                curRow = thisRow = numrec.getRow();
                thisColumn = numrec.getColumn();
                //第一种方式
                //value = formatListener.formatNumberDateCell(numrec).trim();//这个被写死,采用的m/d/yy h:mm格式,不符合要求
                //第二种方式,参照formatNumberDateCell里面的实现方法编写
                Double valueDouble = ((NumberRecord) numrec).getValue();
                int formatIndex = formatListener.getFormatIndex(numrec);
                String formatString = formatListener.getFormatString(numrec);
                if(formatString.indexOf("%") == -1){
                    if (formatIndex == 185 || formatIndex == 186 || formatIndex == 190 || formatIndex == 191
                            || formatIndex == 22 || formatIndex == 177 || formatIndex == 180 || formatIndex == 181) {
                        formatString = "yyyy-MM-dd HH:mm:ss";
                        if (valueDouble.toString().indexOf("0.") == 0) {
                            formatString = "HH:mm:ss";
                        }
                    } else if (formatIndex == 14 || formatIndex == 31 || formatIndex == 57 || formatIndex == 58
                            || (176 <= formatIndex && formatIndex <= 179) || (182 <= formatIndex && formatIndex <= 196)
                            || (210 <= formatIndex && formatIndex <= 213) || (208 == formatIndex)) { // 日期
                        formatString = "yyyy-MM-dd";
                        if (valueDouble.toString().indexOf("0.") == 0) {
                            formatString = "HH:mm:ss";
                        }
                    } else if (formatIndex == 20 || formatIndex == 32 || formatIndex == 183 || (200 <= formatIndex && formatIndex <= 209)) { // 时间
                        formatString = "HH:mm:ss";
                    }
                }else{
                    formatString = "General";
                }
                value = formatter.formatRawCellContents(valueDouble, formatIndex, formatString).trim();
                value = value.equals("") ? "null" : value;
                //向容器加入列值
                cellList.add(thisColumn, value);
                checkRowIsNull(value);  //如果里面某个单元格含有值,则标识该行不为空行
                break;
        }
        //遇到新行的操作
        if (thisRow != -1 && thisRow != lastRowNumber) {
            lastColumnNumber = -1;
        }
        //空值的操作
        if (record instanceof MissingCellDummyRecord) {
            MissingCellDummyRecord mc = (MissingCellDummyRecord) record;
            curRow = thisRow = mc.getRow();
            thisColumn = mc.getColumn();
            cellList.add(thisColumn, "null");
        }
        //更新行和列的值
        if (thisRow > -1) {
            lastRowNumber = thisRow;
        }
        if (thisColumn > -1) {
            lastColumnNumber = thisColumn;
        }
        //行结束时的操作
        if (record instanceof LastCellOfRowDummyRecord) {
            if (minColums > 0) {
                //列值重新置空
                if (lastColumnNumber == -1) {
                    lastColumnNumber = 0;
                }
            }
            lastColumnNumber = -1;
            //获取列头
            //以下为自己的逻辑代码,可自行编写
            //获取列头做列名  json格式: 表名.列名+类型:值
            if (curRow == 0) {
                int size = cellList.size();
                if (cellList.size() > 0) {
                    for (int i = 0; i < size; i++) {
                        if (!StringUtil.isNullOrEmpty(cellList.get(i))) {
                            curSize++;
                        }
                    }
                    if (curSize > 1) {
                        columnList = new ArrayList<>();
                        for (int i = 0; i < size; i++) {
                            columnList.add(cellList.get(i));
                            System.out.print(cellList.get(i) + "  ");
                        }
                        column = 0;
                        System.out.println();
                    }
                } else {
                    return;
                }
            }
            //判断列头是否有合并单元格
            if (curRow == 1 && curSize < 2) {
                columnList = new ArrayList<>();
                int size = cellList.size();
                for (int i = 0; i < size; i++) {
                    columnList.add(cellList.get(i));
                    System.out.print(cellList.get(i) + "  ");
                    curSize++;
                }
                column = 1;
                System.out.println();
            }
             //数据的获取与添加
            if (curRow > column && curSize > 1) { //该行不为空行且该行不是第一行,发送(第一行为列名,不需要)
                int columnSize = 0;
                int size = columnList.size();
                if (column < 1) {
                    columnSize = 1;
                } else {
                    columnSize = 2;
                }
                if (curRow == columnSize) {
                    String headName = null;
                    for (int i = 0; i < size; i++) {
                        headName = columnList.get(i).trim();
                        if (headName.length() > 0) {
                            if (headName.equals("null") || !DataFileTypeUtil.isLetterDigitOrChinese(headName)) {
                                columnList.set(i, "第" + (i + 1) + "列");
                            }
                        } else {
                            columnList.set(i, "第" + (i + 1) + "列");
                        }
                    }
                    theColumnType(cellList, size);
                }
                objSheet = new JSONObject(true);
                int cellSize = cellList.size() - 1;
                for (int i = 0; i < size; i++) {
                    if (cellSize >= i) {
                        objSheet.put(sheetName + "." + columnList.get(i) + columnType.get(i), cellList.get(i).trim().length() > 0 ? cellList.get(i) : "null");
                    } else {
                        objSheet.put(sheetName + "." + columnList.get(i) + columnType.get(i), "null");
                    }
                }
                arrSheet.add(objSheet);
                //ExcelReaderUtil.sendRows(sheetName, sheetIndex, curRow + 1, cellList, objSheet); //每行结束时,调用sendRows()方法
                totalRows++;
            }
            //清空容器
            cellList.clear();
            flag = false;
        }
    }
 
    /**
     * 判断列的类型
     *
     * @return
     */
    public List<String> theColumnType(List<String> cellList, Integer cellize) {
        int size = cellList.size() - 1;
        for (int i = 0; i < cellize; i++) {
            if (size >= i) {
                if (DataFileTypeUtil.isNumeric01(cellList.get(i))) {
                    columnType.add("01");
                } else if (DataFileTypeUtil.isDateTime02(cellList.get(i))) {
                    columnType.add("02");
                } else if (DataFileTypeUtil.estimateAddressType(cellList.get(i))) {
                    columnType.add("04");
                } else {
                    columnType.add("00");
                }
            } else {
                columnType.add("00");
            }
        }
        return columnType;
    }

    /**
     * 如果里面某个单元格含有值,则标识该行不为空行
     *
     * @param value
     */
    public void checkRowIsNull(String value) {
        if (value != null && !"".equals(value)) {
            flag = true;
        }
    }

以上为所需代码,部分工具类未添加,去掉就好,不影响使用。以下为所需特殊的jar包

<dependency>
            <groupId>org.apache.poi</groupId>
            <artifactId>poi</artifactId>
            <version>3.17</version>
        </dependency>
        <dependency>
            <groupId>org.apache.poi</groupId>
            <artifactId>poi-ooxml</artifactId>
            <version>3.17</version>
        </dependency>
        <dependency>
            <groupId>org.apache.poi</groupId>
            <artifactId>poi-ooxml-schemas</artifactId>
            <version>3.17</version>
        </dependency>

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值