POI读取Excel2003和Excel2007(代码)

这篇博客介绍了如何使用Apache POI库来读取Excel2003和2007文件。针对Excel2003,通过HSSFEventUserModel事件模型实现低内存读取,包括处理各种类型的记录,如BlankRecord、LabelRecord等。对于Excel2007,采用XSSFReader和SAX解析器进行数据处理,减少内存占用。同时,定义了一个RowProcessor接口用于处理读取到的行数据。

摘要生成于 C知道 ,由 DeepSeek-R1 满血版支持, 前往体验 >

1. 读取Excel2003

import java.io.FileInputStream;
import java.io.IOException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.List;

import org.apache.poi.hssf.eventusermodel.FormatTrackingHSSFListener;
import org.apache.poi.hssf.eventusermodel.HSSFEventFactory;
import org.apache.poi.hssf.eventusermodel.HSSFListener;
import org.apache.poi.hssf.eventusermodel.HSSFRequest;
import org.apache.poi.hssf.eventusermodel.MissingRecordAwareHSSFListener;
import org.apache.poi.hssf.eventusermodel.dummyrecord.LastCellOfRowDummyRecord;
import org.apache.poi.hssf.eventusermodel.dummyrecord.MissingCellDummyRecord;
import org.apache.poi.hssf.record.BOFRecord;
import org.apache.poi.hssf.record.BlankRecord;
import org.apache.poi.hssf.record.BoolErrRecord;
import org.apache.poi.hssf.record.BoundSheetRecord;
import org.apache.poi.hssf.record.FormulaRecord;
import org.apache.poi.hssf.record.LabelRecord;
import org.apache.poi.hssf.record.LabelSSTRecord;
import org.apache.poi.hssf.record.NumberRecord;
import org.apache.poi.hssf.record.Record;
import org.apache.poi.hssf.record.RecordFormatException;
import org.apache.poi.hssf.record.SSTRecord;
import org.apache.poi.hssf.record.StringRecord;
import org.apache.poi.hssf.usermodel.HSSFDateUtil;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;

import fhszreport.tool.LogTool;

/**
 * Excel 97-2003数据读取类
 *
 * @author darrenlin
 */
public class Excel2003Reader implements HSSFListener
{
    
    private SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
    
    private SSTRecord sstRecord;
    private FormatTrackingHSSFListener formatListener;

    /**
     * 当前Sheet索引
     */
    private int sheetIndex = -1;

    private BoundSheetRecord[] orderedBSRs;
    private List<BoundSheetRecord> boundSheetRecords = new ArrayList<BoundSheetRecord>();
    private int nextColumn;
    private boolean outputNextStringRecord;

    /**
     * 当前行
     */
    private int curRow = 0;

    /**
     * 存储行记录的容器
     */
    private List<String> rowlist = new ArrayList<String>();

    /**
     * 当前Sheet名
     */
    private String currentSheetName;

    /**
     * 当前Sheet索引
     */
    private Integer currentSheetIndex = 0;

    /**
     * 要处理的目标Sheet名
     */
    private String targetSheetName = "";
    
    /**
     * 要处理的目标Sheet索引
     */
    private Integer targetSheetIndex = null;

    public void setTargetSheetName(String targetSheetName)
    {
        this.targetSheetName = targetSheetName;
    }
    
    public void setTargetSheetIndex(Integer targetSheetIndex)
    {
        this.targetSheetIndex = targetSheetIndex;
    }

    /**
     * 行数据处理对象
     */
    private RowProcessor rowReader;

    public void setRowReader(RowProcessor rowReader)
    {
        this.rowReader = rowReader;
    }

    /**
     * 主处理函数
     *
     * @param targetFile 目标文件路径
     * @param logType 日志记录类型
     * @throws IOException IO异常
     */
    public void process(String targetFile, String logType) throws IOException
    {
        // 读取Excel流文件
        FileInputStream file = new FileInputStream(targetFile);
        POIFSFileSystem fs = new POIFSFileSystem(file);

        // 创建HSSFListener
        MissingRecordAwareHSSFListener listener = new MissingRecordAwareHSSFListener(this);
        formatListener = new FormatTrackingHSSFListener(listener);

        HSSFEventFactory factory = new HSSFEventFactory();
        HSSFRequest request = new HSSFRequest();
        request.addListenerForAllRecords(formatListener);

        try
        {
            // 开始读取Excel数据
            factory.processWorkbookEvents(request, fs);
        }
        // 记日志,对该异常不做抛出
        // 该异常是由于Excel结尾处带有无法读取的宏信息等导致的,而这时候数据已经读取完了
        catch (RecordFormatException e)
        {
            LogTool.logInfo("(Excel2003Reader)HSSFEventFactory.processWorkbookEvents Catch RecordFormatException: " + e.toString(), logType);
        }
        

        // 关闭流文件
        if (null != file)
        {
            file.close();
        }
    }

    /**
     * 重载HSSFListener监听方法
     */
    @Override
    public void processRecord(Record record)
    {
        int thisColumn = -1;
        String thisStr = null;
        String value = null;

        switch (record.getSid())
        {
        case BoundSheetRecord.sid:
            boundSheetRecords.add((BoundSheetRecord) record);
            break;

        case BOFRecord.sid:
            BOFRecord bofRecord = (BOFRecord) record;
            if (bofRecord.getType() == BOFRecord.TYPE_WORKSHEET)
            {
                sheetIndex++;

                if (null == orderedBSRs)
                {
                    orderedBSRs = BoundSheetRecord.orderByBofPosition(boundSheetRecords);
                }

                // 获取当前Sheet名
                currentSheetName = orderedBSRs[sheetIndex].getSheetname();
                currentSheetIndex = sheetIndex + 1;
            }
            break;

        case SSTRecord.sid:
            sstRecord = (SSTRecord) record;
            break;

        case BlankRecord.sid:
            BlankRecord blankRecord = (BlankRecord) record;
            thisColumn = blankRecord.getColumn();
            thisStr = "";
            rowlist.add(thisColumn, thisStr);
            break;

        // 单元格为布尔类型
        case BoolErrRecord.sid:
            BoolErrRecord boolErrRecord = (BoolErrRecord) record;
            thisColumn = boolErrRecord.getColumn();
            thisStr = boolErrRecord.getBooleanValue() + "";
            rowlist.add(thisColumn, thisStr);
            break;

        // 单元格为公式类型,不处理
        case FormulaRecord.sid:
            thisStr = "";
            rowlist.add(rowlist.size(), thisStr);
            break;

        // 单元格中公式的字符串
        case StringRecord.sid:
            if (outputNextStringRecord)
            {
                StringRecord stringRecord = (StringRecord) record;
                thisStr = stringRecord.getString();
                thisColumn = nextColumn;
                outputNextStringRecord = false;
            }
            break;

        case LabelRecord.sid:
            LabelRecord labelRecord = (LabelRecord) record;
            curRow = labelRecord.getRow();
            thisColumn = labelRecord.getColumn();
            value = labelRecord.getValue().trim();
            value = value.equals("") ? " " : value;
            rowlist.add(thisColumn, value);
            break;

        // 单元格为字符串类型
        case LabelSSTRecord.sid:
            LabelSSTRecord labelSSTRecord = (LabelSSTRecord) record;
            curRow = labelSSTRecord.getRow();
            thisColumn = labelSSTRecord.getColumn();
            if (null == sstRecord)
            {
                rowlist.add(thisColumn, " ");
            }
            else
            {
                value = sstRecord.getString(labelSSTRecord.getSSTIndex()).toString().trim();
                value = value.equals("") ? " " : value;
                rowlist.add(thisColumn, value);
            }
            break;

        // 单元格为数字类型
        case NumberRecord.sid:
            NumberRecord numberRecord = (NumberRecord) record;  
            String formatStr = formatListener.getFormatString(numberRecord);
            curRow = numberRecord.getRow();
            thisColumn = numberRecord.getColumn();
            value = String.valueOf(numberRecord.getValue());  
            if(formatStr.toLowerCase().contains("d") ||formatStr.toLowerCase().contains("m")
                    ||formatStr.toLowerCase().contains("y"))
            {
                value = sdf.format(HSSFDateUtil.getJavaDate(Double.parseDouble(value)));
            }           
            value = value.equals("") ? " " : value;          
            rowlist.add(thisColumn, value);          
            break;

        default:
            break;
        }

        // 空值的操作
        if (record instanceof MissingCellDummyRecord)
        {
            MissingCellDummyRecord missingCellDummyRecord = (MissingCellDummyRecord) record;
            curRow = missingCellDummyRecord.getRow();
            thisColumn = missingCellDummyRecord.getColumn();
            rowlist.add(thisColumn, " ");
        }

        // 行结束时的操作
        if (record instanceof LastCellOfRowDummyRecord)
        {
            // 如果是目标Sheet,则进行处理
            if (targetSheetName.equals(currentSheetName) || targetSheetIndex == currentSheetIndex)
            {
                // 处理行数据
                rowReader.process(curRow, rowlist);
            }

            // 清空数据
            rowlist.clear();
        }
    }
}


2. 读取Excel2007

import java.io.IOException;
import java.io.InputStream;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;

import org.apache.poi.hssf.usermodel.HSSFDateUtil;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.xssf.eventusermodel.XSSFReader;
import org.apache.poi.xssf.model.SharedStringsTable;
import org.apache.poi.xssf.usermodel.XSSFRichTextString;
import org.xml.sax.Attributes;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;
import org.xml.sax.helpers.DefaultHandler;
import org.xml.sax.helpers.XMLReaderFactory;

import fhszreport.tool.LogTool;

/**
 * Excel 2007数据读取类
 *
 * @author darrenlin
 */
public class Excel2007Reader extends DefaultHandler
{
    private SharedStringsTable sst;
    private String lastContents = "";

    /**
     * 行数据
     */
    private List<String> rowlist = new ArrayList<String>();
    
    /**
     * 当前行
     */
    private int curRow = 0;
    
    /**
     * 当前列
     */
    private int curCol = 0;
    
    /**
     * 单元格的值是否字符串标志
     */
    private boolean isString;
    
    /**
     * 单元格的值是否日期标志
     */
    private boolean dateFlag;

    /**
     * 当前节点是否t元素标示
     */
    private boolean isTElement;

    /**
     * Excel行数据处理器
     */
    private RowProcessor rowReader;

    public void setRowReader(RowProcessor rowReader)
    {
        this.rowReader = rowReader;
    }

    /**
     * Excel数据处理,默认处理第一个sheet
     *
     * @param targetFile 目标文件路径
     * @param logType 日志记录类型
     * @throws Exception
     */
    public void process(String targetFile, String logType) throws Exception
    {
        processOneSheet(targetFile, 1, logType);
    }

    /**
     * Excel数据处理,根据sheet索引进行处理
     *
     * @param targetFile 目标文件路径
     * @param sheetId sheet索引
     * @param logType 日志记录类型
     * @throws Exception
     */
    public void processOneSheet(String targetFile, int sheetId, String logType) throws Exception
    {
        OPCPackage opcPackage = null;
        XSSFReader xssfReader = null;
        SharedStringsTable sst = null;
        XMLReader parser = null;
        InputStream sheet = null;
        InputSource sheetSource = null;

        try
        {
            // 获取目标文件
            opcPackage = OPCPackage.open(targetFile);
            xssfReader = new XSSFReader(opcPackage);

            // 获取XML解析对象
            sst = xssfReader.getSharedStringsTable();
            parser = getSheetParser(sst);

            // 读取相应sheet数据
            sheet = xssfReader.getSheet("rId" + sheetId);
            sheetSource = new InputSource(sheet);

            // 开始解析数据
            parser.parse(sheetSource);
        }
        catch (Exception e)
        {
            // 记录日志
            LogTool.logInfo("(Excel2007Reader)Parse Excel-2007 File Catch Exception: " + e.toString(), logType);
            throw e;
        }
        finally
        {
            // 关闭流文件
            if (null != opcPackage)
            {
                try
                {
                    opcPackage.close();
                }
                catch (IOException e)
                {
                    // 记录日志
                    LogTool.logInfo("(Excel2007Reader)Close OPCPackage Catch IOException: " + e.toString(), logType);
                }
            }

            // 关闭流文件
            if (null != sheet)
            {
                try
                {
                    sheet.close();
                }
                catch (IOException e)
                {
                    // 记录日志
                    LogTool.logInfo("(Excel2007Reader)Close InputStream Catch IOException: " + e.toString(), logType);
                }
            }
        }
    }

    /**
     * 获取XML解析对象
     *
     * @param sst SharedStringsTable
     * @return XMLReader XML解析对象
     * @throws SAXException
     */
    public XMLReader getSheetParser(SharedStringsTable sst) throws SAXException
    {
        XMLReader parser = XMLReaderFactory.createXMLReader("org.apache.xerces.parsers.SAXParser");
        this.sst = sst;
        parser.setContentHandler(this);
        return parser;
    }

    /**
     * 解析节点开始前处理方法
     */
    @Override
    public void startElement(String uri, String localName, String name, Attributes attributes) throws SAXException
    {
        // 如果是单元格
        if ("c".equals(name))
        {
            isString = false;
            dateFlag = false;
            
            // 如果下一个元素字符串( 在Excel-2007中字符串是使用SST索引)
            String cellType = attributes.getValue("t");
            if ("s".equals(cellType))
            {
                isString = true;
            }
            
            // 如果不是字符串
            if (!isString)
            {
                String cellStyle = attributes.getValue("s");

                // 判断是否日期格式
                if ("1".equals(cellStyle) || "5".equals(cellStyle) || "6".equals(cellStyle))
                {
                    dateFlag = true;
                }
            }
        }

        isTElement = false;
        
        // 当元素为t时
        if ("t".equals(name))
        {
            isTElement = true;
        }
    }

    /**
     * 单元格节点值处理方法
     */
    @Override
    public void characters(char[] ch, int start, int length) throws SAXException
    {
        // 得到单元格内容的值
        lastContents += new String(ch, start, length);
    }

    /**
     * 解析节点结束处理方法
     */
    @Override
    public void endElement(String uri, String localName, String name) throws SAXException
    {
        // 如果单元格是字符串类型
        if (isString)
        {
            // 使用SST索引获取真正的字符串值
            int idx = Integer.parseInt(lastContents);
            lastContents = new XSSFRichTextString(sst.getEntryAt(idx)).toString();
            isString = false;
        }

        // 如果是t元素
        if (isTElement)
        {
            String value = lastContents.trim();
            rowlist.add(curCol, value);
            curCol++;
            isTElement = false;
            
            // 置空
            lastContents = "";
        }
        // 如果是v元素(既是单元格的值节点)
        else if ("v".equals(name))
        {
            String value = lastContents.trim();
            value = value.equals("") ? " " : value;

            // 如果是日期类型
            if (dateFlag)
            {
                Date date = HSSFDateUtil.getJavaDate(Double.valueOf(value));
                SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd");
                value = dateFormat.format(date);
            }

            rowlist.add(curCol, value);
            curCol++;
            
            // 置空
            lastContents = "";
        }
        else
        {
            // 如果是row元素(既是行结束节点)
            if (name.equals("row"))
            {
                // 调用行处理器处理数据
                rowReader.process(curRow, rowlist);

                // 清理数据
                rowlist.clear();

                curRow++;
                curCol = 0;
            }
        }
    }
}


3. 行数据处理接口

import java.util.List;

/**
 * Excel行数据处理器接口
 *
 * @author darrenlin
 */
public interface RowProcessor
{
    /**
     * 处理方法
     * @param curRow 当前Excel行数
     * @param rowlist 当前行数据
     */
    public void process(int curRow, List<String> rowlist);
}



至于具体怎么使用看看代码应该都懂的~再见

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值