importjava.io.IOException;importjava.io.InputStream;importjava.io.PrintStream;importjavax.xml.parsers.ParserConfigurationException;importjavax.xml.parsers.SAXParser;importjavax.xml.parsers.SAXParserFactory;importorg.apache.poi.openxml4j.exceptions.OpenXML4JException;importorg.apache.poi.openxml4j.opc.OPCPackage;importorg.apache.poi.openxml4j.opc.PackageAccess;importorg.apache.poi.ss.usermodel.BuiltinFormats;importorg.apache.poi.ss.usermodel.DataFormatter;importorg.apache.poi.xssf.eventusermodel.ReadOnlySharedStringsTable;importorg.apache.poi.xssf.eventusermodel.XSSFReader;importorg.apache.poi.xssf.model.StylesTable;importorg.apache.poi.xssf.usermodel.XSSFCellStyle;//import org.apache.poi.xssf.usermodel.XSSFRichTextString;importorg.apache.poi.xssf.usermodel.XSSFRichTextString;importorg.xml.sax.Attributes;importorg.xml.sax.ContentHandler;importorg.xml.sax.InputSource;importorg.xml.sax.SAXException;importorg.xml.sax.XMLReader;importorg.xml.sax.helpers.DefaultHandler;publicclass XLSXTOCSV {/**
* The type of the data value is indicated by an attribute on the cell. The
* value is usually in a "v" element within the cell.
*/enum xssfDataType {
BOOL, ERROR, FORMULA, INLINESTR, SSTINDEX, NUMBER,}/**
* Derived from http://poi.apache.org/spreadsheet/how-to.html#xssf_sax_api
* <p/>
* Also see Standard ECMA-376, 1st edition, part 4, pages 1928ff, at
* http://www.ecma-international.org/publications/standards/Ecma-376.htm
* <p/>
* A web-friendly version is http://openiso.org/Ecma/376/Part4
*/classMyXSSFSheetHandlerextendsDefaultHandler{/**
* Table with styles
*/privateStylesTable stylesTable;/**
* Table with unique strings
*/privateReadOnlySharedStringsTable sharedStringsTable;/**
* Destination for data
*/privatefinalPrintStream output;/**
* Number of columns to read starting with leftmost
*/privatefinalint minColumnCount;// Set when V start element is seenprivateboolean vIsOpen;// Set when cell start element is seen;// used when cell close element is seen.private xssfDataType nextDataType;// Used to format numeric cell values.privateshort formatIndex;privateString formatString;privatefinalDataFormatter formatter;privateint thisColumn =-1;// The last column printed to the output streamprivateint lastColumnNumber =-1;// Gathers characters as they are seen.privateStringBuffer value;/**
* Accepts objects needed while parsing.
*
* @param styles Table of styles
* @param strings Table of shared strings
* @param cols Minimum number of columns to show
* @param target Sink for output
*/publicMyXSSFSheetHandler(StylesTable styles,ReadOnlySharedStringsTable strings,int cols,PrintStream target){this.stylesTable = styles;this.sharedStringsTable = strings;this.minColumnCount = cols;this.output = target;this.value =newStringBuffer();this.nextDataType = xssfDataType.NUMBER;this.formatter =newDataFormatter();}/*
* (non-Javadoc)
*
* @see
* org.xml.sax.helpers.DefaultHandler#startElement(java.lang.String,
* java.lang.String, java.lang.String, org.xml.sax.Attributes)
*/publicvoidstartElement(String uri,String localName,String name,Attributes attributes)throwsSAXException{if("inlineStr".equals(name)||"v".equals(name)){
vIsOpen =true;// Clear contents cache
value.setLength(0);}// c => cellelseif("c".equals(name)){// Get the cell referenceString r = attributes.getValue("r");int firstDigit =-1;for(int c =0; c < r.length();++c){if(Character.isDigit(r.charAt(c))){
firstDigit = c;break;}}
thisColumn =nameToColumn(r.substring(0, firstDigit));// Set up defaults.this.nextDataType = xssfDataType.NUMBER;this.formatIndex =-1;this.formatString =null;String cellType = attributes.getValue("t");String cellStyleStr = attributes.getValue("s");if("b".equals(cellType))
nextDataType = xssfDataType.BOOL;elseif("e".equals(cellType))
nextDataType = xssfDataType.ERROR;elseif("inlineStr".equals(cellType))
nextDataType = xssfDataType.INLINESTR;elseif("s".equals(cellType))
nextDataType = xssfDataType.SSTINDEX;elseif("str".equals(cellType))
nextDataType = xssfDataType.FORMULA;elseif(cellStyleStr !=null){// It's a number, but almost certainly one// with a special style or formatint styleIndex =Integer.parseInt(cellStyleStr);XSSFCellStyle style = stylesTable.getStyleAt(styleIndex);this.formatIndex = style.getDataFormat();this.formatString = style.getDataFormatString();if(this.formatString ==null)this.formatString =BuiltinFormats.getBuiltinFormat(this.formatIndex);}}}/*
* (non-Javadoc)
*
* @see org.xml.sax.helpers.DefaultHandler#endElement(java.lang.String,
* java.lang.String, java.lang.String)
*/publicvoidendElement(String uri,String localName,String name)throwsSAXException{String thisStr =null;// v => contents of a cellif("v".equals(name)){// Process the value contents as required.// Do now, as characters() may be called more than onceswitch(nextDataType){case BOOL:char first = value.charAt(0);
thisStr = first =='0'?"FALSE":"TRUE";break;case ERROR:
thisStr ="\"ERROR:"+ value.toString()+'"';break;case FORMULA:// A formula could result in a string value,// so always add double-quote characters.
thisStr ='"'+ value.toString()+'"';break;case INLINESTR:// TODO: have seen an example of this, so it's untested.XSSFRichTextString rtsi =newXSSFRichTextString(value
.toString());
thisStr ='"'+ rtsi.toString()+'"';break;case SSTINDEX:String sstIndex = value.toString();try{int idx =Integer.parseInt(sstIndex);XSSFRichTextString rtss =newXSSFRichTextString(
sharedStringsTable.getEntryAt(idx));
thisStr ='"'+ rtss.toString()+'"';}catch(NumberFormatException ex){
output.println("Failed to parse SST index '"+ sstIndex
+"': "+ ex.toString());}break;case NUMBER:String n = value.toString();if(this.formatString !=null)
thisStr = formatter.formatRawCellContents(Double.parseDouble(n),this.formatIndex,this.formatString);else
thisStr = n;break;default:
thisStr ="(TODO: Unexpected type: "+ nextDataType +")";break;}// Output after we've seen the string contents// Emit commas for any fields that were missing on this rowif(lastColumnNumber ==-1){
lastColumnNumber =0;}for(int i = lastColumnNumber; i < thisColumn;++i)
output.print(',');// Might be the empty string.
output.print(thisStr);// Update columnif(thisColumn >-1)
lastColumnNumber = thisColumn;}elseif("row".equals(name)){// Print out any missing commas if neededif(minColumns >0){// Columns are 0 basedif(lastColumnNumber ==-1){
lastColumnNumber =0;}for(int i = lastColumnNumber; i <(this.minColumnCount); i++){
output.print(',');}}// We're onto a new row
output.println();
lastColumnNumber =-1;}}/**
* Captures characters only if a suitable element is open. Originally
* was just "v"; extended for inlineStr also.
*/publicvoidcharacters(char[] ch,int start,int length)throwsSAXException{if(vIsOpen)
value.append(ch, start, length);}/**
* Converts an Excel column name like "C" to a zero-based index.
*
* @param name
* @return Index corresponding to the specified name
*/privateintnameToColumn(String name){int column =-1;for(int i =0; i < name.length();++i){int c = name.charAt(i);
column =(column +1)*26+ c -'A';}return column;}}// /privateOPCPackage xlsxPackage;privateint minColumns;privatePrintStream output;privatefinalString OUTPUT_CHARSET ="GBK";/**
* Creates a new XLSX -> CSV converter
*
* @param pkg The XLSX package to process
* @param output The PrintStream to output the CSV to
* @param minColumns The minimum number of columns to output, or -1 for no minimum
*/publicXLSXTOCSV(OPCPackage pkg,PrintStream output,int minColumns){this.xlsxPackage = pkg;this.output = output;this.minColumns = minColumns;}//TODO catch exceptionspublicXLSXTOCSV(String inputFilePath,String outputFilePath)throwsException{
xlsxPackage =OPCPackage.open(inputFilePath,PackageAccess.READ);
output =newPrintStream(outputFilePath, OUTPUT_CHARSET);
minColumns =-1;}/**
* Parses and shows the content of one sheet using the specified styles and
* shared-strings tables.
*
* @param styles
* @param strings
* @param sheetInputStream
*/publicvoidprocessSheet(StylesTable styles,ReadOnlySharedStringsTable strings,InputStream sheetInputStream)throwsIOException,ParserConfigurationException,SAXException{InputSource sheetSource =newInputSource(sheetInputStream);SAXParserFactory saxFactory =SAXParserFactory.newInstance();SAXParser saxParser = saxFactory.newSAXParser();XMLReader sheetParser = saxParser.getXMLReader();ContentHandler handler =newMyXSSFSheetHandler(styles, strings,this.minColumns,this.output);
sheetParser.setContentHandler(handler);
sheetParser.parse(sheetSource);}/**
* Initiates the processing of the XLS workbook file to CSV.
*
* @throws IOException
* @throws OpenXML4JException
* @throws ParserConfigurationException
* @throws SAXException
*/publicvoidprocess()throwsIOException,OpenXML4JException,ParserConfigurationException,SAXException{ReadOnlySharedStringsTable strings =newReadOnlySharedStringsTable(this.xlsxPackage);XSSFReader xssfReader =newXSSFReader(this.xlsxPackage);StylesTable styles = xssfReader.getStylesTable();XSSFReader.SheetIterator iter =(XSSFReader.SheetIterator) xssfReader
.getSheetsData();int index =0;while(iter.hasNext()){InputStream stream = iter.next();String sheetName = iter.getSheetName();this.output.println();this.output.println(sheetName +" [index="+ index +"]:");processSheet(styles, strings, stream);
stream.close();++index;}}// public static void main(String[] args) throws Exception {// XLSXTOCSV xlsx2csv = new XLSXTOCSV("D:\\1.8.0\\1617.xlsx","D:\\1.8.0\\test.csv");// xlsx2csv.process();// }}