package excelReader; import java.io.File; import java.io.FileInputStream; import java.io.FileNotFoundException; import java.io.IOException; import java.io.InputStream; import java.util.Iterator; import org.apache.poi.hssf.usermodel.HSSFCell; import org.apache.poi.hssf.usermodel.HSSFWorkbook; import org.apache.poi.ss.usermodel.Cell; import org.apache.poi.ss.usermodel.Row; import org.apache.poi.ss.usermodel.Sheet; import org.apache.poi.ss.usermodel.Workbook; import org.apache.poi.xssf.usermodel.XSSFWorkbook; /** * 读取Excel文件 * * @author 李波 * <p> * 2010-08-04 * </p> */ public class PrintExcelTest { public static void main(String[] args) throws Exception { // String inputfile = // "D:/Documents and Settings/Administrator/桌面/a.xlsx"; String inputfile = "D:/Documents and Settings/Administrator/桌面/test.xlsx"; // String inputfile = // "D:/Documents and Settings/Administrator/桌面/把一个excel写进数据库/2.xls"; showFile(inputfile); } private static Row topRow; /** * 显示excel文档 * * @param inputfile * excel文档的路径 * @return excel文档的内容 * @throws IOException * @throws FileNotFoundException * @throws Exception */ public static StringBuffer showFile(String inputfile) throws IOException, FileNotFoundException, Exception { // 判断参数是否为空或没有意义 if (inputfile == null || inputfile.trim().equals("")) { throw new IOException("no input file specified"); } // excel文档 Workbook workbook = getWorkBook(inputfile); // Sheet数量 int numOfSheets = workbook.getNumberOfSheets(); StringBuffer sb = new StringBuffer(); // 循环获取所有的表 for (int i = 0; i < numOfSheets; i++) { // 表 Sheet sheet = workbook.getSheetAt(i); // 显示表 sb.append(showSheet(sheet)); } // Sheet sheet = workbook.getSheetAt(2); // // // 显示表 // sb.append(showSheet(sheet)); return sb; } /** * 显示表 * * @param sheet * 表实体 * @return 表的内容 * @throws Exception */ public static StringBuffer showSheet(Sheet sheet) throws Exception { StringBuffer sb = new StringBuffer(); Iterator<Row> rows = sheet.rowIterator(); // 显示行 while (rows.hasNext()) { Row rowUser = (Row) rows.next(); if (rowUser.getRowNum() == 0) { topRow = rowUser; continue; } sb.append(showRow(rowUser) + "\n"); } return sb; } /** * 显示行 * * @param rowUser * 行实体 * @return 行的内容 * @throws Exception */ public static StringBuffer showRow(Row rowUser) throws Exception { StringBuffer sb = new StringBuffer(); Iterator<Cell> iterUser = rowUser.cellIterator(); while (iterUser.hasNext()) { Cell cell = iterUser.next(); String cellValue = getCell(cell); if (cellValue.indexOf(",") != -1 || cellValue.indexOf("\n") != -1 || cellValue.indexOf(":") != -1) { throw new Exception("文件中存在 ‘,’ , ‘:’ 或 ‘\n’ "); } int columnIndex = cell.getColumnIndex(); Cell topRowCell = topRow.getCell(columnIndex); String topRowCellValue = getCell(topRowCell); sb.append(topRowCellValue + ":" + cellValue + ","); } if (sb.length() > 1) { sb.setLength(sb.length() - 1); } return sb; } public static Workbook getWorkBook(String inputfile) throws FileNotFoundException, IOException, Exception { Workbook workbook = null; File file = new File(inputfile); InputStream fint = new FileInputStream(file); // 取得文件名的后缀名赋值给filetype String filetype = inputfile.substring(inputfile.lastIndexOf(".") + 1); // 如果是Excel文件则创建HSSFWorkbook读取 if (filetype.equalsIgnoreCase("xls")) { // Excel2003 // 另一种方式创建HSSFWorkbook // POIFSFileSystem poiFileSystem = new POIFSFileSystem(fint); // workbook = new HSSFWorkbook(poiFileSystem); workbook = new HSSFWorkbook(fint); } else if (filetype.equalsIgnoreCase("xlsx")) { // Excel2007 workbook = new XSSFWorkbook(fint); } else { throw new Exception("不是Excel文件"); } return workbook; } /** * 根据单元格的类型显示值 * * @param cell * 单元格的实体 * @return 单元格的内容 */ public static String getCell(Cell cell) { if (cell == null) return ""; switch (cell.getCellType()) { case HSSFCell.CELL_TYPE_NUMERIC: return cell.getNumericCellValue() + ""; case HSSFCell.CELL_TYPE_STRING: return cell.getStringCellValue(); case HSSFCell.CELL_TYPE_FORMULA: return cell.getCellFormula(); case HSSFCell.CELL_TYPE_BLANK: return ""; case HSSFCell.CELL_TYPE_BOOLEAN: return cell.getBooleanCellValue() + ""; case HSSFCell.CELL_TYPE_ERROR: return cell.getErrorCellValue() + ""; } return ""; } }
package wordReader; /** * 需要的jar包: * poi-3.0.2-FINAL-20080204.jar * poi-contrib-3.0.2-FINAL-20080204.jar * poi-scratchpad-3.0.2-FINAL-20080204.jar * poi-3.5-beta6-20090622.jar * geronimo-stax-api_1.0_spec-1.0.jar * ooxml-schemas-1.0.jar * openxml4j-bin-beta.jar * poi-ooxml-3.5-beta6-20090622.jar * xmlbeans-2.3.0.jar * dom4j-1.6.1.jar * */ import java.io.IOException; import java.io.InputStream; import org.apache.poi.POIXMLDocument; import org.apache.poi.POIXMLTextExtractor; import org.apache.poi.hssf.usermodel.HSSFCell; import org.apache.poi.hssf.usermodel.HSSFRow; import org.apache.poi.hssf.usermodel.HSSFSheet; import org.apache.poi.hssf.usermodel.HSSFWorkbook; import org.apache.poi.hwpf.extractor.WordExtractor; import org.apache.poi.openxml4j.exceptions.OpenXML4JException; import org.apache.poi.openxml4j.opc.OPCPackage; import org.apache.poi.xssf.usermodel.XSSFCell; import org.apache.poi.xssf.usermodel.XSSFRow; import org.apache.poi.xssf.usermodel.XSSFSheet; import org.apache.poi.xssf.usermodel.XSSFWorkbook; import org.apache.poi.xwpf.extractor.XWPFWordExtractor; import org.apache.xmlbeans.XmlException; /** * 读取word文件 * * @author 李波 * <p> * 2010-08-04 * </p> */ public class WordAndExcelExtractor { public static void main(String[] args) { try { // String wordFile = // "D:/Documents and Settings/Administrator/桌面/xxxx.docx"; String wordFile = "D:/Documents and Settings/Administrator/桌面/javaPOI.doc"; String wordText2007 = WordAndExcelExtractor .extractTextFromDOC2007(wordFile); System.out.println("wordText2007=======" + wordText2007); // InputStream is = new FileInputStream("D:/XXX研发中心技术岗位职位需求.xls"); // String excelText = WordAndExcelExtractor.extractTextFromXLS(is); // System.out.println("text2003==========" + excelText); // String excelFile = "D:/Hello2007.xlsx"; // String excelText2007 = WordAndExcelExtractor // .extractTextFromXLS2007(excelFile); // System.out.println("excelText2007==========" + excelText2007); } catch (Exception e) { e.printStackTrace(); } } /** * @Method: extractTextFromDOCX * @Description: 从word 2003文档中提取纯文本 * @param * @return String * @throws */ public static String extractTextFromDOC(InputStream is) throws IOException { WordExtractor ex = new WordExtractor(is); // is是WORD文件的InputStream return ex.getText(); } /** * @Method: extractTextFromDOCX * @Description: 从word 2007文档中提取纯文本 * @param * @return String * @throws */ public static String extractTextFromDOC2007(String fileName) throws IOException, OpenXML4JException, XmlException { OPCPackage opcPackage = POIXMLDocument.openPackage(fileName); POIXMLTextExtractor ex = new XWPFWordExtractor(opcPackage); return ex.getText(); } /** * * @Method: extractTextFromXLS * * @Description: 从excel 2003文档中提取纯文本 * @param * * @return String * * @throws */ @SuppressWarnings("deprecation") private static String extractTextFromXLS(InputStream is) throws IOException { StringBuffer content = new StringBuffer(); HSSFWorkbook workbook = new HSSFWorkbook(is); // 创建对Excel工作簿文件的引用 for (int numSheets = 0; numSheets < workbook.getNumberOfSheets(); numSheets++) { if (null != workbook.getSheetAt(numSheets)) { HSSFSheet aSheet = workbook.getSheetAt(numSheets); // 获得一个sheet for (int rowNumOfSheet = 0; rowNumOfSheet <= aSheet .getLastRowNum(); rowNumOfSheet++) { if (null != aSheet.getRow(rowNumOfSheet)) { HSSFRow aRow = aSheet.getRow(rowNumOfSheet); // 获得一行 for (short cellNumOfRow = 0; cellNumOfRow <= aRow .getLastCellNum(); cellNumOfRow++) { if (null != aRow.getCell(cellNumOfRow)) { HSSFCell aCell = aRow.getCell(cellNumOfRow); // 获得列值 if (aCell.getCellType() == HSSFCell.CELL_TYPE_NUMERIC) { content.append(aCell.getNumericCellValue()); } else if (aCell.getCellType() == HSSFCell.CELL_TYPE_BOOLEAN) { content.append(aCell.getBooleanCellValue()); } else { content.append(aCell.getStringCellValue()); } } } } } } } return content.toString(); } /** * @Method: extractTextFromXLS2007 * @Description: 从excel 2007文档中提取纯文本 * @param * * @return String * @throws */ private static String extractTextFromXLS2007(String fileName) throws Exception { StringBuffer content = new StringBuffer(); // 构造 XSSFWorkbook 对象,strPath // 传入文件路径 XSSFWorkbook xwb = new XSSFWorkbook(fileName); // 循环工作表Sheet for (int numSheet = 0; numSheet < xwb.getNumberOfSheets(); numSheet++) { XSSFSheet xSheet = xwb.getSheetAt(numSheet); if (xSheet == null) { continue; } // 循环行Row for (int rowNum = 0; rowNum <= xSheet.getLastRowNum(); rowNum++) { XSSFRow xRow = xSheet.getRow(rowNum); if (xRow == null) { continue; } // 循环列Cell for (int cellNum = 0; cellNum <= xRow.getLastCellNum(); cellNum++) { XSSFCell xCell = xRow.getCell(cellNum); if (xCell == null) { continue; } if (xCell.getCellType() == XSSFCell.CELL_TYPE_BOOLEAN) { content.append(xCell.getBooleanCellValue()); } else if (xCell.getCellType() == XSSFCell.CELL_TYPE_NUMERIC) { content.append(xCell.getNumericCellValue()); } else { content.append(xCell.getStringCellValue()); } } } } return content.toString(); } }