package com.caac.utils;
import java.io.BufferedInputStream;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import org.apache.pdfbox.pdfparser.PDFParser;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.util.PDFTextStripper;
import org.apache.poi.POIXMLTextExtractor;
import org.apache.poi.hwpf.extractor.WordExtractor;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.Sheet;
import org.apache.poi.ss.usermodel.Workbook;
import org.apache.poi.ss.usermodel.WorkbookFactory;
import org.apache.poi.xwpf.extractor.XWPFWordExtractor;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
/**
 * Utility class for extracting the text content of attachment files.
 *
 * <p>The file type is chosen from the file extension: Word ({@code doc} / {@code docx}),
 * Excel ({@code xls} / {@code xlsx}), plain text ({@code txt}) and PDF ({@code pdf}).
 * All methods are static; failures are reported with {@code printStackTrace()} and an
 * empty string is returned, which is the error convention used throughout this class.
 */
public class FjUtils {

    public FjUtils() {
        super();
    }

    /**
     * Reads the content of an attachment and returns it as a string.
     *
     * <p>The extension is matched case-insensitively, so {@code report.PDF} is handled the
     * same way as {@code report.pdf}.
     *
     * @param fjPath path of the attachment; {@code null} or empty yields an empty result
     * @return the extracted text, or an empty string when the path is {@code null}/empty,
     *         the extension is not supported, or reading fails
     */
    public static String getFjToStr(String fjPath) {
        String fjStr = "";
        // Guard against null as well as empty: the original check let null through to substring().
        if (fjPath == null || "".equals(fjPath)) {
            return fjStr;
        }
        // Everything after the last '.'; when there is no '.', this is the whole path
        // and simply matches none of the supported extensions.
        String prefix = fjPath.substring(fjPath.lastIndexOf(".") + 1);
        try {
            if ("doc".equalsIgnoreCase(prefix) || "docx".equalsIgnoreCase(prefix)) { // word
                fjStr = getDocContent(fjPath, fjStr, prefix);
            } else if ("xls".equalsIgnoreCase(prefix) || "xlsx".equalsIgnoreCase(prefix)) { // excel
                fjStr = getExcelContent(fjPath);
            } else if ("txt".equalsIgnoreCase(prefix)) { // txt
                fjStr = getTxtContent(fjPath, fjStr);
            } else if ("pdf".equalsIgnoreCase(prefix)) { // pdf
                fjStr = getPdfContent(fjPath);
            }
        } catch (FileNotFoundException fnfe) {
            fnfe.printStackTrace();
        } catch (IOException ioe) {
            ioe.printStackTrace();
        }
        return fjStr;
    }

    /**
     * Extracts the text of a Word document.
     *
     * @param fjPath path of the Word file
     * @param fjStr  value returned unchanged when {@code lx} is neither {@code doc} nor {@code docx}
     * @param lx     file extension ({@code doc} for Word 2003, {@code docx} for Word 2007),
     *               compared case-insensitively
     * @return the document text
     * @throws IOException           if the file cannot be read or parsed
     * @throws FileNotFoundException if the file does not exist
     */
    private static String getDocContent(String fjPath, String fjStr, String lx) throws java.io.IOException,
            java.io.FileNotFoundException {
        String content = fjStr;
        if ("doc".equalsIgnoreCase(lx)) { // word 2003
            // try-with-resources: the stream is released even when extraction throws.
            try (InputStream is = new FileInputStream(new File(fjPath))) {
                WordExtractor ex = new WordExtractor(is);
                content = ex.getText();
            }
        } else if ("docx".equalsIgnoreCase(lx)) { // word 2007
            try (InputStream is = new FileInputStream(new File(fjPath))) {
                XWPFDocument document = new XWPFDocument(is);
                POIXMLTextExtractor extractor = new XWPFWordExtractor(document);
                content = extractor.getText();
            }
        }
        return content;
    }

    /**
     * Extracts the content of the first sheet of an Excel workbook.
     *
     * <p>Every cell of every non-blank row is appended to the result followed by a comma.
     * The number of columns read per row is taken from the first row (row index 0); when
     * the sheet has no first row, nothing is extracted. Rows that are undefined or blank
     * are skipped.
     *
     * @param fjPath path of the Excel file
     * @return the comma-separated cell values, or what was collected so far if reading fails
     */
    private static String getExcelContent(String fjPath) {
        StringBuilder result = new StringBuilder();
        try (InputStream inp = new FileInputStream(new File(fjPath))) {
            Workbook wb = WorkbookFactory.create(inp);
            Sheet sheet = wb.getSheetAt(0); // only the first sheet is read
            org.apache.poi.ss.usermodel.Row firstRow = sheet.getRow(0);
            if (firstRow == null) {
                // No first row to take the column count from; previously this surfaced as an NPE.
                return result.toString();
            }
            int maxRowNum = sheet.getLastRowNum() + 1; // number of rows to scan
            int maxCellNum = firstRow.getLastCellNum(); // number of columns, from the first row
            for (int i = 0; i < maxRowNum; i++) {
                org.apache.poi.ss.usermodel.Row row = sheet.getRow(i);
                // getRow returns null for rows that were never defined; isBlankRow treats
                // those as blank. The check is done once per row rather than once per cell.
                if (isBlankRow(row, maxCellNum)) {
                    continue;
                }
                for (int j = 0; j < maxCellNum; j++) {
                    result.append(getCellToStr(row.getCell(j))).append(",");
                }
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
        return result.toString();
    }

    /**
     * Returns the value of an Excel cell as a string.
     *
     * <p>Numeric, string and boolean cells are converted; formula cells and all other
     * cell types yield an empty string.
     *
     * @param cell the cell to read, may be {@code null}
     * @return the cell value as a string, or an empty string for {@code null}, formula
     *         and other unsupported cells
     */
    private static String getCellToStr(Cell cell) {
        if (cell == null) {
            return "";
        }
        switch (cell.getCellType()) {
            case Cell.CELL_TYPE_NUMERIC:
                return String.valueOf(cell.getNumericCellValue());
            case Cell.CELL_TYPE_STRING:
                String text = cell.getStringCellValue();
                return text == null ? "" : text;
            case Cell.CELL_TYPE_BOOLEAN:
                return String.valueOf(cell.getBooleanCellValue());
            case Cell.CELL_TYPE_FORMULA:
            default:
                return "";
        }
    }

    /**
     * Determines whether a row is blank.
     *
     * <p>A row is blank when it is {@code null} or when none of its first
     * {@code excelLastcell} cells converts to a non-empty string via
     * {@link #getCellToStr(Cell)}.
     *
     * @param columnRow     the row to inspect, may be {@code null}
     * @param excelLastcell number of leading cells to inspect
     * @return {@code true} if the row is blank
     */
    private static boolean isBlankRow(org.apache.poi.ss.usermodel.Row columnRow, int excelLastcell) {
        if (columnRow == null) {
            return true;
        }
        for (int i = 0; i < excelLastcell; i++) {
            if (!"".equals(getCellToStr(columnRow.getCell(i)))) {
                return false;
            }
        }
        return true;
    }

    /**
     * Extracts the text of a PDF document.
     *
     * @param fjPath path of the PDF file
     * @return the document text, or an empty string if the file cannot be read or parsed
     */
    private static String getPdfContent(String fjPath) {
        String str = "";
        try (FileInputStream fis = new FileInputStream(new File(fjPath))) {
            PDFParser p = new PDFParser(fis);
            p.parse();
            PDDocument pdd = p.getPDDocument();
            try {
                PDFTextStripper ts = new PDFTextStripper();
                str = ts.getText(pdd);
            } finally {
                // Close the document even when text extraction fails.
                pdd.close();
            }
        } catch (Exception e) {
            // Report the failure like the other readers in this class instead of hiding it.
            e.printStackTrace();
        }
        return str;
    }

    /**
     * Reads the content of a text file.
     *
     * <p>The encoding is chosen from the byte-order mark reported by
     * {@link #getCharset(String)}. Lines are joined with the platform line separator,
     * without a leading separator and without the BOM character. When the file is not
     * detected as UTF-8, the decoded content is written back to the same file in UTF-8
     * via {@link #writeFile(String, String)}.
     *
     * @param fjPath path of the text file
     * @param fjStr  unused; kept for signature compatibility
     * @return the file content
     * @throws IOException           if the file cannot be read
     * @throws FileNotFoundException if the file does not exist
     */
    private static String getTxtContent(String fjPath, String fjStr) throws java.io.IOException,
            java.io.FileNotFoundException {
        StringBuilder result = new StringBuilder();
        String bm = getCharset(fjPath); // detected encoding
        System.out.println("bm:" + bm);
        try (BufferedReader br = openTxtReader(fjPath, bm)) {
            boolean firstLine = true;
            String s;
            while ((s = br.readLine()) != null) {
                if (firstLine) {
                    // The UTF-8 decoder keeps the BOM as a U+FEFF character; drop it.
                    if (s.length() > 0 && s.charAt(0) == '\uFEFF') {
                        s = s.substring(1);
                    }
                    firstLine = false;
                } else {
                    // Separator goes between lines only, so the text (and the file that is
                    // rewritten below) no longer gains a blank first line on every read.
                    result.append(System.lineSeparator());
                }
                result.append(s);
            }
        }
        String content = result.toString();
        if (!"UTF-8".equals(bm)) {
            // NOTE(review): the file is rewritten as UTF-8 without a BOM, so the next read
            // falls into the platform-default branch again — confirm this is intended.
            writeFile(fjPath, content);
        }
        return content;
    }

    /**
     * Opens a reader for a text file using the encoding reported by {@link #getCharset(String)}.
     *
     * @param fjPath path of the text file
     * @param bm     encoding name returned by {@code getCharset}
     * @return a buffered reader over the file
     * @throws IOException if the file cannot be opened
     */
    private static BufferedReader openTxtReader(String fjPath, String bm) throws IOException {
        if ("UTF-8".equals(bm)) {
            return new BufferedReader(new InputStreamReader(new FileInputStream(fjPath), "UTF-8"));
        }
        if ("Unicode".equals(bm) || "UTF-16BE".equals(bm)) {
            // Both values mean a UTF-16 BOM was found; the "UTF-16" decoder reads the BOM to
            // pick the byte order and removes it. Reading such a file with the platform
            // charset garbled the text, which was then written back over the original.
            return new BufferedReader(new InputStreamReader(new FileInputStream(fjPath), "UTF-16"));
        }
        // No recognised BOM: read with the platform default charset, as before.
        return new BufferedReader(new FileReader(new File(fjPath)));
    }

    /**
     * Detects the encoding of a text file from its first two bytes.
     *
     * @param fileName path of the text file
     * @return {@code "UTF-8"} for bytes EF BB, {@code "Unicode"} for FF FE,
     *         {@code "UTF-16BE"} for FE FF, otherwise {@code "GBK"}
     * @throws IOException if the file cannot be read
     */
    private static String getCharset(String fileName) throws IOException {
        try (BufferedInputStream bin = new BufferedInputStream(new FileInputStream(fileName))) {
            // Combine the first two bytes; for files shorter than two bytes read() returns -1
            // and the value matches none of the cases below.
            int p = (bin.read() << 8) + bin.read();
            switch (p) {
                case 0xefbb:
                    return "UTF-8";
                case 0xfffe:
                    return "Unicode";
                case 0xfeff:
                    return "UTF-16BE";
                default:
                    return "GBK";
            }
        }
    }

    /**
     * Writes the given content to a file in UTF-8, replacing any existing content.
     *
     * <p>Failures are not propagated: a message and the stack trace are printed instead.
     *
     * @param filePathAndName file name including its path
     * @param fileContent     the string to write to the file
     */
    public static void writeFile(String filePathAndName, String fileContent) {
        try {
            File f = new File(filePathAndName);
            if (!f.exists()) {
                f.createNewFile();
            }
            // try-with-resources: the writer is flushed and closed even when write() throws.
            try (BufferedWriter writer =
                    new BufferedWriter(new OutputStreamWriter(new FileOutputStream(f), "UTF-8"))) {
                writer.write(fileContent);
            }
        } catch (Exception e) {
            System.out.println("写文件内容操作出错");
            e.printStackTrace();
        }
    }
}
// Note: the pdfbox and fontbox library versions must match.
// See the test example testFjContent for reference.