解析doc文件需要下载tm-extractors-0.4.jar;解析xls文件需要下载jxl.jar。
一、解析doc文件:
public static String readDOC(String path) {
02
// 创建输入流读取doc文件
03
FileInputStream in;
04
String text = null;
05
// Environment.getExternalStorageDirectory().getAbsolutePath()+ "/aa.doc")
06
try {
07
in = new FileInputStream(new File(path));
08
int a= in.available();
09
WordExtractor extractor = null;
10
// 创建WordExtractor
11
extractor = new WordExtractor();
12
// 对doc文件进行提取
13
text = extractor.extractText(in);
14
System.out.println("解析得到的东西"+text);
15
} catch (FileNotFoundException e) {
16
e.printStackTrace();
17
} catch (Exception e) {
18
e.printStackTrace();
19
}
20
if (text == null) {
21
text = "解析文件出现问题";
22
}
23
return text;
24
}
二、解析xls文件:
public static String readXLS(String path) {
02
String str = "";
03
try {
04
Workbook workbook = null;
05
workbook = Workbook.getWorkbook(new File(path));
06
Sheet sheet = workbook.getSheet(0);
07
Cell cell = null;
08
int columnCount = sheet.getColumns();
09
int rowCount = sheet.getRows();
10
for (int i = 0; i < rowCount; i++) {
11
for (int j = 0; j < columnCount; j++) {
12
cell = sheet.getCell(j, i);
13
String temp2 = "";
14
if (cell.getType() == CellType.NUMBER) {
15
temp2 = ((NumberCell) cell).getValue() + "";
16
} else if (cell.getType() == CellType.DATE) {
17
temp2 = "" + ((DateCell) cell).getDate();
18
} else {
19
temp2 = "" + cell.getContents();
20
}
21
str = str + " " + temp2;
22
}
23
str += " ";
24
}
25
workbook.close();
26
} catch (Exception e) {
27
}
28
if (str == null) {
29
str = "解析文件出现问题";
30
}
31
return str;
32
}
三、解析docx文件:
public static String readDOCX(String path) {
02
String river = "";
03
try {
04
ZipFile xlsxFile = new ZipFile(new File(path));
05
ZipEntry sharedStringXML = xlsxFile.getEntry("word/document.xml");
06
InputStream inputStream = xlsxFile.getInputStream(sharedStringXML);
07
XmlPullParser xmlParser = Xml.newPullParser();
08
xmlParser.setInput(inputStream, "utf-8");
09
int evtType = xmlParser.getEventType();
10
while (evtType != XmlPullParser.END_DOCUMENT) {
11
switch (evtType) {
12
case XmlPullParser.START_TAG:
13
String tag = xmlParser.getName();
14
System.out.println(tag);
15
if (tag.equalsIgnoreCase("t")) {
16
river += xmlParser.nextText() + " ";
17
}
18
break;
19
case XmlPullParser.END_TAG:
20
break;
21
default:
22
break;
23
}
24
evtType = xmlParser.next();
25
}
26
} catch (ZipException e) {
27
e.printStackTrace();
28
} catch (IOException e) {
29
e.printStackTrace();
30
} catch (XmlPullParserException e) {
31
e.printStackTrace();
32
}
33
if (river == null) {
34
river = "解析文件出现问题";
35
}
36
return river;
37
}
四、解析xlsx文件:
public static String readXLSX(String path) {
02
String str = "";
03
String v = null;
04
boolean flat = false;
05
List<String> ls = new ArrayList<String>();
06
try {
07
ZipFile xlsxFile = new ZipFile(new File(path));
08
ZipEntry sharedStringXML = xlsxFile
09
.getEntry("xl/sharedStrings.xml");
10
InputStream inputStream = xlsxFile.getInputStream(sharedStringXML);
11
XmlPullParser xmlParser = Xml.newPullParser();
12
xmlParser.setInput(inputStream, "utf-8");
13
int evtType = xmlParser.getEventType();
14
while (evtType != XmlPullParser.END_DOCUMENT) {
15
switch (evtType) {
16
case XmlPullParser.START_TAG:
17
String tag = xmlParser.getName();
18
if (tag.equalsIgnoreCase("t")) {
19
ls.add(xmlParser.nextText());
20
}
21
break;
22
case XmlPullParser.END_TAG:
23
break;
24
default:
25
break;
26
}
27
evtType = xmlParser.next();
28
}
29
ZipEntry sheetXML = xlsxFile.getEntry("xl/worksheets/sheet1.xml");
30
InputStream inputStreamsheet = xlsxFile.getInputStream(sheetXML);
31
XmlPullParser xmlParsersheet = Xml.newPullParser();
32
xmlParsersheet.setInput(inputStreamsheet, "utf-8");
33
int evtTypesheet = xmlParsersheet.getEventType();
34
while (evtTypesheet != XmlPullParser.END_DOCUMENT) {
35
switch (evtTypesheet) {
36
case XmlPullParser.START_TAG:
37
String tag = xmlParsersheet.getName();
38
if (tag.equalsIgnoreCase("row")) {
39
} else if (tag.equalsIgnoreCase("c")) {
40
String t = xmlParsersheet.getAttributeValue(null, "t");
41
if (t != null) {
42
flat = true;
43
System.out.println(flat + "有");
44
} else {
45
System.out.println(flat + "没有");
46
flat = false;
47
}
48
} else if (tag.equalsIgnoreCase("v")) {
49
v = xmlParsersheet.nextText();
50
if (v != null) {
51
if (flat) {
52
str += ls.get(Integer.parseInt(v)) + " ";
53
} else {
54
str += v + " ";
55
}
56
}
57
}
58
break;
59
case XmlPullParser.END_TAG:
60
if (xmlParsersheet.getName().equalsIgnoreCase("row")
61
&& v != null) {
62
str += " ";
63
}
64
break;
65
}
66
evtTypesheet = xmlParsersheet.next();
67
}
68
System.out.println(str);
69
} catch (ZipException e) {
70
e.printStackTrace();
71
} catch (IOException e) {
72
e.printStackTrace();
73
} catch (XmlPullParserException e) {
74
e.printStackTrace();
75
}
76
if (str == null) {
77
str = "解析文件出现问题";
78
}
79
return str;
80
}