下面的代码可以直接复制进行实验:
解析doc,要tm-extractors-0.4.jar这个包
解析xls,要jxl.jar这个包
解析xls
解析docx
解析xlsx
解析doc,要tm-extractors-0.4.jar这个包
解析xls,要jxl.jar这个包
01 | public static
String readDOC(String path) { |
02 | // 创建输入流读取doc文件 |
03 | FileInputStream in; |
04 | String text =
null ; |
05 | // Environment.getExternalStorageDirectory().getAbsolutePath()+ "/aa.doc") |
06 | try
{ |
07 | in =
new FileInputStream( new
File(path)); |
08 | int
a= in.available(); |
09 | WordExtractor extractor =
null ; |
10 | // 创建WordExtractor |
11 | extractor =
new WordExtractor(); |
12 | // 对doc文件进行提取 |
13 | text = extractor.extractText(in); |
14 | System.out.println( "解析得到的东西" +text); |
15 | }
catch (FileNotFoundException e) { |
16 | e.printStackTrace(); |
17 | }
catch (Exception e) { |
18 | e.printStackTrace(); |
19 | } |
20 | if
(text == null ) { |
21 | text =
"解析文件出现问题" ; |
22 | } |
23 | return
text; |
24 | } |
01 | public static
String readXLS(String path) { |
02 | String str =
"" ; |
03 | try
{ |
04 | Workbook workbook =
null ; |
05 | workbook = Workbook.getWorkbook( new
File(path)); |
06 | Sheet sheet = workbook.getSheet( 0 ); |
07 | Cell cell =
null ; |
08 | int
columnCount = sheet.getColumns(); |
09 | int
rowCount = sheet.getRows(); |
10 | for
( int
i = 0 ; i < rowCount; i++) { |
11 | for
( int
j = 0 ; j < columnCount; j++) { |
12 | cell = sheet.getCell(j, i); |
13 | String temp2 =
"" ; |
14 | if
(cell.getType() == CellType.NUMBER) { |
15 | temp2 = ((NumberCell) cell).getValue() +
"" ; |
16 | }
else if
(cell.getType() == CellType.DATE) { |
17 | temp2 =
"" + ((DateCell) cell).getDate(); |
18 | }
else { |
19 | temp2 =
"" + cell.getContents(); |
20 | } |
21 | str = str +
" " + temp2; |
22 | } |
23 | str +=
"\n" ; |
24 | } |
25 | workbook.close(); |
26 | }
catch (Exception e) { |
27 | } |
28 | if
(str == null ) { |
29 | str =
"解析文件出现问题" ; |
30 | } |
31 | return
str; |
32 | } |
01 | public static
String readDOCX(String path) { |
02 | String river =
"" ; |
03 | try
{ |
04 | ZipFile xlsxFile =
new ZipFile( new
File(path)); |
05 | ZipEntry sharedStringXML = xlsxFile.getEntry( "word/document.xml" ); |
06 | InputStream inputStream = xlsxFile.getInputStream(sharedStringXML); |
07 | XmlPullParser xmlParser = Xml.newPullParser(); |
08 | xmlParser.setInput(inputStream,
"utf-8" ); |
09 | int
evtType = xmlParser.getEventType(); |
10 | while
(evtType != XmlPullParser.END_DOCUMENT) { |
11 | switch
(evtType) { |
12 | case
XmlPullParser.START_TAG: |
13 | String tag = xmlParser.getName(); |
14 | System.out.println(tag); |
15 | if
(tag.equalsIgnoreCase( "t" )) { |
16 | river += xmlParser.nextText() +
"\n" ; |
17 | } |
18 | break ; |
19 | case
XmlPullParser.END_TAG: |
20 | break ; |
21 | default : |
22 | break ; |
23 | } |
24 | evtType = xmlParser.next(); |
25 | } |
26 | }
catch (ZipException e) { |
27 | e.printStackTrace(); |
28 | }
catch (IOException e) { |
29 | e.printStackTrace(); |
30 | }
catch (XmlPullParserException e) { |
31 | e.printStackTrace(); |
32 | } |
33 | if
(river == null ) { |
34 | river =
"解析文件出现问题" ; |
35 | } |
36 | return
river; |
37 | } |
01 | public static
String readXLSX(String path) { |
02 | String str =
"" ; |
03 | String v =
null ; |
04 | boolean
flat = false ; |
05 | List<String> ls =
new ArrayList<String>(); |
06 | try
{ |
07 | ZipFile xlsxFile =
new ZipFile( new
File(path)); |
08 | ZipEntry sharedStringXML = xlsxFile |
09 | .getEntry( "xl/sharedStrings.xml" ); |
10 | InputStream inputStream = xlsxFile.getInputStream(sharedStringXML); |
11 | XmlPullParser xmlParser = Xml.newPullParser(); |
12 | xmlParser.setInput(inputStream,
"utf-8" ); |
13 | int
evtType = xmlParser.getEventType(); |
14 | while
(evtType != XmlPullParser.END_DOCUMENT) { |
15 | switch
(evtType) { |
16 | case
XmlPullParser.START_TAG: |
17 | String tag = xmlParser.getName(); |
18 | if
(tag.equalsIgnoreCase( "t" )) { |
19 | ls.add(xmlParser.nextText()); |
20 | } |
21 | break ; |
22 | case
XmlPullParser.END_TAG: |
23 | break ; |
24 | default : |
25 | break ; |
26 | } |
27 | evtType = xmlParser.next(); |
28 | } |
29 | ZipEntry sheetXML = xlsxFile.getEntry( "xl/worksheets/sheet1.xml" ); |
30 | InputStream inputStreamsheet = xlsxFile.getInputStream(sheetXML); |
31 | XmlPullParser xmlParsersheet = Xml.newPullParser(); |
32 | xmlParsersheet.setInput(inputStreamsheet,
"utf-8" ); |
33 | int
evtTypesheet = xmlParsersheet.getEventType(); |
34 | while
(evtTypesheet != XmlPullParser.END_DOCUMENT) { |
35 | switch
(evtTypesheet) { |
36 | case
XmlPullParser.START_TAG: |
37 | String tag = xmlParsersheet.getName(); |
38 | if
(tag.equalsIgnoreCase( "row" )) { |
39 | }
else if
(tag.equalsIgnoreCase( "c" )) { |
40 | String t = xmlParsersheet.getAttributeValue( null ,
"t" ); |
41 | if
(t != null ) { |
42 | flat =
true ; |
43 | System.out.println(flat +
"有" ); |
44 | }
else { |
45 | System.out.println(flat +
"没有" ); |
46 | flat =
false ; |
47 | } |
48 | }
else if
(tag.equalsIgnoreCase( "v" )) { |
49 | v = xmlParsersheet.nextText(); |
50 | if
(v != null ) { |
51 | if
(flat) { |
52 | str += ls.get(Integer.parseInt(v)) +
" " ; |
53 | }
else { |
54 | str += v +
" " ; |
55 | } |
56 | } |
57 | } |
58 | break ; |
59 | case
XmlPullParser.END_TAG: |
60 | if
(xmlParsersheet.getName().equalsIgnoreCase( "row" ) |
61 | && v !=
null ) { |
62 | str +=
"\n" ; |
63 | } |
64 | break ; |
65 | } |
66 | evtTypesheet = xmlParsersheet.next(); |
67 | } |
68 | System.out.println(str); |
69 | }
catch (ZipException e) { |
70 | e.printStackTrace(); |
71 | }
catch (IOException e) { |
72 | e.printStackTrace(); |
73 | }
catch (XmlPullParserException e) { |
74 | e.printStackTrace(); |
75 | } |
76 | if
(str == null ) { |
77 | str =
"解析文件出现问题" ; |
78 | } |
79 | return
str; |
80 | } |