Android中解析doc、docx、xls、xlsx格式文件

最新推荐文章于 2021-08-26 14:41:17 发布
转载最新推荐文章于 2021-08-26 14:41:17 发布 · 912 阅读
文章标签：
#android #string #null #path #exception #n2
android 专栏收录该内容
476 篇文章
订阅专栏
本文提供了几个简单的Java静态方法，用于解析doc、docx、xls、xlsx文件并提取其中的文本内容。解析doc需要tm-extractors-0.4.jar，解析xls需要jxl.jar；docx和xlsx则直接用ZipFile和XmlPullParser读取压缩包内的XML，无需额外的库。
摘要生成于 C知道，由 DeepSeek-R1 满血版支持，前往体验 >
                    
以下代码可以直接复制使用。

       解析doc，需要tm-extractors-0.4.jar这个包

       解析xls，需要jxl.jar这个包

01public static
String readDOC(String path) {

02                // 创建输入流读取doc文件

03                FileInputStream in;

04                String text =
null;

05//                Environment.getExternalStorageDirectory().getAbsolutePath()+ "/aa.doc")

06                try
{

07                        in =
new FileInputStream(new
File(path));

08                        int
a= in.available();

09                        WordExtractor extractor =
null;

10                        // 创建WordExtractor

11                        extractor =
new WordExtractor();

12                        // 对doc文件进行提取

13                        text = extractor.extractText(in);

14                        System.out.println("解析得到的东西"+text);

15                }
catch (FileNotFoundException e) {

16                        e.printStackTrace();

17                }
catch (Exception e) {

18                        e.printStackTrace();

19                }

20                if
(text == null) {

21                        text =
"解析文件出现问题";

22                }

23                return
text;

24        }

       解析xls

01public static
String readXLS(String path) {

02                String str =
"";

03                try
{

04                        Workbook workbook =
null;

05                        workbook = Workbook.getWorkbook(new
File(path));

06                        Sheet sheet = workbook.getSheet(0);

07                        Cell cell =
null;

08                        int
columnCount = sheet.getColumns();

09                        int
rowCount = sheet.getRows();

10                        for
(int 
i = 0; i < rowCount; i++) {

11                                for
(int 
j = 0; j < columnCount; j++) {

12                                        cell = sheet.getCell(j, i);

13                                        String temp2 =
"";

14                                        if
(cell.getType() == CellType.NUMBER) {

15                                                temp2 = ((NumberCell) cell).getValue() +
"";

16                                        }
else if 
(cell.getType() == CellType.DATE) {

17                                                temp2 =
"" + ((DateCell) cell).getDate();

18                                        }
else {

19                                                temp2 =
"" + cell.getContents();

20                                        }

21                                        str = str +
"  " + temp2;

22                                }

23                                str +=
"\n";

24                        }

25                        workbook.close();

26                }
catch (Exception e) {

27                }

28                if
(str == null) {

29                        str =
"解析文件出现问题";

30                }

31                return
str;

32        }

  解析docx

01public static
String readDOCX(String path) {

02                String river =
"";

03                try
{

04                        ZipFile xlsxFile =
new ZipFile(new
File(path));

05                        ZipEntry sharedStringXML = xlsxFile.getEntry("word/document.xml");

06                        InputStream inputStream = xlsxFile.getInputStream(sharedStringXML);

07                        XmlPullParser xmlParser = Xml.newPullParser();

08                        xmlParser.setInput(inputStream,
"utf-8");

09                        int
evtType = xmlParser.getEventType();

10                        while
(evtType != XmlPullParser.END_DOCUMENT) {

11                                switch
(evtType) {

12                                case
XmlPullParser.START_TAG:

13                                        String tag = xmlParser.getName();

14                                        System.out.println(tag);

15                                        if
(tag.equalsIgnoreCase("t")) {

16                                                river += xmlParser.nextText() +
"\n";

17                                        }

18                                        break;

19                                case
XmlPullParser.END_TAG:

20                                        break;

21                                default:

22                                        break;

23                                }

24                                evtType = xmlParser.next();

25                        }

26                }
catch (ZipException e) {

27                        e.printStackTrace();

28                }
catch (IOException e) {

29                        e.printStackTrace();

30                }
catch (XmlPullParserException e) {

31                        e.printStackTrace();

32                }

33                if
(river == null) {

34                        river =
"解析文件出现问题";

35                }

36                return
river;

37        }

        解析xlsx

01public static
String readXLSX(String path) {

02                String str =
"";

03                String v =
null;

04                boolean
flat = false;

05                List<String> ls =
new ArrayList<String>();

06                try
{

07                        ZipFile xlsxFile =
new ZipFile(new
File(path));

08                        ZipEntry sharedStringXML = xlsxFile

09                                        .getEntry("xl/sharedStrings.xml");

10                        InputStream inputStream = xlsxFile.getInputStream(sharedStringXML);

11                        XmlPullParser xmlParser = Xml.newPullParser();

12                        xmlParser.setInput(inputStream,
"utf-8");

13                        int
evtType = xmlParser.getEventType();

14                        while
(evtType != XmlPullParser.END_DOCUMENT) {

15                                switch
(evtType) {

16                                case
XmlPullParser.START_TAG:

17                                        String tag = xmlParser.getName();

18                                        if
(tag.equalsIgnoreCase("t")) {

19                                                ls.add(xmlParser.nextText());

20                                        }

21                                        break;

22                                case
XmlPullParser.END_TAG:

23                                        break;

24                                default:

25                                        break;

26                                }

27                                evtType = xmlParser.next();

28                        }

29                        ZipEntry sheetXML = xlsxFile.getEntry("xl/worksheets/sheet1.xml");

30                        InputStream inputStreamsheet = xlsxFile.getInputStream(sheetXML);

31                        XmlPullParser xmlParsersheet = Xml.newPullParser();

32                        xmlParsersheet.setInput(inputStreamsheet,
"utf-8");

33                        int
evtTypesheet = xmlParsersheet.getEventType();

34                        while
(evtTypesheet != XmlPullParser.END_DOCUMENT) {

35                                switch
(evtTypesheet) {

36                                case
XmlPullParser.START_TAG:

37                                        String tag = xmlParsersheet.getName();

38                                        if
(tag.equalsIgnoreCase("row")) {

39                                        }
else if 
(tag.equalsIgnoreCase("c")) {

40                                                String t = xmlParsersheet.getAttributeValue(null,
"t");

41                                                if
(t != null) {

42                                                        flat =
true;

43                                                        System.out.println(flat +
"有");

44                                                }
else {

45                                                        System.out.println(flat +
"没有");

46                                                        flat =
false;

47                                                }

48                                        }
else if 
(tag.equalsIgnoreCase("v")) {

49                                                v = xmlParsersheet.nextText();

50                                                if
(v != null) {

51                                                        if
(flat) {

52                                                                str += ls.get(Integer.parseInt(v)) +
"  ";

53                                                        }
else {

54                                                                str += v +
"  ";

55                                                        }

56                                                }

57                                        }

58                                        break;

59                                case
XmlPullParser.END_TAG:

60                                        if
(xmlParsersheet.getName().equalsIgnoreCase("row")

61                                                        && v !=
null) {

62                                                str +=
"\n";

63                                        }

64                                        break;

65                                }

66                                evtTypesheet = xmlParsersheet.next();

67                        }

68                        System.out.println(str);

69                }
catch (ZipException e) {

70                        e.printStackTrace();

71                }
catch (IOException e) {

72                        e.printStackTrace();

73                }
catch (XmlPullParserException e) {

74                        e.printStackTrace();

75                }

76                if
(str == null) {

77                        str =
"解析文件出现问题";

78                }

79                return
str;

80        }