相关依赖
<!-- Apache POI core library; all four POI artifacts below are pinned to the same version (3.9). -->
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi</artifactId>
<version>3.9</version>
</dependency>
<!-- OOXML support; presumably the source of XWPFDocument / XWPFWordExtractor used for .docx files (confirm against the imports). -->
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-ooxml</artifactId>
<version>3.9</version>
</dependency>
<!-- Schema classes that accompany poi-ooxml; keep its version in step with poi-ooxml. -->
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-ooxml-schemas</artifactId>
<version>3.9</version>
</dependency>
<!-- Scratchpad module; presumably the source of WordExtractor used for legacy .doc files (confirm against the imports). -->
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-scratchpad</artifactId>
<version>3.9</version>
</dependency>
实现代码
/**
 * Matches every character that is NOT counted as a Chinese "word": anything outside
 * the CJK range U+4E00..U+9FA5 and outside the listed Chinese punctuation marks
 * (full-width comma, ideographic full stop, double angle brackets, full-width
 * question mark / semicolon / colon / exclamation mark / parentheses, curly quotes,
 * lenticular brackets, ideographic comma, ellipsis, yen signs and middle dot).
 * ASCII punctuation is deliberately absent so it is not counted twice (it is already
 * part of the Latin tokens below).
 */
private static final java.util.regex.Pattern NON_CHINESE_CHARS = java.util.regex.Pattern.compile(
        "[^\\u4e00-\\u9fa5"
        + "\\uff0c\\u3002\\u300a\\u300b\\uff1f\\uff1b\\u2019\\u2018\\uff1a\\u201c\\u201d"
        + "\\u3010\\u3011\\u3001\\uff09\\uff08\\u2026\\uffe5\\u00a5\\uff01\\u00b7]");

/**
 * Matches every character that cannot be part of a Latin/ASCII token: anything other
 * than ASCII letters, digits and the listed ASCII punctuation. Such characters
 * (including all whitespace) act as token separators.
 */
private static final java.util.regex.Pattern NON_LATIN_TOKEN_CHARS = java.util.regex.Pattern.compile(
        "[^(a-zA-Z0-9`\\-=';.,/~!@#$%^&*()_+|}{\":><?\\[\\])]");

/**
 * Counts the words in a Word document.
 *
 * <p>The count is the sum of (a) every Chinese character and Chinese punctuation mark,
 * each counted as one word, and (b) every run of ASCII letters, digits and ASCII
 * punctuation separated by any other character, each run counted as one word.
 *
 * <p>The file type is taken from the file-name suffix, compared case-insensitively:
 * {@code doc} is read with {@code WordExtractor}, {@code docx} with
 * {@code XWPFWordExtractor}. Any other suffix yields 0 without opening the file.
 *
 * @param file the document to inspect; {@code null} yields 0
 * @return the word count, or 0 when the file type is unsupported or an
 *         {@link IOException} occurs while reading (the stack trace is printed)
 */
public static int getWordCount(File file) {
    if (file == null) {
        return 0;
    }

    String fileName = file.getName();
    // Locale.ROOT keeps the lower-casing independent of the default locale.
    String suffix = fileName.substring(fileName.lastIndexOf('.') + 1)
            .toLowerCase(java.util.Locale.ROOT);
    boolean legacyDoc = "doc".equals(suffix);
    if (!legacyDoc && !"docx".equals(suffix)) {
        return 0;
    }

    String content;
    // try-with-resources closes the stream even when the extractor throws.
    try (FileInputStream fis = new FileInputStream(file)) {
        if (legacyDoc) {
            WordExtractor wordExtractor = new WordExtractor(fis);
            content = wordExtractor.getText();
        } else {
            XWPFDocument document = new XWPFDocument(fis);
            XWPFWordExtractor extractor = new XWPFWordExtractor(document);
            content = extractor.getText();
        }
    } catch (IOException e) {
        // Best-effort contract kept from the original: report the failure and return 0.
        e.printStackTrace();
        return 0;
    }
    if (content == null) {
        return 0;
    }

    // Every retained character is in the BMP, so length() equals the character count.
    int cnWordsCount = NON_CHINESE_CHARS.matcher(content).replaceAll("").length();

    // Turn every non-token character into a space, then count the non-empty pieces.
    String latinOnly = NON_LATIN_TOKEN_CHARS.matcher(content).replaceAll(" ");
    int noCnWordsCount = 0;
    for (String token : latinOnly.split(" ")) {
        if (!token.isEmpty()) {
            noCnWordsCount++;
        }
    }

    return cnWordsCount + noCnWordsCount;
}