// Word-frequency counting, implemented mainly with Java's Map and ArrayList.
package count;
import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.TreeMap;
import java.util.regex.Pattern;
/**
 * Counts how often each English word occurs in a text file and prints the result.
 *
 * <p>A word is a maximal run of ASCII letters ({@code a-z}, {@code A-Z}); every other
 * character acts as a separator. Words are compared case-insensitively by lowercasing them.
 */
public class WordCount {

    /** File that is read when no path is supplied on the command line. */
    private static final String DEFAULT_INPUT_PATH = "F:/oldman.txt";

    /** One or more consecutive non-letter characters; compiled once and reused for every line. */
    private static final Pattern NON_LETTERS = Pattern.compile("[^a-zA-Z]+");

    /**
     * Reads the input file, then prints the total number of words followed by the
     * per-word counts in alphabetical order.
     *
     * @param args optional; {@code args[0]} is the path of the file to read. When absent,
     *     {@value #DEFAULT_INPUT_PATH} is used.
     * @throws Exception if the file cannot be opened or read
     */
    public static void main(String args[]) throws Exception {
        String path = (args != null && args.length > 0) ? args[0] : DEFAULT_INPUT_PATH;

        List<String> words;
        BufferedReader reader = new BufferedReader(new FileReader(path));
        try {
            words = readWords(reader);
        } finally {
            // Close even when reading fails so the file handle is never leaked.
            reader.close();
        }

        System.out.println(words.size());
        System.out.println(countWords(words).entrySet());
    }

    /**
     * Extracts every word from the reader, lowercased, in order of appearance.
     *
     * @param reader source of the text; not closed by this method
     * @return all words found, possibly empty, never containing the empty string
     * @throws IOException if reading a line fails
     */
    private static List<String> readWords(BufferedReader reader) throws IOException {
        List<String> words = new ArrayList<String>();
        String line;
        while ((line = reader.readLine()) != null) {
            for (String token : NON_LETTERS.split(line)) {
                // split() yields "" for blank lines and for lines that begin with a
                // separator; those are not words and must not be counted.
                if (token.length() > 0) {
                    // Locale.ROOT keeps the result independent of the default locale
                    // (e.g. Turkish would otherwise map "I" to a dotless i).
                    words.add(token.toLowerCase(Locale.ROOT));
                }
            }
        }
        return words;
    }

    /**
     * Tallies the occurrences of each word.
     *
     * @param words the words to count
     * @return a map from word to occurrence count, sorted alphabetically by word
     */
    private static Map<String, Integer> countWords(List<String> words) {
        // TreeMap keeps the keys in natural (alphabetical) order for printing.
        Map<String, Integer> counts = new TreeMap<String, Integer>();
        for (String word : words) {
            Integer seen = counts.get(word);
            counts.put(word, seen == null ? 1 : seen + 1);
        }
        return counts;
    }
}