import java.io.BufferedReader;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.util.Arrays;
import java.util.Comparator;
import java.util.Iterator;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.StringTokenizer;
import java.util.TreeMap;
/**
 * Counts how often each word occurs in a text file and prints the result.
 *
 * <p>Words are lower-cased before counting. Output is grouped by first letter;
 * words that share a first letter are listed shortest first, and words of the
 * same length are listed in alphabetical order.
 *
 * @author tpf
 */
public class WordCount {
    /** Characters that separate words within a line. */
    private static final String DELIMITERS = ",.;:()-!?/' ";

    /** File that is read when no path is passed on the command line. */
    private static final String DEFAULT_FILE = "1.txt";

    /**
     * Output order: first character, then length (shorter first), then
     * natural string order. An empty string sorts before everything else.
     */
    private static final Comparator<String> WORD_ORDER = new Comparator<String>() {
        @Override
        public int compare(String o1, String o2) {
            // Plain subtraction is safe below: every operand is in [-1, Integer.MAX_VALUE],
            // so the difference cannot overflow.
            int byFirstLetter = firstLetter(o1) - firstLetter(o2);
            if (byFirstLetter != 0) {
                return byFirstLetter;
            }
            int byLength = o1.length() - o2.length();
            if (byLength != 0) {
                return byLength;
            }
            return o1.compareTo(o2);
        }

        /** Returns the first character of {@code s}, or -1 when {@code s} is empty. */
        private int firstLetter(String s) {
            return s.length() == 0 ? -1 : s.charAt(0);
        }
    };

    /**
     * Counts the words of a file and prints one {@code word---->count} line per word.
     *
     * @param args optional; {@code args[0]} is the path of the file to read.
     *             When absent, {@code 1.txt} in the working directory is used.
     */
    public static void main(String[] args) {
        String file = (args != null && args.length > 0) ? args[0] : DEFAULT_FILE;
        // Count the occurrences of every word.
        Map<String, Integer> wordTimesMap = countWordTimes(file);
        // Print each word with its count.
        display(wordTimesMap);
    }

    /**
     * Prints every word and its count to standard output, ordered by first
     * letter, then by length, then alphabetically.
     *
     * @param wordTimesMap word to occurrence-count mapping
     */
    private static void display(Map<String, Integer> wordTimesMap) {
        Set<String> keys = wordTimesMap.keySet();
        String[] words = keys.toArray(new String[0]);
        Arrays.sort(words, WORD_ORDER);
        for (String word : words) {
            System.out.println(word + "---->" + wordTimesMap.get(word));
        }
    }

    /**
     * Counts the occurrences of each word in the given file.
     *
     * <p>If the file cannot be opened or read, the stack trace is printed and
     * whatever was counted up to that point (possibly nothing) is returned.
     *
     * @param file path of the text file to read
     * @return map from lower-cased word to the number of times it occurs
     */
    private static Map<String, Integer> countWordTimes(String file) {
        Map<String, Integer> wordTimes = new TreeMap<String, Integer>();
        BufferedReader in = null;
        try {
            // NOTE: FileReader decodes the file with the JVM's default charset.
            in = new BufferedReader(new FileReader(file));
            String line;
            while ((line = in.readLine()) != null) {
                StringTokenizer tokens = new StringTokenizer(line, DELIMITERS);
                while (tokens.hasMoreTokens()) {
                    // Locale.ROOT keeps case folding independent of the machine's
                    // locale (e.g. "I" must not become a dotless i under Turkish).
                    String word = tokens.nextToken().toLowerCase(Locale.ROOT);
                    Integer seen = wordTimes.get(word);
                    wordTimes.put(word, seen == null ? 1 : seen + 1);
                }
            }
        } catch (IOException e) {
            // Also covers FileNotFoundException, which is a subclass of IOException.
            e.printStackTrace();
        } finally {
            if (in != null) {
                try {
                    in.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
        return wordTimes;
    }
}