示例程序中使用到的文件如下:
示例程序:
package Array_list_study;
import java.io.*;
import java.util.*;
/**
 * Compares the vocabularies of two text files.
 *
 * <p>The program prompts for two file names, extracts the distinct
 * lower-cased words of each file, computes the words they share, and prints
 * the vocabulary sizes together with the overlap percentages.
 */
public class Vocabulary_compare {
    /**
     * Program entry point: prompts for two file names on standard input and
     * prints the comparison report on standard output.
     *
     * @param args command-line arguments (unused)
     * @throws FileNotFoundException if either named file cannot be opened
     */
    public static void main(String[] args) throws FileNotFoundException{
        // Deliberately not closed: closing this scanner would also close System.in.
        Scanner console = new Scanner(System.in);
        giveIntro();
        System.out.print("file #1 name?"); // e.g. poem1
        ArrayList<String> list1 = readWords(console.nextLine());
        System.out.print("file #2 name?"); // e.g. poem2
        ArrayList<String> list2 = readWords(console.nextLine());
        System.out.println();
        ArrayList<String> common = getOverlap(list1,list2);
        reportResults(list1,list2,common);
    }

    // Opens the named file, extracts its vocabulary with getWords, and always
    // closes the file again, even when reading fails part-way through.
    private static ArrayList<String> readWords(String fileName) throws FileNotFoundException{
        try (Scanner input = new Scanner(new File(fileName))) {
            return getWords(input);
        }
    }

    /**
     * Reads every word from {@code input}, converts it to lower case, and
     * returns the distinct words in sorted (natural {@code String}) order.
     *
     * <p>A word is a maximal run of ASCII letters and apostrophes; every other
     * character is treated as a separator. The scanner's delimiter is changed
     * by this call, and the scanner is consumed but not closed.
     *
     * @param input the scanner to read words from
     * @return a sorted list of the unique lower-cased words; empty if there are none
     */
    public static ArrayList<String> getWords(Scanner input){
        // Ignore everything except letters and apostrophes (the argument is a regex).
        input.useDelimiter("[^a-zA-Z']+");
        // A TreeSet keeps the words sorted and drops duplicates as they arrive.
        TreeSet<String> unique = new TreeSet<String>();
        while (input.hasNext()){
            // Locale.ROOT keeps the result independent of the default locale
            // (a Turkish default locale would otherwise map "I" to dotless "ı").
            unique.add(input.next().toLowerCase(Locale.ROOT));
        }
        return new ArrayList<String>(unique);
    }

    /**
     * Returns the words that appear in both lists.
     *
     * <p>Precondition: both lists are sorted in natural {@code String} order
     * and contain no duplicates (as produced by {@link #getWords}).
     *
     * @param lst1 the first sorted, duplicate-free word list
     * @param lst2 the second sorted, duplicate-free word list
     * @return a new sorted list holding the words common to both inputs
     */
    public static ArrayList<String> getOverlap(ArrayList<String> lst1,ArrayList<String> lst2){
        ArrayList<String> result = new ArrayList<String>();
        int i1 = 0;
        int i2 = 0;
        // Walk both sorted lists in step, always advancing past the smaller word.
        while(i1 < lst1.size() && i2 < lst2.size()){
            String word1 = lst1.get(i1);
            int order = word1.compareTo(lst2.get(i2));
            if(order == 0){
                result.add(word1);
                i1++;
                i2++;
            }else if(order < 0){
                i1++;
            }else {
                i2++;
            }
        }
        return result;
    }

    /**
     * Prints a short introduction telling the user that the program compares
     * two text files and reports the number of common words and the overlap
     * percentages, followed by a blank line.
     */
    public static void giveIntro(){
        System.out.println("这个程序比较两个文本文件,并报告共同的字数和重叠的百分比。");
        System.out.println();
    }

    /**
     * Prints the size of each vocabulary, the number of shared words, and the
     * percentage of each vocabulary that lies in the overlap.
     *
     * @param lst1   the vocabulary of the first file
     * @param lst2   the vocabulary of the second file
     * @param common the words shared by both vocabularies
     */
    public static void reportResults(ArrayList<String> lst1,ArrayList<String> lst2,ArrayList<String> common){
        System.out.println("file #1 words = " + lst1.size());
        System.out.println("file #2 words = " + lst2.size());
        System.out.println("common words = " + common.size());
        System.out.println("% of file #1 in overlap = " + percentOf(common.size(), lst1.size()));
        System.out.println("% of file #2 in overlap = " + percentOf(common.size(), lst2.size()));
    }

    // Returns part/whole as a percentage; an empty whole yields 0.0 rather than NaN.
    private static double percentOf(int part, int whole){
        if(whole == 0){
            return 0.0;
        }
        return 100.0 * part / whole;
    }
}
运行结果: