统计英文单词

Animal凌

已于 2022-05-19 20:01:12 修改

阅读量153

点赞数

分类专栏：Java学习　文章标签：java

于 2022-05-19 19:54:33 首次发布

本文链接：https://blog.youkuaiyun.com/m0_54091790/article/details/124870183

版权

Java学习专栏收录该内容

9 篇文章

订阅专栏

本文介绍了 WordStatistic.java 程序：它读取英文文本文件，统计其中的全部单词，并找出互不相同的单词。OutputWordMess.java 展示了如何使用这个统计类，输出单词总数、互不相同的单词数，以及按出现频率从高到低排列的单词列表。

摘要生成于 C知道，由 DeepSeek-R1 满血版支持，前往体验 >

WordStatistic.java

import java.io.*;
import java.util.*;
/**
 * Reads an English text file and collects every word it contains together
 * with the list of distinct words, in order of first appearance.
 *
 * <p>A word is any token left after splitting the input on runs of
 * whitespace, digits and ASCII punctuation. Comparison is case-sensitive.
 * The default input file is {@code english.txt}; call
 * {@link #setFileName(String)} to read a different file, then
 * {@link #wordStatistic()} to consume it.
 */
public class WordStatistic {
    Vector<String> allWord,noSameWord;
    File file = new File("english.txt");
    Scanner sc = null;
    String regex;

    /**
     * Creates the collector and tries to open the default file.
     * If the file cannot be opened the exception is printed to standard
     * output and no scanner is attached.
     */
    WordStatistic() {
        allWord = new Vector<String>();
        noSameWord = new Vector<String>();
        // Delimiter: one or more whitespace characters, digits or
        // punctuation characters (!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~).
        regex = "[\\s\\d\\p{Punct}]+";
        openScanner();
    }

    /**
     * Switches the input to another file.
     * If the file cannot be opened the exception is printed to standard
     * output and a following {@link #wordStatistic()} call reads nothing.
     *
     * @param name path of the text file to read
     */
    void setFileName(String name) {
        file = new File(name);
        openScanner();
    }

    /**
     * Opens a scanner on {@code file}, releasing any scanner that was
     * already open so its file handle is not leaked.
     */
    private void openScanner() {
        if (sc != null) {
            sc.close();
            // Drop the reference first: if opening the new file fails we must
            // not fall back to silently reading the previous file.
            sc = null;
        }
        try {
            sc = new Scanner(file);
            sc.useDelimiter(regex);
        }
        catch (IOException exp) {
            System.out.println(exp.toString());
        }
    }

    /**
     * Reads all remaining words from the current file, appending each one to
     * the all-words list and each previously unseen one to the distinct-words
     * list. The scanner is closed afterwards, so calling this method again
     * without a new {@link #setFileName(String)} adds nothing.
     */
    public void wordStatistic() {
        if (sc == null) {
            // No file could be opened; the failure was already reported.
            return;
        }
        // Hash-based membership test instead of a linear Vector.contains scan.
        // Seeded from noSameWord so results keep accumulating across files.
        Set<String> seen = new HashSet<String>(noSameWord);
        try {
            while (sc.hasNext()) {
                String word = sc.next();
                allWord.add(word);
                if (seen.add(word)) {
                    noSameWord.add(word);
                }
            }
        }
        finally {
            sc.close();
            sc = null;
        }
    }

    /**
     * Returns every word read so far, duplicates included.
     *
     * @return the live list of all words, in reading order
     */
    public Vector<String> getAllWord() {
        return allWord;
    }

    /**
     * Returns the distinct words read so far.
     *
     * @return the live list of distinct words, in order of first appearance
     */
    public Vector<String> getNoSameWord() {
        return noSameWord;
    }
}

OutputWordMess.java

import java.util.*;
/**
 * Command-line driver that runs {@code WordStatistic} on {@code hello.txt}
 * and prints the total word count, the distinct word count, and each distinct
 * word with its relative frequency, most frequent first.
 */
public class OutputWordMess{
    /**
     * Program entry point.
     *
     * @param args command-line arguments (not used)
     */
    public static void main(String args[]) {
        WordStatistic statistic = new WordStatistic();
        statistic.setFileName("hello.txt");
        statistic.wordStatistic();

        Vector<String> everyWord = statistic.getAllWord();
        Vector<String> distinctWords = statistic.getNoSameWord();
        System.out.println("共有"+everyWord.size()+"个英文单词");
        System.out.println("有"+distinctWords.size()+"个互不相同英文单词");
        System.out.println("按出现频率排列:");

        // occurrences[i] holds how many times distinctWords[i] appears overall.
        final int distinctTotal = distinctWords.size();
        int[] occurrences = new int[distinctTotal];
        for (int idx = 0; idx < distinctTotal; idx++) {
            occurrences[idx] = Collections.frequency(everyWord, distinctWords.elementAt(idx));
        }

        // Exchange sort, descending by count; the word list and the count
        // array are swapped in lock-step so they stay aligned.
        for (int lead = 0; lead < distinctTotal; lead++) {
            for (int probe = lead + 1; probe < distinctTotal; probe++) {
                if (occurrences[lead] < occurrences[probe]) {
                    Collections.swap(distinctWords, lead, probe);
                    int held = occurrences[lead];
                    occurrences[lead] = occurrences[probe];
                    occurrences[probe] = held;
                }
            }
        }

        // Entries are printed on one line; the left-justified width-7 field
        // pads each frequency with trailing spaces as the separator.
        for (int idx = 0; idx < distinctTotal; idx++) {
            double frequency = occurrences[idx] / (double) everyWord.size();
            System.out.printf("%s:%-7.3f", distinctWords.elementAt(idx), frequency);
        }
    }
}

运行结果：