提取英语单词、翻译后存入数据库并导出

为备战考研,作者开发了一款简易程序,用于读取英文文献并统计词频,同时利用百度翻译API进行翻译,结果保存至数据库并导出文本。项目采用Java语言,借助IDEA开发环境完成。

摘要生成于 C知道 ,由 DeepSeek-R1 满血版支持, 前往体验 >

明年就要考研了,想着写个简单的词频统计程序。这个程序的功能是:读取文本,分割出每个单词并统计每个单词出现的次数,调用百度翻译接口进行翻译,最后把结果保存到数据库并导出为文本。程序写得比较粗糙:由于不会 Java 爬虫,要读取的文本只能自己去网上找一些文献复制下来;数据库部分用的是基础的 JDBC,写得比较混乱;翻译调用的是百度翻译的接口;开发环境为 IDEA。项目保存在 https://github.com/Adam-hohai/WordSplit ,所用的英文报刊参考的是何凯文的每日一句。

package hhuc.cenhelm;

import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.JSONObject;
import hhuc.cenhelm.tools.TransApi;

import java.io.*;
import java.sql.*;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.StringTokenizer;

/**
 * Word-frequency extractor: reads an English text file, counts how often each
 * word occurs, translates every word through the Baidu Translate API, appends
 * the result to a text file and stores it in the {@code hkwDailyData} table.
 */
public class Main {

    /**
     * Baidu Translate application id and secret key.
     */
    private static final String APP_ID = "*********";
    private static final String SECURITY_KEY = "*********";

    /**
     * Input and output files used when no command-line arguments are given.
     */
    private static final String DEFAULT_INPUT_PATH = "src/main/resources/2020033150.txt";
    private static final String DEFAULT_OUTPUT_PATH = "src/main/resources/2020033150output.txt";

    /**
     * Reads a whole text file into a string.
     *
     * <p>Every line is terminated with {@code '\n'} in the returned string so that
     * the last word of one line is never glued to the first word of the next
     * one. The file is decoded with the platform default charset.
     *
     * @param pathName path of the text file
     * @return the file content; an empty string for an empty file
     * @throws IOException if the file cannot be opened or read
     */
    public static String read(String pathName) throws IOException {
        StringBuilder content = new StringBuilder();
        // try-with-resources closes the reader even when readLine() fails.
        try (BufferedReader br = new BufferedReader(
                new InputStreamReader(new FileInputStream(new File(pathName))))) {
            String line;
            // Test for end-of-file BEFORE appending, otherwise the literal
            // text "null" ends up in the content and is counted as a word.
            while ((line = br.readLine()) != null) {
                content.append(line).append('\n');
            }
        }
        return content.toString();
    }

    /**
     * Splits a text into words and counts the occurrences of each word.
     *
     * <p>Words are lower-cased before counting and words of three characters or
     * fewer are ignored.
     *
     * @param content text to split; {@code null} or empty yields an empty map
     * @param rule    the delimiter characters, as understood by {@link StringTokenizer}
     * @return map from lower-cased word to its number of occurrences
     */
    public static Map<String, Integer> splitOut(String content, String rule) {
        Map<String, Integer> wordMap = new HashMap<String, Integer>();
        if (content == null || content.isEmpty()) {
            return wordMap;
        }
        StringTokenizer st = new StringTokenizer(content, rule);
        while (st.hasMoreTokens()) {
            // Locale.ROOT keeps the lower-casing independent of the machine's
            // locale (e.g. the Turkish dotless i).
            String word = st.nextToken().toLowerCase(Locale.ROOT);
            if (word.length() > 3) {
                Integer seen = wordMap.get(word);
                wordMap.put(word, seen == null ? 1 : seen + 1);
            }
        }
        return wordMap;
    }

    /**
     * Translates a word into Chinese through the Baidu Translate API.
     *
     * <p>The API answers with a JSON document whose {@code trans_result} array
     * holds objects with a {@code dst} (translated text) field. The {@code dst}
     * of the last entry is returned.
     *
     * @param transApi Baidu Translate client
     * @param word     text to translate
     * @return the translated text
     * @throws IllegalStateException if the response carries no translation
     *                               (for example an {@code error_code} reply)
     */
    public static String translate(TransApi transApi, String word) {
        // The Baidu Translate endpoint returns a JSON string.
        String jsonResult = transApi.getTransResult(word, "auto", "zh");
        JSONObject response = JSON.parseObject(jsonResult);
        Object results = response == null ? null : response.get("trans_result");
        if (results instanceof List && !((List<?>) results).isEmpty()) {
            List<?> entries = (List<?>) results;
            Object last = entries.get(entries.size() - 1);
            if (last instanceof Map) {
                Object dst = ((Map<?, ?>) last).get("dst");
                if (dst != null) {
                    return dst.toString();
                }
            }
        }
        // Fail loudly instead of storing an error message as a "translation".
        throw new IllegalStateException(
                "Baidu Translate returned no translation for \"" + word + "\": " + jsonResult);
    }

    /**
     * Opens a connection to the local SQL Server database.
     *
     * @return an open {@link Connection}; the caller is responsible for closing it
     * @throws Exception if the driver class is missing or the connection fails
     */
    public static Connection getConnection() throws Exception {
        Class.forName("com.microsoft.sqlserver.jdbc.SQLServerDriver");
        String url = "jdbc:sqlserver://127.0.0.1:1433;DatabaseName=EnNewspaperHKW";
        String user = "sa";
        String password = "*****";
        Connection conn = DriverManager.getConnection(url, user, password);
        System.out.println("数据库连接成功");
        return conn;
    }

    /**
     * Runs the extraction: read, count, translate, export and store.
     *
     * @param args optional: {@code args[0]} is the input text file and
     *             {@code args[1]} the output text file; the built-in default
     *             paths are used for whichever is missing
     * @throws Exception if reading, translating, writing or a database operation fails
     */
    public static void main(String[] args) throws Exception {
        String inputPath = args.length > 0 ? args[0] : DEFAULT_INPUT_PATH;
        String outputPath = args.length > 1 ? args[1] : DEFAULT_OUTPUT_PATH;

        // A failed read is fatal: there is nothing to process without the text.
        String content = read(inputPath);
        System.out.println(content);
        // Punctuation, digits and whitespace (including line breaks) separate words.
        String rule = ", !./';:?\"()“”‘’-—$%#!&*——_1234567890|`~·[]{}" + "\t\r\n";
        Map<String, Integer> wordMap = splitOut(content, rule);
        // Client for the Baidu Translate API.
        TransApi transApi = new TransApi(APP_ID, SECURITY_KEY);

        String sqlSel = "select * from hkwDailyData where word=?";
        String sqlUpd = "update hkwDailyData set frequency = frequency +? where word =?";
        String sqlIns = "insert into hkwDailyData(word,translation,frequency) values (?,?,?)";

        // Statements and the writer are created once and reused for every
        // word; try-with-resources closes all of them, also on failure.
        try (Connection connection = getConnection();
             PreparedStatement psSel = connection.prepareStatement(sqlSel);
             PreparedStatement psUpd = connection.prepareStatement(sqlUpd);
             PreparedStatement psIns = connection.prepareStatement(sqlIns);
             BufferedWriter out = new BufferedWriter(
                     new FileWriter(new File(outputPath), true))) { // append mode
            for (Map.Entry<String, Integer> entry : wordMap.entrySet()) {
                String word = entry.getKey();
                int frequency = entry.getValue();
                String translation = translate(transApi, word);
                System.out.println(word + " " + translation + " " + frequency);

                // Export to the text file; flush per word so progress
                // survives a later failure.
                out.write(word + " " + translation + " " + frequency + "\r\n");
                out.flush();

                // Store in the database: add to the frequency of a known
                // word, insert a new row otherwise.
                boolean known;
                psSel.setString(1, word);
                try (ResultSet rsSel = psSel.executeQuery()) {
                    known = rsSel.next();
                }
                if (known) {
                    psUpd.setInt(1, frequency);
                    psUpd.setString(2, word);
                    psUpd.executeUpdate();
                    System.out.println("更新成功");
                } else {
                    psIns.setString(1, word);
                    psIns.setString(2, translation);
                    psIns.setInt(3, frequency);
                    psIns.executeUpdate();
                    System.out.println("插入成功");
                }
            }
        }
        System.out.println("提取结束");
    }
}

 

 

 

 

 

 

 

 

 

 

 

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值