使用Google接口实时翻译

该博客介绍了如何在Java项目中使用Google翻译接口进行实时翻译。通过调用指定URL并处理返回的响应,可以将英文内容转换为中文。由于Google服务可能需要代理才能访问,因此在实际应用中需要考虑代理设置。

摘要生成于 C知道 ,由 DeepSeek-R1 满血版支持, 前往体验 >

        
 在项目当中,获得到的数据是英文,如果想翻译为中文内容,可直接调用Google接口:
https://translate.google.com.hk/translate_a/single?client=t&sl=en&tl=zh-CN&hl=zh-CN&dt=bd&dt=ex&dt=ld&dt=md&dt=qca&dt=rw&dt=rm&dt=ss&dt=t&dt=at&ie=UTF-8&oe=UTF-8&source=btn&ssel=0&tsel=0&kc=0&tk=470115|78768&q=%22world%22

当然,Google服务需要翻墙,所以要用到代理,这里代理的获取就暂且不细说(可以代理商购买)。下面主要提供java程序与配置文件参考:
TranslateUtil.java  :

package com.ttz.crawl.util;

import java.io.UnsupportedEncodingException;
import java.net.URLEncoder;
import java.util.regex.Pattern;
import org.apache.log4j.Logger;
import com.ttz.crawl.common.Page;
import com.ttz.crawl.config.CrawlConfig;
import com.ttz.crawl.fetch.FetchRet;
import com.ttz.crawl.fetch.HttpClientFetch;
import com.ttz.crawl.proxy.ProxyPoolMan;

/**
 * 调用google 翻译接口
 * @author zhaoyuchun
 */
/**
 * Translates English text to Chinese by calling the Google Translate web
 * endpoint. The endpoint URL is a template read from config key
 * {@code translateUrl}; the template uses {@code *} as a placeholder
 * separator (see {@link #translate(String)}).
 *
 * NOTE(review): translate.google.com is typically unreachable without a
 * proxy from mainland networks, hence the ProxyPoolMan toggling around
 * the fetch.
 *
 * @author zhaoyuchun
 */
public class TranslateUtil
{
	/** Validates that a fetched page contains the {@code [[[...]]]} result payload. */
	public static Pattern cntValidPatt = null;
	/** Extracts the quoted Chinese translation from the result payload. */
	public static Pattern cntCHSPatt = null;
	public static HttpClientFetch fetcher = null;
	/** URL template with {@code *}-separated segments, loaded from crawl config. */
	public static String translateUrl = null;
	public static Logger log = Logger.getLogger(TranslateUtil.class);
	
	static
	{
		fetcher = new HttpClientFetch();
		cntValidPatt = Pattern.compile("\\[\\[\\[.*?\\]\\]\\]");
		cntCHSPatt = Pattern.compile("\\[\\[\\[\"“(.*?)”\",");
		translateUrl = CrawlConfig.getParam("translateUrl");
	}
	
	/**
	 * Translates {@code str} (English) to Chinese via the configured
	 * Google endpoint, fetching through the proxy pool.
	 *
	 * @param str the text to translate
	 * @return the translated text, or {@code null} when the input is empty,
	 *         the URL template is missing or malformed, the page could not
	 *         be fetched, or extraction failed
	 */
	public static String translate(String str)
	{
		if (str == null || str.isEmpty())
		{
			return null;
		}
		if (translateUrl == null)
		{
			log.error("translateUrl is not configured");
			return null;
		}
		String [] listUrlPattern = translateUrl.split("\\*");
		// The template must contain three '*' separators, yielding four
		// segments: prefix * tk-part * middle * suffix. Guard here so a bad
		// config fails with a clear log line instead of an
		// ArrayIndexOutOfBoundsException swallowed by the catch below.
		if (listUrlPattern.length < 4)
		{
			log.error("malformed translateUrl template: " + translateUrl);
			return null;
		}
		ProxyPoolMan.enable = true;
		try
		{
			String url = listUrlPattern[0] 
					+ URLEncoder.encode(listUrlPattern[1],"UTF-8")
					+ listUrlPattern[2]
					+ URLEncoder.encode(str,"UTF-8")
					+ listUrlPattern[3];
			FetchRet ret = fetcher.getPageRet(url, cntValidPatt, null, null); 
			if(ret == null || ret.page == null)
			{
				log.error("can not get the translatePage. url:" + url);
				return null;
			}
			Page page = ret.page;
			// Pull the Chinese text out of the fetched payload.
			str = PageExtractUtil.extractPageLabel(page, cntCHSPatt);
		} 
		catch (Exception e) 
		{
			// Pass the throwable so the stack trace is preserved;
			// log.error(e) alone would only print e.toString().
			log.error("translate failed for input: " + str, e);
			str = null;
		}
		finally
		{
			// Always restore the proxy flag, even on failure.
			ProxyPoolMan.enable = false;
		}
		return str;	
	}
	
	public static void main(String args[])
	{
		// Smoke test: print the result instead of discarding it.
		System.out.println(translate("proxy"));
	}
}

StockTwits.properties:
#common
home_path = C:\\Users\\zhaoyuchun\\workspace\\91z_2014
db_conf = res/db.properties
log_conf = res/log4j.properties
tair_conf = res/tair.properties
redis_conf = res/redis.properties
enable_encode_to_utf8 = true
socket_port = 50010
site_id = 3
data_name = StockTwits

#以*分割不同的url
base_url = http://stocktwits.com/streams/poll?stream=symbol&max=*&stream_id=*&substream=*&item_id=*http://stocktwits.com/symbol/http://stocktwits.com/
 
fetch_socket_timeout = 30000
fetch_connect_timeout = 10000
#unit: second
sleeptime = 1
#unit: minute
intervals = 60
#stop condition:0--crawl all; 1-- < page_size; 2-- begin_date <= pub_date < end_date; 3--when crawl crawled page; 4--only crawl uncrawled url, only compare url in dedup db
#if thread_no > 1, stop_type 可以有多个值,对应不同的thread,以;分割, e.g. 0;4
stop_type = 0
stock_type = 2
#set value when stop_type == 1
page_size = 1
#set value when stop_type == 2
#date format:yyyy-mm-dd
crawl_begin_date = 2012-02-20
crawl_end_date = 2012-02-21

#cookie setting
enable_cookie = false
#当内存中存储的失败url超过max_capacity之后,会写入文件
max_url_capacity = 1000
max_page_size = 5000

#regex       
regex_stream_id = 'streamId' : '(\\d+)'\,
regex_url_id = max_id: (\\d+)\,.*?poll_id: '(\\d+)'\,.*?substream:\\s'(.*?)'\,
regex_list =<li data-src=.*?id.*?(\\d+).*?created_at":"(.*?\\d+.*?) -.*?user_path":"\\\\\\/(.*?)"\,"avatar_url":"(.*?)"[\\s\\S]*?body.*?;:\\\\\\?"([\\s\\S]*?);links[\\s\\S]*?total_likes":(\\d+?)\,(?:[^<]*?replies":(\\d+)\,)?
#proxy
enable_proxy = true
check_file = 10000000
proxy_file = data/proxy.xml
proxy_crawl_properties = res/proxy.properties
#the upper-bound pages to fetch from a host in a WATCH_INTERVAL
max_per_interval = 6000
#fetch density control interval, "ms" as unit
watch_interval = 60000
#about a proxy: in BLOCK_FORBIDDEN_TIME, BLOCK_VALUE urls are not fetched, then the proxy is invalid for the host, delete the proxy
#and when the proxy is deleted, delete the current url;
block_forbidden_item = 120000
block_span = 3600000
#about a host: in FORBIDDEN_PERIOD, FORBIDDEN_COUNT urls are not fetched, the host is invalid, then delete all the host urls in FORBIDDEN_PERIOD
failure_max_count = 2
#forbidden_period = 1000000
failure_watch_interval = 1000000
failure_clear_span = 3600000
proxy_pool_min_size = 500
pool_update_span = 3600000

block_span = 3600000
#about a host:in FORBIDEN_PERIOD ,FORBIDEN_COUNT urls are not fetched,the host is invalid,then delete all the host urls in FORBIDEN_PERIOD
#forbidden_count = 10
failure_value = 2
#forbidden_period = 1000000
failure_watch_interval = 1000000
failure_clear_span = 3600000
proxy_pool_minsize = 10
pool_update_span = 3600000

#dedup
enable_dedup = true

#writer
enable_write = true
save_page_path = ${home_path}/data/page
max_frequent = 1800000
max_per_queue = 5000
min_page_size = 10

#tair setting
space_name = 1000
redis_key  = EXTRACT0000

#thread setting
thread_numbers=1
thread_no=1

#host setting
machine_id=1

proxy_url = http://www.yun-daili.com/api.asp?key=20151109200040011100200069397641&getnum=200&area=2&proxytype=1
translateUrl = https://translate.google.com.hk/translate_a/single?client=t&sl=en&tl=zh-CN&hl=zh-CN&dt=bd&dt=ex&dt=ld&dt=md&dt=qca&dt=rw&dt=rm&dt=ss&dt=t&dt=at&ie=UTF-8&oe=UTF-8&source=btn&ssel=0&tsel=0&kc=0&tk=470115*|*78768&q=%22*%22



评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值