很多系统都需要自己的翻译功能,所以大家都想到把现成的翻译网站抓取下来使用,Google Translate 和 Yahoo Babel Fish 就成了首选。Google 比较简单,直接用 HttpURLConnection 发送 POST 请求即可;但是 Google 的防盗措施非常好,用不了几次就会返回一个防盗链页面,无法继续使用,所以 Yahoo Babel Fish 就成了大家的选择。但是,用 HttpURLConnection 请求最新版本的 Babel Fish(宝贝鱼)时,无论怎样设置编码方式,得到的中文都是乱码。后来发现,在 Java 中必须设置 connection.setRequestProperty( "User-agent" , "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.0; .NET CLR 1.1.4322; .NET CLR 2.0.50215; fqSpider)" ); 才能得到正确的中文显示,C# 中同样如此。还需要注意的是,如果输入的 link 中含有中文,除了用 java.net.URLEncoder.encode 编码外,还需要在 link 中加入 &ei=gb2312。下面是全部代码。
import java.io.*;
import java.net.*;
/**
 * Minimal screen-scraping client for the Yahoo Babel Fish translation page.
 *
 * <p>The text to translate is POSTed as a form to {@link #yahooUrl}; the
 * translation is then cut out of the returned HTML by locating the result
 * {@code <div>}.
 */
public class Yahoo {
    static final String yahooUrl = "http://babelfish.yahoo.com.cn/translate_txt";

    /**
     * Charset announced to the service through the {@code ei} form field.
     * The text is percent-encoded with the same charset so that the bytes
     * sent always match what the request claims they are.
     */
    private static final String INPUT_ENCODING = "gb2312";

    /** Opening tag of the element that wraps the translated text. */
    private static final String RESULT_OPEN = "<div id=\"pd\" class=\"pd\">";

    /** Closing tag that ends the translated text. */
    private static final String RESULT_CLOSE = "</div>";

    /**
     * Translates {@code query} using the given language pair.
     *
     * @param query text to translate
     * @param lang  language-pair code sent as the {@code lp} form field,
     *              e.g. {@code "zh_en"}
     * @return the translated text with surrounding whitespace removed
     * @throws IOException if the request fails or the response does not
     *                     contain the expected result element
     */
    public String yahootranslate(String query, String lang)
            throws MalformedURLException, IOException, UnsupportedEncodingException {
        String form = "trtext=" + encodeText(query) + "&lp=" + lang + "&ei=" + INPUT_ENCODING;
        String html = getHtmlContent(yahooUrl, form, "UTF-8");
        String translated = getyahooContent(html);
        if (translated == null) {
            // Report a missing result explicitly instead of failing later
            // with a NullPointerException.
            throw new IOException("No translation result found in the response from " + yahooUrl);
        }
        return translated.trim();
    }

    /**
     * Extracts the text between the result {@code <div>} and the first
     * {@code </div>} that follows it.
     *
     * @param htmltext page HTML, may be {@code null}
     * @return the raw inner text, or {@code null} when the input is
     *         {@code null} or either marker is missing
     */
    private String getyahooContent(String htmltext) {
        if (htmltext == null) {
            return null;
        }
        int open = htmltext.indexOf(RESULT_OPEN);
        if (open < 0) {
            return null;
        }
        int contentStart = open + RESULT_OPEN.length();
        int close = htmltext.indexOf(RESULT_CLOSE, contentStart);
        if (close < 0) {
            return null;
        }
        return htmltext.substring(contentStart, close);
    }

    /**
     * Sends {@code _post} as the request body to {@code _url} and returns the
     * response body as one string (line terminators are dropped).
     *
     * @param _url     target URL
     * @param _post    already-encoded form body
     * @param _charset charset used to decode the response
     * @return the response body
     * @throws IOException on any connection, write or read failure
     */
    private String getHtmlContent(String _url, String _post, String _charset) throws IOException {
        HttpURLConnection connection = (HttpURLConnection) new URL(_url).openConnection();
        // Without a browser-like User-agent the service was observed to
        // return garbled Chinese text.
        connection.setRequestProperty("User-agent", "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.0; .NET CLR 1.1.4322; .NET CLR 2.0.50215; fqSpider)");
        connection.setDoInput(true);
        connection.setDoOutput(true);
        connection.connect();

        OutputStream rawOut = connection.getOutputStream();
        try {
            OutputStreamWriter out = new OutputStreamWriter(rawOut, "UTF-8");
            out.write(_post);
            out.flush();
        } finally {
            rawOut.close();
        }

        InputStream rawIn = connection.getInputStream();
        try {
            // Decode the response with the charset requested by the caller.
            BufferedReader in = new BufferedReader(new InputStreamReader(rawIn, _charset));
            StringBuilder sb = new StringBuilder();
            String line;
            while ((line = in.readLine()) != null) {
                sb.append(line);
            }
            return sb.toString();
        } finally {
            rawIn.close();
        }
    }

    /**
     * Percent-encodes {@code text} for use as a form value, using the
     * charset announced in the {@code ei} field.
     *
     * @param text text to encode
     * @return the {@code application/x-www-form-urlencoded} form of {@code text}
     * @throws IllegalStateException if the JVM does not provide the charset
     */
    public static String encodeText(String text) {
        try {
            return URLEncoder.encode(text, INPUT_ENCODING);
        } catch (UnsupportedEncodingException e) {
            throw new IllegalStateException("Charset " + INPUT_ENCODING + " is not available in this JVM", e);
        }
    }

    public static void main(String[] args) throws Exception {
        Yahoo yahoo = new Yahoo();
        // Chinese -> English. The literal is Chinese for "How are you?",
        // written with Unicode escapes so the file compiles under any
        // source encoding.
        System.out.println(yahoo.yahootranslate("\u4f60\u597d\u5417?", "zh_en"));
        // English -> Chinese.
        // NOTE(review): this previously passed "en_ch"; changed to "en_zh" to
        // match the "zh" code used in "zh_en" above - confirm against the
        // service's list of language pairs.
        System.out.println(yahoo.yahootranslate("hello China", "en_zh"));
    }
}