一、分析一下
通过在谷歌浏览器上F12打开百度翻译分析其url发现
接口
几次调用后发现 sign 是变动的,还有个 token,这两个是怎么获取的呢?
发现在进入翻译页面的时候,会先请求 https://fanyi.baidu.com/ 这个 url
返回的是一个html
发现其中有 token,并且和调用翻译接口时的 token 一致;此外还有一个和 sign 形式相似的 gtk。通过查阅资料发现 sign 是通过一段 js 计算出来的,代码如下(r 为待翻译的内容,gtk 是从上述 html 中获取的):
/**
 * Mixes the number r according to the operation string o.
 *
 * o is consumed three characters at a time:
 *   - 1st char: "+" adds the operand to r (result forced to 32 bits), anything else XORs it in;
 *   - 2nd char: "+" builds the operand with an unsigned right shift, anything else with a left shift;
 *   - 3rd char: the shift amount; a character >= "a" maps to its char code minus 87
 *     (so "a" is 10), any other character is converted with Number().
 *
 * Returns the mixed value of r.
 */
function a(r, o) {
    var pos = 0;
    while (pos < o.length - 2) {
        var combine = o.charAt(pos);
        var direction = o.charAt(pos + 1);
        var amountChar = o.charAt(pos + 2);

        var shift;
        if (amountChar >= "a") {
            shift = amountChar.charCodeAt(0) - 87;
        } else {
            shift = Number(amountChar);
        }

        var operand;
        if ("+" === direction) {
            operand = r >>> shift;
        } else {
            operand = r << shift;
        }

        if ("+" === combine) {
            // "+" binds tighter than "&": this is (r + operand) truncated to 32 bits.
            r = r + operand & 4294967295;
        } else {
            r = r ^ operand;
        }

        pos += 3;
    }
    return r;
}
// Most recent gtk used by token(); consulted only when a call omits _gtk.
var C = null;
/**
 * Computes the "sign" string for the text r using the page key _gtk.
 *
 * r    - text to sign. When longer than 30 characters only its first 10, the
 *        10 around the middle and its last 10 characters are hashed.
 * _gtk - key of the form "<number>.<number>". When omitted, the gtk from the
 *        previous call is reused; when there is none, both halves count as 0.
 *
 * Returns "<S>.<S ^ h>" where S is the hash reduced modulo 1e6 and h is the
 * first half of the gtk.
 *
 * Fix: the passed _gtk now always wins. Previously the first gtk ever seen was
 * cached in C and every later call silently used it, even with a different _gtk.
 */
var token = function(r, _gtk) {
    var o = r.length;
    if (o > 30) {
        // substring() swaps its arguments when start > end, so the last term is the final 10 characters.
        r = "" + r.substr(0, 10) + r.substr(Math.floor(o / 2) - 5, 10) + r.substring(r.length, r.length - 10);
    }

    var t = _gtk || C || "";
    C = t;

    var e = t.split(".");
    var h = Number(e[0]) || 0;
    var i = Number(e[1]) || 0;

    // Expand r into UTF-8 style bytes (1 to 4 bytes per character, surrogate pairs combined).
    var d = [];
    for (var g = 0; g < r.length; g++) {
        var m = r.charCodeAt(g);
        if (128 > m) {
            d.push(m);
        } else if (2048 > m) {
            d.push(m >> 6 | 192);
            d.push(63 & m | 128);
        } else if (55296 === (64512 & m) && g + 1 < r.length && 56320 === (64512 & r.charCodeAt(g + 1))) {
            m = 65536 + ((1023 & m) << 10) + (1023 & r.charCodeAt(++g));
            d.push(m >> 18 | 240);
            d.push(m >> 12 & 63 | 128);
            d.push(m >> 6 & 63 | 128);
            d.push(63 & m | 128);
        } else {
            d.push(m >> 12 | 224);
            d.push(m >> 6 & 63 | 128);
            d.push(63 & m | 128);
        }
    }

    var u = "+-a^+6";
    var l = "+-3^+b+-f";
    var S = h;
    for (var s = 0; s < d.length; s++) {
        S += d[s];
        S = a(S, u);
    }
    S = a(S, l);
    S ^= i;
    if (0 > S) {
        // Reinterpret the negative 32-bit value as unsigned.
        S = (2147483647 & S) + 2147483648;
    }
    S %= 1e6;
    return S.toString() + "." + (S ^ h);
}
知道这些我们就开始写代码吧
二、使用步骤
1.引入库
我使用的是JAVA语言,需要操作HTML我们需要引入jsoup, 还需要解析返回的Json,我们引入阿里的 fastjson
<!-- jsoup: used to fetch and parse the HTML page -->
<dependency>
<groupId>org.jsoup</groupId>
<artifactId>jsoup</artifactId>
<version>1.12.1</version>
</dependency>
<!-- fastjson: used to parse the JSON returned by the translate call -->
<!-- NOTE(review): no <version> is declared here; presumably it is managed by a parent POM or dependencyManagement. Confirm, or add one. -->
<dependency>
<groupId>com.alibaba</groupId>
<artifactId>fastjson</artifactId>
</dependency>
2.具体代码
代码如下(示例):
private static String baiduTranslate(String q, String from, String to) throws IOException, ScriptException {
String mainpage_url = "https://fanyi.baidu.com/";
// Jsoup 获取页面
Document document = Jsoup.connect(mainpage_url).cookie("Cookie", "BAIDUID=1D8BC57A03641735D0F46872B391F36B; PSTM=1621752923; __yjs_duid=1_73eebc74c04c0586214b0074041092b91621754117386; REALTIME_TRANS_SWITCH=1; HISTORY_SWITCH=1; FANYI_WORD_SWITCH=1; SOUND_SPD_SWITCH=1; SOUND_PREFER_SWITCH=1; BDUSS=U1dmtTdlBaMG1MTHlQZWNkZnVCQm5vOHozVmdHdWcwajlzRjJBZS1-cU5WSEpoRVFBQUFBJCQAAAAAAAAAAAEAAABayrGns7257bntue0AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAI3HSmGNx0phbX; BDUSS_BFESS=U1dmtTdlBaMG1MTHlQZWNkZnVCQm5vOHozVmdHdWcwajlzRjJBZS1-cU5WSEpoRVFBQUFBJCQAAAAAAAAAAAEAAABayrGns7257bntue0AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAI3HSmGNx0phbX; BAIDUID=D1117626448036AD2AF919EC711025C3:FG=1; APPGUIDE_10_0_2=1; BDSFRCVID=P1IOJeC62w0oC0cHg4qyuRZb6V5Z9OQTH6aoVUmNkwmru95RKuk4EG0PhU8g0K4bGxQJogKKL2OTHmuF_2uxOjjg8UtVJeC6EG0Ptf8g0f5; H_BDCLCKID_SF=tJPJVI82tCD3fP36qRbsMJ8thl63-4oX2TTKWjrJaDvaMKJOy4oTj6j30l3Mql37MI6Qo454yJ_-OMQp5UQj3MvB-fnlXJoUWGFHLU7lWpTpEI3OQft20MkEeMtjBMoaBGvILR7jWhvdhl72y-chQlRX5q79atTMfNTJ-qcH0KQpsIJM5-DWbT8IjHCeJ6KfJJ4DoIv5b-0_HRT1Mt5Eh-cH-UnLqh_L02OZ0l8Ktt02DIjnhx7JjMFN5J5z5j5h-jTh2UomWIQHDUoXDfTI3TkDQnLfQfnt2aR4KKJx2UKWeIJoj-5n2h_phUJiBMAHBan7W45IXKohJh7FM4tW3J0ZyxomtfQxtNRJ0DnjtnLhbC8lj6t-D5oQepJf-K6a2CJ03JTs26rjDnCr05QzXUI8LNDH-5Oy0bR2an02-4ThVxcPjlDhW6Fg0JO7ttoyQHTL2Jv5a4ohbD5-ynoOjML1Db33L6vMtg0J3q3yLlcoepvoX55c3MkD5tjdJJQOBKQB0KnGbUQkeq8CQft20b0EeMtjKjLEK5r2SCKKJC3P; BDSFRCVID_BFESS=P1IOJeC62w0oC0cHg4qyuRZb6V5Z9OQTH6aoVUmNkwmru95RKuk4EG0PhU8g0K4bGxQJogKKL2OTHmuF_2uxOjjg8UtVJeC6EG0Ptf8g0f5; 
H_BDCLCKID_SF_BFESS=tJPJVI82tCD3fP36qRbsMJ8thl63-4oX2TTKWjrJaDvaMKJOy4oTj6j30l3Mql37MI6Qo454yJ_-OMQp5UQj3MvB-fnlXJoUWGFHLU7lWpTpEI3OQft20MkEeMtjBMoaBGvILR7jWhvdhl72y-chQlRX5q79atTMfNTJ-qcH0KQpsIJM5-DWbT8IjHCeJ6KfJJ4DoIv5b-0_HRT1Mt5Eh-cH-UnLqh_L02OZ0l8Ktt02DIjnhx7JjMFN5J5z5j5h-jTh2UomWIQHDUoXDfTI3TkDQnLfQfnt2aR4KKJx2UKWeIJoj-5n2h_phUJiBMAHBan7W45IXKohJh7FM4tW3J0ZyxomtfQxtNRJ0DnjtnLhbC8lj6t-D5oQepJf-K6a2CJ03JTs26rjDnCr05QzXUI8LNDH-5Oy0bR2an02-4ThVxcPjlDhW6Fg0JO7ttoyQHTL2Jv5a4ohbD5-ynoOjML1Db33L6vMtg0J3q3yLlcoepvoX55c3MkD5tjdJJQOBKQB0KnGbUQkeq8CQft20b0EeMtjKjLEK5r2SCKKJC3P; BDORZ=B490B5EBF6F3CD402E515D22BCDA1598; delPer=0; PSINO=3; BAIDUID_BFESS=D1117626448036AD2AF919EC711025C3:FG=1; Hm_lvt_64ecd82404c51e03dc91cb9e8c025574=1642991662,1643016044,1643016289,1643016293; H_PS_PSSID=35104_31254_35776_34584_35491_35797_35318_26350_35746; BA_HECTOR=258haka50ga42h00ib1guvce50r; Hm_lpvt_64ecd82404c51e03dc91cb9e8c025574=1643101993; ab_sr=1.0.1_MTg0OTU5NzFjYzk5NzFhOTIwMWRmOTkyNjc0MjIxZDhmY2UxYjVhZTRjZjljNzJmODdiYWUxYzc3ZTcyYzQzYTVkZmZmMTk0YzBmODQ4YTRlOTUwYzZjYjNhNDI4ZDg5ZTY3MTExNWVmNGZiMWE2YmU3MWQwYTcxZTY2ZmJlYmE=").timeout(10000).get();
// 寻找url返回 HTML 里的 script 这里有 我们需要的 gtk 和 token
Elements elements = document.getElementsByTag("script");
String jscode ="var window={};try{";
for (Element element : elements) {
String data = element.data();
// 通过查看html 发现token很gtk 存在两个 script里 将踏面取出
if (data.startsWith("window") || data.startsWith("\nwindow") ) {
jscode += data + ";";
}
}
// 多拼了个;去除
jscode = jscode.substring(0, jscode.length() -1);
// 注意去除此段代码 防止js 运行报错
jscode = jscode.replace("window.top.location.href = 'https://fanyi.baidu.com/';", "");
jscode += "}catch(e){}";
// 执行Js
ScriptEngine engine = new ScriptEngineManager().getEngineByName("js");
engine.eval(jscode);
Map window = new HashMap();
if (engine instanceof Invocable) {
window = (Map) engine.get("window");
}
// 获取token和gtk
String token = (String) ((Map) window.get("common")).get("token");
String gtk = (String) window.get("gtk");
String baiduUrl = "https://fanyi.baidu.com/v2transapi";
// 获取sign
String sign = token(q, gtk);
Map<String, String> params = new HashMap<String, String>();
params.put("from", from);
params.put("to", to);
params.put("query", q);
params.put("transtype", "translang");
params.put("simple_means_flag", "3");
params.put("sign", sign);
params.put("token", token);
params.put("domain", "common");
CloseableHttpClient httpClient = HttpClients.createDefault();
HttpPost request = new HttpPost(baiduUrl);
List<NameValuePair> paramList = new ArrayList<>();
Set<String> keySet = params.keySet();
for (String key : keySet) {
paramList.add( new BasicNameValuePair(key, params.get(key)));
}
request.setEntity(new UrlEncodedFormEntity(paramList, "UTF-8"));
request.setHeader("Cookie", "BIDUPSID=1D8BC57A03641735D0F46872B391F36B; PSTM=1621752923; __yjs_duid=1_73eebc74c04c0586214b0074041092b91621754117386; REALTIME_TRANS_SWITCH=1; HISTORY_SWITCH=1; FANYI_WORD_SWITCH=1; SOUND_SPD_SWITCH=1; SOUND_PREFER_SWITCH=1; BDUSS=U1dmtTdlBaMG1MTHlQZWNkZnVCQm5vOHozVmdHdWcwajlzRjJBZS1-cU5WSEpoRVFBQUFBJCQAAAAAAAAAAAEAAABayrGns7257bntue0AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAI3HSmGNx0phbX; BDUSS_BFESS=U1dmtTdlBaMG1MTHlQZWNkZnVCQm5vOHozVmdHdWcwajlzRjJBZS1-cU5WSEpoRVFBQUFBJCQAAAAAAAAAAAEAAABayrGns7257bntue0AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAI3HSmGNx0phbX; BAIDUID=D1117626448036AD2AF919EC711025C3:FG=1; APPGUIDE_10_0_2=1; BDSFRCVID=P1IOJeC62w0oC0cHg4qyuRZb6V5Z9OQTH6aoVUmNkwmru95RKuk4EG0PhU8g0K4bGxQJogKKL2OTHmuF_2uxOjjg8UtVJeC6EG0Ptf8g0f5; H_BDCLCKID_SF=tJPJVI82tCD3fP36qRbsMJ8thl63-4oX2TTKWjrJaDvaMKJOy4oTj6j30l3Mql37MI6Qo454yJ_-OMQp5UQj3MvB-fnlXJoUWGFHLU7lWpTpEI3OQft20MkEeMtjBMoaBGvILR7jWhvdhl72y-chQlRX5q79atTMfNTJ-qcH0KQpsIJM5-DWbT8IjHCeJ6KfJJ4DoIv5b-0_HRT1Mt5Eh-cH-UnLqh_L02OZ0l8Ktt02DIjnhx7JjMFN5J5z5j5h-jTh2UomWIQHDUoXDfTI3TkDQnLfQfnt2aR4KKJx2UKWeIJoj-5n2h_phUJiBMAHBan7W45IXKohJh7FM4tW3J0ZyxomtfQxtNRJ0DnjtnLhbC8lj6t-D5oQepJf-K6a2CJ03JTs26rjDnCr05QzXUI8LNDH-5Oy0bR2an02-4ThVxcPjlDhW6Fg0JO7ttoyQHTL2Jv5a4ohbD5-ynoOjML1Db33L6vMtg0J3q3yLlcoepvoX55c3MkD5tjdJJQOBKQB0KnGbUQkeq8CQft20b0EeMtjKjLEK5r2SCKKJC3P; BDSFRCVID_BFESS=P1IOJeC62w0oC0cHg4qyuRZb6V5Z9OQTH6aoVUmNkwmru95RKuk4EG0PhU8g0K4bGxQJogKKL2OTHmuF_2uxOjjg8UtVJeC6EG0Ptf8g0f5; 
H_BDCLCKID_SF_BFESS=tJPJVI82tCD3fP36qRbsMJ8thl63-4oX2TTKWjrJaDvaMKJOy4oTj6j30l3Mql37MI6Qo454yJ_-OMQp5UQj3MvB-fnlXJoUWGFHLU7lWpTpEI3OQft20MkEeMtjBMoaBGvILR7jWhvdhl72y-chQlRX5q79atTMfNTJ-qcH0KQpsIJM5-DWbT8IjHCeJ6KfJJ4DoIv5b-0_HRT1Mt5Eh-cH-UnLqh_L02OZ0l8Ktt02DIjnhx7JjMFN5J5z5j5h-jTh2UomWIQHDUoXDfTI3TkDQnLfQfnt2aR4KKJx2UKWeIJoj-5n2h_phUJiBMAHBan7W45IXKohJh7FM4tW3J0ZyxomtfQxtNRJ0DnjtnLhbC8lj6t-D5oQepJf-K6a2CJ03JTs26rjDnCr05QzXUI8LNDH-5Oy0bR2an02-4ThVxcPjlDhW6Fg0JO7ttoyQHTL2Jv5a4ohbD5-ynoOjML1Db33L6vMtg0J3q3yLlcoepvoX55c3MkD5tjdJJQOBKQB0KnGbUQkeq8CQft20b0EeMtjKjLEK5r2SCKKJC3P; BDORZ=B490B5EBF6F3CD402E515D22BCDA1598; delPer=0; PSINO=3; BAIDUID_BFESS=D1117626448036AD2AF919EC711025C3:FG=1; Hm_lvt_64ecd82404c51e03dc91cb9e8c025574=1642991662,1643016044,1643016289,1643016293; Hm_lpvt_64ecd82404c51e03dc91cb9e8c025574=1643102040; ab_sr=1.0.1_N2Y5ZGI2ZTE4MGIxMWQxNmU4MDIyMzRhMjg5NTg4MzRkZDk2Njc3MmY0M2Q3NDFkYzY1MzdlZWEzOGE1MmZkOWFjMDU0OGMxZmU4MDFiZmJiZDVhMDIwODRmNWY0YWY3ZTZiZmUzNGQ1MmMyNTQ4YjIyMWUwM2UyZTY2Mzg3YmU=; H_PS_PSSID=35104_31254_35776_34584_35491_35797_35318_26350_35746; BA_HECTOR=2k2kah210hag8004dr1guvj1c0q");
request.setHeader("User-Agent", "Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/97.0.4692.71 Safari/537.36");
CloseableHttpResponse response = httpClient.execute(request);
HttpEntity entity = response.getEntity();
// 解析JSON
String result = EntityUtils.toString(entity, "utf-8");
JSONObject jsonObject = JSONObject.parseObject(result);
JSONObject object = (JSONObject) jsonObject.get("trans_result");
JSONArray object1 = (JSONArray) object.get("data");
JSONObject object2 = (JSONObject)object1.get(0);
String dst = (String) object2.get("dst");
EntityUtils.consume(entity);
response.getEntity().getContent().close();
response.close();
return dst;
}
// 获取sign
/**
 * Computes the request sign by running the JavaScript {@code token} function
 * stored in {@code C:\NC\js.js}.
 *
 * <p>This is best-effort: when no invocable JavaScript engine is available, or
 * the script cannot be read or run, the problem is reported on standard error
 * and an empty string is returned.
 *
 * @param value text to sign
 * @param gtk   gtk value taken from the page
 * @return the sign, or {@code ""} when it could not be computed
 */
private static String token(String value, String gtk) {
    ScriptEngine engine = new ScriptEngineManager().getEngineByName("js");
    // getEngineByName returns null when no matching engine exists; instanceof also covers that case.
    if (!(engine instanceof Invocable)) {
        System.err.println("No invocable JavaScript engine named \"js\" is available");
        return "";
    }
    // try-with-resources closes the script file, which was previously left open.
    try (FileReader reader = new FileReader("C:\\NC\\js.js")) {
        engine.eval(reader);
        return String.valueOf(((Invocable) engine).invokeFunction("token", value, gtk));
    } catch (Exception e) {
        e.printStackTrace();
        return "";
    }
}
//执行一下
/**
 * Demo entry point: translates "你好" with source code "zh" and target code "jp"
 * and prints the result to standard output.
 */
public static void main(String[] args) throws IOException, ScriptException {
    System.out.println(baiduTranslate("你好", "zh", "jp"));
}
总结
第一次写文章