如果通过 HttpClient 抓取到的页面内容出现中文乱码,只需将返回的字符串做如下处理:
String content = EntityUtils.toString(entity);
content = new String(content.getBytes("ISO-8859-1"),charset);
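其中,charset 为目标页面实际使用的编码格式(例如 GBK、GB2312 等),视具体页面而定,设置成与页面一致即可。之所以先用 ISO-8859-1 取回字节,是因为响应头未声明编码时,EntityUtils.toString(entity) 默认按 ISO-8859-1 解码;如果响应头已经声明了编码,这种转换反而会破坏内容。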
/**
 * Fetches {@code url} with an HTTP GET and returns the response body as text.
 *
 * <p>When {@code charset} is non-null the raw response bytes are decoded with that
 * charset. When it is null, decoding is left to {@code EntityUtils.toString(entity)}.
 *
 * <p>A failed attempt (non-200 status or any {@link IOException}) is logged and retried
 * with a fresh client, up to a fixed number of attempts. Each client is closed exactly
 * once, in the {@code finally} block.
 *
 * @param url     address to request
 * @param charset name of the charset used to decode the body, or {@code null} to let
 *                HttpClient choose
 * @return the response body; {@code null} if the 200 response carried no entity or if
 *         every attempt failed
 */
public static String getByUrl(final String url,final String charset){
    // Upper bound on attempts. The earlier unbounded recursive retry never terminated
    // for permanent failures (e.g. a 404) and eventually overflowed the stack.
    final int maxAttempts = 3;

    ResponseHandler<String> responseHandler = new ResponseHandler<String>() {
        @Override
        public String handleResponse(final HttpResponse response) throws ClientProtocolException, IOException {
            int status = response.getStatusLine().getStatusCode();
            if (status != 200) {
                throw new ClientProtocolException("Unexpected response status: " + status);
            }
            HttpEntity entity = response.getEntity();
            if (entity == null) {
                System.out.println("========entity is null:"+status + " "+url);
                return null;
            }
            if (charset == null) {
                return EntityUtils.toString(entity);
            }
            // Decode the raw bytes directly. Round-tripping an already decoded String
            // through ISO-8859-1 loses every non-Latin-1 character whenever the
            // response declares its own charset.
            return new String(EntityUtils.toByteArray(entity), charset);
        }
    };

    for (int attempt = 1; attempt <= maxAttempts; attempt++) {
        CloseableHttpClient httpclient = HttpClients.createDefault();
        try {
            return httpclient.execute(new HttpGet(url), responseHandler);
        } catch (ClientProtocolException e) {
            System.out.println("========ClientProtocolException===="+e.getMessage() + " "+url);
        } catch (IOException e) {
            System.out.println("========IOException===="+e.getMessage() + " "+url);
        } finally {
            // Single close per client; runs on success and on failure.
            closeHttpclient(httpclient);
        }
    }

    System.out.println("========giving up after " + maxAttempts + " attempts " + url);
    return null;
}