1. pom文件中加入httpClient依赖包
<!-- Apache HttpComponents HttpClient -->
<dependency>
    <groupId>org.apache.httpcomponents</groupId>
    <artifactId>httpclient</artifactId>
    <version>4.3.1</version>
</dependency>
2. 创建一个调用httpClient的工具类
public class HttpClientUtil { private CloseableHttpClient closeableHttpClient; private RequestConfig requestConfig; // 最大的连接数 private int maxTotal = 10; // 最大的并发数 private int defaultMaxPerRoute = 5; // 连接超时数 private int connectTimeOut = 2000; // 数据传输的最长时间 private int socketTimeout = 10000; // 在连接之前测试连接可不可用 private boolean staleConnectionCheckEnabled = true; // 从数据池中获取连接的最长时间 private int connectionRequestTimeOut = 500; public HttpClientUtil() { createCloseableHttpClient(); createRequestConfig(); } /** * 创建CloseableHttpClient */ private void createCloseableHttpClient() { PoolingHttpClientConnectionManager connectionManager = new PoolingHttpClientConnectionManager(); connectionManager.setMaxTotal(maxTotal); connectionManager.setDefaultMaxPerRoute(defaultMaxPerRoute); HttpClientBuilder httpClientBuilder = HttpClientBuilder.create(); httpClientBuilder.setConnectionManager(connectionManager); this.closeableHttpClient = httpClientBuilder.build(); } /** * 创建requestConfig */ private void createRequestConfig() { RequestConfig.Builder custom = RequestConfig.custom(); custom.setConnectTimeout(connectTimeOut) .setSocketTimeout(socketTimeout) .setStaleConnectionCheckEnabled(staleConnectionCheckEnabled) .setConnectionRequestTimeout(connectionRequestTimeOut); this.requestConfig = custom.build(); } /** * get请求不带参数 * @param url * @return * @throws Exception */ public String doGet(String url) throws Exception { // 先获取地址的请求对象 HttpGet httpGet = new HttpGet(url); // 配置参数 httpGet.setConfig(requestConfig); // 执行请求 CloseableHttpResponse response = closeableHttpClient.execute(httpGet); if (response.getStatusLine().getStatusCode() == 200) { return EntityUtils.toString(response.getEntity(),"UTF-8"); } return null; } /** * get请求带参数 * @param url * @param map * @return * @throws Exception */ public String doGet(String url, Map<String, Object> map) throws Exception { URIBuilder uriBuilder = new URIBuilder(url); if (map != null) { Set<Map.Entry<String, Object>> entrySet = map.entrySet(); for 
(Map.Entry<String, Object> entry : entrySet) { uriBuilder.addParameter(entry.getKey(),entry.getValue().toString()); } } return this.doGet(uriBuilder.build().toString()); } /** * 带参数的post请求 * @param url * @param map * @return * @throws Exception */ public String doPost(String url, Map<String, Object> map) throws Exception { HttpPost httpPost = new HttpPost(url); httpPost.setConfig(requestConfig); if (map != null) { List<NameValuePair> pairList = new ArrayList<>(); Set<Map.Entry<String, Object>> entrySet = map.entrySet(); for (Map.Entry<String,Object> entry : entrySet) { pairList.add(new BasicNameValuePair(entry.getKey(),entry.getValue().toString())); } UrlEncodedFormEntity entity = new UrlEncodedFormEntity(pairList); httpPost.setEntity(entity); } CloseableHttpResponse response = closeableHttpClient.execute(httpPost); if (response.getStatusLine().getStatusCode() == 200) { return EntityUtils.toString(response.getEntity(),"UTF-8"); } return null; } /** * post请求不带参数 * @param url * @return * @throws Exception */ public String doPost(String url) throws Exception { return this.doPost(url,null); }
3. 利用JUnit进行单元测试
@Test public void test1() { String url = "https://blog.youkuaiyun.com/javalixy/article/details/76284524"; HttpClientUtil clientUtil = new HttpClientUtil(); try { String result = clientUtil.doGet(url); parseHtml(result); } catch (Exception e) { e.printStackTrace(); }
可以得到返回的Html页面
4.使用开源框架Jsoup进行html页面的解析
4.1 加入jsoup依赖
<!-- jsoup HTML parser -->
<dependency>
    <groupId>org.jsoup</groupId>
    <artifactId>jsoup</artifactId>
    <version>1.7.3</version>
</dependency>
4.2 jsoup解析页面
/**
 * Parses an HTML string with jsoup and prints a summary of its
 * {@code <link href>}, {@code <span>} and {@code <img>} elements to standard output.
 *
 * @param result the HTML source to parse
 */
private void parseHtml(String result) {
    Document document = Jsoup.parse(result);
    Elements linkElements = document.select("link[href]");
    Elements textElements = document.select("span");
    Elements imgElements = document.select("img");

    print("LinkElements: (%d)", linkElements.size());
    print("TextElements: (%d)", textElements.size());
    print("ImgElements: (%d)", imgElements.size());

    for (Element link : linkElements) {
        // <link> is a void element and has no text, so show its rel attribute instead.
        print(" * link: <%s> (%s)", urlAttr(link, "href"), trim(link.attr("rel"), 35));
    }
    for (Element text : textElements) {
        // class is not a URL, so it is read without the "abs:" prefix.
        print("* text: <%s> (%s)", text.attr("class"), trim(text.text(), 35));
    }
    for (Element img : imgElements) {
        // <img> is a void element and has no text, so show its alt attribute instead.
        print("* img: <%s> (%s)", urlAttr(img, "src"), trim(img.attr("alt"), 35));
    }
}

/**
 * Reads a URL-valued attribute, preferring the absolute form.
 *
 * <p>The document is parsed without a base URI, so the "abs:" lookup yields an
 * empty string for relative URLs; in that case the raw attribute value is returned.
 *
 * @param element the element to read from
 * @param attributeName the attribute name, without the "abs:" prefix
 * @return the absolute URL if it can be resolved, otherwise the raw attribute value
 */
private static String urlAttr(Element element, String attributeName) {
    String absolute = element.attr("abs:" + attributeName);
    return absolute.isEmpty() ? element.attr(attributeName) : absolute;
}

/**
 * Formats a message with {@link String#format(String, Object...)} and prints it
 * on its own line.
 *
 * @param str the format string
 * @param msg the format arguments
 */
private void print(String str, Object... msg) {
    System.out.println(String.format(str, msg));
}

/**
 * Shortens a string to at most {@code width} characters, ending a shortened
 * result with a "." marker.
 *
 * @param str the string to shorten
 * @param width the maximum length of the result
 * @return {@code str} unchanged if it already fits, otherwise its first
 *         {@code width - 1} characters followed by "."
 */
private static String trim(String str, int width) {
    if (str.length() <= width) {
        return str;
    }
    // Keep width - 1 characters so that the result including the "." fits in width.
    return str.substring(0, Math.max(width - 1, 0)) + ".";
}