HttpClient 3.x,4.x都提供http连接池管理器,当使用了请求连接池管理器(比如PoolingHttpClientConnectionManager)后,HttpClient就可以同时执行多个线程的请求了。
HttpClient 3.x 使用 MultiThreadedHttpConnectionManager,4.x 的早期版本则提供了 ThreadSafeClientConnManager、PoolingClientConnectionManager、DefaultHttpClient 等类来实现 HTTP 连接池,但这些类在 4.3.x 版本之后大部分已被标记为过时(deprecated)。本文使用 4.3.x 提供的 PoolingHttpClientConnectionManager 等类来实现 HTTP 连接池。
废话不多说,下面是全部代码:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
|
public class PoolTest { private static void config(HttpRequestBase httpRequestBase) { httpRequestBase.setHeader( "User-Agent" , "Mozilla/5.0" ); httpRequestBase.setHeader( "Accept" , "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8" ); httpRequestBase.setHeader( "Accept-Language" , "zh-CN,zh;q=0.8,en-US;q=0.5,en;q=0.3" ); //"en-US,en;q=0.5"); httpRequestBase.setHeader( "Accept-Charset" , "ISO-8859-1,utf-8,gbk,gb2312;q=0.7,*;q=0.7" ); // 配置请求的超时设置 RequestConfig requestConfig = RequestConfig.custom() .setConnectionRequestTimeout( 3000 ) .setConnectTimeout( 3000 ) .setSocketTimeout( 3000 ) .build(); httpRequestBase.setConfig(requestConfig); } public static void main(String[] args) { ConnectionSocketFactory plainsf = PlainConnectionSocketFactory.getSocketFactory(); LayeredConnectionSocketFactory sslsf = SSLConnectionSocketFactory.getSocketFactory(); Registry<ConnectionSocketFactory> registry = RegistryBuilder.<ConnectionSocketFactory>create() .register( "http" , plainsf) .register( "https" , sslsf) .build(); PoolingHttpClientConnectionManager cm = new PoolingHttpClientConnectionManager(registry); // 将最大连接数增加到200 cm.setMaxTotal( 200 ); // 将每个路由基础的连接增加到20 cm.setDefaultMaxPerRoute( 20 ); // 将目标主机的最大连接数增加到50 HttpHost localhost = new HttpHost( "http://blog.youkuaiyun.com/gaolu" , 80 ); cm.setMaxPerRoute( new HttpRoute(localhost), 50 ); //请求重试处理 HttpRequestRetryHandler httpRequestRetryHandler = new HttpRequestRetryHandler() { public boolean retryRequest(IOException exception, int executionCount, HttpContext context) { if (executionCount >= 5 ) { // 如果已经重试了5次,就放弃 return false ; } if (exception instanceof NoHttpResponseException) { // 如果服务器丢掉了连接,那么就重试 return true ; } if (exception instanceof SSLHandshakeException) { // 不要重试SSL握手异常 return false ; } if (exception instanceof InterruptedIOException) { // 超时 return false ; } if (exception instanceof UnknownHostException) { // 目标服务器不可达 return false ; } if (exception instanceof ConnectTimeoutException) { // 连接被拒绝 
return false ; } if (exception instanceof SSLException) { // ssl握手异常 return false ; } HttpClientContext clientContext = HttpClientContext.adapt(context); HttpRequest request = clientContext.getRequest(); // 如果请求是幂等的,就再次尝试 if (!(request instanceof HttpEntityEnclosingRequest)) { return true ; } return false ; } }; CloseableHttpClient httpClient = HttpClients.custom() .setConnectionManager(cm) .setRetryHandler(httpRequestRetryHandler) .build(); // URL列表数组 String[] urisToGet = { "http://blog.youkuaiyun.com/gaolu/article/details/48466059" , "http://blog.youkuaiyun.com/gaolu/article/details/48243103" , "http://blog.youkuaiyun.com/gaolu/article/details/47656987" , "http://blog.youkuaiyun.com/gaolu/article/details/47055029" , "http://blog.youkuaiyun.com/gaolu/article/details/46400883" , "http://blog.youkuaiyun.com/gaolu/article/details/46359127" , "http://blog.youkuaiyun.com/gaolu/article/details/46224821" , "http://blog.youkuaiyun.com/gaolu/article/details/45305769" , "http://blog.youkuaiyun.com/gaolu/article/details/43701763" , "http://blog.youkuaiyun.com/gaolu/article/details/43195449" , "http://blog.youkuaiyun.com/gaolu/article/details/42915521" , "http://blog.youkuaiyun.com/gaolu/article/details/41802319" , "http://blog.youkuaiyun.com/gaolu/article/details/41045233" , "http://blog.youkuaiyun.com/gaolu/article/details/40395425" , "http://blog.youkuaiyun.com/gaolu/article/details/40047065" , "http://blog.youkuaiyun.com/gaolu/article/details/39891877" , "http://blog.youkuaiyun.com/gaolu/article/details/39499073" , "http://blog.youkuaiyun.com/gaolu/article/details/39314327" , "http://blog.youkuaiyun.com/gaolu/article/details/38820809" , "http://blog.youkuaiyun.com/gaolu/article/details/38439375" , }; long start = System.currentTimeMillis(); try { int pagecount = urisToGet.length; ExecutorService executors = Executors.newFixedThreadPool(pagecount); CountDownLatch countDownLatch = new CountDownLatch(pagecount); for ( int i = 0 ; i< pagecount;i++){ HttpGet httpget = new 
HttpGet(urisToGet[i]); config(httpget); //启动线程抓取 executors.execute( new GetRunnable(httpClient,httpget,countDownLatch)); } countDownLatch.await(); executors.shutdown(); } catch (InterruptedException e) { e.printStackTrace(); } finally { System.out.println( "线程" + Thread.currentThread().getName() + "," + System.currentTimeMillis() + ", 所有线程已完成,开始进入下一步!" ); } long end = System.currentTimeMillis(); System.out.println( "consume -> " + (end - start)); } static class GetRunnable implements Runnable { private CountDownLatch countDownLatch; private final CloseableHttpClient httpClient; private final HttpGet httpget; public GetRunnable(CloseableHttpClient httpClient, HttpGet httpget, CountDownLatch countDownLatch){ this .httpClient = httpClient; this .httpget = httpget; this .countDownLatch = countDownLatch; } @Override public void run() { CloseableHttpResponse response = null ; try { response = httpClient.execute(httpget,HttpClientContext.create()); HttpEntity entity = response.getEntity(); System.out.println(EntityUtils.toString(entity, "utf-8" )) ; EntityUtils.consume(entity); } catch (IOException e) { e.printStackTrace(); } finally { countDownLatch.countDown(); try { if (response != null ) response.close(); } catch (IOException e) { e.printStackTrace(); } } } } } |
主要参考文档:
http://free0007.iteye.com/blog/2012308