4.5版本的HttpClient中的连接池管理器PoolingHttpClientConnectionManager类实现了HTTP连接池管理,其管理连接的单位为路由(Route),每个路由维护一定数量(默认是2)的连接;当给定路由的所有连接都被租用时,新的连接请求将被阻塞,直到某连接被释放回连接池。PoolingHttpClientConnectionManager维护的连接总数也受MaxTotal(默认是20)的限制。
当HttpClient配置了PoolingHttpClientConnectionManager时,其可以同时执行多个HTTP请求,即支持多线程操作。程序3-22提供了一个简单的多线程请求多URL的案例。由程序3-22可知,使用实例化的PoolingHttpClientConnectionManager可以设置最大连接数、Connection信息和Socket信息等。另外,本案例是通过继承Thread类、重写Thread类的run()方法实现的多线程,有兴趣的读者可以通过实现Runnable接口的方式实现多线程。截图为程序在控制台输出的结果。
//程序3-22
/**
 * Listing 3-22 entry point: fetches several URLs concurrently through a single
 * {@code CloseableHttpClient} that is backed by a
 * {@code PoolingHttpClientConnectionManager}, writing each response body to a file.
 */
public class HttpClientThread {
    /**
     * Builds the pooled client, submits one download task per URL to a fixed pool of
     * three worker threads, waits for the tasks to finish and then closes the client.
     *
     * @param args unused
     * @throws FileNotFoundException if one of the output files cannot be opened
     */
    public static void main(String[] args) throws FileNotFoundException {
        // Connection parameters: ignore malformed/unmappable input, use UTF-8.
        ConnectionConfig connectionConfig = ConnectionConfig.custom()
                .setMalformedInputAction(CodingErrorAction.IGNORE)
                .setUnmappableInputAction(CodingErrorAction.IGNORE)
                .setCharset(Consts.UTF_8)
                .build();
        // Socket parameters.
        SocketConfig socketConfig = SocketConfig.custom().setTcpNoDelay(true).build();

        // Configure the pooling connection manager.
        PoolingHttpClientConnectionManager pcm = new PoolingHttpClientConnectionManager();
        // Maximum number of connections in the whole pool.
        pcm.setMaxTotal(100);
        // Maximum number of connections per route (not "routes per connection").
        pcm.setDefaultMaxPerRoute(10);
        pcm.setDefaultConnectionConfig(connectionConfig);
        pcm.setDefaultSocketConfig(socketConfig);

        // Global request configuration: cookie spec, HTTP auth schemes, timeouts.
        RequestConfig defaultConfig = RequestConfig.custom()
                .setCookieSpec(CookieSpecs.STANDARD_STRICT)
                .setExpectContinueEnabled(true)
                .setTargetPreferredAuthSchemes(Arrays.asList(AuthSchemes.NTLM, AuthSchemes.DIGEST))
                .setProxyPreferredAuthSchemes(Arrays.asList(AuthSchemes.BASIC))
                .setConnectionRequestTimeout(30*1000)
                .setConnectTimeout(30*1000)
                .setSocketTimeout(30*1000)
                .build();
        CloseableHttpClient httpClient = HttpClients.custom()
                .setConnectionManager(pcm)
                .setDefaultRequestConfig(defaultConfig)
                .build();

        // URLs to request.
        String[] urlArr = {"https://hbr.org/podcasts","https://hbr.org/magazine","https://hbr.org/most-popular","https://hbr.org/big-ideas","https://hbr.org/reading-lists"};
        // Fixed-size worker pool.
        ExecutorService exec = Executors.newFixedThreadPool(3);
        try {
            for (int i = 0; i < urlArr.length; i++) {
                // File name is the path segment after "org/".
                String filename = urlArr[i].split("org/")[1];
                // The stream is handed to the task, which is responsible for closing it.
                OutputStream out = new FileOutputStream("/Users/steven/Documents/代码/project/spider/src/main/java/com/topicBet/util/" + filename);
                HttpGet httpget = new HttpGet(urlArr[i]);
                // DownHtmlFileThread is used here purely as a Runnable run by the pool.
                exec.execute(new DownHtmlFileThread(httpClient, httpget, out));
            }
        } finally {
            // Stop accepting tasks, then wait for the submitted downloads so the client
            // is not closed underneath them. This also runs when opening a file fails.
            exec.shutdown();
            try {
                if (!exec.awaitTermination(5, java.util.concurrent.TimeUnit.MINUTES)) {
                    exec.shutdownNow();
                }
            } catch (InterruptedException ex) {
                exec.shutdownNow();
                // Preserve the interrupt for whoever called main.
                Thread.currentThread().interrupt();
            }
            // The original never closed the client, leaving pooled connections open.
            try {
                httpClient.close();
            } catch (java.io.IOException ex) {
                ex.printStackTrace();
            }
        }
    }
}
/**
 * Task that executes one GET request on a shared client and writes the response body,
 * decoded and re-encoded as UTF-8, to the supplied output stream.
 *
 * <p>The task takes ownership of the output stream: it is closed when {@link #run()}
 * finishes, whether the download succeeded or failed.
 */
public class DownHtmlFileThread extends Thread {
    /** Charset name used both to decode the entity and to encode the written bytes. */
    private static final String CHARSET = "utf-8";

    private final CloseableHttpClient httpClient;
    private final HttpContext context;
    private final HttpGet httpget;
    private final OutputStream out;

    /**
     * @param httpClient client used to execute the request
     * @param httpget    request to execute
     * @param out        destination for the response body; closed by {@link #run()}
     */
    public DownHtmlFileThread(CloseableHttpClient httpClient,HttpGet httpget, OutputStream out) {
        this.httpClient = httpClient;
        // Each task gets its own context instance.
        this.context = HttpClientContext.create();
        this.httpget = httpget;
        this.out = out;
    }

    /**
     * Executes the request and writes the body to the output stream. I/O and protocol
     * errors are printed to standard error and do not propagate.
     */
    @Override
    public void run() {
        System.out.println(Thread.currentThread().getName() + "线程请求的URL为:" + httpget.getURI());
        try {
            try {
                CloseableHttpResponse response = httpClient.execute(httpget, context);
                try {
                    // A response may carry no entity; toString would reject null.
                    if (response.getEntity() != null) {
                        // Encode with the same charset used for decoding instead of the
                        // platform default, so non-ASCII content is written intact.
                        out.write(EntityUtils.toString(response.getEntity(), CHARSET).getBytes(CHARSET));
                    }
                    // Make sure the entity content is fully consumed.
                    EntityUtils.consume(response.getEntity());
                } finally {
                    response.close();
                }
            } finally {
                // Close the file even when the request or the write failed.
                out.close();
            }
        } catch (ClientProtocolException ex) {
            // Protocol error.
            ex.printStackTrace();
        } catch (IOException ex) {
            // I/O error (includes failures while closing the stream).
            ex.printStackTrace();
        }
    }
}

5798

被折叠的 条评论
为什么被折叠?



