简介
HttpClient 是 Apache Jakarta Commons 下的子项目,可以用来提供高效的、最新的、功能丰富的支持 HTTP 协议的客户端编程工具包,并且它支持 HTTP 协议最新的版本和建议。
官网 http://hc.apache.org/httpcomponents-client-4.5.x/index.html
简单实例
创建一个 Maven 工程,添加 httpclient 依赖(当前最新版为 4.5.7)。
1 2 3 4 5
| <dependency> <groupId>org.apache.httpcomponents</groupId> <artifactId>httpclient</artifactId> <version>4.5.7</version> </dependency>
|
创建一个httpclient实例
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32
| import org.apache.http.HttpEntity; import org.apache.http.client.methods.CloseableHttpResponse; import org.apache.http.client.methods.HttpGet; import org.apache.http.impl.client.CloseableHttpClient; import org.apache.http.impl.client.HttpClients; import org.apache.http.util.EntityUtils;
import java.io.IOException;
/**
 * Minimal HttpClient example: issues a GET request and prints the response body.
 */
public class HttpClientDemo {

    /**
     * Requests https://www.cnblogs.com/ and prints the response body to standard output.
     *
     * @param args command-line arguments (unused)
     * @throws IOException if executing the request or reading the response fails
     */
    public static void main(String[] args) throws IOException {
        HttpGet httpget = new HttpGet("https://www.cnblogs.com/");

        // try-with-resources closes the response first and then the client, even when
        // execute() or EntityUtils.toString() throws; the explicit close() calls it
        // replaces were skipped on any exception.
        try (CloseableHttpClient httpclient = HttpClients.createDefault();
             CloseableHttpResponse response = httpclient.execute(httpget)) {
            HttpEntity entity = response.getEntity();

            // "utf-8" is passed to EntityUtils as the charset for decoding the body.
            String body = EntityUtils.toString(entity, "utf-8");
            System.out.println(body);
        }
    }
}
> 成功获取并打印
|

模拟浏览器
有的网站对请求有限制,比如请求 www.tuicool.com 时:
不加请求头
返回
1 2 3 4 5 6 7 8 9
| <!DOCTYPE html> <html> <head> <meta http-equiv="Content-Type" content="text/html; charset=utf-8"> </head> <body> <p>系统检测亲不是真人行为,因系统资源限制,我们只能拒绝你的请求。如果你有疑问,可以通过微博 http://weibo.com/tuicool2012/ 联系我们。</p> </body> </html>
|
设置请求头
通过设置 User-Agent 请求头来模拟浏览器发出的请求:
httpget.setHeader("User-Agent","Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.121 Safari/537.36");
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34
| import org.apache.http.HttpEntity; import org.apache.http.client.methods.CloseableHttpResponse; import org.apache.http.client.methods.HttpGet; import org.apache.http.impl.client.CloseableHttpClient; import org.apache.http.impl.client.HttpClients; import org.apache.http.util.EntityUtils;
import java.io.IOException;
/**
 * HttpClient example that sends a browser-like User-Agent header with a GET request.
 */
public class HttpClientDemo {

    /**
     * Requests https://www.tuicool.com/ with a Chrome User-Agent header and prints the
     * response body to standard output.
     *
     * @param args command-line arguments (unused)
     * @throws IOException if executing the request or reading the response fails
     */
    public static void main(String[] args) throws IOException {
        HttpGet httpget = new HttpGet("https://www.tuicool.com/");
        // Identify as a desktop Chrome browser on this request.
        httpget.setHeader("User-Agent","Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.121 Safari/537.36");

        // try-with-resources closes the response first and then the client, even when
        // execute() or EntityUtils.toString() throws; the explicit close() calls it
        // replaces were skipped on any exception.
        try (CloseableHttpClient httpclient = HttpClients.createDefault();
             CloseableHttpResponse response = httpclient.execute(httpget)) {
            HttpEntity entity = response.getEntity();

            // "utf-8" is passed to EntityUtils as the charset for decoding the body.
            String body = EntityUtils.toString(entity, "utf-8");
            System.out.println(body);
        }
    }
}
|

获取响应内容
HttpClient获取响应内容类型Content-Type
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40
| import org.apache.http.Header; import org.apache.http.HttpEntity; import org.apache.http.client.methods.CloseableHttpResponse; import org.apache.http.client.methods.HttpGet; import org.apache.http.impl.client.CloseableHttpClient; import org.apache.http.impl.client.HttpClients; import org.apache.http.util.EntityUtils;
import java.io.IOException;
/**
 * HttpClient example that prints the response body and its Content-Type header.
 */
public class HttpClientDemo {

    /**
     * Requests https://www.tuicool.com/ with a Chrome User-Agent header, then prints the
     * response body followed by the entity's Content-Type header.
     *
     * @param args command-line arguments (unused)
     * @throws IOException if executing the request or reading the response fails
     */
    public static void main(String[] args) throws IOException {
        HttpGet httpget = new HttpGet("https://www.tuicool.com/");
        // Identify as a desktop Chrome browser on this request.
        httpget.setHeader("User-Agent","Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.121 Safari/537.36");

        // try-with-resources closes the response first and then the client, even when
        // execute() or EntityUtils.toString() throws; the explicit close() calls it
        // replaces were skipped on any exception.
        try (CloseableHttpClient httpclient = HttpClients.createDefault();
             CloseableHttpResponse response = httpclient.execute(httpget)) {
            HttpEntity entity = response.getEntity();

            // "utf-8" is passed to EntityUtils as the charset for decoding the body.
            String body = EntityUtils.toString(entity, "utf-8");
            System.out.println(body);

            // Printed as a full header line, e.g. "Content-Type: text/html; charset=utf-8".
            Header contentType = entity.getContentType();
            System.out.println(contentType);
        }
    }
}
|
1
| Content-Type: text/html; charset=utf-8
|
获取响应状态码
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45
| import org.apache.http.Header; import org.apache.http.HttpEntity; import org.apache.http.client.methods.CloseableHttpResponse; import org.apache.http.client.methods.HttpGet; import org.apache.http.impl.client.CloseableHttpClient; import org.apache.http.impl.client.HttpClients; import org.apache.http.util.EntityUtils;
import java.io.IOException;
/**
 * HttpClient example that prints the response body, Content-Type header and status code.
 */
public class HttpClientDemo {

    /**
     * Requests https://www.tuicool.com/ with a Chrome User-Agent header, then prints the
     * response body, the entity's Content-Type header and the HTTP status code, in that order.
     *
     * @param args command-line arguments (unused)
     * @throws IOException if executing the request or reading the response fails
     */
    public static void main(String[] args) throws IOException {
        HttpGet httpget = new HttpGet("https://www.tuicool.com/");
        // Identify as a desktop Chrome browser on this request.
        httpget.setHeader("User-Agent","Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.121 Safari/537.36");

        // try-with-resources closes the response first and then the client, even when
        // execute() or EntityUtils.toString() throws; the explicit close() calls it
        // replaces were skipped on any exception.
        try (CloseableHttpClient httpclient = HttpClients.createDefault();
             CloseableHttpResponse response = httpclient.execute(httpget)) {
            HttpEntity entity = response.getEntity();

            // "utf-8" is passed to EntityUtils as the charset for decoding the body.
            String body = EntityUtils.toString(entity, "utf-8");
            System.out.println(body);

            Header contentType = entity.getContentType();
            System.out.println(contentType);

            // Numeric HTTP status taken from the response status line.
            int statusCode = response.getStatusLine().getStatusCode();
            System.out.println(statusCode);
        }
    }
}
|
设置代理
在爬取网页的时候,有的目标站点有反爬虫机制,对于频繁访问站点以及规则性访问站点的行为,会采取屏蔽 IP 的措施。
这时可以使用代理 IP 来规避这种限制。
代理 IP 的获取:可以在百度搜索“代理ip”,例如:
https://www.kuaidaili.com/free/
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53
| import org.apache.http.Header; import org.apache.http.HttpEntity; import org.apache.http.HttpHost; import org.apache.http.client.config.RequestConfig; import org.apache.http.client.methods.CloseableHttpResponse; import org.apache.http.client.methods.HttpGet; import org.apache.http.impl.client.CloseableHttpClient; import org.apache.http.impl.client.HttpClients; import org.apache.http.util.EntityUtils;
import java.io.IOException;
/**
 * HttpClient example that sends a GET request through a proxy server.
 */
public class HttpClientDemo {

    /**
     * Requests https://www.tuicool.com/ through the proxy at 222.135.92.68:38094 with a
     * Chrome User-Agent header, then prints the response body, the entity's Content-Type
     * header and the HTTP status code.
     *
     * @param args command-line arguments (unused)
     * @throws IOException if executing the request or reading the response fails
     */
    public static void main(String[] args) throws IOException {
        HttpGet httpget = new HttpGet("https://www.tuicool.com/");
        // Identify as a desktop Chrome browser on this request.
        httpget.setHeader("User-Agent","Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.121 Safari/537.36");

        // The proxy is configured per request, not on the client.
        HttpHost proxy = new HttpHost("222.135.92.68", 38094);
        RequestConfig requestConfig = RequestConfig.custom()
                .setProxy(proxy)
                .build();
        httpget.setConfig(requestConfig);

        // try-with-resources closes the response first and then the client, even when
        // execute() or EntityUtils.toString() throws; the explicit close() calls it
        // replaces were skipped on any exception.
        try (CloseableHttpClient httpclient = HttpClients.createDefault();
             CloseableHttpResponse response = httpclient.execute(httpget)) {
            HttpEntity entity = response.getEntity();

            // "utf-8" is passed to EntityUtils as the charset for decoding the body.
            String body = EntityUtils.toString(entity, "utf-8");
            System.out.println(body);

            Header contentType = entity.getContentType();
            System.out.println(contentType);

            int statusCode = response.getStatusLine().getStatusCode();
            System.out.println(" statusCode "+statusCode);
        }
    }
}
|
设置超时
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60
|
import org.apache.http.Header; import org.apache.http.HttpEntity; import org.apache.http.HttpHost; import org.apache.http.client.config.RequestConfig; import org.apache.http.client.methods.CloseableHttpResponse; import org.apache.http.client.methods.HttpGet; import org.apache.http.impl.client.CloseableHttpClient; import org.apache.http.impl.client.HttpClients; import org.apache.http.util.EntityUtils;
import java.io.IOException;
/**
 * HttpClient example that sends a GET request through a proxy with explicit timeouts.
 */
public class HttpClientDemo {

    /**
     * Requests https://www.cnblogs.com/ through the proxy at 101.89.132.131:80 with a
     * Chrome User-Agent header and socket/connect timeouts of 5000, then prints the
     * response body, the entity's Content-Type header and the HTTP status code.
     *
     * @param args command-line arguments (unused)
     * @throws IOException if executing the request or reading the response fails
     */
    public static void main(String[] args) throws IOException {
        HttpGet httpget = new HttpGet("https://www.cnblogs.com/");
        // Identify as a desktop Chrome browser on this request.
        httpget.setHeader("User-Agent","Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.121 Safari/537.36");

        // Proxy and timeouts are configured per request, not on the client.
        // NOTE(review): RequestConfig timeouts are in milliseconds per the HttpClient
        // documentation, so 5000 should mean 5 seconds - confirm.
        HttpHost proxy = new HttpHost("101.89.132.131", 80);
        RequestConfig requestConfig = RequestConfig.custom()
                .setSocketTimeout(5000)
                .setConnectTimeout(5000)
                .setProxy(proxy)
                .build();
        httpget.setConfig(requestConfig);

        // try-with-resources closes the response first and then the client, even when
        // execute() or EntityUtils.toString() throws; the explicit close() calls it
        // replaces were skipped on any exception.
        try (CloseableHttpClient httpclient = HttpClients.createDefault();
             CloseableHttpResponse response = httpclient.execute(httpget)) {
            HttpEntity entity = response.getEntity();

            // "utf-8" is passed to EntityUtils as the charset for decoding the body.
            String body = EntityUtils.toString(entity, "utf-8");
            System.out.println(body);

            Header contentType = entity.getContentType();
            System.out.println(contentType);

            int statusCode = response.getStatusLine().getStatusCode();
            System.out.println(" statusCode "+statusCode);
        }
    }
}
|