Java爬虫 学习记录1
1.pom依赖:(需要在pom.xml中引入 org.apache.httpcomponents:httpclient 依赖,下面代码中的 org.apache.http.* 类都来自该依赖)
2.编写test代码:
这里以自如网站为例
package cn.kgc.crawel.test;
import org.apache.http.HttpEntity;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.util.EntityUtils;
import sun.net.www.http.HttpClient;
import java.io.IOException;
/**
 * Minimal Apache HttpClient demo: issues a GET request to the Ziroom site and
 * prints the response body to standard output.
 *
 * Created by jiang on 6/20/21 11:32 AM
 */
public class CrawellerTest {

    /**
     * Fetches the hard-coded Ziroom URL and prints the page content.
     *
     * <p>On a 200 response the body is written to {@code System.out}; on any
     * other status, or when the response carries no entity, a short diagnostic
     * is written to {@code System.err} instead.
     *
     * @param args command-line arguments (unused)
     * @throws IOException if the request fails or the body cannot be read
     */
    public static void main(String[] args) throws IOException {
        // The target address, wrapped in a GET request ("type the URL into the browser").
        HttpGet httpGet = new HttpGet("https://www.ziroom.com/?utm_source=pinzhuan&utm_medium=baidu&utm_term=ziru&utm_content=biaoti&utm_campaign=pinzhuan");

        // try-with-resources closes the response first and then the client,
        // even when reading the body throws, so no connection is leaked.
        try (CloseableHttpClient httpClient = HttpClients.createDefault();
             CloseableHttpResponse response = httpClient.execute(httpGet)) {

            // Only a 200 status is treated as a successful fetch.
            int statusCode = response.getStatusLine().getStatusCode();
            if (statusCode != 200) {
                System.err.println("Unexpected HTTP status: " + statusCode);
                return;
            }

            // getEntity() may return null when the response has no body.
            HttpEntity httpEntity = response.getEntity();
            if (httpEntity == null) {
                System.err.println("Response contained no entity");
                return;
            }

            // Read the whole body as text, passing "utf8" as the charset argument.
            String content = EntityUtils.toString(httpEntity, "utf8");
            System.out.println(content);
        }
    }
}
3.控制台查看效果
总结:通过httpclient自动抓取到自如的页面结构并打印在控制台。如果创建HttpClients对象时报错,可以使用idea右侧maven按钮先clean再install,即可解决问题