主要代码:
package com.itquwei.spider;
import java.io.IOException;
import java.nio.charset.Charset;
import org.apache.http.HttpEntity;
import org.apache.http.client.ClientProtocolException;
import org.apache.http.client.config.RequestConfig;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.util.EntityUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import com.itquwei.spider.dao.IpInfoDao;
import com.itquwei.spider.pojo.IpInfo;
/**
 * Crawler for the paginated proxy listing at {@code http://www.xicidaili.com/nt/<page>}.
 *
 * <p>Each page is downloaded with Apache HttpClient, parsed with jsoup, and every
 * table row that carries a {@code class} attribute inside {@code #ip_list} is
 * converted to an {@link IpInfo} and stored through {@link IpInfoDao}.
 */
public class XCSpider {
    /** First listing page requested by {@link #main(String[])}. */
    private static final int FIRST_PAGE = 1;
    /** Exclusive upper bound of the page loop; pages 1 to 663 are requested. */
    private static final int PAGE_LIMIT = 664;
    /** Pause between two page requests, in milliseconds. */
    private static final long PAGE_DELAY_MILLIS = 5000L;
    /** A row is read up to cell index 9, so it needs at least this many cells. */
    private static final int MIN_CELLS = 10;
    /** Charset used to decode the response body. */
    private static final Charset PAGE_CHARSET = Charset.forName("utf-8");

    private static final IpInfoDao dao = new IpInfoDao();

    /**
     * Walks all listing pages in order, storing their rows and pausing between requests.
     *
     * @param args unused
     * @throws Exception if a page cannot be fetched or the sleep is interrupted
     */
    public static void main(String[] args) throws Exception {
        for (int page = FIRST_PAGE; page < PAGE_LIMIT; page++) {
            paging(page);
            // Progress message: "page N" (the previous literal printed a stray extra character).
            System.out.println("第" + page + "页");
            Thread.sleep(PAGE_DELAY_MILLIS);
        }
    }

    /**
     * Downloads one listing page and stores every proxy row found on it.
     *
     * @param page page number appended to the listing URL
     * @throws IOException if the HTTP request fails
     */
    public static void paging(int page) throws IOException,
            ClientProtocolException {
        String url = "http://www.xicidaili.com/nt/" + page;
        String html = getIndex(url);
        // Save all rows of the page, not just the first one.
        for (Element row : selectRows(html)) {
            IpInfo ipInfo = parseRow(row);
            if (ipInfo != null) {
                dao.saveIpInfo(ipInfo);
            }
        }
    }

    /**
     * Extracts the first usable proxy row from a listing page.
     *
     * @param html markup of a listing page
     * @return the first row that has enough cells, or {@code null} when the page has none
     */
    public static IpInfo getIpInfo(String html) {
        for (Element row : selectRows(html)) {
            IpInfo info = parseRow(row);
            if (info != null) {
                return info;
            }
        }
        return null;
    }

    /**
     * Fetches a URL with a browser-like User-Agent and returns the body decoded as UTF-8.
     *
     * @param url address to request
     * @return the response body, or an empty string when the response has no entity
     * @throws IOException if the request or reading the body fails
     */
    public static String getIndex(String url) throws IOException,
            ClientProtocolException {
        HttpGet httpGet = new HttpGet(url);
        // Connect timeout 20 s, socket (read) timeout 30 s; both values are milliseconds.
        RequestConfig config = RequestConfig.custom().setConnectTimeout(20000)
                .setSocketTimeout(30000).build();
        httpGet.setConfig(config);
        httpGet.setHeader(
                "User-Agent",
                "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.117 Safari/537.36");
        // Both the client and the response hold connections; close them on every path.
        try (CloseableHttpClient httpClient = HttpClients.createDefault();
                CloseableHttpResponse res = httpClient.execute(httpGet)) {
            HttpEntity entity = res.getEntity();
            if (entity == null) {
                return "";
            }
            return EntityUtils.toString(entity, PAGE_CHARSET);
        }
    }

    /** Parses the markup and selects the table rows that describe proxies. */
    private static Elements selectRows(String html) {
        Document doc = Jsoup.parse(html == null ? "" : html);
        return doc.select("#ip_list tr[class]");
    }

    /** Maps one table row to an {@link IpInfo}; returns {@code null} if the row has too few cells. */
    private static IpInfo parseRow(Element row) {
        Elements tds = row.select("tr td");
        if (tds.size() < MIN_CELLS) {
            return null;
        }
        IpInfo info = new IpInfo();
        info.setIp(tds.get(1).text());
        info.setPort(tds.get(2).text());
        // The address is read from the link text inside the cell.
        info.setAddress(tds.get(3).select("a").text());
        info.setStatus(tds.get(4).text());
        info.setType(tds.get(5).text());
        info.setLiveTime(tds.get(8).text());
        info.setTestTime(tds.get(9).text());
        return info;
    }
}
pojo代码:
package com.itquwei.spider.pojo;
/**
 * Mutable bean describing one proxy entry. All seven attributes are kept as
 * plain strings and exposed through conventional getters and setters.
 */
public class IpInfo {
    /** IP address. */
    private String ip;
    /** Port. */
    private String port;
    /** Address text. */
    private String address;
    /** Status text. */
    private String status;
    /** Type text. */
    private String type;
    /** Live-time text. */
    private String liveTime;
    /** Test-time text. */
    private String testTime;

    /** @return the stored IP address */
    public String getIp() {
        return this.ip;
    }

    /** @param ip IP address to store */
    public void setIp(String ip) {
        this.ip = ip;
    }

    /** @return the stored port */
    public String getPort() {
        return this.port;
    }

    /** @param port port to store */
    public void setPort(String port) {
        this.port = port;
    }

    /** @return the stored address */
    public String getAddress() {
        return this.address;
    }

    /** @param address address to store */
    public void setAddress(String address) {
        this.address = address;
    }

    /** @return the stored status */
    public String getStatus() {
        return this.status;
    }

    /** @param status status to store */
    public void setStatus(String status) {
        this.status = status;
    }

    /** @return the stored type */
    public String getType() {
        return this.type;
    }

    /** @param type type to store */
    public void setType(String type) {
        this.type = type;
    }

    /** @return the stored live time */
    public String getLiveTime() {
        return this.liveTime;
    }

    /** @param liveTime live time to store */
    public void setLiveTime(String liveTime) {
        this.liveTime = liveTime;
    }

    /** @return the stored test time */
    public String getTestTime() {
        return this.testTime;
    }

    /** @param testTime test time to store */
    public void setTestTime(String testTime) {
        this.testTime = testTime;
    }

    /**
     * Renders every attribute as {@code IpInfo [name=value, ...]}; unset
     * attributes appear as {@code null}.
     */
    @Override
    public String toString() {
        StringBuilder text = new StringBuilder("IpInfo [ip=");
        text.append(ip);
        text.append(", port=").append(port);
        text.append(", address=").append(address);
        text.append(", status=").append(status);
        text.append(", type=").append(type);
        text.append(", liveTime=").append(liveTime);
        text.append(", testTime=").append(testTime);
        text.append("]");
        return text.toString();
    }
}
dao代码:连接数据库用的
package com.itquwei.spider.dao;
import org.springframework.jdbc.core.JdbcTemplate;
import com.itquwei.spider.pojo.IpInfo;
import com.mchange.v2.c3p0.ComboPooledDataSource;
/**
 * {@link JdbcTemplate} preconfigured with a c3p0 pooled data source for the
 * local {@code spider} MySQL database, plus a helper that inserts one
 * {@link IpInfo} into the {@code xc_ipInfo} table.
 */
public class IpInfoDao extends JdbcTemplate {
    /** Parameterized insert; the placeholders follow the column order listed here. */
    private static final String INSERT_SQL =
            "insert into xc_ipInfo (ip,port,address,status,type,liveTime,testTime) values(?,?,?,?,?,?,?);";

    /**
     * Builds the pooled data source and installs it on this template.
     */
    public IpInfoDao() {
        ComboPooledDataSource dataSource = new ComboPooledDataSource();
        // The connection property is spelled "characterEncoding". The previous
        // spelling "characterEnconding" is not a known Connector/J property, so
        // the UTF-8 setting was presumably never applied.
        dataSource
                .setJdbcUrl("jdbc:mysql://localhost:3306/spider?characterEncoding=utf-8");
        // NOTE(review): credentials are hard-coded; consider moving them to configuration.
        dataSource.setUser("root");
        dataSource.setPassword("root");
        setDataSource(dataSource);
    }

    /**
     * Inserts one proxy record as a new row.
     *
     * @param info record whose seven string attributes are bound to the insert statement
     */
    public void saveIpInfo(IpInfo info) {
        update(INSERT_SQL, info.getIp(), info.getPort(), info.getAddress(),
                info.getStatus(), info.getType(), info.getLiveTime(),
                info.getTestTime());
    }
}
结果:
