package com.spider;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.util.EntityUtils;
//import org.jsoup.Connection;
import org.jsoup.Connection;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.select.Elements;
import java.io.BufferedWriter;
import java.io.FileWriter;
import java.io.IOException;
import java.net.URL;
import java.util.HashMap;
import java.util.Map;
/**
 * Command-line scraper for the listing pages of rexian.beijing.gov.cn.
 *
 * <p>For every listing page from {@link #FIRST_PAGE} to {@link #LAST_PAGE} it downloads the page,
 * follows each entry link found on it, extracts the entry text and writes all entries of that
 * page to one text file under {@link #OUTPUT_DIR}.
 *
 * @Author zhaoxin
 * @Email 1272743926@qq.com
 * @Date 2018/11/30
 **/
public class BJ {

    /** Prefix shared by the listing URL and the relative detail links found on it. */
    private static final String BASE_URL = "http://rexian.beijing.gov.cn/default/";

    /** First listing page to fetch (inclusive). */
    private static final int FIRST_PAGE = 1;

    /** Last listing page to fetch (inclusive). */
    private static final int LAST_PAGE = 50;

    /** Directory, relative to the working directory, that receives one file per listing page. */
    private static final String OUTPUT_DIR = "spider/comment";

    /** CSS query selecting the detail-page anchors on a listing page. */
    private static final String LINK_QUERY = "div[class=my_activities_conter mail_conter] ul li p a";

    /** CSS query selecting the text element on a detail page. */
    private static final String TEXT_QUERY =
            "div[class=con_left float_left] div[class=mail_track] span[class=font14 mail_problem]";

    private static final String USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.92 Safari/537.36";

    // NOTE(review): hard-coded browser session cookie (contains a JSESSIONID); it has presumably
    // expired by now - confirm whether the site still requires it and consider externalising it.
    private static final String COOKIE = "pgv_pvi=4558768128; pgv_si=s3590505472; JSESSIONID=9DA2CC59CBF23789F93D028092A537A2; pgv_heid=1543563223863.1543567739768.1543567740806.45; accsid=1352; accside=left; accs1352=on; accfirst=0; __jsluid=de167ee5ed3b0e9e198288185efc2902; Hm_lvt_d7682ab43891c68a00de46e9ce5b76aa=1543563192; Hm_lpvt_d7682ab43891c68a00de46e9ce5b76aa=1543563192; _va_ref=%5B%22%22%2C%22%22%2C1543566649%2C%22https%3A%2F%2Fwww.baidu.com%2Flink%3Furl%3DGqfnHiLrNdisAae9_OHXPcmctOAkDREUM_djpN3fdtD76GxJXVb6fAkwP9vygdiw%26wd%3D%26eqid%3D85738a5e0000da7f000000045c00f147%22%5D; _va_ses=*; yunsuo_session_verify=b2a47ebec1c011e45e0e1b4f15c42d2b; sensorsdata2015jssdkcross=%7B%22distinct_id%22%3A%2216763c43eb19f3-0a2f2c2fbe88b6-5701732-2073600-16763c43eb2363%22%7D; sensorsdata_is_new_user=true; _va_id=e30ca400ca2395fc.1543478402.3.1543567741.1543564410.";

    /**
     * Scrapes every listing page in turn and writes one output file per page.
     *
     * @param args unused
     * @throws Exception if the output directory cannot be created, or a page cannot be fetched
     *                   or written; the run stops at the first failure
     */
    public static void main(String[] args) throws Exception {
        // FileWriter does not create missing parent directories, so make sure they exist first.
        java.io.File outputDir = new java.io.File(OUTPUT_DIR);
        if (!outputDir.isDirectory() && !outputDir.mkdirs()) {
            throw new IOException("Cannot create output directory: " + outputDir.getPath());
        }
        for (int page = FIRST_PAGE; page <= LAST_PAGE; page++) {
            scrapePage(page);
        }
    }

    /**
     * Fetches one listing page, follows every entry link on it and writes the entry texts to
     * that page's output file.
     */
    private static void scrapePage(int page) throws IOException {
        // URL of the listing page to parse
        String listingUrl = BASE_URL + "com.web.index.moreNewLetterQuery.flow?PageCond/currentPage=" + page + "&type=nextPage";
        Elements links = conn(listingUrl).select(LINK_QUERY);

        String outputFile = OUTPUT_DIR + "/第" + page + "页内容" + ".txt";
        BufferedWriter writer = new BufferedWriter(new FileWriter(outputFile));
        // try/finally (not try-with-resources) keeps the class compilable at source level 6
        // while still closing the file when a fetch or write fails part-way through.
        try {
            System.out.println(outputFile);
            for (int i = 0; i < links.size(); i++) {
                Document detail = conn(BASE_URL + links.get(i).attr("href"));
                Elements text = detail.select(TEXT_QUERY);
                writer.write("第" + i + "条内容:" + text.text() + "\n");
            }
        } finally {
            writer.close();
        }
    }

    /**
     * Downloads {@code path} with browser-like request headers and parses the response body.
     *
     * @param path absolute URL to fetch
     * @return the response body parsed into a DOM tree
     * @throws IOException if the request fails
     */
    public static Document conn(String path) throws IOException {
        Connection connection = Jsoup.connect(path)
                .header("User-Agent", USER_AGENT)
                .header("Cookie", COOKIE)
                .header("Connection", "keep-alive")
                .header("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8");
        Connection.Response response = connection.execute(); // perform the request
        return Jsoup.parse(response.body()); // convert the body to a DOM tree
    }
}
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>com.spider</groupId>
<artifactId>spider</artifactId>
<version>1.0-SNAPSHOT</version>
<name>spider</name>
<!-- FIXME change it to the project's website -->
<url>http://www.example.com</url>
<properties>
<!-- Character encoding Maven plugins use when reading source and resource files. -->
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<!-- Default Java source/target level for maven-compiler-plugin. Explicit <source>/<target>
     values in that plugin's <configuration> take precedence over these properties. -->
<maven.compiler.source>1.7</maven.compiler.source>
<maven.compiler.target>1.7</maven.compiler.target>
</properties>
<dependencies>
  <dependency>
    <groupId>junit</groupId>
    <artifactId>junit</artifactId>
    <version>4.11</version>
    <scope>test</scope>
  </dependency>
  <dependency>
    <groupId>org.apache.httpcomponents</groupId>
    <artifactId>httpclient</artifactId>
    <version>4.5.2</version>
  </dependency>
  <dependency>
    <groupId>org.jsoup</groupId>
    <artifactId>jsoup</artifactId>
    <version>1.7.3</version>
  </dependency>
  <!-- NOTE(review): a test API on the compile scope is unusual; switch to <scope>test</scope>
       if no class under src/main imports org.junit.jupiter - confirm before changing. -->
  <dependency>
    <groupId>org.junit.jupiter</groupId>
    <artifactId>junit-jupiter-api</artifactId>
    <version>5.0.3</version>
    <scope>compile</scope>
  </dependency>
  <!-- https://mvnrepository.com/artifact/net.sf.json-lib/json-lib -->
  <!-- json-lib 2.4 is only published with a JDK classifier (jdk13 / jdk15); without one the
       artifact cannot be resolved. -->
  <dependency>
    <groupId>net.sf.json-lib</groupId>
    <artifactId>json-lib</artifactId>
    <version>2.4</version>
    <classifier>jdk15</classifier>
  </dependency>
  <!-- https://mvnrepository.com/artifact/com.google.code.gson/gson -->
  <dependency>
    <groupId>com.google.code.gson</groupId>
    <artifactId>gson</artifactId>
    <version>2.8.0</version>
  </dependency>
  <!-- NOTE(review): old fastjson 1.2.x releases have published deserialization
       vulnerabilities; consider upgrading if untrusted JSON is ever parsed. -->
  <dependency>
    <groupId>com.alibaba</groupId>
    <artifactId>fastjson</artifactId>
    <version>1.2.53</version>
  </dependency>
  <dependency>
    <groupId>mysql</groupId>
    <artifactId>mysql-connector-java</artifactId>
    <version>5.1.44</version>
  </dependency>
  <!-- The former com.spider:spider:1.0-SNAPSHOT entry was removed: it is this project's own
       coordinate, and Maven rejects a POM whose dependency references itself. -->
</dependencies>
<build>
  <pluginManagement><!-- lock down plugins versions to avoid using Maven defaults (may be moved to parent pom) -->
    <plugins>
      <plugin>
        <artifactId>maven-clean-plugin</artifactId>
        <version>3.0.0</version>
      </plugin>
      <!-- see http://maven.apache.org/ref/current/maven-core/default-bindings.html#Plugin_bindings_for_jar_packaging -->
      <plugin>
        <artifactId>maven-resources-plugin</artifactId>
        <version>3.0.2</version>
      </plugin>
      <plugin>
        <artifactId>maven-compiler-plugin</artifactId>
        <version>3.7.0</version>
      </plugin>
      <plugin>
        <artifactId>maven-surefire-plugin</artifactId>
        <version>2.20.1</version>
      </plugin>
      <plugin>
        <artifactId>maven-jar-plugin</artifactId>
        <version>3.0.2</version>
      </plugin>
      <plugin>
        <artifactId>maven-install-plugin</artifactId>
        <version>2.5.2</version>
      </plugin>
      <plugin>
        <artifactId>maven-deploy-plugin</artifactId>
        <version>2.8.2</version>
      </plugin>
    </plugins>
  </pluginManagement>
  <plugins>
    <plugin>
      <groupId>org.apache.maven.plugins</groupId>
      <artifactId>maven-compiler-plugin</artifactId>
      <configuration>
        <!-- Use the language level declared in <properties> instead of a second hard-coded
             value (previously 6), so the POM has a single source of truth. -->
        <source>${maven.compiler.source}</source>
        <target>${maven.compiler.target}</target>
      </configuration>
    </plugin>
  </plugins>
</build>
</project>