// Single-book multithreaded crawler; requires the Jsoup library on the classpath.
package 单本爬虫;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
/**
 * Downloads every chapter of one novel and writes them to a single text file.
 *
 * <p>The chapter index page is parsed for links, each chapter page is fetched
 * by a {@link GetContent} task on a fixed pool of 20 threads, and the results
 * are written in the same order as the links appear in the index.
 */
public class Spider {
    /**
     * Runs the crawl and prints the elapsed wall-clock time in milliseconds.
     *
     * @param args unused
     * @throws IOException if the index page cannot be fetched, has no element
     *         with id {@code list}, or the output file cannot be written
     * @throws InterruptedException if the thread is interrupted while waiting
     *         for a chapter download
     * @throws ExecutionException if a chapter download task failed
     */
    public static void main(String[] args) throws IOException, InterruptedException, ExecutionException {
        long startTime = System.currentTimeMillis();
        String baseUrl = "https://www.xxbiquge.com/5_5690/";
        String bUrl = "https://www.xxbiquge.com";

        // Fetch and parse the index before touching the output file, so a
        // failed request does not leave an empty or truncated file behind.
        Document doc = Jsoup.connect(baseUrl).get();
        Element listElement = doc.getElementById("list");
        if (listElement == null) {
            throw new IOException("No element with id \"list\" found at " + baseUrl);
        }
        List<Chapter> chapList = new ArrayList<Chapter>();
        for (Element link : listElement.select("a[href]")) {
            Chapter p = new Chapter();
            p.setName(link.text());
            // hrefs are concatenated onto the site root, as in the index markup.
            p.setUrl(bUrl + link.attr("href"));
            chapList.add(p);
        }

        File file = new File("e:/aaa/1.txt");
        File parent = file.getParentFile();
        if (parent != null && !parent.isDirectory() && !parent.mkdirs()) {
            throw new IOException("Cannot create output directory " + parent);
        }

        ExecutorService pool = Executors.newFixedThreadPool(20);
        try {
            // FileWriter creates the file or truncates an existing one, so no
            // explicit createNewFile()/delete() step is required.
            FileWriter fw = new FileWriter(file);
            try {
                List<Future<Chapter>> result = new ArrayList<Future<Chapter>>(chapList.size());
                for (Chapter chapter : chapList) {
                    result.add(pool.submit(new GetContent(chapter)));
                }
                // Iterating the futures in submission order keeps chapters in
                // index order regardless of which download finishes first.
                for (Future<Chapter> future : result) {
                    Chapter done = future.get();
                    fw.write(done.getName() + "\r\n");
                    fw.write(done.getContent() + "\r\n");
                }
            } finally {
                fw.close();
            }
        } finally {
            // Always stop the workers; otherwise a failure above would leave
            // non-daemon pool threads alive and the JVM would not exit.
            pool.shutdownNow();
        }

        long endTime = System.currentTimeMillis();
        System.out.println("运行时间: " + (endTime - startTime) + " ms");
    }
}
/**
 * Task that downloads one chapter page and stores its text in the
 * {@link Chapter} it was constructed with.
 */
class GetContent implements Callable<Chapter> {
    Chapter chapter;

    /**
     * @param p the chapter whose URL will be fetched and whose content will be set
     */
    public GetContent(Chapter p) {
        this.chapter = p;
    }

    /**
     * Fetches the chapter URL, takes the text of the element with id
     * {@code content}, splits it on single spaces and stores the pieces one
     * per line (each prefixed with a space and terminated by CRLF).
     *
     * @return the same {@link Chapter} instance, with its content populated
     * @throws IllegalStateException if the page cannot be downloaded (the
     *         {@link IOException} is attached as the cause) or if it contains
     *         no element with id {@code content}
     */
    @Override
    public Chapter call() {
        String url = chapter.getUrl();
        Document doc;
        try {
            doc = Jsoup.connect(url).get();
        } catch (IOException e) {
            // Fail with the real cause instead of continuing with a null
            // document and dying on an unrelated NullPointerException.
            throw new IllegalStateException("Failed to download chapter from " + url, e);
        }
        Element contentElement = doc.getElementById("content");
        if (contentElement == null) {
            throw new IllegalStateException("No element with id \"content\" found at " + url);
        }
        // Built locally so repeated call() invocations do not accumulate text.
        StringBuilder text = new StringBuilder();
        for (String piece : contentElement.text().split(" ")) {
            text.append(" ").append(piece).append("\r\n");
        }
        chapter.setContent(text.toString());
        return chapter;
    }
}
/**
 * Mutable holder for one chapter: its title, its page URL and its text.
 * All three values start out as the empty string.
 */
class Chapter {
    /** Chapter title. */
    String name = "";

    /** Chapter body text. */
    String content = "";

    /** Address of the chapter page. */
    String Url = "";

    /** @return the chapter title */
    public String getName() {
        return this.name;
    }

    /** @param newName the chapter title to store */
    public void setName(String newName) {
        name = newName;
    }

    /** @return the chapter body text */
    public String getContent() {
        return this.content;
    }

    /** @param newContent the chapter body text to store */
    public void setContent(String newContent) {
        content = newContent;
    }

    /** @return the address of the chapter page */
    public String getUrl() {
        return this.Url;
    }

    /** @param newUrl the address of the chapter page to store */
    public void setUrl(String newUrl) {
        Url = newUrl;
    }
}