Java 爬取网络小说

单本小说的多线程爬虫(需要导入 Jsoup 库)
package 单本爬虫;

import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

/**
 * Downloads every chapter of a single online novel in parallel and writes
 * them, in index-page order, to one text file.
 *
 * <p>The chapter list is read from the element with id {@code list} on the
 * index page; each chapter page is then fetched by a {@link GetContent} task
 * running on a fixed-size thread pool.
 */
public class Spider {

	/** Index page that lists every chapter of the book. */
	private static final String INDEX_URL="https://www.xxbiquge.com/5_5690/";
	/** Site root that is prepended to the href of each chapter link. */
	private static final String SITE_ROOT="https://www.xxbiquge.com";
	/** File that receives the assembled book. */
	private static final String OUTPUT_PATH="e:/aaa/1.txt";
	/** Number of worker threads downloading chapter pages concurrently. */
	private static final int THREAD_COUNT=20;

	/**
	 * Runs the crawl and prints the elapsed wall-clock time in milliseconds.
	 *
	 * @param args unused
	 * @throws IOException if the index page cannot be fetched, has no
	 *         {@code list} element, or the output file cannot be written
	 * @throws InterruptedException if the thread is interrupted while waiting
	 *         for a chapter download
	 * @throws ExecutionException if any chapter download task fails
	 */
	public static void main(String[] args) throws IOException, InterruptedException, ExecutionException {
		long startTime=System.currentTimeMillis();

		// Fetch the index before touching the output file so that a failed
		// request does not truncate a previously downloaded book.
		List<Chapter> chapList=fetchChapterList();

		File file=new File(OUTPUT_PATH);
		File parent=file.getParentFile();
		// FileWriter creates or truncates the file itself; only the directory
		// has to exist beforehand.
		if(parent!=null && !parent.mkdirs() && !parent.isDirectory()){
			throw new IOException("Cannot create output directory: "+parent);
		}

		ExecutorService pool=Executors.newFixedThreadPool(THREAD_COUNT);
		try{
			FileWriter fw=new FileWriter(file);
			try{
				List<Future<Chapter>> result=new ArrayList<Future<Chapter>>(chapList.size());
				for(Chapter chapter:chapList){
					result.add(pool.submit(new GetContent(chapter)));
				}
				// Futures are consumed in submission order, so chapters are
				// written in index order regardless of which finishes first.
				for(Future<Chapter> future:result){
					Chapter done=future.get();
					fw.write(done.getName()+"\r\n");
					fw.write(done.getContent()+"\r\n");
				}
			}finally{
				fw.close();
			}
		}finally{
			// Always stop the workers: they are non-daemon threads and would
			// otherwise keep the JVM alive after a failure. On the success
			// path every task has already completed.
			pool.shutdownNow();
		}

		long endTime=System.currentTimeMillis();
		System.out.println("运行时间: "+(endTime-startTime)+" ms");
	}

	/**
	 * Reads the index page and builds one {@link Chapter} (name and URL only)
	 * per link found inside the element with id {@code list}.
	 *
	 * @return the chapters in the order their links appear on the page
	 * @throws IOException if the page cannot be fetched or has no such element
	 */
	private static List<Chapter> fetchChapterList() throws IOException {
		Document doc=Jsoup.connect(INDEX_URL).get();
		Element listing=doc.getElementById("list");
		if(listing==null){
			throw new IOException("No element with id \"list\" found at "+INDEX_URL);
		}
		Elements links=listing.select("a[href]");
		List<Chapter> chapList=new ArrayList<Chapter>(links.size());
		for(Element link:links){
			Chapter p=new Chapter();
			p.setName(link.text());
			p.setUrl(SITE_ROOT+link.attr("href"));
			chapList.add(p);
		}
		return chapList;
	}
}


/**
 * Task that downloads one chapter page and stores its text in the
 * {@link Chapter} it was constructed with.
 *
 * <p>The text of the element with id {@code content} is split on single
 * spaces; every piece becomes one output line prefixed with two spaces and
 * terminated by {@code \r\n}.
 */
class GetContent implements Callable<Chapter>{
	Chapter chapter;
	// The two fields below mirror the most recent call() result; they are
	// retained in case other same-package code reads them.
	Document doc;
	String list="";

	/**
	 * @param p chapter whose URL is fetched and whose content is filled in
	 */
	public GetContent(Chapter p){
		this.chapter=p;
	}

	/**
	 * Fetches the chapter page and fills in the chapter's content.
	 *
	 * @return the same {@link Chapter} instance passed to the constructor
	 * @throws IllegalStateException if the page cannot be downloaded (the
	 *         underlying {@link IOException} is the cause) or the page has no
	 *         element with id {@code content}
	 */
	@Override
	public Chapter call() {
		Document page;
		try {
			page=Jsoup.connect(chapter.getUrl()).get();
		} catch (IOException e) {
			// Fail with the real cause instead of logging it and then hitting
			// a NullPointerException on the missing document.
			throw new IllegalStateException("Failed to download chapter page: "+chapter.getUrl(), e);
		}
		Element body=page.getElementById("content");
		if(body==null){
			throw new IllegalStateException("No element with id \"content\" found at "+chapter.getUrl());
		}
		// Build into a fresh buffer so repeated calls do not append to earlier
		// output and the loop stays linear in the text length.
		StringBuilder text=new StringBuilder();
		for(String part:body.text().split(" ")){
			text.append("  ").append(part).append("\r\n");
		}
		doc=page;
		list=text.toString();
		chapter.setContent(list);
		return chapter;
	}
}


/**
 * Mutable holder for one chapter of a book: its display name, the URL of
 * its page, and the text of the chapter. All three start as the empty string.
 */
class Chapter{
	String name="";
	String content="";
	String Url="";

	/** @param chapterName the display name to store */
	public void setName(String chapterName) {
		name = chapterName;
	}

	/** @return the stored display name */
	public String getName() {
		return this.name;
	}

	/** @param text the chapter text to store */
	public void setContent(String text) {
		content = text;
	}

	/** @return the stored chapter text */
	public String getContent() {
		return this.content;
	}

	/** @param pageUrl the chapter page URL to store */
	public void setUrl(String pageUrl) {
		Url = pageUrl;
	}

	/** @return the stored chapter page URL */
	public String getUrl() {
		return this.Url;
	}
}


评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值