Java 多线程爬虫
Executors 类提供工厂方法,用来创建不同类型的线程池。Executors 是工具类,它提供对 ThreadPoolExecutor 的封装,可以创建几种常用的线程池供大家使用。
package spider;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;
/**
 * Minimal stand-in for a page crawler used by the thread-pool example.
 */
class Spd {
    /**
     * Prints the given page number to standard output, followed by a line break.
     *
     * @param page the page number to print
     */
    public static void gets(int page) {
        System.out.println(page);
    }
}
public class MultiThreadExample{
public static void main(string[] args){
ExecutorService executorservice = Executors.newFixedThreadPool(5);
long startTime =System.currentTimeMillis();
for(int i=1:i<= 100:i++){
final int page =i;
executorService.submit(()->Spd.gets(page));
}
long endTime =System.currentTimeMillisO;
long executionTime =endTimestartTime;
System.out.printIn("Total execution time:" + executionTime + " milliseconds");
executorService.shutdown();
}
}
import java.util.concurrent.Executorservice:
import java.util.concurrent.Executors;
public static void getTest(){
String base_url = "https://wzzdg.sun0769.com/political/index/politicsNewest";
ExecutorService executorService = Executors.newFixedThreadpo61(5);
for(int i=1;i<20;i++){
final int page =i;
System.out.print1n("开始第几页:"+ page);
executorservice.submit(()->{
try {
String url=base_url+"?id=1&page=" + page;
String content=crawl(url);
process(content);
}catch(Exception e){
e.printstackTrace();
}
});
}
executorservice.shutdown():
}
public static void getTest(){
ExecutorService executorService = Executors.newFixedThreadPool(5);
for (int i=1;i<20:i++){
final int page =i;
System.out.printIn("开始第几页:"+page);
CompletableFuture.runAsync(()->{
try {
String url=BASE_URL +"?id=1&page=" + page;
String content =crawl(url);
process(content);
} catch(xceptione) {
e.printStackTrace()
}
},executorService);
}
executorService.shutdown();
}