咳咳,今天给大家分享一个关于多线程和线程池的知识点。最近的任务是写爬虫,要抓取五百万个网址,单线程循环很慢,于是考虑用多线程。今天看了一下多线程,创建线程的方式分为继承 Thread 和实现 Runnable 接口两种,区别在于实现 Runnable 接口时,多个线程可以共享同一个任务对象里的资源,当然,这也就带来了线程安全问题。大家都知道,被多个线程共享的变量(例如对共享的 int i 做 i++)是非线程安全的,这里先不聊原子性,只说多线程。实现 Runnable 接口后,刚开始会出现数据重复,查询资料后得知是线程不安全导致的,因为涉及到资源争用等乱七八糟的东西。然后,我又了解了一下线程池,下面上代码,果断解决。正常单线程跑完这个程序需要七天,用多线程,我估计也就两天左右。
1,线程池最好用阿里巴巴推荐的方式创建,即手动 new ThreadPoolExecutor(由于固定了线程数量,并且队列用的是有界的 ArrayBlockingQueue,所以效率较低,不过可以保证任务不会无限堆积而导致 OOM),因为用 Executors 的原生工厂方法创建的线程池可能会产生 OOM。阿里巴巴推荐的写法见下方代码。
2,当我们看如下黑色截图,会发现,原生线程池会产生 OOM 的主要原因是使用了
LinkedBlockingQueue 队列:该队列默认容量为 Integer.MAX_VALUE,近似无界,任务堆积时就可能导致 OOM。至于该队列和 ArrayBlockingQueue 的底层原理,大家可以查阅网上的其他文章。截图为 Executors 创建线程池的具体代码。
/**
 * Builds a brand-new thread pool on every call, so that shutting down one
 * returned pool does not affect pools handed out to other callers.
 *
 * <p>Configuration: 5 core threads, at most 10 threads, a 10 ms keep-alive for
 * idle threads above the core size, and a bounded {@code ArrayBlockingQueue}
 * holding up to 1024 pending tasks. When the pool and the queue are both
 * saturated, {@code CallerRunsPolicy} makes the submitting thread execute the
 * task itself instead of rejecting it.
 *
 * @return a newly created executor whose threads are named with the format
 *         {@code funinbook-pool-%d}
 */
public static ExecutorService getThreadPoolExecutor() {
    // corePoolSize: number of threads kept in the pool even when idle.
    final int corePoolSize = 5;
    // maximumPoolSize: upper bound on the number of threads in the pool.
    final int maximumPoolSize = 10;
    // Idle threads beyond corePoolSize are released after this many milliseconds.
    final long keepAliveMillis = 10;

    ThreadFactory threadFactory =
            new ThreadFactoryBuilder().setNameFormat("funinbook-pool-%d").build();
    // Bounded queue: pending work can never grow without limit.
    ArrayBlockingQueue<Runnable> pendingTasks = new ArrayBlockingQueue<Runnable>(1024);
    ThreadPoolExecutor.CallerRunsPolicy whenSaturated = new ThreadPoolExecutor.CallerRunsPolicy();

    return new ThreadPoolExecutor(
            corePoolSize,
            maximumPoolSize,
            keepAliveMillis,
            TimeUnit.MILLISECONDS,
            pendingTasks,
            threadFactory,
            whenSaturated);
}
package com.spring.crawl;
import com.alibaba.fastjson.JSONObject;
import com.spring.service.ITestService;
import java.util.List;
import java.util.Map;
import java.util.concurrent.*;
/**
 * Producer/consumer crawler over a fixed range of company ids.
 *
 * <p>Producers fetch one page per id through {@code GetHtml.grabDataGet} and
 * put every non-null result on a bounded queue. A fixed set of consumer
 * workers drains that queue and hands each item to {@code WritingText.setText}.
 *
 * <p>Consumers are long-running workers rather than one blocking task per id:
 * a producer that yields no result never enqueues anything, so a matching
 * one-shot {@code take()} would block forever and keep the JVM alive.
 */
public class StartCrawl {

    /** First company id to crawl (inclusive). */
    private static final int FIRST_ID = 2701000;
    /** Last company id to crawl (inclusive). */
    private static final int LAST_ID = 2999999;
    /** Number of threads fetching pages. */
    private static final int PRODUCER_THREADS = 90;
    /** Number of threads draining the queue. */
    private static final int CONSUMER_THREADS = 80;
    /** Capacity of the hand-off queue; producers block while it is full. */
    private static final int QUEUE_CAPACITY = 80;
    /** How long an idle consumer waits before re-checking whether it may stop. */
    private static final long POLL_TIMEOUT_MILLIS = 200;

    /**
     * Crawls every id in the configured range and returns once all fetched
     * results have been consumed.
     *
     * <p>NOTE: the pools are created per call here. In an application the pool
     * should be managed in one place (for example a static factory/singleton);
     * a shared pool used by scheduled runs must then not be shut down, because
     * a pool that has been shut down does not accept new tasks.
     *
     * @param service passed through to {@code GetHtml.grabDataGet}
     * @throws InterruptedException if the calling thread is interrupted while
     *         waiting; both pools are stopped before the exception propagates
     */
    public static void executorPool(ITestService service) throws InterruptedException {
        ExecutorService executor = Executors.newFixedThreadPool(PRODUCER_THREADS);
        ExecutorService executor1 = Executors.newFixedThreadPool(CONSUMER_THREADS);
        CountDownLatch latch = new CountDownLatch(LAST_ID - FIRST_ID + 1);
        BlockingDeque<String> qeque = new LinkedBlockingDeque<>(QUEUE_CAPACITY);

        // Start the consumers first so the bounded queue is drained while producers run.
        for (int worker = 0; worker < CONSUMER_THREADS; worker++) {
            executor1.submit(() -> drain(qeque, latch));
        }
        for (int i = FIRST_ID; i <= LAST_ID; i++) {
            final String str = String.valueOf(i);
            executor.submit(() -> StartCrawl.provider(qeque, latch, str, service));
        }

        try {
            // Every producer counts down in its finally block, so zero means all puts are done.
            latch.await();
        } catch (InterruptedException e) {
            executor.shutdownNow();
            executor1.shutdownNow();
            throw e;
        }
        executor.shutdown();
        executor1.shutdown();
        // Consumers exit on their own once the queue is empty; wait so the message below is true.
        if (!executor1.awaitTermination(1, TimeUnit.HOURS)) {
            executor1.shutdownNow();
        }
        System.out.println("------所有线程执行完成");
    }

    /**
     * Producer: fetches the page for one id and enqueues the result, blocking
     * while the queue is full. Always counts the latch down, even on failure.
     *
     * @param qeque   queue that receives non-null results
     * @param latch   counted down exactly once per call
     * @param str     company id, used in the URL and passed to the fetcher
     * @param service passed through to {@code GetHtml.grabDataGet}
     */
    public static void provider(BlockingDeque<String> qeque, CountDownLatch latch, String str, ITestService service) {
        try {
            String json = GetHtml.grabDataGet("https://jobs.51job.com/all/co" + str + ".html", str, service);
            if (json != null) {
                qeque.put(json);
            }
        } catch (InterruptedException e) {
            // Restore the flag so the pool's shutdown logic can see the interruption.
            Thread.currentThread().interrupt();
            e.printStackTrace();
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            System.out.println("------执行后 i=" + str);
            latch.countDown();
        }
    }

    /**
     * Consumer: blocks until one item is available, then processes it.
     *
     * @param qeque   queue to take from
     * @param service unused here; kept for signature compatibility
     */
    public static void consumer(BlockingDeque<String> qeque, ITestService service) {
        final String json;
        try {
            json = qeque.take();
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
            e.printStackTrace();
            return;
        }
        writeOut(json);
    }

    /** Consumes items until all producers have finished and the queue is empty. */
    private static void drain(BlockingDeque<String> qeque, CountDownLatch latch) {
        try {
            while (true) {
                String json = qeque.poll(POLL_TIMEOUT_MILLIS, TimeUnit.MILLISECONDS);
                if (json != null) {
                    try {
                        writeOut(json);
                    } catch (RuntimeException e) {
                        // One bad item must not kill the worker.
                        e.printStackTrace();
                    }
                } else if (latch.getCount() == 0 && qeque.isEmpty()) {
                    // No producer can put anything anymore and nothing is left to take.
                    return;
                }
            }
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
        }
    }

    /** Parses one item and hands it to {@code WritingText.setText}. */
    private static void writeOut(String json) {
        // Result is unused; kept from the original, presumably so malformed JSON
        // fails here before anything is written - TODO confirm.
        JSONObject.parseObject(json);
        WritingText.setText(json);
        System.out.println("消费数据");
    }
}
---------------------------------------
当然,看不懂 lambda 没关系,下面是不使用 lambda 的普通写法
/**
 * Lambda-free variant of the crawler: submits one {@code runnublerun} task per
 * company id to a fixed thread pool and waits on a latch until all are done.
 */
public class Loadurl10 {

    /** First company id to crawl (inclusive). */
    private static final int FIRST_ID = 1;
    /** Last company id to crawl (inclusive). */
    private static final int LAST_ID = 5101000;
    /** Number of worker threads in the pool. */
    private static final int THREADS = 111;

    /**
     * Submits every task, blocks until each one has finished, then shuts the
     * pool down so the JVM can exit.
     *
     * @param args unused
     * @throws InterruptedException if the main thread is interrupted while
     *         waiting; the pool is stopped before the exception propagates
     */
    public static void main(String[] args) throws InterruptedException {
        ExecutorService executor = Executors.newFixedThreadPool(THREADS);
        CountDownLatch latch = new CountDownLatch(LAST_ID - FIRST_ID + 1);
        for (int i = FIRST_ID; i <= LAST_ID; i++) {
            final String str = String.valueOf(i);
            // The task itself knows nothing about the latch, so wrap it; without
            // the wrapper nothing counts down and await() below never returns.
            executor.submit(countingDown(new runnublerun(str), latch));
        }
        System.out.println("------等待所有线程执行完成");
        try {
            latch.await();
        } catch (InterruptedException e) {
            executor.shutdownNow();
            throw e;
        }
        // All tasks are done; release the worker threads.
        executor.shutdown();
        System.out.println("------所有线程执行完成");
    }

    /** Wraps a task so the latch is counted down when it ends, even if it throws. */
    private static Runnable countingDown(final Runnable task, final CountDownLatch latch) {
        return new Runnable() {
            @Override
            public void run() {
                try {
                    task.run();
                } finally {
                    latch.countDown();
                }
            }
        };
    }
}
/**
 * Task that fetches the page of a single company id through
 * {@code GetHtml.grabDataGet}, printing a marker line before and after.
 */
class runnublerun implements Runnable {

    /** Company id as text; used in the URL and passed to the fetcher. */
    private final String str;

    /**
     * @param str company id to fetch
     */
    public runnublerun(String str) {
        this.str = str;
    }

    /** Prints the "before" marker, fetches the page, then prints the "after" marker. */
    @Override
    public void run() {
        final String pageUrl = "https://jobs.51job.com/all/co" + str + ".html";
        System.out.println("------执行前 i=" + str);
        GetHtml.grabDataGet(pageUrl, str);
        System.out.println("------执行后 i=" + str);
    }
}