What slows down data insertion?
1. A separate client/server round trip for every single-row insert
2. MySQL's checks on primary keys, unique indexes, and foreign keys
3. An implicit commit for every statement (autocommit)
4. Single-threaded business processing and SQL assembly (time-consuming)
Solutions for each of the above
1. Replace single-row inserts with batch operations (loading data via LOAD DATA is faster still than multi-row INSERT). This cuts the number of client/server round trips; a JDBC sketch follows the settings below.
1.1 Increase the bulk_insert_buffer_size parameter
# Enlarge the bulk-insert cache so large batches can be buffered
# (note: this buffer applies to MyISAM bulk inserts; 53687091200 bytes = 50 GB here, size it to your workload)
set bulk_insert_buffer_size = 53687091200;
# Allow concurrent inserts on MyISAM tables; setting the GLOBAL scope requires the SUPER privilege
set GLOBAL concurrent_insert = 2;
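For reference, a minimal JDBC batch-insert sketch illustrating the round-trip savings (the connection URL, credentials, and the cms_goods table with its name column are placeholders, not the project's real schema). With rewriteBatchedStatements=true, MySQL Connector/J rewrites the batch into multi-row INSERT statements, so N rows cost roughly one round trip instead of N.
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.util.List;

public class BatchInsertSketch {
    public static void insertBatch(List<String> names) throws Exception {
        // rewriteBatchedStatements=true is what turns the batch into multi-row INSERTs
        String url = "jdbc:mysql://localhost:3306/erp9?rewriteBatchedStatements=true";
        try (Connection conn = DriverManager.getConnection(url, "user", "password");
             PreparedStatement ps = conn.prepareStatement(
                     "insert into cms_goods (name) values (?)")) {
            for (String name : names) {
                ps.setString(1, name);
                ps.addBatch();     // queue the row client-side
            }
            ps.executeBatch();     // send the whole batch in one (or a few) round trips
        }
    }
}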
2. Disable unique-index and foreign-key checks
Use this only when the incoming data is guaranteed to be free of duplicates. Even with unique_checks disabled, MySQL still enforces primary-key uniqueness; the setting lets the engine defer or skip checks on secondary unique indexes.
set unique_checks = 0;
set foreign_key_checks = 0;
3. Disable autocommit so each insert does not commit individually
set autocommit = 0;
# run the batch inserts, then commit once at the end
commit;
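Putting points 2 and 3 together, here is a hedged sketch over plain JDBC (the BulkLoad callback is an illustrative helper, not part of the project) that relaxes the checks for the duration of one bulk load, commits once, and restores the session settings afterwards:
import java.sql.Connection;
import java.sql.SQLException;
import java.sql.Statement;

public class BulkLoadSettings {
    /** Relax per-row checks for one bulk load on this connection, then restore them. */
    public static void run(Connection conn, BulkLoad load) throws SQLException {
        try (Statement st = conn.createStatement()) {
            st.execute("set unique_checks = 0");
            st.execute("set foreign_key_checks = 0");
        }
        conn.setAutoCommit(false);       // same effect as: set autocommit = 0
        try {
            load.accept(conn);           // the batched inserts happen here
            conn.commit();               // one commit for the whole batch
        } catch (SQLException e) {
            conn.rollback();             // undo the partial batch on failure
            throw e;
        } finally {
            conn.setAutoCommit(true);
            try (Statement st = conn.createStatement()) {
                st.execute("set unique_checks = 1");
                st.execute("set foreign_key_checks = 1");
            }
        }
    }

    @FunctionalInterface
    public interface BulkLoad {
        void accept(Connection conn) throws SQLException;
    }
}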
4. Use a Java master-worker threading model to split the task and cut business-processing time
4.1 The Master class
The Master pulls tasks from a queue into a temporary list; once the list size exceeds a threshold, it hands a copy to the thread pool, where a worker performs the batch insert.
Its core responsibilities: 1. split the task stream; 2. create threads that invoke the workers' run methods; 3. record each thread's result in a shared map.
package com.yuanda.erp9.syn.execule.thread;
import com.yuanda.erp9.syn.enums.ThreadPoolTypeEnum;
import com.yuanda.erp9.syn.execule.factory.SimpleThreadPoolFactory;
import com.yuanda.erp9.syn.service.erp9.ESService;
import com.yuanda.erp9.syn.service.erp9.GoodsService;
import com.yuanda.erp9.syn.service.erp9.impl.ESServiceImpl;
import com.yuanda.erp9.syn.service.erp9.impl.GoodsServiceImpl;
import com.yuanda.erp9.syn.util.SpringContextUtil;
import lombok.extern.slf4j.Slf4j;
import org.springframework.scheduling.concurrent.ThreadPoolTaskExecutor;
import org.springframework.util.CollectionUtils;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentLinkedDeque;
/**
 * @ClassName Master
 * @Description Master task class: splits the task stream and dispatches workers.
 * @Date 2022/11/15
 * @Author myq
 */
@Slf4j
public class Master<T> {
    /**
     * Queue holding all pending tasks
     */
    private ConcurrentLinkedDeque<T> deque = new ConcurrentLinkedDeque<>();
    /**
     * Result map shared with the workers
     */
    private ConcurrentHashMap<String, Object> resMap = new ConcurrentHashMap<>();
    /**
     * Whether the master is still accepting tasks; the caller sets this to false to drain and finish
     */
    public volatile boolean isBegin = false;
    /**
     * Maximum number of tasks handed to a single worker
     */
    private final int workTaskMaxSize = 3000;

    public Master(Integer supplierId, String hisp) {
        this.isBegin = true;
        // Delete all data added by the previous run
        deleteAllOfBefore(supplierId, hisp);
        // Apply the MySQL session settings for bulk inserting
        setMysqlInsertProperty();
        // Start a dedicated thread that distributes the queued tasks to workers
        new Thread(this::distributionTask, "Master-Started").start();
    }
    /**
     * @Description: Add a task to the queue
     * @Author: Mr.myq
     * @Date: 2022/11/15 15:56
     */
    public void add(T task) {
        deque.add(task);
    }

    /**
     * @Description: Split the task stream and hand batches to worker threads
     * @Author: Mr.myq
     * @Date: 2022/12/27 9:43
     */
    public void distributionTask() {
        List<T> tempList = new ArrayList<>();
        // Create the thread pool
        ThreadPoolTaskExecutor executor = SimpleThreadPoolFactory.choose(ThreadPoolTypeEnum.SPECIAL_PURPOSE);
        // Dispatch loop: runs while the caller is still adding tasks
        while (isBegin) {
            // Take the next task from the queue
            T poll = deque.poll();
            if (poll != null) {
                tempList.add(poll);
                // Enough tasks buffered: hand them to a worker
                if (tempList.size() >= workTaskMaxSize) {
                    working(tempList, executor);
                    tempList.clear();
                }
            }
        }
        // isBegin was set to false: drain whatever is left in the queue
        while (deque.size() > 0) {
            T poll = deque.poll();
            tempList.add(poll);
            if (tempList.size() >= workTaskMaxSize) {
                working(tempList, executor);
                tempList.clear();
            }
        }
        // Flush the final partial batch
        if (!tempList.isEmpty()) {
            working(tempList, executor);
            tempList.clear();
        }
        isCompleted(executor);
        // Restore the MySQL session settings
        resetMysqlInsertProperty();
        System.gc(); // hint only; the JVM may ignore it
    }
    /**
     * @Description: Wait until every worker thread in the pool has finished
     * @Author: Mr.myq
     * @Date: 2022/12/28 10:35
     */
    public void isCompleted(ThreadPoolTaskExecutor executor) {
        while (executor.getActiveCount() > 0) {
            try {
                Thread.sleep(100); // back off briefly instead of busy-spinning
            } catch (InterruptedException e) {
                Thread.currentThread().interrupt();
                break;
            }
        }
        log.debug(">>>>>>>>>>>>>>>>>>>> All tasks finished <<<<<<<<<<<<<<<<<<<");
    }
    /**
     * @Description: Restore the MySQL session settings after the batch insert
     * @Author: Mr.myq
     * @Date: 2022/12/23 14:42
     */
    private void resetMysqlInsertProperty() {
        GoodsService goodsService = getBean();
        goodsService.insertBatchBeforeSet();
    }

    /**
     * @Description: Apply the MySQL session settings before the batch insert
     * @Author: Mr.myq
     * @Date: 2022/12/23 14:41
     */
    private void setMysqlInsertProperty() {
        GoodsService goodsService = getBean();
        goodsService.insertBatchAfterReset();
    }
    /**
     * @Description: Create the sub-task workers and submit them to the pool
     * @Author: Mr.myq
     * @Date: 2022/11/16 17:28
     */
    private void working(List<T> sonTaskList, ThreadPoolTaskExecutor executor) {
        // Give each worker its own copy, since workers clear their list when done
        ArrayList<T> mysqlList = new ArrayList<>(sonTaskList);
        AbstractWorker<T> mysqlWorker = new MysqlWorker<>(mysqlList, resMap);
        executor.execute(mysqlWorker);
        ArrayList<T> esList = new ArrayList<>(sonTaskList);
        AbstractWorker<T> esWorker = new ElasticSearchWorker<>(esList, resMap);
        executor.execute(esWorker);
    }
    /**
     * @Description: Delete the data left over from the previous run
     * @Author: Mr.myq
     * @Date: 2022/12/19 15:20
     */
    private void deleteAllOfBefore(Integer supplierId, String hisp) {
        List<String> deleteIds = new ArrayList<>();
        try {
            GoodsService goodsService = getBean();
            List<String> ids = goodsService.queryDeleteIds(supplierId, hisp);
            if (CollectionUtils.isEmpty(ids)) {
                return;
            }
            deleteIds.addAll(ids);
            List<String> mysqlIds = new ArrayList<>(deleteIds);
            goodsService.deleteAll(mysqlIds);
            log.info(">>>>>>>>>>>>>>>>>>>>>>> MySQL data deleted >>>>>>>>>>>>>>>>>>>>>>>");
            ESService esService = SpringContextUtil.getBean("ESServiceImpl", ESServiceImpl.class);
            ArrayList<String> esIds = new ArrayList<>(deleteIds);
            esService.deleteAllByIds(esIds);
            log.info(">>>>>>>>>>>>>>>>>>>>>>> ES data deleted >>>>>>>>>>>>>>>>>>>>>>>");
        } finally {
            deleteIds.clear();
        }
    }
    /**
     * @Description: Look up the GoodsService bean
     * @Author: Mr.myq
     * @Date: 2022/12/23 15:05
     */
    public GoodsService getBean() {
        return SpringContextUtil.getBean(GoodsService.class, "goodsServiceImpl", GoodsServiceImpl.class);
    }
}
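Note that Master never flips isBegin itself: the caller adds tasks and then sets isBegin = false so the dispatch loop drains the queue and finishes. A hypothetical driver (the supplier id 123, the "hisp" value, and loadGoodsFromSource() are placeholders):
// Hypothetical driver code, not part of the original project
Master<CmsGoodsPojo> master = new Master<>(123, "hisp");
for (CmsGoodsPojo goods : loadGoodsFromSource()) {  // assumed data source
    master.add(goods);
}
// Signal the master to drain the remaining queue and finish
master.isBegin = false;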
4.2 The Worker interface
package com.yuanda.erp9.syn.execule.thread;
/**
 * @ClassName Worker
 * @Description Marker interface for sub-task workers
 * @Date 2022/11/15
 * @Author myq
 */
public interface Worker extends Runnable {
}
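Both workers extend AbstractWorker, which the post does not show. A minimal reconstruction consistent with how it is used (it implements Worker and delegates run() to the abstract task() hook) might look like:
package com.yuanda.erp9.syn.execule.thread;

// Hypothetical reconstruction of the missing base class
public abstract class AbstractWorker<T> implements Worker {
    /** The unit of work implemented by each concrete worker. */
    abstract void task();

    @Override
    public void run() {
        task();
    }
}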
4.3 MysqlWorker
Synchronizes the data into the three MySQL tables.
package com.yuanda.erp9.syn.execule.thread;
import com.yuanda.erp9.syn.pojo.CmsGoodsPojo;
import com.yuanda.erp9.syn.service.erp9.GoodsService;
import com.yuanda.erp9.syn.util.SpringContextUtil;
import lombok.extern.slf4j.Slf4j;
import java.util.List;
import java.util.concurrent.ConcurrentHashMap;
/**
 * @ClassName MysqlWorker
 * @Description Sub-task worker that batch-inserts the data into MySQL
 * @Date 2022/11/15
 * @Author myq
 */
@Slf4j
public class MysqlWorker<T> extends AbstractWorker<T> {
    // The batch of data to insert
    private List<T> task;
    // Shared map recording each worker's outcome
    private ConcurrentHashMap<String, Object> resultMap;

    public MysqlWorker(List<T> task, ConcurrentHashMap<String, Object> resultMap) {
        // Initialize the sub-task
        this.task = task;
        this.resultMap = resultMap;
    }
    /**
     * @Description: Invoked by the parent class's run() on a pool thread
     * @Author: Mr.myq
     * @Date: 2022/11/28 15:24
     */
    @Override
    void task() {
        String workerThreadName = Thread.currentThread().getName();
        try {
            log.info("************** Start syncing MySQL data **************");
            GoodsService goodsService = SpringContextUtil.getBean("goodsServiceImpl");
            goodsService.insertBatch((List<CmsGoodsPojo>) task);
            log.info("************** Finished syncing MySQL data **************");
        } catch (Exception e) {
            log.error("{} worker run error", workerThreadName, e);
            resultMap.put("errMsg", "worker run error : {" + e + "}");
            resultMap.put("errThreadName", workerThreadName);
            resultMap.put("errCode", 500);
            throw new RuntimeException("Thread-Name: " + workerThreadName + " worker run error", e);
        } finally {
            // Release the batch so it can be garbage-collected
            task.clear();
        }
    }
}
4.4 ElasticSearchWorker
Synchronizes the data into Elasticsearch.
package com.yuanda.erp9.syn.execule.thread;
import com.yuanda.erp9.syn.pojo.CmsGoodsPojo;
import com.yuanda.erp9.syn.service.erp9.ESService;
import com.yuanda.erp9.syn.service.erp9.impl.ESServiceImpl;
import com.yuanda.erp9.syn.util.SpringContextUtil;
import lombok.extern.slf4j.Slf4j;
import java.util.List;
import java.util.concurrent.ConcurrentHashMap;
/**
 * @ClassName ElasticSearchWorker
 * @Description Sub-task worker that batch-inserts the data into Elasticsearch
 * @Date 2022/11/15
 * @Author myq
 */
@Slf4j
public class ElasticSearchWorker<T> extends AbstractWorker<T> {
    // The batch of data to insert
    private List<T> task;
    // Shared map recording each worker's outcome
    private ConcurrentHashMap<String, Object> resultMap;

    public ElasticSearchWorker(List<T> task, ConcurrentHashMap<String, Object> resultMap) {
        this.task = task;
        this.resultMap = resultMap;
    }
    /**
     * @Description: Push the batch into Elasticsearch
     * @Author: Mr.myq
     * @Date: 2022/11/15 17:09
     */
    @Override
    void task() {
        String workerThreadName = Thread.currentThread().getName();
        try {
            log.info("************** Start syncing ES data **************");
            ESService esService = SpringContextUtil.getBean("ESServiceImpl", ESServiceImpl.class);
            esService.addESData((List<CmsGoodsPojo>) task);
            log.info("************** Finished syncing ES data **************");
        } catch (Exception e) {
            log.error("Thread {} failed to sync ES data", workerThreadName, e);
            resultMap.put("errMsg", "worker run error : {" + e + "}");
            resultMap.put("errThreadName", workerThreadName);
            resultMap.put("type", "elasticsearch");
            throw new RuntimeException("Thread-Name: " + workerThreadName + " worker run error", e);
        } finally {
            // Release the batch so it can be garbage-collected
            task.clear();
        }
    }
}
These are code fragments and will not run as-is; what matters is the core idea.
In testing, inserting 7 million rows (across the three tables) took 2 minutes 45 seconds.
If you'd like the full source, leave a comment or message me. If this helped, please like, favorite, and follow.