1、While循环方式
使用While循环方式时,要求数据量少,否则会出现栈溢出或堆溢出!!!并且While循环方式是单线程,所以会导致同步速度很慢!!!
import cn.hutool.core.util.StrUtil;
import cn.hutool.http.HttpRequest;
import com.alibaba.fastjson2.JSON;
import com.alibaba.fastjson2.JSONArray;
import com.alibaba.fastjson2.JSONObject;
import org.apache.commons.collections4.CollectionUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.ArrayList;
import java.util.List;
/**
 * Example scheduled task that pulls paged data from a third-party HTTP endpoint and stores
 * every page through {@code DataMapper}, walking the pages with a plain {@code while} loop.
 *
 * <p>{@code DataMapper} and its {@code batchInsert} method are project-defined and are only
 * referenced here for illustration.
 *
 * @author QiaoChu
 */
public class TestTask {

    private static final Logger log = LoggerFactory.getLogger(TestTask.class);

    private static final String GET_URL = "https://www.xxoo.com/";

    /** Page number the synchronization starts from. */
    private static final int PAGE_INDEX = 1;

    /** Number of records requested per page. */
    private static final int PAGE_SIZE = 1000;

    // NOTE(review): never assigned inside this class; presumably injected by the container - confirm.
    private DataMapper dataMapper;

    /**
     * Entry point of the scheduled job: synchronizes all pages starting at {@link #PAGE_INDEX}.
     */
    public void testTimingTask() {
        fetchAndSave(PAGE_INDEX, PAGE_SIZE);
    }

    /**
     * Fetches pages one after another and batch-inserts each of them until the remote side
     * returns an empty page or a page shorter than {@code pageSize}.
     *
     * @param pageIndex page number to start from
     * @param pageSize  number of records requested per page
     */
    private void fetchAndSave(int pageIndex, int pageSize) {
        int page = pageIndex;
        while (true) {
            log.info("【数据同步 - 存量】,第{}次同步,", page);
            List<DataEntity> dataList = getDataByPage(page, pageSize);
            // Serializing a whole page is expensive and exposes personal data, so the payload
            // is only rendered when debug logging is switched on.
            if (log.isDebugEnabled()) {
                log.debug("【数据同步 - 存量数据[{}]】,第{}次同步", JSON.toJSONString(dataList), page);
            }
            if (CollectionUtils.isEmpty(dataList)) {
                log.info("【数据同步 - 存量】,第{}次同步,获取数据为空,证明已全部同步完毕!!!", page);
                return;
            }

            dataMapper.batchInsert(dataList);
            log.info("【数据同步 - 存量】,第{}次同步,同步成功", page);

            // A short page means there is nothing left behind it.
            if (dataList.size() < pageSize) {
                log.info("【数据同步 - 存量】,第{}次同步,获取数据小于每页获取条数,证明已全部同步完毕!!!", page);
                return;
            }
            page++;
        }
    }

    /**
     * Requests one page from {@link #GET_URL} and converts the {@code dataList} array of the
     * JSON response into entities.
     *
     * @param pageIndex page number sent as the {@code page} query parameter
     * @param pageSize  page size sent as the {@code limit} query parameter
     * @return the entities of the page; an empty list when the body is blank, does not parse
     *         to an object, or carries no {@code dataList} array
     */
    private List<DataEntity> getDataByPage(int pageIndex, int pageSize) {
        String comUrl = String.format("%s?page=%s&limit=%s", GET_URL, pageIndex, pageSize);
        String body = HttpRequest.get(comUrl).execute().body();
        if (log.isDebugEnabled()) {
            log.debug("获取第三方数据信息:[{}],第{}次同步", JSON.toJSONString(body), pageIndex);
        }
        if (StrUtil.isBlank(body)) {
            return new ArrayList<>();
        }

        JSONObject jsonObject = JSONObject.parseObject(body);
        JSONArray dataArray = jsonObject == null ? null : jsonObject.getJSONArray("dataList");
        if (dataArray == null) {
            // Missing key: treat it like an empty page instead of failing with a NullPointerException.
            return new ArrayList<>();
        }
        return dataArray.toJavaList(DataEntity.class);
    }

    /**
     * Sample entity for one synchronized record.
     *
     * <p>Declared {@code static} so that it holds no reference to an enclosing
     * {@code TestTask} and can be instantiated on its own; {@code getDataByPage} asks the
     * JSON library to create instances from the class token.
     */
    static class DataEntity {

        private String username;
        private Integer sex;
        private Integer age;
        private String mobile;

        /** No-argument constructor. */
        public DataEntity() {
        }

        /** Creates a fully populated entity. */
        public DataEntity(String username, Integer sex, Integer age, String mobile) {
            this.username = username;
            this.sex = sex;
            this.age = age;
            this.mobile = mobile;
        }

        public String getUsername() {
            return username;
        }

        public void setUsername(String username) {
            this.username = username;
        }

        public Integer getSex() {
            return sex;
        }

        public void setSex(Integer sex) {
            this.sex = sex;
        }

        public Integer getAge() {
            return age;
        }

        public void setAge(Integer age) {
            this.age = age;
        }

        public String getMobile() {
            return mobile;
        }

        public void setMobile(String mobile) {
            this.mobile = mobile;
        }
    }
}
2、递归方式
使用递归方式时,要求数据量少,否则会出现栈溢出或堆溢出!!!并且递归方式是单线程,所以会导致同步速度很慢!!!
import cn.hutool.core.util.StrUtil;
import cn.hutool.http.HttpRequest;
import com.alibaba.fastjson2.JSON;
import com.alibaba.fastjson2.JSONArray;
import com.alibaba.fastjson2.JSONObject;
import org.apache.commons.collections4.CollectionUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.ArrayList;
import java.util.List;
/**
 * Example scheduled task that pulls paged data from a third-party HTTP endpoint and stores
 * every page through {@code DataMapper}, advancing from page to page by recursion.
 *
 * <p>{@code DataMapper} and its {@code batchInsert} method are project-defined and are only
 * referenced here for illustration.
 *
 * @author QiaoChu
 */
public class TestTask {

    private static final Logger log = LoggerFactory.getLogger(TestTask.class);

    private static final String GET_URL = "https://www.xxoo.com/";

    /** Page number the synchronization starts from. */
    private static final int PAGE_INDEX = 1;

    /** Number of records requested per page. */
    private static final int PAGE_SIZE = 1000;

    // NOTE(review): never assigned inside this class; presumably injected by the container - confirm.
    private DataMapper dataMapper;

    /**
     * Entry point of the scheduled job: synchronizes all pages starting at {@link #PAGE_INDEX}.
     */
    public void testTimingTask() {
        fetchAndSave(PAGE_INDEX, PAGE_SIZE);
    }

    /**
     * Fetches one page, batch-inserts it and then calls itself for the following page until
     * the remote side returns an empty page or a page shorter than {@code pageSize}.
     *
     * <p>Every full page adds one stack frame, so the recursion depth equals the number of
     * full pages processed.
     *
     * @param pageIndex page number to fetch in this call
     * @param pageSize  number of records requested per page
     */
    private void fetchAndSave(int pageIndex, int pageSize) {
        log.info("【数据同步 - 存量】,第{}次同步,", pageIndex);
        List<DataEntity> dataList = getDataByPage(pageIndex, pageSize);
        // Serializing a whole page is expensive and exposes personal data, so the payload
        // is only rendered when debug logging is switched on.
        if (log.isDebugEnabled()) {
            log.debug("【数据同步 - 存量数据[{}]】,第{}次同步", JSON.toJSONString(dataList), pageIndex);
        }
        if (CollectionUtils.isEmpty(dataList)) {
            log.info("【数据同步 - 存量】,第{}次同步,获取数据为空,证明已全部同步完毕!!!", pageIndex);
            return;
        }

        dataMapper.batchInsert(dataList);
        log.info("【数据同步 - 存量】,第{}次同步,同步成功", pageIndex);

        // A short page means there is nothing left behind it.
        if (dataList.size() < pageSize) {
            log.info("【数据同步 - 存量】,第{}次同步,获取数据小于每页获取条数,证明已全部同步完毕!!!", pageIndex);
            return;
        }

        // Recurse into the next page until the data is exhausted.
        fetchAndSave(pageIndex + 1, pageSize);
    }

    /**
     * Requests one page from {@link #GET_URL} and converts the {@code dataList} array of the
     * JSON response into entities.
     *
     * @param pageIndex page number sent as the {@code page} query parameter
     * @param pageSize  page size sent as the {@code limit} query parameter
     * @return the entities of the page; an empty list when the body is blank, does not parse
     *         to an object, or carries no {@code dataList} array
     */
    private List<DataEntity> getDataByPage(int pageIndex, int pageSize) {
        String comUrl = String.format("%s?page=%s&limit=%s", GET_URL, pageIndex, pageSize);
        String body = HttpRequest.get(comUrl).execute().body();
        if (log.isDebugEnabled()) {
            log.debug("获取第三方数据信息:[{}],第{}次同步", JSON.toJSONString(body), pageIndex);
        }
        if (StrUtil.isBlank(body)) {
            return new ArrayList<>();
        }

        JSONObject jsonObject = JSONObject.parseObject(body);
        JSONArray dataArray = jsonObject == null ? null : jsonObject.getJSONArray("dataList");
        if (dataArray == null) {
            // Missing key: treat it like an empty page instead of failing with a NullPointerException.
            return new ArrayList<>();
        }
        return dataArray.toJavaList(DataEntity.class);
    }

    /**
     * Sample entity for one synchronized record.
     *
     * <p>Declared {@code static} so that it holds no reference to an enclosing
     * {@code TestTask} and can be instantiated on its own; {@code getDataByPage} asks the
     * JSON library to create instances from the class token.
     */
    static class DataEntity {

        private String username;
        private Integer sex;
        private Integer age;
        private String mobile;

        /** No-argument constructor. */
        public DataEntity() {
        }

        /** Creates a fully populated entity. */
        public DataEntity(String username, Integer sex, Integer age, String mobile) {
            this.username = username;
            this.sex = sex;
            this.age = age;
            this.mobile = mobile;
        }

        public String getUsername() {
            return username;
        }

        public void setUsername(String username) {
            this.username = username;
        }

        public Integer getSex() {
            return sex;
        }

        public void setSex(Integer sex) {
            this.sex = sex;
        }

        public Integer getAge() {
            return age;
        }

        public void setAge(Integer age) {
            this.age = age;
        }

        public String getMobile() {
            return mobile;
        }

        public void setMobile(String mobile) {
            this.mobile = mobile;
        }
    }
}
3、多线程方式
由于递归方式是单线程,考虑到数据量庞大,且易造成内存溢出,因此将递归更换成多线程方式,不仅避免了内存溢出的情况,而且速度大大提升!!!
package com.ruoyi.quartz.task;
import cn.hutool.core.util.StrUtil;
import cn.hutool.http.HttpRequest;
import com.alibaba.fastjson2.JSON;
import com.alibaba.fastjson2.JSONArray;
import com.alibaba.fastjson2.JSONObject;
import org.apache.commons.collections4.CollectionUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.atomic.AtomicInteger;
/**
 * Example scheduled task that pulls paged data from a third-party HTTP endpoint and stores
 * every page through {@code DataMapper}, fetching the pages concurrently on a fixed-size
 * thread pool.
 *
 * <p>{@code DataMapper} and its {@code batchInsert} method are project-defined and are only
 * referenced here for illustration.
 *
 * @author QiaoChu
 */
public class TestTask {

    private static final Logger log = LoggerFactory.getLogger(TestTask.class);

    private static final String GET_URL = "https://www.xxoo.com/";

    /** Total number of records expected on the remote side. */
    private static final int TOTAL_NUM = 1000000;

    /** Number of records requested per page. */
    private static final int PAGE_SIZE = 1000;

    /** Number of worker threads used for one synchronization run. */
    private static final int THREAD_COUNT = 10;

    // NOTE(review): never assigned inside this class; presumably injected by the container - confirm.
    private DataMapper dataMapper;

    /**
     * Entry point of the scheduled job: submits every page derived from {@link #TOTAL_NUM}
     * and {@link #PAGE_SIZE} for synchronization.
     */
    public void testTimingTask() {
        synAllData(TOTAL_NUM, PAGE_SIZE);
    }

    /**
     * Splits {@code totalNum} records into pages and submits one task per page to a
     * fixed-size thread pool.
     *
     * <p>The method returns as soon as all tasks are submitted; it does not wait for them to
     * finish. The pool is shut down right after submission so that its threads exit once the
     * queued pages are processed instead of accumulating with every run.
     *
     * @param totalNum total number of records to synchronize
     * @param pageSize number of records requested per page; must be positive
     * @throws IllegalArgumentException if {@code pageSize} is not positive
     */
    private void synAllData(int totalNum, int pageSize) {
        if (pageSize <= 0) {
            throw new IllegalArgumentException("pageSize must be positive: " + pageSize);
        }
        // Ceiling division in long arithmetic so that totalNum + pageSize cannot overflow int.
        int pageCount = (int) (((long) totalNum + pageSize - 1) / pageSize);

        ExecutorService fixedThreadPool = Executors.newFixedThreadPool(THREAD_COUNT);
        try {
            for (int index = 1; index <= pageCount; index++) {
                final int page = index;
                fixedThreadPool.submit(() -> {
                    try {
                        multiFetchAndSave(page, pageSize);
                    } catch (Exception e) {
                        // The throwable goes last without a placeholder so the stack trace is logged.
                        log.error("并发获取并保存数据异常,第{}次同步", page, e);
                    }
                });
            }
        } finally {
            // Already submitted tasks still run; no new ones are accepted.
            fixedThreadPool.shutdown();
        }
    }

    /**
     * Fetches a single page and batch-inserts it. Called once per page from the pool tasks.
     *
     * @param pageIndex page number to fetch
     * @param pageSize  number of records requested per page
     */
    private void multiFetchAndSave(int pageIndex, int pageSize) {
        log.info("【数据同步 - 存量】,第{}次同步,", pageIndex);
        List<DataEntity> dataList = getDataByPage(pageIndex, pageSize);
        // Serializing a whole page is expensive and exposes personal data, so the payload
        // is only rendered when debug logging is switched on.
        if (log.isDebugEnabled()) {
            log.debug("【数据同步 - 存量数据[{}]】,第{}次同步", JSON.toJSONString(dataList), pageIndex);
        }
        if (CollectionUtils.isEmpty(dataList)) {
            log.info("【数据同步 - 存量】,第{}次同步,获取数据为空,证明已全部同步完毕!!!", pageIndex);
            return;
        }

        dataMapper.batchInsert(dataList);
        log.info("【数据同步 - 存量】,第{}次同步,同步成功", pageIndex);
        if (dataList.size() < pageSize) {
            log.info("【数据同步 - 存量】,第{}次同步,获取数据小于每页获取条数,证明已全部同步完毕!!!", pageIndex);
        }
    }

    /**
     * Requests one page from {@link #GET_URL} and converts the {@code dataList} array of the
     * JSON response into entities.
     *
     * @param pageIndex page number sent as the {@code page} query parameter
     * @param pageSize  page size sent as the {@code limit} query parameter
     * @return the entities of the page; an empty list when the body is blank, does not parse
     *         to an object, or carries no {@code dataList} array
     */
    private List<DataEntity> getDataByPage(int pageIndex, int pageSize) {
        String comUrl = String.format("%s?page=%s&limit=%s", GET_URL, pageIndex, pageSize);
        String body = HttpRequest.get(comUrl).execute().body();
        if (log.isDebugEnabled()) {
            log.debug("获取第三方数据信息:[{}],第{}次同步", JSON.toJSONString(body), pageIndex);
        }
        if (StrUtil.isBlank(body)) {
            return new ArrayList<>();
        }

        JSONObject jsonObject = JSONObject.parseObject(body);
        JSONArray dataArray = jsonObject == null ? null : jsonObject.getJSONArray("dataList");
        if (dataArray == null) {
            // Missing key: treat it like an empty page instead of failing with a NullPointerException.
            return new ArrayList<>();
        }
        return dataArray.toJavaList(DataEntity.class);
    }

    /**
     * Sample entity for one synchronized record.
     *
     * <p>Declared {@code static} so that it holds no reference to an enclosing
     * {@code TestTask} and can be instantiated on its own; {@code getDataByPage} asks the
     * JSON library to create instances from the class token.
     */
    static class DataEntity {

        private String username;
        private Integer sex;
        private Integer age;
        private String mobile;

        /** No-argument constructor. */
        public DataEntity() {
        }

        /** Creates a fully populated entity. */
        public DataEntity(String username, Integer sex, Integer age, String mobile) {
            this.username = username;
            this.sex = sex;
            this.age = age;
            this.mobile = mobile;
        }

        public String getUsername() {
            return username;
        }

        public void setUsername(String username) {
            this.username = username;
        }

        public Integer getSex() {
            return sex;
        }

        public void setSex(Integer sex) {
            this.sex = sex;
        }

        public Integer getAge() {
            return age;
        }

        public void setAge(Integer age) {
            this.age = age;
        }

        public String getMobile() {
            return mobile;
        }

        public void setMobile(String mobile) {
            this.mobile = mobile;
        }
    }
}