百万级含80+字段以上的Excel数据导出时,主要的问题在于:
- 内存溢出风险 :一次性加载百万级数据到内存
- 导出时间长 :大数据量导致导出过程耗时
- 文件过大 :单个Excel文件可能超过软件处理上限
- 字段过多 :数据复杂
技术方案设计
整体架构
采取导出工程与页面系统工程分离
本方案通过智能分流、内存优化和异步处理等技术手段,成功解决了百万级数据导出的难题。关键点在于:
1. 根据数据量自动选择最优导出策略
2. 采用流式处理避免内存溢出
3. 完善的异常处理和资源管理
详细实现
单文件导出实现
数据库表DDL
本文采用PostgreSQL,可转换成对应的数据库DDL语句进行建表
CREATE TABLE "public"."export_download" (
"id" int8 NOT NULL DEFAULT nextval('export_download_id_seq'::regclass),
"model" varchar(255) COLLATE "pg_catalog"."default" NOT NULL,
"export_number" int8 NOT NULL,
"export_status" varchar(255) COLLATE "pg_catalog"."default" NOT NULL,
"remark" varchar(255) COLLATE "pg_catalog"."default",
"operator" varchar(255) COLLATE "pg_catalog"."default",
"create_time" timestamp(0) NOT NULL DEFAULT CURRENT_TIMESTAMP,
"update_time" timestamp(0) NOT NULL DEFAULT CURRENT_TIMESTAMP,
"file_download" text COLLATE "pg_catalog"."default",
CONSTRAINT "idx_export_download_id_primary" PRIMARY KEY ("id")
)
;
ALTER TABLE "public"."export_download"
OWNER TO "Mak1racle";
COMMENT ON COLUMN "public"."export_download"."id" IS 'ID';
COMMENT ON COLUMN "public"."export_download"."model" IS '数据模块';
COMMENT ON COLUMN "public"."export_download"."export_number" IS '导出数量';
COMMENT ON COLUMN "public"."export_download"."export_status" IS '导出状态';
COMMENT ON COLUMN "public"."export_download"."remark" IS '备注';
COMMENT ON COLUMN "public"."export_download"."operator" IS '操作人';
COMMENT ON COLUMN "public"."export_download"."create_time" IS '创建时间';
COMMENT ON COLUMN "public"."export_download"."update_time" IS '更新时间';
COMMENT ON COLUMN "public"."export_download"."file_download" IS '文件下载路径';
COMMENT ON TABLE "public"."export_download" IS '下载导出表';
系统工程接口
// 自行根据Mybatis或MybatisPlus进行mapper设置,本文采用mybatis,涉及sql根据业务调整
@Autowired
private CaseMapper caseMapper;
@Autowired
private DownloadExportMapper downloadExportMapper;
// DownloadUtil集成了导出时的一些通用方法或步骤
@Autowired
private DownloadUtil downloadUtil;
private final static String DOWNLOAD_URL = "/case/export";
public Result<String> export(Request request, HttpServletResponse response) {
// 根据传参获取统计导出数量
Integer pageSize = Math.toIntExact(caseMapper.countAll(request);
log.info("导出数据量:{}", pageSize);
Export downloadExport = new Export();
// 获取导出操作人
String realName = CurrentUserUtils.getCurrentRealName();
downloadExport.setOperator(realName);
downloadExport.setExportNumber(pageSize);
downloadExport.setCreateTime(new SimpleDateFormat("yyyy-MM-dd HH:mm:ss").format(new Date()));
downloadExport.setUpdateTime(new SimpleDateFormat("yyyy-MM-dd HH:mm:ss").format(new Date()));
downloadExport.setModel(DownloadModelEnum.getModelByUrl(DOWNLOAD_URL));
downloadExport.setExportStatus("生成中");
downloadExportMapper.insert(downloadExport);
//此处需要固定在查询请求实体类中增加一个导出id字段
request.setDownloadId(String.valueOf(downloadExport.getId()));
String res = downloadUtil.export(request, downloadExport);
return new Result<String>().success(res);
//此处还可以增加一层数量判断,当小于多少量级时直接使用该系统工程导出
// 详细代码可见系统工程直接导出补充
}
// 系统工程直接导出代码补充
// 获取数据
List<Case> list = caseMapper.selectList(request);
log.info("数据量:{}查询获取成功", list.size());
try (ServletOutputStream out = response.getOutputStream()) {
ExcelWriter excelWriter = EasyExcel.write(out, Case.class)
.build();
// 按顺序写入Excel,每次写入对应的sheet
log.info("开始将数据写入表格");
WriteSheet build = EasyExcel.writerSheet().build();
excelWriter.write(list, build);
log.info("{}写入完成", list.size());
excelWriter.finish();
log.info("导出成功");
return new Result<String>().success("导出成功");
} catch (Exception e) {
log.error("导出异常", e);
throw new RuntimeException("导出异常", e);
}
@Data
@ApiModel(value = "下载导出")
public class Export {
@ApiModelProperty(value = "ID")
private Integer id;
@ApiModelProperty(value = "数据模块")
private String model;
@ApiModelProperty(value = "导出数量")
private Integer exportNumber;
@ApiModelProperty(value = "导出状态")
private String exportStatus;
@ApiModelProperty(value = "备注")
private String remark;
@ApiModelProperty(value = "操作人")
private String operator;
@ApiModelProperty(value = "生成时间")
private String createTime;
@ApiModelProperty(value = "更新时间")
private String updateTime;
@ApiModelProperty(value = "文件下载路径")
private String fileDownload;
}
//若同一个系统工程中有多个接口需要导出,按此配置对应导出下载路径即可
@Component
public class DownloadUtil {
// 将导出工程的基本地址配置到运行文件application.yml中
@Value("${download.base_url}")
private String baseUrl;
@Autowired
private DownloadExportMapper downloadExportMapper;
private final static String DOWNLOAD_URL = "/case/export";
private String getUrl(String urlSuffix) {
// 避免导出地址获取失败
if (StringUtils.isBlank(baseUrl)) {
return "导出工程的url字符串" + urlSuffix;
}
return baseUrl + urlSuffix;
}
public String getDownloadUrl(){
return getUrl(DOWNLOAD_URL);
}
public String export(Request request, Export export) {
return exportCommon(getDownloadUrl(), request, export);
}
private <T> String exportCommon(String downloadUrl, T request, Export export) {
// 使用反射创建对象实例
try {
JSON json = JSONUtil.parse(request);
String body = HttpRequest.post(downloadUrl).body(json.toString()).execute().body();
JSON response = JSONUtil.parse(body);
Integer code = (Integer)response.getByPath("code");
if (code == null){
export.setExportStatus("生成失败");
export.setUpdateTime(new SimpleDateFormat("yyyy-MM-dd HH:mm:ss").format(new Date()));
downloadExportMapper.update(export);
throw new GzsendiException("下载请求发送失败:下载程序返回为空");
}
else if (code == 200){
return "导出任务已创建,请转至报表中心查看;";
}else {
export.setExportStatus("生成失败");
export.setUpdateTime(new SimpleDateFormat("yyyy-MM-dd HH:mm:ss").format(new Date()));
downloadExportMapper.update(export);
String message = (String) response.getByPath("message");
return "异步下载文件失败:" + message;
}
} catch (Exception e) {
throw new GzsendiException("下载请求发送失败:{}", e);
}
}
}
导出工程接口
// 同样的注入响应mapper,此处省略
public Result<Object> export(Request request) throws Exception {
// 1. 校验导出任务
// 根据自己业务逻辑进行导出任务查询条件、结果数量等校验
// 此处用Export实体类模拟需要导出的实体类数据信息
// 在调用导出接口,创建导出任务时可直接保存相关信息,让此工程在后台进行导出
Export export = downloadExportMapper.queryById(Integer.valueOf(request.getDownloadId()));
// 2. 根据数据量选择导出策略
// 设置阈值:当导出数量级处于(MIN_SIZE, MAX_SIZE]区间时可以单表导出,[MAX_SIZE,一定量级)时按照某个字段将数据进行分隔,如可按某时间字段每一个月为一个表格,最终合并到一个zip压缩包导出
if (export.getExportNumber() > ExportUtil.MIN_SIZE && export.getExportNumber() <= ExportUtil.MAX_SIZE) {
return exportSingleFile(request); // 单文件导出
} else {
return exportMultiFile(request); // 多文件分表导出
}
}
实体类
// 使用注解设置导出后表头及内容格式
@Data
@HeadRowHeight(15)
@ColumnWidth(30)
@HeadStyle(horizontalAlignment = HorizontalAlignmentEnum.CENTER, verticalAlignment = VerticalAlignmentEnum.CENTER, borderTop = BorderStyleEnum.THIN, borderBottom = BorderStyleEnum.THIN, borderLeft = BorderStyleEnum.THIN, borderRight = BorderStyleEnum.THIN)
@ContentStyle(horizontalAlignment = HorizontalAlignmentEnum.CENTER, verticalAlignment = VerticalAlignmentEnum.CENTER, borderTop = BorderStyleEnum.THIN, borderBottom = BorderStyleEnum.THIN, borderLeft = BorderStyleEnum.THIN, borderRight = BorderStyleEnum.THIN)
public class Case{
@ApiModelProperty(value = "名称")
@ExcelProperty("名称")
private String name;
... ...
}
工具类
@Component
@Slf4j
public class ExportUtil {
//根据需求设置阈值
public static final int MIN_SIZE = 5000;
public static final int MAX_SIZE = 60000;
public static final String STATUS_GENERATED = "已生成";
public static final String STATUS_FAILED = "生成失败";
@Autowired
private DownloadExportMapper downloadExportMapper;
// 自行创建对应的Websocket
@Autowired
private DownloadWebSocketHandler downloadWebSocketHandler;
/**
* 通用导出方法,根据数据量自动选择单表导出或分表导出
* @param downloadId 下载任务ID
* @param filePrefix 文件名前缀
* @param dataFetcher 数据获取函数
* @param dataProcessor 数据处理函数
* @param dataValidator 数据验证函数
* @param entityClass 导出实体类
* @param threadPool 线程池
* @return 导出结果
*/
public <T, R> Result<Object> exportData(
Integer downloadId,
String filePrefix,
Function<R, List<T>> dataFetcher,
Consumer<T> dataProcessor,
Function<List<T>, Boolean> dataValidator,
Class<?> entityClass,
R request,
ThreadPoolExecutor threadPool) {
// 获取对应日志数据
DownloadExport export = downloadExportMapper.queryById(downloadId);
if (export == null) {
log.error("导出任务不存在");
return Result.error("导出任务不存在");
}
if (export.getExportNumber() > MIN_SIZE && export.getExportNumber() <= MAX_SIZE) {
return exportSingleFile(export, filePrefix, dataFetcher, dataProcessor, dataValidator, entityClass, request, threadPool);
} else {
return exportMultiFile(export, filePrefix, request, threadPool);
}
}
/**
* 单表导出方法
*/
private <T, R> Result<Object> exportSingleFile(
DownloadExport export,
String filePrefix,
Function<R, List<T>> dataFetcher,
Consumer<T> dataProcessor,
Function<List<T>, Boolean> dataValidator,
Class<?> entityClass,
R request,
ThreadPoolExecutor threadPool) {
String filename = filePrefix + "_" + DateUtil.format(DateUtil.date(), "yyyyMMddHHmmss") + ".xlsx";
String filepath = downloadPath + UUID.randomUUID().toString() + ".xlsx";
threadPool.execute(() -> {
log.info("导出数据量为:{},单表进行导出", export.getExportNumber());
try {
// 获取数据
List<T> dataList = dataFetcher.apply(request);
// 处理数据
if (dataProcessor != null) {
dataList.forEach(dataProcessor);
}
// 验证数据
if (dataValidator != null && !dataValidator.apply(dataList)) {
log.error("查询条件传递有误,请重新查询");
exportFailCallback(export, filepath);
return;
}
log.info("开始数据导出,导出总量为{}", dataList.size());
// 过滤空数据
List<T> filteredData = dataList.stream()
.filter(Objects::nonNull)
.collect(Collectors.toList());
// 导出到Excel
writeToExcel(filteredData, filepath, entityClass);
// 更新导出状态
exportSuccessCallback(export, filename, filepath);
} catch (Exception e) {
log.error("导出数据到本地异常", e);
exportFailCallback(export, filepath);
}
});
return new Result<>().success("导出任务已提交");
}
/**
* 分表导出方法
*/
private <R> Result<Object> exportMultiFile(
DownloadExport export,
String filePrefix,
R request,
ThreadPoolExecutor threadPool) {
String filename = filePrefix + "_" + DateUtil.format(DateUtil.date(), "yyyyMMddHHmmss") + ".zip";
String filepath = downloadPath + UUID.randomUUID().toString() + ".zip";
threadPool.execute(() -> {
log.info("导出数据量为:{},进行分表导出", export.getExportNumber());
String tempDir = downloadPath + UUID.randomUUID().toString() + "/";
new File(tempDir).mkdirs();
try {
// 这里需要具体实现分表导出逻辑
// 由于分表导出逻辑较为复杂,需要根据具体业务场景实现
// 此处仅提供框架
// 创建最终压缩包
outPutToZip(filepath, tempDir, export);
// 清理临时目录
clearTempFiles(tempDir);
// 更新导出状态
exportSuccessCallback(export, filename, filepath);
} catch (Exception e) {
log.error("导出数据到本地异常", e);
exportFailCallback(export, filepath);
}
});
return new Result<>().success("导出任务已提交");
}
/**
* 将数据写入Excel文件
*/
public <T> void writeToExcel(List<T> dataList, String filepath, Class<?> entityClass) {
try (FileOutputStream out = new FileOutputStream(filepath)) {
ExcelWriter excelWriter = EasyExcel.write(out, entityClass)
.build();
WriteSheet writeSheet = EasyExcel.writerSheet().build();
excelWriter.write(dataList, writeSheet);
excelWriter.finish();
log.info("{}条记录写入完成", dataList.size());
log.info("导出成功到本地:{}", filepath);
} catch (Exception e) {
log.error("Excel文件写入失败", e);
throw new RuntimeException("Excel文件写入失败", e);
}
}
/**
* 将数据写入Excel文件(带sheet名)
*/
public <T> void writeToExcelWithSheetName(List<T> dataList, String filepath, Class<?> entityClass, String sheetName) {
try (FileOutputStream out = new FileOutputStream(filepath)) {
ExcelWriter excelWriter = EasyExcel.write(out, entityClass)
.build();
WriteSheet writeSheet = EasyExcel.writerSheet(sheetName).build();
excelWriter.write(dataList, writeSheet);
excelWriter.finish();
log.info("分表 {} 写入完成,记录数: {}", sheetName, dataList.size());
} catch (Exception e) {
log.error("Excel文件写入失败", e);
throw new RuntimeException("Excel文件写入失败", e);
}
}
/**
* 写入多个sheet的Excel文件
*/
public void writeMultiSheetExcel(String filepath, Map<String, Object> sheetDataMap) {
try (FileOutputStream out = new FileOutputStream(filepath)) {
ExcelWriter excelWriter = EasyExcel.write(out)
.build();
int sheetNo = 0;
for (Map.Entry<String, Object> entry : sheetDataMap.entrySet()) {
String sheetName = entry.getKey();
Object data = entry.getValue();
if (data instanceof Map) {
Map<String, Object> sheetInfo = (Map<String, Object>) data;
List<?> dataList = (List<?>) sheetInfo.get("data");
Class<?> headClass = (Class<?>) sheetInfo.get("headClass");
WriteSheet writeSheet = EasyExcel.writerSheet(sheetNo++, sheetName)
.head(headClass)
.build();
excelWriter.write(dataList, writeSheet);
}
}
excelWriter.finish();
log.info("多Sheet Excel导出成功到本地:{}", filepath);
} catch (Exception e) {
log.error("多Sheet Excel文件写入失败", e);
throw new RuntimeException("多Sheet Excel文件写入失败", e);
}
}
/*
* 导出失败回调及导出任务处理
*/
public void exportFailCallback(DownloadExport export, String filepath) {
// 导出失败回调
export.setExportStatus(STATUS_FAILED);
export.setUpdateTime(DateUtil.format(DateUtil.date(), "yyyy-MM-dd HH:mm:ss"));
export.setFileDownload(filepath);
downloadExportMapper.update(export);
downloadWebSocketHandler.broadcast("任务 " + export.getModel() + " 状态: 生成失败");
}
/**
* 将文件导出到压缩包
* @param filepath
* @param tempDir
* @param export
*/
public void outPutToZip(String filepath, String tempDir, DownloadExport export) throws Exception{
try (FileOutputStream fos = new FileOutputStream(filepath);
ZipOutputStream zos = new ZipOutputStream(fos)) {
File[] files = new File(tempDir).listFiles();
if (files != null) {
for (File file : files) {
try (FileInputStream fis = new FileInputStream(file)) {
ZipEntry zipEntry = new ZipEntry(file.getName());
zos.putNextEntry(zipEntry);
byte[] buffer = new byte[1024];
int length;
while ((length = fis.read(buffer)) > 0) {
zos.write(buffer, 0, length);
}
zos.closeEntry();
}
}
log.info("所有分表文件已成功压缩到: {}", filepath);
}
} catch (Exception e) {
log.error("压缩文件生成异常", e);
exportFailCallback(export, filepath);
throw new RuntimeException("压缩文件生成异常", e);
}
}
/**
* 清理掉临时文件及目录
* @param tempDir
*/
public static void clearTempFiles(String tempDir) {
File dir = new File(tempDir);
if (dir.exists() && dir.isDirectory()) {
File[] files = dir.listFiles();
if (files != null) {
for (File file : files) {
if (!file.delete()) {
log.warn("无法删除临时文件: {}", file.getAbsolutePath());
}
}
}
if (!dir.delete()) {
log.warn("无法删除临时目录: {}", dir.getAbsolutePath());
}
}
}
/**
* 导出成功回调
* @param export
* @param filename
* @param filepath
*/
public void exportSuccessCallback(DownloadExport export, String filename, String filepath) {
// 更新数据库状态
log.info("文件导出数据更新中:{}", export.getModel());
export.setExportStatus(STATUS_GENERATED);
export.setRemark(filename);
export.setUpdateTime(DateUtil.format(DateUtil.date(), "yyyy-MM-dd HH:mm:ss"));
export.setFileDownload(filepath);
downloadExportMapper.update(export);
log.info("文件导出数据已更新:{}", export.getModel());
// 发送 websocket 通知
downloadWebSocketHandler.broadcast("任务 " + export.getModel() + " 状态: 已生成");
}
/**
* 根据开始和结束时间生成期间月份
* @param dateDuration
* @return
*/
public static List<String> getDateRangeStr(Map<String, String> dateDuration) {
// 根据开始和结束时间生成期间月份
List<DateTime> range = DateUtil.rangeToList(DateUtil.parseDate(dateDuration.get("start")),
DateUtil.parse(dateDuration.get("end")), DateField.MONTH);
return range.stream()
.map(date -> DateUtil.format(date, "yyyy-MM"))
.collect(Collectors.toList());
}
/**
* 生成临时目录
*/
public String createTempDir() {
String tempDir = downloadPath + UUID.randomUUID().toString() + "/";
File dir = new File(tempDir);
if (!dir.exists() && !dir.mkdirs()) {
throw new RuntimeException("无法创建临时目录: " + tempDir);
}
return tempDir;
}
/**
* 生成文件路径
*/
public String generateFilePath(String suffix) {
return downloadPath + UUID.randomUUID().toString() + suffix;
}
/**
* 生成文件名
*/
public String generateFileName(String prefix, String suffix) {
return prefix + "_" + DateUtil.format(DateUtil.date(), "yyyyMMddHHmmss") + suffix;
}
}
单文件导出
private Result<Object> exportSingleFile(Export export, Request request) {
//此处的exportUtil为导出工程中的
String filename = exportUtil.generateFileName("导出文件名", ".xlsx");
String filepath = exportUtil.generateFilePath(".xlsx");
//提前创建定义好的线程池
THREAD_POOL_EXECUTOR.execute(() -> {
log.info("导出数据量为:{},单表进行导出", export.getExportNumber());
try {
// 获取数据
List<Case> list = caseMapper.selectList(request);
// 验证数据
if (CollectionUtil.isEmpty(list)) {
log.error("查询条件传递有误,请重新查询");
// 此处增加了Websocket,将导出状态更新信息同步到前端
exportUtil.exportFailCallback(export, filepath);
return;
}
log.info("开始数据导出,导出总量为{}", list.size());
// 过滤空数据
List<Case> filteredData = list.stream()
.filter(Objects::nonNull)
.collect(Collectors.toList());
// 导出到Excel
try (FileOutputStream out = new FileOutputStream(filepath)) {
ExcelWriter excelWriter = EasyExcel.write(out, Case.class)
.build();
WriteSheet writeSheet = EasyExcel.writerSheet().build();
excelWriter.write(filteredData, writeSheet);
excelWriter.finish();
log.info("{}条记录写入完成", filteredData.size());
log.info("导出成功到本地:{}", filepath);
} catch (Exception e) {
log.error("导出异常", e);
exportUtil.exportFailCallback(export, filepath);
throw new RuntimeException("导出异常", e);
}
// 使用WebSocket更新导出状态
exportUtil.exportSuccessCallback(export, filename, filepath);
} catch (Exception e) {
log.error("导出单数据异常", e);
exportUtil.exportFailCallback(export, filepath);
}
});
return new Result<>().success("导出任务已提交");
}
多文件分表合并到zip文件导出
/**
* 多文件导出12345工单数据
*/
private Result<Object> exportMultiFile(Export export, Request request) {
String filename = exportUtil.generateFileName("压缩包名称", ".zip");
String filepath = exportUtil.generateFilePath(".zip");
THREAD_POOL_EXECUTOR.execute(() -> {
log.info("导出数据量为:{},进行分表导出", export.getExportNumber());
try {
// 查询当前条件下数据涉及时间段
// 此处查询需导出数据列表中的开始时间和结束时间
Map<String, String> dateDuration = caseMapper.selectCaseDateDuration(request);
List<String> dateRange = ExportUtil.getDateRangeStr(dateDuration);
// 创建临时保存目录
String tempDir = exportUtil.createTempDir();
// 按月份分表导出
for (String date : dateRange) {
request.setQueryDate(date);
// 新建一个sql,增加一个按月份去查询数据
List<Case> data = caseMapper.selectListOnMonth(request);
if (CollectionUtil.isEmpty(data)) {
continue;
}
String sheetName = date;
String excelFilePath = tempDir + "【" + sheetName + "】" + "表数据名称" + ".xlsx";
exportUtil.writeToExcelWithSheetName(data, excelFilePath, ComplaintFocusesWork.class, sheetName);
}
// 创建最终压缩包
exportUtil.outPutToZip(filepath, tempDir, export);
// 清理临时目录
ExportUtil.clearTempFiles(tempDir);
// 更新导出状态
exportUtil.exportSuccessCallback(export, filename, filepath);
} catch (Exception e) {
log.error("导出异常", e);
exportUtil.exportFailCallback(export, filepath);
}
});
return new Result<>().success("导出任务已提交");
}
关键技术点
1. 智能分流策略(例如,每个月的数据量预统计量最多也不超过5万时可以采用以下策略)
- 5万条以下:单文件导出
- 5万条以上:按时间维度分表导出后压缩
2. 内存优化
- 使用线程池异步处理
- 分批查询数据
3. Excel处理
- 采用阿里EasyExcel避免OOM
- 自动调整列宽
性能优化措施
线程池配置
private static final ThreadPoolExecutor THREAD_POOL_EXECUTOR = new ThreadPoolExecutor(
5, 10, 0L, TimeUnit.MILLISECONDS,
new ArrayBlockingQueue<>(5), new ThreadPoolExecutor.DiscardOldestPolicy());
内存管理
- 使用Stream API处理数据流
- 及时关闭文件资源
- 清理临时文件