java使用生产者消费者模型批量处理百万级数据量文件
说明:
文件解析工具类部分代码重复,仅需要看第一个主要的核心逻辑类即可
1、主要核心逻辑抽象类BatchProcessLargeFile
package com.example.schedule.threads;
import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.example.schedule.config.annotation.FieldOrder;
import com.example.schedule.config.dataSourceConfig.DataSourceConfig;
import com.example.schedule.tools.DbColumnsChecker;
import com.example.schedule.tools.FileExtractTool;
import com.example.schedule.tools.dto.ColumnsMsg;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.jdbc.datasource.DriverManagerDataSource;
import java.io.*;
import java.lang.reflect.Field;
import java.math.BigDecimal;
import java.sql.Connection;
import java.sql.DatabaseMetaData;
import java.sql.ResultSet;
import java.sql.Statement;
import java.util.*;
import java.util.concurrent.*;
import java.util.function.BiConsumer;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* @Author:
* @Title:
* @Description:
* 一、功能说明
* 生产者消费者模型处理大文件,使用 LinkedBlockingQueue 作为数据缓存区,三级线程开启数量可配置
* 二、处理逻辑
* 线程 threadRead 读取文件转换为Bean -> 将 bean 放到第一个 LinkedBlockingQueue A 中
* 线程 threadTrans 读取队列A中的bean -> 对数据进行转换填充生成pojo类,放到第二个 LinkedBlockingQueue B 中
* 线程 threadInsert 读取队列B中的pojo -> 批量入库
* 三、其他功能:
* 1、读取文件数据解析成bean时,文件字段内容与bean类定义不一致将会跳过,且将错误数据与错误原因写入到与文件路径一致的新文件parseError中
* 2、数据入库时,会检查每个字段的数据值是否符合数据库字段定义,且将错误数据与错误原因写入到与文件路径一致的新文件insertError中(可开关)
* 四、涉及技术
* 反射、内部类、多线程、自定义注解、抽象类、继承、字段排序、文件处理、数据库密码加解密
* @Date: 2024/9/27 10:29
* @Version: 1.0
*/
public abstract class BatchProcessLargeFile {
private static final Logger logger = LoggerFactory.getLogger(BatchProcessLargeFile.class);
// ==================公共配置=====================
@Value("${spring.datasource.url}")
private String dbUrl;
@Value("${spring.datasource.username}")
private String dbUsername;
@Value("${spring.datasource.driverClassName}")
private String dbDriverClassName;
@Autowired
private DataSourceConfig dataSourceConfig;
@Value("${batchConfig.timeOut:600}")
private String timeOut;
// 性能测试:是否入库/queue队列大小监控
@Value("${batchConfig.performanceTesting:true}")
private String performanceTesting;
// ==================文件处理配置=====================
@Value("${batchConfig.batchInsertNum:200}")
private String batchInsertNum;
// 读文件线程数
@Value("${batchConfig.threadReadCount:1}")
private String threadReadCount;
// 转换Bean线程数
@Value("${batchConfig.threadTransferCount:2}")
private String threadTransferCount;
// 入库线程数
@Value("${batchConfig.threadInsertCount:4}")
private String threadInsertCount;
// 入库字段检查
@Value("${batchConfig.insertColumnsCheck:true}")
private String insertColumnsCheck;
/***
* @description: 大文件批量处理逻辑抽象父类
* @author:
* @date:2024/9/27 14:21
* @param fileFullName:文件全路径
* @param clazz:文件单行解析目标bean
* @param beanQueue:bean属性类暂存区
* @param insertQueue:入表bean属性类暂存区
* @param batchInsertMethod:批量入表mapper语句
* @return: void
*/
public <T1, T2> void startThread(String fileFullName, Class<T1> clazz, LinkedBlockingQueue<T1> beanQueue, LinkedBlockingQueue<T2> insertQueue, BiConsumer<List<T2>, Integer> batchInsertMethod) {
try {
long start = System.currentTimeMillis();
int threadReadNum = Integer.parseInt(threadReadCount);
int threadTransNum = Integer.parseInt(threadTransferCount);
int threadInsertNum = Integer.parseInt(threadInsertCount);
int timeOutWait = Integer.parseInt(timeOut);
int batchInsertCount = Integer.parseInt(batchInsertNum);
boolean columnsCheck = Boolean.parseBoolean(insertColumnsCheck);
boolean insertSwitch = Boolean.parseBoolean(performanceTesting);
logger.info("当前开启读文件线程数:{},转换bean线程数:{},入库线程数:{}", threadReadNum, threadTransNum, threadInsertNum);
List<CompletableFuture<Void>> futureList = new ArrayList<>();
logger.info("~~~~~~~~~~~~~~~~~创建读取文件线程~~~~~~~~~~~~~~~~~");
CompletableFuture<Void> producerFuture = null;
ExecutorService executorRead = Executors.newFixedThreadPool(threadReadNum, r -> {
Thread t = new Thread(r);
t.setName("ThreadRead-" + t.getId());
return t;
});
long totalLines = getFileLineNum(fileFullName);
long linesPerThread = totalLines / threadReadNum;
logger.info("当前获取到文件总行数为:{},开启线程数为:{},每个线程处理数量为:{}", totalLines, threadReadNum, linesPerThread);
long remainder = totalLines % threadReadNum;
long startLine = 1;
for (int i = 1; i <= threadReadNum; i++) {
long endLine = startLine + linesPerThread;
if (i == threadReadNum) {
// 如果是最后一个线程,把剩余的行数加上
endLine += remainder;
}
logger.info("当前开启线程{},处理的行数为:[{},{})", i, startLine, endLine);
producerFuture = CompletableFuture.runAsync(new readFile(beanQueue, fileFullName, clazz, "\u00A7", "D", "UTF-8", startLine, endLine), executorRead);
futureList.add(producerFuture);
startLine = endLine;
}
logger.info("~~~~~~~~~~~~~~~~~创建数据转换线程~~~~~~~~~~~~~~~~~");
ExecutorService executorTransfer = Executors.newFixedThreadPool(threadTransNum, r -> {
Thread t = new Thread(r);
t.setName("ThreadTransfer-" + t.getId());
return t;
});
for (int i = 0; i < threadTransNum; i++) {
CompletableFuture<Void> future = CompletableFuture.runAsync(new transBean(beanQueue, insertQueue, this, timeOutWait), executorTransfer);
futureList.add(future);
}
logger.info("~~~~~~~~~~~~~~~~~创建写入数据库线程~~~~~~~~~~~~~~~~~");
String dbPassword = dataSourceConfig.getPassword(); // 先解析数据库密码
ExecutorService executorInsert = Executors.newFixedThreadPool(threadInsertNum, r -> {
Thread t = new Thread(r);
t.setName("ThreadInsert-" + t.getId());
return t;
});
for (int i = 0; i < threadInsertNum; i++) {
CompletableFuture<Void> future = CompletableFuture.runAsync(new insertData(insertQueue, batchInsertMethod, dbUrl, dbUsername, dbPassword, dbDriverClassName, fileFullName, batchInsertCount, timeOutWait, columnsCheck, insertSwitch), executorInsert);
futureList.add(future);
}
logger.info("线程开启完毕,等待线程执行完毕");
CompletableFuture.allOf(futureList.toArray(new CompletableFuture[0])).join();
logger.info("线程执行完毕,关闭线程池");
executorRead.shutdown();
executorTransfer.shutdown();
executorInsert.shutdown();
long end = System.currentTimeMillis();
logger.info("处理文件{}耗时:{}ms", fileFullName, end - start);
} catch (Exception e) {
e.printStackTrace();
}
}
/***
 * @description: Count the number of lines in a file. Counts via readLine() so a
 *               final line that lacks a trailing newline is still included —
 *               LineNumberReader.skip() only counts line terminators, which made
 *               the last line of such files invisible to the reader threads.
 * @author:
 * @date:2024/10/5 15:59
 * @param filePath : file to count
 * @return: line count, or -1 when the file cannot be read
 */
public static long getFileLineNum(String filePath) {
    try (BufferedReader reader = new BufferedReader(new FileReader(filePath))) {
        long lines = 0;
        while (reader.readLine() != null) {
            lines++;
        }
        return lines;
    } catch (IOException e) {
        // Callers treat -1 as "unknown"; keep that contract.
        return -1;
    }
}
/***
 * @description : Inner worker: reads its assigned slice [startLine, endLine) of the
 *                file, parses each data line into a bean of type {@code T} via
 *                reflection, and puts the bean on the blocking queue 【beanQueue】.
 *                Lines that fail to parse are appended (with the reason) to the
 *                parseError file next to the source file.
 * @author:
 * @date:2024/9/27 10:32
 * @return:
 */
static class readFile<T> implements Runnable {
    private static final Logger logger = LoggerFactory.getLogger(readFile.class);
    // Extracts the first numeric token from a cell; compiled once instead of per cell.
    private static final Pattern NUMBER_PATTERN = Pattern.compile("\\d+\\.?\\d*");
    private final LinkedBlockingQueue<T> beanQueue;
    private final String filePath;
    private final Class<T> clazz;
    private final String separator;
    // Only lines whose first cell equals this marker are treated as data lines.
    private final String readStartChar;
    private final String encoding;
    // This worker owns lines [startLine, endLine) — 1-based, end exclusive.
    private final long startLine;
    private final long endLine;
    /***
     * @description:
     * @author:
     * @date:2024/9/27 10:36
     * @param beanQueue : queue the parsed beans are staged on
     * @param filePath : file to read
     * @param clazz : target bean type for one parsed line
     * @param separator : cell separator (treated as a regex by String.split)
     * @param readStartChar : only lines starting with this marker cell are parsed
     * @return:
     */
    public readFile(LinkedBlockingQueue<T> beanQueue, String filePath, Class<T> clazz, String separator, String readStartChar, String encoding, long startLine, long endLine) {
        this.beanQueue = beanQueue;
        this.filePath = filePath;
        this.clazz = clazz;
        this.separator = separator;
        this.readStartChar = readStartChar;
        this.encoding = encoding;
        this.startLine = startLine;
        this.endLine = endLine;
    }
    @Override
    public void run() {
        // try-with-resources guarantees the reader is closed on every exit path.
        try (BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(filePath), encoding))) {
            long currentLine = 1;
            // Fast-forward to the first line owned by this worker.
            while (currentLine < startLine && reader.readLine() != null) {
                currentLine++;
            }
            String line;
            while (currentLine < endLine && (line = reader.readLine()) != null) {
                processLine(line, currentLine);
                currentLine++;
            }
            logger.info("read线程{}结束运行", Thread.currentThread().getName());
        } catch (IOException e) {
            logger.error("解析错误", e);
        } catch (InterruptedException e) {
            // Preserve the interrupt status so the executor can observe it.
            Thread.currentThread().interrupt();
            logger.error("解析错误", e);
        }
    }
    // Parse one raw line; rows that fail conversion are appended to the parseError file.
    private void processLine(String line, long currentLine) throws InterruptedException {
        String[] row = line.split(separator);
        if (row.length == 0) {
            return;
        }
        if (!row[0].equals(readStartChar)) {
            // Log the raw line (passing the String[] would print the array identity).
            logger.info("跳过字符{}的行:{}", row[0], line);
            return;
        }
        // Drop the leading record-type marker before mapping cells onto fields.
        String[] cells = new String[row.length - 1];
        System.arraycopy(row, 1, cells, 0, cells.length);
        T bean = null;
        try {
            bean = parseRow(cells, clazz, currentLine);
        } catch (Exception e) {
            logger.error("DATA ERROR:{},ERROR MSG:{}", Arrays.toString(cells), e.getMessage());
            // Record the bad line plus the reason in the parseError file.
            FileExtractTool.appendErrBeanData(filePath, line + "\u00A7" + e.getMessage());
        }
        if (bean != null) {
            logger.debug("放入beanQueue中的数据类型:{}", bean);
            beanQueue.put(bean); // blocks when the queue is full (back-pressure)
        }
    }
    /***
     * @description: Reflectively populate a new instance of {@code clazz} from the
     *               split cells of one line. Fields are matched to cells by their
     *               {@link FieldOrder} annotation value (unannotated fields sort last).
     * @throws NumberFormatException when a cell cannot be converted to the field type;
     *         the message carries the 1-based file column (cell index + 2, because the
     *         leading record marker was stripped before parsing).
     */
    private T parseRow(String[] row, Class<T> clazz, long line) throws Exception {
        T obj = null;
        try {
            // getDeclaredConstructor().newInstance() replaces the deprecated Class.newInstance().
            obj = clazz.getDeclaredConstructor().newInstance();
            Field[] fields = clazz.getDeclaredFields();
            // Order fields by @FieldOrder so cell i maps onto the i-th configured column.
            Arrays.sort(fields, Comparator.comparingInt(f -> {
                FieldOrder annotation = f.getAnnotation(FieldOrder.class);
                return annotation != null ? annotation.value() : Integer.MAX_VALUE;
            }));
            for (int i = 0; i < fields.length && i < row.length; i++) {
                Field field = fields[i];
                field.setAccessible(true);
                String cellValue = row[i];
                Class<?> type = field.getType();
                try {
                    if (cellValue == null || cellValue.isEmpty()) {
                        // NOTE(review): setting null on a primitive field throws
                        // IllegalArgumentException (propagated, as before) — confirm
                        // beans use boxed types for optional columns.
                        field.set(obj, null);
                        continue;
                    }
                    setTypedValue(obj, field, type, cellValue);
                } catch (NumberFormatException e) {
                    logger.error("解析第{}行的{}列报错,读取到的值为【{}】。代码中配置的字段名为【{}】,字段类型为{}", line, (i + 2), cellValue, field.getName(), type);
                    throw new NumberFormatException("解析第" + line + "行的第" + (i + 2) + "列报错,读取到的值为【" + cellValue + "】。代码中配置的字段名为【" + field.getName() + "】,字段类型为" + type);
                }
            }
        } catch (ReflectiveOperationException e) {
            logger.error("创建实体类【{}】时出错", clazz, e);
        }
        return obj;
    }
    // Convert one non-empty cell to the declared field type and assign it.
    // Unsupported types are silently left at their default (as before).
    private void setTypedValue(T obj, Field field, Class<?> type, String cellValue) throws IllegalAccessException {
        if (type == String.class) {
            field.set(obj, cellValue);
        } else if (type == byte.class || type == Byte.class) {
            field.set(obj, Byte.valueOf(cellValue));
        } else if (type == short.class || type == Short.class) {
            field.set(obj, Short.valueOf(cellValue));
        } else if (type == int.class || type == Integer.class) {
            field.set(obj, Integer.valueOf(cellValue));
        } else if (type == long.class || type == Long.class) {
            field.set(obj, Long.valueOf(cellValue));
        } else if (type == float.class || type == Float.class) {
            field.set(obj, Float.valueOf(cellValue));
        } else if (type == double.class || type == Double.class) {
            field.set(obj, Double.valueOf(cellValue));
        } else if (type == boolean.class || type == Boolean.class) {
            field.set(obj, Boolean.valueOf(cellValue));
        } else if (type == BigDecimal.class) {
            field.set(obj, parseBigDecimal(cellValue));
        }
    }
    // Extract the first numeric token of the cell as a BigDecimal.
    // NOTE(review): a value containing "万" is stored negated — this looks like a
    // domain-specific negative-amount marker; confirm against the file spec.
    private BigDecimal parseBigDecimal(String cellValue) {
        Matcher matcher = NUMBER_PATTERN.matcher(cellValue);
        if (!matcher.find()) {
            throw new NumberFormatException();
        }
        BigDecimal value = new BigDecimal(matcher.group());
        return cellValue.contains("万") ? value.negate() : value;
    }
}
/***
* @description : 内部类,读取阻塞队列【beanQueue】中的数据,按逻辑转换,转换完毕的放到【insertQueue】
* @author:
* @date:2024/9/27 10:40
* @return:
*/
static class transBean<T1, T2> implements Runnable {
private final LinkedBlockingQueue<T1> beanQueue;
private final LinkedBlockingQueue<T2> insertQueue;
private final BatchProcessLargeFile batchProcessLargeFile;
private final int timeOutWait;
public transBean(LinkedBlockingQueue<T1> beanQueue, LinkedBlockingQueue<T2> insertQueue, BatchProcessLargeFile batchProcessLargeFile, int timeOutWait) {
this.beanQueue = beanQueue;
this.insertQueue = insertQueue;
this.batchProcessLargeFile = batchProcessLargeFile;
this.timeOutWait = timeOutWait;
}
@Override
public void run() {
try {
while (true) {
// 从队列中取元素,如果队列为空则等待timeOut秒后结束线程
T1 take = beanQueue.poll(timeOutWait, TimeUnit.SECONDS);
if (take != null) {
logger.info("从beanQueue中取出的数据类型:{}", take.toString());
T2 info = batchProcessLargeFile.customerTrans(take);
insertQueue.put(info);
logger.info("放入insertQueue中的数据类型:{}", info.getClass());
} else {
logger.info("trans线程{}结束运行:{}秒内没有从beanQueue中获取到数据", Thread.currentThread().getName(), timeOutWait);
break;
}
}
} catch (InterruptedException e) {
e.printStackTrace();
}
}
}
/***
 * @description: Subclass hook: convert one parsed file bean into the pojo that will
 *               be batch-inserted. Invoked from the transform stage, potentially by
 *               several threads concurrently (threadTransferCount workers), so
 *               implementations should be stateless or thread-safe.
 * @param take bean taken from the read-stage queue
 * @return: the insert-stage pojo
 */
public abstract <T1, T2> T2 customerTrans(T1 take);
/***
* @description : 内部类,读取阻塞队列【insertQueue】中的数据,入库
* @author:
* @date:2024/9/27 10:40
* @return:
*/
static class insertData<T> implements Runnable {
private final ObjectMapper objectMapper = new ObjectMapper();
private String dbUrl;
private String dbUsername;
private String dbPassword;
private String dbDriverClassName;
private String fileFullName;
private int batchInsertCount;
private int timeOutWait;
private boolean columnsCheck;
private boolean insertSwitch;
private final LinkedBlockingQueue<T> insertQueue;
private final BiConsumer<List<T>, Integer> batchInsertMethod;
public insertData(LinkedBlockingQueue<T> insertQueue, BiConsumer<List<T>, Integer> batchInsertMethod, String dbUrl, String dbUsername, String dbPassword, String dbDriverClassName, String fileFullName, int batchInsertCount, int timeOutWait, boolean columnsCheck, boolean insertSwitch) {
this.insertQueue = insertQueue;
this.batchInsertMethod = batchInsertMethod;
this.dbUrl = dbUrl;
this.dbUsername = dbUsername;
this.dbPassword = dbPassword;
this.dbDriverClassName = dbDriverClassName;
this.fileFullName = fileFullName;
this.timeOutWait = timeOutWait;
this.batchInsertCount = batchInsertCount;
this.columnsCheck = columnsCheck;
this.insertSwitch = insertSwitch;
}
@Override
public void run() {
HashMap<String, ColumnsMsg> columnsMsg = null;
ArrayList<T> list = new ArrayList<>();
long start = System.currentTimeMillis();
while (true) {
try {
T take = insertQueue.poll(timeOutWait, TimeUnit.SECONDS);
if (take != null) {
logger.info("从insertQueue中取出的数据类型:{}", take.getClass());
boolean flag;
// 入库字段检查
if (columnsCheck){
if (columnsMsg == null) {
columnsMsg = getColumnsMsg(take.getClass());
}
flag = checkData(take, columnsMsg, fileFullName);
}else {
flag = true;
}
// 根据检查结果判定是否入库
if (flag) {
list.add(take);
if (list.size() == batchInsertCount) {
logger.info("当前入库的数据量:{}", list.size());
if (insertSwitch){
try {
batchInsertMethod.accept(list, list.size());
}catch (Exception e){
e.printStackTrace();
logger.error("入库出错:{}", e.getMessage());
}
}
list.clear();
long end = System.currentTimeMillis();
logger.warn("【文件处理】批次量大小为{},字段检查并入库完成,耗时:{}ms", batchInsertCount, end - start);
start = end;
}
}
} else {
if (list.size() > 0) {
logger.info("当前入库的数据量(最后批次):{}", list.size());
if (insertSwitch){
try {
batchInsertMethod.accept(list, list.size());
}catch (Exception e){
e.printStackTrace();
logger.error("入库出错:{}", e.getMessage());
}
}
long end = System.currentTimeMillis();
logger.warn("【文件处理】最后批次入库完成,耗时:{}ms", end - start);
}
logger.info("insert线程{}结束运行:{}秒内没有从insertQueue中获取到数据", Thread.currentThread().getName(), timeOutWait);
break;
}
} catch (InterruptedException e) {
e.printStackTrace();
}
}
}
private boolean checkData(T pojo, HashMap<String, ColumnsMsg> columnsMsg, String fileFullName) {
try {
StringBuilder msg = new StringBuilder();
String result = DbColumnsChecker.checkData(pojo, columnsMsg);
if (!"SUCCESS".equals(result)) {
msg.setLength(0);// 清空数据
msg.append(printBeanFields(pojo)).append(":").append(result);
FileExtractTool.appendErrPojoData(fileFullName, msg.toString());
return false;
} else {
return true;
}
} catch (NoSuchFieldException | IllegalAccessException e) {
e.printStackTrace();
}
return false;
}
public <T> String printBeanFields(T bean) {
if (bean == null) {
return null;
}
// 获取 bean 的类信息
Class<?> clazz = bean.getClass();
Field[] fields = clazz.getDeclaredFields();
// 创建一个 Map 来存储字段名和字段值
java.util.Map<String, Object> fieldMap = new java.util.HashMap<>();
// 访问所有字段
for (Field field : fields) {
field.setAccessible(true); // 设置可访问性
try {
Object value = field.get(bean); // 获取字段值
fieldMap.put(field.getName(), value); // 将字段名和字段值添加到 Map 中
} catch (IllegalAccessException e) {
e.printStackTrace();
}
}
// 将 Map 转换为 JSON 字符串并返回
try {
return objectMapper.writeValueAsString(fieldMap);
} catch (JsonProcessingException e) {
e.printStackTrace();
return null; // 处理 JSON 转换异常
}
}
// 获取clazz对应的表字段信息
private HashMap<String, ColumnsMsg> getColumnsMsg(Class<?> clazz) {
Connection connection = null;
Statement statement = null;
ResultSet resultSet = null;
HashMap<String, ColumnsMsg> map = new HashMap<>();
try {
// 创建数据源并获取连接
DriverManagerDataSource dataSource = dataSource();
connection = dataSource.getConnection();
// 执行查询以获取表的列信息
String tableName = clazz.getSimpleName().replaceAll("Mapper$", "");
tableName = tableName.replaceAll("([a-z])([A-Z])", "$1_$2").toLowerCase(); // 驼峰转下划线
DatabaseMetaData meta = connection.getMetaData();
ResultSet columns = meta.getColumns(null, null, tableName, null);
while (columns.next()) {
ColumnsMsg columnsMsg = new ColumnsMsg();
// 字段名转换为小驼峰
String columnName = toCamelCase(columns.getString("COLUMN_NAME"));
columnsMsg.setTableName(tableName);
columnsMsg.setColumnName(columnName);
columnsMsg.setColumnType(columns.getString("TYPE_NAME"));
columnsMsg.setColumnSize(columns.getInt("COLUMN_SIZE"));
columnsMsg.setColumnDigit(columns.getInt("DECIMAL_DIGITS"));
columnsMsg.setNullAble(columns.getBoolean("NULLABLE"));
map.put(columnName, columnsMsg);
}
} catch (Exception e) {
e.printStackTrace();
} finally {
// 关闭资源,确保连接断开
try {
if (resultSet != null) {
resultSet.close();
}
if (statement != null) {
statement.close();
}
if (connection != null) {
connection.close();
}
} catch (Exception e) {
e.printStackTrace();
}
}
return map;
}
private String toCamelCase(String input) {
StringBuilder result = new StringBuilder();
if (input == null || input.isEmpty()) {
return "";
} else if (!input.contains("_")) {
return input.toLowerCase();
}
String[] split = input.split("_");
for (String s : split) {
if (s.isEmpty()) {
continue;
}
result.append(s.substring(0, 1).toUpperCase());
result.append(s.substring(1).toLowerCase());
}
StringBuilder ret = new StringBuilder(result.substring(0, 1).toLowerCase());
ret.append(result.substring(1, result.toString().length()));
return ret.toString();
}
private DriverManagerDataSource dataSource() {
DriverManagerDataSource dataSource = new DriverManagerDataSource();
dataSource.setDriverClassName(dbDriverClassName);
dataSource.setUrl(dbUrl);
dataSource.setUsername(dbUsername);
dataSource.setPassword(dbPassword);
return dataSource;
}
}
}
2、字段解析工具类ColumnsMsg
package com.example.schedule.tools.dto;
/**
 * @Author:
 * @Title:
 * @Description: Metadata of one database column (name, SQL type, length, precision,
 *               nullability), used by DbColumnsChecker to validate bean values
 *               before insert.
 * @Date: 2024/9/24 16:29
 * @Version: 1.0
 */
public class ColumnsMsg {
/***
 * Table name (the original comment mislabelled this as "field name")
 */
private String tableName;
/***
 * Column name, in lower camelCase to match the bean field
 */
private String columnName;
/***
 * SQL type name (e.g. varchar, numeric, timestamp)
 */
private String columnType;
/***
 * Column length (COLUMN_SIZE; for numeric types, the total precision)
 */
private int columnSize;
/***
 * Decimal digits (DECIMAL_DIGITS; digits to the right of the point)
 */
private int columnDigit;
/***
 * Whether the column accepts NULL
 */
private boolean nullAble;
public ColumnsMsg() {
}
public ColumnsMsg(String tableName) {
this.tableName = tableName;
}
public String getTableName() {
return tableName;
}
public void setTableName(String tableName) {
this.tableName = tableName;
}
public String getColumnName() {
return columnName;
}
public void setColumnName(String columnName) {
this.columnName = columnName;
}
public String getColumnType() {
return columnType;
}
public void setColumnType(String columnType) {
this.columnType = columnType;
}
public int getColumnSize() {
return columnSize;
}
public void setColumnSize(int columnSize) {
this.columnSize = columnSize;
}
public int getColumnDigit() {
return columnDigit;
}
public void setColumnDigit(int columnDigit) {
this.columnDigit = columnDigit;
}
public boolean isNullAble() {
return nullAble;
}
public void setNullAble(boolean nullAble) {
this.nullAble = nullAble;
}
@Override
public String toString() {
return "ColumnsMsg{" +
"tableName='" + tableName + '\'' +
", columnName='" + columnName + '\'' +
", columnType='" + columnType + '\'' +
", columnSize=" + columnSize +
", columnDigit=" + columnDigit +
", nullAble=" + nullAble +
'}';
}
}
3、入库字段检查逻辑类DbColumnsChecker
package com.example.schedule.tools;
import com.example.schedule.tools.dto.ColumnsMsg;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Component;
import javax.sql.DataSource;
import java.lang.reflect.Field;
import java.sql.DatabaseMetaData;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
/**
 * @Author:
 * @Title:
 * @Description: Validates bean field values against the backing table's column
 *               definitions (length / precision / numeric format) before insert.
 *               Table and column names are derived from the bean class/field names.
 * @Date: 2024/9/24 16:15
 * @Version: 1.0
 */
@Component
public class DbColumnsChecker {
    private static final Logger logger = LoggerFactory.getLogger(DbColumnsChecker.class);
    // Numeric columns must hold an optionally-signed decimal number.
    private static final String NUMERIC_PATTERN = "-?\\d+(\\.\\d+)?";
    @Autowired
    private DataSource dataSource;
    /***
     * @description: Partition a list of beans into rows that would violate the table
     *               definition and rows that are safe to insert.
     * @author:
     * @date:2024/9/27 16:35
     * @param dataList : beans of a single entity type
     * @return: map with keys "errorList" and "correctList"
     */
    public <T> HashMap<String, ArrayList<T>> checkDataList(ArrayList<T> dataList) throws NoSuchFieldException, IllegalAccessException {
        HashMap<String, ArrayList<T>> result = new HashMap<>();
        if (dataList == null || dataList.isEmpty()) {
            return result;
        }
        Class<?> clazz = dataList.get(0).getClass();
        List<Field> fieldList = Arrays.asList(clazz.getDeclaredFields());
        HashMap<String, ColumnsMsg> columnsMap = this.getColumnsMsg(clazz);
        // A map holding only the "tableName" sentinel means the table was not found.
        if (columnsMap.size() == 1 && columnsMap.containsKey("tableName")) {
            logger.info("未找到表【{}】", columnsMap.get("tableName").getTableName());
        }
        ArrayList<T> errorList = new ArrayList<>();
        ArrayList<T> correctList = new ArrayList<>();
        data:
        for (T data : dataList) {
            for (Field field : fieldList) {
                String fieldName = field.getName();
                if ("serialVersionUID".equals(fieldName)) {
                    // Skip the serialization marker field.
                    continue;
                }
                Field declaredField = data.getClass().getDeclaredField(fieldName);
                declaredField.setAccessible(true);
                Object o = declaredField.get(data);
                if (o == null) {
                    // null values cannot violate length/precision rules.
                    continue;
                }
                String value = String.valueOf(o);
                ColumnsMsg columnMsg = columnsMap.get(fieldName);
                if (columnMsg == null) {
                    logger.error("表【{}】中未找到字段{},跳过检查", clazz.getSimpleName(), fieldName);
                    continue;
                }
                if ("varchar".equals(columnMsg.getColumnType())) {
                    // String columns: only the length is checked.
                    if (value.length() > columnMsg.getColumnSize()) {
                        logger.error("DATA ERROR:字段【{}】超长,无法入库:{}", fieldName, value);
                        errorList.add(data);
                        continue data;
                    }
                    continue;
                }
                if ("numeric".equals(columnMsg.getColumnType())) {
                    // Numeric columns: format, integer length and scale are checked.
                    if (!value.matches(NUMERIC_PATTERN)) {
                        logger.error("DATA ERROR:字段【{}】={}中含有非法字符:{}", fieldName, value, data);
                        errorList.add(data);
                        continue data;
                    }
                    if (value.contains(".")) {
                        String[] split = value.split("\\.");
                        int intBit = columnMsg.getColumnSize() - columnMsg.getColumnDigit(); // digits allowed left of the point
                        int flotBit = columnMsg.getColumnDigit(); // digits allowed right of the point
                        if (split[0].length() > intBit) {
                            logger.error("DATA ERROR:字段【{}】={}中整数部分超长:{}", fieldName, value, data);
                            errorList.add(data);
                            continue data;
                        }
                        if (split[1].length() > flotBit) {
                            // Precision loss only — the row is still inserted.
                            logger.warn("DATA WARRING:字段【{}】={}中小数部分超长,将会丢失精度:{}", fieldName, value, data);
                        }
                    } else if (value.length() > columnMsg.getColumnSize()) {
                        logger.error("DATA ERROR:字段【{}】={}超长:{}", fieldName, value, data);
                        errorList.add(data);
                        continue data;
                    }
                    continue;
                }
                // timestamp (and any other type): not checked yet.
            }
            correctList.add(data);
        }
        result.put("errorList", errorList);
        result.put("correctList", correctList);
        return result;
    }
    /***
     * @description: Check one bean against pre-loaded column metadata.
     * @param columnsMap column metadata keyed by lower-camelCase name; may or may not
     *        contain a "tableName" sentinel entry depending on the caller
     * @return: "SUCCESS" when insertable, otherwise the (also logged) message of the
     *          first violating field
     */
    public static <T> String checkData(T data, HashMap<String, ColumnsMsg> columnsMap) throws NoSuchFieldException, IllegalAccessException {
        Class<?> clazz = data.getClass();
        List<Field> fieldList = Arrays.asList(clazz.getDeclaredFields());
        // Guard the sentinel lookup: the original called get("tableName").getTableName()
        // whenever size()==1, which NPEs for a genuine one-column table when the caller
        // built the map without the sentinel.
        ColumnsMsg sentinel = columnsMap.get("tableName");
        if (columnsMap.size() <= 1 && sentinel != null) {
            logger.info("未找到表【{}】", sentinel.getTableName());
        }
        for (Field field : fieldList) {
            String fieldName = field.getName();
            if ("serialVersionUID".equals(fieldName)) {
                continue;
            }
            Field declaredField = data.getClass().getDeclaredField(fieldName);
            declaredField.setAccessible(true);
            Object o = declaredField.get(data);
            if (o == null) {
                continue;
            }
            String value = String.valueOf(o);
            ColumnsMsg columnMsg = columnsMap.get(fieldName);
            if (columnMsg == null) {
                logger.error("表【{}】中未找到字段{},跳过检查", clazz.getSimpleName(), fieldName);
                continue;
            }
            if ("varchar".equals(columnMsg.getColumnType())) {
                if (value.length() > columnMsg.getColumnSize()) {
                    String msg = "DATA ERROR:字段【" + fieldName + "】超长,无法入库:" + value;
                    logger.error(msg);
                    return msg;
                }
                continue;
            }
            if ("numeric".equals(columnMsg.getColumnType())) {
                if (!value.matches(NUMERIC_PATTERN)) {
                    String msg = "DATA ERROR:字段【" + fieldName + "】=" + value + "中含有非法字符:" + data;
                    logger.error(msg);
                    return msg;
                }
                if (value.contains(".")) {
                    String[] split = value.split("\\.");
                    int intBit = columnMsg.getColumnSize() - columnMsg.getColumnDigit(); // digits allowed left of the point
                    int flotBit = columnMsg.getColumnDigit(); // digits allowed right of the point
                    if (split[0].length() > intBit) {
                        String msg = "DATA ERROR:字段【" + fieldName + "】=" + value + "中整数部分超长:" + data;
                        logger.error(msg);
                        return msg;
                    }
                    if (split[1].length() > flotBit) {
                        String msg = "DATA WARRING:字段【" + fieldName + "】=" + value + "中小数部分超长,将会丢失精度:" + data;
                        logger.warn(msg);
                        return msg;
                    }
                } else if (value.length() > columnMsg.getColumnSize()) {
                    String msg = "DATA ERROR:字段【" + fieldName + "】=" + value + "超长:" + data;
                    logger.error(msg);
                    return msg;
                }
                continue;
            }
            // timestamp columns: not checked yet.
        }
        return "SUCCESS";
    }
    // Load column metadata for the table backing clazz; the returned map always
    // contains a "tableName" sentinel entry so callers can report missing tables.
    private HashMap<String, ColumnsMsg> getColumnsMsg(Class<?> clazz) {
        String tableName = clazz.getSimpleName().replaceAll("Mapper$", "");
        tableName = tableName.replaceAll("([a-z])([A-Z])", "$1_$2").toLowerCase(); // camelCase -> snake_case
        HashMap<String, ColumnsMsg> map = new HashMap<>();
        map.put("tableName", new ColumnsMsg(tableName));
        // try-with-resources: the original never closed the Connection or ResultSet,
        // leaking a pooled connection on every metadata lookup.
        try (java.sql.Connection connection = dataSource.getConnection();
             ResultSet columns = connection.getMetaData().getColumns(null, null, tableName, null)) {
            while (columns.next()) {
                ColumnsMsg columnsMsg = new ColumnsMsg();
                // Column names are exposed in lower camelCase to match bean fields.
                String columnName = toCamelCase(columns.getString("COLUMN_NAME"));
                columnsMsg.setTableName(tableName);
                columnsMsg.setColumnName(columnName);
                columnsMsg.setColumnType(columns.getString("TYPE_NAME"));
                columnsMsg.setColumnSize(columns.getInt("COLUMN_SIZE"));
                columnsMsg.setColumnDigit(columns.getInt("DECIMAL_DIGITS"));
                columnsMsg.setNullAble(columns.getBoolean("NULLABLE"));
                map.put(columnName, columnsMsg);
            }
        } catch (SQLException e) {
            logger.error("读取表{}字段信息失败", tableName, e);
        }
        return map;
    }
    // snake_case / UPPER_SNAKE -> lowerCamelCase ("user_id" -> "userId").
    private String toCamelCase(String input) {
        if (input == null || input.isEmpty()) {
            return "";
        }
        if (!input.contains("_")) {
            return input.toLowerCase();
        }
        StringBuilder joined = new StringBuilder();
        for (String part : input.split("_")) {
            if (part.isEmpty()) {
                continue;
            }
            joined.append(Character.toUpperCase(part.charAt(0)));
            joined.append(part.substring(1).toLowerCase());
        }
        // Guard: input made solely of underscores leaves nothing to lowercase
        // (the original threw StringIndexOutOfBoundsException here).
        if (joined.length() == 0) {
            return "";
        }
        joined.setCharAt(0, Character.toLowerCase(joined.charAt(0)));
        return joined.toString();
    }
}
4、自定义注解FieldOrder
package com.example.schedule.config.annotation;
import java.lang.annotation.ElementType;
import java.lang.annotation.Retention;
import java.lang.annotation.RetentionPolicy;
import java.lang.annotation.Target;
/**
 * @Author:
 * Description:
 *   Declares the position of a bean field within a delimited file line. The file
 *   parser sorts the bean's declared fields by this value before mapping cells onto
 *   them, so only the relative order matters; fields without the annotation sort
 *   last (Integer.MAX_VALUE).
 * CreateDate: 2024/6/19 9:42
 * UpdateUser:
 * UpdateDate: 2024/6/19 9:42
 * UpdateRemark:
 * Version: 1.0
 */
@Retention(RetentionPolicy.RUNTIME)
@Target(ElementType.FIELD)
public @interface FieldOrder {
int value();
}
5、数据库密码加解密类DataSourceConfig
package com.example.schedule.config.dataSourceConfig;
import com.alibaba.druid.pool.DruidDataSource;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
import org.springframework.core.env.Environment;
/**
 * @Author:
 * @Title:
 * @Description: Builds the Druid datasource bean from application properties. The DB
 *               password is not stored in properties: {@link CustomerDruidConfig}
 *               decrypts it from the file named by spring.datasource.passwordFilePath.
 * @Date: 2024/10/9 9:41
 * @Version: 1.0
 */
@Configuration
public class DataSourceConfig {
    @Autowired
    private Environment env;
    // Kept so getPassword() can re-read the decrypted password; assigned when the
    // dataSource() bean is created.
    private CustomerDruidConfig druidConfig;
    @Bean
    public DruidDataSource dataSource(){
        druidConfig = new CustomerDruidConfig();
        // Connection settings (password is derived from the password file, see above).
        druidConfig.setDriverClassName(env.getProperty("spring.datasource.driverClassName"));
        druidConfig.setUrl(env.getProperty("spring.datasource.url"));
        druidConfig.setUsername(env.getProperty("spring.datasource.username"));
        druidConfig.setPasswordFilePath(env.getProperty("spring.datasource.passwordFilePath"));
        // Pool settings; these properties are required — a missing key fails fast here.
        druidConfig.setInitialSize(Integer.valueOf(env.getProperty("spring.datasource.druid.initialSize")));
        druidConfig.setMinIdle(Integer.valueOf(env.getProperty("spring.datasource.druid.minIdle")));
        druidConfig.setMaxActive(Integer.valueOf(env.getProperty("spring.datasource.druid.maxActive")));
        druidConfig.setMaxWait(Integer.valueOf(env.getProperty("spring.datasource.druid.maxWait")));
        druidConfig.setTimeBetweenEvictionRunsMillis(Integer.valueOf(env.getProperty("spring.datasource.druid.timeBetweenEvictionRunsMillis")));
        druidConfig.setMinEvictableIdleTimeMillis(Integer.valueOf(env.getProperty("spring.datasource.druid.minEvictableIdleTimeMillis")));
        druidConfig.setMaxEvictableIdleTimeMillis(Integer.valueOf(env.getProperty("spring.datasource.druid.maxEvictableIdleTimeMillis")));
        druidConfig.setValidationQuery(env.getProperty("spring.datasource.druid.validationQuery"));
        druidConfig.setTestWhileIdle(Boolean.parseBoolean(env.getProperty("spring.datasource.druid.testWhileIdle")));
        druidConfig.setTestOnBorrow(Boolean.parseBoolean(env.getProperty("spring.datasource.druid.testOnBorrow")));
        druidConfig.setTestOnReturn(Boolean.parseBoolean(env.getProperty("spring.datasource.druid.testOnReturn")));
        return druidConfig;
    }
    /***
     * @description: Decrypted DB password, for code that opens its own JDBC connections.
     * @throws IllegalStateException if called before the dataSource() bean has been
     *         created (the original failed here with a bare NullPointerException)
     */
    public String getPassword(){
        if (druidConfig == null) {
            throw new IllegalStateException("dataSource() bean has not been initialized yet");
        }
        return druidConfig.getPassword();
    }
}
6、数据库连接池CustomerDruidConfig
package com.example.schedule.config.dataSourceConfig;
import com.alibaba.druid.pool.DruidDataSource;
import com.example.schedule.config.utils.MyAESUtil;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
/**
 * @Author:
 * @Title:
 * @Description: DruidDataSource that reads its password from an AES-encrypted file
 *               instead of plain-text configuration. The password is decrypted once
 *               when the file path is set, and re-read on every getPassword() call.
 * @Date: 2024/10/9 9:39
 * @Version: 1.0
 */
public class CustomerDruidConfig extends DruidDataSource {
    private static final Logger logger = LoggerFactory.getLogger(CustomerDruidConfig.class);
    String passwordFilePath = "";
    /***
     * @description: Remember the password-file location, then decrypt and install
     *               the password on the underlying pool.
     * @throws IllegalArgumentException when the path is null/empty — the original
     *         threw an IOException, caught it on the very next line, and then
     *         continued with the invalid path
     */
    public void setPasswordFilePath(String passwordFilePath) {
        if (passwordFilePath == null || "".equals(passwordFilePath)) {
            logger.error("password file path error : passwordFilePath = {}", passwordFilePath);
            throw new IllegalArgumentException("password file path error : passwordFilePath = " + passwordFilePath);
        }
        this.passwordFilePath = passwordFilePath;
        super.setPassword(parsePassword());
    }
    @Override
    public String getPassword(){
        // Re-read and re-decrypt on every call so a rotated password file is picked up.
        return this.parsePassword();
    }
    /***
     * @description: Read the first line of the password file and AES-decrypt it.
     * @return: decrypted password, or "" when the file is missing/empty or decryption fails
     */
    private String parsePassword(){
        // NOTE(review): hard-coded AES key — consider externalizing it.
        String cKey = "ssfk;-scb+123!=0";
        String newPwd = "";
        // try-with-resources replaces the manual close block, which closed the
        // wrapped FileReader before the BufferedReader wrapping it.
        try (BufferedReader br = new BufferedReader(new FileReader(passwordFilePath))) {
            String pwd = br.readLine(); // null for an empty file (the original NPE'd on .trim())
            if (pwd != null && !"".equals(pwd.trim())) {
                newPwd = MyAESUtil.Decrypt(pwd.trim(), cKey);
            } else {
                logger.error("The file {} does not contain any data", passwordFilePath);
            }
        } catch (Exception e) {
            logger.error("Decrypt DB password error,", e);
        }
        return newPwd;
    }
}
7、解析文件工具类FileExtractTool
package com.example.schedule.tools;
import com.example.schedule.config.annotation.FieldOrder;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.*;
import java.lang.reflect.Field;
import java.math.BigDecimal;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.nio.file.StandardOpenOption;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.*;
import java.util.concurrent.TimeUnit;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import java.util.zip.GZIPInputStream;
/**
* @Author:
* @Title:
* @Description: 文件解析工具
* @Date: 2024/9/9 17:39
* @Version: 1.0
*/
public class FileExtractTool {
private static final Logger logger = LoggerFactory.getLogger(FileExtractTool.class);
private static final SimpleDateFormat sdfForDate = new SimpleDateFormat("yyyyMMdd");
/**
 * Parses a .gz-compressed csv file into a list of beans: decompress, parse,
 * then delete the temporary decompressed file.
 *
 * @param fileGZPath    path of the compressed file
 * @param separator     column separator used inside the file
 * @param encoding      character encoding of the file
 * @param readStartChar only rows whose first cell equals this marker are read
 * @param clazz         target bean type produced per row
 * @return list of parsed beans
 */
public static <T> ArrayList<T> parseGZFile(String fileGZPath, String separator, String encoding, String readStartChar, Class<T> clazz) throws Exception {
    // Step 1: decompress next to the archive.
    String decompressedPath = decompressGzipFile(fileGZPath);
    logger.info("文件{}解压成功,解压后的文件为{}", fileGZPath, decompressedPath);
    // Step 2: run the shared parse pipeline on the decompressed file.
    ArrayList<T> beans = sameLogic(decompressedPath, separator, encoding, readStartChar, clazz);
    // Step 3: clean up the temporary decompressed file.
    logger.info("删除解析成功后的解压文件{}", decompressedPath);
    deleteDecompressGzipFile(decompressedPath);
    return beans;
}
/**
 * Parses a plain csv file into a list of beans of the given type.
 *
 * @param filepath      path of the csv file
 * @param separator     column separator used inside the file
 * @param encoding      character encoding of the file
 * @param readStartChar only rows whose first cell equals this marker are read
 * @param clazz         target bean type produced per row
 * @return list of parsed beans
 */
public static <T> ArrayList<T> parseFile(String filepath, String separator, String encoding, String readStartChar, Class<T> clazz) throws Exception {
    // Shares the common read/convert pipeline with parseGZFile.
    ArrayList<T> beans = sameLogic(filepath, separator, encoding, readStartChar, clazz);
    return beans;
}
/**
 * Shared pipeline: read the file into raw rows, keep only rows whose first
 * cell equals readStartChar, strip that marker cell, and convert each row to
 * a bean. Rows that fail conversion are logged and skipped.
 */
private static <T> ArrayList<T> sameLogic(String filepath, String separator, String encoding, String readStartChar, Class<T> clazz) {
    ArrayList<String[]> rowList;
    try {
        rowList = readContBySeparator(filepath, separator, encoding);
    } catch (Exception e) {
        // Previously the exception was swallowed and execution continued with a
        // null list, producing a confusing NPE below. Fail fast with the cause.
        logger.error("文件解析错误");
        throw new RuntimeException("文件解析错误: " + filepath, e);
    }
    logger.info("文件{}解析成功,解析到的数据量为{}", filepath, rowList.size());
    ArrayList<T> list = new ArrayList<>();
    for (int i = 0; i < rowList.size(); i++) {
        String[] row = rowList.get(i);
        if (!readStartChar.equals(row[0])) {
            // Arrays.toString: logging the array directly prints its identity hash.
            logger.info("当前跳过第{}行,非数据行,跳过数据为:{}", i, Arrays.toString(row));
            continue;
        }
        // Drop the leading marker cell before bean conversion.
        String[] newArray = new String[row.length - 1];
        System.arraycopy(row, 1, newArray, 0, newArray.length);
        try {
            list.add(parseRow(newArray, clazz, i + 1));
        } catch (Exception e) {
            // Bad row: log with the real cause and continue with the next one.
            logger.error("DATA ERROR:{},ERROR MSG:{}", Arrays.toString(newArray), e.getMessage(), e);
        }
    }
    logger.info("文件{}解析成功,文件行数={},读取有效数据行数:{}", filepath, rowList.size(), list.size());
    return list;
}
/***
 * Decompresses a .gz file next to itself and returns the decompressed path.
 *
 * @param compressedFilePath full path of the .gz archive
 * @return path of the decompressed file
 * @throws Exception when the file is missing or is not a .gz file
 */
public static String decompressGzipFile(String compressedFilePath) throws Exception {
    // The file must exist.
    File file = new File(compressedFilePath);
    if (!file.exists()) {
        logger.error("文件不存在{}", compressedFilePath);
        throw new FileNotFoundException("文件" + compressedFilePath + "不存在");
    }
    // endsWith(".gz") also rejects a file literally named "gz" — the old
    // lastIndexOf('.')+1 check accepted it when the name had no dot.
    if (!compressedFilePath.endsWith(".gz")) {
        logger.error("文件不是gz类型{}", compressedFilePath);
        throw new Exception("文件" + compressedFilePath + "不是gz类型");
    }
    logger.info("开始解压文件{}", compressedFilePath);
    long start = System.currentTimeMillis();
    String osName = System.getProperty("os.name").toLowerCase();
    logger.info("当前运行环境信息:{}", osName);
    // Always decompress in-process (the gunzip-based variant remains available
    // as decompressGzFileWithLinuxCommand if platform-specific code is wanted).
    String filePath = decompressGzFileWithJVM(compressedFilePath);
    long end = System.currentTimeMillis();
    logger.info("文件{}解压完毕,解压耗时:{}ms", compressedFilePath, end - start);
    return filePath;
}
// Decompress in-process with GZIPInputStream. try-with-resources guarantees
// both streams are closed even on I/O failure (the old code leaked them when
// read/write threw before the manual close calls).
private static String decompressGzFileWithJVM(String compressedFilePath) throws IOException {
    String decompressedFilePath = compressedFilePath.replaceAll("\\.gz$", "");
    try (GZIPInputStream gzipIn = new GZIPInputStream(new FileInputStream(compressedFilePath));
         FileOutputStream fos = new FileOutputStream(decompressedFilePath)) {
        byte[] buffer = new byte[8192]; // larger buffer than the old 1 KiB; fewer syscalls
        int len;
        while ((len = gzipIn.read(buffer)) > 0) {
            fos.write(buffer, 0, len);
        }
    }
    return decompressedFilePath;
}
// Decompress by shelling out to gunzip (-k keeps the original archive).
private static String decompressGzFileWithLinuxCommand(String compressedFilePath) throws IOException, InterruptedException {
    // Anchor the pattern so only the trailing ".gz" is stripped.
    String decompressedFilePath = compressedFilePath.replaceAll("\\.gz$", "");
    // Each argv element must be its own argument: the old single string
    // "gunzip -k " was interpreted as one (nonexistent) program name.
    ProcessBuilder processBuilder = new ProcessBuilder("gunzip", "-k", compressedFilePath);
    Process process = processBuilder.start();
    boolean completed = process.waitFor(3, TimeUnit.HOURS);
    if (!completed) {
        // Timed out: kill the child instead of leaving it running.
        process.destroyForcibly();
        throw new RuntimeException("解压文件失败: " + compressedFilePath);
    }
    if (process.exitValue() != 0) {
        throw new RuntimeException("解压文件失败: " + compressedFilePath);
    }
    return decompressedFilePath;
}
/***
 * Reads every line of a csv file and splits it on the separator.
 *
 * @param filePath  path of the csv file
 * @param separator column separator (interpreted as a regex by String.split)
 * @param encoding  character encoding of the file
 * @return one String[] per line of the file
 * @throws Exception when the file is missing or is not a csv file
 */
private static ArrayList<String[]> readContBySeparator(String filePath, String separator, String encoding) throws Exception {
    // The file must exist.
    File file = new File(filePath);
    if (!file.exists()) {
        logger.error("文件不存在{}", filePath);
        throw new FileNotFoundException("文件" + filePath + "不存在");
    }
    // endsWith(".csv") also rejects a file literally named "csv" — the old
    // lastIndexOf('.')+1 check accepted it when the name had no dot.
    if (!filePath.endsWith(".csv")) {
        logger.error("文件不是csv类型{}", filePath);
        throw new Exception("文件" + filePath + "不是csv类型");
    }
    ArrayList<String[]> result = new ArrayList<>();
    // try-with-resources: the old code leaked the reader when readLine threw.
    try (BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(filePath), encoding))) {
        String line;
        while ((line = reader.readLine()) != null) {
            // NOTE(review): split() drops trailing empty columns; switch to
            // split(separator, -1) if trailing empties must be preserved —
            // kept as-is to avoid changing downstream column counts.
            result.add(line.split(separator));
        }
    }
    return result;
}
/**
 * Converts one data row (array of cells) into a new instance of {@code clazz}.
 * Fields are matched to cells positionally after sorting by @FieldOrder;
 * "NA"-style cells become 0/null, empty cells become null.
 *
 * @param row   the cells of one data line (marker cell already removed)
 * @param clazz target bean type (must have an accessible no-arg constructor)
 * @param line  1-based line number, used only in error messages
 * @return the populated bean
 * @throws Exception when a cell cannot be converted to its field's type
 */
public static <T> T parseRow(String[] row, Class<T> clazz, Integer line) throws Exception {
    // getDeclaredConstructor().newInstance() replaces the deprecated clazz.newInstance().
    T obj = clazz.getDeclaredConstructor().newInstance();
    Field[] fields = clazz.getDeclaredFields();
    // Order fields by @FieldOrder; fields without the annotation sort last.
    Arrays.sort(fields, Comparator.comparingInt(f -> {
        FieldOrder annotation = f.getAnnotation(FieldOrder.class);
        return annotation != null ? annotation.value() : Integer.MAX_VALUE;
    }));
    // SimpleDateFormat is NOT thread-safe and this method runs concurrently in
    // the producer threads — use a per-call instance, never a shared static one.
    SimpleDateFormat dateFormat = new SimpleDateFormat("yyyyMMdd");
    // Compiled once per row instead of once per BigDecimal cell.
    Pattern numberPattern = Pattern.compile("\\d+\\.?\\d*");
    for (int i = 0; i < fields.length && i < row.length; i++) {
        Field field = fields[i];
        field.setAccessible(true);
        String cellValue = row[i];
        Class<?> type = field.getType();
        try {
            if ("NA".equals(cellValue) || "N/A".equals(cellValue) || "N\\A".equals(cellValue)) {
                logger.info("当前读取到第{}行第{}列值为NA,字段类型为{},已设置为空或0", line, (i + 2), type);
                setNaDefault(field, obj, type);
            } else if (cellValue != null && !"".equals(cellValue)) {
                setPresentValue(field, obj, type, cellValue, numberPattern, dateFormat, line, i);
            } else {
                // NOTE(review): null on a primitive field throws
                // IllegalArgumentException, rethrown below as a generic
                // Exception — identical to the original behavior.
                field.set(obj, null);
            }
        } catch (ParseException e) {
            logger.error("解析第{}行的{}列报错,读取到的值为【{}】。时间格式错误,应为yyyyMMdd", line, (i + 2), cellValue);
            throw new ParseException("解析第"+ line + "行的第" + (i + 2) + "列报错,读取到的值为【" + cellValue + "】。时间格式错误,应为yyyyMMdd", (i + 2));
        } catch (NumberFormatException e) {
            logger.error("解析第{}行的{}列报错,读取到的值为【{}】。代码中配置的字段名为【{}】,字段类型为{}",line, (i + 2), cellValue, field.getName(), type);
            throw new NumberFormatException("解析第" + line + "行的第" + (i + 2) + "列报错,读取到的值为【" + cellValue + "】。代码中配置的字段名为【" + field.getName() + "】,字段类型为" + type);
        } catch (Exception e) {
            logger.error("解析第{}行的第{}列报错,错误原因:{}", line, (i + 2), e.getMessage());
            throw new Exception("解析第" + line + "行的第" + (i + 2) + "列报错,错误原因:" + e.getMessage());
        }
    }
    return obj;
}
// Fills the default for an "NA" cell: BigDecimal.ZERO, a typed numeric zero,
// or null for text. Each numeric type gets a zero of its own boxed type —
// the old field.set(obj, 0) threw IllegalArgumentException for wrapper
// Byte/Short/Long/Float/Double fields, rejecting the whole row.
private static void setNaDefault(Field field, Object obj, Class<?> type) throws IllegalAccessException {
    if (type == BigDecimal.class) {
        field.set(obj, BigDecimal.ZERO);
    } else if (type == byte.class || type == Byte.class) {
        field.set(obj, (byte) 0);
    } else if (type == short.class || type == Short.class) {
        field.set(obj, (short) 0);
    } else if (type == int.class || type == Integer.class) {
        field.set(obj, 0);
    } else if (type == long.class || type == Long.class) {
        field.set(obj, 0L);
    } else if (type == float.class || type == Float.class) {
        field.set(obj, 0F);
    } else if (type == double.class || type == Double.class) {
        field.set(obj, 0D);
    } else if (type == String.class || type == Character.class) {
        field.set(obj, null);
    }
}
// Converts a non-empty cell to the field's declared type and assigns it.
private static void setPresentValue(Field field, Object obj, Class<?> type, String cellValue,
                                    Pattern numberPattern, SimpleDateFormat dateFormat,
                                    Integer line, int i) throws Exception {
    if (type == String.class) {
        field.set(obj, cellValue);
    } else if (type == byte.class || type == Byte.class) {
        field.set(obj, Byte.valueOf(cellValue));
    } else if (type == short.class || type == Short.class) {
        field.set(obj, Short.valueOf(cellValue));
    } else if (type == int.class || type == Integer.class) {
        field.set(obj, Integer.valueOf(cellValue));
    } else if (type == long.class || type == Long.class) {
        field.set(obj, Long.valueOf(cellValue));
    } else if (type == float.class || type == Float.class) {
        field.set(obj, Float.valueOf(cellValue));
    } else if (type == double.class || type == Double.class) {
        field.set(obj, Double.valueOf(cellValue));
    } else if (type == boolean.class || type == Boolean.class) {
        field.set(obj, Boolean.valueOf(cellValue));
    } else if (type == BigDecimal.class) {
        // Extract the first unsigned number embedded in the cell.
        Matcher matcher = numberPattern.matcher(cellValue);
        String number;
        if (matcher.find()) {
            number = matcher.group();
        } else {
            logger.error("解析第{}行的{}列报错,读取到的值为【{}】。代码中配置的字段名为【{}】,字段类型为{}",line, (i + 2), cellValue, field.getName(), type);
            throw new NumberFormatException("解析第" + line + "行的第" + (i + 2) + "列报错,读取到的值为【" + cellValue + "】。代码中配置的字段名为【" + field.getName() + "】,字段类型为" + type);
        }
        // NOTE(review): a cell containing "万" is stored negated — domain rule
        // carried over from the original code (the regex drops any minus sign);
        // confirm this is the intended encoding of negative amounts.
        BigDecimal value = cellValue.contains("万")
                ? new BigDecimal(number).negate()
                : new BigDecimal(number);
        field.set(obj, value);
    } else if (type == Date.class) {
        field.set(obj, dateFormat.parse(cellValue));
    }
}
/***
 * Deletes the temporary decompressed file once parsing is finished.
 * Best-effort: failures are logged, never thrown.
 *
 * @param filePath full path of the file to delete
 */
public static void deleteDecompressGzipFile(String filePath) {
    File file = new File(filePath);
    if (!file.exists()) {
        // warn + path: the old info-level "文件不存在" gave no context at all.
        logger.warn("文件不存在{}", filePath);
        return;
    }
    if (file.delete()) {
        logger.info("文件删除成功{}", filePath);
    } else {
        // A failed delete leaves garbage behind — surface it above info level.
        logger.warn("文件删除失败{}", filePath);
    }
}
/**
 * Lists the full paths of all regular files in the directory of
 * {@code fullPath} whose names start with the file-name part of
 * {@code fullPath} (prefix match).
 *
 * @param fullPath directory + file-name-prefix, e.g. "/data/in/trade_2024"
 * @return matching file paths; empty list on I/O error
 */
public static List<String> getFileList(String fullPath){
    List<String> resultList = new ArrayList<>();
    try {
        Path given = Paths.get(fullPath);
        // getParent() is null for a bare file name; fall back to the current
        // directory instead of handing null to Files.list (old code NPE'd).
        Path dirPath = given.getParent() != null ? given.getParent() : Paths.get(".");
        String filePrefix = given.getFileName().toString();
        // Stream from Files.list must be closed — try-with-resources does it.
        try (Stream<Path> paths = Files.list(dirPath)) {
            resultList = paths
                    .filter(Files::isRegularFile)            // files only, skip directories
                    .filter(p -> p.getFileName().toString().startsWith(filePrefix))
                    .map(Path::toString)
                    .collect(Collectors.toList());
        }
    } catch (IOException e) {
        // error level — this is a failure, not routine information.
        logger.error("读取文件路径错误:{}", fullPath, e);
    }
    return resultList;
}
/**
 * Reads one .gz data file end-to-end: decompress, parse into raw rows via
 * DataParserTool, then remove the temporary decompressed file.
 *
 * @param absolutePath absolute path of the .gz archive
 * @return one String[] per data line
 * @throws Exception when decompression or parsing fails
 */
public static List<String[]> readDataFile(String absolutePath) throws Exception {
    // Decompress next to the archive.
    String unzipped = decompressGzipFile(absolutePath);
    logger.info("文件{}解压成功,解压后的文件为{}", absolutePath, unzipped);
    // Parse: UTF-8 encoding, section-sign separator, rows flagged with "T".
    List<String[]> rows = DataParserTool.readDataFile(new File(unzipped), "UTF-8", "\u00A7", true, "T");
    // Remove the temporary decompressed file.
    logger.info("删除解析成功后的解压文件{}", unzipped);
    deleteDecompressGzipFile(unzipped);
    return rows;
}
/***
 * Appends parse-error content to "<name>_DATAERROR<ext>" next to the source
 * file. Creates the file on first write; later writes append.
 *
 * @param filePath path of the source data file the error came from
 * @param content  error line to append
 */
public static void appendErrBeanData(String filePath, String content) {
    appendLine(deriveErrorFileName(filePath, "_DATAERROR", null), content);
}
/***
 * Appends insert-error content to "<name>_INSERTERROR.txt" next to the source
 * file (the extension is always forced to .txt). Creates the file on first
 * write; later writes append.
 *
 * @param filePath path of the source data file the error came from
 * @param content  error line to append
 */
public static void appendErrPojoData(String filePath, String content) {
    appendLine(deriveErrorFileName(filePath, "_INSERTERROR", ".txt"), content);
}
// Builds the error-file name: inserts `marker` before the extension; when
// `forcedExtension` is non-null it replaces the original extension. A path
// without an extension simply gets the marker appended.
private static String deriveErrorFileName(String filePath, String marker, String forcedExtension) {
    int lastDotIndex = filePath.lastIndexOf('.');
    if (lastDotIndex == -1) {
        return filePath + marker;
    }
    String base = filePath.substring(0, lastDotIndex);
    String extension = forcedExtension != null ? forcedExtension : filePath.substring(lastDotIndex);
    return base + marker + extension;
}
// Appends one line to the file, creating it if needed. UTF-8 is explicit:
// the old no-arg getBytes() depended on the platform default charset, which
// corrupts the Chinese error text on non-UTF-8 platforms.
private static void appendLine(String fileName, String content) {
    Path path = Path.of(fileName);
    try {
        Files.write(path, (content + System.lineSeparator()).getBytes(StandardCharsets.UTF_8),
                StandardOpenOption.CREATE, StandardOpenOption.APPEND);
        logger.info("解析出错的内容成功写入文件");
    } catch (IOException e) {
        logger.error("写入文件时发生错误", e);
    }
}
}