java批量处理大文件(生产者消费者模型)

java使用生产者消费者模型批量处理百万级数据量文件

说明:
文件解析工具类部分代码重复,仅需要看第一个主要的核心逻辑类即可
1、主要核心逻辑抽象类BatchProcessLargeFile

package com.example.schedule.threads;

import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.example.schedule.config.annotation.FieldOrder;
import com.example.schedule.config.dataSourceConfig.DataSourceConfig;
import com.example.schedule.tools.DbColumnsChecker;
import com.example.schedule.tools.FileExtractTool;
import com.example.schedule.tools.dto.ColumnsMsg;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.jdbc.datasource.DriverManagerDataSource;

import java.io.*;
import java.lang.reflect.Field;
import java.math.BigDecimal;
import java.sql.Connection;
import java.sql.DatabaseMetaData;
import java.sql.ResultSet;
import java.sql.Statement;
import java.util.*;
import java.util.concurrent.*;
import java.util.function.BiConsumer;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * @Author:
 * @Title:
 * @Description:
 *      一、功能说明
 *           生产者消费者模型处理大文件,使用 LinkedBlockingQueue 作为数据缓存区,三级线程开启数量可配置
 *      二、处理逻辑
 *           线程 threadRead 读取文件转换为Bean -> 将 bean 放到第一个 LinkedBlockingQueue A 中
 *           线程 threadTrans 读取队列A中的bean -> 对数据进行转换填充生成pojo类,放到第二个 LinkedBlockingQueue B 中
 *           线程 threadInsert 读取队列B中的pojo -> 批量入库
 *      三、其他功能:
 *           1、读取文件数据解析成bean时,文件字段内容与bean类定义不一致将会跳过,且将错误数据与错误原因写入到与文件路径一致的新文件parseError中
 *           2、数据入库时,会检查每个字段的数据值是否符合数据库字段定义,且将错误数据与错误原因写入到与文件路径一致的新文件中insertError中(可开关)
 *      四、涉及技术
 *           反射、内部类、多线程、自定义注解、抽象类、继承、字段排序、文件处理、数据库密码加解密
 * @Date: 2024/9/27 10:29
 * @Version: 1.0
 */
public abstract class BatchProcessLargeFile {

    private static final Logger logger = LoggerFactory.getLogger(BatchProcessLargeFile.class);

    // ==================公共配置=====================

    @Value("${spring.datasource.url}")
    private String dbUrl;

    @Value("${spring.datasource.username}")
    private String dbUsername;

    @Value("${spring.datasource.driverClassName}")
    private String dbDriverClassName;

    @Autowired
    private DataSourceConfig dataSourceConfig;

    @Value("${batchConfig.timeOut:600}")
    private String timeOut;

    // 性能测试:是否入库/queue队列大小监控
    @Value("${batchConfig.performanceTesting:true}")
    private String performanceTesting;

    // ==================文件处理配置=====================

    @Value("${batchConfig.batchInsertNum:200}")
    private String batchInsertNum;

    // 读文件线程数
    @Value("${batchConfig.threadReadCount:1}")
    private String threadReadCount;

    // 转换Bean线程数
    @Value("${batchConfig.threadTransferCount:2}")
    private String threadTransferCount;

    // 入库线程数
    @Value("${batchConfig.threadInsertCount:4}")
    private String threadInsertCount;

    // 入库字段检查
    @Value("${batchConfig.insertColumnsCheck:true}")
    private String insertColumnsCheck;

    /***
     * @description: 大文件批量处理逻辑抽象父类
     * @author:
     * @date:2024/9/27 14:21
     * @param    fileFullName:文件全路径
     * @param    clazz:文件单行解析目标bean
     * @param    beanQueue:bean属性类暂存区
     * @param    insertQueue:入表bean属性类暂存区
     * @param    batchInsertMethod:批量入表mapper语句
     * @return: void
     */
    public <T1, T2> void startThread(String fileFullName, Class<T1> clazz, LinkedBlockingQueue<T1> beanQueue, LinkedBlockingQueue<T2> insertQueue, BiConsumer<List<T2>, Integer> batchInsertMethod) {
        try {
            long start = System.currentTimeMillis();

            int threadReadNum = Integer.parseInt(threadReadCount);
            int threadTransNum = Integer.parseInt(threadTransferCount);
            int threadInsertNum = Integer.parseInt(threadInsertCount);
            int timeOutWait = Integer.parseInt(timeOut);
            int batchInsertCount = Integer.parseInt(batchInsertNum);
            boolean columnsCheck = Boolean.parseBoolean(insertColumnsCheck);
            boolean insertSwitch = Boolean.parseBoolean(performanceTesting);

            logger.info("当前开启读文件线程数:{},转换bean线程数:{},入库线程数:{}", threadReadNum, threadTransNum, threadInsertNum);

            List<CompletableFuture<Void>> futureList = new ArrayList<>();

            logger.info("~~~~~~~~~~~~~~~~~创建读取文件线程~~~~~~~~~~~~~~~~~");
            CompletableFuture<Void> producerFuture = null;
            ExecutorService executorRead = Executors.newFixedThreadPool(threadReadNum, r -> {
                Thread t = new Thread(r);
                t.setName("ThreadRead-" + t.getId());
                return t;
            });
            long totalLines = getFileLineNum(fileFullName);
            long linesPerThread = totalLines / threadReadNum;
            logger.info("当前获取到文件总行数为:{},开启线程数为:{},每个线程处理数量为:{}", totalLines, threadReadNum, linesPerThread);
            long remainder = totalLines % threadReadNum;
            long startLine = 1;
            for (int i = 1; i <= threadReadNum; i++) {
                long endLine = startLine + linesPerThread;
                if (i == threadReadNum) {
                    // 如果是最后一个线程,把剩余的行数加上
                    endLine += remainder;
                }
                logger.info("当前开启线程{},处理的行数为:[{},{})", i, startLine, endLine);
                producerFuture = CompletableFuture.runAsync(new readFile(beanQueue, fileFullName, clazz, "\u00A7", "D", "UTF-8", startLine, endLine), executorRead);
                futureList.add(producerFuture);
                startLine = endLine;
            }

            logger.info("~~~~~~~~~~~~~~~~~创建数据转换线程~~~~~~~~~~~~~~~~~");
            ExecutorService executorTransfer = Executors.newFixedThreadPool(threadTransNum, r -> {
                Thread t = new Thread(r);
                t.setName("ThreadTransfer-" + t.getId());
                return t;
            });
            for (int i = 0; i < threadTransNum; i++) {
                CompletableFuture<Void> future = CompletableFuture.runAsync(new transBean(beanQueue, insertQueue, this, timeOutWait), executorTransfer);
                futureList.add(future);
            }

            logger.info("~~~~~~~~~~~~~~~~~创建写入数据库线程~~~~~~~~~~~~~~~~~");
            String dbPassword = dataSourceConfig.getPassword();    // 先解析数据库密码
            ExecutorService executorInsert = Executors.newFixedThreadPool(threadInsertNum, r -> {
                Thread t = new Thread(r);
                t.setName("ThreadInsert-" + t.getId());
                return t;
            });
            for (int i = 0; i < threadInsertNum; i++) {
                CompletableFuture<Void> future = CompletableFuture.runAsync(new insertData(insertQueue, batchInsertMethod, dbUrl, dbUsername, dbPassword, dbDriverClassName, fileFullName, batchInsertCount, timeOutWait, columnsCheck, insertSwitch), executorInsert);
                futureList.add(future);
            }

            logger.info("线程开启完毕,等待线程执行完毕");
            CompletableFuture.allOf(futureList.toArray(new CompletableFuture[0])).join();

            logger.info("线程执行完毕,关闭线程池");
            executorRead.shutdown();
            executorTransfer.shutdown();
            executorInsert.shutdown();

            long end = System.currentTimeMillis();
            logger.info("处理文件{}耗时:{}ms", fileFullName, end - start);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /***
     * @description: 获取文件总行数
     * @author:
     * @date:2024/10/5 15:59
     * @param	filePath	:
     * @return: int
     */
    public static long getFileLineNum(String filePath) {
        try (LineNumberReader lineNumberReader = new LineNumberReader(new FileReader(filePath))){
            lineNumberReader.skip(Long.MAX_VALUE);
            return lineNumberReader.getLineNumber();
        } catch (IOException e) {
            return -1;
        }
    }

    /***
     * @description : 内部类,读取文件内容转换为bean,并放到阻塞队列【beanQueue】
     * @author:
     * @date:2024/9/27 10:32
     * @return:
     */
    static class readFile<T> implements Runnable {

        private static final Logger logger = LoggerFactory.getLogger(readFile.class);
        private final LinkedBlockingQueue<T> beanQueue;
        private final String filePath;
        private final Class<T> clazz;
        private final String separator;
        private final String readStartChar;
        private final String encoding;
        private final long startLine;
        private final long endLine;

        /***
         * @description:
         * @author:
         * @date:2024/9/27 10:36
         * @param    beanQueue :暂时存放Bean的队列
         * @param    filePath :文件路径
         * @param    clazz :解析文件生成目标Bean类型
         * @param    separator :分隔符
         * @param    readStartChar : 读取指定字符【readStartChar】开头的行
         * @return:
         */
        public readFile(LinkedBlockingQueue<T> beanQueue, String filePath, Class<T> clazz, String separator, String readStartChar, String encoding, long startLine, long endLine) {
            this.beanQueue = beanQueue;
            this.filePath = filePath;
            this.clazz = clazz;
            this.separator = separator;
            this.readStartChar = readStartChar;
            this.encoding = encoding;
            this.startLine = startLine;
            this.endLine = endLine;
        }

        @Override
        public void run() {
            BufferedReader reader = null;
            try {
                reader = new BufferedReader(new InputStreamReader(new FileInputStream(filePath), encoding));
                String line;
                long currentLine = 1;

                // 跳过不属于本线程的行
                while (currentLine < startLine && (line = reader.readLine()) != null) {
                    currentLine++;
                }

                while (currentLine < endLine && (line = reader.readLine()) != null) {
                    T bean = null;
                    String[] row = line.split(separator);
                    if (row.length == 0) {
                        currentLine++;
                        continue;
                    } else {
                        if (!row[0].equals(readStartChar)) {
                            logger.info("跳过字符{}的行:{}", row[0], row);
                            currentLine++;
                            continue;
                        }
                        // 去除第一个标识符
                        String[] newArray = new String[row.length - 1];
                        System.arraycopy(row, 1, newArray, 0, newArray.length);
                        try {
                            bean = parseRow(newArray, clazz, currentLine);
                        } catch (Exception e) {
                            e.printStackTrace();
                            logger.error("DATA ERROR:{},ERROR MSG:{}", newArray, e.getMessage());
                            // 解析错误的数据放到指定文件中
                            FileExtractTool.appendErrBeanData(filePath, line + "\u00A7" + e.getMessage());
                        }
                    }
                    if (bean != null) {
                        logger.info("放入beanQueue中的数据类型:{}", bean.toString());
                        beanQueue.put(bean);  // 阻塞操作,如果队列满了会等待
                    }
                    currentLine++;
                }
                logger.info("read线程{}结束运行", Thread.currentThread().getName());
            } catch (IOException | InterruptedException e) {
                e.printStackTrace();
                logger.error("解析错误");
            } finally {
                if (reader != null) {
                    try {
                        reader.close();
                    } catch (IOException e) {
                        e.printStackTrace();
                    }
                }
            }
        }

        private T parseRow(String[] row, Class<T> clazz, long line) throws Exception {
            T obj = null;
            try {
                // 调用反射,根据传入的class创建对象
                obj = clazz.newInstance();
                // 获取类中的所有字段
                Field[] fields = clazz.getDeclaredFields();
                // 排序字段
                Arrays.sort(fields, Comparator.comparingInt(f -> {
                    FieldOrder annotation = f.getAnnotation(FieldOrder.class);
                    if (annotation != null) {
                        return annotation.value();
                    } else {
                        // 如果没有注解,默认返回一个较大的值,放到末尾
                        return Integer.MAX_VALUE;
                    }
                }));

                // 开始解析
                for (int i = 0; i < fields.length && i < row.length; i++) {
                    // 当前处理的字段
                    Field field = fields[i];
                    field.setAccessible(true); // 设置字段访问权限为可访问
                    // 当前行的第i个cell的数据
                    String cellValue = row[i];
                    // 获取当前字段的类型
                    Class<?> type = field.getType();
                    try {
                        if (cellValue != null && !"".equals(cellValue)) {
                            // 根据字段类型将cell的值添加到字段
                            if (type == String.class) {
                                field.set(obj, cellValue);
                                continue;
                            }
                            if (type == byte.class || type == Byte.class) {
                                field.set(obj, Byte.valueOf(cellValue));
                                continue;
                            }
                            if (type == short.class || type == Short.class) {
                                field.set(obj, Short.valueOf(cellValue));
                                continue;
                            }
                            if (type == int.class || type == Integer.class) {
                                field.set(obj, Integer.valueOf(cellValue));
                                continue;
                            }
                            if (type == long.class || type == Long.class) {
                                field.set(obj, Long.valueOf(cellValue));
                                continue;
                            }
                            if (type == float.class || type == Float.class) {
                                field.set(obj, Float.valueOf(cellValue));
                                continue;
                            }
                            if (type == double.class || type == Double.class) {
                                field.set(obj, Double.valueOf(cellValue));
                                continue;
                            }
                            if (type == boolean.class || type == Boolean.class) {
                                field.set(obj, Boolean.valueOf(cellValue));
                                continue;
                            }
                            if (type == BigDecimal.class) {
                                // 仅提取数字
                                if (cellValue != null && !"".equals(cellValue)) {
                                    Pattern pattern = Pattern.compile("\\d+\\.?\\d*");
                                    Matcher matcher = pattern.matcher(cellValue);
                                    String number = "";
                                    if (matcher.find()) {
                                        // 返回匹配到的字符串
                                        number = matcher.group();
                                    } else {
                                        throw new NumberFormatException();
                                    }
                                    BigDecimal value = BigDecimal.ZERO;
                                    if (cellValue.contains("万")) {
                                        // 解析为负数
                                        value = new BigDecimal(number).negate();
                                    } else {
                                        value = new BigDecimal(number);
                                    }
                                    field.set(obj, value);
                                } else {
                                    field.set(obj, BigDecimal.ZERO);
                                }
                            }
                        } else {
                            field.set(obj, null);
                        }
                    } catch (NumberFormatException e) {
                        logger.error("解析第{}行的{}列报错,读取到的值为【{}】。代码中配置的字段名为【{}】,字段类型为{}", line, (i + 2), cellValue, field.getName(), type);
                        throw new NumberFormatException("解析第" + line + "行的第" + (i + 2) + "列报错,读取到的值为【" + cellValue + "】。代码中配置的字段名为【" + field.getName() + "】,字段类型为" + type);
                    }
                }
            } catch (InstantiationException e) {
                e.printStackTrace();
                logger.error("创建实体类【{}】时出错", clazz);
            } catch (IllegalAccessException e) {
                e.printStackTrace();
                logger.error("实体类【{}】无访问权限", clazz);
            }
            return obj;
        }
    }

    /***
     * @description : 内部类,读取阻塞队列【beanQueue】中的数据,按逻辑转换,转换完毕的放到【insertQueue】
     * @author:
     * @date:2024/9/27 10:40
     * @return:
     */
    static class transBean<T1, T2> implements Runnable {

        private final LinkedBlockingQueue<T1> beanQueue;
        private final LinkedBlockingQueue<T2> insertQueue;
        private final BatchProcessLargeFile batchProcessLargeFile;
        private final int timeOutWait;

        public transBean(LinkedBlockingQueue<T1> beanQueue, LinkedBlockingQueue<T2> insertQueue, BatchProcessLargeFile batchProcessLargeFile, int timeOutWait) {
            this.beanQueue = beanQueue;
            this.insertQueue = insertQueue;
            this.batchProcessLargeFile = batchProcessLargeFile;
            this.timeOutWait = timeOutWait;
        }

        @Override
        public void run() {
            try {
                while (true) {
                    // 从队列中取元素,如果队列为空则等待timeOut秒后结束线程
                    T1 take = beanQueue.poll(timeOutWait, TimeUnit.SECONDS);
                    if (take != null) {
                        logger.info("从beanQueue中取出的数据类型:{}", take.toString());
                        T2 info = batchProcessLargeFile.customerTrans(take);
                        insertQueue.put(info);
                        logger.info("放入insertQueue中的数据类型:{}", info.getClass());
                    } else {
                        logger.info("trans线程{}结束运行:{}秒内没有从beanQueue中获取到数据", Thread.currentThread().getName(), timeOutWait);
                        break;
                    }
                }
            } catch (InterruptedException e) {
                e.printStackTrace();
            }
        }
    }

    public abstract <T1, T2> T2 customerTrans(T1 take);

    /***
     * @description : 内部类,读取阻塞队列【insertQueue】中的数据,入库
     * @author:
     * @date:2024/9/27 10:40
     * @return:
     */
    static class insertData<T> implements Runnable {

        private final ObjectMapper objectMapper = new ObjectMapper();

        private String dbUrl;
        private String dbUsername;
        private String dbPassword;
        private String dbDriverClassName;
        private String fileFullName;
        private int batchInsertCount;
        private int timeOutWait;
        private boolean columnsCheck;
        private boolean insertSwitch;

        private final LinkedBlockingQueue<T> insertQueue;
        private final BiConsumer<List<T>, Integer> batchInsertMethod;

        public insertData(LinkedBlockingQueue<T> insertQueue, BiConsumer<List<T>, Integer> batchInsertMethod, String dbUrl, String dbUsername, String dbPassword, String dbDriverClassName, String fileFullName, int batchInsertCount, int timeOutWait, boolean columnsCheck, boolean insertSwitch) {
            this.insertQueue = insertQueue;
            this.batchInsertMethod = batchInsertMethod;
            this.dbUrl = dbUrl;
            this.dbUsername = dbUsername;
            this.dbPassword = dbPassword;
            this.dbDriverClassName = dbDriverClassName;
            this.fileFullName = fileFullName;
            this.timeOutWait = timeOutWait;
            this.batchInsertCount = batchInsertCount;
            this.columnsCheck = columnsCheck;
            this.insertSwitch = insertSwitch;
        }

        @Override
        public void run() {
            HashMap<String, ColumnsMsg> columnsMsg = null;
            ArrayList<T> list = new ArrayList<>();
            long start = System.currentTimeMillis();
            while (true) {
                try {
                    T take = insertQueue.poll(timeOutWait, TimeUnit.SECONDS);
                    if (take != null) {
                        logger.info("从insertQueue中取出的数据类型:{}", take.getClass());
                        boolean flag;
                        // 入库字段检查
                        if (columnsCheck){
                            if (columnsMsg == null) {
                                columnsMsg = getColumnsMsg(take.getClass());
                            }
                            flag = checkData(take, columnsMsg, fileFullName);
                        }else {
                            flag = true;
                        }
                        // 根据检查结果判定是否入库
                        if (flag) {
                            list.add(take);
                            if (list.size() == batchInsertCount) {
                                logger.info("当前入库的数据量:{}", list.size());
                                if (insertSwitch){
                                    try {
                                        batchInsertMethod.accept(list, list.size());
                                    }catch (Exception e){
                                        e.printStackTrace();
                                        logger.error("入库出错:{}", e.getMessage());
                                    }
                                }
                                list.clear();
                                long end = System.currentTimeMillis();
                                logger.warn("【文件处理】批次量大小为{},字段检查并入库完成,耗时:{}ms", batchInsertCount, end - start);
                                start = end;
                            }
                        }
                    } else {
                        if (list.size() > 0) {
                            logger.info("当前入库的数据量(最后批次):{}", list.size());
                            if (insertSwitch){
                                try {
                                    batchInsertMethod.accept(list, list.size());
                                }catch (Exception e){
                                    e.printStackTrace();
                                    logger.error("入库出错:{}", e.getMessage());
                                }
                            }
                            long end = System.currentTimeMillis();
                            logger.warn("【文件处理】最后批次入库完成,耗时:{}ms", end - start);
                        }
                        logger.info("insert线程{}结束运行:{}秒内没有从insertQueue中获取到数据", Thread.currentThread().getName(), timeOutWait);
                        break;
                    }
                } catch (InterruptedException e) {
                    e.printStackTrace();
                }
            }
        }

        private boolean checkData(T pojo, HashMap<String, ColumnsMsg> columnsMsg, String fileFullName) {
            try {
                StringBuilder msg = new StringBuilder();
                String result = DbColumnsChecker.checkData(pojo, columnsMsg);
                if (!"SUCCESS".equals(result)) {
                    msg.setLength(0);// 清空数据
                    msg.append(printBeanFields(pojo)).append(":").append(result);
                    FileExtractTool.appendErrPojoData(fileFullName, msg.toString());
                    return false;
                } else {
                    return true;
                }
            } catch (NoSuchFieldException | IllegalAccessException e) {
                e.printStackTrace();
            }
            return false;
        }

        public <T> String printBeanFields(T bean) {
            if (bean == null) {
                return null;
            }

            // 获取 bean 的类信息
            Class<?> clazz = bean.getClass();
            Field[] fields = clazz.getDeclaredFields();

            // 创建一个 Map 来存储字段名和字段值
            java.util.Map<String, Object> fieldMap = new java.util.HashMap<>();

            // 访问所有字段
            for (Field field : fields) {
                field.setAccessible(true); // 设置可访问性
                try {
                    Object value = field.get(bean); // 获取字段值
                    fieldMap.put(field.getName(), value); // 将字段名和字段值添加到 Map 中
                } catch (IllegalAccessException e) {
                    e.printStackTrace();
                }
            }

            // 将 Map 转换为 JSON 字符串并返回
            try {
                return objectMapper.writeValueAsString(fieldMap);
            } catch (JsonProcessingException e) {
                e.printStackTrace();
                return null; // 处理 JSON 转换异常
            }
        }

        // 获取clazz对应的表字段信息
        private HashMap<String, ColumnsMsg> getColumnsMsg(Class<?> clazz) {
            Connection connection = null;
            Statement statement = null;
            ResultSet resultSet = null;
            HashMap<String, ColumnsMsg> map = new HashMap<>();
            try {
                // 创建数据源并获取连接
                DriverManagerDataSource dataSource = dataSource();
                connection = dataSource.getConnection();

                // 执行查询以获取表的列信息
                String tableName = clazz.getSimpleName().replaceAll("Mapper$", "");
                tableName = tableName.replaceAll("([a-z])([A-Z])", "$1_$2").toLowerCase();  // 驼峰转下划线
                DatabaseMetaData meta = connection.getMetaData();
                ResultSet columns = meta.getColumns(null, null, tableName, null);
                while (columns.next()) {
                    ColumnsMsg columnsMsg = new ColumnsMsg();
                    // 字段名转换为小驼峰
                    String columnName = toCamelCase(columns.getString("COLUMN_NAME"));
                    columnsMsg.setTableName(tableName);
                    columnsMsg.setColumnName(columnName);
                    columnsMsg.setColumnType(columns.getString("TYPE_NAME"));
                    columnsMsg.setColumnSize(columns.getInt("COLUMN_SIZE"));
                    columnsMsg.setColumnDigit(columns.getInt("DECIMAL_DIGITS"));
                    columnsMsg.setNullAble(columns.getBoolean("NULLABLE"));
                    map.put(columnName, columnsMsg);
                }
            } catch (Exception e) {
                e.printStackTrace();
            } finally {
                // 关闭资源,确保连接断开
                try {
                    if (resultSet != null) {
                        resultSet.close();
                    }
                    if (statement != null) {
                        statement.close();
                    }
                    if (connection != null) {
                        connection.close();
                    }
                } catch (Exception e) {
                    e.printStackTrace();
                }
            }
            return map;
        }

        private String toCamelCase(String input) {
            StringBuilder result = new StringBuilder();
            if (input == null || input.isEmpty()) {
                return "";
            } else if (!input.contains("_")) {
                return input.toLowerCase();
            }
            String[] split = input.split("_");
            for (String s : split) {
                if (s.isEmpty()) {
                    continue;
                }
                result.append(s.substring(0, 1).toUpperCase());
                result.append(s.substring(1).toLowerCase());
            }
            StringBuilder ret = new StringBuilder(result.substring(0, 1).toLowerCase());
            ret.append(result.substring(1, result.toString().length()));
            return ret.toString();
        }

        private DriverManagerDataSource dataSource() {
            DriverManagerDataSource dataSource = new DriverManagerDataSource();
            dataSource.setDriverClassName(dbDriverClassName);
            dataSource.setUrl(dbUrl);
            dataSource.setUsername(dbUsername);
            dataSource.setPassword(dbPassword);
            return dataSource;
        }
    }
}

2、字段解析工具类ColumnsMsg

package com.example.schedule.tools.dto;

/**
 * @Author:
 * @Title:
 * @Description:
 * @Date: 2024/9/24 16:29
 * @Version: 1.0
 */
public class ColumnsMsg {

    /***
     * 字段名
     */
    private String tableName;

    /***
     * 字段名
    */
    private String columnName;

    /***
     * 字段类型
     */
    private String columnType;

    /***
     * 字段长度
     */
    private int columnSize;

    /***
     * 字段精确度
     */
    private int columnDigit;

    /***
     * 是否可为空
     */
    private boolean nullAble;

    public ColumnsMsg() {
    }

    public ColumnsMsg(String tableName) {
        this.tableName = tableName;
    }

    public String getTableName() {
        return tableName;
    }

    public void setTableName(String tableName) {
        this.tableName = tableName;
    }

    public String getColumnName() {
        return columnName;
    }

    public void setColumnName(String columnName) {
        this.columnName = columnName;
    }

    public String getColumnType() {
        return columnType;
    }

    public void setColumnType(String columnType) {
        this.columnType = columnType;
    }

    public int getColumnSize() {
        return columnSize;
    }

    public void setColumnSize(int columnSize) {
        this.columnSize = columnSize;
    }

    public int getColumnDigit() {
        return columnDigit;
    }

    public void setColumnDigit(int columnDigit) {
        this.columnDigit = columnDigit;
    }

    public boolean isNullAble() {
        return nullAble;
    }

    public void setNullAble(boolean nullAble) {
        this.nullAble = nullAble;
    }

    @Override
    public String toString() {
        return "ColumnsMsg{" +
                "tableName='" + tableName + '\'' +
                ", columnName='" + columnName + '\'' +
                ", columnType='" + columnType + '\'' +
                ", columnSize=" + columnSize +
                ", columnDigit=" + columnDigit +
                ", nullAble=" + nullAble +
                '}';
    }
}

3、入库字段检查逻辑类DbColumnsChecker

package com.example.schedule.tools;

import com.example.schedule.tools.dto.ColumnsMsg;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Component;

import javax.sql.DataSource;
import java.lang.reflect.Field;
import java.sql.DatabaseMetaData;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;

/**
 * @Author:
 * @Title:
 * @Description:
 * @Date: 2024/9/24 16:15
 * @Version: 1.0
 */
@Component
public class DbColumnsChecker {

    private static final Logger logger = LoggerFactory.getLogger(DbColumnsChecker.class);

    @Autowired
    private DataSource dataSource;

    /***
     * @description:检查list中的数据入库是否会出错
     * @author:
     * @date:2024/9/27 16:35
     * @param	dataList	:
     * @return: java.util.HashMap<java.lang.String,java.util.ArrayList<T>>
    */
    public <T> HashMap<String, ArrayList<T>> checkDataList(ArrayList<T> dataList) throws NoSuchFieldException, IllegalAccessException {
        HashMap<String, ArrayList<T>> result = new HashMap<>();
        if (dataList == null || dataList.size() == 0) {
            return result;
        }
        Class<?> clazz = dataList.get(0).getClass();    // 实体类名
        List<Field> fieldList = Arrays.asList(clazz.getDeclaredFields());   // 所有字段
        // 获取表中的字段信息
        HashMap<String, ColumnsMsg> columnsMap = this.getColumnsMsg(clazz);
        if (columnsMap.size() == 1){
            logger.info("未找到表【{}】", columnsMap.get("tableName").getTableName());
        }
        // 结果集
        ArrayList<T> errorList = new ArrayList<>();
        ArrayList<T> correctList = new ArrayList<>();
        A : for (T data : dataList) {
            for (Field field : fieldList) {
                // 获取代码中的字段名
                String fieldName = field.getName();
                if ("serialVersionUID".equals(fieldName)) {
                    // 跳过默认字段
                    continue;
                }
                // 获取该字段名的值
                Field declaredField = data.getClass().getDeclaredField(fieldName);
                declaredField.setAccessible(true);
                Object o = declaredField.get(data);
                String value = null;
                if (o != null){
                    value = String.valueOf(o);
                }else {
                    continue;
                }
                // 获取数据库中该表的字段信息
                ColumnsMsg columnMsg = columnsMap.get(fieldName);
                if (columnMsg == null) {
                    logger.error("表【{}】中未找到字段{},跳过检查", clazz.getSimpleName(), fieldName);
                    continue;
                }
                // 判断字段值是否符合定义的类型
                if ("varchar".equals(columnMsg.getColumnType())) {
                    // 字符串类型,仅判断长度
                    if (value.length() > columnMsg.getColumnSize()){
                        logger.error("DATA ERROR:字段【{}】超长,无法入库:{}", fieldName, value);
                        errorList.add(data);
                        continue A;
                    }
                    continue;
                }
                if ("numeric".equals(columnMsg.getColumnType())) {
                    // 数值类型,判断长度、精度、是否纯数字
                    if (!value.matches("-?\\d+(\\.\\d+)?")){
                        logger.error("DATA ERROR:字段【{}】={}中含有非法字符:{}", fieldName, value, data);
                        errorList.add(data);
                        continue A;
                    }
                    if (value.contains(".")){
                        String[] split = value.split("\\.");
                        // 先校验右边
                        int intBit = columnMsg.getColumnSize() - columnMsg.getColumnDigit(); // 可存放的整数位数
                        int flotBit = columnMsg.getColumnDigit();   // 可存放的小数位数
                        if (split[0].length() > intBit){
                            logger.error("DATA ERROR:字段【{}】={}中整数部分超长:{}", fieldName, value, data);
                            errorList.add(data);
                            continue A;
                        }
                        if (split[1].length() > flotBit){
                            logger.warn("DATA WARRING:字段【{}】={}中小数部分超长,将会丢失精度:{}", fieldName, value, data);
//                            errorList.add(data);
//                            continue A;
                        }
                        continue;
                    }else {
                        if (value.length() > columnMsg.getColumnSize()){
                            logger.error("DATA ERROR:字段【{}】={}超长:{}", fieldName, value, data);
                            errorList.add(data);
                            continue A;
                        }
                    }
                    continue;
                }
                if ("timestamp".equals(columnMsg.getColumnType())) {
                    // 时间类型
//                    logger.info("暂时跳过时间类型");
                }
            }
            correctList.add(data);
        }
        result.put("errorList", errorList);
        result.put("correctList", correctList);
        return result;
    }

    public static  <T> String checkData(T data, HashMap<String, ColumnsMsg> columnsMap) throws NoSuchFieldException, IllegalAccessException {
        Class<?> clazz = data.getClass();
        List<Field> fieldList = Arrays.asList(clazz.getDeclaredFields());   // 所有字段
        // 获取表中的字段信息
        if (columnsMap.size() == 1){
            logger.info("未找到表【{}】", columnsMap.get("tableName").getTableName());
        }

        StringBuilder returnMsg = new StringBuilder();
        for (Field field : fieldList) {
            // 获取代码中的字段名
            String fieldName = field.getName();
            if ("serialVersionUID".equals(fieldName)) {
                // 跳过默认字段
                continue;
            }
            // 获取该字段名的值
            Field declaredField = data.getClass().getDeclaredField(fieldName);
            declaredField.setAccessible(true);
            Object o = declaredField.get(data);
            String value = null;
            if (o != null){
                value = String.valueOf(o);
            }else {
                continue;
            }
            // 获取数据库中该表的字段信息
            ColumnsMsg columnMsg = columnsMap.get(fieldName);
            if (columnMsg == null) {
                logger.error("表【{}】中未找到字段{},跳过检查", clazz.getSimpleName(), fieldName);
                continue;
            }
            // 判断字段值是否符合定义的类型
            if ("varchar".equals(columnMsg.getColumnType())) {
                // 字符串类型,仅判断长度
                if (value.length() > columnMsg.getColumnSize()){
                    returnMsg.setLength(0); // 清空内容
                    returnMsg.append("DATA ERROR:字段【").append(fieldName).append("】超长,无法入库:").append(value);
                    logger.error(returnMsg.toString());
                    return returnMsg.toString();
                }
                continue;
            }
            if ("numeric".equals(columnMsg.getColumnType())) {
                // 数值类型,判断长度、精度、是否纯数字
                if (!value.matches("-?\\d+(\\.\\d+)?")){
                    returnMsg.setLength(0); // 清空内容
                    returnMsg.append("DATA ERROR:字段【").append(fieldName).append("】=").append(value).append("中含有非法字符:").append(data);
                    logger.error(returnMsg.toString());
                    return returnMsg.toString();
                }
                if (value.contains(".")){
                    String[] split = value.split("\\.");
                    // 先校验右边
                    int intBit = columnMsg.getColumnSize() - columnMsg.getColumnDigit(); // 可存放的整数位数
                    int flotBit = columnMsg.getColumnDigit();   // 可存放的小数位数
                    if (split[0].length() > intBit){
                        returnMsg.setLength(0); // 清空内容
                        returnMsg.append("DATA ERROR:字段【").append(fieldName).append("】=").append(value).append("中整数部分超长:").append(data);
                        logger.error(returnMsg.toString());
                        return returnMsg.toString();
                    }
                    if (split[1].length() > flotBit){
                        returnMsg.setLength(0); // 清空内容
                        returnMsg.append("DATA WARRING:字段【").append(fieldName).append("】=").append(value).append("中小数部分超长,将会丢失精度:").append(data);
                        logger.warn(returnMsg.toString());
                        return returnMsg.toString();
                    }
                    continue;
                }else {
                    if (value.length() > columnMsg.getColumnSize()){
                        returnMsg.setLength(0); // 清空内容
                        returnMsg.append("DATA ERROR:字段【").append(fieldName).append("】=").append(value).append("超长:").append(data);
                        logger.error(returnMsg.toString());
                        return returnMsg.toString();
                    }
                }
                continue;
            }
            if ("timestamp".equals(columnMsg.getColumnType())) {
                // 时间类型
            }
        }
        return "SUCCESS";
    }

    private HashMap<String, ColumnsMsg> getColumnsMsg(Class<?> clazz) {
        String tableName = clazz.getSimpleName().replaceAll("Mapper$", "");
        tableName = tableName.replaceAll("([a-z])([A-Z])", "$1_$2").toLowerCase();  // 驼峰转下划线
        HashMap<String, ColumnsMsg> map = new HashMap<>();
        map.put("tableName", new ColumnsMsg(tableName));
        try {
            DatabaseMetaData meta = dataSource.getConnection().getMetaData();
            ResultSet columns = meta.getColumns(null, null, tableName, null);
            while (columns.next()) {
                ColumnsMsg columnsMsg = new ColumnsMsg();
                // 字段名转换为小驼峰
                String columnName = toCamelCase(columns.getString("COLUMN_NAME"));
                columnsMsg.setTableName(tableName);
                columnsMsg.setColumnName(columnName);
                columnsMsg.setColumnType(columns.getString("TYPE_NAME"));
                columnsMsg.setColumnSize(columns.getInt("COLUMN_SIZE"));
                columnsMsg.setColumnDigit(columns.getInt("DECIMAL_DIGITS"));
                columnsMsg.setNullAble(columns.getBoolean("NULLABLE"));
                map.put(columnName, columnsMsg);
            }
        } catch (SQLException e) {
            e.printStackTrace();
        }
        return map;
    }

    private String toCamelCase(String input) {
        StringBuilder result = new StringBuilder();
        if (input == null || input.isEmpty()) {
            return "";
        } else if (!input.contains("_")) {
            return input.toLowerCase();
        }
        String[] split = input.split("_");
        for (String s : split) {
            if (s.isEmpty()) {
                continue;
            }
            result.append(s.substring(0, 1).toUpperCase());
            result.append(s.substring(1).toLowerCase());
        }
        StringBuilder ret = new StringBuilder(result.substring(0, 1).toLowerCase());
        ret.append(result.substring(1, result.toString().length()));
        return ret.toString();
    }
}

4、自定义注解FieldOrder

package com.example.schedule.config.annotation;

import java.lang.annotation.ElementType;
import java.lang.annotation.Retention;
import java.lang.annotation.RetentionPolicy;
import java.lang.annotation.Target;

/**
 * @Author:
 * Description:
 * CreateDate: 2024/6/19 9:42
 * UpdateUser:
 * UpdateDate: 2024/6/19 9:42
 * UpdateRemark:
 * Version: 1.0
 */
@Retention(RetentionPolicy.RUNTIME)
@Target(ElementType.FIELD)
public @interface FieldOrder {
    int value();
}

5、数据库密码加解密类DataSourceConfig

package com.example.schedule.config.dataSourceConfig;

import com.alibaba.druid.pool.DruidDataSource;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
import org.springframework.core.env.Environment;

/**
 * @Author:
 * @Title:
 * @Description:
 * @Date: 2024/10/9 9:41
 * @Version: 1.0
 */
@Configuration
public class DataSourceConfig {

    @Autowired
    private Environment env;

    private CustomerDruidConfig druidConfig;

    @Bean
    public DruidDataSource dataSource(){
        druidConfig = new CustomerDruidConfig();
        // 数据库连接管理
        druidConfig.setDriverClassName(env.getProperty("spring.datasource.driverClassName"));
        druidConfig.setUrl(env.getProperty("spring.datasource.url"));
        druidConfig.setUsername(env.getProperty("spring.datasource.username"));
        druidConfig.setPasswordFilePath(env.getProperty("spring.datasource.passwordFilePath"));
        // 连接池配置
        druidConfig.setInitialSize(Integer.valueOf(env.getProperty("spring.datasource.druid.initialSize")));
        druidConfig.setMinIdle(Integer.valueOf(env.getProperty("spring.datasource.druid.minIdle")));
        druidConfig.setMaxActive(Integer.valueOf(env.getProperty("spring.datasource.druid.maxActive")));
        druidConfig.setMaxWait(Integer.valueOf(env.getProperty("spring.datasource.druid.maxWait")));
        druidConfig.setTimeBetweenEvictionRunsMillis(Integer.valueOf(env.getProperty("spring.datasource.druid.timeBetweenEvictionRunsMillis")));
        druidConfig.setMinEvictableIdleTimeMillis(Integer.valueOf(env.getProperty("spring.datasource.druid.minEvictableIdleTimeMillis")));
        druidConfig.setMaxEvictableIdleTimeMillis(Integer.valueOf(env.getProperty("spring.datasource.druid.maxEvictableIdleTimeMillis")));
        druidConfig.setValidationQuery(env.getProperty("spring.datasource.druid.validationQuery"));
        druidConfig.setTestWhileIdle(Boolean.parseBoolean(env.getProperty("spring.datasource.druid.testWhileIdle")));
        druidConfig.setTestOnBorrow(Boolean.parseBoolean(env.getProperty("spring.datasource.druid.testOnBorrow")));
        druidConfig.setTestOnReturn(Boolean.parseBoolean(env.getProperty("spring.datasource.druid.testOnReturn")));
        return druidConfig;
    }

    public String getPassword(){
        return druidConfig.getPassword();
    }
}

6、数据库连接池CustomerDruidConfig

package com.example.schedule.config.dataSourceConfig;

import com.alibaba.druid.pool.DruidDataSource;
import com.example.schedule.config.utils.MyAESUtil;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;

/**
 * @Author:
 * @Title:
 * @Description:
 * @Date: 2024/10/9 9:39
 * @Version: 1.0
 */
public class CustomerDruidConfig extends DruidDataSource {

    private static final Logger logger = LoggerFactory.getLogger(CustomerDruidConfig.class);

    String passwordFilePath = "";

    public void setPasswordFilePath(String passwordFilePath) {
        if (passwordFilePath == null || "".equals(passwordFilePath)){
            logger.error("password file path error : passwordFilePath = {}", passwordFilePath);
            try {
                throw new IOException("password file path error : passwordFilePath = " + passwordFilePath);
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
        this.passwordFilePath = passwordFilePath;
        String password = parsePassword();
        super.setPassword(password);
    }

    @Override
    public String getPassword(){
        return this.parsePassword();
    }

    private String parsePassword(){
        String pwd = null;
        String cKey = "ssfk;-scb+123!=0";
        String newPwd = "";
        FileReader reader = null;
        BufferedReader br = null;

        try {
            reader = new FileReader(passwordFilePath);
            br = new BufferedReader(reader);
            pwd = br.readLine().trim();
            if (!"".equals(pwd)) {
                newPwd = MyAESUtil.Decrypt(pwd, cKey);
            }else {
                logger.error("The file {} does not contain any data", passwordFilePath);
                throw new Exception("The file " + passwordFilePath + " does not contain any data");
            }
        } catch (Exception e) {
            logger.error("Decrypt DB password error,");
            e.printStackTrace();
        }finally {
            try {
                if (reader != null){
                    reader.close();
                }
                if (br != null){
                    br.close();
                }
            } catch (IOException e) {
                logger.error("The file {} read error", passwordFilePath);
                e.printStackTrace();
            }
        }
        return newPwd;
    }
}

7、解析文件工具类FileExtractTool

package com.example.schedule.tools;

import com.example.schedule.config.annotation.FieldOrder;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.*;
import java.lang.reflect.Field;
import java.math.BigDecimal;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.nio.file.StandardOpenOption;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.*;
import java.util.concurrent.TimeUnit;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import java.util.zip.GZIPInputStream;

/**
 * @Author:
 * @Title:
 * @Description: 文件解析工具
 * @Date: 2024/9/9 17:39
 * @Version: 1.0
 */
public class FileExtractTool {

    private static final Logger logger = LoggerFactory.getLogger(FileExtractTool.class);
    private static final SimpleDateFormat sdfForDate = new SimpleDateFormat("yyyyMMdd");

    /**
     * @description: 解析.gz文件为Bean对象
     * @author:
     * @date:2024/9/9 17:49
     * @param    fileGZPath :压缩文件路径
     * @param    separator :文件中使用的分隔符
     * @param    encoding :文件编码格式
     * @param    readStartChar :读取指定字符【readStartChar】开头的行
     * @param    clazz : 解析完生成的bean类型
     * @return: java.util.ArrayList<T>
     */
    public static <T> ArrayList<T> parseGZFile(String fileGZPath, String separator, String encoding, String readStartChar, Class<T> clazz) throws Exception {
        // 解压文件
        String fileFullName = decompressGzipFile(fileGZPath);
        logger.info("文件{}解压成功,解压后的文件为{}", fileGZPath, fileFullName);
        ArrayList<T> list = sameLogic(fileFullName, separator, encoding, readStartChar, clazz);
        logger.info("删除解析成功后的解压文件{}", fileFullName);
        deleteDecompressGzipFile(fileFullName);
        return list;
    }

    /**
     * @description: 解析csv文件为Bean对象
     * @author:
     * @date:2024/9/9 17:49
     * @param    filepath :文件路径
     * @param    separator :文件中使用的分隔符
     * @param    encoding :文件编码格式
     * @param    readStartChar :读取指定字符【readStartChar】开头的行
     * @param    clazz : 解析完生成的bean类型
     * @return: java.util.ArrayList<T>
     */
    public static <T> ArrayList<T> parseFile(String filepath, String separator, String encoding, String readStartChar, Class<T> clazz) throws Exception {
        return sameLogic(filepath, separator, encoding, readStartChar, clazz);
    }

    private static <T> ArrayList<T> sameLogic(String filepath, String separator, String encoding, String readStartChar, Class<T> clazz) {
        // 解析文件为数组
        ArrayList<String[]> rowList = null;
        try {
            rowList = readContBySeparator(filepath, separator, encoding);
        } catch (Exception e) {
            e.printStackTrace();
            logger.error("文件解析错误");
        }
        logger.info("文件{}解析成功,解析到的数据量为{}", filepath, rowList.size());
        // 解析文件为bean
        ArrayList<T> list = new ArrayList<>();
        for (int i = 0; i < rowList.size(); i++) {
            String[] row = rowList.get(i);
            if (!readStartChar.equals(row[0])) {
                logger.info("当前跳过第{}行,非数据行,跳过数据为:{}", i, row);
                continue;
            }
            // 去除第一个标识符
            String[] newArray = new String[row.length - 1];
            System.arraycopy(row, 1, newArray, 0, newArray.length);
            T bean = null;
            try {
                bean = parseRow(newArray, clazz, i+1);
            } catch (Exception e) {
                e.printStackTrace();
                logger.error("DATA ERROR:{},ERROR MSG:{}", newArray, e.getMessage());
                continue;
            }
            list.add(bean);
        }
        logger.info("文件{}解析成功,文件行数={},读取有效数据行数:{}", filepath, rowList.size(), list.size());
        return list;
    }

    /***
     * @description: 解压GZ文件
     * @author:
     * @date:2024/9/9 17:45
     * @param    compressedFilePath : 文件名
     * @return: java.lang.String :解压后的文件名
     */
    public static String decompressGzipFile(String compressedFilePath) throws Exception {
        // 校验文件是否存在
        File file = new File(compressedFilePath);
        if (!file.exists()) {
            logger.error("文件不存在{}", compressedFilePath);
            throw new FileNotFoundException("文件" + compressedFilePath + "不存在");
        }

        // 检验文件后缀名
        String substring = compressedFilePath.substring(compressedFilePath.lastIndexOf(".") + 1);
        if (!"gz".equals(substring)) {
            logger.error("文件不是gz类型{}", compressedFilePath);
            throw new Exception("文件" + compressedFilePath + "不是gz类型");
        }

        logger.info("开始解压文件{}", compressedFilePath);
        long start = System.currentTimeMillis();

        String osName = System.getProperty("os.name").toLowerCase();
        logger.info("当前运行环境信息:{}", osName);
        String filePath = "";
//        if (osName.contains("linux")){
//            filePath = decompressGzFileWithLinuxCommand(compressedFilePath);
//        }else {
//            filePath = decompressGzFileWithJVM(compressedFilePath);
//        }
        filePath = decompressGzFileWithJVM(compressedFilePath);

        long end = System.currentTimeMillis();
        logger.info("文件{}解压完毕,解压耗时:{}ms", compressedFilePath, end - start);

        return filePath;
    }

    // 使用JVM解压
    private static String decompressGzFileWithJVM(String compressedFilePath) throws IOException {
        FileInputStream fis = new FileInputStream(compressedFilePath);
        GZIPInputStream gzipIn = new GZIPInputStream(fis);
        String decompressedFilePath = compressedFilePath.replaceAll("\\.gz$", "");
        FileOutputStream fos = new FileOutputStream(decompressedFilePath);
        byte[] buffer = new byte[1024];
        int len;
        while ((len = gzipIn.read(buffer)) > 0) {
            fos.write(buffer, 0, len);
        }
        fos.close();
        gzipIn.close();
        fis.close();
        return decompressedFilePath;
    }

    // 使用Linux解压
    private static String decompressGzFileWithLinuxCommand(String compressedFilePath) throws IOException, InterruptedException {
        String decompressedFilePath = compressedFilePath.replaceAll("\\.gz", "");
        ProcessBuilder processBuilder = new ProcessBuilder("gunzip -k ", compressedFilePath);
        Process process = processBuilder.start();
        boolean completed = process.waitFor(3, TimeUnit.HOURS);
        if (!completed || process.exitValue() != 0) {
            throw new RuntimeException("解压文件失败: " + compressedFilePath);
        }
        return decompressedFilePath;
    }


    /***
     * @description: 将文件内容读取到List中
     * @author:
     * @date:2024/9/9 17:46
     * @param    filePath :文件路径
     * @param    separator :文件内容分隔符
     * @param    encoding :文件编码格式
     * @return: java.util.ArrayList<java.lang.String [ ]>
     */
    private static ArrayList<String[]> readContBySeparator(String filePath, String separator, String encoding) throws Exception {
        // 校验文件
        File file = new File(filePath);
        if (!file.exists()) {
            logger.error("文件不存在{}", filePath);
            throw new FileNotFoundException("文件" + filePath + "不存在");
        }

        //检验文件后缀名
        String substring = filePath.substring(filePath.lastIndexOf(".") + 1);
        if (!"csv".equals(substring)) {
            logger.error("文件不是csv类型{}", filePath);
            throw new Exception("文件" + filePath + "不是csv类型");
        }

        // 解析文件
        ArrayList<String[]> result = new ArrayList<>();
        BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(filePath), encoding));
        String line = "";
        while ((line = reader.readLine()) != null) {
            String[] data = line.split(separator);
            result.add(data);
        }
        reader.close();
        return result;
    }

    /**
     * 解析单行
     *
     * @param row
     * @param clazz
     * @return T
     * @author CX
     * @date 2024/9/9 15:24
     */
    public static <T> T parseRow(String[] row, Class<T> clazz, Integer line) throws Exception {
        T obj = null;
        // 调用反射,根据传入的class创建对象
        obj = clazz.newInstance();
        // 获取类中的所有字段
        Field[] fields = clazz.getDeclaredFields();
        // 排序字段
        Arrays.sort(fields, Comparator.comparingInt(f -> {
            FieldOrder annotation = f.getAnnotation(FieldOrder.class);
            if (annotation != null) {
                return annotation.value();
            } else {
                // 如果没有注解,默认返回一个较大的值,放到末尾
                return Integer.MAX_VALUE;
            }
        }));

        // 开始解析
        for (int i = 0; i < fields.length && i < row.length; i++) {
            // 当前处理的字段
            Field field = fields[i];
            field.setAccessible(true); // 设置字段访问权限为可访问
            // 当前行的第i个cell的数据
            String cellValue = row[i];
            // 获取当前字段的类型
            Class<?> type = field.getType();
            try {
                if ("NA".equals(cellValue) || "N/A".equals(cellValue) || "N\\A".equals(cellValue)){
                    logger.info("当前读取到第{}行第{}列值为NA,字段类型为{},已设置为空或0",line, (i + 2), type);
                    if (type == BigDecimal.class) {
                        field.set(obj, BigDecimal.ZERO);
                    }
                    if (type == byte.class || type == Byte.class ||
                            type == short.class || type == Short.class ||
                            type == int.class || type == Integer.class ||
                            type == long.class || type == Long.class ||
                            type == float.class || type == Float.class ||
                            type == double.class || type == Double.class){
                        field.set(obj, 0);
                    }
                    if (type == String.class || type == Character.class){
                        field.set(obj, null);
                    }
                }else if (cellValue != null && !"".equals(cellValue)) {
                    // 根据字段类型将cell的值添加到字段
                    if (type == String.class) {
                        field.set(obj, cellValue);
                    }
                    if (type == byte.class || type == Byte.class) {
                        field.set(obj, Byte.valueOf(cellValue));
                    }
                    if (type == short.class || type == Short.class) {
                        field.set(obj, Short.valueOf(cellValue));
                    }
                    if (type == int.class || type == Integer.class) {
                        field.set(obj, Integer.valueOf(cellValue));
                    }
                    if (type == long.class || type == Long.class) {
                        field.set(obj, Long.valueOf(cellValue));
                    }
                    if (type == float.class || type == Float.class) {
                        field.set(obj, Float.valueOf(cellValue));
                    }
                    if (type == double.class || type == Double.class) {
                        field.set(obj, Double.valueOf(cellValue));
                    }
                    if (type == boolean.class || type == Boolean.class) {
                        field.set(obj, Boolean.valueOf(cellValue));
                    }
                    if (type == BigDecimal.class) {
                        // 仅提取数字
                        if (cellValue != null && !"".equals(cellValue)){
                            Pattern pattern = Pattern.compile("\\d+\\.?\\d*");
                            Matcher matcher = pattern.matcher(cellValue);
                            String number = "";
                            if (matcher.find()) {
                                // 返回匹配到的字符串
                                number = matcher.group();
                            }else {
                                logger.error("解析第{}行的{}列报错,读取到的值为【{}】。代码中配置的字段名为【{}】,字段类型为{}",line, (i + 2), cellValue, field.getName(), type);
                                throw new NumberFormatException("解析第" + line + "行的第" + (i + 2) + "列报错,读取到的值为【" + cellValue + "】。代码中配置的字段名为【" + field.getName() + "】,字段类型为" + type);
                            }
                            BigDecimal value = BigDecimal.ZERO;
                            if (cellValue.contains("万")){
                                // 解析为负数
                                value = new BigDecimal(number).negate();
                            }else {
                                value = new BigDecimal(number);
                            }
                            field.set(obj, value);
                        }else {
                            field.set(obj, BigDecimal.ZERO);
                        }
                    }
                    if (type == Date.class) {
                        Date date = sdfForDate.parse(cellValue);
                        field.set(obj, date);
                    }
                }else {
                    field.set(obj, null);
                }
            } catch (ParseException e) {
                logger.error("解析第{}行的{}列报错,读取到的值为【{}】。时间格式错误,应为yyyyMMdd", line, (i + 2), cellValue);
                throw new ParseException("解析第"+ line + "行的第" + (i + 2) + "列报错,读取到的值为【" + cellValue + "】。时间格式错误,应为yyyyMMdd", (i + 2));
            } catch (NumberFormatException e) {
                logger.error("解析第{}行的{}列报错,读取到的值为【{}】。代码中配置的字段名为【{}】,字段类型为{}",line, (i + 2), cellValue, field.getName(), type);
                throw new NumberFormatException("解析第" + line + "行的第" + (i + 2) + "列报错,读取到的值为【" + cellValue + "】。代码中配置的字段名为【" + field.getName() + "】,字段类型为" + type);
            } catch (Exception e) {
                logger.error("解析第{}行的第{}列报错,错误原因:{}", line, (i + 2), e.getMessage());
                throw new Exception("解析第"  + line + "行的第" + (i + 2) + "列报错,错误原因:" + e.getMessage());
            }
        }
        return obj;
    }

    /***
     * @description: 删除解析完毕的文件
     * @author:
     * @date:2024/9/10 9:41
     * @param    filePath :文件全路径
     */
    public static void deleteDecompressGzipFile(String filePath) {
        // 创建 File 对象
        File file = new File(filePath);

        // 检查文件是否存在
        if (!file.exists()) {
            logger.info("文件不存在");
            return;
        }

        // 尝试删除文件
        boolean deleted = file.delete();
        if (deleted) {
            logger.info("文件删除成功{}", filePath);
        } else {
            logger.info("文件删除失败{}", filePath);
        }
    }

    /**
     * @description :获取指定路径下指定前缀的所有文件名全路径
     * @author:
     * @date:2024/9/13 17:19
      * @param	fullPath	:
     * @return: java.util.ArrayList<java.lang.String>
    */
    public static List<String> getFileList(String fullPath){
        List<String> resultList = new ArrayList<>();
        try {
            // 获取路径部分,假设前缀是路径加文件名前半部分的形式
            Path dirPath = Paths.get(fullPath).getParent();
            String filePrefix = Paths.get(fullPath).getFileName().toString();

            // 遍历目录中的文件
            try (Stream<Path> paths = Files.list(dirPath)) {
                resultList = paths
                        .filter(Files::isRegularFile) // 只要文件,忽略文件夹
                        .filter(p -> p.getFileName().toString().startsWith(filePrefix)) // 文件名前缀匹配
                        .map(Path::toString) // 转换为完整路径的字符串
                        .collect(Collectors.toList());
            }
        } catch (IOException e) {
            logger.info("读取文件路径错误:{}", fullPath);
        }
        return resultList;
    }
    /**
     * 读取数据文件,包括解压和解析过程
     *
     * @param absolutePath 文件的绝对路径
     * @return 数据行的列表,每行数据为一个字符串数组
     * @throws Exception 如果解压或解析过程中发生错误,则抛出异常
     */
    public static List<String[]> readDataFile(String absolutePath) throws Exception {
        // 解压文件
        String fileFullName = decompressGzipFile(absolutePath);
        logger.info("文件{}解压成功,解压后的文件为{}", absolutePath, fileFullName);

        // 解析解压后的文件
        List<String[]> dataLines = DataParserTool.readDataFile(new File(fileFullName), "UTF-8", "\u00A7", true, "T");

        // 删除已解压文件
        logger.info("删除解析成功后的解压文件{}", fileFullName);
        deleteDecompressGzipFile(fileFullName);

        // 返回解析后的数据列表
        return dataLines;
    }

    /***
     * @description: 将内容追加到指定的文件中。如果文件不存在,则创建并写入;如果存在,则追加。
     * @author:
     * @date:2024/9/27 16:20
     * @param	filePath
     * @param	content	:
     * @return: void
    */
    public static void appendErrBeanData(String filePath, String content) {
        int lastDotIndex = filePath.lastIndexOf('.');
        String fileName = "";
        if (lastDotIndex != -1) {
            String fileNameWithoutExtension = filePath.substring(0, lastDotIndex);
            String fileExtension = filePath.substring(lastDotIndex);
            fileName =  fileNameWithoutExtension + "_DATAERROR" + fileExtension;
        } else {
            // 如果文件名中没有后缀,则直接添加"_DATAERROR"
            fileName =  filePath + "_DATAERROR";
        }
        Path path = Path.of(fileName);
        try {
            // 使用StandardOpenOption.APPEND选项来追加内容
            Files.write(path, (content + System.lineSeparator()).getBytes(), StandardOpenOption.CREATE, StandardOpenOption.APPEND);
            logger.info("解析出错的内容成功写入文件");
        } catch (IOException e) {
            logger.error("写入文件时发生错误");
            e.printStackTrace();
        }
    }

    /***
     * @description: 将内容追加到指定的文件中。如果文件不存在,则创建并写入;如果存在,则追加。
     * @author:
     * @date:2024/9/27 16:20
     * @param	filePath
     * @param	content	:
     * @return: void
     */
    public static void appendErrPojoData(String filePath, String content) {
        int lastDotIndex = filePath.lastIndexOf('.');
        String fileName = "";
        if (lastDotIndex != -1) {
            String fileNameWithoutExtension = filePath.substring(0, lastDotIndex);
            fileName =  fileNameWithoutExtension + "_INSERTERROR" + ".txt";
        } else {
            // 如果文件名中没有后缀,则直接添加"_INSERTERROR"
            fileName =  filePath + "_INSERTERROR";
        }
        Path path = Path.of(fileName);
        try {
            // 使用StandardOpenOption.APPEND选项来追加内容
            Files.write(path, (content + System.lineSeparator()).getBytes(), StandardOpenOption.CREATE, StandardOpenOption.APPEND);
            logger.info("解析出错的内容成功写入文件");
        } catch (IOException e) {
            logger.error("写入文件时发生错误");
            e.printStackTrace();
        }
    }
}
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值