使用java编写的数据库存储引擎。解析超大sql语句文件的算法并按照指定规则将数据库名,表名,字段名和对应的值写进磁盘中,逐行读取逐行写入

文章介绍了一个用于检查SQL字段类型的工具类和一个多线程的SQL语句解析算法,该算法能读取和处理大到10GB的SQL文件而不会导致内存溢出。解析过程包括识别SQL语句的类型,如建表和插入语句,将表名、字段名及字段值写入指定的磁盘路径,确保数据正确性。

摘要生成于 C知道 ,由 DeepSeek-R1 满血版支持, 前往体验 >

因为是逐行读取逐行写入的,所以经过10G的sql文件测试,目前无内存溢出异常

首先我封装了一个校验sql字段类型的工具类,这个是用来检验sql建表语句中的字段的


public class CheckSqlType {

    /**
     * 判断字符串是否包含sql字段类型
     * @param sql
     * @return
     */
    public static Boolean isSqlType(String sql) {
        String sqlLowerCase = sql.toLowerCase();
        String type = "text,memo,byte,integer,long,single,double,currency,autonumber,date,time," +
                "yes,no,ole object,hyperlink,lookup wizard,char,varchar,tinytext,blob,mediumtext," +
                "mediumblob,longtext,longblob,enum,set,tinyint,smallint,mediumint,int,bigint,float," +
                "decimal,datetime,timestamp,year,nchar,nvarchar,ntext,bit,binary,varbinary,image,numeric," +
                "smallmoney,money,real,datetime2,smalldatetime,datetimeoffset,sql_variant,uniqueidentifier," +
                "xml,cursor,table";

        String[] types = type.split(",");
        for (String sqlType : types) {
            if (sqlLowerCase.contains(" "+sqlType)){
                return true;
            }
        }

        return false;
    }

}

然后就是解析sql语句的算法了,将解析好的数据字段值写入输入的磁盘路径下text/库名/表名/字段名.txt中(因为sql文件是以库名命名的)




import java.io.*;
import java.util.*;

/**
 * 
 * @description 解析sql语句的算法,获取sql语句中的表名,字段名及字段值并存到磁盘,可多线程同时运行
 *
 * @author: 影耀子(YingYew)
 * @create: 2023-03-18 16:28
 **/
public class AnalysisSql {

    /**
     * 解析sql语句的算法,获取sql语句中的表名,字段名及字段值,可多线程同时运行
     * @param sqlPath sql语句文件所在目录,读取此目录下所有后缀名为.sql的文件
     * @param textPath 要写入磁盘的路径
     * @throws Exception
     */
    public void parseSql(String sqlPath,String textPath) throws Exception {
        // KEY为表名,value为表字段(按顺序添加进集合中)
        Map<String, List<String>> tableInfo = new HashMap<>();

        // 获取所有sql文件名
        String[] list = new File(sqlPath).list();

        // 根据文件名创建对应文件夹
        for (String s : list) {
            //数据库文件地址
            String sqlFilePath = textPath+"/text/"+s.replace(".sql","");
            // 根据文件名(数据库名)创建对应文件夹
            if (s.endsWith(".sql")){
                new File(sqlFilePath).mkdirs();
            }

            // 读取sql文件,逐行读取
            BufferedReader sqlReader = new BufferedReader(new FileReader(sqlPath+s));
            String sqlContext = sqlReader.readLine();

            // 根据“;”区分的一行sql语句
            StringBuilder sqlOneLine = new StringBuilder(sqlContext);
            while (sqlContext != null){
                String nextLine = sqlReader.readLine();

                if (sqlContext.endsWith(";")) {
//                    System.out.println(sqlOneLine);
                    // sqlOneLine就是截取的一条sql语句,开始业务编写

                    // 判断sql语句并写入磁盘文件
                    sqlWriteIn(sqlOneLine.toString(),sqlFilePath,tableInfo);

                    // 清空sqlOneLine字符
                    sqlOneLine.setLength(0);
                }
                // 注释不加入
                String sqlOneLineStr = sqlOneLine.toString();
                if (sqlOneLineStr.endsWith("*/") && sqlOneLineStr.contains("/*")){
                    sqlOneLine.delete(sqlOneLineStr.indexOf("/*"),sqlOneLineStr.indexOf("*/")+2);
                }

                if (nextLine != null && !nextLine.startsWith("--")){
                    sqlOneLine.append(nextLine);
                }

                sqlContext = nextLine;
            }
            sqlReader.close();
            System.out.println("全部运行成功,sql名列表:"+list);
        }
    }

    /**
     * 处理一条sql的逻辑
     *  在path路径下创建表名的文件夹
     *
     * @param sql sql语句
     * @param path 、需要写入的路径
     * @param tableInfo 、KEY为表名,value为表字段(按顺序添加进集合中)
     */
    public void sqlWriteIn(String sql,String path,Map<String, List<String>> tableInfo) throws Exception {
        String sqlLowerCase = sql.toLowerCase();
        // 建库语句
        if (sqlLowerCase.contains("create ")
                && sqlLowerCase.contains(" table ")){

            String table = sql.substring(sqlLowerCase.indexOf(" table") + 6, sqlLowerCase.indexOf("("))
                    .trim().replace("'","").replace("`","")
                    .replace("\"","");

            List<String> sqlFieldList = new ArrayList<>();

            // 创建表名文件夹
            new File(path+"/"+table).mkdir();
            new File(path+"/"+table+"/field").mkdir();
            String[] sqlFieldInformations = sql.substring
                    (sqlLowerCase.indexOf("(")+1, sqlLowerCase.lastIndexOf(")"))
                    .split(",");

            for (String sqlFieldInformation : sqlFieldInformations) {
                sqlFieldInformation = sqlFieldInformation.trim();
                String sqlFieldName = sqlFieldInformation
                        .substring(0, sqlFieldInformation.indexOf(" "));
                sqlFieldName = sqlFieldName.replace("`","")
                        .replace("'","")
                        .replace("\"","");

                // 判断此行数据是否是字段
                if (!CheckSqlType.isSqlType(sqlFieldInformation)){
                    break;
                }
                // 根据字段名创建字段文件text
                new File(path+"/"+table+"/field/"+sqlFieldName+".txt").createNewFile();
                sqlFieldList.add(sqlFieldName);
            }
            tableInfo.put(table,sqlFieldList);

            return;
        }

        // 添加语句
        if (sqlLowerCase.contains("insert ")
                && sqlLowerCase.contains(" into ")){

            String table = sql.substring(sqlLowerCase.indexOf(" into ") + 6, sqlLowerCase.indexOf(" values"))
                    .trim().replace("'","").replace("`","")
                    .replace("\"","");

            String substring = sql.substring
                    (sqlLowerCase.indexOf(" values") + 7, sqlLowerCase.lastIndexOf(")"));
            String sqlFieldValuesStr = substring
                    .substring(substring.indexOf("(")+1);

            // 处理有问题的数据,
            List<String> sqlFieldValues = handle(sqlFieldValuesStr);

            List<String> tableFieldNameList = new ArrayList<>();
            // 如果添加语句包含字段名,则使用添加语句的字段名
            if (table.contains("(") && table.contains(")")){
                String[] tableFieldName = table.substring(table.indexOf("(") + 1, table.indexOf(")")).split(",");
                tableFieldNameList.addAll(Arrays.asList(tableFieldName));
                table = table.substring(0,table.indexOf("("));
            }
            int tableFieldNameListSize = tableFieldNameList.size();
            List<String> DefaultTableFieldNameListlist = tableInfo.get(table);

            if (sqlFieldValues.size() != tableFieldNameListSize &&
                    sqlFieldValues.size() != DefaultTableFieldNameListlist.size()){

                throw new Exception("表字段长度与数据长度不符---表名:"+table);
            }
            BufferedWriter bufferedWriter;
            int sqlFieldValuesLen = sqlFieldValues.size();
            for (int i = 0; i < sqlFieldValuesLen; i++) {
                String sqlFieldValue = sqlFieldValues.get(i);

                // 判断添加语句中有无字段
                if (tableFieldNameListSize > 0){
                    bufferedWriter = new BufferedWriter(new FileWriter
                            (path + "/" + table + "/field/" + tableFieldNameList.get(i).trim() + ".txt",true));
                }else {

                    bufferedWriter = new BufferedWriter(new FileWriter
                            (path + "/" + table + "/field/" + DefaultTableFieldNameListlist.get(i) + ".txt",true));
                }
                bufferedWriter.append(sqlFieldValue);
                bufferedWriter.newLine();
                bufferedWriter.flush();// 清除缓存
                bufferedWriter.close();
            }

        }



    }

    /**
     * '15465', '全体成员(无人武部,交警莲湖大队)', null, null,
     * '1', '2266', '3071', '2021-05-18 15:36:32', '2021-04-27 14:43:47', null, null
     *
     * @author : YingYew影耀子
     * @date : 2023/3/16 15:06
     * @param :null
     * @return :null
     **/

    public List<String> handle(String sqlFieldValuesStr) {
        StringBuilder sqlFieldValuesStrBuilder = new StringBuilder(sqlFieldValuesStr);

//        System.out.println(sqlFieldValuesStr);
        List<String> sqlFieldValueStrList = new ArrayList<>();


        /** 1,从后往前读取,判断是否以引号结尾,
         *   截取以最后一个,到最后一行的数据
         *      是:去除前后空格及引号
         *      否:去除前后空格
         *
         * @author : 影耀子(YingYew)
         * @date : 2023/3/16 16:51
         * @param :args
         **/

        while (sqlFieldValuesStrBuilder.length() > 0){
            String sqlFieldValuesBuilderStr = sqlFieldValuesStrBuilder.toString();
            int length = sqlFieldValuesStrBuilder.length();

            String sqlFieldValue;
            if (sqlFieldValuesBuilderStr.endsWith("'")){
                sqlFieldValue = getSqlFieldValue("'",sqlFieldValuesStr,length,sqlFieldValuesStrBuilder);
            }else if (sqlFieldValuesBuilderStr.endsWith("\"")){
                sqlFieldValue = getSqlFieldValue("\"",sqlFieldValuesStr,length,sqlFieldValuesStrBuilder);
            }else if (sqlFieldValuesBuilderStr.endsWith("`")){
                sqlFieldValue = getSqlFieldValue("`",sqlFieldValuesStr,length,sqlFieldValuesStrBuilder);
            }else{
                int lastIndex = sqlFieldValuesStr.lastIndexOf(",", length - 1);
                // 因为无引号,所以截取到结尾
                sqlFieldValue = sqlFieldValuesStr.substring
                        (lastIndex+1, length).trim();
//                .replace(",","").trim()
                if (lastIndex != -1){
                    sqlFieldValuesStrBuilder.setLength(lastIndex);
                }else {
                    sqlFieldValuesStrBuilder.setLength(0);
                }
            }
            sqlFieldValueStrList.add(sqlFieldValue);
        }
        Collections.reverse(sqlFieldValueStrList);

        return sqlFieldValueStrList;
    }


    /**
     * 获取sql语句中的值
     * @author : YingYew影耀子
     * @date : 2023/3/17 10:20
     * @return :java.lang.String
     **/
    public String getSqlFieldValue(String quotation,String sqlFieldValuesStr,int length,StringBuilder sqlFieldValuesStrBuilder){

        // 获取字段值的开始下标
        int lastIndexQuote = sqlFieldValuesStr.lastIndexOf(quotation, length - 2);

        // 若检测到\,则特殊处理得到字段值开始下标,
        while (sqlFieldValuesStr.startsWith("\\", lastIndexQuote-1)){
            lastIndexQuote = sqlFieldValuesStr.lastIndexOf(quotation, lastIndexQuote-1);
        }
        // 获取字段值前面的“,”开始的下标
        int lastIndexComma = sqlFieldValuesStr.lastIndexOf(",", lastIndexQuote-1);
        // 获取字段值,不包含包含引号
        String sqlFieldValueQuestion = sqlFieldValuesStr.substring
                (lastIndexQuote+1, length-1);

        if (lastIndexComma != -1){
            sqlFieldValuesStrBuilder.setLength(lastIndexComma);
        }else {
            sqlFieldValuesStrBuilder.setLength(0);
        }

        // 正确的值
        return analyseSprit(sqlFieldValueQuestion);
    }


    /**
     * 解析斜杠
     * 未\\\\央\\\\\区\'各\\\街\\道    输出:未\\央\\区'各\街\道
     *  斯蒂芬\\r\\n斯蒂芬\\r\\n斯蒂芬\\r\\n斯蒂芬\\r\\n斯蒂芬\\r\\n斯蒂芬\\r\\n斯蒂芬
     * 输出:斯蒂芬\r\n斯蒂芬\r\n斯蒂芬\r\n斯蒂芬\r\n斯蒂芬\r\n斯蒂芬\r\n斯蒂芬
     *
     * @date : 2023/3/17 11:29
     * @param :sqlFieldValuesStr
     * @return :java.lang.String
     **/
    public String analyseSprit(String sqlFieldValue) {

        if (!sqlFieldValue.contains("\\")){
            return sqlFieldValue;
        }
//        sqlFieldValue = sqlFieldValue.replace("\\"+quotation,quotation);

        // 解析斜杠的逻辑
        StringBuilder sqlFieldValueStringBuilder = new StringBuilder(sqlFieldValue);

        List<String> handleSqlFieldValueFallList = new ArrayList<>();
        while (sqlFieldValueStringBuilder.length() > 0){
            // 获取最后处理\的下标
            int barEndIndex = sqlFieldValue.lastIndexOf("\\",sqlFieldValueStringBuilder.length()-1);
            if (barEndIndex == -1){
                handleSqlFieldValueFallList.add(sqlFieldValueStringBuilder.toString());
                break;
            }
            // 获取每个\开始的下标
            int barStartIndex = barEndIndex;
            while (sqlFieldValue.startsWith("\\",barStartIndex-1)){
                barStartIndex--;
            }
            // 获取杠的个数
            int barNum = barEndIndex-barStartIndex+1;
            // 正确的杠的个数
            int correctBarNum = barNum/2;
            StringBuilder bar = new StringBuilder();
            for (int i = 0; i < correctBarNum; i++) {
                bar.append("\\");
            }
            handleSqlFieldValueFallList.add(
                    bar.append(sqlFieldValue, barEndIndex+1, sqlFieldValueStringBuilder.length()).toString());
            sqlFieldValueStringBuilder.setLength(barStartIndex);
        }
        Collections.reverse(handleSqlFieldValueFallList);
        StringBuilder handleSqlFieldValue = new StringBuilder();
        for (String s :handleSqlFieldValueFallList){
            handleSqlFieldValue.append(s);
        }
        return handleSqlFieldValue.toString();
    }

}

此代码已测试过,可多线程运行过10G内容的sql文件,无内存溢出

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值