因为是逐行读取逐行写入的,所以经过10G的sql文件测试,目前无内存溢出异常
首先我封装了一个校验sql字段类型的工具类,这个是用来检验sql建表语句中的字段的
public class CheckSqlType {
/**
* 判断字符串是否包含sql字段类型
* @param sql
* @return
*/
public static Boolean isSqlType(String sql) {
String sqlLowerCase = sql.toLowerCase();
String type = "text,memo,byte,integer,long,single,double,currency,autonumber,date,time," +
"yes,no,ole object,hyperlink,lookup wizard,char,varchar,tinytext,blob,mediumtext," +
"mediumblob,longtext,longblob,enum,set,tinyint,smallint,mediumint,int,bigint,float," +
"decimal,datetime,timestamp,year,nchar,nvarchar,ntext,bit,binary,varbinary,image,numeric," +
"smallmoney,money,real,datetime2,smalldatetime,datetimeoffset,sql_variant,uniqueidentifier," +
"xml,cursor,table";
String[] types = type.split(",");
for (String sqlType : types) {
if (sqlLowerCase.contains(" "+sqlType)){
return true;
}
}
return false;
}
}
然后就是解析sql语句的算法了,将解析好的数据字段值写入输入的磁盘路径下text/库名/表名/字段名.txt中(因为sql文件是以库名命名的)
import java.io.*;
import java.util.*;
/**
*
* @description 解析sql语句的算法,获取sql语句中的表名,字段名及字段值并存到磁盘,可多线程同时运行
*
* @author: 影耀子(YingYew)
* @create: 2023-03-18 16:28
**/
public class AnalysisSql {
/**
* 解析sql语句的算法,获取sql语句中的表名,字段名及字段值,可多线程同时运行
* @param sqlPath sql语句文件所在目录,读取此目录下所有后缀名为.sql的文件
* @param textPath 要写入磁盘的路径
* @throws Exception
*/
public void parseSql(String sqlPath,String textPath) throws Exception {
// KEY为表名,value为表字段(按顺序添加进集合中)
Map<String, List<String>> tableInfo = new HashMap<>();
// 获取所有sql文件名
String[] list = new File(sqlPath).list();
// 根据文件名创建对应文件夹
for (String s : list) {
//数据库文件地址
String sqlFilePath = textPath+"/text/"+s.replace(".sql","");
// 根据文件名(数据库名)创建对应文件夹
if (s.endsWith(".sql")){
new File(sqlFilePath).mkdirs();
}
// 读取sql文件,逐行读取
BufferedReader sqlReader = new BufferedReader(new FileReader(sqlPath+s));
String sqlContext = sqlReader.readLine();
// 根据“;”区分的一行sql语句
StringBuilder sqlOneLine = new StringBuilder(sqlContext);
while (sqlContext != null){
String nextLine = sqlReader.readLine();
if (sqlContext.endsWith(";")) {
// System.out.println(sqlOneLine);
// sqlOneLine就是截取的一条sql语句,开始业务编写
// 判断sql语句并写入磁盘文件
sqlWriteIn(sqlOneLine.toString(),sqlFilePath,tableInfo);
// 清空sqlOneLine字符
sqlOneLine.setLength(0);
}
// 注释不加入
String sqlOneLineStr = sqlOneLine.toString();
if (sqlOneLineStr.endsWith("*/") && sqlOneLineStr.contains("/*")){
sqlOneLine.delete(sqlOneLineStr.indexOf("/*"),sqlOneLineStr.indexOf("*/")+2);
}
if (nextLine != null && !nextLine.startsWith("--")){
sqlOneLine.append(nextLine);
}
sqlContext = nextLine;
}
sqlReader.close();
System.out.println("全部运行成功,sql名列表:"+list);
}
}
/**
* 处理一条sql的逻辑
* 在path路径下创建表名的文件夹
*
* @param sql sql语句
* @param path 、需要写入的路径
* @param tableInfo 、KEY为表名,value为表字段(按顺序添加进集合中)
*/
public void sqlWriteIn(String sql,String path,Map<String, List<String>> tableInfo) throws Exception {
String sqlLowerCase = sql.toLowerCase();
// 建库语句
if (sqlLowerCase.contains("create ")
&& sqlLowerCase.contains(" table ")){
String table = sql.substring(sqlLowerCase.indexOf(" table") + 6, sqlLowerCase.indexOf("("))
.trim().replace("'","").replace("`","")
.replace("\"","");
List<String> sqlFieldList = new ArrayList<>();
// 创建表名文件夹
new File(path+"/"+table).mkdir();
new File(path+"/"+table+"/field").mkdir();
String[] sqlFieldInformations = sql.substring
(sqlLowerCase.indexOf("(")+1, sqlLowerCase.lastIndexOf(")"))
.split(",");
for (String sqlFieldInformation : sqlFieldInformations) {
sqlFieldInformation = sqlFieldInformation.trim();
String sqlFieldName = sqlFieldInformation
.substring(0, sqlFieldInformation.indexOf(" "));
sqlFieldName = sqlFieldName.replace("`","")
.replace("'","")
.replace("\"","");
// 判断此行数据是否是字段
if (!CheckSqlType.isSqlType(sqlFieldInformation)){
break;
}
// 根据字段名创建字段文件text
new File(path+"/"+table+"/field/"+sqlFieldName+".txt").createNewFile();
sqlFieldList.add(sqlFieldName);
}
tableInfo.put(table,sqlFieldList);
return;
}
// 添加语句
if (sqlLowerCase.contains("insert ")
&& sqlLowerCase.contains(" into ")){
String table = sql.substring(sqlLowerCase.indexOf(" into ") + 6, sqlLowerCase.indexOf(" values"))
.trim().replace("'","").replace("`","")
.replace("\"","");
String substring = sql.substring
(sqlLowerCase.indexOf(" values") + 7, sqlLowerCase.lastIndexOf(")"));
String sqlFieldValuesStr = substring
.substring(substring.indexOf("(")+1);
// 处理有问题的数据,
List<String> sqlFieldValues = handle(sqlFieldValuesStr);
List<String> tableFieldNameList = new ArrayList<>();
// 如果添加语句包含字段名,则使用添加语句的字段名
if (table.contains("(") && table.contains(")")){
String[] tableFieldName = table.substring(table.indexOf("(") + 1, table.indexOf(")")).split(",");
tableFieldNameList.addAll(Arrays.asList(tableFieldName));
table = table.substring(0,table.indexOf("("));
}
int tableFieldNameListSize = tableFieldNameList.size();
List<String> DefaultTableFieldNameListlist = tableInfo.get(table);
if (sqlFieldValues.size() != tableFieldNameListSize &&
sqlFieldValues.size() != DefaultTableFieldNameListlist.size()){
throw new Exception("表字段长度与数据长度不符---表名:"+table);
}
BufferedWriter bufferedWriter;
int sqlFieldValuesLen = sqlFieldValues.size();
for (int i = 0; i < sqlFieldValuesLen; i++) {
String sqlFieldValue = sqlFieldValues.get(i);
// 判断添加语句中有无字段
if (tableFieldNameListSize > 0){
bufferedWriter = new BufferedWriter(new FileWriter
(path + "/" + table + "/field/" + tableFieldNameList.get(i).trim() + ".txt",true));
}else {
bufferedWriter = new BufferedWriter(new FileWriter
(path + "/" + table + "/field/" + DefaultTableFieldNameListlist.get(i) + ".txt",true));
}
bufferedWriter.append(sqlFieldValue);
bufferedWriter.newLine();
bufferedWriter.flush();// 清除缓存
bufferedWriter.close();
}
}
}
/**
* '15465', '全体成员(无人武部,交警莲湖大队)', null, null,
* '1', '2266', '3071', '2021-05-18 15:36:32', '2021-04-27 14:43:47', null, null
*
* @author : YingYew影耀子
* @date : 2023/3/16 15:06
* @param :null
* @return :null
**/
public List<String> handle(String sqlFieldValuesStr) {
StringBuilder sqlFieldValuesStrBuilder = new StringBuilder(sqlFieldValuesStr);
// System.out.println(sqlFieldValuesStr);
List<String> sqlFieldValueStrList = new ArrayList<>();
/** 1,从后往前读取,判断是否以引号结尾,
* 截取以最后一个,到最后一行的数据
* 是:去除前后空格及引号
* 否:去除前后空格
*
* @author : 影耀子(YingYew)
* @date : 2023/3/16 16:51
* @param :args
**/
while (sqlFieldValuesStrBuilder.length() > 0){
String sqlFieldValuesBuilderStr = sqlFieldValuesStrBuilder.toString();
int length = sqlFieldValuesStrBuilder.length();
String sqlFieldValue;
if (sqlFieldValuesBuilderStr.endsWith("'")){
sqlFieldValue = getSqlFieldValue("'",sqlFieldValuesStr,length,sqlFieldValuesStrBuilder);
}else if (sqlFieldValuesBuilderStr.endsWith("\"")){
sqlFieldValue = getSqlFieldValue("\"",sqlFieldValuesStr,length,sqlFieldValuesStrBuilder);
}else if (sqlFieldValuesBuilderStr.endsWith("`")){
sqlFieldValue = getSqlFieldValue("`",sqlFieldValuesStr,length,sqlFieldValuesStrBuilder);
}else{
int lastIndex = sqlFieldValuesStr.lastIndexOf(",", length - 1);
// 因为无引号,所以截取到结尾
sqlFieldValue = sqlFieldValuesStr.substring
(lastIndex+1, length).trim();
// .replace(",","").trim()
if (lastIndex != -1){
sqlFieldValuesStrBuilder.setLength(lastIndex);
}else {
sqlFieldValuesStrBuilder.setLength(0);
}
}
sqlFieldValueStrList.add(sqlFieldValue);
}
Collections.reverse(sqlFieldValueStrList);
return sqlFieldValueStrList;
}
/**
* 获取sql语句中的值
* @author : YingYew影耀子
* @date : 2023/3/17 10:20
* @return :java.lang.String
**/
public String getSqlFieldValue(String quotation,String sqlFieldValuesStr,int length,StringBuilder sqlFieldValuesStrBuilder){
// 获取字段值的开始下标
int lastIndexQuote = sqlFieldValuesStr.lastIndexOf(quotation, length - 2);
// 若检测到\,则特殊处理得到字段值开始下标,
while (sqlFieldValuesStr.startsWith("\\", lastIndexQuote-1)){
lastIndexQuote = sqlFieldValuesStr.lastIndexOf(quotation, lastIndexQuote-1);
}
// 获取字段值前面的“,”开始的下标
int lastIndexComma = sqlFieldValuesStr.lastIndexOf(",", lastIndexQuote-1);
// 获取字段值,不包含包含引号
String sqlFieldValueQuestion = sqlFieldValuesStr.substring
(lastIndexQuote+1, length-1);
if (lastIndexComma != -1){
sqlFieldValuesStrBuilder.setLength(lastIndexComma);
}else {
sqlFieldValuesStrBuilder.setLength(0);
}
// 正确的值
return analyseSprit(sqlFieldValueQuestion);
}
/**
* 解析斜杠
* 未\\\\央\\\\\区\'各\\\街\\道 输出:未\\央\\区'各\街\道
* 斯蒂芬\\r\\n斯蒂芬\\r\\n斯蒂芬\\r\\n斯蒂芬\\r\\n斯蒂芬\\r\\n斯蒂芬\\r\\n斯蒂芬
* 输出:斯蒂芬\r\n斯蒂芬\r\n斯蒂芬\r\n斯蒂芬\r\n斯蒂芬\r\n斯蒂芬\r\n斯蒂芬
*
* @date : 2023/3/17 11:29
* @param :sqlFieldValuesStr
* @return :java.lang.String
**/
public String analyseSprit(String sqlFieldValue) {
if (!sqlFieldValue.contains("\\")){
return sqlFieldValue;
}
// sqlFieldValue = sqlFieldValue.replace("\\"+quotation,quotation);
// 解析斜杠的逻辑
StringBuilder sqlFieldValueStringBuilder = new StringBuilder(sqlFieldValue);
List<String> handleSqlFieldValueFallList = new ArrayList<>();
while (sqlFieldValueStringBuilder.length() > 0){
// 获取最后处理\的下标
int barEndIndex = sqlFieldValue.lastIndexOf("\\",sqlFieldValueStringBuilder.length()-1);
if (barEndIndex == -1){
handleSqlFieldValueFallList.add(sqlFieldValueStringBuilder.toString());
break;
}
// 获取每个\开始的下标
int barStartIndex = barEndIndex;
while (sqlFieldValue.startsWith("\\",barStartIndex-1)){
barStartIndex--;
}
// 获取杠的个数
int barNum = barEndIndex-barStartIndex+1;
// 正确的杠的个数
int correctBarNum = barNum/2;
StringBuilder bar = new StringBuilder();
for (int i = 0; i < correctBarNum; i++) {
bar.append("\\");
}
handleSqlFieldValueFallList.add(
bar.append(sqlFieldValue, barEndIndex+1, sqlFieldValueStringBuilder.length()).toString());
sqlFieldValueStringBuilder.setLength(barStartIndex);
}
Collections.reverse(handleSqlFieldValueFallList);
StringBuilder handleSqlFieldValue = new StringBuilder();
for (String s :handleSqlFieldValueFallList){
handleSqlFieldValue.append(s);
}
return handleSqlFieldValue.toString();
}
}
此代码已测试过,可多线程运行过10G内容的sql文件,无内存溢出