一,需求
最近手头有个需求就是从日志文件中匹配出需要的一些值,然后写入MySQL数据库中,源日志
[2019-07-29 00:00:04,238] INFO [pool-39-thread-311] c.a.c.a.f.l.AccessLog.info - Total_time=652 Exec_time=652 Queue_time=0 - [2019-07-29 00:00:03 586] 1 select id from tablename where student_id = 15690 and status in (0,1) and is_deleted = 0 and is_ex_class = 0 limit1\;process=20190729000301119424722109999543351\;CLUSTER=ay-ads-hangzhou
仅需要匹配出Total_time,Exec_time,Queue_time,以及查询的SQL
二,分析
用正则表达式,匹配出需要的值,然后插入MySQL
Total_time=(?<total_time>\d+) Exec_time=(?<exec_time>\d+) Queue_time=(?<queue_time>\d+) - [(?<time>.*?)] \d+ (?<sql>.*?)\
三,动手实践
①,先读取日志文件,一行一行的读取,然后用List集合收集
public static List<String> readFile(String path) throws IOException {
List<String> list = new ArrayList<String>();
FileInputStream fis = new FileInputStream(path);
InputStreamReader isr = new InputStreamReader(fis, "UTF-8");
BufferedReader br = new BufferedReader(isr);
String line = "";
while ((line = br.readLine()) != null) {
list.add(line);
}
br.close();
isr.close();
fis.close();
return list;
}
②,配置好MySQL连接等信息
③,往MySQL写入
第一种方式比较笨的,性能低的方式,一行一行写入
String path = "C:/Users/xxx.log";
List<String> scanListPath = readFile(path);
System.out.println(scanListPath.size());
String regex = "Total_time=(?<totalTime>\\d+) Exec_time=(?<execTime>\\d+) Queue_time=(?<queueTime>\\d+) - \\[(?<time>.*?)\\] \\d+ (?<sql>.*?)\\\\";
Pattern pattern = Pattern.compile(regex);
Connection conn = getConn();
System.out.println("conn:"+conn);
scanListPath.stream().forEach(line -> {
Matcher matcher = pattern.matcher(line);
int toTime = 0;
int queueTime = 0;
int execTime = 0;
String executeTime= "";
String sql= "";
if (matcher.find()) {
toTime = Integer.parseInt(matcher.group("totalTime"));
execTime = Integer.parseInt(matcher.group("execTime"));
queueTime = Integer.parseInt(matcher.group("queueTime"));
executeTime = matcher.group("time");
sql = matcher.group("sql").replaceAll("\\t"," ").replaceAll("`","");
}
String statment = "insert into log_result_0731(total_time,exec_time,queue_time,execute_time,ex_sql) values("+toTime+","+execTime+","+queueTime+",'"+executeTime+"','"+sql+"')";
try {
PreparedStatement pste = conn.prepareStatement(statment);
pste.execute();
} catch (SQLException e) {
e.printStackTrace();
System.out.println(statment);
}
});
if (conn != null)
conn.close();
上面方式60万数据耗时半小时多,不能接受,
第二种方式,拼接一条长SQL,一次性写入1000条数据、
语法:
insert into table(col1,col2,col3) values (a,b,c),(d,e,f),(h,i,j)...
String path = "C:/Users/xxx.log";
List<String> scanListPath = readFile(path);
System.out.println(scanListPath.size());
String regex = "Total_time=(?<totalTime>\\d+) Exec_time=(?<execTime>\\d+) Queue_time=(?<queueTime>\\d+) - \\[(?<time>.*?)\\] \\d+ (?<sql>.*?)\\\\";
Pattern pattern = Pattern.compile(regex);
Connection conn = getConn();
System.out.println("conn:"+conn);
List<Tuple5> list = new ArrayList<>();
scanListPath.stream().forEach(line -> {
Matcher matcher = pattern.matcher(line);
int toTime = 0;
int queueTime = 0;
int execTime = 0;
String executeTime= "";
String sql= "";
if (matcher.find()) {
toTime = Integer.parseInt(matcher.group("totalTime"));
execTime = Integer.parseInt(matcher.group("execTime"));
queueTime = Integer.parseInt(matcher.group("queueTime"));
executeTime ="\'" + matcher.group("time")+"'";
sql = "\'"+matcher.group("sql").replaceAll("`","")+"'";
Tuple5<Integer,Integer,Integer,String,String> tuple5 = new Tuple5(toTime,execTime,queueTime,executeTime,sql);//将结果存入五元组
list.add(tuple5);
}
String sql_prefix = "insert into log_result_0805(total_time,exec_time,queue_time,execute_time,ex_sql) values ";
String statmentsql = "";
//集合数据到达一千就开始写入
if (list.size() >= 1000){
StringBuffer sb = new StringBuffer();
sb.append(sql_prefix );
//拼接SQL
list.stream().forEach(tup ->{
sb.append("(").append(tup._1()).append(",").append(tup._2()).append(",").append(tup._3()).append(",").append(tup._4()).append(",").append(tup._5()).append("),");
});
String stat = sb.toString();
boolean suffix = stat.endsWith(",");
if (suffix) {
int length = stat.length();
statmentsql = stat.substring(0,length - 1);//去除最后一个逗号
}
// System.out.println("sql:"+statmentsql);
try {
PreparedStatement pste = conn.prepareStatement(statmentsql);
// pste.executeBatch();
pste.execute();
} catch (SQLException e) {
e.printStackTrace();
System.out.println(statmentsql);
}
list.clear();
}
});
//最后不足一千的数据写入库
if (!(null == list)){
String sql_preefix = "insert into log_result_0805(total_time,exec_time,queue_time,execute_time,ex_sql) values ";
String statmentsql = "";
StringBuffer sb = new StringBuffer();
sb.append(sql_preefix);
list.stream().forEach(tup ->{
sb.append("(").append(tup._1()).append(",").append(tup._2()).append(",").append(tup._3()).append(",").append(tup._4()).append(",").append(tup._5()).append("),");
});
String stat = sb.toString();
boolean suffix = stat.endsWith(",");
if (suffix) {
int length = stat.length();
statmentsql = stat.substring(0,length - 1);
}
list.clear();
try {
PreparedStatement pste = conn.prepareStatement(statmentsql);
pste.execute();
} catch (SQLException e) {
e.printStackTrace();
System.out.println(statmentsql);
}
}
if (conn != null)
conn.close();
第二种方式60万数据大约一分钟之内搞定,前后两种方式,差距可想而知