log文件批量解析入库

最新推荐文章于 2022-10-19 18:19:57 发布

转载 · 最新推荐文章于 2022-10-19 18:19:57 发布 · 308 阅读

0 ·

CC 4.0 BY-SA版权

原文链接：https://my.oschina.net/gao0516/blog/144286

文章标签：

#shell #python #数据库

2019独角兽企业重金招聘Python工程师标准>>>

废话不多说，直接上代码：

//解析log文件将每一行记录入库
	/**
	 * Parses a log file line by line and batch-inserts every line that matches the
	 * configured format into the database.
	 *
	 * <p>The map returned by {@code ResvolingLogFile.pinjieSql(logFormat)} supplies a
	 * comma-separated list of column keys (map key) and the SQL statement to prepare
	 * (map value); if the map holds several entries the last one iterated wins. For
	 * every column key a regular expression is looked up through {@code rb} and the
	 * expressions are concatenated, each wrapped in its own capturing group, to form
	 * the pattern a whole line must match. Lines that do not match are skipped.
	 *
	 * <p>The file is decoded with the platform default charset. Rows are sent to the
	 * database in batches of 20000 and committed after every batch.
	 *
	 * @param fileAllPath path of the log file to import
	 * @param logFormat   format identifier handed to {@code ResvolingLogFile.pinjieSql}
	 * @param key         value bound to the statement parameter directly after the column values
	 * @param logType     value bound to the statement parameter after {@code key}
	 * @return {@code true} if at least one line matched and every batch was executed and
	 *         committed; {@code false} if no line matched or if an exception occurred
	 *         while importing (the exception is printed, uncommitted work is rolled back)
	 * @throws Exception if the file cannot be opened, the database setup fails, or
	 *                   releasing the resources fails
	 */
	public boolean moreLogFile(String fileAllPath,String logFormat,
			String key,String logType) 	throws Exception {
		final int batchSize = 20000;
		int pending = 0;   // rows queued in the current, not yet executed batch
		int n = 0;         // rows queued overall (lines that matched)
		int count = 0;     // lines read overall
		boolean bo = false;

		// A reader decodes the byte stream continuously, so a multi-byte character can no
		// longer be torn apart at a fixed-size chunk boundary, and the last line is
		// delivered even when the file does not end with a line break.
		java.io.BufferedReader reader = new java.io.BufferedReader(
				new java.io.InputStreamReader(
						new java.io.FileInputStream(new File(fileAllPath)),
						java.nio.charset.Charset.defaultCharset()));
		try {
			DBConnectionPool dbpool = new DBConnectionPool();
			dbpool.createPool();
			Connection conn = null;
			PreparedStatement ppsm = null;
			try {
				conn = dbpool.getConnection();
				conn.setAutoCommit(false);

				ResvolingLogFile rf = new ResvolingLogFile();
				Map map = rf.pinjieSql(logFormat);
				String sqlkey = "", sqlvalue = "";
				Iterator it = map.entrySet().iterator();
				while (it.hasNext()) {
					Map.Entry entry = (Map.Entry) it.next();
					sqlkey = (String) entry.getKey();
					sqlvalue = (String) entry.getValue();
				}

				// Join the per-column expressions, one capturing group per column.
				String[] logkey = sqlkey.split(",");
				StringBuffer zhengzeAll = new StringBuffer();
				for (int s = 0; s < logkey.length; s++) {
					zhengzeAll.append("(").append(rb.getString(logkey[s])).append(")");
				}
				// Compiled once; the pattern is the same for every line.
				Pattern pat = Pattern.compile(zhengzeAll.toString().trim());

				ppsm = conn.prepareStatement(sqlvalue);

				try {
					String line;
					while ((line = reader.readLine()) != null) {
						System.out.println("===line==="+line);
						count++;

						Matcher mat = pat.matcher(line.trim()+" ");
						if (!mat.matches()) {
							continue;  // line does not follow the configured format
						}
						for (int g = 1; g <= mat.groupCount(); g++) {
							ppsm.setString(g, stripFieldDecoration(mat.group(g)));
						}
						ppsm.setString(logkey.length+1, key);
						ppsm.setString(logkey.length+2, logType);
						ppsm.addBatch();
						pending++;
						n++;

						if (pending == batchSize) {
							ppsm.executeBatch();
							conn.commit();
							ppsm.clearBatch();
							pending = 0;
						}
					}
					// Flush the rows left over after the last full batch.
					if (pending > 0) {
						ppsm.executeBatch();
						conn.commit();
						ppsm.clearBatch();
					}
					// Success no longer depends on the row count not being a multiple of
					// the batch size; an import without any matching line still yields false.
					bo = n > 0;
				} catch (Exception ex) {
					ex.printStackTrace();
					try {
						conn.rollback();   // discard the batch that was not committed
					} catch (Exception rollbackEx) {
						rollbackEx.printStackTrace();
					}
				}
			} finally {
				// Release in reverse order of acquisition; the connection is handed back
				// only once, after its last use.
				try {
					if (ppsm != null) {
						ppsm.close();
					}
				} finally {
					try {
						if (conn != null) {
							dbpool.returnConnection(conn);
						}
					} finally {
						dbpool.closeConnectionPool();
					}
				}
			}
		} finally {
			reader.close();
		}
		System.out.println("==========n======="+n);
		System.out.println("==========count==="+count);
		return bo;
	}

	/**
	 * Removes the decoration around a captured field: all double quotes if the value
	 * contains any, otherwise all square brackets if it contains both an opening and a
	 * closing one. Any other value (including {@code null}) is returned unchanged.
	 */
	private static String stripFieldDecoration(String value) {
		if (value == null) {
			return null;   // group did not participate in the match
		}
		if (value.indexOf("\"") > -1) {
			return value.replace("\"", "");
		}
		if (value.indexOf("[") > -1 && value.indexOf("]") > -1) {
			return value.replace("[", "").replace("]", "");
		}
		return value;
	}

转载于:https://my.oschina.net/gao0516/blog/144286