步骤:
通过解析hiveserver2的info日志,获取每个执行sql命令。
通过durid对sql进行解析,获取对于表的操作,并写到数据库中。
接口提供,最小粒度到分钟,给到应用做展现。
关键代码直接给出来:
public class TableFrequency {
private static Logger logger = Logger.getLogger(TableFrequency.class);
//用于匹配日志开头
private static Pattern datePattern = Pattern.compile(
"^\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2},\\d{3}.*");
//用于解析出这条日志的时间,即执行时间
private static Pattern dateMatch = Pattern.compile(
"\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}");
//用于解析出SQL语句
private static String sqlRegex = ".*Executing command\\(queryId=[\\w-]*\\):";
//用于去掉comment
private static String commentRegex = "--.*--";
public void analyse() {
try {
File logFile = new File(logFileName);
RandomAccessFile raf = new RandomAccessFile(logFile, "r");
long fileLen = 0;
long strSumLen = 0;
String appendLine = ""; //这个对象用来记录完整的一行日志,因为有些sql是有换行的。
while (true) {
try {
raf.getFilePointer();
String line = raf.readLine();
if (null != line) {
line = new String(line.getBytes("iso-8859-1"), "utf8"); //解决中文乱码问题
if (matches(line)) { //真正的下一行日志
if (appendLine.contains("Executing command") && !appendLine.contains("select 1")) {
String sql = appendLine.replaceAll(sqlRegex, " ").trim();
String event_time = getEventTime(appendLine);
parseSql(sql, event_time);
}
appendLine = "";
}
String lineWithoutComment = line.replaceAll(commentRegex, " ").split("--")[0];
appendLine += lineWithoutComment + "\n"; //把这行日志加入
//appendLine += line;
strSumLen += line.length();
}
fileLen = raf.length();
if (fileLen <= strSumLen) {
logger.info("no more data, wait 1s....");
Thread.sleep(1000);
}
} catch (Exception e){
logger.error(e.getMessage(), e);
appendLine = "";
}
}
} catch (Exception e) {
logger.error(e.getMessage(), e);
}
}
private static boolean matches(String line) {
return datePattern.matcher(line).matches();
}
public static String getEventTime(String line) {
Matcher matcher = dateMatch.matcher(line);
StringBuilder sb = new StringBuilder();
while (matcher.find()) {
sb.append(matcher.group());
}
return sb.toString().replace("T", " ");
}
private static void parseSql(String sql, String event_time){
logger.info("parse sql " + sql);
SQLStatement statement = SQLUtils.parseSingleStatement(sql, DbType.hive, false);
HiveSchemaStatVisitor visitor = new HiveSchemaStatVisitor();
statement.accept(visitor);
Map<TableStat.Name, TableStat> result = visitor.getTables();
for(Map.Entry<TableStat.Name, TableStat> entry: result.entrySet()){
parseDataAndPost(entry.getKey().getName(), entry.getValue(), event_time);
}
}
private static void parseDataAndPost(String table, TableStat stat, String event_time){
JSONObject param = new JSONObject();
String[] tableInfo = table.split("\\.");
if (tableInfo.length > 1){
param.put("database", tableInfo[0]);
param.put("table_name", tableInfo[1]);
} else {
param.put("database", "");
param.put("table_name", tableInfo[0]);
}
param.put("event_time", event_time);
for (int i = 0; i < stat.getAlterCount(); i++){
param.put("operator", "alter");
postData(param.toString());
}
for (int i = 0; i < stat.getCreateCount(); i++){
param.put("operator", "create");
postData(param.toString());
}
for (int i = 0; i < stat.getCreateIndexCount(); i++){
param.put("operator", "create_index");
postData(param.toString());
}
for (int i = 0; i < stat.getDeleteCount(); i++){
param.put("operator", "delete");
postData(param.toString());
}
for (int i = 0; i < stat.getDropCount(); i++){
param.put("operator", "drop");
postData(param.toString());
}
for (int i = 0; i < stat.getDropIndexCount(); i++){
param.put("operator", "drop_index");
postData(param.toString());
}
for (int i = 0; i < stat.getInsertCount(); i++){
param.put("operator", "insert");
postData(param.toString());
}
for (int i = 0; i < stat.getMergeCount(); i++){
param.put("operator", "merge");
postData(param.toString());
}
for (int i = 0; i < stat.getSelectCount(); i++){
param.put("operator", "select");
postData(param.toString());
}
for (int i = 0; i < stat.getUpdateCount(); i++){
param.put("operator", "update");
postData(param.toString());
}
}
//我这里得处理时发接口,也可以写表,写消息队列等
private static void postData(String postData){
HttpPost httpPost = new HttpPost(url);
CloseableHttpClient client = HttpClients.createDefault();
StringEntity entity = new StringEntity(postData, "UTF-8");
entity.setContentEncoding("UTF-8");
entity.setContentType("application/json");
httpPost.setEntity(entity);
try {
HttpResponse response = client.execute(httpPost);
if (response.getStatusLine().getStatusCode() == 200) {
logger.info("post data success: " + postData);
} else {
logger.info("post data fail: " + postData + ", result: " + EntityUtils.toString(response.getEntity()));
}
} catch (IOException e) {
logger.error(e.getMessage(), e);
}
}