Hive表热度统计

步骤:

  1. 通过解析hiveserver2的info日志,获取每个执行sql命令。

  1. 通过durid对sql进行解析,获取对于表的操作,并写到数据库中。

  1. 接口提供,最小粒度到分钟,给到应用做展现。

关键代码直接给出来:

public class TableFrequency {
    private static Logger logger = Logger.getLogger(TableFrequency.class);
    
    //用于匹配日志开头
    private static Pattern datePattern = Pattern.compile(
            "^\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2},\\d{3}.*");
    //用于解析出这条日志的时间,即执行时间
    private static Pattern dateMatch = Pattern.compile(
            "\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}");
    //用于解析出SQL语句
    private static String sqlRegex = ".*Executing command\\(queryId=[\\w-]*\\):";
    //用于去掉comment
    private static String commentRegex = "--.*--";

    public void analyse() {
        try {
            File logFile = new File(logFileName);

            RandomAccessFile raf = new RandomAccessFile(logFile, "r");
            long fileLen = 0;
            long strSumLen = 0;
            String appendLine = ""; //这个对象用来记录完整的一行日志,因为有些sql是有换行的。
            while (true) {
                try {
                    raf.getFilePointer();
                    String line = raf.readLine();
                    if (null != line) {
                        line = new String(line.getBytes("iso-8859-1"), "utf8"); //解决中文乱码问题
                        if (matches(line)) { //真正的下一行日志
                            if (appendLine.contains("Executing command") && !appendLine.contains("select 1")) {
                                String sql = appendLine.replaceAll(sqlRegex, " ").trim();
                                String event_time = getEventTime(appendLine);
                                parseSql(sql, event_time);
                            }
                            appendLine = "";
                        }

                        String lineWithoutComment = line.replaceAll(commentRegex, " ").split("--")[0];
                        appendLine += lineWithoutComment + "\n"; //把这行日志加入
                        //appendLine += line;
                        strSumLen += line.length();
                    }

                    fileLen = raf.length();
                    if (fileLen <= strSumLen) {
                        logger.info("no more data, wait 1s....");
                        Thread.sleep(1000);
                    }
                } catch (Exception e){
                    logger.error(e.getMessage(), e);
                    appendLine = "";
                }
            }
        } catch (Exception e) {
            logger.error(e.getMessage(), e);
        }    
    }
    
    private static boolean matches(String line) {
        return datePattern.matcher(line).matches();
    }

    public static String getEventTime(String line) {
        Matcher matcher = dateMatch.matcher(line);
        StringBuilder sb = new StringBuilder();
        while (matcher.find()) {
            sb.append(matcher.group());
        }

        return sb.toString().replace("T", " ");
    }

    private static void parseSql(String sql, String event_time){
        logger.info("parse sql " + sql);
        SQLStatement statement = SQLUtils.parseSingleStatement(sql, DbType.hive, false);
        HiveSchemaStatVisitor visitor = new HiveSchemaStatVisitor();
        statement.accept(visitor);
        Map<TableStat.Name, TableStat> result = visitor.getTables();
        for(Map.Entry<TableStat.Name, TableStat> entry: result.entrySet()){
            parseDataAndPost(entry.getKey().getName(), entry.getValue(), event_time);
        }
    }

    private static void parseDataAndPost(String table, TableStat stat, String event_time){
        JSONObject param = new JSONObject();
        String[] tableInfo = table.split("\\.");
        if (tableInfo.length > 1){
            param.put("database", tableInfo[0]);
            param.put("table_name", tableInfo[1]);
        } else {
            param.put("database", "");
            param.put("table_name", tableInfo[0]);
        }

        param.put("event_time", event_time);
        for (int i = 0; i < stat.getAlterCount(); i++){
            param.put("operator", "alter");
            postData(param.toString());
        }
        for (int i = 0; i < stat.getCreateCount(); i++){
            param.put("operator", "create");
            postData(param.toString());
        }
        for (int i = 0; i < stat.getCreateIndexCount(); i++){
            param.put("operator", "create_index");
            postData(param.toString());
        }
        for (int i = 0; i < stat.getDeleteCount(); i++){
            param.put("operator", "delete");
            postData(param.toString());
        }
        for (int i = 0; i < stat.getDropCount(); i++){
            param.put("operator", "drop");
            postData(param.toString());
        }
        for (int i = 0; i < stat.getDropIndexCount(); i++){
            param.put("operator", "drop_index");
            postData(param.toString());
        }
        for (int i = 0; i < stat.getInsertCount(); i++){
            param.put("operator", "insert");
            postData(param.toString());
        }
        for (int i = 0; i < stat.getMergeCount(); i++){
            param.put("operator", "merge");
            postData(param.toString());
        }
        for (int i = 0; i < stat.getSelectCount(); i++){
            param.put("operator", "select");
            postData(param.toString());
        }
        for (int i = 0; i < stat.getUpdateCount(); i++){
            param.put("operator", "update");
            postData(param.toString());
        }
    }

    //我这里得处理时发接口,也可以写表,写消息队列等
    private static void postData(String postData){
        HttpPost httpPost = new HttpPost(url);
        CloseableHttpClient client = HttpClients.createDefault();
        StringEntity entity = new StringEntity(postData, "UTF-8");
        entity.setContentEncoding("UTF-8");
        entity.setContentType("application/json");
        httpPost.setEntity(entity);
        try {
            HttpResponse response = client.execute(httpPost);
            if (response.getStatusLine().getStatusCode() == 200) {
                logger.info("post data success: " + postData);
            } else {
                logger.info("post data fail: " + postData + ", result: " + EntityUtils.toString(response.getEntity()));
            }
        } catch (IOException e) {
            logger.error(e.getMessage(), e);
        }
    }

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值