OpenMetadata 读取 HQL 日志的字段血缘关系

# -*- coding: utf-8 -*-
import datetime
import json
import os.path
from json import JSONDecodeError

import requests

# OpenMetadata access token (placeholder value; presumably filled in before use)
metadata_token = "  "
# Keyword that marks lineage entries in the Hive log
search_keyword = "hooks.LineageLogger:"
# Schema that OpenMetadata scans tables from (placeholder value)
metadata_service_db_schema = " "
# OpenMetadata address (placeholder value)
url = " "

# Request headers for OpenMetadata; the token is passed as a Bearer credential
header: dict = {
    "Accept": "application/json, text/plain, */*",
    "Content-Type": "application/json;charset=UTF-8",
    "Authorization": f"Bearer {metadata_token}",
}
# Hive table partition fields, listed here because OpenMetadata does not load
# partition fields
partition_field = []

# Log files generated by Hive job executions
log_path_list = [
    "/tmp/root/hive.log",
]


def read_hive_log(file_path: str):
    """
    读取Hive日志文件并返回包含关键词的行内容列表

    参数:
    file_path (str):Hive日志文件的路径

    返回:
    content (list):包含关键词的行内容json列表
    """
    save_dict = {}
    if os.path.exists('docs/hash_index.log'):
        try:
            with open("docs/hash_index.log", 'r') as f:
                file_content = f.read()
                if file_content != '':
                    save_dict = json.loads(file_content)
        except json.JSONDecodeError as e:
            print(f"无法将文件内容转换为JSON:{e}")

    new_file = log_path.split("/")[-1]

    if new_file in save_dict.keys():
        old_size = save_dict.get(new_file).get('size', 0)
        line_index = save_dict.get(new_file).get('index', 0)
    else:
        # print("此为新文件,从头开始读取")
        old_size = 0
        line_index = 0

    is_new_file = False
    is_read_ok = True
    try:
        new_size: int = os.path.getsize(file_path)
    except FileNotFoundError as e:
        print("文件不存在: ", e)
        new_size = 0
        is_read_ok = False
    except Exception as e1:
        print("读取文件大小失败: ", e1)
        new_size = 0
        is_read_ok = False
    if (new_file not in save_dict.keys()) or (new_file in save_dict.keys() and (new_size < old_size or old_size == 0)):
        is_new_file = True

    content = []
    if is_read_ok:
        is_new_file_only_one = is_old_file_only_one = is_old_update_only_one = False
        try:
            with open(file_path, 'r', encoding='utf-8', errors='replace') as log_file:
                for line_number, line in enumerate(log_file, 1):
                    if search
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值