# -*- coding: utf-8 -*-
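"""Extract column-level lineage from Hive (HQL) execution logs for OpenMetadata."""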
import datetime
import json
import os.path
from json import JSONDecodeError
import requests
# OpenMetadata API token
metadata_token = " "
# Keyword that marks column-lineage entries in the Hive log
search_keyword = "hooks.LineageLogger:"
# OpenMetadata database schema whose tables are scanned
metadata_service_db_schema = " "
# OpenMetadata server address
url = ' '
# HTTP headers for requests to the OpenMetadata API
header: dict = {
    "Accept": "application/json, text/plain, */*",
    "Content-Type": "application/json;charset=UTF-8",
    'Authorization': 'Bearer ' + metadata_token
}
# Partition fields of the Hive tables; listed here because OpenMetadata
# does not load partition fields
partition_field = []
# Log files produced by Hive job execution
log_path_list = [
    "/tmp/root/hive.log"
]
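

# The excerpt configures `url` and `header` but never reaches the upload step.
# As a sketch of how they would be used: OpenMetadata exposes a lineage API
# (PUT /v1/lineage) that accepts an edge between two entities. The base-path
# composition and payload shape below are assumptions, not part of the article.
def push_lineage_edge(from_table_id: str, to_table_id: str):
    """Sketch: register a table-to-table lineage edge in OpenMetadata."""
    payload = {
        "edge": {
            "fromEntity": {"id": from_table_id, "type": "table"},
            "toEntity": {"id": to_table_id, "type": "table"},
        }
    }
    # Assumes `url` is the server root, e.g. "http://host:8585"
    resp = requests.put(url + "/api/v1/lineage", headers=header, data=json.dumps(payload))
    resp.raise_for_status()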


def read_hive_log(file_path: str):
    """
    Read a Hive log file and return the lineage entries found in it.

    Args:
        file_path (str): path to the Hive log file.

    Returns:
        content (list): JSON records parsed from the lines that contain
            the lineage keyword.
    """
    # Bookkeeping: where each log file was left off on the previous run
    save_dict = {}
    if os.path.exists('docs/hash_index.log'):
        try:
            with open('docs/hash_index.log', 'r') as f:
                file_content = f.read()
                if file_content != '':
                    save_dict = json.loads(file_content)
        except JSONDecodeError as e:
            print(f"Failed to parse the file content as JSON: {e}")
    new_file = file_path.split("/")[-1]
    if new_file in save_dict:
        old_size = save_dict.get(new_file).get('size', 0)
        line_index = save_dict.get(new_file).get('index', 0)
    else:
        # New file: read it from the beginning
        old_size = 0
        line_index = 0
    is_new_file = False
    is_read_ok = True
    try:
        new_size: int = os.path.getsize(file_path)
    except FileNotFoundError as e:
        print("File does not exist: ", e)
        new_size = 0
        is_read_ok = False
    except Exception as e1:
        print("Failed to read the file size: ", e1)
        new_size = 0
        is_read_ok = False
    # Treat the file as new when it has never been seen, or when it shrank
    # (e.g. the log was rotated), which makes the saved position stale
    if new_file not in save_dict or new_size < old_size or old_size == 0:
        is_new_file = True
    content = []
    if is_read_ok:
        # One-shot flags so progress messages are printed at most once
        is_new_file_only_one = is_old_file_only_one = is_old_update_only_one = False
        try:
            # Assumed reconstruction from this point: skip lines already read
            # on a previous run, then collect the JSON payload that follows
            # the lineage keyword.
            with open(file_path, 'r', encoding='utf-8', errors='replace') as log_file:
                for line_number, line in enumerate(log_file, 1):
                    if not is_new_file and line_number <= line_index:
                        if not is_old_file_only_one:
                            print(f"Resuming {new_file} after line {line_index}")
                            is_old_file_only_one = True
                        continue
                    if is_new_file and not is_new_file_only_one:
                        print(f"Reading {new_file} from the beginning")
                        is_new_file_only_one = True
                    if search_keyword in line:
                        json_part = line.split(search_keyword, 1)[-1].strip()
                        try:
                            content.append(json.loads(json_part))
                        except JSONDecodeError:
                            pass  # the payload on this line is not valid JSON
                    line_index = line_number
        except Exception as e2:
            print("Failed to read the log file: ", e2)
    # Persist the position reached so the next run only reads new lines
    save_dict[new_file] = {'size': new_size, 'index': line_index}
    os.makedirs('docs', exist_ok=True)
    with open('docs/hash_index.log', 'w') as f:
        f.write(json.dumps(save_dict))
    return content
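

# Usage sketch (assumed; the listing ends before any driver code): walk the
# configured Hive logs and report how many lineage records were found.
if __name__ == '__main__':
    for log_path in log_path_list:
        lineage_entries = read_hive_log(log_path)
        print(f"{log_path}: {len(lineage_entries)} lineage record(s)")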