import os
import csv
import xml.etree.ElementTree as ET
from xml.dom import minidom
from datetime import datetime
# 获取脚本所在目录下的文件路径
def get_path(filename):
    """Build the path of ``filename`` inside the directory holding this script.

    Args:
        filename: File name (or relative path) appended to the script directory.

    Returns:
        The absolute directory of this file joined with ``filename``.
    """
    script_dir = os.path.dirname(os.path.abspath(__file__))
    return os.path.join(script_dir, filename)
# 确保 results 目录存在
def ensure_results_dir():
    """Return the ``results`` directory beside this script, creating it if absent.

    Returns:
        The path of the ``results`` directory located next to this file.
    """
    script_dir = os.path.dirname(os.path.abspath(__file__))
    target = os.path.join(script_dir, "results")
    if os.path.exists(target):
        return target
    os.makedirs(target)
    return target
# =============================
# Step 1: 将日志转换为格式化的 XML 文件
# =============================
def convert_log_to_xml():
    """Convert the comma-separated debug log into a pretty-printed XML file.

    Reads ``gpu_debug.log_offline.txt`` from the script directory (UTF-8, an
    optional BOM is ignored). Every non-empty line is split on its first three
    commas into time, level, module and message; surrounding whitespace and
    double quotes are stripped from each field. Lines with fewer than four
    fields are reported and skipped. The result is written as UTF-8 without a
    BOM to ``results/gpu_debug.log_offline.xml``: a ``<Log>`` root holding one
    ``<Entry>`` (``Time``/``Level``/``Module``/``Message``) per log line.

    Characters that XML 1.0 forbids in character data (most C0 control
    characters, U+FFFE, U+FFFF) are removed from every field. ElementTree
    serialises them verbatim, so a single one would make the minidom re-parse
    fail and abort the whole conversion.

    Returns:
        True when the XML file was written; False when the input file is
        missing or any error occurred (the error is printed).
    """
    # Translation table that deletes code points XML 1.0 does not allow.
    invalid_xml_chars = dict.fromkeys(
        [*range(0x00, 0x09), 0x0B, 0x0C, *range(0x0E, 0x20), 0xFFFE, 0xFFFF]
    )
    field_tags = ("Time", "Level", "Module", "Message")

    input_file = get_path("gpu_debug.log_offline.txt")
    output_file = os.path.join(ensure_results_dir(), "gpu_debug.log_offline.xml")
    if not os.path.exists(input_file):
        print(f"[错误] 找不到输入文件 {input_file}")
        return False

    try:
        root = ET.Element("Log")
        with open(input_file, "r", encoding="utf-8-sig") as f:
            for line_num, line in enumerate(f, 1):
                line = line.strip()
                if not line:
                    continue
                # maxsplit=3 keeps any further commas inside the message field.
                parts = line.split(",", 3)
                if len(parts) < 4:
                    print(f"[警告] 第 {line_num} 行格式不正确,跳过: {line}")
                    continue
                entry = ET.SubElement(root, "Entry")
                for tag, raw in zip(field_tags, parts):
                    value = raw.translate(invalid_xml_chars).strip().strip('"')
                    ET.SubElement(entry, tag).text = value

        # Re-parse with minidom only to obtain indented output.
        rough_string = ET.tostring(root, encoding="utf-8")
        pretty_xml = minidom.parseString(rough_string).toprettyxml(indent=" ")
        lines_out = [ln for ln in pretty_xml.splitlines() if ln.strip()]

        # The first line is the XML declaration; plain "utf-8" writes no BOM.
        with open(output_file, "w", encoding="utf-8") as f:
            f.write("\n".join(lines_out) + "\n")
        print(f"[成功] 成功生成 XML 文件:{output_file}")
        return True
    except Exception as e:
        print(f"[错误] 生成 XML 时出错:{e}")
        return False
# =============================
# Step 2: 加载 Keywords.txt 映射规则
# =============================
def load_keywords():
    """Load keyword -> explanation rules from ``Keywords.txt`` beside the script.

    Each useful line has the form ``keyword|||meaning``; both sides are
    stripped of surrounding whitespace. Blank lines, lines without the
    ``|||`` separator and lines whose keyword part is empty are ignored. When
    a keyword occurs more than once, the last occurrence wins.

    Returns:
        A dict mapping keyword to meaning. It is empty when the file is
        missing, and holds whatever was read so far when reading fails (the
        error is printed).
    """
    keyword_file = get_path("Keywords.txt")
    keywords = {}
    if not os.path.exists(keyword_file):
        print(f"[错误] 未找到关键词文件 {keyword_file}")
        return keywords

    try:
        with open(keyword_file, "r", encoding="utf-8-sig") as f:
            for line in f:
                keyword, separator, meaning = line.strip().partition("|||")
                keyword = keyword.strip()
                # An empty keyword is a substring of every message, so keeping
                # it would make it "match" every log line.
                if not separator or not keyword:
                    continue
                keywords[keyword] = meaning.strip()
        print(f"[成功] 成功加载 {len(keywords)} 条关键词规则")
    except Exception as e:
        print(f"[错误] 读取关键词文件失败:{e}")
    return keywords
# =============================
# Step 3: 匹配关键词并生成翻译日志
# =============================
def match_keywords_in_message(keywords):
    """Write a translation for every log entry whose message contains a keyword.

    Reads ``results/gpu_debug.log_offline.xml``, asks on stdin for an optional
    comma-separated list of module names to filter by (case-insensitive), and
    for each remaining ``<Entry>`` looks for the first keyword (in the
    mapping's iteration order) contained case-insensitively in the message.
    Matching entries are written to ``results/Log_Translation.txt`` as two
    lines: ``time,module,message`` followed by the keyword's translation.

    Args:
        keywords: Mapping of keyword to translation text. Empty keywords are
            ignored because they are a substring of every message.

    Returns:
        The list of ``time,module,message`` strings that matched. It is empty
        when the XML file is missing, and holds the matches collected so far
        when an error interrupts processing (the error is printed).
    """
    results_dir = ensure_results_dir()
    xml_file = os.path.join(results_dir, "gpu_debug.log_offline.xml")
    output_file = os.path.join(results_dir, "Log_Translation.txt")
    if not os.path.exists(xml_file):
        print(f"[错误] 找不到 XML 文件 {xml_file}")
        return []

    # Lower-case each keyword once instead of once per log entry.
    lowered_keywords = [(kw.lower(), kw) for kw in keywords if kw]

    matched_entries = []
    try:
        root = ET.parse(xml_file).getroot()

        print("请输入要筛选的模块名称(如 FIT,FITUI),回车跳过:")
        user_input = input().strip()
        # Drop empty tokens so input such as "," or "FIT,," does not turn into
        # a filter on the empty module name.
        target_modules = {
            name.strip().upper() for name in user_input.split(",") if name.strip()
        }

        with open(output_file, "w", encoding="utf-8") as f:
            for entry in root.findall("Entry"):
                time_elem = entry.find("Time")
                module_elem = entry.find("Module")
                message_elem = entry.find("Message")
                if time_elem is None or module_elem is None or message_elem is None:
                    continue

                # Element.text is None for empty elements; use "" so the
                # output never contains the literal text "None".
                time_text = time_elem.text or ""
                module_text = module_elem.text or ""
                message_text = message_elem.text or ""

                # Module filter (skipped when the user entered nothing).
                if target_modules and module_text.strip().upper() not in target_modules:
                    continue

                message_lower = message_text.lower()
                matched_keyword = next(
                    (kw for lowered, kw in lowered_keywords if lowered in message_lower),
                    None,
                )
                if matched_keyword is None:
                    continue

                raw_log = f"{time_text},{module_text},{message_text}"
                translation = keywords[matched_keyword]
                f.write(f"{raw_log}\n{translation}\n")
                matched_entries.append(raw_log)

        print(f"[成功] 成功匹配 {len(matched_entries)} 条")
        print(f"结果已保存至:{output_file}")
    except Exception as e:
        print(f"[错误] 匹配关键词时出错:{e}")
    return matched_entries
# =============================
# Step 4: 问题定位
# =============================
# Timestamp layout shared by the XML log, the prompts and the caller arguments.
_ISSUE_TIME_FORMAT = "%Y-%m-%d %H:%M:%S"


def _parse_issue_time(text):
    """Parse ``text`` with the log timestamp format; return None if it does not match."""
    try:
        return datetime.strptime(text, _ISSUE_TIME_FORMAT)
    except (ValueError, TypeError):
        return None


def _load_issue_rules(keywords_csv):
    """Read diagnosis rules from ``keywords_csv``; print the reason and return [] on failure."""

    def cell(row, column):
        # DictReader fills missing trailing columns with None; treat those as
        # empty instead of turning them into the literal text "None".
        return str(row.get(column) or '').strip().strip('"')

    rules = []
    try:
        with open(keywords_csv, mode='r', encoding='utf-8-sig') as f:
            reader = csv.DictReader(f)
            headers = reader.fieldnames
            if headers is None:
                print("[错误] CSV 文件头为空,请确认编码和格式")
                return []
            expected_headers = ['问题标签', '关键字', '问题类型', '备注']
            if headers != expected_headers:
                # Only a warning: rows are still read by column name.
                print(f"[警告] 列名不匹配!期望: {expected_headers}")
                print(f" 实际: {headers}")
                print("请确保第一行为:问题标签,关键字,问题类型,备注")
            # Data rows start on line 2 of the file (line 1 is the header).
            for line_num, row in enumerate(reader, 2):
                keyword = cell(row, '关键字')
                issue_type = cell(row, '问题类型')
                if not keyword or not issue_type:
                    print(f"[警告] 第 {line_num} 行缺少必要字段,跳过: {row}")
                    continue
                rules.append({
                    'tag': cell(row, '问题标签'),
                    'keyword': keyword,
                    'type': issue_type,
                    'suggestion': cell(row, '备注'),
                })
        if not rules:
            print("[错误] CSV文件中没有加载到任何规则,请检查文件内容和格式")
            return []
        print(f"[成功] 成功加载 {len(rules)} 条诊断规则(来自 {keywords_csv})")
        print("可用的问题标签:")
        for tag in sorted({rule['tag'] for rule in rules}):
            print(f" - '{tag}'")
    except Exception as e:
        print(f"[错误] 读取 {keywords_csv} 失败:{e}")
        import traceback
        traceback.print_exc()
        return []
    return rules


def _choose_issue_rules(rules):
    """Prompt on stdin until the user selects issue tags; return the matching rules."""
    print("输入要分析的【问题标签】")
    print("示例输入:久坐,抬腕 或 久坐、抬腕")
    all_tags = sorted({rule['tag'] for rule in rules})
    while True:
        user_input = input("请输入问题标签(回车=全选): ").strip()
        print(f"[提示] 您输入的是: '{user_input}'")
        if user_input == "?":
            print("\n所有可用标签列表:")
            for i, tag in enumerate(all_tags, 1):
                print(f" {i:2d}. {tag}")
            print()
            continue
        if not user_input:
            print("[成功] 已选择全部问题标签")
            return list(rules)

        # Accept the full-width comma (U+FF0C) and the ideographic comma as
        # separators in addition to the ASCII comma.
        normalized = user_input.replace('\uff0c', ',').replace('、', ',')
        tags = [
            cleaned
            for cleaned in (part.strip().strip('"\' ') for part in normalized.split(","))
            if cleaned
        ]
        print(f"[提示] 分割后标签列表: {tags}")
        if not tags:
            print("[警告] 输入无效,请重新输入。")
            continue

        # Matching ignores case and spaces and accepts containment either way.
        queries = [tag.lower().replace(' ', '') for tag in tags]
        matched_rules = []
        for rule in rules:
            rule_tag = rule['tag'].lower().replace(' ', '')
            # An empty string is contained in everything; never match on it.
            if rule_tag and any(q and (q in rule_tag or rule_tag in q) for q in queries):
                matched_rules.append(rule)

        if matched_rules:
            print(f"[成功] 匹配成功!共 {len(matched_rules)} 条规则")
            print("匹配到的标签: " + ", ".join(sorted({r['tag'] for r in matched_rules})))
            return matched_rules
        print("[错误] 未匹配到任何标签,请参考以下可用标签:")
        for tag in all_tags:
            print(f" - {tag}")


def _default_issue_time_range(xml_file):
    """Return ``(root, start, end)``: the parsed XML root (or None) and the default range strings."""
    # NOTE(review): falling back to "now" gives an empty window for any log
    # written in the past; kept because the printed messages promise it.
    now_text = datetime.now().strftime(_ISSUE_TIME_FORMAT)
    if not os.path.exists(xml_file):
        print(f"[警告] 未找到 XML 文件 {xml_file},使用当前时间作为默认值")
        return None, now_text, now_text

    root = None
    try:
        root = ET.parse(xml_file).getroot()
        time_elements = root.findall(".//Entry/Time")
        if not time_elements:
            print("[警告] XML 文件中没有找到任何 <Time> 标签,使用当前时间")
            return root, now_text, now_text
        first_time_str = (time_elements[0].text or "").strip()
        last_time_str = (time_elements[-1].text or "").strip()
        if (_parse_issue_time(first_time_str) is not None
                and _parse_issue_time(last_time_str) is not None):
            print(f"已从日志文件中提取时间范围:")
            print(f"开始: {first_time_str}")
            print(f"结束: {last_time_str}")
            return root, first_time_str, last_time_str
        print("[警告] 时间格式异常,使用当前时间作为默认值")
    except Exception as e:
        print(f"[警告] 解析 XML 获取时间失败:{e},使用当前时间")
    return root, now_text, now_text


def _write_issue_report(output_file, selected_rules, problems_by_tag, final_start, final_end):
    """Write the per-tag report to ``output_file`` and print a summary of the findings."""
    all_selected_tags = sorted({rule['tag'] for rule in selected_rules})
    total_count = 0
    with open(output_file, 'w', encoding='utf-8') as f:
        f.write(f"分析时间范围: {final_start or '不限'} 至 {final_end or '不限'}\n")
        f.write("=" * 70 + "\n\n")
        # Every selected tag gets a section, including tags without matches.
        for tag_name in all_selected_tags:
            problems = problems_by_tag.get(tag_name, [])
            clean_tag = tag_name.strip()
            if not clean_tag:
                continue
            f.write(f"{clean_tag}分析:\n")
            if not problems:
                msg = "在指定时间范围内未发现相关问题"
                f.write(f"{msg}\n\n")
                print(f"[信息] {clean_tag}: {msg}")
            else:
                for idx, p in enumerate(problems, 1):
                    f.write(f"[{idx:02d}] [{p['time']}] {p['type']}\n")
                    f.write(f"原始日志: {p['content']}\n")
                    f.write(f"备注: {p['suggestion']}\n")
                    f.write("\n")
                total_count += len(problems)
            f.write("=" * 70 + "\n\n")

    # Console summary.
    if total_count == 0:
        print("[信息] 所有标签均未发现匹配的问题")
    else:
        matched_tags = [tag for tag in all_selected_tags if problems_by_tag.get(tag)]
        print(f"[成功] 共处理 {len(matched_tags)} 个标签类别,总计 {total_count} 个问题")
        for tag_name in matched_tags:
            print(f" • {tag_name}: {len(problems_by_tag[tag_name])} 个问题")

    if os.path.exists(output_file):
        size = os.path.getsize(output_file)
        print(f"[成功] 报告已生成: {output_file} ({size} 字节)")
    else:
        print(f"[错误] 文件生成失败: {output_file}")


def detect_sitting_issues(start_time=None, end_time=None):
    """Interactively scan the XML log for known issues and write a report.

    Steps:
      1. Load rules from ``Question_Keywords.csv`` beside the script (columns
         问题标签, 关键字, 问题类型, 备注).
      2. Ask on stdin which issue tags to analyse (empty input selects all,
         ``?`` lists the tags).
      3. Derive a default time range from the first and last ``<Time>`` of
         ``results/gpu_debug.log_offline.xml``.
      4. Optionally ask for a custom time range.
      5. Collect every entry inside the range whose message contains a rule
         keyword (case-insensitive).
      6. Write ``results/Log_Question.txt`` and print a summary.

    Args:
        start_time: Optional lower bound, ``'YYYY-MM-DD HH:MM:SS'``. When
            given, it replaces the start of the default range derived from
            the log.
        end_time: Optional upper bound in the same format; it replaces the
            end of the default range.

    Returns:
        Always an empty list; the report file is the product of this function.

    Raises:
        ValueError: If ``start_time`` or ``end_time`` is given but does not
            match the expected format.
    """
    keywords_csv = get_path("Question_Keywords.csv")
    results_dir = ensure_results_dir()
    xml_file = os.path.join(results_dir, "gpu_debug.log_offline.xml")
    output_file = os.path.join(results_dir, "Log_Question.txt")

    # Malformed caller-supplied bounds fail loudly before any prompting.
    dt_start = datetime.strptime(start_time, _ISSUE_TIME_FORMAT) if start_time else None
    dt_end = datetime.strptime(end_time, _ISSUE_TIME_FORMAT) if end_time else None

    # 1. Load the CSV rules.
    if not os.path.exists(keywords_csv):
        print(f"[错误] 未找到 {keywords_csv},请检查文件是否存在")
        return []
    rules = _load_issue_rules(keywords_csv)
    if not rules:
        return []

    # 2. Let the user choose which issue tags to analyse.
    selected_rules = _choose_issue_rules(rules)

    # 3. Default time range from the log; caller-supplied bounds take priority.
    root, default_start, default_end = _default_issue_time_range(xml_file)
    if dt_start is not None:
        default_start = dt_start.strftime(_ISSUE_TIME_FORMAT)
    if dt_end is not None:
        default_end = dt_end.strftime(_ISSUE_TIME_FORMAT)

    # 4. Optional custom time range entered by the user.
    use_time_filter = input("是否启用时间筛选?(y/n,默认 n): ").strip().lower()
    final_start = default_start
    final_end = default_end
    if use_time_filter in ('y', 'yes'):
        print("提示:时间格式必须为 'YYYY-MM-DD HH:MM:SS'")
        start_input = input(f"请输入开始时间(留空={default_start}): ").strip()
        end_input = input(f"请输入结束时间(留空={default_end}): ").strip()
        final_start = start_input or default_start
        final_end = end_input or default_end
        if _parse_issue_time(final_start) is None or _parse_issue_time(final_end) is None:
            print("[错误] 时间格式错误,使用无限制模式")
            final_start = None
            final_end = None

    # Parse the bounds once instead of once per log entry.
    start_bound = _parse_issue_time(final_start) if final_start else None
    end_bound = _parse_issue_time(final_end) if final_end else None
    if start_bound is not None and end_bound is not None and start_bound > end_bound:
        print("[提示] 开始时间晚于结束时间,已自动交换")
        final_start, final_end = final_end, final_start
        start_bound, end_bound = end_bound, start_bound

    # 5. Scan the log entries and collect matches per tag.
    active_rules = [
        (rule['keyword'].strip().lower(), rule)
        for rule in selected_rules
        if rule['keyword'].strip()
    ]
    problems_by_tag = {}
    try:
        if root is None:
            # The XML could not be loaded in step 3; retrying here reports the
            # failure through this step's error path.
            root = ET.parse(xml_file).getroot()
        entries = root.findall(".//Entry")
        print(f"\n开始分析日志...")
        print(f" 时间范围: {final_start or '不限'} 至 {final_end or '不限'}")
        print(f" 总日志数: {len(entries)}")
        for item in entries:
            elems = [item.find(name) for name in ("Time", "Level", "Module", "Message")]
            if any(elem is None for elem in elems):
                continue
            time_str, level_str, module_str, message_text = (elem.text or "" for elem in elems)
            if not time_str or not message_text:
                continue
            log_time = _parse_issue_time(time_str)
            if log_time is None:
                continue
            if start_bound is not None and log_time < start_bound:
                continue
            if end_bound is not None and log_time > end_bound:
                continue

            full_log_line = f"{time_str},{level_str},{module_str},{message_text}"
            message_lower = message_text.lower()
            # One entry may be reported under several rules.
            for needle, rule in active_rules:
                if needle in message_lower:
                    problems_by_tag.setdefault(rule['tag'], []).append({
                        'time': time_str,
                        'type': rule['type'],
                        'content': full_log_line,
                        'suggestion': rule['suggestion'],
                    })
        print(f"[信息] 扫描 {len(entries)} 条日志,完成匹配")
    except Exception as e:
        print(f"[错误] 解析 XML 失败:{e}")
        import traceback
        traceback.print_exc()
        return []

    # 6. Write the report file.
    try:
        _write_issue_report(output_file, selected_rules, problems_by_tag, final_start, final_end)
    except Exception as e:
        print(f"[错误] 创建输出文件失败:{e}")
        import traceback
        traceback.print_exc()
    return []
# =============================
# 主函数
# =============================
if __name__ == "__main__":
    print("日志关键字匹配与问题诊断工具启动...")

    # Step 1: build the XML file; a failure is reported but does not stop the run.
    if not convert_log_to_xml():
        print("[警告] XML 生成失败,但仍尝试继续...")

    # Step 2: load the keyword rules and, when there are any, translate the log.
    keywords = load_keywords()
    if keywords:
        matched_entries = match_keywords_in_message(keywords)
    else:
        print("[提示] 没有可用的关键字规则,跳过翻译步骤")

    # Step 3: interactive issue diagnosis.
    print("\n开始问题诊断模块...")
    detect_sitting_issues()

    print("\n全部处理完成!请查看 results/ 目录中的输出文件。")
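# "最新发布" ("Latest release") - apparently stray page text, not part of the script; kept as a comment so it is not evaluated as a name.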