normalize、splitText

本文详细介绍了DOM操作中的normalize方法与splitText方法。normalize方法可以将一个父元素内的多个文本节点合并为一个,而splitText方法则用于将一个文本节点分割成两个。通过具体的JavaScript代码示例,展示了如何使用这两种方法来更好地管理和操作DOM中的文本内容。
1、normalize:如果在包含两个或者多个文本节点的父元素上调用normalize()方法,则会将所有文本节点合并成一个节点;
// Build a <div> holding two *separate* text nodes (no normalize yet).
const oDiv = document.createElement("div");
oDiv.appendChild(document.createTextNode("第一个"));
oDiv.appendChild(document.createTextNode("第二个"));
document.body.appendChild(oDiv);
// Without normalize(), firstChild is still only the first text node.
console.log(oDiv.firstChild.nodeValue); // logs: 第一个

如果调用normalize();
// Same setup as before: a <div> with two adjacent text nodes.
const oDiv = document.createElement("div");
const firstText = document.createTextNode("第一个");
const secondText = document.createTextNode("第二个");
oDiv.appendChild(firstText);
oDiv.appendChild(secondText);
document.body.appendChild(oDiv);

// normalize() merges all adjacent text-node children into one node.
oDiv.normalize();
console.log(oDiv.firstChild.nodeValue); // logs: 第一个第二个 — merged into a single text node
2、splitText():分割文本节点;与normalize正好相反;
    // splitText(n) is the inverse of normalize(): it splits one text node in two.
    const oDiv = document.createElement("div");
    oDiv.appendChild(document.createTextNode("第一个和第二个文本节点"));
    document.body.appendChild(oDiv);
    // Split at character offset 5; the tail becomes a new sibling text node.
    oDiv.firstChild.splitText(5);
    console.log(oDiv.firstChild.nodeValue); // "第一个和第" — the first 5 characters
    console.log(oDiv.lastChild.nodeValue);  // "二个文本节点" — the remainder

"""Log-translation pipeline.

Converts a plain-text GPU debug log into a pretty-printed XML file with
per-entry Chinese translations (loaded from a CSV lookup table), optionally
filters the XML by module name, and exports the result as TXT.
"""

import os
import csv
import xml.etree.ElementTree as ET
from xml.dom import minidom
import datetime
import re


# ================== Utility functions ==================

def get_path(filename):
    """Return the absolute path of *filename* relative to this script's directory."""
    return os.path.join(os.path.dirname(os.path.abspath(__file__)), filename)


def normalize_spaces(text):
    """Collapse runs of whitespace (spaces, tabs, newlines) in *text* to a
    single space and strip leading/trailing whitespace.

    Returns "" for None / non-string / empty input.
    """
    if not text or not isinstance(text, str):
        return ""
    return re.sub(r'\s+', ' ', text).strip()


# All supported Module names (used for the filtering prompt / validation).
MODULES = [
    'HIVIEW', 'SAMGR', 'UPDATE', 'ACE', 'APP',
    'AAFWK', 'GRAPHIC', 'MEDIA', 'DMA', 'SEN',
    'SCY', 'XTS', 'SOFTBUS', 'PWM', 'UIKIT', 'GLOBAL', 'DATAMGR',
    'HISI', 'COMMU', 'NV', 'BSP', 'DM', 'PM',
    'LCD', 'GPS', 'BT', 'NFC', 'SVRAUDIO', 'FTM', 'SVR',
    'APPUI', 'AST', 'FIT', 'ALG', 'HRM', 'TLV', 'WIFI', 'INPUT',
    'RFID', 'EARSDK', 'SVCOMMU', 'RSMC', 'UWB', 'WATCHEAR',
    'PAIR', 'DRVAUDIO', 'BRIGHT', 'FITUI', 'DIALUI', 'FMKUI', 'HRMUI',
]


# ================== Stage 1: load translation dict + lookup ==================

def load_translation_dict():
    """Load the English -> Chinese mapping from 全量翻译对照表.csv.

    Uses utf-8-sig to tolerate a BOM. Returns an (possibly empty) dict;
    errors are reported on stdout rather than raised.
    """
    csv_file = get_path("全量翻译对照表.csv")
    translation_map = {}
    if not os.path.exists(csv_file):
        print(f"[错误] 找不到翻译文件:{os.path.abspath(csv_file)}")
        return translation_map
    try:
        with open(csv_file, mode='r', encoding='utf-8-sig', newline='') as f:
            reader = csv.DictReader(f)
            if '英文' not in reader.fieldnames or '中文' not in reader.fieldnames:
                print(f"[错误] CSV 缺少必要列。期望:'英文', '中文',实际:{reader.fieldnames}")
                return translation_map
            for row in reader:
                eng_key = normalize_spaces(row['英文'])
                chn_val = normalize_spaces(row['中文'])
                if eng_key:  # skip rows with an empty English key
                    translation_map[eng_key] = chn_val
        print(f"[成功] 已加载 {len(translation_map)} 条翻译规则")
    except Exception as e:
        import traceback
        print(f"[错误] 读取 CSV 失败:{e}")
        print(traceback.format_exc())
    return translation_map


def find_translation(message, translations):
    """Look up the translation for *message*.

    Tries an exact (whitespace-normalized) match first, then falls back to
    the longest key that is a prefix of the message.

    :param message: raw message string from the log
    :param translations: English -> Chinese mapping
    :return: the Chinese translation, or "" when nothing matches
    """
    msg_clean = normalize_spaces(message)
    # 1. Exact match.
    if msg_clean in translations:
        return translations[msg_clean]
    # 2. Longest-prefix match (longest keys tried first so the most
    #    specific rule wins).
    sorted_keys = sorted(translations.keys(), key=len, reverse=True)
    for key in sorted_keys:
        if msg_clean.startswith(key):
            return translations[key]
    return ""


# ================== Stage 2: text log -> translated XML ==================

def convert_log_to_xml():
    """Convert the text log into a translated XML file (prefix matching
    supported). Each call regenerates and overwrites the previous output.

    :return: path of the generated XML file, or None on failure.
    """
    input_file = get_path("gpu_debug.log_offline.txt")
    output_dir = os.path.join(os.path.dirname(input_file), "results")
    output_file = os.path.join(output_dir, "gpu_debug.log_全量翻译.xml")
    os.makedirs(output_dir, exist_ok=True)

    translations = load_translation_dict()
    if not translations:
        print("[警告] 翻译规则为空,仍将生成 XML,但无 Translation 内容")
    if not os.path.exists(input_file):
        print(f"[错误] 找不到日志文件:{os.path.abspath(input_file)}")
        return None

    try:
        root = ET.Element("Log")
        unmatched_count = 0
        total_count = 0
        with open(input_file, "r", encoding="utf-8-sig") as f:
            for line_num, line in enumerate(f, 1):
                line = line.strip()
                if not line:
                    continue
                # Expected format: time,level,module,message — the message
                # itself may contain commas, hence maxsplit=3.
                parts = line.split(",", 3)
                if len(parts) < 4:
                    print(f"[警告] 第 {line_num} 行字段不足,跳过:{line}")
                    continue
                time_str, level, module, message = [p.strip().strip('"') for p in parts]
                time_str = normalize_spaces(time_str)
                level = normalize_spaces(level)
                module = normalize_spaces(module)
                message = normalize_spaces(message)
                total_count += 1
                entry = ET.SubElement(root, "Entry")
                ET.SubElement(entry, "Time").text = time_str
                ET.SubElement(entry, "Level").text = level
                ET.SubElement(entry, "Module").text = module
                ET.SubElement(entry, "Message").text = message
                translation_text = find_translation(message, translations)
                if not translation_text:
                    unmatched_count += 1
                ET.SubElement(entry, "Translation").text = translation_text

        status_msg = f"[提示] 总共 {total_count} 条日志"
        if unmatched_count > 0:
            status_msg += f",其中 {unmatched_count} 条未匹配到翻译"
        print(status_msg)

        # Pretty-print via minidom, dropping the blank lines it inserts.
        rough_string = ET.tostring(root, encoding="utf-8")
        reparsed = minidom.parseString(rough_string)
        pretty_xml = reparsed.toprettyxml(indent=" ")
        lines_out = [line for line in pretty_xml.splitlines() if line.strip()]
        with open(output_file, "w", encoding="utf-8") as f:
            f.write(lines_out[0] + "\n")
            f.write("\n".join(lines_out[1:]) + "\n")
        print(f"[成功] 全量翻译 XML 已生成:\n{os.path.abspath(output_file)}")
        return output_file
    except Exception as e:
        import traceback
        print(f"[错误] 生成 XML 时发生异常:{e}")
        print(traceback.format_exc())
        return None


# ================== Stage 3: filter by Module and export TXT ==================

def filter_log_by_module(input_xml, output_xml, selected_modules=None):
    """Copy entries from *input_xml* to *output_xml*, keeping only those whose
    Module is in *selected_modules* (keep everything when it is falsy).

    :return: True on success, False on any error.
    """
    if not os.path.exists(input_xml):
        print(f"[错误] 找不到输入文件:{input_xml}")
        return False
    try:
        tree = ET.parse(input_xml)
        root = tree.getroot()
        new_root = ET.Element("Log")
        count = 0
        for entry in root.findall("Entry"):
            module_elem = entry.find("Module")
            # Entries without a usable Module are dropped.
            if module_elem is None or not module_elem.text:
                continue
            module_text = normalize_spaces(module_elem.text)
            if selected_modules and module_text not in selected_modules:
                continue
            new_entry = ET.SubElement(new_root, "Entry")
            for child in entry:
                new_child = ET.SubElement(new_entry, child.tag)
                new_child.text = normalize_spaces(child.text) if child.text else ""
            count += 1

        rough_string = ET.tostring(new_root, encoding="utf-8")
        reparsed = minidom.parseString(rough_string)
        pretty_xml = reparsed.toprettyxml(indent=" ")
        lines_out = [line for line in pretty_xml.splitlines() if line.strip()]
        with open(output_xml, "w", encoding="utf-8") as f:
            f.write(lines_out[0] + "\n")
            f.write("\n".join(lines_out[1:]) + "\n")
        print(f"[成功] 已生成筛选后的 XML:{output_xml},共 {count} 条记录")
        return True
    except Exception as e:
        import traceback
        print(f"[错误] 处理 XML 时发生异常:{e}")
        print(traceback.format_exc())
        return False


def convert_xml_to_txt(xml_file, txt_file):
    """Render *xml_file* entries as plain text: one "time,module,message" line
    per entry, followed by the translation on its own line when present.

    :return: True on success, False on any error.
    """
    if not os.path.exists(xml_file):
        print(f"[错误] 找不到 XML 文件:{xml_file}")
        return False
    try:
        tree = ET.parse(xml_file)
        root = tree.getroot()
        with open(txt_file, "w", encoding="utf-8") as f:
            for entry in root.findall("Entry"):
                time_elem = entry.find("Time")
                module_elem = entry.find("Module")
                message_elem = entry.find("Message")
                trans_elem = entry.find("Translation")
                time_text = normalize_spaces(time_elem.text) if time_elem is not None and time_elem.text else ""
                module_text = normalize_spaces(module_elem.text) if module_elem is not None and module_elem.text else ""
                message_text = normalize_spaces(message_elem.text) if message_elem is not None and message_elem.text else ""
                trans_text = normalize_spaces(trans_elem.text) if trans_elem is not None and trans_elem.text else ""
                # Write the (normalized) original log line.
                if time_text and module_text and message_text:
                    f.write(f"{time_text},{module_text},{message_text}\n")
                # The translation, when present, goes on its own line.
                if trans_text:
                    f.write(f"{trans_text}\n")
        print(f"[成功] 已生成 TXT 文件:{txt_file}")
        return True
    except Exception as e:
        import traceback
        print(f"[错误] 转换为 TXT 时出错:{e}")
        print(traceback.format_exc())
        return False


# ================== Main flow ==================

def main():
    """Regenerate the translated XML, optionally filter by Module, export TXT."""
    # --- Paths ---
    full_xml_path = get_path("results/gpu_debug.log_全量翻译.xml")
    filtered_xml_path = get_path("results/gpu_debug.log_模块筛选.xml")
    filtered_txt_path = get_path("results/gpu_debug.log_模块筛选.txt")
    full_txt_path = get_path("results/gpu_debug.log_全量翻译.txt")
    os.makedirs(get_path("results"), exist_ok=True)

    # === Always regenerate the XML (old file is overwritten) ===
    timestamp = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    print(f"[正在重新生成] 全量翻译 XML | 时间: {timestamp}")
    print("(旧文件将被自动覆盖)")
    generated_xml = convert_log_to_xml()
    if not generated_xml:
        print("[失败] 无法生成全量 XML,程序退出。")
        return
    full_xml_path = generated_xml

    # --- Stage 2: ask whether to filter by Module ---
    print("可用 Module 示例:" + ", ".join(MODULES[:6]) + " ...")
    user_input = input("\n请输入要筛选的一个或多个 Module(用英文逗号隔开),直接回车表示不筛选:").strip()
    selected_modules = None
    if user_input:
        selected_modules = [normalize_spaces(m) for m in user_input.split(",") if m.strip()]
        # Unknown modules are only warned about — they are still matched.
        invalid = [m for m in selected_modules if m not in MODULES]
        if invalid:
            print(f"[警告] 以下 Module 不在预定义列表中,但仍会尝试匹配:{invalid}")
        print(f"[信息] 正在筛选 Module: {selected_modules}")

    # --- Stage 3: export TXT ---
    if selected_modules:
        success_xml = filter_log_by_module(full_xml_path, filtered_xml_path, selected_modules)
        if not success_xml:
            return
        convert_xml_to_txt(filtered_xml_path, filtered_txt_path)
    else:
        convert_xml_to_txt(full_xml_path, full_txt_path)


# ================ Entry point ================
if __name__ == "__main__":
    main()
最新发布
12-12
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值