Check whether a binary tree is balanced

This post explains in detail how to use a height-balancing scheme to decide whether a binary tree is balanced, and walks through two solutions: a straightforward O(N^2) approach that recursively computes subtree heights and checks that they differ by at most 1, and an O(N) approach that returns -1 for an unbalanced subtree so that parent nodes can detect the imbalance immediately.

Question:

Given a binary tree, check whether it is height-balanced.

Analysis:

Consider a height-balancing scheme in which the following conditions are checked to determine whether a binary tree is balanced.
An empty tree is height-balanced. A non-empty binary tree T is balanced if:
1) Left subtree of T is balanced
2) Right subtree of T is balanced
3) The difference between the heights of the left and right subtrees is not more than 1.

The above height-balancing scheme is the one used in AVL trees. As an example, consider two trees: one whose subtree heights differ by at most 1 at every node, and one whose left subtree is 2 levels taller than its right subtree. The first is height-balanced; the second is not. (Both shapes are constructed in the usage sketch after the first solution below.)

To check whether a tree is height-balanced, compute the heights of its left and right subtrees. Return true if the heights differ by at most 1 and both subtrees are themselves balanced; otherwise return false.

Code:

/**
 * Definition for binary tree
 * public class TreeNode {
 *     int val;
 *     TreeNode left;
 *     TreeNode right;
 *     TreeNode(int x) { val = x; }
 * }
 */
public class Solution {
    public boolean isBalanced(TreeNode root) {
        // An empty tree is height-balanced by definition.
        if (root == null) return true;
        // Unbalanced if the subtree heights differ by more than 1 ...
        if (Math.abs(height(root.left) - height(root.right)) > 1) return false;
        // ... or if either subtree is itself unbalanced.
        return isBalanced(root.left) && isBalanced(root.right);
    }
    
    // Height of a subtree: 0 for an empty tree, otherwise 1 + the taller child.
    public int height(TreeNode root) {
        if (root == null) return 0;
        return Math.max(height(root.left), height(root.right)) + 1;
    }
}
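
As a quick sanity check, here is a minimal usage sketch (BalancedDemo and the tree shapes are my own illustration, assuming the TreeNode and Solution classes above). It builds the two example trees described earlier: a balanced three-node tree and a left-leaning tree whose left subtree is 2 levels taller than its empty right subtree.

public class BalancedDemo {
    public static void main(String[] args) {
        Solution solution = new Solution();

        // Balanced:    1        Unbalanced:  1
        //             / \                   /
        //            2   3                 2
        //                                 /
        //                                3
        TreeNode balanced = new TreeNode(1);
        balanced.left = new TreeNode(2);
        balanced.right = new TreeNode(3);

        TreeNode unbalanced = new TreeNode(1);
        unbalanced.left = new TreeNode(2);
        unbalanced.left.left = new TreeNode(3);

        System.out.println(solution.isBalanced(balanced));   // true
        System.out.println(solution.isBalanced(unbalanced)); // false
    }
}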

The complexity of the code above is O(N^2) in the worst case, because height() re-traverses each subtree at every level of the isBalanced() recursion. However, some interviewers may ask for a solution with O(N) complexity. Below is such a solution. The main idea: if a subtree is balanced, return its true height; otherwise return -1. Any parent of an unbalanced subtree then also reports -1, so the imbalance propagates up to the root in a single traversal.

public class Solution {
    public boolean isBalanced(TreeNode root) {
        // A non-negative depth means every subtree is balanced.
        return getDepth(root) >= 0;
    }
    
    // Returns the height of the subtree rooted at node,
    // or -1 if that subtree is unbalanced.
    public int getDepth(TreeNode node) {
        if (node == null) return 0;
        int leftDepth = getDepth(node.left);
        if (leftDepth < 0) return -1;  // left subtree already unbalanced
        
        int rightDepth = getDepth(node.right);
        if (rightDepth < 0) return -1; // right subtree already unbalanced
        
        if (Math.abs(leftDepth - rightDepth) > 1) return -1; // unbalanced here
        return Math.max(leftDepth, rightDepth) + 1;
    }
}
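
To see how the -1 sentinel short-circuits, here is a minimal trace sketch (SentinelDemo is my own illustration, assuming the O(N) Solution above) on the same left-leaning tree as before:

public class SentinelDemo {
    public static void main(String[] args) {
        Solution solution = new Solution();

        // Degenerate tree: 1 -> 2 -> 3, all left children.
        TreeNode root = new TreeNode(1);
        root.left = new TreeNode(2);
        root.left.left = new TreeNode(3);

        // getDepth(node 3) = 1 and getDepth(node 2) = 2; at the root the
        // left depth (2) and right depth (0) differ by more than 1,
        // so -1 is returned without any further work.
        System.out.println(solution.getDepth(root));   // -1
        System.out.println(solution.isBalanced(root)); // false
    }
}

Each node is visited exactly once, which is what makes this version O(N).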

Reference: 

http://www.geeksforgeeks.org/how-to-determine-if-a-binary-tree-is-balanced/

http://blog.theliuy.com/2012/balanced-binary-tree/
