static const char sep[256] = { [' ']=1, ['.']=1, ['-']=1, ['_']=1 };

This post describes a way of initializing an array in which the ASCII code of a character is used as the index of the element being initialized (a C99 designated initializer). GCC compiles it without complaint, but note that not every compiler accepts this syntax; older versions of Visual Studio's C compiler, for example, reject it.

static const char sep[256] = { [' ']=1, ['.']=1, ['-']=1, ['_']=1 }; // compiles with GCC; older Visual Studio compilers do not accept this form

The line defines an array and initializes selected elements: the index of each initialized element is the ASCII code of the corresponding character, and every element not listed defaults to 0.
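A minimal usage sketch (the main() driver and the sample string are illustrative assumptions, not part of the original): a non-zero entry in sep[] marks that byte as a separator, so a single table lookup replaces a chain of character comparisons.

#include <stdio.h>

static const char sep[256] = { [' ']=1, ['.']=1, ['-']=1, ['_']=1 };

int main(void)
{
    const char *s = "rate_set-2g 20m.c";   /* hypothetical input */
    for (const char *p = s; *p; p++) {
        if (sep[(unsigned char)*p])        /* table lookup instead of comparing each separator */
            putchar('\n');                 /* split at separator */
        else
            putchar(*p);
    }
    putchar('\n');
    return 0;
}

For compilers without designated-initializer support, the same table can be filled at run time instead (the array then can no longer be const):

static char sep[256];

static void init_sep(void)
{
    sep[(unsigned char)' '] = 1;
    sep[(unsigned char)'.'] = 1;
    sep[(unsigned char)'-'] = 1;
    sep[(unsigned char)'_'] = 1;
}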

# === Step 2: 更新 ENUM === if valid_blocks: enum_pattern = rf'(enum\s+{re.escape(self.enum_name)}\s*\{{)([^}}]*)\}}\s*;' match = re.search(enum_pattern, block_content, re.DOTALL | re.IGNORECASE) if not match: raise ValueError(f"未找到枚举定义: {self.enum_name}") header = match.group(1) body_content = match.group(2) lines = [ln for ln in body_content.splitlines() if ln.strip()] # 提取 target_eq_col 用于对齐(保留原逻辑) last_line = lines[-1] if lines else "" clean_last = re.sub(r'//.*|/\*.*?\*/', '', last_line).strip() first_macro_match = re.search(r'RATE_SET_[A-Z0-9_]+', clean_last) eq_match = re.search(r'=\s*\d+', clean_last) target_eq_col = 30 if first_macro_match and eq_match: raw_before = last_line[:first_macro_match.start() + eq_match.start()] expanded_before = raw_before.expandtabs(4) target_eq_col = len(expanded_before) # 构造新宏 existing_enum = parsed.get('existing_enum', {}) next_enum_value = max(existing_enum.values(), default=-1) + 1 new_macros = [] for block in valid_blocks: name = block["enum_name"] padding = max(1, target_eq_col - len(name.replace('\t', ' '))) formatted = f"{name}{' ' * padding}= {next_enum_value}" new_macros.append(formatted) next_enum_value += 1 # 使用通用函数格式化 new_body = self._format_with_inline_fallback( lines=lines, new_items=new_macros, max_per_line=self.MAX_ENUM_PER_LINE, indent_marker=" ", item_separator=" ", line_suffix=",", extract_func=lambda s: re.findall(r'RATE_SET_[A-Z0-9_]+', s), ) new_enum_code = f"{header}{new_body}\n}};" replacements.append((match.start(), match.end(), new_enum_code)) self.logger.debug(f"更新 enum: 添加 {len(valid_blocks)} 项") # === Step 3: 更新 DATA 数组 === if valid_blocks: data_pattern = rf'(static const unsigned char {re.escape(self.data_array_name)}\[\]\s*=\s*\{{)([^}}]*)(\}}\s*;)' match = re.search(data_pattern, block_content, re.DOTALL) if not match: raise ValueError(f"未找到 data 数组: {self.data_array_name}") header = match.group(1) body_content = match.group(2).strip() footer = match.group(3) lines = body_content.splitlines() last_line = lines[-1] if lines else "" indent_match = re.match(r'^(\s*)', last_line) line_indent = indent_match.group(1) if indent_match else " " new_body = body_content.rstrip() if not new_body.endswith(','): new_body += ',' for block in valid_blocks: # 插入头行(如 /*...*/ 4,) new_body += f"\n{line_indent}{block['raw_header'].strip()}" # 插入每行 body(保持原始缩进) for raw_line in block["raw_body"]: new_body += f"\n{line_indent}{raw_line}" new_data_code = f"{header}{new_body}\n{footer}" replacements.append((match.start(), match.end(), new_data_code)) self.logger.debug(f"计划更新 data 数组: 添加 {len(valid_blocks)} 个原始块") # === Step 4: 更新 INDEX 数组 === if valid_blocks: index_pattern = rf'(static const unsigned short {re.escape(self.index_array_name)}\[\]\s*=\s*\{{)([^}}]*)(\}}\s*;)' match = re.search(index_pattern, block_content, re.DOTALL) if not match: raise ValueError(f"未找到 index 数组: {self.index_array_name}") header = match.group(1) body_content = match.group(2).strip() footer = match.group(3) lines = body_content.splitlines() if body_content.strip() else [] # 计算 index 值 index_values = [] for block in valid_blocks: index_val = base_data_offset + current_new_data_size index_values.append(str(index_val)) current_new_data_size += 1 + block["count"] # 使用通用函数格式化 new_body = self._format_with_inline_fallback( lines=lines, new_items=index_values, max_per_line=self.MAX_INDEX_ITEMS_PER_LINE, indent_marker=" ", item_separator=", ", line_suffix=",", extract_func=lambda s: re.findall(r'\d+', s), # 提取数字 ) new_index_code = f"{header}{new_body}\n{footer}" 
    replacements.append((match.start(), match.end(), new_index_code))
    self.logger.debug(
        f"更新 index 数组: 同行追加部分数据 → [{index_values[0]} ...]"
    )

How should this be changed?
10-26
# rate_set/rate_sync.py import json import os import re import logging import sys from pathlib import Path from utils import resource_path from datetime import datetime from typing import Dict, List, Tuple, Any # ------------------------------- # 日志配置 # ------------------------------- PROJECT_ROOT = Path(__file__).parent.parent.resolve() LOG_DIR = PROJECT_ROOT / "output" / "log" LOG_DIR.mkdir(parents=True, exist_ok=True) LOG_FILE = LOG_DIR / f"rate_sync_{datetime.now().strftime('%Y%m%d_%H%M%S')}.log" class RateSetSynchronizer: MAX_ENUM_PER_LINE = 4 # enum 每行最多几个宏 MAX_DATA_ITEMS_PER_LINE = 4 # data 数组每行最多几个值 MAX_INDEX_ITEMS_PER_LINE = 15 # index 数组每行最多几个值 def __init__(self, c_file_path=None, dry_run=False, config_path="config/config.json"): self.logger = logging.getLogger(f"{__name__}.RateSetSynchronizer") # 加载配置 self.config_file_path = resource_path(config_path) if not os.path.exists(self.config_file_path): raise FileNotFoundError(f"配置文件不存在: {self.config_file_path}") with open(self.config_file_path, 'r', encoding='utf-8') as f: self.config = json.load(f) self.dry_run = dry_run # C 文件路径 if c_file_path is None: internal_c_path = self.config["target_c_file"] self.c_file_path = resource_path(internal_c_path) else: self.c_file_path = Path(c_file_path) if not self.c_file_path.exists(): raise FileNotFoundError(f"找不到 C 源文件: {self.c_file_path}") # === 单一锚点标记 === self.block_start = self.config["STR_RATE_SET_DATA"] self.block_end = self.config["END_RATE_SET_DATA"] # 数组与枚举名 self.data_array_name = "rate_sets_2g_20m" self.index_array_name = "rate_sets_index_2g_20m" self.enum_name = "rate_set_2g_20m" # 扫描所有子目录中的 .c 文件(排除自身) self.rate_set_dir = Path(__file__).parent self.rate_files = [ f for f in self.rate_set_dir.rglob("*.c") # 递归匹配所有 .c 文件 if f.is_file() and f.name != "rate_sync.py" ] # 加载文件名和结构映射 self.target_map = self.config.get("rate_set_map") if not isinstance(self.target_map, dict): raise ValueError("config.json 中缺少 'rate_set_map' 字段或格式错误") # 变更摘要 self.summary = { "total_new_sets": 0, "per_band": {"2g": 0, "5g": 0, "6g": 0}, "per_type": {}, # 如 20M, 40M, 80M... 
"new_macros": [], # RATE_SET_xxx "new_locales": [], # 文件中的 /*NAME*/ 块 "tables_updated": set() } self._validate_target_map() # ← 添加一致性校验 def _validate_target_map(self): """验证 rate_set_map 是否一致,防止多个 full_key 映射到同一数组""" seen_data = {} seen_index = {} seen_enum = {} for key, cfg in self.target_map.items(): d = cfg["data"] i = cfg["index"] e = cfg["enum"] if d in seen_data: raise ValueError(f"data 数组冲突: '{d}' 被 '{seen_data[d]}' 和 '{key}' 同时使用") if i in seen_index: raise ValueError(f"index 数组冲突: '{i}' 被 '{seen_index[i]}' 和 '{key}' 同时使用") if e in seen_enum: raise ValueError(f"enum 名称冲突: '{e}' 被 '{seen_enum[e]}' 和 '{key}' 同时使用") seen_data[d] = key seen_index[i] = key seen_enum[e] = key def parse_filename(self, filename: str) -> str: """ 从文件名提取 band_bw_ext 类型键,用于查找 rate_set_map 示例: 2G_20M_rate_set.c → 2G_20M_BASE 2G_20M_EXT_rate_set.c → 2G_20M_EXT 2G_20M_EXT4_NSS2_rate_set.c → 2G_20M_EXT4 5G_80M_NSS1_rate_set.c → 5G_80M_BASE 6G_320M_EXT4_NSS4_rate_set.c → 6G_320M_EXT4 """ # 增强版正则:允许中间有 _NSS\d+ pattern = r'^([A-Z0-9]+)_([0-9]+M)(?:_(EXT\d*))?(_NSS\d+)?_rate_set\.c$' match = re.match(pattern, filename, re.I) if not match: raise ValueError(f"无法识别的文件名格式: {filename}") band, bw, ext, nss_part = match.groups() ext_type = ext.upper() if ext else "BASE" return f"{band.upper()}_{bw.upper()}_{ext_type}" def extract_sub_rate_sets(self, content: str) -> List[Dict[str, Any]]: """ 提取 /*NAME*/ N, 后续多行 WL_RATE_xxx 列表 支持跨行、缩进、逗号、空行、注释干扰等 使用“按行扫描 + 状态机”方式,避免正则越界 """ self.logger.info("开始提取速率集...") self.logger.info("...") sub_sets = [] lines = [line.rstrip('\r\n') for line in content.splitlines()] # 保留原始行尾 i = 0 # 匹配 /*NAME*/ N, 的开头 header_pattern = re.compile(r'/\*\s*([A-Za-z0-9_]+)\s*\*/\s*(\d+)\s*,?') while i < len(lines): line = lines[i] stripped = line.strip() # 跳过空行和纯注释 if not stripped or stripped.startswith("//"): i += 1 continue # 查找头: /*NAME*/ N, match = header_pattern.search(stripped) if not match: i += 1 continue name = match.group(1) try: count = int(match.group(2)) except ValueError: self.logger.warning(f"⚠️ 计数无效,跳过: {name} = '{match.group(2)}'") i += 1 continue self.logger.info(f"🔍 发现块: {name}, 预期数量={count}") # 开始收集 body 内容(保留原始带缩进的行) body_lines = [] j = i + 1 max_lines_to_read = 200 while j < len(lines) and len(body_lines) < max_lines_to_read: ln = lines[j].strip() # 终止条件:遇到新 block / 结构结束 if ln.startswith("/*") or ln.startswith("}") or ln.startswith("enum"): break if ln and not ln.startswith("//"): body_lines.append(lines[j]) # ← 原样保存(含缩进) else: body_lines.append(lines[j]) # 也保留注释或空行(保持格式一致) j += 1 # 提取宏名用于校验(但不再用于生成数据) body_text = "\n".join(body_lines) all_macros = re.findall(r'WL_RATE_[A-Za-z0-9_]+', body_text) rate_items = all_macros[:count] if len(rate_items) < count: self.logger.warning(f"[{name}] 条目不足: 需要 {count}, 实际 {len(rate_items)}") # 构建结果:增加 raw_header 和 raw_body(关键改动) sub_sets.append({ "name": name, "count": count, "rates": rate_items, "raw_header": line, # ← 原始头行(如 /*...*/ 4,) "raw_body": body_lines, # ← 原始 body 行列表 "start_line": i, "end_line": j - 1 }) self.logger.debug(f"✅ 提取成功: {name} → {len(rate_items)} 个速率") i = j # 跳到下一个 block self.logger.info(f" 共提取 {len(sub_sets)} 个有效子集") return sub_sets def parse_all_structures(self, full_content: str) -> Dict: """ 直接从完整 C 文件中解析 enum/data/index 结构 """ self.logger.info("开始解析所有结构...") self.logger.info("...") result = { 'existing_enum': {}, 'data_entries': [], 'index_values': [], 'data_len': 0 } # === 解析 enum === enum_pattern = rf'enum\s+{re.escape(self.enum_name)}\s*\{{([^}}]+)\}};' enum_match = re.search(enum_pattern, full_content, re.DOTALL) if 
enum_match: body = enum_match.group(1) entries = re.findall(r'(RATE_SET_[^=,\s]+)\s*=\s*(\d+)', body) result['existing_enum'] = {k: int(v) for k, v in entries} self.logger.info(f"解析出 {len(entries)} 个已有枚举项") else: self.logger.warning(f"未找到 enum 定义: {self.enum_name}") # === 解析 data 数组 === data_pattern = rf'static const unsigned char {re.escape(self.data_array_name)}\[\] = \{{([^}}]+)\}};' data_match = re.search(data_pattern, full_content, re.DOTALL) if not data_match: raise ValueError(f"未找到 data 数组: {self.data_array_name}") data_code = data_match.group(1) result['data_entries'] = [item.strip() for item in re.split(r'[,\n]+', data_code) if item.strip()] result['data_len'] = len(result['data_entries']) # === 解析 index 数组 === index_pattern = rf'static const unsigned short {re.escape(self.index_array_name)}\[\] = \{{([^}}]+)\}};' index_match = re.search(index_pattern, full_content, re.DOTALL) if not index_match: raise ValueError(f"未找到 index 数组: {self.index_array_name}") index_code = index_match.group(1) result['index_values'] = [int(x.strip()) for x in re.split(r'[,\n]+', index_code) if x.strip()] return result def build_injection_with_format(self, new_subsets: List[Dict], existing_enum: Dict[str, int]) -> List[Dict]: """ 返回需要注入的原始块列表(包含 raw_header + raw_body) 不再返回 new_data/new_indices/new_enums """ valid_blocks = [] next_enum_value = max(existing_enum.values(), default=-1) + 1 self.logger.info(f"开始构建注入内容,当前最大枚举值 = {next_enum_value}") for subset in new_subsets: enum_name = subset["name"] if enum_name in existing_enum: self.logger.info(f"跳过已存在的枚举项: {enum_name} = {existing_enum[enum_name]}") continue # 只保存必要信息,不计算偏移 valid_blocks.append({ "enum_name": enum_name, "raw_header": subset["raw_header"], "raw_body": subset["raw_body"], "count": subset["count"], # 用于计算 data 占用空间 "enum_value": next_enum_value, # ✅ 必须存在! 
}) self.logger.debug(f" 准备注入: {enum_name}") self.logger.info(f"新增条目: {enum_name} enum={next_enum_value}") next_enum_value += 1 self.logger.info(f"构建完成:共 {len(valid_blocks)} 个新条目(保留原始格式)") return valid_blocks def format_list(self, items: List[str], indent: str = " ", width: int = 8) -> str: """格式化数组为多行字符串""" lines = [] for i in range(0, len(items), width): chunk = items[i:i + width] lines.append(indent + ", ".join(chunk) + ",") return "\n".join(lines).rstrip(",") def _safe_write_back(self, old_content: str, new_content: str) -> bool: """安全写回文件,带备份""" if old_content == new_content: self.logger.info("主文件内容无变化,无需写入") return False if self.dry_run: self.logger.info("DRY-RUN 模式启用,跳过实际写入") print("[DRY RUN] 差异预览(前 20 行):") diff = new_content.splitlines()[:20] for line in diff: print(f" {line}") return True # 创建备份 timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") backup = self.c_file_path.with_name(f"{self.c_file_path.stem}_{timestamp}.c.bak") try: self.c_file_path.rename(backup) self.logger.info(f"原文件已备份为: {backup.name}") except Exception as e: self.logger.error(f"备份失败: {e}") raise # 写入新内容 try: self.c_file_path.write_text(new_content, encoding='utf-8') self.logger.info(f"✅ 成功写入更新后的文件: {self.c_file_path.name}") return True except Exception as e: self.logger.error(f"写入失败: {e}", exc_info=True) raise def inject_new_data(self) -> bool: try: full_content = self.c_file_path.read_text(encoding='utf-8') except Exception as e: self.logger.error(f"读取主 C 文件失败: {e}") raise self.logger.info(f"正在处理 C 文件: {self.c_file_path.name}") start_pos = full_content.find(self.block_start) end_pos = full_content.find(self.block_end) if start_pos == -1: raise ValueError(f"未找到起始锚点: {self.block_start}") if end_pos == -1: raise ValueError(f"未找到结束锚点: {self.block_end}") if end_pos <= start_pos: raise ValueError("结束锚点位于起始锚点之前") inner_start = start_pos + len(self.block_start) block_content = full_content[inner_start:end_pos].strip() all_changes_made = False # === 遍历每一个 rate set 子文件 === for file_path in self.rate_files: try: self.logger.info(f"→ 处理子文件: {file_path.name}") # --- 1. 解析文件名得到 full_key --- try: full_key = self.parse_filename(file_path.name) self.logger.debug(f" ├─ 解析出 key: {full_key}") except ValueError as ve: self.logger.warning(f" └─ 跳过无效文件名: {ve}") continue # --- 2. 查找 rate_set_map 映射 --- target = self.target_map.get(full_key) if not target: self.logger.warning(f" └─ 未在 config.json 中定义映射关系: {full_key},跳过") continue # --- 3. 动态设置当前注入目标 --- self.data_array_name = target["data"] self.index_array_name = target["index"] self.enum_name = target["enum"] self.logger.debug(f" ├─ 绑定目标:") self.logger.debug(f" data: {self.data_array_name}") self.logger.debug(f" index: {self.index_array_name}") self.logger.debug(f" enum: {self.enum_name}") # --- 4. 解析主文件中的当前结构 --- try: parsed = self.parse_all_structures(full_content) except Exception as e: self.logger.error(f" └─ 解析主文件结构失败: {e}") continue # --- 5. 提取该子文件中的 rate sets --- file_content = file_path.read_text(encoding='utf-8') subsets = self.extract_sub_rate_sets(file_content) if not subsets: self.logger.info(f" └─ 无有效子集数据") continue # --- 6. 构建要注入的内容 --- valid_blocks = self.build_injection_with_format( subsets, existing_enum=parsed['existing_enum'] ) if not valid_blocks: self.logger.info(f" └─ 无需更新") continue # --- 7. 
写回新内容(精准插入)--- updated_content = self._write_back_in_blocks( full_content, parsed, valid_blocks ) if updated_content != full_content: all_changes_made = True full_content = updated_content # 更新内存内容供后续文件使用 # === 统计变更 === count = len(valid_blocks) enum_name_base = self.enum_name.replace("rate_set_", "").upper() self.summary["total_new_sets"] += count self.summary["tables_updated"].add(self.data_array_name) self.summary["tables_updated"].add(self.index_array_name) self.summary["tables_updated"].add(self.enum_name) # 提取 band 和 type (如 2G_20M_EXT) band_key = "unknown" bw_type = "unknown" if "2g" in full_key.lower(): band_key = "2g" elif "5g" in full_key.lower(): band_key = "5g" elif "6g" in full_key.lower(): band_key = "6g" bw_match = re.search(r'_(\d+M)', full_key) bw_type = bw_match.group(1) if bw_match else "unknown" self.summary["per_band"][band_key] += count self.summary["per_type"][bw_type] = self.summary["per_type"].get(bw_type, 0) + count # 收集新宏和 locale 名称 for block in valid_blocks: macro_name = f"RATE_SET_{block['enum_name']}" self.summary["new_macros"].append(macro_name) self.summary["new_locales"].append(f"{block['enum_name']} ({full_key})") self.logger.info(f"✅ 成功注入 {count} 条目 → {self.enum_name}") except Exception as e: self.logger.warning(f"❌ 处理文件失败 [{file_path.name}]: {e}") if self.logger.isEnabledFor(logging.DEBUG): self.logger.debug("详细堆栈:", exc_info=True) continue # 最终写回磁盘 if all_changes_made: try: return self._safe_write_back(self.c_file_path.read_text(encoding='utf-8'), full_content) except Exception as e: self.logger.error(f"写入最终文件失败: {e}") raise else: self.logger.info("没有需要更新的内容") return False def _format_with_inline_fallback( self, lines: List[str], new_items: List[str], max_per_line: int, indent_marker: str = " ", item_separator: str = ", ", line_suffix: str = "", # 注意:现在我们不在这里加逗号! extract_func=None, align_eq_col: bool = False, detect_spacing_from_last_line: bool = True, ) -> str: if not lines: lines = [""] last_line = lines[-1].rstrip() indent_match = re.match(r'^(\s*)', last_line) line_indent = indent_match.group(1) if indent_match else indent_marker clean_last = re.sub(r'//.*|/\*.*?\*/', '', last_line).strip() existing_items = extract_func(clean_last) if extract_func else re.findall(r'\w+', clean_last) current_count = len(existing_items) space_left = max(0, max_per_line - current_count) to_append_inline = new_items[:space_left] to_append_newline = new_items[space_left:] # === 检测真实分隔符 === actual_sep = item_separator if detect_spacing_from_last_line and len(existing_items) >= 2: first = re.escape(existing_items[0]) second = re.escape(existing_items[1]) match = re.search(f"({first})(\\s+)({second})", last_line) if match: actual_sep = match.group(2) # === 对齐等号列:关键修复 → 对齐后再加逗号 === formatted_new_items = [] for item in new_items: raw_item = item.rstrip(',') # 去掉可能已有的逗号避免重复 if align_eq_col: m = re.match(r'(\w+)\s*=\s*(\d+)', raw_item) if m: name, val = m.groups() # 计算目标列位置 target_eq_col = None for i in range(len(lines) - 1, -1, -1): ln = lines[i] eq_match = re.search(r'=\s*\d+', ln) if eq_match: raw_before = ln[:eq_match.start()] expanded_before = raw_before.expandtabs(4) target_eq_col = len(expanded_before) break if target_eq_col is None: target_eq_col = 30 padding = max(1, target_eq_col - len(name.replace('\t', ' ').expandtabs(4))) spaces = ' ' * padding aligned_item = f"{name}{spaces}= {val}" formatted_new_items.append(aligned_item) else: formatted_new_items.append(raw_item) else: formatted_new_items.append(raw_item) # 现在统一加逗号:每个 item 都要加! 
# 注意:是否加逗号应该由调用者或此函数控制,不要混合 final_formatted_items = [f"{item}," for item in formatted_new_items] to_append_inline = final_formatted_items[:space_left] to_append_newline = final_formatted_items[space_left:] # === 构建结果 === result_lines = lines[:-1] # 保留前面所有行 final_main_line = lines[-1].rstrip() # 添加 inline 项 if to_append_inline: joined_inline = actual_sep.join(to_append_inline) if final_main_line.strip(): final_main_line += actual_sep + joined_inline else: final_main_line = joined_inline result_lines.append(final_main_line) # 添加新行(每行最多 max_per_line 个) if to_append_newline: for i in range(0, len(to_append_newline), max_per_line): chunk = to_append_newline[i:i + max_per_line] joined = actual_sep.join(chunk) result_lines.append(f"{line_indent}{joined}") return '\n'.join(result_lines) def _write_back_in_blocks(self, full_content: str, parsed: Dict, valid_blocks: List[Dict]) -> str: """ 使用局部块操作策略:只在 /* START */ ... /* END */ 范围内修改内容 关键改进:直接插入 raw_header + raw_body,保留原始格式 """ self.logger.info("开始执行局部块写入操作...") self.logger.info("...") # 在 _write_back_in_blocks 最上方添加: base_data_offset = parsed['data_len'] current_new_data_size = 0 # 动态记录已写入的新 data 大小 start_pos = full_content.find(self.block_start) end_pos = full_content.find(self.block_end) if start_pos == -1 or end_pos == -1: raise ValueError(f"未找到锚点标记: {self.block_start} 或 {self.block_end}") if end_pos <= start_pos: raise ValueError("结束锚点位于起始锚点之前") inner_start = start_pos + len(self.block_start) block_content = full_content[inner_start:end_pos] replacements = [] # (start_in_block, end_in_block, replacement) # === Step 2: 更新 ENUM === if valid_blocks: # 提取函数:从字符串中提取 RATE_SET_xxx extract_enum = lambda s: re.findall(r'RATE_SET_[A-Z0-9_]+', s) enum_pattern = rf'(enum\s+{re.escape(self.enum_name)}\s*\{{)([^}}]*)\}}\s*;' match = re.search(enum_pattern, block_content, re.DOTALL | re.IGNORECASE) if not match: raise ValueError(f"未找到枚举定义: {self.enum_name}") header = match.group(1) # "enum rate_set_2g_20m {" body_content = match.group(2) lines = [ln.rstrip() for ln in body_content.splitlines() if ln.strip()] # 计算新值 new_macros = [] for block in valid_blocks: name = block["enum_name"] value = block["enum_value"] # ✅ 来自 build_injection_with_format 的正确值 new_macros.append(f"{name} = {value}") # === 关键:获取标准缩进 === indent_match = re.match(r'^(\s*)', lines[0] if lines else "") standard_indent = indent_match.group(1) if indent_match else " " # 格式化新 body new_body = self._format_with_inline_fallback( lines=lines, new_items=new_macros, max_per_line=self.MAX_ENUM_PER_LINE, indent_marker=standard_indent, item_separator=" ", line_suffix="", extract_func=extract_enum, align_eq_col=True, detect_spacing_from_last_line=True, ) # 关键修复:确保每行都有缩进(包括第一行) formatted_lines = [] for line in new_body.splitlines(): stripped = line.strip() if stripped: formatted_lines.append(f"{standard_indent}{stripped}") else: formatted_lines.append(line) final_body = '\n'.join(formatted_lines) # 关键:header 单独占一行,新 body 换行开始 new_enum_code = f"{header}\n{final_body}\n}};" replacements.append((match.start(), match.end(), new_enum_code)) self.logger.debug(f"更新 enum: 添加 {len(valid_blocks)} 项") # === Step 3: 更新 DATA 数组 === if valid_blocks: data_pattern = rf'(static const unsigned char {re.escape(self.data_array_name)}\[\]\s*=\s*\{{)([^}}]*)(\}}\s*;)' match = re.search(data_pattern, block_content, re.DOTALL) if not match: raise ValueError(f"未找到 data 数组: {self.data_array_name}") header = match.group(1) body_content = match.group(2).strip() footer = match.group(3) lines = body_content.splitlines() last_line = 
lines[-1] if lines else "" indent_match = re.match(r'^(\s*)', last_line) line_indent = indent_match.group(1) if indent_match else " " new_body = body_content.rstrip() if not new_body.endswith(','): new_body += ',' for block in valid_blocks: # 插入头行(如 /*...*/ 4,) new_body += f"\n{line_indent}{block['raw_header'].strip()}" # 插入每行 body(保持原始缩进) for raw_line in block["raw_body"]: new_body += f"\n{line_indent}{raw_line}" new_data_code = f"{header}{new_body}\n{footer}" replacements.append((match.start(), match.end(), new_data_code)) self.logger.debug(f"计划更新 data 数组: 添加 {len(valid_blocks)} 个原始块") # === Step 2: 更新 INDEX 数组 === if valid_blocks: index_pattern = rf'(static const unsigned short {re.escape(self.index_array_name)}\[\]\s*=\s*\{{)([^}}]*)(\}}\s*;)' match = re.search(index_pattern, block_content, re.DOTALL) if not match: raise ValueError(f"未找到 index 数组: {self.index_array_name}") header = match.group(1) body_content = match.group(2) footer = match.group(3).strip() lines = [ln.rstrip() for ln in body_content.splitlines()] non_empty_lines = [ln for ln in lines if ln.strip()] # 获取标准缩进(与 enum 一致) if non_empty_lines: indent_match = re.match(r'^(\s*)', non_empty_lines[0]) standard_indent = indent_match.group(1) if indent_match else " " else: standard_indent = " " # 生成新索引值 # 正确计算 index values:基于 data 偏移 + 每个 block 的实际大小 current_offset = parsed['data_len'] # 初始偏移 = 原 data 长度 new_index_values = [] for block in valid_blocks: # 添加当前 block 的起始偏移 new_index_values.append(str(current_offset)) # 偏移 += 当前 block 的数据条数 current_offset += block["count"]+1 # ← 使用 block 自带的 count! self.logger.info(f"生成新的 index values: {new_index_values}") # 格式化 index body formatted_body = self._format_with_inline_fallback( lines=non_empty_lines, new_items=new_index_values, max_per_line=self.MAX_INDEX_ITEMS_PER_LINE, indent_marker=standard_indent, item_separator=" ", line_suffix="", extract_func=lambda s: re.findall(r'\d+', s), detect_spacing_from_last_line=True, align_eq_col=False, ) # 统一添加缩进 final_lines = [] for line in formatted_body.splitlines(): stripped = line.strip() if stripped: final_lines.append(f"{standard_indent}{stripped}") else: final_lines.append("") final_body = '\n'.join(final_lines) new_index_code = f"{header}\n{final_body}\n{footer}" replacements.append((match.start(), match.end(), new_index_code)) self.logger.debug(f"更新 index 数组: 添加 {len(valid_blocks)} 个索引") # === Step 5: 倒序应用所有替换 === if not replacements: self.logger.info("无任何变更需要写入") return full_content for start, end, r in sorted(replacements, key=lambda x: x[0], reverse=True): block_content = block_content[:start] + r + block_content[end:] # === Step 6: 拼接回完整文件 === final_content = ( full_content[:inner_start] + block_content + full_content[end_pos:] ) self.logger.info(f"成功构建新内容,总长度变化: {len(full_content)} → {len(final_content)}") return final_content def _print_summary(self): """打印最终变更总结""" s = self.summary self.logger.info("=" * 60) self.logger.info("📋 R A T E S E T 变 更 总 结") self.logger.info("=" * 60) if s["total_new_sets"] == 0: self.logger.info("🟢 无新增速率集") else: self.logger.info(f"🟢 新增速率集总数: {s['total_new_sets']} 个") # 按频段分布 self.logger.info(f"📡 按频段分布:") for band in ["2g", "5g", "6g"]: cnt = s["per_band"][band] if cnt > 0: self.logger.info(f" • {band.upper()}: {cnt} 个") # 按带宽类型分布 if s["per_type"]: self.logger.info(f"📏 按带宽分布:") for bw, cnt in s["per_type"].items(): self.logger.info(f" • {bw}: {cnt} 个") # 新增的宏 self.logger.info(f"🔧 新增 ENUM 宏:") for macro in sorted(s["new_macros"])[:20]: # 限制显示前20个 self.logger.info(f" • {macro}") if len(s["new_macros"]) > 20: 
self.logger.info(f" • ... 还有 {len(s['new_macros']) - 20} 个") # 新增的 Locale 块 self.logger.info(f"📦 新增数据块 (/*NAME*/):") for loc in sorted(s["new_locales"])[:10]: self.logger.info(f" • {loc}") if len(s["new_locales"]) > 10: self.logger.info(f" • ... 还有 {len(s['new_locales']) - 10} 个") # 更新的表 updated_tables = sorted(s["tables_updated"]) self.logger.info(f"📝 更新的数据结构:") for tbl in updated_tables: prefix = "rate_sets_" if "rate_sets_" in tbl else "rate_set_" desc = "ENUM" if tbl.startswith("rate_set_") and not "_" + "idx" in tbl else \ "INDEX" if "_index_" in tbl else "DATA" self.logger.info(f" • {tbl:<30} [{desc}]") self.logger.info("=" * 60) status = "✅ 同步完成" if s["total_new_sets"] > 0 else "ℹ️ 无需更新" self.logger.info(status) def run(self): self.logger.info("开始同步 RATE_SET 数据...") try: changed = self.inject_new_data() # === 打印变更总结 === self._print_summary() if changed: print("🔄 同步完成:已更新 C 文件") else: print("✅ 无新数据,无需更新") return { "success": True, "changed": changed, "file": str(self.c_file_path), "backup": f"{self.c_file_path.stem}_{datetime.now().strftime('%Y%m%d_%H%M%S')}.c.bak" if changed and not self.dry_run else None } except Exception as e: self.logger.error(f"同步失败: {e}", exc_info=True) print("❌ 同步失败,详见日志。") return {"success": False, "error": str(e)} def main(): logging.basicConfig( level=logging.INFO, format='%(asctime)s [%(levelname)s] %(name)s: %(message)s', handlers=[ logging.FileHandler(LOG_FILE, encoding='utf-8'), logging.StreamHandler(sys.stdout) ], force=True ) dry_run = False # 设置为 True 可进行试运行 try: sync = RateSetSynchronizer(dry_run=dry_run) sync.run() print("同步完成!") except FileNotFoundError as e: logging.error(f"文件未找到: {e}") print(" 文件错误,请检查路径。") sys.exit(1) except PermissionError as e: logging.error(f"权限错误: {e}") print(" 权限不足,请关闭编辑器或以管理员运行。") sys.exit(1) except Exception as e: logging.error(f"程序异常退出: {e}", exc_info=True) print(" 同步失败,详见日志。") sys.exit(1) if __name__ == '__main__': main() 这个呢
11-10
/*************************************************************************** * _ _ ____ _ * Project ___| | | | _ \| | * / __| | | | |_) | | * | (__| |_| | _ <| |___ * \___|\___/|_| \_\_____| * * Copyright (C) Daniel Stenberg, <daniel@haxx.se>, et al. * * This software is licensed as described in the file COPYING, which * you should have received as part of this distribution. The terms * are also available at https://curl.se/docs/copyright.html. * * You may opt to use, copy, modify, merge, publish, distribute and/or sell * copies of the Software, and permit persons to whom the Software is * furnished to do so, under the terms of the COPYING file. * * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY * KIND, either express or implied. * * SPDX-License-Identifier: curl * ***************************************************************************/ #include "curl_setup.h" #include "urldata.h" #include <curl/curl.h> #include <stddef.h> #ifdef HAVE_LIBZ #include <zlib.h> #endif #ifdef HAVE_BROTLI #include <brotli/decode.h> #endif #ifdef HAVE_ZSTD #include <zstd.h> #endif #include "sendf.h" #include "http.h" #include "content_encoding.h" #include "strdup.h" #include "strcase.h" #include "curl_memory.h" #include "memdebug.h" #define CONTENT_ENCODING_DEFAULT "identity" #ifndef CURL_DISABLE_HTTP #define DSIZ CURL_MAX_WRITE_SIZE /* buffer size for decompressed data */ #ifdef HAVE_LIBZ /* Comment this out if zlib is always going to be at least ver. 1.2.0.4 (doing so will reduce code size slightly). */ #define OLD_ZLIB_SUPPORT 1 #define GZIP_MAGIC_0 0x1f #define GZIP_MAGIC_1 0x8b /* gzip flag byte */ #define ASCII_FLAG 0x01 /* bit 0 set: file probably ascii text */ #define HEAD_CRC 0x02 /* bit 1 set: header CRC present */ #define EXTRA_FIELD 0x04 /* bit 2 set: extra field present */ #define ORIG_NAME 0x08 /* bit 3 set: original file name present */ #define COMMENT 0x10 /* bit 4 set: file comment present */ #define RESERVED 0xE0 /* bits 5..7: reserved */ typedef enum { ZLIB_UNINIT, /* uninitialized */ ZLIB_INIT, /* initialized */ ZLIB_INFLATING, /* inflating started. */ ZLIB_EXTERNAL_TRAILER, /* reading external trailer */ ZLIB_GZIP_HEADER, /* reading gzip header */ ZLIB_GZIP_INFLATING, /* inflating gzip stream */ ZLIB_INIT_GZIP /* initialized in transparent gzip mode */ } zlibInitState; /* Deflate and gzip writer. */ struct zlib_writer { struct contenc_writer super; zlibInitState zlib_init; /* zlib init state */ uInt trailerlen; /* Remaining trailer byte count. */ z_stream z; /* State structure for zlib. 
*/ }; static voidpf zalloc_cb(voidpf opaque, unsigned int items, unsigned int size) { (void) opaque; /* not a typo, keep it calloc() */ return (voidpf) calloc(items, size); } static void zfree_cb(voidpf opaque, voidpf ptr) { (void) opaque; free(ptr); } static CURLcode process_zlib_error(struct Curl_easy *data, z_stream *z) { if(z->msg) failf(data, "Error while processing content unencoding: %s", z->msg); else failf(data, "Error while processing content unencoding: " "Unknown failure within decompression software."); return CURLE_BAD_CONTENT_ENCODING; } static CURLcode exit_zlib(struct Curl_easy *data, z_stream *z, zlibInitState *zlib_init, CURLcode result) { if(*zlib_init == ZLIB_GZIP_HEADER) Curl_safefree(z->next_in); if(*zlib_init != ZLIB_UNINIT) { if(inflateEnd(z) != Z_OK && result == CURLE_OK) result = process_zlib_error(data, z); *zlib_init = ZLIB_UNINIT; } return result; } static CURLcode process_trailer(struct Curl_easy *data, struct zlib_writer *zp) { z_stream *z = &zp->z; CURLcode result = CURLE_OK; uInt len = z->avail_in < zp->trailerlen? z->avail_in: zp->trailerlen; /* Consume expected trailer bytes. Terminate stream if exhausted. Issue an error if unexpected bytes follow. */ zp->trailerlen -= len; z->avail_in -= len; z->next_in += len; if(z->avail_in) result = CURLE_WRITE_ERROR; if(result || !zp->trailerlen) result = exit_zlib(data, z, &zp->zlib_init, result); else { /* Only occurs for gzip with zlib < 1.2.0.4 or raw deflate. */ zp->zlib_init = ZLIB_EXTERNAL_TRAILER; } return result; } static CURLcode inflate_stream(struct Curl_easy *data, struct contenc_writer *writer, zlibInitState started) { struct zlib_writer *zp = (struct zlib_writer *) writer; z_stream *z = &zp->z; /* zlib state structure */ uInt nread = z->avail_in; Bytef *orig_in = z->next_in; bool done = FALSE; CURLcode result = CURLE_OK; /* Curl_client_write status */ char *decomp; /* Put the decompressed data here. */ /* Check state. */ if(zp->zlib_init != ZLIB_INIT && zp->zlib_init != ZLIB_INFLATING && zp->zlib_init != ZLIB_INIT_GZIP && zp->zlib_init != ZLIB_GZIP_INFLATING) return exit_zlib(data, z, &zp->zlib_init, CURLE_WRITE_ERROR); /* Dynamically allocate a buffer for decompression because it's uncommonly large to hold on the stack */ decomp = malloc(DSIZ); if(!decomp) return exit_zlib(data, z, &zp->zlib_init, CURLE_OUT_OF_MEMORY); /* because the buffer size is fixed, iteratively decompress and transfer to the client via downstream_write function. */ while(!done) { int status; /* zlib status */ done = TRUE; /* (re)set buffer for decompressed output for every iteration */ z->next_out = (Bytef *) decomp; z->avail_out = DSIZ; #ifdef Z_BLOCK /* Z_BLOCK is only available in zlib ver. >= 1.2.0.5 */ status = inflate(z, Z_BLOCK); #else /* fallback for zlib ver. < 1.2.0.5 */ status = inflate(z, Z_SYNC_FLUSH); #endif /* Flush output data if some. */ if(z->avail_out != DSIZ) { if(status == Z_OK || status == Z_STREAM_END) { zp->zlib_init = started; /* Data started. */ result = Curl_unencode_write(data, writer->downstream, decomp, DSIZ - z->avail_out); if(result) { exit_zlib(data, z, &zp->zlib_init, result); break; } } } /* Dispatch by inflate() status. */ switch(status) { case Z_OK: /* Always loop: there may be unflushed latched data in zlib state. */ done = FALSE; break; case Z_BUF_ERROR: /* No more data to flush: just exit loop. 
*/ break; case Z_STREAM_END: result = process_trailer(data, zp); break; case Z_DATA_ERROR: /* some servers seem to not generate zlib headers, so this is an attempt to fix and continue anyway */ if(zp->zlib_init == ZLIB_INIT) { /* Do not use inflateReset2(): only available since zlib 1.2.3.4. */ (void) inflateEnd(z); /* don't care about the return code */ if(inflateInit2(z, -MAX_WBITS) == Z_OK) { z->next_in = orig_in; z->avail_in = nread; zp->zlib_init = ZLIB_INFLATING; zp->trailerlen = 4; /* Tolerate up to 4 unknown trailer bytes. */ done = FALSE; break; } zp->zlib_init = ZLIB_UNINIT; /* inflateEnd() already called. */ } result = exit_zlib(data, z, &zp->zlib_init, process_zlib_error(data, z)); break; default: result = exit_zlib(data, z, &zp->zlib_init, process_zlib_error(data, z)); break; } } free(decomp); /* We're about to leave this call so the `nread' data bytes won't be seen again. If we are in a state that would wrongly allow restart in raw mode at the next call, assume output has already started. */ if(nread && zp->zlib_init == ZLIB_INIT) zp->zlib_init = started; /* Cannot restart anymore. */ return result; } /* Deflate handler. */ static CURLcode deflate_init_writer(struct Curl_easy *data, struct contenc_writer *writer) { struct zlib_writer *zp = (struct zlib_writer *) writer; z_stream *z = &zp->z; /* zlib state structure */ if(!writer->downstream) return CURLE_WRITE_ERROR; /* Initialize zlib */ z->zalloc = (alloc_func) zalloc_cb; z->zfree = (free_func) zfree_cb; if(inflateInit(z) != Z_OK) return process_zlib_error(data, z); zp->zlib_init = ZLIB_INIT; return CURLE_OK; } static CURLcode deflate_unencode_write(struct Curl_easy *data, struct contenc_writer *writer, const char *buf, size_t nbytes) { struct zlib_writer *zp = (struct zlib_writer *) writer; z_stream *z = &zp->z; /* zlib state structure */ /* Set the compressed input when this function is called */ z->next_in = (Bytef *) buf; z->avail_in = (uInt) nbytes; if(zp->zlib_init == ZLIB_EXTERNAL_TRAILER) return process_trailer(data, zp); /* Now uncompress the data */ return inflate_stream(data, writer, ZLIB_INFLATING); } static void deflate_close_writer(struct Curl_easy *data, struct contenc_writer *writer) { struct zlib_writer *zp = (struct zlib_writer *) writer; z_stream *z = &zp->z; /* zlib state structure */ exit_zlib(data, z, &zp->zlib_init, CURLE_OK); } static const struct content_encoding deflate_encoding = { "deflate", NULL, deflate_init_writer, deflate_unencode_write, deflate_close_writer, sizeof(struct zlib_writer) }; /* Gzip handler. */ static CURLcode gzip_init_writer(struct Curl_easy *data, struct contenc_writer *writer) { struct zlib_writer *zp = (struct zlib_writer *) writer; z_stream *z = &zp->z; /* zlib state structure */ if(!writer->downstream) return CURLE_WRITE_ERROR; /* Initialize zlib */ z->zalloc = (alloc_func) zalloc_cb; z->zfree = (free_func) zfree_cb; if(strcmp(zlibVersion(), "1.2.0.4") >= 0) { /* zlib ver. 
>= 1.2.0.4 supports transparent gzip decompressing */ if(inflateInit2(z, MAX_WBITS + 32) != Z_OK) { return process_zlib_error(data, z); } zp->zlib_init = ZLIB_INIT_GZIP; /* Transparent gzip decompress state */ } else { /* we must parse the gzip header and trailer ourselves */ if(inflateInit2(z, -MAX_WBITS) != Z_OK) { return process_zlib_error(data, z); } zp->trailerlen = 8; /* A CRC-32 and a 32-bit input size (RFC 1952, 2.2) */ zp->zlib_init = ZLIB_INIT; /* Initial call state */ } return CURLE_OK; } #ifdef OLD_ZLIB_SUPPORT /* Skip over the gzip header */ static enum { GZIP_OK, GZIP_BAD, GZIP_UNDERFLOW } check_gzip_header(unsigned char const *data, ssize_t len, ssize_t *headerlen) { int method, flags; const ssize_t totallen = len; /* The shortest header is 10 bytes */ if(len < 10) return GZIP_UNDERFLOW; if((data[0] != GZIP_MAGIC_0) || (data[1] != GZIP_MAGIC_1)) return GZIP_BAD; method = data[2]; flags = data[3]; if(method != Z_DEFLATED || (flags & RESERVED) != 0) { /* Can't handle this compression method or unknown flag */ return GZIP_BAD; } /* Skip over time, xflags, OS code and all previous bytes */ len -= 10; data += 10; if(flags & EXTRA_FIELD) { ssize_t extra_len; if(len < 2) return GZIP_UNDERFLOW; extra_len = (data[1] << 8) | data[0]; if(len < (extra_len + 2)) return GZIP_UNDERFLOW; len -= (extra_len + 2); data += (extra_len + 2); } if(flags & ORIG_NAME) { /* Skip over NUL-terminated file name */ while(len && *data) { --len; ++data; } if(!len || *data) return GZIP_UNDERFLOW; /* Skip over the NUL */ --len; ++data; } if(flags & COMMENT) { /* Skip over NUL-terminated comment */ while(len && *data) { --len; ++data; } if(!len || *data) return GZIP_UNDERFLOW; /* Skip over the NUL */ --len; } if(flags & HEAD_CRC) { if(len < 2) return GZIP_UNDERFLOW; len -= 2; } *headerlen = totallen - len; return GZIP_OK; } #endif static CURLcode gzip_unencode_write(struct Curl_easy *data, struct contenc_writer *writer, const char *buf, size_t nbytes) { struct zlib_writer *zp = (struct zlib_writer *) writer; z_stream *z = &zp->z; /* zlib state structure */ if(zp->zlib_init == ZLIB_INIT_GZIP) { /* Let zlib handle the gzip decompression entirely */ z->next_in = (Bytef *) buf; z->avail_in = (uInt) nbytes; /* Now uncompress the data */ return inflate_stream(data, writer, ZLIB_INIT_GZIP); } #ifndef OLD_ZLIB_SUPPORT /* Support for old zlib versions is compiled away and we are running with an old version, so return an error. */ return exit_zlib(data, z, &zp->zlib_init, CURLE_WRITE_ERROR); #else /* This next mess is to get around the potential case where there isn't * enough data passed in to skip over the gzip header. If that happens, we * malloc a block and copy what we have then wait for the next call. If * there still isn't enough (this is definitely a worst-case scenario), we * make the block bigger, copy the next part in and keep waiting. * * This is only required with zlib versions < 1.2.0.4 as newer versions * can handle the gzip header themselves. */ switch(zp->zlib_init) { /* Skip over gzip header? */ case ZLIB_INIT: { /* Initial call state */ ssize_t hlen; switch(check_gzip_header((unsigned char *) buf, nbytes, &hlen)) { case GZIP_OK: z->next_in = (Bytef *) buf + hlen; z->avail_in = (uInt) (nbytes - hlen); zp->zlib_init = ZLIB_GZIP_INFLATING; /* Inflating stream state */ break; case GZIP_UNDERFLOW: /* We need more data so we can find the end of the gzip header. It's * possible that the memory block we malloc here will never be freed if * the transfer abruptly aborts after this point. 
Since it's unlikely * that circumstances will be right for this code path to be followed in * the first place, and it's even more unlikely for a transfer to fail * immediately afterwards, it should seldom be a problem. */ z->avail_in = (uInt) nbytes; z->next_in = malloc(z->avail_in); if(!z->next_in) { return exit_zlib(data, z, &zp->zlib_init, CURLE_OUT_OF_MEMORY); } memcpy(z->next_in, buf, z->avail_in); zp->zlib_init = ZLIB_GZIP_HEADER; /* Need more gzip header data state */ /* We don't have any data to inflate yet */ return CURLE_OK; case GZIP_BAD: default: return exit_zlib(data, z, &zp->zlib_init, process_zlib_error(data, z)); } } break; case ZLIB_GZIP_HEADER: { /* Need more gzip header data state */ ssize_t hlen; z->avail_in += (uInt) nbytes; z->next_in = Curl_saferealloc(z->next_in, z->avail_in); if(!z->next_in) { return exit_zlib(data, z, &zp->zlib_init, CURLE_OUT_OF_MEMORY); } /* Append the new block of data to the previous one */ memcpy(z->next_in + z->avail_in - nbytes, buf, nbytes); switch(check_gzip_header(z->next_in, z->avail_in, &hlen)) { case GZIP_OK: /* This is the zlib stream data */ free(z->next_in); /* Don't point into the malloced block since we just freed it */ z->next_in = (Bytef *) buf + hlen + nbytes - z->avail_in; z->avail_in = (uInt) (z->avail_in - hlen); zp->zlib_init = ZLIB_GZIP_INFLATING; /* Inflating stream state */ break; case GZIP_UNDERFLOW: /* We still don't have any data to inflate! */ return CURLE_OK; case GZIP_BAD: default: return exit_zlib(data, z, &zp->zlib_init, process_zlib_error(data, z)); } } break; case ZLIB_EXTERNAL_TRAILER: z->next_in = (Bytef *) buf; z->avail_in = (uInt) nbytes; return process_trailer(data, zp); case ZLIB_GZIP_INFLATING: default: /* Inflating stream state */ z->next_in = (Bytef *) buf; z->avail_in = (uInt) nbytes; break; } if(z->avail_in == 0) { /* We don't have any data to inflate; wait until next time */ return CURLE_OK; } /* We've parsed the header, now uncompress the data */ return inflate_stream(data, writer, ZLIB_GZIP_INFLATING); #endif } static void gzip_close_writer(struct Curl_easy *data, struct contenc_writer *writer) { struct zlib_writer *zp = (struct zlib_writer *) writer; z_stream *z = &zp->z; /* zlib state structure */ exit_zlib(data, z, &zp->zlib_init, CURLE_OK); } static const struct content_encoding gzip_encoding = { "gzip", "x-gzip", gzip_init_writer, gzip_unencode_write, gzip_close_writer, sizeof(struct zlib_writer) }; #endif /* HAVE_LIBZ */ #ifdef HAVE_BROTLI /* Brotli writer. */ struct brotli_writer { struct contenc_writer super; BrotliDecoderState *br; /* State structure for brotli. 
*/ }; static CURLcode brotli_map_error(BrotliDecoderErrorCode be) { switch(be) { case BROTLI_DECODER_ERROR_FORMAT_EXUBERANT_NIBBLE: case BROTLI_DECODER_ERROR_FORMAT_EXUBERANT_META_NIBBLE: case BROTLI_DECODER_ERROR_FORMAT_SIMPLE_HUFFMAN_ALPHABET: case BROTLI_DECODER_ERROR_FORMAT_SIMPLE_HUFFMAN_SAME: case BROTLI_DECODER_ERROR_FORMAT_CL_SPACE: case BROTLI_DECODER_ERROR_FORMAT_HUFFMAN_SPACE: case BROTLI_DECODER_ERROR_FORMAT_CONTEXT_MAP_REPEAT: case BROTLI_DECODER_ERROR_FORMAT_BLOCK_LENGTH_1: case BROTLI_DECODER_ERROR_FORMAT_BLOCK_LENGTH_2: case BROTLI_DECODER_ERROR_FORMAT_TRANSFORM: case BROTLI_DECODER_ERROR_FORMAT_DICTIONARY: case BROTLI_DECODER_ERROR_FORMAT_WINDOW_BITS: case BROTLI_DECODER_ERROR_FORMAT_PADDING_1: case BROTLI_DECODER_ERROR_FORMAT_PADDING_2: #ifdef BROTLI_DECODER_ERROR_COMPOUND_DICTIONARY case BROTLI_DECODER_ERROR_COMPOUND_DICTIONARY: #endif #ifdef BROTLI_DECODER_ERROR_DICTIONARY_NOT_SET case BROTLI_DECODER_ERROR_DICTIONARY_NOT_SET: #endif case BROTLI_DECODER_ERROR_INVALID_ARGUMENTS: return CURLE_BAD_CONTENT_ENCODING; case BROTLI_DECODER_ERROR_ALLOC_CONTEXT_MODES: case BROTLI_DECODER_ERROR_ALLOC_TREE_GROUPS: case BROTLI_DECODER_ERROR_ALLOC_CONTEXT_MAP: case BROTLI_DECODER_ERROR_ALLOC_RING_BUFFER_1: case BROTLI_DECODER_ERROR_ALLOC_RING_BUFFER_2: case BROTLI_DECODER_ERROR_ALLOC_BLOCK_TYPE_TREES: return CURLE_OUT_OF_MEMORY; default: break; } return CURLE_WRITE_ERROR; } static CURLcode brotli_init_writer(struct Curl_easy *data, struct contenc_writer *writer) { struct brotli_writer *bp = (struct brotli_writer *) writer; (void) data; if(!writer->downstream) return CURLE_WRITE_ERROR; bp->br = BrotliDecoderCreateInstance(NULL, NULL, NULL); return bp->br? CURLE_OK: CURLE_OUT_OF_MEMORY; } static CURLcode brotli_unencode_write(struct Curl_easy *data, struct contenc_writer *writer, const char *buf, size_t nbytes) { struct brotli_writer *bp = (struct brotli_writer *) writer; const uint8_t *src = (const uint8_t *) buf; char *decomp; uint8_t *dst; size_t dstleft; CURLcode result = CURLE_OK; BrotliDecoderResult r = BROTLI_DECODER_RESULT_NEEDS_MORE_OUTPUT; if(!bp->br) return CURLE_WRITE_ERROR; /* Stream already ended. */ decomp = malloc(DSIZ); if(!decomp) return CURLE_OUT_OF_MEMORY; while((nbytes || r == BROTLI_DECODER_RESULT_NEEDS_MORE_OUTPUT) && result == CURLE_OK) { dst = (uint8_t *) decomp; dstleft = DSIZ; r = BrotliDecoderDecompressStream(bp->br, &nbytes, &src, &dstleft, &dst, NULL); result = Curl_unencode_write(data, writer->downstream, decomp, DSIZ - dstleft); if(result) break; switch(r) { case BROTLI_DECODER_RESULT_NEEDS_MORE_OUTPUT: case BROTLI_DECODER_RESULT_NEEDS_MORE_INPUT: break; case BROTLI_DECODER_RESULT_SUCCESS: BrotliDecoderDestroyInstance(bp->br); bp->br = NULL; if(nbytes) result = CURLE_WRITE_ERROR; break; default: result = brotli_map_error(BrotliDecoderGetErrorCode(bp->br)); break; } } free(decomp); return result; } static void brotli_close_writer(struct Curl_easy *data, struct contenc_writer *writer) { struct brotli_writer *bp = (struct brotli_writer *) writer; (void) data; if(bp->br) { BrotliDecoderDestroyInstance(bp->br); bp->br = NULL; } } static const struct content_encoding brotli_encoding = { "br", NULL, brotli_init_writer, brotli_unencode_write, brotli_close_writer, sizeof(struct brotli_writer) }; #endif #ifdef HAVE_ZSTD /* Zstd writer. */ struct zstd_writer { struct contenc_writer super; ZSTD_DStream *zds; /* State structure for zstd. 
*/ void *decomp; }; static CURLcode zstd_init_writer(struct Curl_easy *data, struct contenc_writer *writer) { struct zstd_writer *zp = (struct zstd_writer *) writer; (void)data; if(!writer->downstream) return CURLE_WRITE_ERROR; zp->zds = ZSTD_createDStream(); zp->decomp = NULL; return zp->zds ? CURLE_OK : CURLE_OUT_OF_MEMORY; } static CURLcode zstd_unencode_write(struct Curl_easy *data, struct contenc_writer *writer, const char *buf, size_t nbytes) { CURLcode result = CURLE_OK; struct zstd_writer *zp = (struct zstd_writer *) writer; ZSTD_inBuffer in; ZSTD_outBuffer out; size_t errorCode; if(!zp->decomp) { zp->decomp = malloc(DSIZ); if(!zp->decomp) return CURLE_OUT_OF_MEMORY; } in.pos = 0; in.src = buf; in.size = nbytes; for(;;) { out.pos = 0; out.dst = zp->decomp; out.size = DSIZ; errorCode = ZSTD_decompressStream(zp->zds, &out, &in); if(ZSTD_isError(errorCode)) { return CURLE_BAD_CONTENT_ENCODING; } if(out.pos > 0) { result = Curl_unencode_write(data, writer->downstream, zp->decomp, out.pos); if(result) break; } if((in.pos == nbytes) && (out.pos < out.size)) break; } return result; } static void zstd_close_writer(struct Curl_easy *data, struct contenc_writer *writer) { struct zstd_writer *zp = (struct zstd_writer *) writer; (void)data; if(zp->decomp) { free(zp->decomp); zp->decomp = NULL; } if(zp->zds) { ZSTD_freeDStream(zp->zds); zp->zds = NULL; } } static const struct content_encoding zstd_encoding = { "zstd", NULL, zstd_init_writer, zstd_unencode_write, zstd_close_writer, sizeof(struct zstd_writer) }; #endif /* Identity handler. */ static CURLcode identity_init_writer(struct Curl_easy *data, struct contenc_writer *writer) { (void) data; return writer->downstream? CURLE_OK: CURLE_WRITE_ERROR; } static CURLcode identity_unencode_write(struct Curl_easy *data, struct contenc_writer *writer, const char *buf, size_t nbytes) { return Curl_unencode_write(data, writer->downstream, buf, nbytes); } static void identity_close_writer(struct Curl_easy *data, struct contenc_writer *writer) { (void) data; (void) writer; } static const struct content_encoding identity_encoding = { "identity", "none", identity_init_writer, identity_unencode_write, identity_close_writer, sizeof(struct contenc_writer) }; /* supported content encodings table. */ static const struct content_encoding * const encodings[] = { &identity_encoding, #ifdef HAVE_LIBZ &deflate_encoding, &gzip_encoding, #endif #ifdef HAVE_BROTLI &brotli_encoding, #endif #ifdef HAVE_ZSTD &zstd_encoding, #endif NULL }; /* Return a list of comma-separated names of supported encodings. */ char *Curl_all_content_encodings(void) { size_t len = 0; const struct content_encoding * const *cep; const struct content_encoding *ce; char *ace; for(cep = encodings; *cep; cep++) { ce = *cep; if(!strcasecompare(ce->name, CONTENT_ENCODING_DEFAULT)) len += strlen(ce->name) + 2; } if(!len) return strdup(CONTENT_ENCODING_DEFAULT); ace = malloc(len); if(ace) { char *p = ace; for(cep = encodings; *cep; cep++) { ce = *cep; if(!strcasecompare(ce->name, CONTENT_ENCODING_DEFAULT)) { strcpy(p, ce->name); p += strlen(p); *p++ = ','; *p++ = ' '; } } p[-2] = '\0'; } return ace; } /* Real client writer: no downstream. */ static CURLcode client_init_writer(struct Curl_easy *data, struct contenc_writer *writer) { (void) data; return writer->downstream? 
CURLE_WRITE_ERROR: CURLE_OK; } static CURLcode client_unencode_write(struct Curl_easy *data, struct contenc_writer *writer, const char *buf, size_t nbytes) { struct SingleRequest *k = &data->req; (void) writer; if(!nbytes || k->ignorebody) return CURLE_OK; return Curl_client_write(data, CLIENTWRITE_BODY, (char *) buf, nbytes); } static void client_close_writer(struct Curl_easy *data, struct contenc_writer *writer) { (void) data; (void) writer; } static const struct content_encoding client_encoding = { NULL, NULL, client_init_writer, client_unencode_write, client_close_writer, sizeof(struct contenc_writer) }; /* Deferred error dummy writer. */ static CURLcode error_init_writer(struct Curl_easy *data, struct contenc_writer *writer) { (void) data; return writer->downstream? CURLE_OK: CURLE_WRITE_ERROR; } static CURLcode error_unencode_write(struct Curl_easy *data, struct contenc_writer *writer, const char *buf, size_t nbytes) { char *all = Curl_all_content_encodings(); (void) writer; (void) buf; (void) nbytes; if(!all) return CURLE_OUT_OF_MEMORY; failf(data, "Unrecognized content encoding type. " "libcurl understands %s content encodings.", all); free(all); return CURLE_BAD_CONTENT_ENCODING; } static void error_close_writer(struct Curl_easy *data, struct contenc_writer *writer) { (void) data; (void) writer; } static const struct content_encoding error_encoding = { NULL, NULL, error_init_writer, error_unencode_write, error_close_writer, sizeof(struct contenc_writer) }; /* Create an unencoding writer stage using the given handler. */ static struct contenc_writer * new_unencoding_writer(struct Curl_easy *data, const struct content_encoding *handler, struct contenc_writer *downstream, int order) { struct contenc_writer *writer; DEBUGASSERT(handler->writersize >= sizeof(struct contenc_writer)); writer = (struct contenc_writer *) calloc(1, handler->writersize); if(writer) { writer->handler = handler; writer->downstream = downstream; writer->order = order; if(handler->init_writer(data, writer)) { free(writer); writer = NULL; } } return writer; } /* Write data using an unencoding writer stack. "nbytes" is not allowed to be 0. */ CURLcode Curl_unencode_write(struct Curl_easy *data, struct contenc_writer *writer, const char *buf, size_t nbytes) { if(!nbytes) return CURLE_OK; return writer->handler->unencode_write(data, writer, buf, nbytes); } /* Close and clean-up the connection's writer stack. */ void Curl_unencode_cleanup(struct Curl_easy *data) { struct SingleRequest *k = &data->req; struct contenc_writer *writer = k->writer_stack; while(writer) { k->writer_stack = writer->downstream; writer->handler->close_writer(data, writer); free(writer); writer = k->writer_stack; } } /* Find the content encoding by name. */ static const struct content_encoding *find_encoding(const char *name, size_t len) { const struct content_encoding * const *cep; for(cep = encodings; *cep; cep++) { const struct content_encoding *ce = *cep; if((strncasecompare(name, ce->name, len) && !ce->name[len]) || (ce->alias && strncasecompare(name, ce->alias, len) && !ce->alias[len])) return ce; } return NULL; } /* allow no more than 5 "chained" compression steps */ #define MAX_ENCODE_STACK 5 /* Set-up the unencoding stack from the Content-Encoding header value. * See RFC 7231 section 3.1.2.2. */ CURLcode Curl_build_unencoding_stack(struct Curl_easy *data, const char *enclist, int is_transfer) { struct SingleRequest *k = &data->req; unsigned int order = is_transfer? 
2: 1; do { const char *name; size_t namelen; /* Parse a single encoding name. */ while(ISBLANK(*enclist) || *enclist == ',') enclist++; name = enclist; for(namelen = 0; *enclist && *enclist != ','; enclist++) if(!ISSPACE(*enclist)) namelen = enclist - name + 1; /* Special case: chunked encoding is handled at the reader level. */ if(is_transfer && namelen == 7 && strncasecompare(name, "chunked", 7)) { k->chunk = TRUE; /* chunks coming our way. */ Curl_httpchunk_init(data); /* init our chunky engine. */ } else if(namelen) { const struct content_encoding *encoding = find_encoding(name, namelen); struct contenc_writer *writer; if(!k->writer_stack) { k->writer_stack = new_unencoding_writer(data, &client_encoding, NULL, 0); if(!k->writer_stack) return CURLE_OUT_OF_MEMORY; } if(!encoding) encoding = &error_encoding; /* Defer error at stack use. */ if(k->writer_stack_depth++ >= MAX_ENCODE_STACK) { failf(data, "Reject response due to more than %u content encodings", MAX_ENCODE_STACK); return CURLE_BAD_CONTENT_ENCODING; } /* Stack the unencoding stage. */ if(order >= k->writer_stack->order) { writer = new_unencoding_writer(data, encoding, k->writer_stack, order); if(!writer) return CURLE_OUT_OF_MEMORY; k->writer_stack = writer; } else { struct contenc_writer *w = k->writer_stack; while(w->downstream && order < w->downstream->order) w = w->downstream; writer = new_unencoding_writer(data, encoding, w->downstream, order); if(!writer) return CURLE_OUT_OF_MEMORY; w->downstream = writer; } } } while(*enclist); return CURLE_OK; } #else /* Stubs for builds without HTTP. */ CURLcode Curl_build_unencoding_stack(struct Curl_easy *data, const char *enclist, int is_transfer) { (void) data; (void) enclist; (void) is_transfer; return CURLE_NOT_BUILT_IN; } CURLcode Curl_unencode_write(struct Curl_easy *data, struct contenc_writer *writer, const char *buf, size_t nbytes) { (void) data; (void) writer; (void) buf; (void) nbytes; return CURLE_NOT_BUILT_IN; } void Curl_unencode_cleanup(struct Curl_easy *data) { (void) data; } char *Curl_all_content_encodings(void) { return strdup(CONTENT_ENCODING_DEFAULT); /* Satisfy caller. */ } #endif /* CURL_DISABLE_HTTP */ 上边的是patch 下边的是源码: From 76f83f0db23846e254d940ec7fe141010077eb88 Mon Sep 17 00:00:00 2001 From: Daniel Stenberg <daniel@haxx.se> Date: Fri, 24 Jan 2025 11:13:24 +0100 Subject: [PATCH] content_encoding: drop support for zlib before 1.2.0.4 zlib 1.2.0.4 was released on 10 August 2003 Closes #16079 --- docs/INTERNALS.md | 2 +- lib/content_encoding.c | 276 ++++------------------------------------- 2 files changed, 25 insertions(+), 253 deletions(-) diff --git a/docs/INTERNALS.md b/docs/INTERNALS.md index ae77f0e54b05..4e42f4fd1015 100644 --- a/docs/INTERNALS.md +++ b/docs/INTERNALS.md @@ -23,7 +23,7 @@ versions of libs and build tools. - OpenSSL 0.9.7 - GnuTLS 3.1.10 - - zlib 1.1.4 + - zlib 1.2.0.4 - libssh2 1.0 - c-ares 1.16.0 - libidn2 2.0.0 diff --git a/lib/content_encoding.c b/lib/content_encoding.c index e19595d5ec42..d2b17297890d 100644 --- a/lib/content_encoding.c +++ b/lib/content_encoding.c @@ -55,33 +55,13 @@ #define DSIZ CURL_MAX_WRITE_SIZE /* buffer size for decompressed data */ - #ifdef HAVE_LIBZ -/* Comment this out if zlib is always going to be at least ver. 1.2.0.4 - (doing so will reduce code size slightly). 
*/ -#define OLD_ZLIB_SUPPORT 1 - -#define GZIP_MAGIC_0 0x1f -#define GZIP_MAGIC_1 0x8b - -/* gzip flag byte */ -#define CURL_GZIPFLAG_ASCII 0x01 /* bit 0 set: file probably ASCII - text */ -#define CURL_GZIPFLAG_HEAD_CRC 0x02 /* bit 1 set: header CRC present */ -#define CURL_GZIPFLAG_EXTRA_FIELD 0x04 /* bit 2 set: extra field present */ -#define CURL_GZIPFLAG_ORIG_NAME 0x08 /* bit 3 set: original filename - present */ -#define CURL_GZIPFLAG_COMMENT 0x10 /* bit 4 set: file comment present */ -#define CURL_GZIPFLAG_RESERVED 0xE0 /* bits 5..7: reserved */ - typedef enum { ZLIB_UNINIT, /* uninitialized */ ZLIB_INIT, /* initialized */ ZLIB_INFLATING, /* inflating started. */ ZLIB_EXTERNAL_TRAILER, /* reading external trailer */ - ZLIB_GZIP_HEADER, /* reading gzip header */ - ZLIB_GZIP_INFLATING, /* inflating gzip stream */ ZLIB_INIT_GZIP /* initialized in transparent gzip mode */ } zlibInitState; @@ -139,9 +119,6 @@ static CURLcode exit_zlib(struct Curl_easy *data, z_stream *z, zlibInitState *zlib_init, CURLcode result) { - if(*zlib_init == ZLIB_GZIP_HEADER) - Curl_safefree(z->next_in); - if(*zlib_init != ZLIB_UNINIT) { if(inflateEnd(z) != Z_OK && result == CURLE_OK) result = process_zlib_error(data, z); @@ -190,8 +167,7 @@ static CURLcode inflate_stream(struct Curl_easy *data, /* Check state. */ if(zp->zlib_init != ZLIB_INIT && zp->zlib_init != ZLIB_INFLATING && - zp->zlib_init != ZLIB_INIT_GZIP && - zp->zlib_init != ZLIB_GZIP_INFLATING) + zp->zlib_init != ZLIB_INIT_GZIP) return exit_zlib(data, z, &zp->zlib_init, CURLE_WRITE_ERROR); /* Dynamically allocate a buffer for decompression because it is uncommonly @@ -280,7 +256,7 @@ static CURLcode inflate_stream(struct Curl_easy *data, /* Deflate handler. */ static CURLcode deflate_do_init(struct Curl_easy *data, - struct Curl_cwriter *writer) + struct Curl_cwriter *writer) { struct zlib_writer *zp = (struct zlib_writer *) writer; z_stream *z = &zp->z; /* zlib state structure */ @@ -296,8 +272,8 @@ static CURLcode deflate_do_init(struct Curl_easy *data, } static CURLcode deflate_do_write(struct Curl_easy *data, - struct Curl_cwriter *writer, int type, - const char *buf, size_t nbytes) + struct Curl_cwriter *writer, int type, + const char *buf, size_t nbytes) { struct zlib_writer *zp = (struct zlib_writer *) writer; z_stream *z = &zp->z; /* zlib state structure */ @@ -317,7 +293,7 @@ static CURLcode deflate_do_write(struct Curl_easy *data, } static void deflate_do_close(struct Curl_easy *data, - struct Curl_cwriter *writer) + struct Curl_cwriter *writer) { struct zlib_writer *zp = (struct zlib_writer *) writer; z_stream *z = &zp->z; /* zlib state structure */ @@ -337,124 +313,34 @@ static const struct Curl_cwtype deflate_encoding = { /* Gzip handler. */ static CURLcode gzip_do_init(struct Curl_easy *data, - struct Curl_cwriter *writer) + struct Curl_cwriter *writer) { struct zlib_writer *zp = (struct zlib_writer *) writer; z_stream *z = &zp->z; /* zlib state structure */ + const char *v = zlibVersion(); /* Initialize zlib */ z->zalloc = (alloc_func) zalloc_cb; z->zfree = (free_func) zfree_cb; - if(strcmp(zlibVersion(), "1.2.0.4") >= 0) { - /* zlib ver. 
>= 1.2.0.4 supports transparent gzip decompressing */ + if(strcmp(v, "1.2.0.4") >= 0) { + /* zlib version >= 1.2.0.4 supports transparent gzip decompressing */ if(inflateInit2(z, MAX_WBITS + 32) != Z_OK) { return process_zlib_error(data, z); } zp->zlib_init = ZLIB_INIT_GZIP; /* Transparent gzip decompress state */ } else { - /* we must parse the gzip header and trailer ourselves */ - if(inflateInit2(z, -MAX_WBITS) != Z_OK) { - return process_zlib_error(data, z); - } - zp->trailerlen = 8; /* A CRC-32 and a 32-bit input size (RFC 1952, 2.2) */ - zp->zlib_init = ZLIB_INIT; /* Initial call state */ + failf(data, "too old zlib version: %s", v); + return CURLE_FAILED_INIT; } return CURLE_OK; } -#ifdef OLD_ZLIB_SUPPORT -/* Skip over the gzip header */ -typedef enum { - GZIP_OK, - GZIP_BAD, - GZIP_UNDERFLOW -} gzip_status; - -static gzip_status check_gzip_header(unsigned char const *data, ssize_t len, - ssize_t *headerlen) -{ - int method, flags; - const ssize_t totallen = len; - - /* The shortest header is 10 bytes */ - if(len < 10) - return GZIP_UNDERFLOW; - - if((data[0] != GZIP_MAGIC_0) || (data[1] != GZIP_MAGIC_1)) - return GZIP_BAD; - - method = data[2]; - flags = data[3]; - - if(method != Z_DEFLATED || (flags & CURL_GZIPFLAG_RESERVED) != 0) { - /* cannot handle this compression method or unknown flag */ - return GZIP_BAD; - } - - /* Skip over time, xflags, OS code and all previous bytes */ - len -= 10; - data += 10; - - if(flags & CURL_GZIPFLAG_EXTRA_FIELD) { - ssize_t extra_len; - - if(len < 2) - return GZIP_UNDERFLOW; - - extra_len = (data[1] << 8) | data[0]; - - if(len < (extra_len + 2)) - return GZIP_UNDERFLOW; - - len -= (extra_len + 2); - data += (extra_len + 2); - } - - if(flags & CURL_GZIPFLAG_ORIG_NAME) { - /* Skip over NUL-terminated filename */ - while(len && *data) { - --len; - ++data; - } - if(!len || *data) - return GZIP_UNDERFLOW; - - /* Skip over the NUL */ - --len; - ++data; - } - - if(flags & CURL_GZIPFLAG_COMMENT) { - /* Skip over NUL-terminated comment */ - while(len && *data) { - --len; - ++data; - } - if(!len || *data) - return GZIP_UNDERFLOW; - - /* Skip over the NUL */ - --len; - } - - if(flags & CURL_GZIPFLAG_HEAD_CRC) { - if(len < 2) - return GZIP_UNDERFLOW; - - len -= 2; - } - - *headerlen = totallen - len; - return GZIP_OK; -} -#endif - static CURLcode gzip_do_write(struct Curl_easy *data, - struct Curl_cwriter *writer, int type, - const char *buf, size_t nbytes) + struct Curl_cwriter *writer, int type, + const char *buf, size_t nbytes) { struct zlib_writer *zp = (struct zlib_writer *) writer; z_stream *z = &zp->z; /* zlib state structure */ @@ -470,117 +356,8 @@ static CURLcode gzip_do_write(struct Curl_easy *data, return inflate_stream(data, writer, type, ZLIB_INIT_GZIP); } -#ifndef OLD_ZLIB_SUPPORT - /* Support for old zlib versions is compiled away and we are running with - an old version, so return an error. */ + /* We are running with an old version: return error. */ return exit_zlib(data, z, &zp->zlib_init, CURLE_WRITE_ERROR); - -#else - /* This next mess is to get around the potential case where there is not - * enough data passed in to skip over the gzip header. If that happens, we - * malloc a block and copy what we have then wait for the next call. If - * there still is not enough (this is definitely a worst-case scenario), we - * make the block bigger, copy the next part in and keep waiting. - * - * This is only required with zlib versions < 1.2.0.4 as newer versions - * can handle the gzip header themselves. 
- */ - - switch(zp->zlib_init) { - /* Skip over gzip header? */ - case ZLIB_INIT: - { - /* Initial call state */ - ssize_t hlen; - - switch(check_gzip_header((unsigned char *) buf, nbytes, &hlen)) { - case GZIP_OK: - z->next_in = (Bytef *) buf + hlen; - z->avail_in = (uInt) (nbytes - hlen); - zp->zlib_init = ZLIB_GZIP_INFLATING; /* Inflating stream state */ - break; - - case GZIP_UNDERFLOW: - /* We need more data so we can find the end of the gzip header. it is - * possible that the memory block we malloc here will never be freed if - * the transfer abruptly aborts after this point. Since it is unlikely - * that circumstances will be right for this code path to be followed in - * the first place, and it is even more unlikely for a transfer to fail - * immediately afterwards, it should seldom be a problem. - */ - z->avail_in = (uInt) nbytes; - z->next_in = malloc(z->avail_in); - if(!z->next_in) { - return exit_zlib(data, z, &zp->zlib_init, CURLE_OUT_OF_MEMORY); - } - memcpy(z->next_in, buf, z->avail_in); - zp->zlib_init = ZLIB_GZIP_HEADER; /* Need more gzip header data state */ - /* We do not have any data to inflate yet */ - return CURLE_OK; - - case GZIP_BAD: - default: - return exit_zlib(data, z, &zp->zlib_init, process_zlib_error(data, z)); - } - - } - break; - - case ZLIB_GZIP_HEADER: - { - /* Need more gzip header data state */ - ssize_t hlen; - z->avail_in += (uInt) nbytes; - z->next_in = Curl_saferealloc(z->next_in, z->avail_in); - if(!z->next_in) { - return exit_zlib(data, z, &zp->zlib_init, CURLE_OUT_OF_MEMORY); - } - /* Append the new block of data to the previous one */ - memcpy(z->next_in + z->avail_in - nbytes, buf, nbytes); - - switch(check_gzip_header(z->next_in, (ssize_t)z->avail_in, &hlen)) { - case GZIP_OK: - /* This is the zlib stream data */ - free(z->next_in); - /* Do not point into the malloced block since we just freed it */ - z->next_in = (Bytef *) buf + hlen + nbytes - z->avail_in; - z->avail_in = z->avail_in - (uInt)hlen; - zp->zlib_init = ZLIB_GZIP_INFLATING; /* Inflating stream state */ - break; - - case GZIP_UNDERFLOW: - /* We still do not have any data to inflate! */ - return CURLE_OK; - - case GZIP_BAD: - default: - return exit_zlib(data, z, &zp->zlib_init, process_zlib_error(data, z)); - } - - } - break; - - case ZLIB_EXTERNAL_TRAILER: - z->next_in = (Bytef *) buf; - z->avail_in = (uInt) nbytes; - return process_trailer(data, zp); - - case ZLIB_GZIP_INFLATING: - default: - /* Inflating stream state */ - z->next_in = (Bytef *) buf; - z->avail_in = (uInt) nbytes; - break; - } - - if(z->avail_in == 0) { - /* We do not have any data to inflate; wait until next time */ - return CURLE_OK; - } - - /* We have parsed the header, now uncompress the data */ - return inflate_stream(data, writer, type, ZLIB_GZIP_INFLATING); -#endif } static void gzip_do_close(struct Curl_easy *data, @@ -603,7 +380,6 @@ static const struct Curl_cwtype gzip_encoding = { #endif /* HAVE_LIBZ */ - #ifdef HAVE_BROTLI /* Brotli writer. 
*/ struct brotli_writer { @@ -650,7 +426,7 @@ static CURLcode brotli_map_error(BrotliDecoderErrorCode be) } static CURLcode brotli_do_init(struct Curl_easy *data, - struct Curl_cwriter *writer) + struct Curl_cwriter *writer) { struct brotli_writer *bp = (struct brotli_writer *) writer; (void) data; @@ -660,8 +436,8 @@ static CURLcode brotli_do_init(struct Curl_easy *data, } static CURLcode brotli_do_write(struct Curl_easy *data, - struct Curl_cwriter *writer, int type, - const char *buf, size_t nbytes) + struct Curl_cwriter *writer, int type, + const char *buf, size_t nbytes) { struct brotli_writer *bp = (struct brotli_writer *) writer; const uint8_t *src = (const uint8_t *) buf; @@ -733,7 +509,6 @@ static const struct Curl_cwtype brotli_encoding = { }; #endif - #ifdef HAVE_ZSTD /* Zstd writer. */ struct zstd_writer { @@ -757,7 +532,7 @@ static void Curl_zstd_free(void *opaque, void *address) #endif static CURLcode zstd_do_init(struct Curl_easy *data, - struct Curl_cwriter *writer) + struct Curl_cwriter *writer) { struct zstd_writer *zp = (struct zstd_writer *) writer; @@ -778,8 +553,8 @@ static CURLcode zstd_do_init(struct Curl_easy *data, } static CURLcode zstd_do_write(struct Curl_easy *data, - struct Curl_cwriter *writer, int type, - const char *buf, size_t nbytes) + struct Curl_cwriter *writer, int type, + const char *buf, size_t nbytes) { CURLcode result = CURLE_OK; struct zstd_writer *zp = (struct zstd_writer *) writer; @@ -810,7 +585,7 @@ static CURLcode zstd_do_write(struct Curl_easy *data, } if(out.pos > 0) { result = Curl_cwriter_write(data, writer->next, type, - zp->decomp, out.pos); + zp->decomp, out.pos); if(result) break; } @@ -848,7 +623,6 @@ static const struct Curl_cwtype zstd_encoding = { }; #endif - /* Identity handler. */ static const struct Curl_cwtype identity_encoding = { "identity", @@ -859,7 +633,6 @@ static const struct Curl_cwtype identity_encoding = { sizeof(struct Curl_cwriter) }; - /* supported general content decoders. */ static const struct Curl_cwtype * const general_unencoders[] = { &identity_encoding, @@ -923,7 +696,7 @@ void Curl_all_content_encodings(char *buf, size_t blen) /* Deferred error dummy writer. */ static CURLcode error_do_init(struct Curl_easy *data, - struct Curl_cwriter *writer) + struct Curl_cwriter *writer) { (void)data; (void)writer; @@ -931,8 +704,8 @@ static CURLcode error_do_init(struct Curl_easy *data, } static CURLcode error_do_write(struct Curl_easy *data, - struct Curl_cwriter *writer, int type, - const char *buf, size_t nbytes) + struct Curl_cwriter *writer, int type, + const char *buf, size_t nbytes) { (void) writer; (void) buf; @@ -1107,5 +880,4 @@ void Curl_all_content_encodings(char *buf, size_t blen) strcpy(buf, CONTENT_ENCODING_DEFAULT); } - #endif /* CURL_DISABLE_HTTP */ 现在就是想做的就是根据源码相应的函数的行数来修改patch中要修复的行数,不修改源码
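The request above asks to re-number the patch hunks so they match where the corresponding functions sit in the current source, without touching the source itself. Below is a minimal Python sketch of that idea, not the tooling actually used here: it re-anchors each hunk of a unified diff by searching the target file for the hunk's first old-file line and rewriting the "@@ -old,count +new,count @@" header. The helper name rebase_hunk_offsets, the file names in the usage line, and the naive first-match anchor search are all assumptions; for anything non-trivial, `git apply --3way` or `patch` with fuzz is the more robust route.

import re
from pathlib import Path

HUNK_RE = re.compile(r'^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@(.*)$')

def rebase_hunk_offsets(patch_text: str, source_path: str) -> str:
    """Rewrite each hunk header so its old-file start line points at where the
    hunk's first old-file line actually appears in source_path."""
    src = [l.rstrip() for l in Path(source_path).read_text(encoding='utf-8').splitlines()]
    lines = patch_text.splitlines()
    out = []
    shift = 0  # cumulative (new_count - old_count) of hunks already emitted
    for i, line in enumerate(lines):
        m = HUNK_RE.match(line)
        if not m:
            if line.startswith('+++ '):
                shift = 0  # next file in the patch: reset the running offset
            out.append(line)
            continue
        old_start = int(m.group(1))
        old_count = int(m.group(2) or 1)
        new_count = int(m.group(4) or 1)
        # Anchor: first non-blank hunk-body line that exists in the old file
        # (context ' ' or removal '-').
        anchor = None
        for body in lines[i + 1:]:
            if HUNK_RE.match(body) or body.startswith(('diff ', '--- ', '+++ ')):
                break
            if body[:1] in (' ', '-') and body[1:].strip():
                anchor = body[1:].rstrip()
                break
        if anchor:
            try:
                old_start = src.index(anchor) + 1  # naive: first textual match wins
            except ValueError:
                pass  # context not found in the source: keep the original number
        new_start = old_start + shift
        shift += new_count - old_count
        out.append(f"@@ -{old_start},{old_count} +{new_start},{new_count} @@{m.group(5)}")
    return '\n'.join(out)

# Hypothetical usage:
# print(rebase_hunk_offsets(Path("zlib-drop.patch").read_text(), "lib/content_encoding.c"))

This only rewrites the hunk headers; if the function bodies themselves drifted between the two trees, the context lines inside the hunks would still need manual review.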
# rate_set/rate_sync.py import json import os import re import logging import sys from pathlib import Path from utils import resource_path from datetime import datetime from typing import Dict, List, Tuple, Any # ------------------------------- # 日志配置 # ------------------------------- PROJECT_ROOT = Path(__file__).parent.parent.resolve() LOG_DIR = PROJECT_ROOT / "output" / "log" LOG_DIR.mkdir(parents=True, exist_ok=True) LOG_FILE = LOG_DIR / f"rate_sync_{datetime.now().strftime('%Y%m%d_%H%M%S')}.log" class RateSetSynchronizer: MAX_ENUM_PER_LINE = 4 # enum 每行最多几个宏 MAX_DATA_ITEMS_PER_LINE = 4 # data 数组每行最多几个值 MAX_INDEX_ITEMS_PER_LINE = 15 # index 数组每行最多几个值 def __init__(self, c_file_path=None, dry_run=False, config_path="config/config.json"): self.logger = logging.getLogger(f"{__name__}.RateSetSynchronizer") # 加载配置 self.config_file_path = resource_path(config_path) if not os.path.exists(self.config_file_path): raise FileNotFoundError(f"配置文件不存在: {self.config_file_path}") with open(self.config_file_path, 'r', encoding='utf-8') as f: self.config = json.load(f) self.dry_run = dry_run # C 文件路径 if c_file_path is None: internal_c_path = self.config["target_c_file"] self.c_file_path = resource_path(internal_c_path) else: self.c_file_path = Path(c_file_path) if not self.c_file_path.exists(): raise FileNotFoundError(f"找不到 C 源文件: {self.c_file_path}") # === 单一锚点标记 === self.block_start = self.config["STR_RATE_SET_DATA"] self.block_end = self.config["END_RATE_SET_DATA"] # 数组与枚举名 self.data_array_name = "rate_sets_2g_20m" self.index_array_name = "rate_sets_index_2g_20m" self.enum_name = "rate_set_2g_20m" # 扫描所有子目录中的 .c 文件(排除自身) self.rate_set_dir = Path(__file__).parent self.rate_files = [ f for f in self.rate_set_dir.rglob("*.c") # 递归匹配所有 .c 文件 if f.is_file() and f.name != "rate_sync.py" ] # 加载文件名和结构映射 self.target_map = self.config.get("rate_set_map") if not isinstance(self.target_map, dict): raise ValueError("config.json 中缺少 'rate_set_map' 字段或格式错误") self._validate_target_map() # ← 添加一致性校验 def _validate_target_map(self): """验证 rate_set_map 是否一致,防止多个 full_key 映射到同一数组""" seen_data = {} seen_index = {} seen_enum = {} for key, cfg in self.target_map.items(): d = cfg["data"] i = cfg["index"] e = cfg["enum"] if d in seen_data: raise ValueError(f"data 数组冲突: '{d}' 被 '{seen_data[d]}' 和 '{key}' 同时使用") if i in seen_index: raise ValueError(f"index 数组冲突: '{i}' 被 '{seen_index[i]}' 和 '{key}' 同时使用") if e in seen_enum: raise ValueError(f"enum 名称冲突: '{e}' 被 '{seen_enum[e]}' 和 '{key}' 同时使用") seen_data[d] = key seen_index[i] = key seen_enum[e] = key def parse_filename(self, filename: str) -> str: """ 从文件名提取 band_bw_ext 类型键,用于查找 rate_set_map 示例: 2G_20M_rate_set.c → 2G_20M_BASE 2G_20M_EXT_rate_set.c → 2G_20M_EXT 5G_80M_EXT4_rate_set.c → 5G_80M_EXT4 """ match = re.match(r'^([A-Z0-9]+)_([0-9]+M)(?:_(EXT\d*))?_rate_set\.c$', filename, re.I) if not match: raise ValueError(f"无法识别的文件名格式: {filename}") band, bw, ext = match.groups() ext_type = ext.upper() if ext else "BASE" return f"{band.upper()}_{bw.upper()}_{ext_type}" def extract_sub_rate_sets(self, content: str) -> List[Dict[str, Any]]: """ 提取 /*NAME*/ N, 后续多行 WL_RATE_xxx 列表 支持跨行、缩进、逗号、空行、注释干扰等 使用“按行扫描 + 状态机”方式,避免正则越界 """ self.logger.info("开始提取速率集...") self.logger.info("...") sub_sets = [] lines = [line.rstrip('\r\n') for line in content.splitlines()] # 保留原始行尾 i = 0 # 匹配 /*NAME*/ N, 的开头 header_pattern = re.compile(r'/\*\s*([A-Za-z0-9_]+)\s*\*/\s*(\d+)\s*,?') while i < len(lines): line = lines[i] stripped = line.strip() # 跳过空行和纯注释 if not stripped or stripped.startswith("//"): i += 1 
continue # 查找头: /*NAME*/ N, match = header_pattern.search(stripped) if not match: i += 1 continue name = match.group(1) try: count = int(match.group(2)) except ValueError: self.logger.warning(f"⚠️ 计数无效,跳过: {name} = '{match.group(2)}'") i += 1 continue self.logger.info(f"🔍 发现块: {name}, 预期数量={count}") # 开始收集 body 内容(保留原始带缩进的行) body_lines = [] j = i + 1 max_lines_to_read = 200 while j < len(lines) and len(body_lines) < max_lines_to_read: ln = lines[j].strip() # 终止条件:遇到新 block / 结构结束 if ln.startswith("/*") or ln.startswith("}") or ln.startswith("enum"): break if ln and not ln.startswith("//"): body_lines.append(lines[j]) # ← 原样保存(含缩进) else: body_lines.append(lines[j]) # 也保留注释或空行(保持格式一致) j += 1 # 提取宏名用于校验(但不再用于生成数据) body_text = "\n".join(body_lines) all_macros = re.findall(r'WL_RATE_[A-Za-z0-9_]+', body_text) rate_items = all_macros[:count] if len(rate_items) < count: self.logger.warning(f"[{name}] 条目不足: 需要 {count}, 实际 {len(rate_items)}") # 构建结果:增加 raw_header 和 raw_body(关键改动) sub_sets.append({ "name": name, "count": count, "rates": rate_items, "raw_header": line, # ← 原始头行(如 /*...*/ 4,) "raw_body": body_lines, # ← 原始 body 行列表 "start_line": i, "end_line": j - 1 }) self.logger.debug(f"✅ 提取成功: {name} → {len(rate_items)} 个速率") i = j # 跳到下一个 block self.logger.info(f" 共提取 {len(sub_sets)} 个有效子集") return sub_sets def parse_all_structures(self, full_content: str) -> Dict: """ 直接从完整 C 文件中解析 enum/data/index 结构 """ self.logger.info("开始解析所有结构...") self.logger.info("...") result = { 'existing_enum': {}, 'data_entries': [], 'index_values': [], 'data_len': 0 } # === 解析 enum === enum_pattern = rf'enum\s+{re.escape(self.enum_name)}\s*\{{([^}}]+)\}};' enum_match = re.search(enum_pattern, full_content, re.DOTALL) if enum_match: body = enum_match.group(1) entries = re.findall(r'(RATE_SET_[^=,\s]+)\s*=\s*(\d+)', body) result['existing_enum'] = {k: int(v) for k, v in entries} self.logger.info(f"解析出 {len(entries)} 个已有枚举项") else: self.logger.warning(f"未找到 enum 定义: {self.enum_name}") # === 解析 data 数组 === data_pattern = rf'static const unsigned char {re.escape(self.data_array_name)}\[\] = \{{([^}}]+)\}};' data_match = re.search(data_pattern, full_content, re.DOTALL) if not data_match: raise ValueError(f"未找到 data 数组: {self.data_array_name}") data_code = data_match.group(1) result['data_entries'] = [item.strip() for item in re.split(r'[,\n]+', data_code) if item.strip()] result['data_len'] = len(result['data_entries']) # === 解析 index 数组 === index_pattern = rf'static const unsigned short {re.escape(self.index_array_name)}\[\] = \{{([^}}]+)\}};' index_match = re.search(index_pattern, full_content, re.DOTALL) if not index_match: raise ValueError(f"未找到 index 数组: {self.index_array_name}") index_code = index_match.group(1) result['index_values'] = [int(x.strip()) for x in re.split(r'[,\n]+', index_code) if x.strip()] return result def build_injection_with_format(self, new_subsets: List[Dict], existing_enum: Dict[str, int]) -> List[Dict]: """ 返回需要注入的原始块列表(包含 raw_header + raw_body) 不再返回 new_data/new_indices/new_enums """ valid_blocks = [] next_enum_value = max(existing_enum.values(), default=-1) + 1 self.logger.info(f"开始构建注入内容,当前最大枚举值 = {next_enum_value}") for subset in new_subsets: enum_name = subset["name"] if enum_name in existing_enum: self.logger.info(f"跳过已存在的枚举项: {enum_name} = {existing_enum[enum_name]}") continue # 只保存必要信息,不计算偏移 valid_blocks.append({ "enum_name": enum_name, "raw_header": subset["raw_header"], "raw_body": subset["raw_body"], "count": subset["count"], # 用于计算 data 占用空间 "enum_value": next_enum_value, # ✅ 必须存在! 
}) self.logger.debug(f" 准备注入: {enum_name}") self.logger.info(f"新增条目: {enum_name} enum={next_enum_value}") next_enum_value += 1 self.logger.info(f"构建完成:共 {len(valid_blocks)} 个新条目(保留原始格式)") return valid_blocks def format_list(self, items: List[str], indent: str = " ", width: int = 8) -> str: """格式化数组为多行字符串""" lines = [] for i in range(0, len(items), width): chunk = items[i:i + width] lines.append(indent + ", ".join(chunk) + ",") return "\n".join(lines).rstrip(",") def _safe_write_back(self, old_content: str, new_content: str) -> bool: """安全写回文件,带备份""" if old_content == new_content: self.logger.info("主文件内容无变化,无需写入") return False if self.dry_run: self.logger.info("DRY-RUN 模式启用,跳过实际写入") print("[DRY RUN] 差异预览(前 20 行):") diff = new_content.splitlines()[:20] for line in diff: print(f" {line}") return True # 创建备份 timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") backup = self.c_file_path.with_name(f"{self.c_file_path.stem}_{timestamp}.c.bak") try: self.c_file_path.rename(backup) self.logger.info(f"原文件已备份为: {backup.name}") except Exception as e: self.logger.error(f"备份失败: {e}") raise # 写入新内容 try: self.c_file_path.write_text(new_content, encoding='utf-8') self.logger.info(f"✅ 成功写入更新后的文件: {self.c_file_path.name}") return True except Exception as e: self.logger.error(f"写入失败: {e}", exc_info=True) raise def inject_new_data(self) -> bool: try: full_content = self.c_file_path.read_text(encoding='utf-8') except Exception as e: self.logger.error(f"读取主 C 文件失败: {e}") raise self.logger.info(f"正在处理 C 文件: {self.c_file_path.name}") start_pos = full_content.find(self.block_start) end_pos = full_content.find(self.block_end) if start_pos == -1: raise ValueError(f"未找到起始锚点: {self.block_start}") if end_pos == -1: raise ValueError(f"未找到结束锚点: {self.block_end}") if end_pos <= start_pos: raise ValueError("结束锚点位于起始锚点之前") inner_start = start_pos + len(self.block_start) block_content = full_content[inner_start:end_pos].strip() all_changes_made = False # === 遍历每一个 rate set 子文件 === for file_path in self.rate_files: try: self.logger.info(f"→ 处理子文件: {file_path.name}") # --- 1. 解析文件名得到 full_key --- try: full_key = self.parse_filename(file_path.name) self.logger.debug(f" ├─ 解析出 key: {full_key}") except ValueError as ve: self.logger.warning(f" └─ 跳过无效文件名: {ve}") continue # --- 2. 查找 rate_set_map 映射 --- target = self.target_map.get(full_key) if not target: self.logger.warning(f" └─ 未在 config.json 中定义映射关系: {full_key},跳过") continue # --- 3. 动态设置当前注入目标 --- self.data_array_name = target["data"] self.index_array_name = target["index"] self.enum_name = target["enum"] self.logger.debug(f" ├─ 绑定目标:") self.logger.debug(f" data: {self.data_array_name}") self.logger.debug(f" index: {self.index_array_name}") self.logger.debug(f" enum: {self.enum_name}") # --- 4. 解析主文件中的当前结构 --- try: parsed = self.parse_all_structures(full_content) except Exception as e: self.logger.error(f" └─ 解析主文件结构失败: {e}") continue # --- 5. 提取该子文件中的 rate sets --- file_content = file_path.read_text(encoding='utf-8') subsets = self.extract_sub_rate_sets(file_content) if not subsets: self.logger.info(f" └─ 无有效子集数据") continue # --- 6. 构建要注入的内容 --- valid_blocks = self.build_injection_with_format( subsets, existing_enum=parsed['existing_enum'] ) if not valid_blocks: self.logger.info(f" └─ 无需更新") continue # --- 7. 
写回新内容(精准插入)--- updated_content = self._write_back_in_blocks( full_content, parsed, valid_blocks ) if updated_content != full_content: all_changes_made = True full_content = updated_content # 更新内存内容供后续文件使用 self.logger.info(f"✅ 成功注入 {len(subsets)} 条目到 {self.enum_name}") except Exception as e: self.logger.warning(f"❌ 处理文件失败 [{file_path.name}]: {e}") if self.logger.isEnabledFor(logging.DEBUG): self.logger.debug("详细堆栈:", exc_info=True) continue # 最终写回磁盘 if all_changes_made: try: return self._safe_write_back(self.c_file_path.read_text(encoding='utf-8'), full_content) except Exception as e: self.logger.error(f"写入最终文件失败: {e}") raise else: self.logger.info("没有需要更新的内容") return False def _format_with_inline_fallback( self, lines: List[str], new_items: List[str], max_per_line: int, indent_marker: str = " ", item_separator: str = ", ", line_suffix: str = "", # 注意:现在我们不在这里加逗号! extract_func=None, align_eq_col: bool = False, detect_spacing_from_last_line: bool = True, ) -> str: if not lines: lines = [""] last_line = lines[-1].rstrip() indent_match = re.match(r'^(\s*)', last_line) line_indent = indent_match.group(1) if indent_match else indent_marker clean_last = re.sub(r'//.*|/\*.*?\*/', '', last_line).strip() existing_items = extract_func(clean_last) if extract_func else re.findall(r'\w+', clean_last) current_count = len(existing_items) space_left = max(0, max_per_line - current_count) to_append_inline = new_items[:space_left] to_append_newline = new_items[space_left:] # === 检测真实分隔符 === actual_sep = item_separator if detect_spacing_from_last_line and len(existing_items) >= 2: first = re.escape(existing_items[0]) second = re.escape(existing_items[1]) match = re.search(f"({first})(\\s+)({second})", last_line) if match: actual_sep = match.group(2) # === 对齐等号列:关键修复 → 对齐后再加逗号 === formatted_new_items = [] for item in new_items: raw_item = item.rstrip(',') # 去掉可能已有的逗号避免重复 if align_eq_col: m = re.match(r'(\w+)\s*=\s*(\d+)', raw_item) if m: name, val = m.groups() # 计算目标列位置 target_eq_col = None for i in range(len(lines) - 1, -1, -1): ln = lines[i] eq_match = re.search(r'=\s*\d+', ln) if eq_match: raw_before = ln[:eq_match.start()] expanded_before = raw_before.expandtabs(4) target_eq_col = len(expanded_before) break if target_eq_col is None: target_eq_col = 30 padding = max(1, target_eq_col - len(name.replace('\t', ' ').expandtabs(4))) spaces = ' ' * padding aligned_item = f"{name}{spaces}= {val}" formatted_new_items.append(aligned_item) else: formatted_new_items.append(raw_item) else: formatted_new_items.append(raw_item) # 现在统一加逗号:每个 item 都要加! # 注意:是否加逗号应该由调用者或此函数控制,不要混合 final_formatted_items = [f"{item}," for item in formatted_new_items] to_append_inline = final_formatted_items[:space_left] to_append_newline = final_formatted_items[space_left:] # === 构建结果 === result_lines = lines[:-1] # 保留前面所有行 final_main_line = lines[-1].rstrip() # 添加 inline 项 if to_append_inline: joined_inline = actual_sep.join(to_append_inline) if final_main_line.strip(): final_main_line += actual_sep + joined_inline else: final_main_line = joined_inline result_lines.append(final_main_line) # 添加新行(每行最多 max_per_line 个) if to_append_newline: for i in range(0, len(to_append_newline), max_per_line): chunk = to_append_newline[i:i + max_per_line] joined = actual_sep.join(chunk) result_lines.append(f"{line_indent}{joined}") return '\n'.join(result_lines) def _write_back_in_blocks(self, full_content: str, parsed: Dict, valid_blocks: List[Dict]) -> str: """ 使用局部块操作策略:只在 /* START */ ... 
/* END */ 范围内修改内容 关键改进:直接插入 raw_header + raw_body,保留原始格式 """ self.logger.info("开始执行局部块写入操作...") self.logger.info("...") # 在 _write_back_in_blocks 最上方添加: base_data_offset = parsed['data_len'] current_new_data_size = 0 # 动态记录已写入的新 data 大小 start_pos = full_content.find(self.block_start) end_pos = full_content.find(self.block_end) if start_pos == -1 or end_pos == -1: raise ValueError(f"未找到锚点标记: {self.block_start} 或 {self.block_end}") if end_pos <= start_pos: raise ValueError("结束锚点位于起始锚点之前") inner_start = start_pos + len(self.block_start) block_content = full_content[inner_start:end_pos] replacements = [] # (start_in_block, end_in_block, replacement) # === Step 2: 更新 ENUM === if valid_blocks: # 提取函数:从字符串中提取 RATE_SET_xxx extract_enum = lambda s: re.findall(r'RATE_SET_[A-Z0-9_]+', s) enum_pattern = rf'(enum\s+{re.escape(self.enum_name)}\s*\{{)([^}}]*)\}}\s*;' match = re.search(enum_pattern, block_content, re.DOTALL | re.IGNORECASE) if not match: raise ValueError(f"未找到枚举定义: {self.enum_name}") header = match.group(1) # "enum rate_set_2g_20m {" body_content = match.group(2) lines = [ln.rstrip() for ln in body_content.splitlines() if ln.strip()] # 计算新值 new_macros = [] for block in valid_blocks: name = block["enum_name"] value = block["enum_value"] # ✅ 来自 build_injection_with_format 的正确值 new_macros.append(f"{name} = {value}") # === 关键:获取标准缩进 === indent_match = re.match(r'^(\s*)', lines[0] if lines else "") standard_indent = indent_match.group(1) if indent_match else " " # 格式化新 body new_body = self._format_with_inline_fallback( lines=lines, new_items=new_macros, max_per_line=self.MAX_ENUM_PER_LINE, indent_marker=standard_indent, item_separator=" ", line_suffix="", extract_func=extract_enum, align_eq_col=True, detect_spacing_from_last_line=True, ) # 关键修复:确保每行都有缩进(包括第一行) formatted_lines = [] for line in new_body.splitlines(): stripped = line.strip() if stripped: formatted_lines.append(f"{standard_indent}{stripped}") else: formatted_lines.append(line) final_body = '\n'.join(formatted_lines) # 关键:header 单独占一行,新 body 换行开始 new_enum_code = f"{header}\n{final_body}\n}};" replacements.append((match.start(), match.end(), new_enum_code)) self.logger.debug(f"更新 enum: 添加 {len(valid_blocks)} 项") # === Step 3: 更新 DATA 数组 === if valid_blocks: data_pattern = rf'(static const unsigned char {re.escape(self.data_array_name)}\[\]\s*=\s*\{{)([^}}]*)(\}}\s*;)' match = re.search(data_pattern, block_content, re.DOTALL) if not match: raise ValueError(f"未找到 data 数组: {self.data_array_name}") header = match.group(1) body_content = match.group(2).strip() footer = match.group(3) lines = body_content.splitlines() last_line = lines[-1] if lines else "" indent_match = re.match(r'^(\s*)', last_line) line_indent = indent_match.group(1) if indent_match else " " new_body = body_content.rstrip() if not new_body.endswith(','): new_body += ',' for block in valid_blocks: # 插入头行(如 /*...*/ 4,) new_body += f"\n{line_indent}{block['raw_header'].strip()}" # 插入每行 body(保持原始缩进) for raw_line in block["raw_body"]: new_body += f"\n{line_indent}{raw_line}" new_data_code = f"{header}{new_body}\n{footer}" replacements.append((match.start(), match.end(), new_data_code)) self.logger.debug(f"计划更新 data 数组: 添加 {len(valid_blocks)} 个原始块") # === Step 2: 更新 INDEX 数组 === if valid_blocks: index_pattern = rf'(static const unsigned short {re.escape(self.index_array_name)}\[\]\s*=\s*\{{)([^}}]*)(\}}\s*;)' match = re.search(index_pattern, block_content, re.DOTALL) if not match: raise ValueError(f"未找到 index 数组: {self.index_array_name}") header = match.group(1) body_content 
= match.group(2) footer = match.group(3).strip() lines = [ln.rstrip() for ln in body_content.splitlines()] non_empty_lines = [ln for ln in lines if ln.strip()] # 获取标准缩进(与 enum 一致) if non_empty_lines: indent_match = re.match(r'^(\s*)', non_empty_lines[0]) standard_indent = indent_match.group(1) if indent_match else " " else: standard_indent = " " # 生成新索引值 # 正确计算 index values:基于 data 偏移 + 每个 block 的实际大小 current_offset = parsed['data_len'] # 初始偏移 = 原 data 长度 new_index_values = [] for block in valid_blocks: # 添加当前 block 的起始偏移 new_index_values.append(str(current_offset)) # 偏移 += 当前 block 的数据条数 current_offset += block["count"]+1 # ← 使用 block 自带的 count! self.logger.info(f"生成新的 index values: {new_index_values}") # 格式化 index body formatted_body = self._format_with_inline_fallback( lines=non_empty_lines, new_items=new_index_values, max_per_line=self.MAX_INDEX_ITEMS_PER_LINE, indent_marker=standard_indent, item_separator=" ", line_suffix="", extract_func=lambda s: re.findall(r'\d+', s), detect_spacing_from_last_line=True, align_eq_col=False, ) # 统一添加缩进 final_lines = [] for line in formatted_body.splitlines(): stripped = line.strip() if stripped: final_lines.append(f"{standard_indent}{stripped}") else: final_lines.append("") final_body = '\n'.join(final_lines) new_index_code = f"{header}\n{final_body}\n{footer}" replacements.append((match.start(), match.end(), new_index_code)) self.logger.debug(f"更新 index 数组: 添加 {len(valid_blocks)} 个索引") # === Step 5: 倒序应用所有替换 === if not replacements: self.logger.info("无任何变更需要写入") return full_content for start, end, r in sorted(replacements, key=lambda x: x[0], reverse=True): block_content = block_content[:start] + r + block_content[end:] # === Step 6: 拼接回完整文件 === final_content = ( full_content[:inner_start] + block_content + full_content[end_pos:] ) self.logger.info(f"成功构建新内容,总长度变化: {len(full_content)} → {len(final_content)}") return final_content def run(self): self.logger.info("开始同步 RATE_SET 数据...") try: changed = self.inject_new_data() if changed: print(" 同步完成") else: print(" 无新数据,无需更新") return { "success": True, "changed": changed, "file": str(self.c_file_path), "backup": f"{self.c_file_path.stem}_{datetime.now().strftime('%Y%m%d_%H%M%S')}.c.bak" if changed and not self.dry_run else None } except Exception as e: self.logger.error(f"同步失败: {e}", exc_info=True) print("❌ 同步失败,详见日志。") return {"success": False, "error": str(e)} def main(): logging.basicConfig( level=logging.INFO, format='%(asctime)s [%(levelname)s] %(name)s: %(message)s', handlers=[ logging.FileHandler(LOG_FILE, encoding='utf-8'), logging.StreamHandler(sys.stdout) ], force=True ) dry_run = False # 设置为 True 可进行试运行 try: sync = RateSetSynchronizer(dry_run=dry_run) sync.run() print("同步完成!") except FileNotFoundError as e: logging.error(f"文件未找到: {e}") print(" 文件错误,请检查路径。") sys.exit(1) except PermissionError as e: logging.error(f"权限错误: {e}") print(" 权限不足,请关闭编辑器或以管理员运行。") sys.exit(1) except Exception as e: logging.error(f"程序异常退出: {e}", exc_info=True) print(" 同步失败,详见日志。") sys.exit(1) if __name__ == '__main__': main() 加一个最终打印变化
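For the "加一个最终打印变化" (print the final changes) request, one lightweight option is a diff-based summary printed after the write-back. The sketch below is an assumption, not part of the original script: the helper name print_change_summary and the place it is called from are illustrative; it only needs the old and new file contents as strings, which _safe_write_back already has in hand. The Chinese output strings follow the script's existing logging style.

import difflib

def print_change_summary(old_content: str, new_content: str, max_lines: int = 40) -> None:
    """Print added/removed line counts plus a truncated unified diff of the change."""
    diff = list(difflib.unified_diff(
        old_content.splitlines(), new_content.splitlines(),
        fromfile="before", tofile="after", lineterm=""))
    added = sum(1 for l in diff if l.startswith('+') and not l.startswith('+++'))
    removed = sum(1 for l in diff if l.startswith('-') and not l.startswith('---'))
    print(f"最终变化统计: +{added} 行 / -{removed} 行")
    for line in diff[:max_lines]:
        print(line)
    if len(diff) > max_lines:
        print(f"... 其余 {len(diff) - max_lines} 行已省略,详见日志")

Calling it from _safe_write_back just before the file is written (passing old_content and new_content), or from run() with the backup file's text, would surface the final delta without changing any of the injection logic.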