Fixing "IOError: unknown file format"

This post addresses the IOError raised by ImageFont.truetype() on Ubuntu when the font file's format cannot be parsed. The usual culprit is a font copied over from Windows that Linux cannot read; the fix is to find a usable font already installed on the Linux system and use that instead.


On Ubuntu, calling ImageFont.truetype() with a font file whose format cannot be parsed raises this IOError.
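
A minimal sketch of the failing call (the path below is hypothetical; any .ttf that FreeType cannot parse triggers the same error):

    from PIL import ImageFont

    # Hypothetical path: a font file copied over from a Windows machine.
    # If FreeType cannot parse the file, Pillow raises:
    #     IOError: unknown file format
    font = ImageFont.truetype("/home/user/simhei.ttf", size=24)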

The root cause is that Linux does not recognize some fonts taken from Windows, even though the file extensions are identical.

So rather than copying a font straight over from Windows, look for a font that is already usable on the Linux side.

Linux fonts live under /usr/share/fonts/; copying one of those is all it takes.
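
A quick way to locate a candidate, sketched in Python (the directory layout varies by distribution; fc-list from fontconfig prints a similar listing on the command line):

    import glob
    from PIL import ImageFont

    # Recursively collect TrueType fonts from the system font directory.
    # Fonts may also live under ~/.fonts or /usr/local/share/fonts,
    # so extend the search as needed.
    candidates = glob.glob("/usr/share/fonts/**/*.ttf", recursive=True)
    print(candidates[:5])   # inspect what is available

    # Load any one of them; the index here is arbitrary.
    font = ImageFont.truetype(candidates[0], size=24)

If this call succeeds, the chosen font can simply be copied to wherever your code expects it.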

Bon Appétit~

Reposted from: https://www.cnblogs.com/zhawj159753/p/4487843.html
