Getting the CPU Clock Speed on Windows (PROCESSOR_POWER_INFORMATION)

This article shows how to obtain the CPU's maximum and current clock speed on Windows by calling the CallNtPowerInformation function with the PROCESSOR_POWER_INFORMATION structure. It also addresses the problems you may hit when including the powrprof.h header, and provides a complete code example.


I had long wanted to read the CPU clock speed on Windows, and after much searching I finally found the function CallNtPowerInformation. To use it you must include the powrprof.h header and link against PowrProf.lib. A few problems remain, however:

First, powrprof.h, like many other headers in the Platform SDK, still has no C++ guards, so if you include it carelessly in a C++ program you will get linker errors. The safest way is to wrap the include in an extern "C" block:

extern "C" {
#include <powrprof.h>
}
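
On the linker side you can either add PowrProf.lib to the project's linker settings or, in Visual C++, request it directly from source with a pragma; the one-liner below is just a convenience equivalent to the project setting:

// Visual C++ specific: link the Power Profile import library from source
#pragma comment(lib, "PowrProf.lib")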
The second problem is that one structure, PROCESSOR_POWER_INFORMATION, is for some reason not declared in powrprof.h; this is still the case in Visual C++ 2008 Express Edition. To read the processor speed we call CallNtPowerInformation with the ProcessorInformation enumeration value as the first parameter and an array of PROCESSOR_POWER_INFORMATION structures as the output buffer (one element for each processor installed in the system).

One workaround: since the structure is documented on MSDN, we can simply define it ourselves in our own source file (a complete example of the call follows the definition below):

typedef struct _PROCESSOR_POWER_INFORMATION {
  ULONG  Number;
  ULONG  MaxMhz;
  ULONG  CurrentMhz;
  ULONG  MhzLimit;
  ULONG  MaxIdleState;
  ULONG  CurrentIdleState;
} PROCESSOR_POWER_INFORMATION, *PPROCESSOR_POWER_INFORMATION;
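
With the structure in place, a minimal sketch of the whole call might look like the following. It assumes the extern "C" include shown earlier; GetSystemInfo supplies the processor count, and the output buffer holds one PROCESSOR_POWER_INFORMATION element per processor. Error handling and output formatting are only illustrative.

#include <windows.h>
#include <stdio.h>
#include <vector>

extern "C" {
#include <powrprof.h>
}
#pragma comment(lib, "PowrProf.lib")

// PROCESSOR_POWER_INFORMATION: use the definition given above,
// since powrprof.h does not declare it.

int main()
{
    // One output element per (logical) processor installed in the system
    SYSTEM_INFO si = { 0 };
    GetSystemInfo(&si);

    std::vector<PROCESSOR_POWER_INFORMATION> info(si.dwNumberOfProcessors);

    // NTSTATUS is just a LONG; 0 means STATUS_SUCCESS
    LONG status = CallNtPowerInformation(
        ProcessorInformation,                 // information level
        NULL, 0,                              // no input buffer
        &info[0],                             // output buffer
        sizeof(PROCESSOR_POWER_INFORMATION) * si.dwNumberOfProcessors);

    if (status != 0)
    {
        printf("CallNtPowerInformation failed: 0x%08lX\n", (unsigned long)status);
        return 1;
    }

    for (DWORD i = 0; i < si.dwNumberOfProcessors; ++i)
    {
        printf("CPU %lu: max %lu MHz, current %lu MHz, limit %lu MHz\n",
               info[i].Number, info[i].MaxMhz,
               info[i].CurrentMhz, info[i].MhzLimit);
    }
    return 0;
}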

 

As the definition of PROCESSOR_POWER_INFORMATION shows, the structure reports the CPU's speed: the maximum speed, the current speed, and idle-state information (although I admit I don't yet know exactly what the idle-state fields mean; I will look into it). There is also a related API, GetPwrCapabilities, which retrieves some of the system's power capabilities.
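
As a small illustrative sketch (the fields printed are only examples), GetPwrCapabilities fills a SYSTEM_POWER_CAPABILITIES structure with the machine's power-related features, including whether processor throttling is supported; it uses the same powrprof.h include and PowrProf.lib linkage as above.

#include <windows.h>
#include <stdio.h>

extern "C" {
#include <powrprof.h>
}
#pragma comment(lib, "PowrProf.lib")

int main()
{
    SYSTEM_POWER_CAPABILITIES spc = { 0 };
    if (GetPwrCapabilities(&spc))
    {
        // ProcessorThrottle reports whether the CPU supports throttling;
        // the min/max throttle values are percentages of full speed.
        printf("Processor throttling: %s\n", spc.ProcessorThrottle ? "yes" : "no");
        printf("Throttle range: %u%% - %u%%\n",
               (unsigned)spc.ProcessorMinThrottle, (unsigned)spc.ProcessorMaxThrottle);
    }
    return 0;
}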

 

 

 
 
 