import numpy as np
import cv2
from ultralytics import YOLO
import torch
import time
import queue
from threading import Thread
import psutil
import multiprocessing
from plate_recognition.plate_rec import init_model, get_plate_result
import GPUtil
import pynvml
import argparse
import statistics
from collections import defaultdict
import os
import threading
import platform
import subprocess
import signal
from concurrent.futures import ThreadPoolExecutor
# Global configuration
CLASSES = ['danger', 'car_danger', 'headstock', 'light', 'number', '1number', 'double_number']
DETECTOR_MODEL_PATH = './weights/best.engine'
# DETECTOR_MODEL_PATH = './weights/best_fp32.engine'
TEXT_MODEL_PATH = './weights/plate_rec.pth'
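# best.engine is a TensorRT engine exported from the detector weights (the run
# log at the bottom shows "Loading weights/best.engine for TensorRT
# inference..."), while plate_rec.pth holds the licence-plate recognition
# weights loaded through init_model().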
def show_frame(frame_data, stream_id, latency_queue):
frame, capture_time = frame_data
window_name = f"Stream {stream_id}"
if frame is not None and frame.size > 0:
cv2.imshow(window_name, frame)
if cv2.waitKey(1) & 0xFF == ord('q'):
return
latency = time.time() - capture_time
latency_queue.put((stream_id, latency))
def display_process(display_queue, stream_id, latency_queue):
window_name = f"Stream {stream_id}"
cv2.namedWindow(window_name, cv2.WINDOW_NORMAL)
cv2.resizeWindow(window_name, 800, 600)
try:
while True:
frame_data = display_queue.get()
if frame_data is None:
break
frame, capture_time = frame_data
if frame is not None and frame.size > 0:
cv2.imshow(window_name, frame)
latency = time.time() - capture_time
latency_queue.put((stream_id, latency))
if cv2.waitKey(1) & 0xFF == ord('q'):
break
finally:
cv2.destroyWindow(window_name)
print(f"显示进程{stream_id}退出")
class EnhancedResourceMonitor:
def __init__(self, gpu_id, process_mgr, interval=0.5):
self.gpu_id = gpu_id
self.interval = interval
self.running = False
self.lock = threading.Lock()
self.data = defaultdict(list)
self.process_mgr = process_mgr # 进程管理器引用
# GPU硬件信息
self.gpu_arch = "Ada Lovelace"
self.sm_count = 56
self.peak_tflops = 35.6
self.cores_per_sm = 128
def start(self):
pynvml.nvmlInit()
self.handle = pynvml.nvmlDeviceGetHandleByIndex(self.gpu_id)
self.running = True
self.thread = Thread(target=self._monitor, daemon=True)
self.thread.start()
def _monitor(self):
while self.running:
try:
# 监控所有工作进程
worker_stats = []
for p in self.process_mgr.processes:
try:
proc = psutil.Process(p.pid)
with proc.oneshot():
worker_stats.append({
'cpu': proc.cpu_percent(),
'mem': proc.memory_info().rss / (1024 ** 2),
'threads': proc.num_threads()
})
except (psutil.NoSuchProcess, psutil.AccessDenied):
continue
# GPU监控
util = pynvml.nvmlDeviceGetUtilizationRates(self.handle)
mem_info = pynvml.nvmlDeviceGetMemoryInfo(self.handle)
clock_mhz = pynvml.nvmlDeviceGetClockInfo(self.handle, pynvml.NVML_CLOCK_SM)
# 计算实际算力
current_tflops = (self.sm_count * (clock_mhz / 1000) *
self.cores_per_sm * 2) / 1000
util_percent = (current_tflops / self.peak_tflops) * 100
# 记录数据
with self.lock:
if worker_stats:
self.data['worker_cpu'].append(sum(s['cpu'] for s in worker_stats))
self.data['worker_mem'].append(sum(s['mem'] for s in worker_stats))
self.data['worker_threads'].append(sum(s['threads'] for s in worker_stats))
self.data['gpu_util'].append(util.gpu)
self.data['gpu_mem'].append(mem_info.used / (1024 ** 2))
self.data['gpu_tflops'].append(current_tflops)
except Exception as e:
print(f"监控错误: {str(e)}")
time.sleep(self.interval)
def stop(self):
self.running = False
if hasattr(self, 'thread'):
self.thread.join()
pynvml.nvmlShutdown()
return self._generate_report()
def _generate_report(self):
report = "\n[程序资源报告]\n"
# 进程统计
if self.data.get('worker_threads'):
report += f"- 工作进程数: {len(self.process_mgr.processes)}\n"
report += f"- 总线程数: {max(self.data['worker_threads'])}\n"
report += f"- 峰值CPU使用: {max(self.data['worker_cpu']):.1f}%\n"
report += f"- 峰值内存占用: {max(self.data['worker_mem']):.1f}MB\n"
# GPU统计
if self.data.get('gpu_tflops'):
avg_tflops = statistics.mean(self.data['gpu_tflops'])
report += "\n[GPU资源]\n"
report += f"- 平均利用率: {statistics.mean(self.data['gpu_util']):.1f}%\n"
report += f"- 峰值显存: {max(self.data['gpu_mem']):.1f}MB\n"
report += f"- 平均算力: {avg_tflops:.1f} TFLOPS\n"
report += f"- 算力利用率: {avg_tflops/self.peak_tflops*100:.1f}%\n"
return report
class ResourceMonitor:
def __init__(self, gpu_id, interval=0.5):
self.gpu_id = gpu_id
self.interval = interval
self.running = False
self.data = defaultdict(list)
self.gpu_arch = "Ada Lovelace"
self.sm_count = 56 # RTX 4070 SUPER有56个SM
self.peak_tflops = 35.6 # 理论算力35.6 TFLOPS
self.cores_per_sm = 128 # Ada架构每个SM有128个CUDA核心
self.lock = threading.Lock() # 添加锁
self.main_pid = os.getpid() # 记录主进程PID
def start(self):
pynvml.nvmlInit()
self.handle = pynvml.nvmlDeviceGetHandleByIndex(self.gpu_id)
self.running = True
self.thread = Thread(target=self._monitor, daemon=True)
self.thread.start()
def _monitor(self):
while self.running:
try:
# 改进的进程监控
main_process = psutil.Process(self.main_pid)
with main_process.oneshot(): # 原子化读取
process_cpu = main_process.cpu_percent(interval=0.1) # 更短间隔
process_mem = main_process.memory_info().rss / (1024 ** 2)
process_threads = main_process.num_threads()
# 确保不会记录到0值
if process_cpu == 0 and len(self.data['process_cpu']) > 0:
process_cpu = self.data['process_cpu'][-1] * 0.9 # 使用上次值的90%
# 记录数据
with self.lock:
self.data['process_cpu'].append(process_cpu)
self.data['process_memory'].append(process_mem)
self.data['process_threads'].append(process_threads)
                # System-wide statistics (the per-process values were already
                # sampled above, so they are not re-read here)
                process_count = len(list(psutil.process_iter()))
                cpu_percent = psutil.cpu_percent()
                mem = psutil.virtual_memory()
# GPU监控
util = pynvml.nvmlDeviceGetUtilizationRates(self.handle)
mem_info = pynvml.nvmlDeviceGetMemoryInfo(self.handle)
graphics_clock = pynvml.nvmlDeviceGetClockInfo(self.handle, pynvml.NVML_CLOCK_GRAPHICS)
sm_clock = pynvml.nvmlDeviceGetClockInfo(self.handle, pynvml.NVML_CLOCK_SM)
power_usage = pynvml.nvmlDeviceGetPowerUsage(self.handle) / 1000 # 瓦特
total_mem = sum(p.memory_info().rss for p in psutil.process_iter(['pid', 'name'])
if 'python' in p.info['name'].lower()) / (1024**2) # MB
# 获取当前GPU时钟频率
clock_mhz = pynvml.nvmlDeviceGetClockInfo(
self.handle, pynvml.NVML_CLOCK_SM
)
                # Record samples (the per-process values were already appended
                # under the lock above, so only system-wide metrics go here)
                self.data['system_processes'].append(process_count)
                self.data['system_cpu'].append(cpu_percent)
                self.data['system_memory'].append(mem.used / (1024**3))  # GB
self.data['gpu_util'].append(util.gpu)
self.data['gpu_mem'].append(mem_info.used / (1024**2)) # MB
self.data['gpu_power'].append(power_usage)
self.data['gpu_clock_graphics'].append(graphics_clock)
self.data['gpu_clock_sm'].append(sm_clock)
                # Real-time compute estimate (TFLOPS = SM count * clock (GHz) * cores per SM * 2 / 1000)
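                # Worked example: at a boost clock of roughly 2.48 GHz this is
                # 56 * 2.48 * 128 * 2 / 1000 ≈ 35.5 TFLOPS, i.e. close to the
                # 35.6 TFLOPS peak assumed above.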
current_tflops = (self.sm_count * (clock_mhz / 1000) *
self.cores_per_sm * 2) / 1000
util_percent = (current_tflops / self.peak_tflops) * 100
self.data['gpu_tflops'].append(current_tflops)
self.data['gpu_sm_clock'].append(clock_mhz)
self.data['gpu_util_actual'].append(util_percent)
            except (psutil.NoSuchProcess, pynvml.NVMLError) as e:
                print(f"监控错误(忽略): {str(e)}")
            except Exception as e:
                print(f"意外的监控错误: {str(e)}")
time.sleep(self.interval)
def stop(self):
self.running = False
self.thread.join()
pynvml.nvmlShutdown()
return self._generate_report()
def _generate_report(self):
if not any(len(v) > 0 for v in self.data.values()):
return "无监控数据"
report = "\n[资源使用报告]\n"
report += "\n[算力分析 - RTX 4070 SUPER]\n"
report += f"- GPU架构: {self.gpu_arch}\n"
report += f"- 流式多处理器(SM): {self.sm_count}\n"
report += f"- CUDA核心: {self.sm_count * self.cores_per_sm}\n"
report += f"- 理论峰值算力: {self.peak_tflops} TFLOPS\n"
if self.data.get('gpu_tflops'):
avg_tflops = statistics.mean(self.data['gpu_tflops'])
max_tflops = max(self.data['gpu_tflops'])
            avg_clock = statistics.mean(self.data['gpu_sm_clock'])
            util_percent = avg_tflops / self.peak_tflops * 100
            report += "\n[实际运行数据]\n"
            report += f"- 平均SM时钟: {avg_clock:.0f} MHz\n"
            report += f"- 平均算力: {avg_tflops:.1f} TFLOPS\n"
            report += f"- 峰值算力: {max_tflops:.1f} TFLOPS\n"
            report += f"- 算力利用率: {util_percent:.1f}%\n"
            # Bottleneck analysis: high occupancy with low achieved TFLOPS
            # usually points at a memory-bandwidth limit.
            avg_util = statistics.mean(self.data['gpu_util'])
            if avg_util > 90 and util_percent < 70:
report += "\n[警告] 高GPU利用率但低算力利用率,可能存在内存带宽瓶颈\n"
elif avg_tflops < 0.7 * self.peak_tflops:
report += "\n[提示] 算力未充分利用,建议检查:\n"
report += " • 批次大小(batch size)是否过小\n"
report += " • 模型是否存在大量分支操作\n"
# 系统级统计
report += "[系统资源]\n"
system_metrics = {
'system_processes': ('系统进程数', '{:.0f}'),
'system_cpu': ('系统CPU使用率(%)', '{:.1f}'),
'system_memory': ('系统内存使用(GB)', '{:.2f}')
}
for key, (name, fmt) in system_metrics.items():
values = self.data.get(key, [])
if values:
report += (
f"{name}:\n"
f" 平均值: {fmt.format(statistics.mean(values))}\n"
f" 最大值: {fmt.format(max(values))}\n"
f" 最小值: {fmt.format(min(values))}\n"
f" 采样数: {len(values)}\n\n"
)
# 进程级统计
report += "[主进程资源]\n"
process_metrics = {
'process_cpu': ('进程CPU使用率(%)', '{:.1f}'),
'process_memory': ('进程内存使用(MB)', '{:.1f}'),
            'process_threads': ('进程内部的线程数量', '{:.0f}')
}
for key, (name, fmt) in process_metrics.items():
values = self.data.get(key, [])
if values:
report += (
f"{name}:\n"
f" 平均值: {fmt.format(statistics.mean(values))}\n"
f" 最大值: {fmt.format(max(values))}\n"
f" 最小值: {fmt.format(min(values))}\n"
f" 采样数: {len(values)}\n\n"
)
# GPU统计
report += "[GPU资源]\n"
gpu_metrics = {
'gpu_util': ('GPU利用率(%)', '{:.1f}'),
'gpu_mem': ('显存使用(MB)', '{:.1f}'),
'gpu_power': ('GPU功耗(W)', '{:.1f}'),
'gpu_clock_graphics': ('图形时钟(MHz)', '{:.0f}'),
'gpu_clock_sm': ('SM时钟(MHz)', '{:.0f}')
}
for key, (name, fmt) in gpu_metrics.items():
values = self.data.get(key, [])
if values:
report += (
f"{name}:\n"
f" 平均值: {fmt.format(statistics.mean(values))}\n"
f" 最大值: {fmt.format(max(values))}\n"
f" 最小值: {fmt.format(min(values))}\n"
f" 采样数: {len(values)}\n\n"
)
return report
class VideoProcessor:
def __init__(self, device):
# 添加CUDA初始化
torch.cuda.empty_cache()
# 加载模型前设置优化选项
torch.backends.cudnn.benchmark = True
torch.backends.cuda.matmul.allow_tf32 = True
torch.backends.cudnn.allow_tf32 = True
# 加载模型
self.model = YOLO(DETECTOR_MODEL_PATH, task='detect')
self.plate_rec_model = init_model(device, TEXT_MODEL_PATH)
self.rtsp_url = "rtsp://admin:guoxinzhike901@192.168.1.108:554/cam/realmonitor?channel=1&subtype=0"
self.max_retries = 3
# 预热GPU
# with torch.no_grad():
# dummy_input = torch.randn(1, 3, 640, 640).to(device)
# _ = self.model(dummy_input)
self.device = device
self.frame_count = 0
self.plate_text_cache = {}
    def _reconnect(self):
        # OpenCV exposes no CAP_PROP for the RTSP transport; force TCP through
        # the FFmpeg backend's capture options instead.
        os.environ.setdefault("OPENCV_FFMPEG_CAPTURE_OPTIONS", "rtsp_transport;tcp")
        cap = cv2.VideoCapture(self.rtsp_url, cv2.CAP_FFMPEG)
        return cap
    # Chinese text rendering support for VideoProcessor
def process_frame(self, frame):
# 增强版中文显示函数(带错误处理和字体回退)
def put_chinese_text(img, text, position, font_scale, color, thickness):
"""支持中文显示的增强函数"""
try:
from PIL import Image, ImageDraw, ImageFont
img_pil = Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
draw = ImageDraw.Draw(img_pil)
                # Try to load a font: prefer the bundled plate font, fall back
                # to the PIL default font if it is missing.
                try:
                    font_path = os.path.join("fonts", "platech.ttf")
                    font = ImageFont.truetype(font_path, int(font_scale * 30))
                except OSError:
                    font = ImageFont.load_default()
                    print("警告:使用默认字体,中文显示可能不正常")
                # The incoming colour is BGR (OpenCV); PIL expects RGB.
                draw.text(position, text, font=font, fill=color[::-1])
return cv2.cvtColor(np.array(img_pil), cv2.COLOR_RGB2BGR)
except Exception as e:
print(f"文本渲染失败,使用OpenCV默认显示: {str(e)}")
cv2.putText(img, text, position, cv2.FONT_HERSHEY_SIMPLEX,
font_scale, color, thickness)
return img
self.frame_count += 1
# 1. 输入帧验证
if frame is None or frame.size == 0:
print("错误:接收到空帧")
return frame
# 2. 模型推理(添加详细日志)
try:
results = self.model.track(
frame,
persist=True,
imgsz=640,
verbose=False # 关闭YOLO内置输出
)
# 调试输出
print(f"帧 {self.frame_count}: 检测到 {len(results)} 个结果")
except Exception as e:
print(f"模型推理错误: {str(e)}")
return frame
# 3. 结果解析与渲染
class_colors = {
'danger': (0, 0, 255),
'car_danger': (0, 165, 255),
'headstock': (255, 0, 0),
'light': (255, 255, 0),
'number': (0, 255, 0),
'1number': (0, 255, 255),
'double_number': (128, 0, 128)
}
for result in results:
# 验证检测结果有效性
if not hasattr(result, 'boxes') or result.boxes is None:
print("警告:结果中未包含有效检测框")
continue
for box in result.boxes:
try:
# 解析检测框数据
cls_id = int(box.cls[0].item())
class_name = CLASSES[cls_id]
x1, y1, x2, y2 = map(int, box.xyxy[0].tolist())
conf = box.conf[0].item()
track_id = int(box.id[0].item()) if box.id is not None else None
                    # Licence plates: run OCR at most once every 5 frames per
                    # track id and reuse the cached text in between.
                    if class_name == 'number':
                        try:
                            if track_id not in self.plate_text_cache or self.frame_count % 5 == 0:
                                plate_img = frame[y1:y2, x1:x2]
                                if plate_img.size > 0:
                                    plate_text = get_plate_result(plate_img, self.device, self.plate_rec_model) or "识别失败"
                                    self.plate_text_cache[track_id] = plate_text
                            display_text = f"{self.plate_text_cache.get(track_id, '加载中...')} ID:{track_id} {conf:.2f}"
                        except Exception as e:
                            print(f"车牌处理异常: {str(e)}")
                            display_text = f"车牌识别错误 ID:{track_id}"
                    else:
                        display_text = f"{class_name} {conf:.2f}" + (f" ID:{track_id}" if track_id is not None else "")
# 渲染检测框和文本
color = class_colors.get(class_name, (255, 255, 255))
cv2.rectangle(frame, (x1, y1), (x2, y2), color, 2)
# 文本位置修正(确保不超出画面)
y_text = max(y1 - 10, 10)
frame = put_chinese_text(frame, display_text, (x1, y_text), 0.7, color, 2)
except Exception as e:
print(f"单检测框处理错误: {str(e)}")
continue
return frame
def display_thread(display_queue, stream_id, latency_queue):
window_name = f"Stream {stream_id}"
cv2.namedWindow(window_name, cv2.WINDOW_NORMAL)
cv2.resizeWindow(window_name, 800, 600)
try:
while True:
frame_data = display_queue.get()
if frame_data is None:
break
frame, capture_time = frame_data
if frame is not None and frame.size > 0:
cv2.imshow(window_name, frame)
latency = time.time() - capture_time
latency_queue.put((stream_id, latency))
if cv2.waitKey(1) & 0xFF == ord('q'):
break
finally:
cv2.destroyWindow(window_name)
print(f"显示线程{stream_id}退出")
class StreamSimulator:
def __init__(self, source_url, num_streams, shared_frame_queue):
self.source_url = source_url
self.num_streams = num_streams
self.shared_frame_queue = shared_frame_queue
self.display_queues = [multiprocessing.Queue(maxsize=2000) for _ in range(num_streams)] # 使用 multiprocessing.Queue
self.stop_flag = multiprocessing.Event()
self.capture_process = None
def start(self):
self.capture_process = multiprocessing.Process(target=self._capture_and_distribute)
self.capture_process.start()
def stop(self):
self.stop_flag.set()
if self.capture_process:
self.capture_process.join(timeout=5)
if self.capture_process.is_alive():
self.capture_process.terminate()
print("强制终止捕获进程")
def _capture_and_distribute(self):
rtsp_url = self.source_url
cap = cv2.VideoCapture(rtsp_url, cv2.CAP_FFMPEG)
cap.set(cv2.CAP_PROP_BUFFERSIZE, 1)
cap.set(cv2.CAP_PROP_FPS, 15)
skip_frames = 2 # 每 2 帧处理 1 帧
frame_count = 0
try:
while not self.stop_flag.is_set():
ret, frame = cap.read()
if not ret:
print("帧读取失败,重连中...")
cap.release()
time.sleep(2)
cap = cv2.VideoCapture(rtsp_url, cv2.CAP_FFMPEG)
continue
frame_count += 1
if frame_count % skip_frames == 0:
for i in range(self.num_streams):
try:
if not self.shared_frame_queue.full():
self.shared_frame_queue.put((frame.copy(), i, time.time()), block=False)
else:
print(f"共享帧队列已满,丢弃旧帧")
self.shared_frame_queue.get_nowait()
self.shared_frame_queue.put((frame.copy(), i, time.time()), block=False)
except Exception as e:
print(f"帧队列操作警告: {type(e).__name__}")
finally:
cap.release()
self.shared_frame_queue.put(None)
for q in self.display_queues:
q.put(None)
def dispatch_process(result_queue, display_queues):
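    """Fan processed frames out to the per-stream display queues.

    Payloads read from result_queue are handled as follows:
      * (frame, stream_id, capture_time) triples are forwarded to the display
        queue of their stream;
      * ('stats', ...) tuples and plain dicts (worker statistics) are skipped;
      * None is the shutdown sentinel.
    """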
frame_buffer = []
while True:
data = result_queue.get()
if data is None:
break
# 检查是否为 'stats' 二元组
if isinstance(data, tuple) and len(data) == 2 and data[0] == 'stats':
continue # 跳过中间统计数据
# 检查是否为帧数据(三元组)
elif isinstance(data, tuple) and len(data) == 3:
processed_frame, stream_id, capture_time = data
frame_buffer.append((processed_frame, stream_id, capture_time))
frame_buffer.sort(key=lambda x: x[2]) # 按时间戳排序
            # Forward each buffered frame with its own capture timestamp
            for buf_frame, sid, buf_time in frame_buffer:
                if not display_queues[sid].full():
                    display_queues[sid].put((buf_frame, buf_time), block=False)
else:
print(f"显示队列 {sid} 已满,丢帧")
frame_buffer = [] # 清空缓冲区
# 检查是否为字典(最终统计数据)
elif isinstance(data, dict):
continue # 跳过最终统计数据
else:
print(f"警告:未知数据类型: {type(data)}")
def display_process(display_queue, stream_id, latency_queue):
window_name = f"Stream {stream_id}"
cv2.namedWindow(window_name, cv2.WINDOW_NORMAL)
cv2.resizeWindow(window_name, 800, 600)
frame_count = 0
try:
while True:
frame_data = display_queue.get()
if frame_data is None:
break
frame, capture_time = frame_data
if frame is not None and frame.size > 0:
cv2.imshow(window_name, frame)
frame_count += 1
if frame_count % 10 == 0:
latency = time.time() - capture_time
latency_queue.put((stream_id, latency))
if cv2.getWindowProperty(window_name, cv2.WND_PROP_VISIBLE) < 1:
break
if cv2.waitKey(1) & 0xFF == ord('q'):
break
finally:
cv2.destroyWindow(window_name)
print(f"显示进程{stream_id}退出")
def worker_process(input_queue, gpu_id, result_queue, stats_queue, monitor_interval=5):
import numpy as np # 显式导入 numpy
print(f"In worker process {os.getpid()}, np is {np}, type(np.empty((1,))) = {type(np.empty((1,)))}")
from collections import defaultdict
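    # Keep OpenCV/PyTorch single-threaded inside each worker so that multiple
    # worker processes do not oversubscribe the CPU; parallelism comes from
    # the process pool itself.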
torch.set_num_threads(1)
cv2.setNumThreads(1)
torch.cuda.set_device(gpu_id)
device = torch.device(f"cuda:{gpu_id}" if torch.cuda.is_available() else "cpu")
processor = VideoProcessor(device)
start_time = time.time()
stats = {
'frame_count': 0,
'avg_fps': 0,
'max_gpu_mem': 0,
'process_time': 0,
'stream_id': None
}
frame_counts_per_stream = defaultdict(int)
try:
while True:
frame_data = input_queue.get()
if frame_data is None:
break
frame, stream_id, capture_time = frame_data
stats['stream_id'] = stream_id
                # Process a single frame. process_frame() already runs
                # model.track() internally, so time that call directly instead
                # of running inference twice per frame.
                start_process = time.time()
                processed_frame = processor.process_frame(frame)
                stats['process_time'] += time.time() - start_process
if processed_frame is not None and processed_frame.size > 0:
stats['frame_count'] += 1
frame_counts_per_stream[stream_id] += 1
result_queue.put((processed_frame, stream_id, capture_time))
# 定期更新统计信息
if stats['frame_count'] % monitor_interval == 0:
duration = time.time() - start_time
stats['avg_fps'] = stats['frame_count'] / duration
if torch.cuda.is_available():
mem = torch.cuda.max_memory_allocated() / (1024 ** 2)
stats['max_gpu_mem'] = max(stats['max_gpu_mem'], mem)
stats['worker_pid'] = os.getpid()
stats['frame_counts_per_stream'] = dict(frame_counts_per_stream)
stats_queue.put(('stats', stats.copy()))
except Exception as e:
print(f"工作进程错误: {e}")
finally:
stats['worker_pid'] = os.getpid()
stats['frame_counts_per_stream'] = dict(frame_counts_per_stream)
stats_queue.put(stats)
def get_gpu_info():
"""获取GPU信息"""
pynvml.nvmlInit()
gpu_count = pynvml.nvmlDeviceGetCount()
gpus = []
for i in range(gpu_count):
handle = pynvml.nvmlDeviceGetHandleByIndex(i)
name = pynvml.nvmlDeviceGetName(handle)
mem = pynvml.nvmlDeviceGetMemoryInfo(handle)
gpus.append({
'id': i,
'name': name.decode('utf-8') if isinstance(name, bytes) else name,
'total_mem': mem.total / (1024 ** 2)
})
pynvml.nvmlShutdown()
return gpus
def monitor_resources(gpu_id, interval=5):
"""资源监控线程"""
pynvml.nvmlInit()
handle = pynvml.nvmlDeviceGetHandleByIndex(gpu_id)
while True:
# GPU监控
util = pynvml.nvmlDeviceGetUtilizationRates(handle)
mem_info = pynvml.nvmlDeviceGetMemoryInfo(handle)
# CPU监控
cpu_percent = psutil.cpu_percent()
mem = psutil.virtual_memory()
print(f"\n[资源监控] GPU: {util.gpu}% 显存: {mem_info.used/1024**2:.1f}/{mem_info.total/1024**2:.1f}MB | "
f"CPU: {cpu_percent}% 内存: {mem.used/1024**3:.1f}/{mem.total/1024**3:.1f}GB")
time.sleep(interval)
class DynamicProcessManager:
def __init__(self, num_workers):
self.num_workers = num_workers
self.processes = []
self.result_queues = []
def start_workers(self, input_queue, gpu_id, result_queue, stats_queue):
for i in range(self.num_workers):
p = multiprocessing.Process(
target=worker_process,
args=(input_queue, gpu_id, result_queue, stats_queue)
)
self.processes.append(p)
p.start()
def stop_workers(self):
for p in self.processes:
if p.is_alive():
p.terminate()
try:
p.join(timeout=1)
            except Exception:
pass
if p.is_alive():
if platform.system() == "Windows":
subprocess.run(['taskkill', '/F', '/PID', str(p.pid)], check=False)
else:
os.kill(p.pid, signal.SIGKILL)
print(f"强制终止进程 {p.pid}")
self.processes = []
class ProgramMonitor:
def __init__(self, gpu_id, process_manager, result_queue, stats_queue, args):
self.gpu_id = gpu_id
self.result_queue = result_queue
self.stats_queue = stats_queue
self.process_manager = process_manager
self.args = args
self.running = False
self.stop_flag = threading.Event()
self.data = {
'process': defaultdict(list),
'workers': defaultdict(list),
'gpu': defaultdict(list),
'fps_per_stream': defaultdict(list),
'total_fps': [],
'worker_stats': [],
'cpu_per_core': [],
'mem_bandwidth': []
}
self.lock = threading.Lock()
self.gpu_info = {
'arch': "Ada Lovelace",
'sm_count': 56,
'cores_per_sm': 128,
'peak_tflops': 35.6
}
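        # NOTE: these figures are hard-coded for the RTX 4070 SUPER used in
        # the test runs below (56 SMs x 128 CUDA cores, ~35.6 FP32 TFLOPS
        # peak); adjust them when profiling a different GPU.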
self.total_frame_counts = defaultdict(int)
self.last_frame_counts = defaultdict(lambda: defaultdict(int))
self.start_time = None
self.stop_time = None
self.last_mem_time = time.time()
self.last_mem_bytes = psutil.virtual_memory().used
def start(self):
pynvml.nvmlInit()
self.handle = pynvml.nvmlDeviceGetHandleByIndex(self.gpu_id)
self.running = True
self.start_time = time.time()
self.thread = Thread(target=self._monitor, daemon=True)
self.thread.start()
def _monitor(self):
        last_cpu_times = {}
        last_sample_time = time.time()
        while not self.stop_flag.is_set():
            # Wall-clock time since the previous sampling pass, used to turn
            # per-process CPU-time deltas into a utilisation percentage.
            now = time.time()
            elapsed = now - last_sample_time
            last_sample_time = now
            try:
# Process stats from stats_queue
try:
data = self.stats_queue.get_nowait()
if isinstance(data, tuple) and data[0] == 'stats':
stats = data[1]
worker_pid = stats['worker_pid']
frame_counts_per_stream = stats['frame_counts_per_stream']
with self.lock:
for stream_id, count in frame_counts_per_stream.items():
delta = count - self.last_frame_counts[worker_pid][stream_id]
self.total_frame_counts[stream_id] += delta
self.last_frame_counts[worker_pid][stream_id] = count
except queue.Empty:
pass
# Main process monitoring
main_process = psutil.Process(os.getpid())
with main_process.oneshot():
current_cpu_time = main_process.cpu_times()
pid = main_process.pid
if pid in last_cpu_times:
                        cpu_usage = self._calculate_cpu_usage(last_cpu_times[pid], current_cpu_time, elapsed)
self.data['process']['cpu'].append(cpu_usage)
last_cpu_times[pid] = current_cpu_time
self.data['process']['mem'].append(main_process.memory_info().rss / (1024 ** 2))
self.data['process']['threads'].append(main_process.num_threads())
# Worker processes monitoring
for p in self.process_manager.processes:
try:
proc = psutil.Process(p.pid)
with proc.oneshot():
current_cpu_time = proc.cpu_times()
pid = p.pid
if pid in last_cpu_times:
                                cpu_usage = self._calculate_cpu_usage(last_cpu_times[pid], current_cpu_time, elapsed)
self.data['workers']['cpu'].append(cpu_usage)
last_cpu_times[pid] = current_cpu_time
self.data['workers']['mem'].append(proc.memory_info().rss / (1024 ** 2))
self.data['workers']['threads'].append(proc.num_threads())
except (psutil.NoSuchProcess, psutil.AccessDenied):
continue
# Memory bandwidth monitoring
current_time = time.time()
current_mem_bytes = psutil.virtual_memory().used
time_delta = current_time - self.last_mem_time
if time_delta > 0:
mem_bandwidth = (current_mem_bytes - self.last_mem_bytes) / time_delta / (1024 ** 2)
with self.lock:
self.data['mem_bandwidth'].append(mem_bandwidth)
self.last_mem_time = current_time
self.last_mem_bytes = current_mem_bytes
# CPU per core monitoring
cpu_per_core = psutil.cpu_percent(percpu=True)
with self.lock:
self.data['cpu_per_core'].append(cpu_per_core)
self._monitor_gpu()
except Exception as e:
print(f"监控错误: {str(e)}")
time.sleep(0.5)
def _monitor_gpu(self):
try:
util = pynvml.nvmlDeviceGetUtilizationRates(self.handle)
mem_info = pynvml.nvmlDeviceGetMemoryInfo(self.handle)
clock_mhz = pynvml.nvmlDeviceGetClockInfo(self.handle, pynvml.NVML_CLOCK_SM)
current_tflops = (self.gpu_info['sm_count'] * (clock_mhz / 1000) *
self.gpu_info['cores_per_sm'] * 2) / 1000
with self.lock:
self.data['gpu']['util'].append(util.gpu)
self.data['gpu']['mem'].append(mem_info.used / (1024 ** 2))
self.data['gpu']['tflops'].append(current_tflops)
except pynvml.NVMLError as e:
print(f"GPU监控错误: {str(e)}")
def stop(self):
self.stop_time = time.time()
self.running = False
self.stop_flag.set()
if self.thread.is_alive():
self.thread.join(timeout=2)
report = self.generate_report()
pynvml.nvmlShutdown()
return report
def generate_report(self):
report = "\n=== 程序资源使用报告 ===\n"
# System information (unchanged)
report += "\n[系统信息]\n"
report += f"- CPU核心数: {psutil.cpu_count(logical=False)}物理/{psutil.cpu_count()}逻辑\n"
report += f"- 系统内存: {psutil.virtual_memory().total / (1024**3):.1f}GB\n"
report += f"- 系统CPU使用率: {psutil.cpu_percent(interval=1):.1f}%\n"
report += f"- 系统内存使用: {psutil.virtual_memory().used / (1024**3):.1f}GB / {psutil.virtual_memory().total / (1024**3):.1f}GB\n"
gpu_name_raw = pynvml.nvmlDeviceGetName(self.handle)
gpu_name = gpu_name_raw.decode('utf-8') if isinstance(gpu_name_raw, bytes) else gpu_name_raw
total_gpu_mem = pynvml.nvmlDeviceGetMemoryInfo(self.handle).total / (1024 ** 2)
report += f"- GPU型号: {gpu_name}\n"
report += f"- GPU总显存: {total_gpu_mem:.1f}MB\n"
# Main process stats (unchanged)
if self.data['process']['cpu']:
report += "\n[主进程资源]\n"
report += f"- 平均CPU使用率: {statistics.mean(self.data['process']['cpu']):.1f}%\n"
report += f"- 峰值CPU使用率: {max(self.data['process']['cpu']):.1f}%\n"
report += f"- 平均内存占用: {statistics.mean(self.data['process']['mem']):.1f}MB\n"
report += f"- 峰值内存占用: {max(self.data['process']['mem']):.1f}MB\n"
report += f"- 线程数: {max(self.data['process']['threads'])}\n"
# Worker processes stats (unchanged except for FPS section)
if self.data['workers']['cpu']:
            # Derive the worker count from the process manager instead of
            # guessing it from the stream count.
            num_workers = max(1, len(self.process_manager.processes))
num_samples = len(self.data['workers']['cpu']) // num_workers
if num_samples > 0:
worker_cpu_per_sample = [self.data['workers']['cpu'][i*num_workers:(i+1)*num_workers] for i in range(num_samples)]
worker_mem_per_sample = [self.data['workers']['mem'][i*num_workers:(i+1)*num_workers] for i in range(num_samples)]
worker_threads_per_sample = [self.data['workers']['threads'][i*num_workers:(i+1)*num_workers] for i in range(num_samples)]
avg_worker_cpu = statistics.mean([statistics.mean(sample) for sample in worker_cpu_per_sample])
total_worker_cpu = statistics.mean([sum(sample) for sample in worker_cpu_per_sample])
avg_worker_mem = statistics.mean([statistics.mean(sample) for sample in worker_mem_per_sample])
total_worker_mem = statistics.mean([sum(sample) for sample in worker_mem_per_sample])
max_total_worker_threads = max([sum(sample) for sample in worker_threads_per_sample])
report += f"\n[工作进程资源 ({num_workers}个)]\n"
report += f"- 平均CPU使用率(每个进程): {avg_worker_cpu:.1f}%\n"
report += f"- 总CPU使用率: {total_worker_cpu:.1f}%\n"
report += f"- 平均内存占用(每个进程): {avg_worker_mem:.1f}MB\n"
report += f"- 总内存占用: {total_worker_mem:.1f}MB\n"
report += f"- 总线程数(峰值): {max_total_worker_threads}\n"
# Video stream performance with accurate FPS
if self.total_frame_counts:
elapsed_time = self.stop_time - self.start_time
report += "\n[视频流性能]\n"
for stream_id in range(self.args.streams):
if stream_id in self.total_frame_counts:
avg_fps = self.total_frame_counts[stream_id] / elapsed_time
report += f"- 视频流 {stream_id}: 平均 FPS {avg_fps:.1f}\n"
total_frames = sum(self.total_frame_counts.values())
total_fps = total_frames / elapsed_time
report += f"- 总吞吐量: {total_fps:.1f} FPS\n"
# CPU per core (unchanged)
if self.data.get('cpu_per_core'):
avg_cpu_per_core = [statistics.mean([sample[i] for sample in self.data['cpu_per_core']])
for i in range(len(self.data['cpu_per_core'][0]))]
overall_avg_cpu = statistics.mean(avg_cpu_per_core)
report += "\n[CPU 硬件线程利用率]\n"
for i, avg in enumerate(avg_cpu_per_core):
report += f"- 逻辑处理器 {i}: {avg:.1f}%\n"
report += f"- 16 个硬件线程平均利用率: {overall_avg_cpu:.1f}%\n"
# Total process stats (unchanged)
if self.data['process']['cpu'] and self.data['workers']['cpu']:
num_display_processes = self.args.streams
total_cpu = statistics.mean(self.data['process']['cpu']) + total_worker_cpu
total_mem = statistics.mean(self.data['process']['mem']) + total_worker_mem
total_threads = max(self.data['process']['threads']) + max_total_worker_threads
total_processes = 1 + num_workers + num_display_processes + 1
report += "\n[所有进程总计]\n"
report += f"- 总CPU使用率: {total_cpu:.1f}%\n"
report += f"- 总内存占用: {total_mem:.1f}MB\n"
report += f"- 总线程数: {total_threads}\n"
report += f"- 总进程数: {total_processes}(1个主进程 + {num_workers}个工作进程 + {num_display_processes}个显示进程 + 1个分发进程)\n"
# GPU stats (unchanged)
if self.data['gpu']['tflops']:
avg_tflops = statistics.mean(self.data['gpu']['tflops'])
util_percent = min((avg_tflops / self.gpu_info['peak_tflops']) * 100, 100.0)
report += "\n[GPU资源]\n"
report += f"- 平均利用率: {statistics.mean(self.data['gpu']['util']):.1f}%\n"
report += f"- 峰值显存: {max(self.data['gpu']['mem']):.1f}MB\n"
report += f"- 平均算力: {avg_tflops:.1f}/{self.gpu_info['peak_tflops']} TFLOPS\n"
report += f"- 算力利用率: {util_percent:.1f}%\n"
# Memory bandwidth (unchanged)
if self.data.get('mem_bandwidth'):
avg_mem_bandwidth = statistics.mean(self.data['mem_bandwidth'])
max_mem_bandwidth = max(self.data['mem_bandwidth'])
report += "\n[存储器带宽]\n"
report += f"- 平均内存带宽: {avg_mem_bandwidth:.1f} MB/s\n"
report += f"- 峰值内存带宽: {max_mem_bandwidth:.1f} MB/s\n"
return report
    def _calculate_cpu_usage(self, prev_times, curr_times, elapsed):
        """Return a process's CPU usage (%) between two samples.

        prev_times / curr_times are psutil cpu_times() results for the process
        and elapsed is the wall-clock time (seconds) between the two samples.
        """
        delta_proc = (curr_times.user + curr_times.system) - (prev_times.user + prev_times.system)
        if elapsed > 0:
            return (delta_proc / elapsed) * 100
        return 0.0
def main():
parser = argparse.ArgumentParser()
parser.add_argument('--streams', type=int, default=1)
parser.add_argument('--source', type=str, default="")
parser.add_argument('--gpu_id', type=int, default=0)
args = parser.parse_args()
camera_config = {
'username': 'admin',
'password': 'guoxinzhike901'
}
source_url = args.source if args.source else \
f"rtsp://{camera_config['username']}:{camera_config['password']}@192.168.1.108/"
gpus = get_gpu_info()
print("\n[硬件配置]")
print(f"- CPU核心: {psutil.cpu_count(logical=False)}物理/{psutil.cpu_count()}逻辑")
print(f"- 内存: {psutil.virtual_memory().total / (1024**3):.1f}GB")
print(f"- 使用GPU {args.gpu_id}: {gpus[args.gpu_id]['name']}")
print(f" 显存: {gpus[args.gpu_id]['total_mem']:.1f}MB")
os.environ['OMP_NUM_THREADS'] = '1'
os.environ['MKL_NUM_THREADS'] = '1'
print(f"\n[测试配置]")
print(f"- 模拟视频流数: {args.streams}")
print(f"- 视频源: {source_url}")
# 创建共享队列
frame_queue_size = max(2000, 200 * args.streams)
shared_frame_queue = multiprocessing.Queue(maxsize=frame_queue_size)
display_queue_size = max(50, 20 * args.streams)
shared_result_queue = multiprocessing.Queue(maxsize=2000)
stats_queue = multiprocessing.Queue() # New queue for stats
    # Number of worker processes (currently fixed at 1; uncomment the
    # expression to scale it with the stream count instead)
    num_workers = 1  # min(args.streams * 8, psutil.cpu_count(logical=True) * 2)
process_mgr = DynamicProcessManager(num_workers)
simulator = StreamSimulator(source_url, args.streams, shared_frame_queue)
monitor = ProgramMonitor(args.gpu_id, process_mgr, shared_result_queue, stats_queue, args)
monitor.args = args # 传递 args
latency_queue = multiprocessing.Queue()
# 启动工作进程
process_mgr.start_workers(shared_frame_queue, args.gpu_id, shared_result_queue, stats_queue)
# 启动分发进程
dispatch_p = multiprocessing.Process(
target=dispatch_process,
args=(shared_result_queue, simulator.display_queues),
daemon=True
)
dispatch_p.start()
simulator.start()
monitor.start()
    # Start display threads (one per simulated stream)
display_threads = []
for i in range(args.streams):
t = Thread(target=display_thread, args=(simulator.display_queues[i], i+1, latency_queue))
display_threads.append(t)
t.start()
time.sleep(0.5)
print("\n[测试开始] 程序将运行30秒...")
start_time = time.time()
end_time = start_time + 60*5
try:
while time.time() < end_time:
time.sleep(1)
remaining = int(end_time - time.time())
if remaining % 10 == 0 or remaining <= 5:
print(f"剩余时间: {remaining}秒")
finally:
runtime = time.time() - start_time
print(f"\n[测试完成] 实际运行时间: {runtime:.1f}秒")
print("停止模拟器...")
simulator.stop()
print("生成报告并停止监控...")
report = monitor.stop()
print("停止工作进程...")
process_mgr.stop_workers()
# 停止显示线程
for q in simulator.display_queues:
q.put(None)
for t in display_threads:
t.join()
# 停止分发进程
shared_result_queue.put(None)
dispatch_p.join(timeout=5)
if dispatch_p.is_alive():
dispatch_p.terminate()
# 收集延迟测量
latencies = []
while not latency_queue.empty():
try:
stream_id, latency = latency_queue.get_nowait()
latencies.append(latency)
except queue.Empty:
break
if latencies:
min_latency = min(latencies)
max_latency = max(latencies)
avg_latency = sum(latencies) / len(latencies)
report += f"\n[延迟统计]\n"
report += f"- 测量次数: {len(latencies)}\n"
report += f"- 最低延迟: {min_latency:.3f}秒\n"
report += f"- 最高延迟: {max_latency:.3f}秒\n"
report += f"- 平均延迟: {avg_latency:.3f}秒\n"
else:
report += "\n[延迟统计]\n- 无延迟数据\n"
if torch.cuda.is_available():
torch.cuda.empty_cache()
print(report)
if __name__ == '__main__':
multiprocessing.set_start_method('spawn')
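    # 'spawn' is required here: each worker process builds its own CUDA
    # context, and CUDA cannot be safely re-initialised in a forked child.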
# multiprocessing.set_start_method('fork') # Linux 默认方法
main()
# 测试4路视频流
# python det_ocr_shipinliu_pre.py --streams 1 --gpu_id 0
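# The triple-quoted blocks below are resource reports captured from earlier
# runs of this script (using 16 worker processes), kept for reference.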
"""
=== 程序资源使用报告 ===
[系统信息]
- CPU核心数: 10物理/16逻辑
- 系统内存: 63.8GB
- 系统CPU使用率: 14.1%
- 系统内存使用: 26.3GB / 63.8GB
- GPU型号: NVIDIA GeForce RTX 4070 SUPER
- GPU总显存: 12282.0MB
[主进程资源]
- 平均CPU使用率: 16.3%
- 峰值CPU使用率: 28.1%
- 平均内存占用: 385.1MB
- 峰值内存占用: 385.7MB
- 线程数: 9
[工作进程资源 (16个)]
- 平均CPU使用率(每个进程): 22.1%
- 总CPU使用率: 354.2%
- 平均内存占用(每个进程): 801.5MB
- 总内存占用: 12823.3MB
- 总线程数(峰值): 304
[所有进程总计]
- 总CPU使用率: 370.5%
- 总内存占用: 13208.4MB
- 总线程数: 313
- 总进程数: 19(1个主进程 + 16个工作进程 + 1个显示进程 + 1个分发进程)
[GPU资源]
- 平均利用率: 31.3%
- 峰值显存: 8226.7MB
- 平均算力: 22.7/35.6 TFLOPS
- 算力利用率: 63.8%
[延迟统计]
- 测量次数: 67
- 最低延迟: 0.024秒
- 最高延迟: 2.499秒
- 平均延迟: 0.287秒
"""
# python det_ocr_shipinliu_pre.py --streams 2 --gpu_id 0
"""
=== 程序资源使用报告 ===
[系统信息]
- CPU核心数: 10物理/16逻辑
- 系统内存: 63.8GB
- 系统CPU使用率: 9.8%
- 系统内存使用: 26.3GB / 63.8GB
- GPU型号: NVIDIA GeForce RTX 4070 SUPER
- GPU总显存: 12282.0MB
[主进程资源]
- 平均CPU使用率: 15.3%
- 峰值CPU使用率: 40.6%
- 平均内存占用: 386.4MB
- 峰值内存占用: 387.1MB
- 线程数: 9
[工作进程资源 (16个)]
- 平均CPU使用率(每个进程): 20.8%
- 总CPU使用率: 333.1%
- 平均内存占用(每个进程): 960.3MB
- 总内存占用: 15364.2MB
- 总线程数(峰值): 328
[所有进程总计]
- 总CPU使用率: 348.4%
- 总内存占用: 15750.6MB
- 总线程数: 337
- 总进程数: 20(1个主进程 + 16个工作进程 + 2个显示进程 + 1个分发进程)
[GPU资源]
- 平均利用率: 50.5%
- 峰值显存: 8328.6MB
- 平均算力: 12.6/35.6 TFLOPS
- 算力利用率: 35.4%
[延迟统计]
- 测量次数: 327
- 最低延迟: 0.027秒
- 最高延迟: 0.757秒
- 平均延迟: 0.080秒
"""
# python det_ocr_shipinliu_pre.py --streams 3 --gpu_id 0
"""
[系统信息]
- CPU核心数: 10物理/16逻辑
- 系统内存: 63.8GB
- 系统CPU使用率: 9.5%
- 系统内存使用: 26.2GB / 63.8GB
- GPU型号: NVIDIA GeForce RTX 4070 SUPER
- GPU总显存: 12282.0MB
[主进程资源]
- 平均CPU使用率: 26.2%
- 峰值CPU使用率: 53.1%
- 平均内存占用: 386.1MB
- 峰值内存占用: 386.6MB
- 线程数: 9
[工作进程资源 (16个)]
- 平均CPU使用率(每个进程): 43.9%
- 总CPU使用率: 702.5%
- 平均内存占用(每个进程): 1018.8MB
- 总内存占用: 16301.3MB
- 总线程数(峰值): 322
[所有进程总计]
- 总CPU使用率: 728.7%
- 总内存占用: 16687.5MB
- 总线程数: 331
- 总进程数: 21(1个主进程 + 16个工作进程 + 3个显示进程 + 1个分发进程)
[GPU资源]
- 平均利用率: 52.2%
- 峰值显存: 7861.9MB
- 平均算力: 18.9/35.6 TFLOPS
- 算力利用率: 53.1%
[延迟统计]
- 测量次数: 327
- 最低延迟: 0.030秒
- 最高延迟: 3.756秒
- 平均延迟: 1.077秒
"""
# python det_ocr_shipinliu_pre.py --streams 4 --gpu_id 0 cpu100
"""
=== 程序资源使用报告 ===
[系统信息]
- CPU核心数: 10物理/16逻辑
- 系统内存: 63.8GB
- 系统CPU使用率: 58.6%
- 系统内存使用: 36.3GB / 63.8GB
- GPU型号: NVIDIA GeForce RTX 4070 SUPER
- GPU总显存: 12282.0MB
[主进程资源]
- 平均CPU使用率: 28.0%
- 峰值CPU使用率: 53.1%
- 平均内存占用: 386.4MB
- 峰值内存占用: 386.8MB
- 线程数: 9
[工作进程资源 (16个)]
- 平均CPU使用率(每个进程): 48.0%
- 总CPU使用率: 768.7%
- 平均内存占用(每个进程): 1585.2MB
- 总内存占用: 25363.6MB
- 总线程数(峰值): 320
[所有进程总计]
- 总CPU使用率: 796.7%
- 总内存占用: 25750.1MB
- 总线程数: 329
- 总进程数: 22(1个主进程 + 16个工作进程 + 4个显示进程 + 1个分发进程)
[GPU资源]
- 平均利用率: 52.9%
- 峰值显存: 7991.3MB
- 平均算力: 20.2/35.6 TFLOPS
- 算力利用率: 56.8%
[延迟统计]
- 测量次数: 327
- 最低延迟: 1.480秒
- 最高延迟: 14.222秒
- 平均延迟: 8.113秒
"""
# python det_ocr_shipinliu_pre.py --streams 5 --gpu_id 0
"""
"""
# python det_ocr_shipinliu_pre.py --streams 16 --gpu_id 0
"""
"""
# python det_ocr_shipinliu_pre.py --streams 20 --gpu_id 0
"""
"""
(yolov8_bt) (base) zhang@zhang:~/danger/yolov7_crnn_ocr_detection$ python det_ocr_shipinliu_pre.py --streams 1 --gpu_id 0
[硬件配置]
- CPU核心: 10物理/16逻辑
- 内存: 62.6GB
- 使用GPU 0: NVIDIA GeForce RTX 4070 SUPER
显存: 12282.0MB
[测试配置]
- 模拟视频流数: 1
- 视频源: rtsp://admin:guoxinzhike901@192.168.1.108/
[测试开始] 程序将运行30秒...
In worker process 35804, np is <module 'numpy' from '/home/zhang/miniconda3/envs/yolov8_bt/lib/python3.9/site-packages/numpy/__init__.py'>, type(np.empty((1,))) = <class 'numpy.ndarray'>
Loading weights/best.engine for TensorRT inference...
[06/07/2025-18:54:11] [TRT] [I] Loaded engine size: 39 MiB
[06/07/2025-18:54:11] [TRT] [W] Using an engine plan file across different models of devices is not recommended and is likely to affect performance or even cause errors.
[06/07/2025-18:54:13] [TRT] [I] [MemUsageChange] TensorRT-managed allocation in engine deserialization: CPU +0, GPU +33, now: CPU 0, GPU 33 (MiB)
[06/07/2025-18:54:13] [TRT] [I] [MemUsageChange] TensorRT-managed allocation in IExecutionContext creation: CPU +0, GPU +39, now: CPU 0, GPU 72 (MiB)
剩余时间: 290秒
剩余时间: 280秒
剩余时间: 270秒
(yolov8_bt) (base) zhang@zhang:~/danger/yolov7_crnn_ocr_detection$ python -c "import numpy as np; print(np.__version__)"
1.23.0
The code reports an error after running.