import tensorrt as trt
import pycuda.driver as cuda
import pycuda.autoinit # 自动初始化 CUDA 上下文
import numpy as np
import cv2
import time
# Initialize the TensorRT logger shared by this module, constructed with the
# WARNING severity constant.
TRT_LOGGER = trt.Logger(trt.Logger.WARNING)
# Helper: allocate host/device buffers for every engine input and output
def _iter_io_tensors(engine):
    """Yield ``(name, trt_dtype, shape, is_input)`` for each engine I/O tensor.

    Uses the index-based binding API when the engine exposes it and falls
    back to the name-based tensor API otherwise, so the caller does not need
    to know which TensorRT generation produced the engine object.
    """
    if hasattr(engine, 'num_bindings'):
        for index in range(engine.num_bindings):
            name = engine.get_binding_name(index)
            yield (
                name,
                engine.get_binding_dtype(index),
                engine.get_binding_shape(index),
                engine.binding_is_input(name),
            )
    else:
        for index in range(engine.num_io_tensors):
            name = engine.get_tensor_name(index)
            yield (
                name,
                engine.get_tensor_dtype(name),
                engine.get_tensor_shape(name),
                engine.get_tensor_mode(name) == trt.TensorIOMode.INPUT,
            )


def allocate_buffers(engine, dynamic_shape=(1, 3, 640, 640)):
    """Allocate a page-locked host buffer and a device buffer per I/O tensor.

    Args:
        engine: Deserialized TensorRT engine.
        dynamic_shape: Concrete shape substituted for any tensor whose
            declared shape contains ``-1``. Adjust it to the model's real
            input size. Note that the substitution is applied to every
            tensor with a dynamic dimension, outputs included.

    Returns:
        A tuple ``(inputs, outputs, bindings, stream)`` where ``inputs`` and
        ``outputs`` are lists of dicts with the keys ``'host'``, ``'device'``,
        ``'shape'`` and ``'name'``, ``bindings`` is the list of device
        addresses (as ints) in engine order, and ``stream`` is a new CUDA
        stream.
    """
    inputs = []
    outputs = []
    bindings = []
    stream = cuda.Stream()

    for name, trt_dtype, shape, is_input in _iter_io_tensors(engine):
        # A dynamic dimension cannot be sized from the engine alone, so fall
        # back to the caller-provided concrete shape.
        if -1 in shape:
            shape = list(dynamic_shape)

        size = trt.volume(shape)
        np_dtype = trt.nptype(trt_dtype)

        # Allocate the host (page-locked) and device memory for this tensor.
        host_mem = cuda.pagelocked_empty(size, np_dtype)
        device_mem = cuda.mem_alloc(host_mem.nbytes)
        bindings.append(int(device_mem))

        entry = {'host': host_mem, 'device': device_mem, 'shape': shape, 'name': name}
        if is_input:
            inputs.append(entry)
        else:
            outputs.append(entry)

    return inputs, outputs, bindings, stream
# Helper: run one inference pass
def do_inference(context, bindings, inputs, outputs, stream):
    """Copy inputs to the device, run the engine, and copy outputs back.

    Args:
        context: TensorRT execution context.
        bindings: Device addresses in engine order (used by
            ``execute_async_v2``).
        inputs: Input buffer dicts with ``'host'``, ``'device'`` and
            ``'name'`` entries.
        outputs: Output buffer dicts with the same entries.
        stream: CUDA stream on which all copies and the inference are queued.

    Returns:
        A list with the host buffer of every output, in ``outputs`` order.

    Raises:
        RuntimeError: If the context reports that the inference could not be
            enqueued; without this check stale output buffers would be
            returned silently.
    """
    # Copy the input data from host to device.
    for inp in inputs:
        cuda.memcpy_htod_async(inp['device'], inp['host'], stream)

    # Run inference. Contexts without ``execute_async_v2`` take the tensor
    # addresses by name and are launched through ``execute_async_v3``.
    if hasattr(context, 'execute_async_v2'):
        enqueued = context.execute_async_v2(
            bindings=bindings, stream_handle=stream.handle
        )
    else:
        for tensor in [*inputs, *outputs]:
            context.set_tensor_address(tensor['name'], int(tensor['device']))
        enqueued = context.execute_async_v3(stream_handle=stream.handle)

    if not enqueued:
        raise RuntimeError("TensorRT failed to enqueue inference")

    # Copy the results from device back to host.
    for out in outputs:
        cuda.memcpy_dtoh_async(out['host'], out['device'], stream)

    # Wait for every queued operation to finish before reading host buffers.
    stream.synchronize()
    return [out['host'] for out in outputs]
# Pre-processing
def preprocess_image(frame, input_shape=(640, 640)):
    """Turn a camera frame into a contiguous float32 CHW array.

    The frame is resized to ``input_shape``, divided by 255.0, has its last
    (channel) axis reversed (BGR to RGB for OpenCV frames) and is transposed
    from HWC to CHW.
    """
    resized = cv2.resize(frame, input_shape)
    # Scale pixel values into [0, 1].
    scaled = resized / 255.0
    # Reverse the channel axis, then move it to the front.
    channels_reversed = scaled[:, :, ::-1]
    chw = channels_reversed.transpose(2, 0, 1)
    as_float32 = chw.astype(np.float32)
    # The transposed layout is not guaranteed contiguous; make it so.
    return np.ascontiguousarray(as_float32)
# Post-processing — must be adapted to the real YOLOv11 output format
def postprocess_output(output, orig_img_shape, input_shape=(640, 640)):
    """Placeholder for decoding raw model output into detections.

    This is only a stub: none of the arguments are read and an empty list is
    always returned. A real implementation would typically parse the output
    (for example a ``[1, num_classes + 5, num_anchors]`` layout — adjust to
    the actual model), apply a confidence threshold and run non-maximum
    suppression.

    Returns:
        A list of detections, each intended to be
        ``[x1, y1, x2, y2, confidence, class_id]``; currently always empty.
    """
    # TODO: implement the post-processing logic for the deployed model.
    return []
# Draw the detection results
def draw_detections(frame, detections, fps):
    """Overlay the FPS counter and every detection on ``frame``.

    Args:
        frame: Image that is drawn on in place.
        detections: Iterable of ``(x1, y1, x2, y2, conf, cls_id)`` entries.
        fps: Frame rate shown in the top-left corner.

    Returns:
        The same ``frame`` object that was passed in.
    """
    green = (0, 255, 0)

    # FPS counter.
    cv2.putText(frame, f"FPS: {fps:.2f}", (10, 30),
                cv2.FONT_HERSHEY_SIMPLEX, 1, green, 2)

    for x1, y1, x2, y2, conf, cls_id in detections:
        # Bounding box.
        top_left = (int(x1), int(y1))
        bottom_right = (int(x2), int(y2))
        cv2.rectangle(frame, top_left, bottom_right, green, 2)

        # Class label and confidence, placed just above the box.
        label = f"Class {int(cls_id)}: {conf:.2f}"
        label_origin = (int(x1), int(y1)-10)
        cv2.putText(frame, label, label_origin,
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, green, 2)

    return frame
# Entry point
def _apply_dynamic_input_shape(engine, context, inp):
    """Set the concrete shape of a dynamic input on the execution context.

    Does nothing when the engine declares a fully static shape for the input.
    """
    name = inp['name']
    shape = tuple(inp['shape'])
    if hasattr(engine, 'get_binding_index'):
        index = engine.get_binding_index(name)
        if -1 in engine.get_binding_shape(index):
            context.set_binding_shape(index, shape)
    elif -1 in engine.get_tensor_shape(name):
        context.set_input_shape(name, shape)


def main():
    """Run real-time detection on the default camera until 'q' is pressed.

    Loads the serialized engine, allocates buffers, then repeatedly grabs a
    frame, runs inference, post-processes the output and displays the
    annotated frame together with the measured FPS. Failures while loading
    the engine or opening the camera are reported on stdout and end the
    function early.
    """
    # Load the serialized engine.
    try:
        with open('py/pre_model/yolo11n_fp16.engine', 'rb') as f:
            runtime = trt.Runtime(TRT_LOGGER)
            engine = runtime.deserialize_cuda_engine(f.read())
    except Exception as e:
        print(f"Failed to load engine: {e}")
        return
    # Deserialization can report failure by returning None instead of raising.
    if engine is None:
        print("Failed to load engine: deserialization returned None")
        return
    print("Engine loaded successfully")

    # Create the execution context.
    context = engine.create_execution_context()
    if context is None:
        print("Failed to create execution context")
        return

    # Allocate buffers.
    inputs, outputs, bindings, stream = allocate_buffers(engine)
    if not inputs:
        print("Error: engine has no input tensor")
        return
    # An engine with a dynamic input needs its concrete shape set on the
    # context before the first execution.
    _apply_dynamic_input_shape(engine, context, inputs[0])

    # Open the USB camera.
    cap = cv2.VideoCapture(0)
    try:
        # Verify the camera opened before configuring it.
        if not cap.isOpened():
            print("Error: Could not open camera")
            return

        # Camera parameters.
        cap.set(cv2.CAP_PROP_FRAME_WIDTH, 1280)
        cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 720)
        cap.set(cv2.CAP_PROP_FPS, 30)

        # Monotonic clock for the frame-rate measurement.
        prev_time = time.perf_counter()
        print("Starting real-time detection. Press 'q' to quit.")

        while True:
            # Grab a frame.
            ret, frame = cap.read()
            if not ret:
                print("Failed to grab frame")
                break

            # Pre-process and copy into the input host buffer.
            input_image = preprocess_image(frame)
            np.copyto(inputs[0]['host'], input_image.ravel())

            # Run inference.
            output = do_inference(context, bindings, inputs, outputs, stream)

            # Post-process the raw output.
            detections = postprocess_output(output, frame.shape[:2])

            # Frame rate; guard against a zero interval between two readings.
            curr_time = time.perf_counter()
            elapsed = curr_time - prev_time
            fps = 1.0 / elapsed if elapsed > 0 else 0.0
            prev_time = curr_time

            # Draw the detections and the frame rate, then show the result.
            result_frame = draw_detections(frame, detections, fps)
            cv2.imshow('YOLOv11 TensorRT Detection', result_frame)

            # Quit on 'q'.
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    except KeyboardInterrupt:
        print("Interrupted by user")
    finally:
        # Release resources.
        cap.release()
        cv2.destroyAllWindows()
        print("Resources released")
# Start the detection loop only when this file is executed as a script.
if __name__ == "__main__":
    main()
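# NOTE: the two lines below are leftover web-page text, not part of the program.
# 找出錯誤 ("find the errors")
# 最新发布 ("latest release")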