I. Hardware and Software Requirements
1. Hardware
Jetson Xavier NX Developer Kit
2. Board configuration
JetPack: 5.1.4
CUDA: 11.4
TensorRT: 8.5.2.2
OpenCV: 4.5.4
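Before converting any models, it is worth confirming that the board actually reports these versions. A minimal check, assuming the JetPack-provided Python bindings for TensorRT and OpenCV are installed:
# Enter the Python environment
python
import tensorrt, cv2
print(tensorrt.__version__)  # expect 8.5.2.2
print(cv2.__version__)       # expect 4.5.4
exit()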
II. Converting .pt to .onnx
# Enter the Python environment
python
from ultralytics import YOLO
model = YOLO("./yolov8n.pt")
model.export(format="onnx")
# Exit the Python environment
exit()
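Optionally, verify the exported graph before moving on. A minimal sketch, assuming the onnx package is available (pip install onnx); the printed shapes are exactly what the C++ post-processing below relies on:
import onnx
model = onnx.load("yolov8n.onnx")
onnx.checker.check_model(model)  # raises if the graph is malformed
# A YOLOv8n export takes a 1x3x640x640 input and produces a 1x84x8400 output
# (84 = 4 box coordinates + 80 class scores, 8400 candidate boxes)
for t in list(model.graph.input) + list(model.graph.output):
    dims = [d.dim_value for d in t.type.tensor_type.shape.dim]
    print(t.name, dims)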
III. Converting .onnx to .engine
!!! Note: do not build the engine with Ultralytics' export command on another machine. A TensorRT engine is tied to the GPU architecture and the TensorRT version that serialized it, so an engine produced by yolo export on, say, an x86 development host (or against a different TensorRT version) cannot be loaded on the Jetson:
yolo export model=yolo11n.pt format=engine
With such an engine, the C++ API fails to create the inference context: the engine file is version-incompatible or malformed, and the error looks like this:
(pytorch) xiao@xiao-desktop:~/Desktop/tensorrt-text/build$ ./yolov8_detector
[TensorRT] 1: [stdArchiveReader.cpp::StdArchiveReader::32] Error Code 1: Serialization (Serialization assertion magicTagRead == kMAGIC_TAG failed.Magic tag does not match)
[TensorRT] 4: [runtime.cpp::deserializeCudaEngine::65] Error Code 4: Internal Error (Engine deserialization failed.)
Segmentation fault (core dumped)
Instead, generate the TensorRT engine on the Jetson itself with trtexec:
/usr/src/tensorrt/bin/trtexec \
--onnx=yolov8n.onnx \
--saveEngine=yolov8n.engine \
--fp16
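The --fp16 flag builds a half-precision engine, which substantially reduces latency on Xavier NX at a small accuracy cost; drop it if you need full FP32. To confirm on the device that the generated engine deserializes with the installed runtime, a quick sanity check using the TensorRT Python bindings (a sketch, assuming the engine sits in the current directory):
import tensorrt as trt
logger = trt.Logger(trt.Logger.WARNING)
with open("yolov8n.engine", "rb") as f:
    engine = trt.Runtime(logger).deserialize_cuda_engine(f.read())
print("engine OK" if engine is not None else "engine FAILED to deserialize")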
IV. Deployment
1. Project structure
your_project/
├── src/
│   └── tensorrt_inference.cpp   # your code
├── model/
│   └── yolov8n.engine           # model file
├── video/
│   └── people.mp4               # test video
├── label.txt                    # label file
└── CMakeLists.txt
2. label.txt
person
bicycle
car
motorcycle
airplane
bus
train
truck
boat
traffic light
fire hydrant
stop sign
parking meter
bench
bird
cat
dog
horse
sheep
cow
elephant
bear
zebra
giraffe
backpack
umbrella
handbag
tie
suitcase
frisbee
skis
snowboard
sports ball
kite
baseball bat
baseball glove
skateboard
surfboard
tennis racket
bottle
wine glass
cup
fork
knife
spoon
bowl
banana
apple
sandwich
orange
broccoli
carrot
hot dog
pizza
donut
cake
chair
couch
potted plant
bed
dining table
toilet
tv
laptop
mouse
remote
keyboard
cell phone
microwave
oven
toaster
sink
refrigerator
book
clock
vase
scissors
teddy bear
hair drier
toothbrush
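The class IDs produced by the model index this file line by line (0-based), so the order must match the 80-class COCO ordering the model was trained with. A quick count check, assuming the list above was saved as label.txt:
with open("label.txt") as f:
    names = [line.strip() for line in f if line.strip()]
print(len(names))  # a COCO-trained YOLOv8 model expects exactly 80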
3. CMakeLists.txt
cmake_minimum_required(VERSION 3.16)
project(YOLOv8_Detection)
# C++ standard
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
# CUDA paths (JetPack 5.1.4 default locations)
set(CUDA_TOOLKIT_ROOT_DIR "/usr/local/cuda")
set(CUDA_ARCH_BIN "7.2") # GPU architecture of Xavier NX (Volta, SM 7.2)
# Locate dependencies
find_package(OpenCV REQUIRED)
find_package(CUDA REQUIRED)
# TensorRT paths (JetPack default install locations)
set(TENSORRT_DIR "/usr/include/aarch64-linux-gnu")
set(TENSORRT_LIB_DIR "/usr/lib/aarch64-linux-gnu")
# Include directories
include_directories(
    ${OpenCV_INCLUDE_DIRS}
    ${CUDA_INCLUDE_DIRS}
    ${TENSORRT_DIR}
)
# Link directories
link_directories(
    ${CUDA_TOOLKIT_ROOT_DIR}/lib64
    ${CUDA_LIBRARY_DIRS}
    ${TENSORRT_LIB_DIR}
)
# Executable target
add_executable(yolov8_detector
    src/tensorrt_inference.cpp
)
# Link libraries
target_link_libraries(yolov8_detector
    ${OpenCV_LIBS}
    ${CUDA_LIBRARIES}
    nvinfer       # TensorRT core library
    nvonnxparser  # ONNX parser (not required when loading a prebuilt .engine)
    cudart        # CUDA runtime
    pthread       # threading
    dl            # dynamic loading
)
# Extra optimization flags for Jetson (aarch64). target_compile_options is used
# because add_compile_options placed after add_executable would not apply to the
# target; -mfp16-format=ieee is a 32-bit ARM flag rejected by aarch64 GCC, and
# Xavier NX's Carmel cores implement ARMv8.2-A rather than Cortex-A57.
if(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64")
    target_compile_options(yolov8_detector PRIVATE -O3 -march=armv8.2-a)
endif()
4. tensorrt_inference.cpp
#include <iostream>
#include <fstream>
#include <numeric>
#include <opencv2/opencv.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/highgui.hpp>
#include <cuda_runtime_api.h>
#include "NvInfer.h"
struct NvinferStruct {
    nvinfer1::IRuntime* runtime = nullptr;          // TensorRT runtime interface
    nvinfer1::ICudaEngine* engine = nullptr;        // TensorRT engine
    nvinfer1::IExecutionContext* context = nullptr; // execution context
    void** dataBuffer = nullptr;                    // array of GPU buffer pointers
    int numBindings = 0;                            // number of I/O bindings (needed for cleanup)
};
struct detresult {
    std::string className; // class name
    int classId;           // class ID
    float confidence;      // confidence score
    cv::Rect rect;         // bounding box
    detresult(const std::string& name, int id, float conf, const cv::Rect& r)
        : className(name), classId(id), confidence(conf), rect(r) {}
};
class Logger : public nvinfer1::ILogger {
public:
    void log(Severity severity, const char* msg) noexcept override {
        // Only print warnings and errors; suppress verbose optimizer info
        if (severity <= Severity::kWARNING) {
            std::cout << "[TensorRT] " << msg << std::endl;
        }
    }
};
std::vector<std::string> labels;
float score_threshold = 0.3f;
float nms_threshold = 0.5f;
int input_h = 640;
int input_w = 640;
std::string label_path = "/home/xiao/Desktop/tensorrt-text/label.txt";
std::string engine_path = "/home/xiao/Desktop/tensorrt-text/model/yolov8n.engine";
std::string video_path = "/home/xiao/Desktop/tensorrt-text/video/people.mp4";
NvinferStruct* p = nullptr;
Logger logger;
int w = 0;                      // source frame width
int h = 0;                      // source frame height
float x_factor = 0;             // horizontal scale from model space back to image space
float y_factor = 0;             // vertical scale from model space back to image space
std::vector<float> input_image; // CHW float input tensor (host side)
float* output_data = nullptr;   // host buffer for the raw model output
size_t output_size = 0;
double preprocessTime = 0;
double inferTime = 0;
double postprocessTime = 0;
double totalTime = 0;
double detFps = 0;
int init() {
    // Load class names, one per line
    std::ifstream label_file(label_path);
    if (!label_file.is_open())
    {
        std::cerr << "Error opening file: " << label_path << std::endl;
        return -1;
    }
    std::string line;
    while (std::getline(label_file, line))
    {
        if (!line.empty())
        {
            labels.push_back(line);
        }
    }
    label_file.close();
    // Read the serialized engine in binary mode
    std::ifstream engine_file(engine_path, std::ios::binary);
    if (!engine_file.good()) {
        std::cerr << "Cannot open engine file: " << engine_path << std::endl;
        return -1;
    }
    size_t size = 0;
    engine_file.seekg(0, engine_file.end); // move the read pointer to the end of the file
    size = engine_file.tellg();            // its position there equals the file size in bytes
    engine_file.seekg(0, engine_file.beg); // move the read pointer back to the beginning
    char* modelStream = new char[size];
    engine_file.read(modelStream, size);
    engine_file.close();
    // Create the inference core structure and initialize its members
    p = new NvinferStruct();
    // Create the runtime used for deserialization
    p->runtime = nvinfer1::createInferRuntime(logger);
    // Deserialize the engine
    p->engine = p->runtime->deserializeCudaEngine(modelStream, size);
    delete[] modelStream;
    if (!p->engine) {
        std::cerr << "Engine deserialization failed! Check the file and TensorRT version compatibility." << std::endl;
        return -1; // abort initialization
    }
    // Create the execution context
    p->context = p->engine->createExecutionContext();
    int numNode = p->engine->getNbBindings();
    p->numBindings = numNode;
    // Allocate one GPU buffer per binding (input and output)
    p->dataBuffer = new void* [numNode];
    for (int i = 0; i < numNode; i++) {
        nvinfer1::Dims dims = p->engine->getBindingDimensions(i);
        nvinfer1::DataType type = p->engine->getBindingDataType(i);
        size_t bytes = std::accumulate(dims.d, dims.d + dims.nbDims, static_cast<size_t>(1), std::multiplies<size_t>());
        switch (type)
        {
        case nvinfer1::DataType::kINT32:
        case nvinfer1::DataType::kFLOAT: bytes *= 4; break; // 4 bytes per element
        case nvinfer1::DataType::kHALF:  bytes *= 2; break; // 2 bytes per element
        case nvinfer1::DataType::kBOOL:
        case nvinfer1::DataType::kINT8:
        default: break;                                     // 1 byte per element
        }
        cudaMalloc(&(p->dataBuffer[i]), bytes);
    }
    // YOLOv8 output layout: 1 x (num_classes + 4) x 8400
    output_size = 1 * (labels.size() + 4) * 8400;
    output_data = new float[output_size];
    return 0;
}
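// Note: preprocess() and postprocess() below assume binding 0 is the input
// ("images") and binding 1 is the output ("output0"), which matches the binding
// order of an engine built from an Ultralytics YOLOv8 ONNX export. If your
// engine orders the bindings differently, look the indices up once in init(),
// e.g. with p->engine->getBindingIndex("images").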
void preprocess(cv::Mat& frame) {
    // Letterbox: pad the frame to a square of its long side, keeping the content
    // in the top-left corner, so the aspect ratio survives the resize
    w = frame.cols;
    h = frame.rows;
    int max = std::max(h, w);
    cv::Mat image = cv::Mat::zeros(cv::Size(max, max), CV_8UC3);
    cv::Rect roi(0, 0, w, h);
    frame.copyTo(image(roi));
    // Scale factors to map model-space coordinates back to the original image
    x_factor = image.cols / static_cast<float>(input_w);
    y_factor = image.rows / static_cast<float>(input_h);
    cv::resize(image, image, cv::Size(input_w, input_h));
    cv::cvtColor(image, image, cv::COLOR_BGR2RGB);
    // HWC -> CHW: split the channels and normalize to [0, 1]
    std::vector<cv::Mat> rgbChannels(3);
    cv::split(image, rgbChannels);
    for (int c = 0; c < 3; c++)
    {
        rgbChannels[c].convertTo(rgbChannels[c], CV_32FC1, 1.0 / 255.0);
    }
    int image_area = input_h * input_w;
    input_image.clear();
    input_image.resize(3 * image_area);
    size_t single_chn_size = image_area * sizeof(float);
    memcpy(input_image.data(), (float*)rgbChannels[0].data, single_chn_size);
    memcpy(input_image.data() + image_area, (float*)rgbChannels[1].data, single_chn_size);
    memcpy(input_image.data() + image_area * 2, (float*)rgbChannels[2].data, single_chn_size);
    // Copy the input tensor to the GPU input binding
    cudaMemcpy(p->dataBuffer[0], input_image.data(), input_image.size() * sizeof(float), cudaMemcpyHostToDevice);
}
void postprocess(std::vector<detresult>& detectionResult) {
    // Fetch the raw output: (num_classes + 4) x 8400, one column per candidate box
    cudaMemcpy(output_data, p->dataBuffer[1], output_size * sizeof(float), cudaMemcpyDeviceToHost);
    cv::Mat dout(labels.size() + 4, 8400, CV_32F, output_data);
    cv::Mat det_output = dout.t(); // transpose to 8400 x (num_classes + 4): [cx, cy, w, h, scores...]
    std::vector<cv::Rect> boxes;
    std::vector<int> classIds;
    std::vector<float> confidences;
    for (int i = 0; i < det_output.rows; i++)
    {
        cv::Mat classes_scores = det_output.row(i).colRange(4, labels.size() + 4);
        cv::Point classIdPoint;
        double score;
        cv::minMaxLoc(classes_scores, 0, &score, 0, &classIdPoint);
        if (score > score_threshold)
        {
            // Convert center/size in model space to a top-left rect in image space
            float cx = det_output.at<float>(i, 0);
            float cy = det_output.at<float>(i, 1);
            float ow = det_output.at<float>(i, 2);
            float oh = det_output.at<float>(i, 3);
            int x = static_cast<int>((cx - 0.5 * ow) * x_factor);
            int y = static_cast<int>((cy - 0.5 * oh) * y_factor);
            int width = static_cast<int>(ow * x_factor);
            int height = static_cast<int>(oh * y_factor);
            // Clamp coordinates to the original frame
            if (x < 0) x = 0;
            if (y < 0) y = 0;
            if (x > w) x = w;
            if (y > h) y = h;
            if (x + width > w) width = w - x;
            if (y + height > h) height = h - y;
            cv::Rect box;
            box.x = x;
            box.y = y;
            box.width = width;
            box.height = height;
            boxes.push_back(box);
            classIds.push_back(classIdPoint.x);
            confidences.push_back(static_cast<float>(score));
        }
    }
    // Non-maximum suppression to drop overlapping duplicates
    std::vector<int> indexes;
    cv::dnn::NMSBoxes(boxes, confidences, score_threshold, nms_threshold, indexes);
    for (size_t i = 0; i < indexes.size(); i++)
    {
        int index = indexes[i];
        detresult box(labels[classIds[index]], classIds[index], confidences[index], boxes[index]);
        detectionResult.push_back(box);
    }
}
void draw(cv::Mat& frame, std::vector<detresult>& detectionResult) {
    for (size_t i = 0; i < detectionResult.size(); ++i)
    {
        detresult box = detectionResult[i];
        cv::rectangle(frame, box.rect, cv::Scalar(0, 0, 255), 2);
        std::string label = box.className + ":" + cv::format("%.2f", box.confidence);
        putText(frame, label, cv::Point(box.rect.x, box.rect.y - 5), cv::FONT_HERSHEY_SIMPLEX, 1, cv::Scalar(0, 255, 0), 2);
    }
    // Overlay timing statistics
    putText(frame, "preprocessTime:" + std::to_string(preprocessTime * 1000) + "ms", cv::Point(10, 30), cv::FONT_HERSHEY_SIMPLEX, 1, cv::Scalar(0, 255, 0), 2);
    putText(frame, "inferTime:" + std::to_string(inferTime * 1000) + "ms", cv::Point(10, 70), cv::FONT_HERSHEY_SIMPLEX, 1, cv::Scalar(0, 255, 0), 2);
    putText(frame, "postprocessTime:" + std::to_string(postprocessTime * 1000) + "ms", cv::Point(10, 110), cv::FONT_HERSHEY_SIMPLEX, 1, cv::Scalar(0, 255, 0), 2);
    putText(frame, "totalTime:" + std::to_string(totalTime * 1000) + "ms", cv::Point(10, 150), cv::FONT_HERSHEY_SIMPLEX, 1, cv::Scalar(0, 255, 0), 2);
    putText(frame, "detFps:" + std::to_string(detFps), cv::Point(10, 190), cv::FONT_HERSHEY_SIMPLEX, 1, cv::Scalar(0, 255, 0), 2);
    cv::imshow("detresult", frame);
}
void destroy() {
    delete[] output_data;
    // Free each GPU buffer before releasing the pointer array
    for (int i = 0; i < p->numBindings; i++) {
        cudaFree(p->dataBuffer[i]);
    }
    delete[] p->dataBuffer;
    // In TensorRT 8.x, destroy() is deprecated; deleting the objects is the
    // supported way to release them (order: context, engine, runtime)
    delete p->context;
    delete p->engine;
    delete p->runtime;
    delete p;
}
int main()
{
    if (init() != 0) {
        return -1;
    }
    cv::VideoCapture capture(video_path);
    // Make sure the video opened successfully
    if (!capture.isOpened())
    {
        std::cout << "Failed to open the video file" << std::endl;
        return -1;
    }
    double fps = capture.get(cv::CAP_PROP_FPS); // source video fps (not used below)
    int width = static_cast<int>(capture.get(cv::CAP_PROP_FRAME_WIDTH));
    int height = static_cast<int>(capture.get(cv::CAP_PROP_FRAME_HEIGHT));
    // Create the display window once; cv::WINDOW_NORMAL lets the user resize it
    cv::namedWindow("detresult", cv::WINDOW_NORMAL);
    cv::resizeWindow("detresult", width / 2, height / 2);
    cv::Mat frame;
    while (true)
    {
        bool success = capture.read(frame); // grab one frame
        if (!success)
        {
            std::cout << "End of video" << std::endl;
            break;
        }
        // Preprocess
        double start = (double)cv::getTickCount();
        preprocess(frame);
        preprocessTime = ((double)cv::getTickCount() - start) / cv::getTickFrequency();
        // Inference (synchronous execution on the GPU buffers)
        start = (double)cv::getTickCount();
        p->context->executeV2(p->dataBuffer);
        inferTime = ((double)cv::getTickCount() - start) / cv::getTickFrequency();
        // Postprocess
        start = (double)cv::getTickCount();
        std::vector<detresult> detectionResult;
        postprocess(detectionResult);
        postprocessTime = ((double)cv::getTickCount() - start) / cv::getTickFrequency();
        totalTime = preprocessTime + inferTime + postprocessTime;
        detFps = 1 / totalTime;
        // Draw and display
        draw(frame, detectionResult);
        if (cv::waitKey(1) == 27) // press ESC to exit
        {
            break;
        }
    }
    destroy();
    cv::destroyAllWindows();
    return 0;
}
5. Build
mkdir build
cd build
cmake ..
make -j6
6. Run
./yolov8_detector
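cv::imshow needs a graphical session, so run the binary from a terminal on the Jetson's own desktop, or enable X forwarding (ssh -X) when working over SSH. For stable FPS numbers you may also want to pin the clocks first with sudo jetson_clocks.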