onnxruntime推理
导出onnx模型:
from ultralytics import YOLO
# Load the YOLOv8n pose checkpoint by file name.
model = YOLO("yolov8n-pose.pt")
# Export the loaded model in ONNX format.
model.export(format="onnx")
onnx模型结构如下:

import cv2
import numpy as np
import onnxruntime
# Colour palette: 20 colour triples, selected by index for the two tables below
palette = np.array([[255, 128, 0], [255, 153, 51], [255, 178, 102],
[230, 230, 0], [255, 153, 255], [153, 204, 255],
[255, 102, 255], [255, 51, 255], [102, 178, 255],
[51, 153, 255], [255, 153, 153], [255, 102, 102],
[255, 51, 51], [153, 255, 153], [102, 255, 102],
[51, 255, 51], [0, 255, 0], [0, 0, 255], [255, 0, 0],
[255, 255, 255]])
# Limb connections between the 17 keypoints, as pairs of 1-based keypoint indices
skeleton = [[16, 14], [14, 12], [17, 15], [15, 13], [12, 13], [6, 12],
[7, 13], [6, 7], [6, 8], [7, 9], [8, 10], [9, 11], [2, 3],
[1, 2], [1, 3], [2, 4], [3, 5], [4, 6], [5, 7]]
# Limb colours: one palette row per entry of `skeleton` (19 entries)
pose_limb_color = palette[[9, 9, 9, 9, 7, 7, 7, 0, 0, 0, 0, 0, 16, 16, 16, 16, 16, 16, 16]]
# Keypoint colours: one palette row per keypoint (17 entries)
pose_kpt_color = palette[[16, 16, 16, 16, 16, 0, 0, 0, 0, 0, 0, 9, 9, 9, 9, 9, 9]]
# Minimum confidence for keeping a detection and for drawing a keypoint/limb
score_threshold = 0.5
# IoU threshold passed to nms()
nms_threshold = 0.5
def letterbox(im, new_shape=(640, 640), color=(114, 114, 114), scaleup=True):
    """Resize ``im`` keeping its aspect ratio, then pad it to ``new_shape``.

    :param im: image array whose first two dimensions are (height, width)
    :param new_shape: target (height, width), or a single int for a square
    :param color: constant border colour used for the padding
    :param scaleup: when False the image is only ever shrunk, never enlarged
    :return: the resized and padded image
    """
    src_h, src_w = im.shape[:2]
    if isinstance(new_shape, int):
        new_shape = (new_shape, new_shape)
    dst_h, dst_w = new_shape[0], new_shape[1]

    # Scale ratio (new / old): the tighter of the two axes wins.
    ratio = min(dst_h / src_h, dst_w / src_w)
    if not scaleup:
        # Down-sampling only; up-sampling would blur the image.
        ratio = min(ratio, 1.0)

    # Size after scaling, as (width, height) because that is what cv2.resize takes.
    resized_wh = (int(round(src_w * ratio)), int(round(src_h * ratio)))

    # Padding is split evenly between the two sides of each axis.
    pad_w = (dst_w - resized_wh[0]) / 2
    pad_h = (dst_h - resized_wh[1]) / 2

    if (src_w, src_h) != resized_wh:
        im = cv2.resize(im, resized_wh, interpolation=cv2.INTER_LINEAR)

    # The -0.1 / +0.1 offsets send the odd pixel of a half-pixel pad to the bottom/right side.
    top = int(round(pad_h - 0.1))
    bottom = int(round(pad_h + 0.1))
    left = int(round(pad_w - 0.1))
    right = int(round(pad_w + 0.1))

    return cv2.copyMakeBorder(im, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color)
def pre_process(img):
    """Turn a letterboxed BGR image into a normalised NCHW RGB batch.

    The caller passes an image loaded with ``cv2.imread`` (BGR channel order).
    The channels are reversed to RGB here, matching what the C++ and TensorRT
    pipelines in this file feed to the same model.

    :param img: H x W x 3 image array in BGR order
    :return: array of shape (1, 3, H, W) with values scaled by 1/255
    """
    rgb = img[:, :, ::-1]                      # BGR -> RGB
    scaled = rgb / 255.                        # normalise by 1/255
    chw = np.transpose(scaled, (2, 0, 1))      # HWC -> CHW
    # Make the buffer C-contiguous so a later astype() keeps a plain NCHW layout.
    return np.ascontiguousarray(np.expand_dims(chw, axis=0))
def xywh2xyxy(x):
    """Convert rows from (centre x, centre y, w, h) to (x1, y1, x2, y2).

    Only the first four columns are rewritten; the rest are copied unchanged.
    The input array is not modified.
    """
    y = np.copy(x)
    half_w = x[:, 2] / 2
    half_h = x[:, 3] / 2
    y[:, 0] = x[:, 0] - half_w  # top-left x
    y[:, 1] = x[:, 1] - half_h  # top-left y
    y[:, 2] = x[:, 0] + half_w  # bottom-right x
    y[:, 3] = x[:, 1] + half_h  # bottom-right y
    return y
# Non-maximum suppression
def nms(dets, iou_thresh):
    """Greedy non-maximum suppression.

    :param dets: N x M array; columns 0-3 are (x1, y1, x2, y2), column 4 is
        the score, and any further columns are carried along untouched
    :param iou_thresh: a box is dropped when its IoU with an already kept,
        higher-scoring box is greater than this value
    :return: K x M array of the kept rows, highest score first. The result is
        always 2-D, so an empty input yields shape (0, M) instead of (0,).
    """
    x1 = dets[:, 0]
    y1 = dets[:, 1]
    x2 = dets[:, 2]
    y2 = dets[:, 3]
    scores = dets[:, 4]

    # Box areas, using the inclusive-pixel (+1) convention.
    areas = (x2 - x1 + 1) * (y2 - y1 + 1)
    order = scores.argsort()[::-1]  # indices sorted by descending score
    keep = []  # indices of the boxes that survive

    while order.size > 0:
        best, rest = order[0], order[1:]
        keep.append(best)  # the highest-scoring remaining box is always kept

        # Intersection of the best box with every remaining box.
        xx1 = np.maximum(x1[best], x1[rest])
        yy1 = np.maximum(y1[best], y1[rest])
        xx2 = np.minimum(x2[best], x2[rest])
        yy2 = np.minimum(y2[best], y2[rest])
        w = np.maximum(0.0, xx2 - xx1 + 1)
        h = np.maximum(0.0, yy2 - yy1 + 1)
        inter = w * h

        # Intersection over union.
        ovr = inter / (areas[best] + areas[rest] - inter)

        # Only boxes that overlap little enough go on to the next round.
        order = rest[ovr <= iou_thresh]

    kept = np.array([dets[i].tolist() for i in keep])
    # Without the reshape an empty result would be 1-D and break column indexing downstream.
    return kept.reshape(-1, dets.shape[1])
def xyxy2xywh(x):
    """Convert rows from (x1, y1, x2, y2) to (x1, y1, width, height).

    The top-left corner is kept; only columns 2 and 3 are replaced.
    The input array is not modified.
    """
    y = np.copy(x)
    left, top, right, bottom = x[:, 0], x[:, 1], x[:, 2], x[:, 3]
    y[:, 2] = right - left  # width
    y[:, 3] = bottom - top  # height
    return y
def scale_boxes(img1_shape, boxes, img0_shape):
    """Map predictions from the letterboxed image back onto the original image.

    ``boxes`` is modified in place and also returned.

    :param img1_shape: shape (height, width, ...) of the letterboxed image
    :param boxes: rows of (x, y, w, h, score, kx, ky, kconf, ...) with (x, y)
        the top-left corner; on return columns 0-3 hold clipped (x1, y1, x2, y2)
    :param img0_shape: shape (height, width, ...) of the original image
    :return: the same ``boxes`` array
    """
    net_h, net_w = img1_shape[0], img1_shape[1]
    src_h, src_w = img0_shape[0], img0_shape[1]

    gain = min(net_h / src_h, net_w / src_w)  # letterboxed size / original size
    pad_x = (net_w - src_w * gain) / 2        # horizontal padding on one side
    pad_y = (net_h - src_h * gain) / 2        # vertical padding on one side

    # Box: remove the padding from the corner, then undo the scaling on x, y, w, h.
    boxes[:, 0] -= pad_x
    boxes[:, 1] -= pad_y
    boxes[:, :4] /= gain

    # Keypoints are (x, y, conf) triplets starting at column 5; conf is left as is.
    num_kpts = boxes.shape[1] // 3  # 56 // 3 = 18
    for x_col in range(5, num_kpts * 3, 3):
        y_col = x_col + 1
        boxes[:, x_col] = (boxes[:, x_col] - pad_x) / gain
        boxes[:, y_col] = (boxes[:, y_col] - pad_y) / gain

    clip_boxes(boxes, img0_shape)
    return boxes
def clip_boxes(boxes, shape):
    """Clip boxes to the image and convert them, in place, from
    (x, y, w, h) to (x1, y1, x2, y2).

    :param boxes: array whose first four columns are (top-left x, top-left y, width, height)
    :param shape: image shape as (height, width, ...)
    """
    img_h, img_w = shape[0], shape[1]
    # Work out the far corner from the unclipped values before anything is overwritten.
    right = np.clip(boxes[:, 0] + boxes[:, 2], 0, img_w)
    bottom = np.clip(boxes[:, 1] + boxes[:, 3], 0, img_h)
    boxes[:, 0] = np.clip(boxes[:, 0], 0, img_w)  # top-left
    boxes[:, 1] = np.clip(boxes[:, 1], 0, img_h)
    boxes[:, 2] = right                           # bottom-right
    boxes[:, 3] = bottom
def plot_skeleton_kpts(im, kpts, steps=3):
    """Draw keypoints and skeleton limbs of one detection onto ``im``.

    :param im: image drawn on in place
    :param kpts: flat sequence of (x, y, conf) groups, one group per keypoint
    :param steps: number of values per keypoint
    """
    num_kpts = len(kpts) // steps  # 51 / 3 = 17

    # Keypoints: a filled circle for every point that is confident enough.
    for kid in range(num_kpts):
        r, g, b = pose_kpt_color[kid]
        base = steps * kid
        x_coord, y_coord = kpts[base], kpts[base + 1]
        conf = kpts[base + 2]
        if conf > score_threshold:
            cv2.circle(im, (int(x_coord), int(y_coord)), 5, (int(r), int(g), int(b)), -1)

    # Limbs: a line only when both end points pass the confidence threshold.
    for sk_id, (first, second) in enumerate(skeleton):
        r, g, b = pose_limb_color[sk_id]
        start = (first - 1) * steps   # skeleton indices are 1-based
        end = (second - 1) * steps
        pos1 = (int(kpts[start]), int(kpts[start + 1]))
        pos2 = (int(kpts[end]), int(kpts[end + 1]))
        conf1 = kpts[start + 2]
        conf2 = kpts[end + 2]
        if conf1 > score_threshold and conf2 > score_threshold:
            cv2.line(im, pos1, pos2, (int(r), int(g), int(b)), thickness=2)
if __name__ == '__main__':
    # Script entry point: run pose estimation on one image with onnxruntime and show the result.
    image = cv2.imread('bus.jpg')
    if image is None:
        # cv2.imread reports failure by returning None rather than raising.
        raise FileNotFoundError("cv2.imread could not read 'bus.jpg'")

    img = letterbox(image)
    data = pre_process(img)

    session = onnxruntime.InferenceSession('yolov8n-pose.onnx', providers=['CPUExecutionProvider'])
    input_name = session.get_inputs()[0].name
    pred = session.run(None, {input_name: data.astype(np.float32)})[0]

    pred = np.transpose(pred[0], (1, 0))               # one row per candidate box
    pred = pred[pred[:, 4] > score_threshold]          # confidence filter
    bboxs = xywh2xyxy(pred)                            # centre/size -> corner points
    bboxs = nms(bboxs, nms_threshold)                  # non-maximum suppression
    # Keep the array 2-D even when nothing was detected, so column indexing below is valid.
    bboxs = np.array(bboxs).reshape(-1, pred.shape[1])
    bboxs = xyxy2xywh(bboxs)                           # corner points -> top-left + size
    bboxs = scale_boxes(img.shape, bboxs, image.shape) # back to original image coordinates

    for box in bboxs:
        det_bbox, det_scores, kpts = box[0:4], box[4], box[5:]
        cv2.rectangle(image, (int(det_bbox[0]), int(det_bbox[1])), (int(det_bbox[2]), int(det_bbox[3])), (0, 0, 255), 2)
        cv2.putText(image, "conf:{:.2f}".format(det_scores), (int(det_bbox[0]) + 5, int(det_bbox[1]) - 5), cv2.FONT_HERSHEY_DUPLEX, 0.8, (0, 0, 255), 1)
        plot_skeleton_kpts(image, kpts)

    cv2.imshow("keypoint", image)
    cv2.waitKey(0)
    cv2.destroyAllWindows()
#include <iostream>
#include <opencv2/opencv.hpp>
#include <onnxruntime_cxx_api.h>
// Network input size in pixels.
const int input_width = 640;
const int input_height = 640;
// Minimum confidence for keeping a detection and for drawing a keypoint/limb.
const float score_threshold = 0.5;
// IoU threshold passed to nms().
const float nms_threshold = 0.5;
// Number of candidate boxes: the sum of the grid cells at strides 8, 16 and 32.
const int output_numbox = input_width / 8 * input_height / 8 + input_width / 16 * input_height / 16 + input_width / 32 * input_height / 32;
// Limb connections as pairs of 1-based keypoint indices; draw_result() iterates over all pairs.
const std::unordered_multimap<int, int> skeletons = { {16, 14}, { 14, 12 }, {17, 15}, {15, 13}, {12, 13}, {6, 12},
{7, 13}, {6, 7}, {6, 8}, {7, 9}, {8, 10}, {9, 11}, {2, 3}, {1, 2}, {1, 3}, {2, 4}, {3, 5}, {4, 6}, {5, 7} };
// LetterBox: resize the image keeping its aspect ratio, then pad it to newShape.
//   image    - source image (must not be empty)
//   outImage - receives the resized and padded image
//   newShape - target size
//   color    - constant border colour used for the padding
void LetterBox(const cv::Mat& image, cv::Mat& outImage,
    const cv::Size& newShape = cv::Size(640, 640), const cv::Scalar& color = cv::Scalar(114, 114, 114))
{
    // An empty image would make the ratio below a division by zero.
    CV_Assert(!image.empty());

    const cv::Size shape = image.size();
    const float r = std::min((float)newShape.height / (float)shape.height, (float)newShape.width / (float)shape.width);
    const cv::Size new_un_pad((int)std::round((float)shape.width * r), (int)std::round((float)shape.height * r));

    // Padding is split evenly between the two sides of each axis.
    const float dw = (float)(newShape.width - new_un_pad.width) / 2;
    const float dh = (float)(newShape.height - new_un_pad.height) / 2;

    // Resize when EITHER dimension differs (the original tested both with &&).
    if (shape != new_un_pad)
        cv::resize(image, outImage, new_un_pad);
    else
        outImage = image.clone();

    // The -0.1 / +0.1 offsets send the odd pixel of a half-pixel pad to the bottom/right side.
    const int top = int(std::round(dh - 0.1f));
    const int bottom = int(std::round(dh + 0.1f));
    const int left = int(std::round(dw - 0.1f));
    const int right = int(std::round(dw + 0.1f));
    cv::copyMakeBorder(outImage, outImage, top, bottom, left, right, cv::BORDER_CONSTANT, color);
}
// Pre-processing: letterbox, BGR -> RGB, scale by 1/255, and append the
// pixels to `inputs` in planar (channel after channel) order.
//   image  - source BGR image
//   inputs - destination buffer; data is appended at the end
void pre_process(cv::Mat& image, std::vector<float>& inputs)
{
    cv::Mat letterbox;
    LetterBox(image, letterbox, cv::Size(input_width, input_height));
    cv::cvtColor(letterbox, letterbox, cv::COLOR_BGR2RGB);
    letterbox.convertTo(letterbox, CV_32FC3, 1.0f / 255.0f);

    std::vector<cv::Mat> split_images;
    cv::split(letterbox, split_images);

    // One allocation up front instead of growing the vector once per channel.
    inputs.reserve(inputs.size() + letterbox.total() * split_images.size());
    for (const cv::Mat& plane : split_images)
    {
        // Pointer arithmetic over the whole plane needs a continuous buffer.
        const cv::Mat flat = plane.isContinuous() ? plane : plane.clone();
        const float* first = flat.ptr<float>(0);
        inputs.insert(inputs.end(), first, first + flat.total());
    }
}
// Network inference with ONNX Runtime.
//   model   - path of the .onnx file
//   inputs  - planar float input data; must stay alive for the duration of the call
//   outputs - receives the output tensors returned by Session::Run
// Errors reported by ONNX Runtime propagate as Ort::Exception.
void process(const wchar_t* model, std::vector<float>& inputs, std::vector<Ort::Value>& outputs)
{
    Ort::Env env(ORT_LOGGING_LEVEL_WARNING, "yolov5n");
    Ort::SessionOptions session_options;
    session_options.SetIntraOpNumThreads(12); // number of intra-op threads
    session_options.SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_ENABLE_EXTENDED); // graph optimisation level

    //CUDA option set
    //OrtCUDAProviderOptions cuda_option;
    //cuda_option.device_id = 0;
    //cuda_option.arena_extend_strategy = 0;
    //cuda_option.cudnn_conv_algo_search = OrtCudnnConvAlgoSearchExhaustive;
    //cuda_option.gpu_mem_limit = SIZE_MAX;
    //cuda_option.do_copy_in_default_stream = 1;
    //session_options.SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_ENABLE_ALL);
    //session_options.AppendExecutionProvider_CUDA(cuda_option);

    Ort::Session session(env, model, session_options);

    // The tensor names are hard-coded; one entry is added per model input/output.
    std::vector<const char*> input_node_names;
    for (size_t i = 0; i < session.GetInputCount(); i++)
    {
        input_node_names.push_back("images");
    }
    std::vector<const char*> output_node_names;
    for (size_t i = 0; i < session.GetOutputCount(); i++)
    {
        output_node_names.push_back("output0");
    }

    // Create the input tensor over the caller's buffer (no copy).
    // Dimensions are NCHW, so height comes before width.
    std::vector<int64_t> input_node_dims = { 1, 3, input_height, input_width };
    auto memory_info = Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault);
    Ort::Value input_tensor = Ort::Value::CreateTensor<float>(memory_info, inputs.data(), inputs.size(), input_node_dims.data(), input_node_dims.size());
    std::vector<Ort::Value> ort_inputs;
    ort_inputs.push_back(std::move(input_tensor)); // Ort::Value is move-only

    // Run the model and hand the output tensors back to the caller.
    outputs = session.Run(Ort::RunOptions{ nullptr }, input_node_names.data(), ort_inputs.data(), input_node_names.size(), output_node_names.data(), output_node_names.size());
}
// Non-maximum suppression.
//   boxes / scores  - candidate boxes and their confidences (parallel vectors)
//   score_threshold - candidates at or below this score are ignored
//   nms_threshold   - a box is suppressed when its IoU with a kept,
//                     higher-scoring box is >= this value
//   indices         - receives (appended) the indices into `boxes` of the survivors
void nms(std::vector<cv::Rect>& boxes, std::vector<float>& scores, float score_threshold, float nms_threshold, std::vector<int>& indices)
{
    struct Candidate
    {
        cv::Rect box;
        float score;
        int id;
    };

    // Collect the candidates that pass the score threshold, remembering their original index.
    std::vector<Candidate> candidates;
    for (size_t k = 0; k < boxes.size(); k++)
    {
        Candidate c;
        c.box = boxes[k];
        c.score = scores[k];
        c.id = k;
        if (scores[k] > score_threshold)
            candidates.push_back(c);
    }

    // Highest score first.
    std::sort(candidates.begin(), candidates.end(), [](const Candidate& a, const Candidate& b) { return a.score > b.score; });

    const size_t count = candidates.size();
    std::vector<float> area(count);
    for (size_t k = 0; k < count; ++k)
    {
        area[k] = candidates[k].box.width * candidates[k].box.height;
    }

    std::vector<bool> suppressed(count, false);
    for (size_t i = 0; i < count; ++i)
    {
        if (suppressed[i])
            continue;
        const cv::Rect& a = candidates[i].box;
        for (size_t j = i + 1; j < count; ++j)
        {
            if (suppressed[j])
                continue;
            const cv::Rect& b = candidates[j].box;

            // Intersection rectangle of a and b.
            float left = (std::max)(a.x, b.x);
            float top = (std::max)(a.y, b.y);
            float right = (std::min)(a.x + a.width, b.x + b.width);
            float bottom = (std::min)(a.y + a.height, b.y + b.height);
            float inter = (std::max)(0.0f, right - left) * (std::max)(0.0f, bottom - top);

            float iou = inter / (area[i] + area[j] - inter);
            if (iou >= nms_threshold)
                suppressed[j] = true;
        }
    }

    for (size_t k = 0; k < count; ++k)
    {
        if (!suppressed[k])
            indices.push_back(candidates[k].id);
    }
}
// Map a box and its keypoints from network-input coordinates back to the original image.
//   box      - (x, y, width, height), updated in place
//   keypoint - flat (x, y, conf) triplets, x and y updated in place
//   size     - size of the original image
void scale_box(cv::Rect& box, std::vector<float>& keypoint, cv::Size size)
{
    // Scale factor that was applied to the original image (input size / original size).
    const float gain = std::min(input_width * 1.0 / size.width, input_height * 1.0 / size.height);
    // Padding on the left/top side, truncated to whole pixels.
    const int pad_w = (input_width - size.width * gain) / 2;
    const int pad_h = (input_height - size.height * gain) / 2;

    // Remove the padding from the corner, then undo the scaling; results are truncated to int.
    box.x = static_cast<int>((box.x - pad_w) / gain);
    box.y = static_cast<int>((box.y - pad_h) / gain);
    box.width = static_cast<int>(box.width / gain);
    box.height = static_cast<int>(box.height / gain);

    // Only x and y of each triplet are transformed; the confidence is left untouched.
    const size_t triplet_end = (keypoint.size() / 3) * 3;
    for (size_t base = 0; base < triplet_end; base += 3)
    {
        keypoint[base] = (keypoint[base] - pad_w) / gain;
        keypoint[base + 1] = (keypoint[base + 1] - pad_h) / gain;
    }
}
// Visualisation: draw the box, its label, the keypoints and the skeleton.
//   image    - image drawn on in place
//   label    - text placed at the top-left corner of the box
//   box      - detection rectangle
//   keypoint - flat (x, y, conf) triplets
void draw_result(cv::Mat& image, std::string label, cv::Rect box, std::vector<float> keypoint)
{
    cv::rectangle(image, box, cv::Scalar(255, 0, 0), 1);
    cv::putText(image, label, cv::Point(box.x, box.y), cv::FONT_HERSHEY_SIMPLEX, 1, cv::Scalar(0, 0, 255), 1);

    const size_t num_kpts = keypoint.size() / 3;

    // Keypoints: a filled circle for every point that is confident enough.
    for (size_t i = 0; i < num_kpts; i++)
    {
        if (keypoint[3 * i + 2] < score_threshold)
            continue;
        cv::circle(image, cv::Point(static_cast<int>(keypoint[3 * i]), static_cast<int>(keypoint[3 * i + 1])), 4, cv::Scalar(0, 255, 0), -1);
    }

    // Limbs: a line only when both end points pass the confidence threshold.
    for (const auto& skeleton : skeletons)
    {
        // Skeleton indices are 1-based.
        const size_t a = static_cast<size_t>(skeleton.first - 1);
        const size_t b = static_cast<size_t>(skeleton.second - 1);
        // Skip limbs that refer to keypoints this detection does not have (avoids an out-of-range read).
        if (a >= num_kpts || b >= num_kpts)
            continue;

        const float conf1 = keypoint[3 * a + 2];
        const float conf2 = keypoint[3 * b + 2];
        if (conf1 > score_threshold && conf2 > score_threshold)
        {
            const cv::Point pos1(static_cast<int>(keypoint[3 * a]), static_cast<int>(keypoint[3 * a + 1]));
            const cv::Point pos2(static_cast<int>(keypoint[3 * b]), static_cast<int>(keypoint[3 * b + 1]));
            cv::line(image, pos1, pos2, cv::Scalar(0, 255, 0), 2);
        }
    }
}
// Post-processing: decode the raw output, filter by score, run NMS and draw the survivors.
//   image   - original image (only its size is used)
//   result  - image the detections are drawn on
//   outputs - tensors returned by process(); outputs[0] is read as a
//             row-major [channel][box] float array with 5 + 51 channels
void post_process(cv::Mat& image, cv::Mat& result, std::vector<Ort::Value>& outputs)
{
    const int num_kpt_values = 51; // 17 keypoints x (x, y, conf)

    // Nothing to decode, or the tensor is too small for the layout read below.
    if (outputs.empty())
        return;
    const size_t needed = static_cast<size_t>(5 + num_kpt_values) * output_numbox;
    if (outputs[0].GetTensorTypeAndShapeInfo().GetElementCount() < needed)
        return;

    // Fetch the data pointer once instead of once per element.
    const float* data = outputs[0].GetTensorData<float>();
    auto at = [data](int channel, int box_index) { return data[channel * output_numbox + box_index]; };

    std::vector<cv::Rect> boxes;
    std::vector<float> scores;
    std::vector<std::vector<float>> keypoints;
    for (int i = 0; i < output_numbox; i++)
    {
        const float score = at(4, i);
        if (score < score_threshold)
            continue;

        // Channels 0-3 are centre x, centre y, width, height.
        const float x = at(0, i);
        const float y = at(1, i);
        const float w = at(2, i);
        const float h = at(3, i);
        cv::Rect box = cv::Rect(int(x - 0.5 * w), int(y - 0.5 * h), int(w), int(h));

        // Channels 5.. hold the keypoint triplets.
        std::vector<float> keypoint(num_kpt_values);
        for (int j = 0; j < num_kpt_values; j++)
        {
            keypoint[j] = at(5 + j, i);
        }

        scale_box(box, keypoint, image.size());
        boxes.push_back(box);
        scores.push_back(score);
        keypoints.push_back(keypoint);
    }

    std::vector<int> indices;
    nms(boxes, scores, score_threshold, nms_threshold, indices);

    for (size_t i = 0; i < indices.size(); i++)
    {
        const int idx = indices[i];
        std::string label = cv::format("%.2f", scores[idx]);
        draw_result(result, label, boxes[idx], keypoints[idx]);
    }
}
int main(int argc, char* argv[])
{
cv::Mat image = cv::imread("bus.jpg");
std::vector<float> inputs;
pre_process(image, inputs);
const wchar_t* model = L"yolov8n-pose.onnx";
std::vector<Ort::Value> outputs;
process(model, inputs, outputs);
cv::Mat result = image.clone();
post_process(image, result, outputs);
cv::imwrite("result.jpg", result);
return 0;
}
tensorrt推理
python推理代码:
import cv2
import numpy as np
import tensorrt as trt
import pycuda.autoinit
import pycuda.driver as cuda
# Colour palette: 20 colour triples, selected by index for the two tables below
palette = np.array([[255, 128, 0], [255, 153, 51], [255, 178, 102],
[230, 230, 0], [255, 153, 255], [153, 204, 255],
[255, 102, 255], [255, 51, 255], [102, 178, 255],
[51, 153, 255], [255, 153, 153], [255, 102, 102],
[255, 51, 51], [153, 255, 153], [102, 255, 102],
[51, 255, 51], [0, 255, 0], [0, 0, 255], [255, 0, 0],
[255, 255, 255]])
# Limb connections between the 17 keypoints, as pairs of 1-based keypoint indices
skeleton = [[16, 14], [14, 12], [17, 15], [15, 13], [12, 13], [6, 12],
[7, 13], [6, 7], [6, 8], [7, 9], [8, 10], [9, 11], [2, 3],
[1, 2], [1, 3], [2, 4], [3, 5], [4, 6], [5, 7]]
# Limb colours: one palette row per entry of `skeleton` (19 entries)
pose_limb_color = palette[[9, 9, 9, 9, 7, 7, 7, 0, 0, 0, 0, 0, 16, 16, 16, 16, 16, 16, 16]]
# Keypoint colours: one palette row per keypoint (17 entries)
pose_kpt_color = palette[[16, 16, 16, 16, 16, 0, 0, 0, 0, 0, 0, 9, 9, 9, 9, 9, 9]]
# Minimum confidence for keeping a detection and for drawing a keypoint/limb
score_threshold = 0.5
# IoU threshold passed to nms()
nms_threshold = 0.5
def letterbox(im, new_shape=(640, 640), color=(114, 114, 114)):
    """Resize ``im`` keeping its aspect ratio, then pad it to ``new_shape``.

    :param im: image array whose first two dimensions are (height, width)
    :param new_shape: target (height, width)
    :param color: constant border colour used for the padding
    :return: the resized and padded image
    """
    src_h, src_w = im.shape[:2]
    dst_h, dst_w = new_shape[0], new_shape[1]

    # Scale ratio (new / old): the tighter of the two axes wins.
    ratio = min(dst_h / src_h, dst_w / src_w)

    # Size after scaling, as (width, height) because that is what cv2.resize takes.
    resized_wh = (int(round(src_w * ratio)), int(round(src_h * ratio)))

    # Padding is split evenly between the two sides of each axis.
    pad_w = (dst_w - resized_wh[0]) / 2
    pad_h = (dst_h - resized_wh[1]) / 2
    # The -0.1 / +0.1 offsets send the odd pixel of a half-pixel pad to the bottom/right side.
    top = int(round(pad_h - 0.1))
    bottom = int(round(pad_h + 0.1))
    left = int(round(pad_w - 0.1))
    right = int(round(pad_w + 0.1))

    if (src_w, src_h) != resized_wh:
        im = cv2.resize(im, resized_wh, interpolation=cv2.INTER_LINEAR)
    return cv2.copyMakeBorder(im, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color)
def pre_process(img):
    """Scale an H x W x C image by 1/255 and reshape it to (1, C, H, W).

    The channel order is left as it is.
    """
    scaled = img / 255.
    chw = np.transpose(scaled, (2, 0, 1))  # HWC -> CHW
    return np.expand_dims(chw, axis=0)     # add the batch dimension
def xywh2xyxy(x):
    """Convert rows from (centre x, centre y, w, h) to (x1, y1, x2, y2).

    Only the first four columns are rewritten; the rest are copied unchanged.
    The input array is not modified.
    """
    y = np.copy(x)
    half_w = x[:, 2] / 2
    half_h = x[:, 3] / 2
    y[:, 0] = x[:, 0] - half_w  # top-left x
    y[:, 1] = x[:, 1] - half_h  # top-left y
    y[:, 2] = x[:, 0] + half_w  # bottom-right x
    y[:, 3] = x[:, 1] + half_h  # bottom-right y
    return y
# Non-maximum suppression
def nms(dets, iou_thresh):
    """Greedy non-maximum suppression.

    :param dets: N x M array; columns 0-3 are (x1, y1, x2, y2), column 4 is
        the score, and any further columns are carried along untouched
    :param iou_thresh: a box is dropped when its IoU with an already kept,
        higher-scoring box is greater than this value
    :return: K x M array of the kept rows, highest score first. The result is
        always 2-D, so an empty input yields shape (0, M) instead of (0,).
    """
    x1 = dets[:, 0]
    y1 = dets[:, 1]
    x2 = dets[:, 2]
    y2 = dets[:, 3]
    scores = dets[:, 4]

    # Box areas, using the inclusive-pixel (+1) convention.
    areas = (x2 - x1 + 1) * (y2 - y1 + 1)
    order = scores.argsort()[::-1]  # indices sorted by descending score
    keep = []  # indices of the boxes that survive

    while order.size > 0:
        best, rest = order[0], order[1:]
        keep.append(best)  # the highest-scoring remaining box is always kept

        # Intersection of the best box with every remaining box.
        xx1 = np.maximum(x1[best], x1[rest])
        yy1 = np.maximum(y1[best], y1[rest])
        xx2 = np.minimum(x2[best], x2[rest])
        yy2 = np.minimum(y2[best], y2[rest])
        w = np.maximum(0.0, xx2 - xx1 + 1)
        h = np.maximum(0.0, yy2 - yy1 + 1)
        inter = w * h

        # Intersection over union.
        ovr = inter / (areas[best] + areas[rest] - inter)

        # Only boxes that overlap little enough go on to the next round.
        order = rest[ovr <= iou_thresh]

    kept = np.array([dets[i].tolist() for i in keep])
    # Without the reshape an empty result would be 1-D and break column indexing downstream.
    return kept.reshape(-1, dets.shape[1])
def xyxy2xywh(x):
    """Convert rows from (x1, y1, x2, y2) to (x1, y1, width, height).

    The top-left corner is kept; only columns 2 and 3 are replaced.
    The input array is not modified.
    """
    y = np.copy(x)
    left, top, right, bottom = x[:, 0], x[:, 1], x[:, 2], x[:, 3]
    y[:, 2] = right - left  # width
    y[:, 3] = bottom - top  # height
    return y
def scale_boxes(img1_shape, boxes, img0_shape):
    """Map predictions from the letterboxed image back onto the original image.

    ``boxes`` is modified in place and also returned.

    :param img1_shape: shape (height, width, ...) of the letterboxed image
    :param boxes: rows of (x, y, w, h, score, kx, ky, kconf, ...) with (x, y)
        the top-left corner; on return columns 0-3 hold clipped (x1, y1, x2, y2)
    :param img0_shape: shape (height, width, ...) of the original image
    :return: the same ``boxes`` array
    """
    net_h, net_w = img1_shape[0], img1_shape[1]
    src_h, src_w = img0_shape[0], img0_shape[1]

    gain = min(net_h / src_h, net_w / src_w)  # letterboxed size / original size
    pad_x = (net_w - src_w * gain) / 2        # horizontal padding on one side
    pad_y = (net_h - src_h * gain) / 2        # vertical padding on one side

    # Box: remove the padding from the corner, then undo the scaling on x, y, w, h.
    boxes[:, 0] -= pad_x
    boxes[:, 1] -= pad_y
    boxes[:, :4] /= gain

    # Keypoints are (x, y, conf) triplets starting at column 5; conf is left as is.
    num_kpts = boxes.shape[1] // 3  # 56 // 3 = 18
    for x_col in range(5, num_kpts * 3, 3):
        y_col = x_col + 1
        boxes[:, x_col] = (boxes[:, x_col] - pad_x) / gain
        boxes[:, y_col] = (boxes[:, y_col] - pad_y) / gain

    clip_boxes(boxes, img0_shape)
    return boxes
def clip_boxes(boxes, shape):
    """Clip boxes to the image and convert them, in place, from
    (x, y, w, h) to (x1, y1, x2, y2).

    :param boxes: array whose first four columns are (top-left x, top-left y, width, height)
    :param shape: image shape as (height, width, ...)
    """
    img_h, img_w = shape[0], shape[1]
    # Work out the far corner from the unclipped values before anything is overwritten.
    right = np.clip(boxes[:, 0] + boxes[:, 2], 0, img_w)
    bottom = np.clip(boxes[:, 1] + boxes[:, 3], 0, img_h)
    boxes[:, 0] = np.clip(boxes[:, 0], 0, img_w)  # top-left
    boxes[:, 1] = np.clip(boxes[:, 1], 0, img_h)
    boxes[:, 2] = right                           # bottom-right
    boxes[:, 3] = bottom
def plot_skeleton_kpts(im, kpts, steps=3):
    """Draw keypoints and skeleton limbs of one detection onto ``im``.

    :param im: image drawn on in place
    :param kpts: flat sequence of (x, y, conf) groups, one group per keypoint
    :param steps: number of values per keypoint
    """
    num_kpts = len(kpts) // steps  # 51 / 3 = 17

    # Keypoints: a filled circle for every point that is confident enough.
    for kid in range(num_kpts):
        r, g, b = pose_kpt_color[kid]
        base = steps * kid
        x_coord, y_coord = kpts[base], kpts[base + 1]
        conf = kpts[base + 2]
        if conf > score_threshold:
            cv2.circle(im, (int(x_coord), int(y_coord)), 5, (int(r), int(g), int(b)), -1)

    # Limbs: a line only when both end points pass the confidence threshold.
    for sk_id, (first, second) in enumerate(skeleton):
        r, g, b = pose_limb_color[sk_id]
        start = (first - 1) * steps   # skeleton indices are 1-based
        end = (second - 1) * steps
        pos1 = (int(kpts[start]), int(kpts[start + 1]))
        pos2 = (int(kpts[end]), int(kpts[end + 1]))
        conf1 = kpts[start + 2]
        conf2 = kpts[end + 2]
        if conf1 > score_threshold and conf2 > score_threshold:
            cv2.line(im, pos1, pos2, (int(r), int(g), int(b)), thickness=2)
if __name__ == '__main__':
    # Script entry point: run pose estimation on one image with TensorRT and save the result.
    logger = trt.Logger(trt.Logger.WARNING)
    with open("yolov8n-pose.engine", "rb") as f, trt.Runtime(logger) as runtime:
        engine = runtime.deserialize_cuda_engine(f.read())

    # A single execution context is used for the buffer sizes and for the inference itself.
    context = engine.create_execution_context()
    input_shape = tuple(context.get_binding_shape(0))
    output_shape = tuple(context.get_binding_shape(1))

    # Page-locked host buffers and matching device buffers for binding 0 (input) and 1 (output).
    inputs_host = cuda.pagelocked_empty(trt.volume(input_shape), dtype=np.float32)
    outputs_host = cuda.pagelocked_empty(trt.volume(output_shape), dtype=np.float32)
    inputs_device = cuda.mem_alloc(inputs_host.nbytes)
    outputs_device = cuda.mem_alloc(outputs_host.nbytes)
    stream = cuda.Stream()

    image = cv2.imread('bus.jpg', -1)
    if image is None:
        # cv2.imread reports failure by returning None rather than raising.
        raise FileNotFoundError("cv2.imread could not read 'bus.jpg'")

    # Keep the letterboxed image: scale_boxes() below needs its (height, width).
    img = letterbox(image)
    # BGR -> RGB, HWC -> CHW, scale by 1/255.
    blob = img[:, :, ::-1].transpose(2, 0, 1).astype(dtype=np.float32) / 255.0
    np.copyto(inputs_host, blob.ravel())

    cuda.memcpy_htod_async(inputs_device, inputs_host, stream)
    context.execute_async_v2(bindings=[int(inputs_device), int(outputs_device)], stream_handle=stream.handle)
    cuda.memcpy_dtoh_async(outputs_host, outputs_device, stream)
    stream.synchronize()

    pred = outputs_host.reshape(output_shape)
    pred = np.transpose(pred[0], (1, 0))               # one row per candidate box
    pred = pred[pred[:, 4] > score_threshold]          # confidence filter
    bboxs = xywh2xyxy(pred)                            # centre/size -> corner points
    bboxs = nms(bboxs, nms_threshold)                  # non-maximum suppression
    # Keep the array 2-D even when nothing was detected, so column indexing below is valid.
    bboxs = np.array(bboxs).reshape(-1, pred.shape[1])
    bboxs = xyxy2xywh(bboxs)                           # corner points -> top-left + size
    bboxs = scale_boxes(img.shape, bboxs, image.shape) # back to original image coordinates

    for box in bboxs:
        det_bbox, det_scores, kpts = box[0:4], box[4], box[5:]
        cv2.rectangle(image, (int(det_bbox[0]), int(det_bbox[1])), (int(det_bbox[2]), int(det_bbox[3])), (0, 0, 255), 2)
        cv2.putText(image, "conf:{:.2f}".format(det_scores), (int(det_bbox[0]) + 5, int(det_bbox[1]) - 5), cv2.FONT_HERSHEY_DUPLEX, 0.8, (0, 0, 255), 1)
        plot_skeleton_kpts(image, kpts)

    cv2.imwrite('result.jpg', image)
C++推理代码:
#include <iostream>
#include <fstream>
#include <vector>
#include <opencv2/opencv.hpp>
#include <cuda_runtime.h>
#include <NvInfer.h>
#include <NvInferRuntime.h>
// Network input size in pixels.
const int input_width = 640;
const int input_height = 640;
// Minimum confidence for keeping a detection and for drawing a keypoint/limb.
const float score_threshold = 0.5;
// IoU threshold passed to nms().
const float nms_threshold = 0.5;
// Values per candidate box in the output; post_process() reads 4 box values, 1 score and 51 keypoint values.
const int output_numprob = 56;
// Number of candidate boxes: the sum of the grid cells at strides 8, 16 and 32.
const int output_numbox = input_width / 8 * input_height / 8 + input_width / 16 * input_height / 16 + input_width / 32 * input_height / 32;
// Element counts of the input and output buffers.
const int input_numel = 1 * 3 * input_width * input_height;
const int output_numel = 1 * output_numprob * output_numbox;
// Limb connections as pairs of 1-based keypoint indices; draw_result() iterates over all pairs.
const std::unordered_multimap<int, int> skeletons = { {16, 14}, { 14, 12 }, {17, 15}, {15, 13}, {12, 13}, {6, 12},
{7, 13}, {6, 7}, {6, 8}, {7, 9}, {8, 10}, {9, 11}, {2, 3}, {1, 2}, {1, 3}, {2, 4}, {3, 5}, {4, 6}, {5, 7} };
// Return a printable name for a TensorRT log severity.
inline const char* severity_string(nvinfer1::ILogger::Severity t)
{
    using Severity = nvinfer1::ILogger::Severity;
    const char* name = "unknow";
    switch (t)
    {
    case Severity::kINTERNAL_ERROR:
        name = "internal_error";
        break;
    case Severity::kERROR:
        name = "error";
        break;
    case Severity::kWARNING:
        name = "warning";
        break;
    case Severity::kINFO:
        name = "info";
        break;
    case Severity::kVERBOSE:
        name = "verbose";
        break;
    default:
        break;
    }
    return name;
}
// Logger handed to TensorRT: prints messages of severity kINFO and more
// severe to stdout, colouring warnings yellow and errors red.
class TRTLogger : public nvinfer1::ILogger
{
public:
    virtual void log(Severity severity, nvinfer1::AsciiChar const* msg) noexcept override
    {
        // Messages less severe than kINFO are dropped.
        if (severity > Severity::kINFO)
            return;

        const char* level = severity_string(severity);
        if (severity == Severity::kWARNING)
        {
            printf("\033[33m%s: %s\033[0m\n", level, msg);   // yellow
        }
        else if (severity <= Severity::kERROR)
        {
            printf("\033[31m%s: %s\033[0m\n", level, msg);   // red
        }
        else
        {
            printf("%s: %s\n", level, msg);
        }
    }
} logger;
// Read a whole file into memory.
//   file - path of the file to read
// Returns the file contents, or an empty vector when the file cannot be
// opened, is empty, its size cannot be determined, or it cannot be read in full.
std::vector<unsigned char> load_file(const std::string& file)
{
    std::ifstream in(file, std::ios::in | std::ios::binary);
    if (!in.is_open())
        return {};

    in.seekg(0, std::ios::end);
    // tellg() returns -1 on failure; keep the value signed so that is detectable.
    const std::streamoff length = in.tellg();
    if (length <= 0)
        return {};

    std::vector<unsigned char> data(static_cast<size_t>(length));
    in.seekg(0, std::ios::beg);
    in.read(reinterpret_cast<char*>(data.data()), static_cast<std::streamsize>(length));
    // A short read would leave a partially filled buffer; report it as a failure.
    if (in.gcount() != static_cast<std::streamsize>(length))
        return {};

    return data;
}
// LetterBox: resize the image keeping its aspect ratio, then pad it to newShape.
//   image    - source image (must not be empty)
//   outImage - receives the resized and padded image
//   newShape - target size
//   color    - constant border colour used for the padding
void LetterBox(const cv::Mat& image, cv::Mat& outImage,
    const cv::Size& newShape = cv::Size(640, 640), const cv::Scalar& color = cv::Scalar(114, 114, 114))
{
    // An empty image would make the ratio below a division by zero.
    CV_Assert(!image.empty());

    const cv::Size shape = image.size();
    const float r = std::min((float)newShape.height / (float)shape.height, (float)newShape.width / (float)shape.width);
    const cv::Size new_un_pad((int)std::round((float)shape.width * r), (int)std::round((float)shape.height * r));

    // Padding is split evenly between the two sides of each axis.
    const float dw = (float)(newShape.width - new_un_pad.width) / 2;
    const float dh = (float)(newShape.height - new_un_pad.height) / 2;

    // Resize when EITHER dimension differs (the original tested both with &&).
    if (shape != new_un_pad)
        cv::resize(image, outImage, new_un_pad);
    else
        outImage = image.clone();

    // The -0.1 / +0.1 offsets send the odd pixel of a half-pixel pad to the bottom/right side.
    const int top = int(std::round(dh - 0.1f));
    const int bottom = int(std::round(dh + 0.1f));
    const int left = int(std::round(dw - 0.1f));
    const int right = int(std::round(dw + 0.1f));
    cv::copyMakeBorder(outImage, outImage, top, bottom, left, right, cv::BORDER_CONSTANT, color);
}
// Pre-processing: letterbox, scale by 1/255, and write the pixels to
// input_data_host as three planes with the channel order reversed
// (interleaved channel 2 -> plane 0, channel 1 -> plane 1, channel 0 -> plane 2).
//   image           - source image
//   input_data_host - destination buffer with room for 3 * width * height floats
void pre_process(cv::Mat& image, float* input_data_host)
{
    cv::Mat letterbox;
    LetterBox(image, letterbox, cv::Size(input_width, input_height));
    letterbox.convertTo(letterbox, CV_32FC3, 1.0f / 255.0f);

    const int image_area = letterbox.cols * letterbox.rows;
    const float* pixels = (const float*)letterbox.data;   // interleaved, 3 floats per pixel

    float* plane0 = input_data_host;
    float* plane1 = input_data_host + image_area;
    float* plane2 = input_data_host + image_area * 2;

    for (int i = 0; i < image_area; ++i)
    {
        const float* px = pixels + 3 * i;
        plane2[i] = px[0];
        plane1[i] = px[1];
        plane0[i] = px[2];
    }
}
//网络推理
void process(std::string model, float* input_data_host, float* output_data_host)
{
TRTLogger logger;
auto engine_data = load_file(model);
auto runtime = nvinfer1::createInferRuntime(logger);
auto engine = runtime->deserializeCudaEngine(engine_data.data(), engine_data.size());
cudaStream_t stream = nullptr;
cudaStreamCreate(&stream);
auto execution_context = engine->createExecutionContext();
float* input_data_device = nullptr;
cudaMalloc(&input_data_device, sizeof(float) * input_numel);
cudaMemcpyAsync(input_data_device, input_data_host, sizeof(float) * input_numel, cudaMemcpyHostToDevice, stream);
float* output_data_device = nullptr;
cudaMalloc(&output_data_device, sizeof(float) * output_numel);
float* bindings[] = { input_data_device, output_data_device };
execution_context->enqueueV2((void**)bindings, stream, nullptr);
cudaMemcpyAsync(output_data_host, output_data_device, sizeof(float) * output_numel, cudaMemcpyDeviceToHost, stream);
cudaStreamSynchronize(stream);
cudaStreamDestroy(stream);
cudaFree(input_data_device);
cudaFree(output_data_device);
}
// Non-maximum suppression.
//   boxes / scores  - candidate boxes and their confidences (parallel vectors)
//   score_threshold - candidates at or below this score are ignored
//   nms_threshold   - a box is suppressed when its IoU with a kept,
//                     higher-scoring box is >= this value
//   indices         - receives (appended) the indices into `boxes` of the survivors
void nms(std::vector<cv::Rect>& boxes, std::vector<float>& scores, float score_threshold, float nms_threshold, std::vector<int>& indices)
{
    assert(boxes.size() == scores.size());

    struct Candidate
    {
        cv::Rect box;
        float score;
        int id;
    };

    // Collect the candidates that pass the score threshold, remembering their original index.
    std::vector<Candidate> candidates;
    for (size_t k = 0; k < boxes.size(); k++)
    {
        Candidate c;
        c.box = boxes[k];
        c.score = scores[k];
        c.id = k;
        if (scores[k] > score_threshold)
            candidates.push_back(c);
    }

    // Highest score first.
    std::sort(candidates.begin(), candidates.end(), [](const Candidate& a, const Candidate& b) { return a.score > b.score; });

    const size_t count = candidates.size();
    std::vector<float> area(count);
    for (size_t k = 0; k < count; ++k)
    {
        area[k] = candidates[k].box.width * candidates[k].box.height;
    }

    std::vector<bool> suppressed(count, false);
    for (size_t i = 0; i < count; ++i)
    {
        if (suppressed[i])
            continue;
        const cv::Rect& a = candidates[i].box;
        for (size_t j = i + 1; j < count; ++j)
        {
            if (suppressed[j])
                continue;
            const cv::Rect& b = candidates[j].box;

            // Intersection rectangle of a and b.
            float left = (std::max)(a.x, b.x);
            float top = (std::max)(a.y, b.y);
            float right = (std::min)(a.x + a.width, b.x + b.width);
            float bottom = (std::min)(a.y + a.height, b.y + b.height);
            float inter = (std::max)(0.0f, right - left) * (std::max)(0.0f, bottom - top);

            float iou = inter / (area[i] + area[j] - inter);
            if (iou >= nms_threshold)
                suppressed[j] = true;
        }
    }

    for (size_t k = 0; k < count; ++k)
    {
        if (!suppressed[k])
            indices.push_back(candidates[k].id);
    }
}
// Map a box and its keypoints from network-input coordinates back to the original image.
//   box      - (x, y, width, height), updated in place
//   keypoint - flat (x, y, conf) triplets, x and y updated in place
//   size     - size of the original image
void scale_box(cv::Rect& box, std::vector<float>& keypoint, cv::Size size)
{
    // Scale factor that was applied to the original image (input size / original size).
    const float gain = std::min(input_width * 1.0 / size.width, input_height * 1.0 / size.height);
    // Padding on the left/top side, truncated to whole pixels.
    const int pad_w = (input_width - size.width * gain) / 2;
    const int pad_h = (input_height - size.height * gain) / 2;

    // Remove the padding from the corner, then undo the scaling; results are truncated to int.
    box.x = static_cast<int>((box.x - pad_w) / gain);
    box.y = static_cast<int>((box.y - pad_h) / gain);
    box.width = static_cast<int>(box.width / gain);
    box.height = static_cast<int>(box.height / gain);

    // Only x and y of each triplet are transformed; the confidence is left untouched.
    const size_t triplet_end = (keypoint.size() / 3) * 3;
    for (size_t base = 0; base < triplet_end; base += 3)
    {
        keypoint[base] = (keypoint[base] - pad_w) / gain;
        keypoint[base + 1] = (keypoint[base + 1] - pad_h) / gain;
    }
}
// Visualisation: draw the box, its label, the keypoints and the skeleton.
//   image    - image drawn on in place
//   label    - text placed at the top-left corner of the box
//   box      - detection rectangle
//   keypoint - flat (x, y, conf) triplets
void draw_result(cv::Mat& image, std::string label, cv::Rect box, std::vector<float> keypoint)
{
    cv::rectangle(image, box, cv::Scalar(255, 0, 0), 1);
    cv::putText(image, label, cv::Point(box.x, box.y), cv::FONT_HERSHEY_SIMPLEX, 1, cv::Scalar(0, 0, 255), 1);

    const size_t num_kpts = keypoint.size() / 3;

    // Keypoints: a filled circle for every point that is confident enough.
    for (size_t i = 0; i < num_kpts; i++)
    {
        if (keypoint[3 * i + 2] < score_threshold)
            continue;
        cv::circle(image, cv::Point(static_cast<int>(keypoint[3 * i]), static_cast<int>(keypoint[3 * i + 1])), 4, cv::Scalar(0, 255, 0), -1);
    }

    // Limbs: a line only when both end points pass the confidence threshold.
    for (const auto& skeleton : skeletons)
    {
        // Skeleton indices are 1-based.
        const size_t a = static_cast<size_t>(skeleton.first - 1);
        const size_t b = static_cast<size_t>(skeleton.second - 1);
        // Skip limbs that refer to keypoints this detection does not have (avoids an out-of-range read).
        if (a >= num_kpts || b >= num_kpts)
            continue;

        const float conf1 = keypoint[3 * a + 2];
        const float conf2 = keypoint[3 * b + 2];
        if (conf1 > score_threshold && conf2 > score_threshold)
        {
            const cv::Point pos1(static_cast<int>(keypoint[3 * a]), static_cast<int>(keypoint[3 * a + 1]));
            const cv::Point pos2(static_cast<int>(keypoint[3 * b]), static_cast<int>(keypoint[3 * b + 1]));
            cv::line(image, pos1, pos2, cv::Scalar(0, 255, 0), 2);
        }
    }
}
// Post-processing: decode the raw output, filter by score, run NMS and draw the survivors.
//   image            - original image (only its size is used)
//   result           - image the detections are drawn on
//   output_data_host - output_numel floats read as a row-major [channel][box]
//                      array with output_numprob channels
void post_process(const cv::Mat image, cv::Mat& result, float* output_data_host)
{
    if (output_data_host == nullptr)
        return;

    // Channels 0-3 are the box, channel 4 the score, the remainder keypoint values.
    const int num_kpt_values = output_numprob - 5;
    const float* data = output_data_host;
    auto at = [data](int channel, int box_index) { return data[channel * output_numbox + box_index]; };

    std::vector<cv::Rect> boxes;
    std::vector<float> scores;
    std::vector<std::vector<float>> keypoints;
    for (int i = 0; i < output_numbox; i++)
    {
        const float score = at(4, i);
        if (score < score_threshold)
            continue;

        // Box channels are centre x, centre y, width, height.
        const float x = at(0, i);
        const float y = at(1, i);
        const float w = at(2, i);
        const float h = at(3, i);
        cv::Rect box = cv::Rect(int(x - 0.5 * w), int(y - 0.5 * h), int(w), int(h));

        std::vector<float> keypoint(num_kpt_values);
        for (int j = 0; j < num_kpt_values; j++)
        {
            keypoint[j] = at(5 + j, i);
        }

        scale_box(box, keypoint, image.size());
        boxes.push_back(box);
        scores.push_back(score);
        keypoints.push_back(keypoint);
    }

    std::vector<int> indices;
    nms(boxes, scores, score_threshold, nms_threshold, indices);

    for (size_t i = 0; i < indices.size(); i++)
    {
        const int idx = indices[i];
        std::string label = cv::format("%.2f", scores[idx]);
        draw_result(result, label, boxes[idx], keypoints[idx]);
    }
}
int main(int argc, char* argv[])
{
float* inputs = nullptr;
float* outputs = nullptr;
cudaMallocHost(&inputs, sizeof(float) * input_numel);
cudaMallocHost(&outputs, sizeof(float) * output_numel);
cv::Mat image = cv::imread("bus.jpg");
pre_process(image, inputs);
std::string model = "yolov8n-pose.engine";
process(model, inputs, outputs);
cv::Mat result = image.clone();
post_process(image, result, outputs);
cv::imwrite("result.jpg", result);
cudaFreeHost(outputs);
cudaFreeHost(inputs);
return 0;
}
结果可视化如下:


1784

被折叠的 条评论
为什么被折叠?



