aufgeben, angeben, auftreten
==================================================
等比压缩部署ONNX
import cv2
import onnxruntime as ort
import numpy as np
# Load the ONNX model and create the inference session.
onnx_model_path = r"yolo.onnx"
session = ort.InferenceSession(onnx_model_path)

# Look up the model's first input and first output once, then report their
# names and declared shapes.
_model_input = session.get_inputs()[0]
_model_output = session.get_outputs()[0]
input_name, input_shape = _model_input.name, _model_input.shape
output_name, output_shape = _model_output.name, _model_output.shape
print(f"input name {input_name}, input shape {input_shape}")
print(f"output name {output_name}, output shape {output_shape}")

# Size of the letterboxed frame that is handed to the model.
detect_h = detect_w = 640
# Per-class coordinate offset added to boxes before NMS (see the main loop).
max_wh = max(detect_h, detect_w)
iou_threshold = 0.45
conf_threshold = 0.5
# Class labels indexed by class id; edit to match the model.
namelist = ["red", "green", "yellow", "off"]
def img2input(img):
    """Convert an HWC image into a normalized NCHW float32 batch of one.

    Args:
        img: Array with three axes ordered (height, width, channel).

    Returns:
        A float32 array of shape (1, channel, height, width) in which every
        value has been divided by 255.
    """
    # NOTE(review): the channel order is passed through untouched (no
    # BGR->RGB swap) -- confirm this matches what the model expects.
    chw = np.transpose(img, axes=(2, 0, 1))
    batch = (chw / 255)[np.newaxis, ...]
    return batch.astype(np.float32)
def letterbox(img, img_h, detect_h, img_w, detect_w):
    """Fit an image into the detector canvas while keeping its aspect ratio.

    The image is scaled uniformly so that it fits inside a
    ``detect_w`` x ``detect_h`` canvas, centered, and the uncovered border is
    filled with gray (128, 128, 128).

    Args:
        img: Source image.
        img_h: Height of ``img`` in pixels.
        detect_h: Height of the output canvas in pixels.
        img_w: Width of ``img`` in pixels.
        detect_w: Width of the output canvas in pixels.

    Returns:
        A tuple ``(image, A)``. ``A`` is the 2x3 float32 affine matrix that
        maps source coordinates to canvas coordinates. When the image already
        has the canvas size it is returned unchanged together with the
        identity matrix.
    """
    if img_h == detect_h and img_w == detect_w:
        # Always return the (image, matrix) pair: callers unpack two values
        # and later invert the matrix, so a bare image here would break them.
        identity = np.array([[1, 0, 0], [0, 1, 0]], dtype=np.float32)
        return img, identity

    # Uniform scale factor: the tighter of the two axis ratios.
    scale = min(detect_w / img_w, detect_h / img_h)
    # Half of the leftover space on each axis centers the scaled image.
    h_t = abs(detect_h - scale * img_h) / 2
    w_t = abs(detect_w - scale * img_w) / 2
    A = np.array([[scale, 0, w_t], [0, scale, h_t]], dtype=np.float32)
    img_back = cv2.warpAffine(img, A, (detect_w, detect_h), borderValue=(128, 128, 128))
    return img_back, A
def infer(onnx_session, img):
    """Run the model on one preprocessed input and return its first output.

    Args:
        onnx_session: Session object providing ``get_inputs()``,
            ``get_outputs()`` and ``run()``.
        img: Value fed to the session under its first input name.

    Returns:
        The first element of the list returned by ``onnx_session.run``.
    """
    first_input = onnx_session.get_inputs()[0]
    first_output = onnx_session.get_outputs()[0]
    feeds = {first_input.name: img}
    outputs = onnx_session.run([first_output.name], feeds)
    return outputs[0]
# Post-processing helpers: box-format conversion, confidence filtering, IoU and NMS
def xywh_to_x1y1x2y2(boxes):
    """Convert center-format boxes to corner-format boxes.

    Args:
        boxes: 2-D array whose first four columns are
            (x_center, y_center, width, height); any further columns are
            ignored.

    Returns:
        Array of shape (N, 4) holding (x1, y1, x2, y2) for each row.
    """
    centers = boxes[:, [0, 1]]
    half_sizes = boxes[:, [2, 3]] / 2
    # Top-left corner is center minus half size, bottom-right is center plus.
    top_left = centers - half_sizes
    bottom_right = centers + half_sizes
    return np.concatenate((top_left, bottom_right), axis=1)
def normalize_pred(pred, conf_threshold):
    """Turn a raw model output into confidence-filtered detections.

    Two layouts are handled, told apart by comparing axis 1 with axis 2:

    * axis 1 shorter than axis 2 (v8 style, e.g. 1 x 84 x 8400): the array
      is squeezed and transposed so each row is one candidate. Columns 4 and
      up are class scores; the row score is their maximum and the class is
      their argmax.
    * otherwise (v5 style): the array is squeezed and rows are used as is.
      Column 4 alone is the row score, and the class is the argmax over
      columns 5 and up.

    Args:
        pred: 3-D model output.
        conf_threshold: Rows whose score is not strictly greater than this
            value are dropped.

    Returns:
        A tuple ``(boxes, scores, classes)`` for the surviving rows, with
        boxes converted to (x1, y1, x2, y2).
    """
    if pred.shape[1] < pred.shape[2]:
        rows = np.squeeze(pred).T  # e.g. 1 x 84 x 8400 -> 8400 x 84
        class_scores = rows[:, 4:]
        scores = np.max(class_scores, axis=1)
        classes = np.argmax(class_scores, axis=1)
    else:
        rows = np.squeeze(pred)
        scores = rows[:, 4]
        classes = np.argmax(rows[:, 5:], axis=1)

    # Confidence filter shared by both layouts.
    keep = scores > conf_threshold
    return xywh_to_x1y1x2y2(rows[keep]), scores[keep], classes[keep]
def box_area(boxes):
    """Return the area of each corner-format box.

    Args:
        boxes: 2-D array with columns (x1, y1, x2, y2).

    Returns:
        1-D array holding (x2 - x1) * (y2 - y1) for every row.
    """
    widths = boxes[:, 2] - boxes[:, 0]
    heights = boxes[:, 3] - boxes[:, 1]
    return widths * heights
def box_iou(box1, box2):
    """Compute the pairwise IoU between two sets of corner-format boxes.

    Args:
        box1: Array of shape (N, 4) with columns (x1, y1, x2, y2).
        box2: Array of shape (M, 4) with columns (x1, y1, x2, y2).

    Returns:
        Array of shape (N, M); entry [i, j] is the IoU of box1[i] and box2[j].
    """
    areas_a = box_area(box1)  # N
    areas_b = box_area(box2)  # M

    # Insert an axis into box1 so each of its rows broadcasts against every
    # row of box2.
    expanded = box1[:, np.newaxis, :]
    overlap_min = np.maximum(expanded[..., :2], box2[:, :2])
    overlap_max = np.minimum(expanded[..., 2:], box2[:, 2:])

    # A negative extent means the pair does not overlap; clamp it to zero.
    overlap_wh = np.maximum(0, overlap_max - overlap_min)  # [N, M, 2]
    intersection = overlap_wh[:, :, 0] * overlap_wh[:, :, 1]
    union = areas_a[:, np.newaxis] + areas_b - intersection
    return intersection / union
def numpy_nms(boxes, scores, iou_threshold):
    """Greedy non-maximum suppression.

    Repeatedly keeps the highest-scoring remaining box and discards every
    other remaining box whose IoU with it exceeds ``iou_threshold``.

    Args:
        boxes: Array of shape (N, 4) with columns (x1, y1, x2, y2).
        scores: 1-D array of N scores.
        iou_threshold: Boxes with IoU above this value are suppressed.

    Returns:
        List of indices into ``boxes``, ordered from highest to lowest score.
    """
    remaining = scores.argsort()[::-1]  # indices, highest score first
    keep = []
    while remaining.size > 0:
        best = remaining[0]
        best_box = boxes[best][None, :]
        keep.append(best)
        if remaining.size == 1:
            break
        # Drop the chosen box, then compare it against everything left.
        remaining = remaining[1:]
        ious = box_iou(best_box, boxes[remaining])  # 1 x M
        remaining = remaining[ious[0] <= iou_threshold]
    return keep
def draw_res(img_h, img_w, scores, boxes, classes, img, A, namelist, color=(255, 255, 0), thickness=2):
    """Draw detections on the letterboxed image and warp it back to frame size.

    For every detection a rectangle and a ``"<label> <score>"`` caption are
    drawn onto ``img`` (in place), and the caption is also printed. The
    annotated image is then transformed with the inverse of ``A``.

    Args:
        img_h: Height of the output image in pixels.
        img_w: Width of the output image in pixels.
        scores: Per-detection scores.
        boxes: Per-detection (x1, y1, x2, y2) boxes in ``img`` coordinates.
        classes: Per-detection class ids used to index ``namelist``.
        img: Image to draw on.
        A: 2x3 affine matrix whose inverse is applied to the annotated image.
        namelist: Sequence of class labels.
        color: Rectangle color.
        thickness: Rectangle line thickness.

    Returns:
        The annotated image warped to ``img_w`` x ``img_h``, with uncovered
        areas filled with gray (128, 128, 128).
    """
    # Text rendering settings are the same for every detection.
    font = cv2.FONT_HERSHEY_SIMPLEX
    font_scale = 1
    thickness_text = 2
    text_color = (255, 255, 255)  # white

    for _class, box, score in zip(classes, boxes, scores):
        x1, y1, x2, y2 = (int(v) for v in box[:4])
        cv2.rectangle(img, (x1, y1), (x2, y2), color, thickness)
        text = '{0} {1:.2f}'.format(namelist[_class], score)
        print("===>", text)
        # The caption is anchored at the box's top-left corner.
        cv2.putText(img, text, (x1, y1), font, font_scale, text_color, thickness_text)

    # Undo the letterbox transform so the result has the requested size.
    Q = cv2.invertAffineTransform(A)
    return cv2.warpAffine(img, Q, (img_w, img_h), borderValue=(128, 128, 128))
cap = cv2.VideoCapture(r"F:\training\cv\test.mp4")
try:
    while cap.isOpened():
        # retval is falsy when no frame could be read.
        retval, img = cap.read()
        if not retval:
            print('读取失败')
            break

        # Remember the frame size so the result can be warped back to it.
        img_h, img_w = img.shape[:2]
        img, A = letterbox(img, img_h, detect_h, img_w, detect_w)
        preprocessed_img = img2input(img)
        pred = infer(session, preprocessed_img)
        boxes, scores, classes = normalize_pred(pred, conf_threshold)

        # Shift each box by class_id * max_wh before NMS. This pushes boxes of
        # different classes apart so that suppression effectively happens
        # per class in a single pass.
        c = classes * max_wh
        nb = boxes + c[:, np.newaxis]
        _id = numpy_nms(nb, scores, iou_threshold)

        img = draw_res(img_h, img_w, scores[_id], boxes[_id], classes[_id], img, A, namelist)
        cv2.imshow('detect', img)

        # Wait up to 10 ms for a key press. Only the low byte is compared,
        # since some builds may return the key code with extra high bits set.
        key = cv2.waitKey(10) & 0xFF
        if key == ord('q'):
            break
finally:
    # Release the capture and close windows even if a frame fails to process.
    cap.release()
    cv2.destroyAllWindows()
288

被折叠的 条评论
为什么被折叠?



