Environment setup, model conversion and the like are covered in earlier posts, so here I go straight to the code.
First, the HRNet inference function, hrnet_inference.py:
import os
import urllib
import traceback
import time
import sys
import warnings
import numpy as np
import cv2
# RKNN_MODEL = "hrnet_w32_macaque_256x192-f7e9e04f_20230208.rknn"
mean = [0.485, 0.456, 0.406]
std = [0.229, 0.224, 0.225]
QUANTIZE_ON = True
def bbox_xywh2cs(bbox, aspect_ratio, padding=1., pixel_std=200.):
"""Transform the bbox format from (x,y,w,h) into (center, scale)
Args:
bbox (ndarray): Single bbox in (x, y, w, h)
aspect_ratio (float): The expected bbox aspect ratio (w over h)
        padding (float): Bbox padding factor that will be multiplied to scale.
Default: 1.0
pixel_std (float): The scale normalization factor. Default: 200.0
Returns:
tuple: A tuple containing center and scale.
- np.ndarray[float32](2,): Center of the bbox (x, y).
- np.ndarray[float32](2,): Scale of the bbox w & h.
"""
x, y, w, h = bbox[:4]
center = np.array([x + w * 0.5, y + h * 0.5], dtype=np.float32)
if w > aspect_ratio * h:
h = w * 1.0 / aspect_ratio
elif w < aspect_ratio * h:
w = h * aspect_ratio
scale = np.array([w, h], dtype=np.float32) / pixel_std
scale = scale * padding
return center, scale
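# For example, bbox (0, 0, 100, 300) with aspect_ratio 0.75 and padding 1.25
# keeps h = 300, widens w to 0.75 * 300 = 225, and yields
# center = [50, 150], scale = [225, 300] / 200 * 1.25.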
def rotate_point(pt, angle_rad):
"""Rotate a point by an angle.
Args:
pt (list[float]): 2 dimensional point to be rotated
angle_rad (float): rotation angle by radian
Returns:
list[float]: Rotated point.
"""
assert len(pt) == 2
sn, cs = np.sin(angle_rad), np.cos(angle_rad)
new_x = pt[0] * cs - pt[1] * sn
new_y = pt[0] * sn + pt[1] * cs
rotated_pt = [new_x, new_y]
return rotated_pt
def _get_3rd_point(a, b):
"""To calculate the affine matrix, three pairs of points are required. This
function is used to get the 3rd point, given 2D points a & b.
The 3rd point is defined by rotating vector `a - b` by 90 degrees
anticlockwise, using b as the rotation center.
Args:
a (np.ndarray): point(x,y)
b (np.ndarray): point(x,y)
Returns:
np.ndarray: The 3rd point.
"""
assert len(a) == 2
assert len(b) == 2
direction = a - b
third_pt = b + np.array([-direction[1], direction[0]], dtype=np.float32)
return third_pt
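# For example, a = (1, 0), b = (0, 0): direction = (1, 0), so the third point is
# b + (0, 1) = (0, 1), i.e. the direction vector rotated 90 degrees anticlockwise about b.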
def get_affine_transform(center,
scale,
rot,
output_size,
shift=(0., 0.),
inv=False):
"""Get the affine transform matrix, given the center/scale/rot/output_size.
Args:
center (np.ndarray[2, ]): Center of the bounding box (x, y).
scale (np.ndarray[2, ]): Scale of the bounding box
wrt [width, height].
rot (float): Rotation angle (degree).
output_size (np.ndarray[2, ] | list(2,)): Size of the
destination heatmaps.
shift (0-100%): Shift translation ratio wrt the width/height.
Default (0., 0.).
inv (bool): Option to inverse the affine transform direction.
(inv=False: src->dst or inv=True: dst->src)
Returns:
np.ndarray: The transform matrix.
"""
assert len(center) == 2
assert len(scale) == 2
assert len(output_size) == 2
assert len(shift) == 2
# pixel_std is 200.
scale_tmp = scale * 200.0
shift = np.array(shift)
src_w = scale_tmp[0]
dst_w = output_size[0]
dst_h = output_size[1]
rot_rad = np.pi * rot / 180
src_dir = rotate_point([0., src_w * -0.5], rot_rad)
dst_dir = np.array([0., dst_w * -0.5])
src = np.zeros((3, 2), dtype=np.float32)
src[0, :] = center + scale_tmp * shift
src[1, :] = center + src_dir + scale_tmp * shift
src[2, :] = _get_3rd_point(src[0, :], src[1, :])
dst = np.zeros((3, 2), dtype=np.float32)
dst[0, :] = [dst_w * 0.5, dst_h * 0.5]
dst[1, :] = np.array([dst_w * 0.5, dst_h * 0.5]) + dst_dir
dst[2, :] = _get_3rd_point(dst[0, :], dst[1, :])
if inv:
trans = cv2.getAffineTransform(np.float32(dst), np.float32(src))
else:
trans = cv2.getAffineTransform(np.float32(src), np.float32(dst))
return trans
def bbox_xyxy2xywh(bbox_xyxy):
"""Transform the bbox format from x1y1x2y2 to xywh.
Args:
bbox_xyxy (np.ndarray): Bounding boxes (with scores), shaped (n, 4) or
(n, 5). (left, top, right, bottom, [score])
Returns:
np.ndarray: Bounding boxes (with scores),
shaped (n, 4) or (n, 5). (left, top, width, height, [score])
"""
bbox_xywh = bbox_xyxy.copy()
bbox_xywh[:, 2] = bbox_xywh[:, 2] - bbox_xywh[:, 0]
bbox_xywh[:, 3] = bbox_xywh[:, 3] - bbox_xywh[:, 1]
return bbox_xywh
def _get_max_preds(heatmaps):
"""Get keypoint predictions from score maps.
Note:
batch_size: N
num_keypoints: K
heatmap height: H
heatmap width: W
Args:
heatmaps (np.ndarray[N, K, H, W]): model predicted heatmaps.
Returns:
tuple: A tuple containing aggregated results.
- preds (np.ndarray[N, K, 2]): Predicted keypoint location.
- maxvals (np.ndarray[N, K, 1]): Scores (confidence) of the keypoints.
"""
assert isinstance(heatmaps,
np.ndarray), ('heatmaps should be numpy.ndarray')
assert heatmaps.ndim == 4, 'batch_images should be 4-ndim'
N, K, _, W = heatmaps.shape
heatmaps_reshaped = heatmaps.reshape((N, K, -1))
idx = np.argmax(heatmaps_reshaped, 2).reshape((N, K, 1))
maxvals = np.amax(heatmaps_reshaped, 2).reshape((N, K, 1))
preds = np.tile(idx, (1, 1, 2)).astype(np.float32)
preds[:, :, 0] = preds[:, :, 0] % W
preds[:, :, 1] = preds[:, :, 1] // W
preds = np.where(np.tile(maxvals, (1, 1, 2)) > 0.0, preds, -1)
return preds, maxvals
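# The flattened argmax index decodes back to grid coordinates via the modulo and
# integer division above: with W = 72, index 145 gives x = 145 % 72 = 1, y = 145 // 72 = 2.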
def transform_preds(coords, center, scale, output_size, use_udp=False):
"""Get final keypoint predictions from heatmaps and apply scaling and
translation to map them back to the image.
Note:
num_keypoints: K
Args:
coords (np.ndarray[K, ndims]):
            * If ndims=2, coords are predicted keypoint locations.
            * If ndims=4, coords are composed of (x, y, scores, tags)
            * If ndims=5, coords are composed of (x, y, scores, tags,
              flipped_tags)
center (np.ndarray[2, ]): Center of the bounding box (x, y).
scale (np.ndarray[2, ]): Scale of the bounding box
wrt [width, height].
output_size (np.ndarray[2, ] | list(2,)): Size of the
destination heatmaps.
use_udp (bool): Use unbiased data processing
Returns:
np.ndarray: Predicted coordinates in the images.
"""
assert coords.shape[1] in (2, 4, 5)
assert len(center) == 2
assert len(scale) == 2
assert len(output_size) == 2
# Recover the scale which is normalized by a factor of 200.
scale = scale * 200.0
if use_udp:
scale_x = scale[0] / (output_size[0] - 1.0)
scale_y = scale[1] / (output_size[1] - 1.0)
else:
scale_x = scale[0] / output_size[0]
scale_y = scale[1] / output_size[1]
target_coords = np.ones_like(coords)
target_coords[:, 0] = coords[:, 0] * scale_x + center[0] - scale[0] * 0.5
target_coords[:, 1] = coords[:, 1] * scale_y + center[1] - scale[1] * 0.5
return target_coords
def keypoints_from_heatmaps(heatmaps,
center,
scale,
unbiased=False,
post_process='default',
kernel=11,
valid_radius_factor=0.0546875,
use_udp=False,
target_type='GaussianHeatmap'):
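    """Decode heatmaps into keypoint predictions in the original image space.

    A trimmed-down take on mmpose's keypoints_from_heatmaps: the unbiased,
    kernel, valid_radius_factor and target_type arguments are accepted for
    signature compatibility but not used by this simplified implementation.
    """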
    # Work on a copy so the caller's heatmaps are left untouched
    heatmaps = heatmaps.copy()
N, K, H, W = heatmaps.shape
preds, maxvals = _get_max_preds(heatmaps)
# add +/-0.25 shift to the predicted locations for higher acc.
for n in range(N):
for k in range(K):
heatmap = heatmaps[n][k]
px = int(preds[n][k][0])
py = int(preds[n][k][1])
if 1 < px < W - 1 and 1 < py < H - 1:
diff = np.array([
heatmap[py][px + 1] - heatmap[py][px - 1],
heatmap[py + 1][px] - heatmap[py - 1][px]
])
preds[n][k] += np.sign(diff) * .25
if post_process == 'megvii':
preds[n][k] += 0.5
# Transform back to the image
for i in range(N):
preds[i] = transform_preds(
preds[i], center[i], scale[i], [W, H], use_udp=use_udp)
if post_process == 'megvii':
maxvals = maxvals / 255.0 + 0.5
return preds, maxvals
def decode(output, center, scale, score_, batch_size=1):
c = np.zeros((batch_size, 2), dtype=np.float32)
s = np.zeros((batch_size, 2), dtype=np.float32)
score = np.ones(batch_size)
for i in range(batch_size):
c[i, :] = center
s[i, :] = scale
#score[i] = np.array(score_).reshape(-1)
score[i] = score_
preds, maxvals = keypoints_from_heatmaps(
output,
c,
s,
False,
'default',
11,
0.0546875,
False,
'GaussianHeatmap'
)
all_preds = np.zeros((batch_size, preds.shape[1], 3), dtype=np.float32)
all_boxes = np.zeros((batch_size, 6), dtype=np.float32)
all_preds[:, :, 0:2] = preds[:, :, 0:2]
all_preds[:, :, 2:3] = maxvals
all_boxes[:, 0:2] = c[:, 0:2]
all_boxes[:, 2:4] = s[:, 0:2]
all_boxes[:, 4] = np.prod(s * 200.0, axis=1)
all_boxes[:, 5] = score
result = {}
result['preds'] = all_preds
result['boxes'] = all_boxes
return result
def draw(bgr, predict_dict, skeleton):
bboxes = predict_dict["boxes"]
for box in bboxes:
cv2.rectangle(bgr, (int(box[0]), int(box[1])), (int(box[0]) + int(box[2]), int(box[1]) + int(box[3])),
(255, 0, 0))
all_preds = predict_dict["preds"]
for all_pred in all_preds:
for x, y, s in all_pred:
cv2.circle(bgr, (int(x), int(y)), 3, (0, 255, 120), -1)
for sk in skeleton:
x0 = int(all_pred[sk[0]][0])
y0 = int(all_pred[sk[0]][1])
x1 = int(all_pred[sk[1]][0])
y1 = int(all_pred[sk[1]][1])
cv2.line(bgr, (x0, y0), (x1, y1), (0, 255, 0), 1)
cv2.imwrite("result.jpg", bgr)
def myFunc00(rknn_lite, IMG, yolo_box):
if yolo_box is None:
return IMG
# bbox = [450, 150, 1100, 550, 0.99]
bbox = [int(yolo_box[0]), int(yolo_box[1]), int(yolo_box[2]), int(yolo_box[3]), 0.99]
# bbox = [1428, 723, 1421, 847, 0.99]
image_size = [384, 288]
# img = src_img
img = cv2.cvtColor(IMG, cv2.COLOR_BGR2RGB) # hwc rgb
aspect_ratio = image_size[0] / image_size[1]
img_height = img.shape[0]
img_width = img.shape[1]
padding = 1.25
pixel_std = 200
center, scale = bbox_xywh2cs(
bbox,
aspect_ratio,
padding,
pixel_std)
trans = get_affine_transform(center, scale, 0, image_size)
    img = cv2.warpAffine(  # warping pads with black borders; the output keypoints are mapped back through the same transform
        img,
        trans, (int(image_size[0]), int(image_size[1])),
        flags=cv2.INTER_LINEAR)
    # The two transposes below cancel out; they are kept only so the commented
    # normalization (presumably folded into the quantized RKNN model at
    # conversion time) can be re-enabled on the CHW layout if needed.
    img = np.transpose(img, (2, 0, 1)).astype(np.float32)  # hwc rgb -> chw rgb
    # outputs = rknn.inference(inputs=[img], data_type=None, data_format="nchw")[0]
    # img[0, ...] = ((img[0, ...] / 255.0) - 0.485) / 0.229
    # img[1, ...] = ((img[1, ...] / 255.0) - 0.456) / 0.224
    # img[2, ...] = ((img[2, ...] / 255.0) - 0.406) / 0.225
    img = np.transpose(img, (1, 2, 0)).astype(np.float32)  # chw rgb -> back to hwc rgb
    # img = img.reshape(1, 256, 192, 3)
# Inference
print("--> Running model")
start = time.time()
img = np.expand_dims(img, axis=0)
outputs = rknn_lite.inference(inputs=[img])[0]
end = time.time()
    # Report the inference time
    runTime = end - start
    runTime_ms = runTime * 1000
    print("Inference time:", runTime_ms, "ms")
predict_dict = decode(outputs, center, scale, bbox[-1])
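    # COCO 17-keypoint skeleton as index pairs (0 nose, 1/2 eyes, 3/4 ears,
    # 5/6 shoulders, 7/8 elbows, 9/10 wrists, 11/12 hips, 13/14 knees, 15/16 ankles)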
skeleton = [[15, 13], [13, 11], [16, 14], [14, 12], [11, 12], [5, 11], [6, 12], [5, 6], [5, 7], [6, 8], [7, 9],
[8, 10], [1, 2], [0, 1], [0, 2], [1, 3], [2, 4], [3, 5], [4, 6]]
draw(IMG, predict_dict, skeleton)
return IMG
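Before wiring it into the pipeline, the decoding path can be sanity-checked on its own with a synthetic heatmap. This is a minimal sketch: the peak location, center and scale are made-up values, and it only assumes hrnet_inference.py is importable.
import numpy as np
from hrnet_inference import decode
# One fake (1, 17, 96, 72) heatmap batch with a single unit peak per keypoint.
heatmaps = np.zeros((1, 17, 96, 72), dtype=np.float32)
heatmaps[0, :, 48, 36] = 1.0  # peak at (x=36, y=48) on the heatmap grid
center = np.array([960.0, 540.0], dtype=np.float32)  # bbox center in image coordinates
scale = np.array([1.5, 2.0], dtype=np.float32)       # bbox (w, h) / 200
result = decode(heatmaps, center, scale, 0.99)
print(result['preds'].shape)  # (1, 17, 3): x, y, confidence
print(result['preds'][0][0])  # [960. 540.   1.]: the grid peak mapped back into the image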
Next, the YOLO detection function, yolo_inference.py:
from copy import copy
import time
import numpy as np
import cv2
from rknnlite.api import RKNNLite
# BOX = (450, 150, 1100, 550)
BOX = (170, 80, 740, 1360)
x, y, w, h = BOX
OBJ_THRESH = 0.25
NMS_THRESH = 0.45
IMG_SIZE = (320, 320)
CLASSES = ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light',
'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow',
'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee',
'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard',
'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple',
'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch',
'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone',
'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear',
'hair drier', 'toothbrush']
anchors = [[[10, 13], [16, 30], [33, 23]],
[[30, 61], [62, 45], [59, 119]],
[[116, 90], [156, 198], [373, 326]]]
class Letter_Box_Info():
def __init__(self, shape, new_shape, w_ratio, h_ratio, dw, dh, pad_color) -> None:
self.origin_shape = shape
self.new_shape = new_shape
self.w_ratio = w_ratio
self.h_ratio = h_ratio
self.dw = dw
self.dh = dh
self.pad_color = pad_color
def box_process(position, anchors):
grid_h, grid_w = position.shape[2:4]
    col, row = np.meshgrid(np.arange(0, grid_w), np.arange(0, grid_h))  # (40, 40) for a 320 input
    col = col.reshape(1, 1, grid_h, grid_w)  # (1, 1, 40, 40)
    row = row.reshape(1, 1, grid_h, grid_w)
    grid = np.concatenate((col, row), axis=1)  # (1, 2, 40, 40)
    stride = np.array([IMG_SIZE[1]//grid_h, IMG_SIZE[0]//grid_w]).reshape(1, 2, 1, 1)  # 8 for the first head
anchors = np.array(anchors)
anchors = anchors.reshape(*anchors.shape, 1, 1) # (3, 2, 1, 1)
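    # YOLOv5-style decode (this assumes the model's outputs are already
    # sigmoid-activated): xy = (2*p_xy - 0.5 + grid) * stride, wh = (2*p_wh)^2 * anchor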
box_xy = position[:,:2,:,:]*2 - 0.5
box_wh = pow(position[:,2:4,:,:]*2, 2) * anchors
box_xy += grid
box_xy *= stride
    box = np.concatenate((box_xy, box_wh), axis=1)  # (3, 4, 40, 40)
# Convert [c_x, c_y, w, h] to [x1, y1, x2, y2]
xyxy = np.copy(box)
xyxy[:, 0, :, :] = box[:, 0, :, :] - box[:, 2, :, :]/ 2 # top left x
xyxy[:, 1, :, :] = box[:, 1, :, :] - box[:, 3, :, :]/ 2 # top left y
xyxy[:, 2, :, :] = box[:, 0, :, :] + box[:, 2, :, :]/ 2 # bottom right x
xyxy[:, 3, :, :] = box[:, 1, :, :] + box[:, 3, :, :]/ 2 # bottom right y
return xyxy
def filter_boxes(boxes, box_confidences, box_class_probs):
    """Filter boxes whose combined score falls below OBJ_THRESH."""
    box_confidences = box_confidences.reshape(-1)
    class_max_score = np.max(box_class_probs, axis=-1)
    classes = np.argmax(box_class_probs, axis=-1)
    _class_pos = np.where(class_max_score * box_confidences >= OBJ_THRESH)
    scores = (class_max_score * box_confidences)[_class_pos]
    boxes = boxes[_class_pos]
    classes = classes[_class_pos]
    return boxes, classes, scores
def nms_boxes(boxes, scores):
"""Suppress non-maximal boxes.
# Returns
keep: ndarray, index of effective boxes.
"""
x = boxes[:, 0]
y = boxes[:, 1]
w = boxes[:, 2] - boxes[:, 0]
h = boxes[:, 3] - boxes[:, 1]
areas = w * h
order = scores.argsort()[::-1]
keep = []
while order.size > 0:
i = order[0]
keep.append(i)
xx1 = np.maximum(x[i], x[order[1:]])
yy1 = np.maximum(y[i], y[order[1:]])
xx2 = np.minimum(x[i] + w[i], x[order[1:]] + w[order[1:]])
yy2 = np.minimum(y[i] + h[i], y[order[1:]] + h[order[1:]])
w1 = np.maximum(0.0, xx2 - xx1 + 0.00001)
h1 = np.maximum(0.0, yy2 - yy1 + 0.00001)
inter = w1 * h1
ovr = inter / (areas[i] + areas[order[1:]] - inter)
inds = np.where(ovr <= NMS_THRESH)[0]
order = order[inds + 1]
keep = np.array(keep)
return keep
def post_process(input_data, anchors):
boxes, scores, classes_conf = [], [], []
    # 1*255*h*w -> 3*85*h*w
    input_data = [_in.reshape([len(anchors[0]), -1] + list(_in.shape[-2:])) for _in in input_data]
    for i in range(len(input_data)):  # (3, 85, 40, 40) for a 320 input
        boxes.append(box_process(input_data[i][:, :4, :, :], anchors[i]))  # (3, 4, 40, 40)
        scores.append(input_data[i][:, 4:5, :, :])  # (3, 1, 40, 40)
        classes_conf.append(input_data[i][:, 5:, :, :])  # (3, 80, 40, 40)
def sp_flatten(_in):
ch = _in.shape[1]
_in = _in.transpose(0,2,3,1)
return _in.reshape(-1, ch)
    boxes = [sp_flatten(_v) for _v in boxes]                 # per head: (N, 4)
    classes_conf = [sp_flatten(_v) for _v in classes_conf]   # per head: (N, 80)
    scores = [sp_flatten(_v) for _v in scores]               # per head: (N, 1)
    boxes = np.concatenate(boxes)                # (6300, 4) for a 320 input
    classes_conf = np.concatenate(classes_conf)  # (6300, 80)
    scores = np.concatenate(scores)              # (6300, 1)
# filter according to threshold
boxes, classes, scores = filter_boxes(boxes, scores, classes_conf)
    # e.g. boxes (12, 4), classes (12,), scores (12,) after thresholding
# nms
nboxes, nclasses, nscores = [], [], []
for c in set(classes):
inds = np.where(classes == c)
b = boxes[inds]
c = classes[inds]
s = scores[inds]
keep = nms_boxes(b, s)
if len(keep) != 0:
nboxes.append(b[keep])
nclasses.append(c[keep])
nscores.append(s[keep])
if not nclasses and not nscores:
return None, None, None
boxes = np.concatenate(nboxes)
classes = np.concatenate(nclasses)
scores = np.concatenate(nscores)
return boxes, classes, scores
def draw(image, boxes, scores, classes):
    for box, score, cl in zip(boxes, scores, classes):
        x1, y1, x2, y2 = [int(_b) for _b in box]  # boxes are in xyxy format
        print("%s @ (%d %d %d %d) %.3f" % (CLASSES[cl], x1, y1, x2, y2, score))
        area_left, area_top, area_w, area_h = BOX
        cv2.rectangle(image, (area_left, area_top), (area_left + area_w, area_top + area_h), (0, 0, 255), 2)
        cv2.rectangle(image, (x1, y1), (x2, y2), (255, 0, 0), 2)
        cv2.putText(image, '{0} {1:.2f}'.format(CLASSES[cl], score),
                    (x1, y1 - 6), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 255), 2)
def letterbox(im, new_shape=(640, 640), color=(0, 0, 0), letter_box_info_list=None):
    if letter_box_info_list is None:  # avoid Python's mutable-default-argument pitfall
        letter_box_info_list = []
    shape = im.shape[:2]  # current shape [height, width]
if isinstance(new_shape, int):
new_shape = (new_shape, new_shape)
r = min(new_shape[0] / shape[0], new_shape[1] / shape[1])
ratio = r # width, height ratios
new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r))
dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1] # wh padding
# dw, dh = np.mod(dw, 32), np.mod(dh, 32)
dw /= 2 # divide padding into 2 sides
dh /= 2
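    # Example: a 1920x1080 frame letterboxed to (320, 320): r = 1/6, resize to
    # 320x180, leaving 140 px of vertical padding split as 70 px top and bottom.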
if shape[::-1] != new_unpad: # resize
im = cv2.resize(im, new_unpad, interpolation=cv2.INTER_LINEAR)
top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1))
left, right = int(round(dw - 0.1)), int(round(dw + 0.1))
im = cv2.copyMakeBorder(im, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color) # add border
letter_box_info_list.append(Letter_Box_Info(shape, new_shape, ratio, ratio, dw, dh, color))
return im, letter_box_info_list
def get_real_box(box, in_format='xyxy', letter_box_info_list=[]):
bbox = copy(box)
# unletter_box result
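    # Inverse of letterbox: real = (letterboxed - pad) / ratio, then clip to the
    # original frame so boxes cannot fall outside the image.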
if in_format=='xyxy':
bbox[:,0] -= letter_box_info_list[-1].dw
bbox[:,0] /= letter_box_info_list[-1].w_ratio
bbox[:,0] = np.clip(bbox[:,0], 0, letter_box_info_list[-1].origin_shape[1])
bbox[:,1] -= letter_box_info_list[-1].dh
bbox[:,1] /= letter_box_info_list[-1].h_ratio
bbox[:,1] = np.clip(bbox[:,1], 0, letter_box_info_list[-1].origin_shape[0])
bbox[:,2] -= letter_box_info_list[-1].dw
bbox[:,2] /= letter_box_info_list[-1].w_ratio
bbox[:,2] = np.clip(bbox[:,2], 0, letter_box_info_list[-1].origin_shape[1])
bbox[:,3] -= letter_box_info_list[-1].dh
bbox[:,3] /= letter_box_info_list[-1].h_ratio
bbox[:,3] = np.clip(bbox[:,3], 0, letter_box_info_list[-1].origin_shape[0])
return bbox
def yolo_run(yolo_rknn, frame):
img0 = frame[y:y + h, x:x + w, :]
    img, letter_box_info_list = letterbox(im=img0.copy(), new_shape=(IMG_SIZE[1], IMG_SIZE[0]))  # padded resize
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)  # BGR to RGB (RKNNLite takes NHWC input, so no CHW transpose)
if len(img.shape) == 3:
img = img[None] # expand for batch dim
outputs = yolo_rknn.inference(inputs=[img]) # Inference
    boxes, classes, scores = post_process(outputs, anchors)
    if boxes is None:  # nothing passed the confidence threshold
        return None
    boxes_filter, scores_filter, classes_filter = None, [], []
    max_box = [0, 0, 0, 0]
for box, score, cl in zip(boxes, scores, classes):
if cl == 0:
if (box[2]-box[0])*(box[3]-box[1]) > (max_box[2]-max_box[0])*(max_box[3]-max_box[1]):
max_box = box
boxes_filter = np.expand_dims(max_box, axis=0)
scores_filter = np.expand_dims(score, axis=0)
classes_filter = np.expand_dims(cl, axis=0)
    # img_p = img0.copy()
    if boxes_filter is None:  # no person among the detections
        return None
    yolo_box = get_real_box(boxes_filter, 'xyxy', letter_box_info_list)
yolo_box[0][0] += x
yolo_box[0][1] += y
yolo_box[0][2] += x
yolo_box[0][3] += y
draw(frame, yolo_box, scores_filter, classes_filter)
# cv2.imwrite("11.jpg", frame)
return yolo_box[0]
Finally, the main script, inference.py:
import cv2
import time
from hrnet_inference import myFunc00
from rknnlite.api import RKNNLite
from yolo_inference import yolo_run
rknn_model = './models/rktest.rknn'
yolo_model = './models/yolotest.rknn'
yolo_rknn = RKNNLite()
print('--> Load YOLO RKNN model')
yolo_ret = yolo_rknn.load_rknn(yolo_model)
if yolo_ret != 0:
print('Load YOLO RKNN model failed')
exit(yolo_ret)
print('done')
yolo_ret = yolo_rknn.init_runtime()
if yolo_ret != 0:
print('Init runtime environment failed!')
exit(yolo_ret)
print('done')
hrnet_rknn = RKNNLite()
print('--> Load HRNet RKNN model')
hrnet_ret = hrnet_rknn.load_rknn(rknn_model)
if hrnet_ret != 0:
print('Load HRNet RKNN model failed')
exit(hrnet_ret)
print('done')
hrnet_ret = hrnet_rknn.init_runtime()
if hrnet_ret != 0:
print('Init runtime environment failed!')
exit(hrnet_ret)
print('done')
cap = cv2.VideoCapture('./input/000.mp4')
frames, loopTime, initTime = 0, time.time(), time.time()
pTime = 0
while (cap.isOpened()):
frames += 1
ret, frame = cap.read()
if not ret:
break
try:
yolo_box = yolo_run(yolo_rknn, frame)
    except Exception:  # skip frames where detection fails
        continue
frame = myFunc00(hrnet_rknn, frame, yolo_box)
cTime = time.time()
fps = 1 / (cTime - pTime)
pTime = cTime
cv2.putText(frame, str(int(fps)), (50, 50), cv2.FONT_HERSHEY_PLAIN, 3, (0, 255, 0), 3)
frame = cv2.resize(frame, (int(frame.shape[1] / 2), int(frame.shape[0] / 2)))
cv2.imshow('test', frame)
cv2.imwrite("11.jpg", frame)
if cv2.waitKey(1) & 0xFF == ord('q'):
break
    if frames % 30 == 0:
        print("Average FPS over the last 30 frames:\t", 30 / (time.time() - loopTime))
        loopTime = time.time()
print("Overall average FPS:\t", frames / (time.time() - initTime))
# Release the capture and the RKNN runtimes
cap.release()
cv2.destroyAllWindows()
yolo_rknn.release()
hrnet_rknn.release()