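# YOLOv8 deployment project for the Ascend 310B, developed with the acl (pyACL) tooling shipped with Huawei CANN. It implements classification, detection and segmentation, and shows how to call each of them.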
import acl
import numpy as np
import colorsys
import copy
from PIL import Image
import cv2
import time
import os
# from utils import cvtColor, preprocess_input, resize_image
# Working directory at import time. When a configured "weights" value is not an
# existing file, the model classes below look for it under <ROOT>/weights/.
ROOT= os.getcwd()
#*************************分类**************************
class ACL_Yolov8_cls(object):
    """YOLOv8 classification model executed through the pyACL (``acl``) runtime.

    Typical use: build the object from a config dict, call ``init()`` once to
    load the model and allocate device buffers, call ``infer()`` once per
    image, and finally call ``destory()`` to release every resource.

    Config keys read by the constructor: ``"weights"``, ``"device_id"``,
    ``"img_size"`` (height, width) and ``"classes"``.
    """

    def __init__(self, config):
        """Resolve the model path and open the ACL device/context.

        Args:
            config (dict): see the class docstring for the keys that are read.
        """
        # A bare file name is looked up under <ROOT>/weights/.
        if os.path.isfile(config["weights"]):
            self.model_path = config["weights"]
        else:
            self.model_path = os.path.join(ROOT, 'weights', config["weights"])
        self.device_id = config["device_id"]
        # Return codes are deliberately not enforced here: a second acl.init()
        # in the same process reports an error although the runtime is usable.
        acl.init()
        acl.rt.set_device(self.device_id)
        self.context, _ = acl.rt.create_context(self.device_id)
        self.ACL_MEMCPY_HOST_TO_DEVICE = 1
        self.ACL_MEMCPY_DEVICE_TO_HOST = 2
        # NOTE(review): in the ACL headers the value 2 appears to be
        # ACL_MEM_MALLOC_NORMAL_ONLY rather than HUGE_ONLY - confirm. The
        # attribute name and value are kept for backward compatibility.
        self.ACL_MEM_MALLOC_HUGE_ONLY = 2
        self.model_id = None
        self.model_desc = None
        self.load_input_dataset = None
        self.load_output_dataset = None
        self.input_data = []
        self.output_data = []
        self.ndtype = np.single
        # Model input parameters.
        self.imgsz = config["img_size"]
        self.model_height, self.model_width = self.imgsz[0], self.imgsz[1]  # letterbox target size
        self.classes = config["classes"]

    @staticmethod
    def _check(ret, what):
        """Raise RuntimeError when an ACL call reports a non-zero return code."""
        if ret != 0:
            raise RuntimeError(f"{what} failed with ACL error code {ret}")

    def init(self, model_path=None):
        """Load the offline model and allocate its input/output datasets.

        Args:
            model_path (str | None): path of the ``.om`` file. Defaults to the
                path resolved by the constructor.

        Raises:
            RuntimeError: if the model cannot be loaded or described.
        """
        if model_path is None:
            model_path = self.model_path
        model_id, ret = acl.mdl.load_from_file(model_path)
        self._check(ret, f"acl.mdl.load_from_file({model_path!r})")
        self.model_id = model_id
        self.model_desc = acl.mdl.create_desc()
        self._check(acl.mdl.get_desc(self.model_desc, self.model_id), "acl.mdl.get_desc")
        self.gen_input_dataset()
        self.gen_output_dataset()

    def gen_output_dataset(self):
        """Allocate one device buffer per model output and wrap them in a dataset."""
        self.load_output_dataset = acl.mdl.create_dataset()
        for i in range(acl.mdl.get_num_outputs(self.model_desc)):
            buffer_size = acl.mdl.get_output_size_by_index(self.model_desc, i)
            buffer, ret = acl.rt.malloc(buffer_size, self.ACL_MEM_MALLOC_HUGE_ONLY)
            self._check(ret, "acl.rt.malloc")
            data = acl.create_data_buffer(buffer, buffer_size)
            _, ret = acl.mdl.add_dataset_buffer(self.load_output_dataset, data)
            self._check(ret, "acl.mdl.add_dataset_buffer")
            self.output_data.append({"buffer": buffer, "size": buffer_size})

    def gen_input_dataset(self):
        """Allocate one device buffer per model input and wrap them in a dataset."""
        self.load_input_dataset = acl.mdl.create_dataset()
        for i in range(acl.mdl.get_num_inputs(self.model_desc)):
            buffer_size = acl.mdl.get_input_size_by_index(self.model_desc, i)
            buffer, ret = acl.rt.malloc(buffer_size, self.ACL_MEM_MALLOC_HUGE_ONLY)
            self._check(ret, "acl.rt.malloc")
            data = acl.create_data_buffer(buffer, buffer_size)
            _, ret = acl.mdl.add_dataset_buffer(self.load_input_dataset, data)
            self._check(ret, "acl.mdl.add_dataset_buffer")
            self.input_data.append({"buffer": buffer, "size": buffer_size})

    def process_output(self):
        """Copy every model output from device to host.

        Returns:
            list[numpy.ndarray]: one float32 array per model output, reshaped
            to the dimensions reported by the model description.
        """
        inference_result = []
        for i, item in enumerate(self.output_data):
            # get_output_dims returns (dims_dict, ret); the dict is always at
            # position 0, whatever the output index is.
            dims, ret = acl.mdl.get_output_dims(self.model_desc, i)
            self._check(ret, "acl.mdl.get_output_dims")
            shape = tuple(dims["dims"])
            buffer_host, ret = acl.rt.malloc_host(item["size"])
            self._check(ret, "acl.rt.malloc_host")
            try:
                # Transfer the inference output from device to host.
                ret = acl.rt.memcpy(buffer_host, item["size"], item["buffer"],
                                    item["size"], self.ACL_MEMCPY_DEVICE_TO_HOST)
                self._check(ret, "acl.rt.memcpy(device->host)")
                bytes_out = acl.util.ptr_to_bytes(buffer_host, item["size"])
                # copy() so the result never refers to the host buffer freed below.
                data = np.frombuffer(bytes_out, dtype=np.float32).reshape(shape).copy()
            finally:
                acl.rt.free_host(buffer_host)
            inference_result.append(data)
        return inference_result

    def load_input_data(self, img):
        """Copy a preprocessed image into the first model input buffer.

        Args:
            img (numpy.ndarray): tensor returned by ``preprocessing``.
        """
        bytes_data = img.tobytes()  # ndarray.tostring() no longer exists in NumPy 2
        np_ptr = acl.util.bytes_to_ptr(bytes_data)
        # Copy exactly the bytes we own; memcpy itself rejects a source that is
        # larger than the device buffer.
        ret = acl.rt.memcpy(self.input_data[0]["buffer"], self.input_data[0]["size"], np_ptr,
                            len(bytes_data), self.ACL_MEMCPY_HOST_TO_DEVICE)
        self._check(ret, "acl.rt.memcpy(host->device)")

    def execute(self):
        """Run one synchronous inference on the loaded model."""
        ret = acl.mdl.execute(self.model_id, self.load_input_dataset, self.load_output_dataset)
        self._check(ret, "acl.mdl.execute")

    def destory(self):
        """Release datasets, device buffers, the model, the context and the device.

        Cleanup is best effort: return codes are ignored so that one failing
        release does not prevent the remaining ones.
        """
        for dataset in (self.load_input_dataset, self.load_output_dataset):
            if dataset is None:
                continue
            for i in range(acl.mdl.get_dataset_num_buffers(dataset)):
                data_buf = acl.mdl.get_dataset_buffer(dataset, i)
                if data_buf:
                    acl.destroy_data_buffer(data_buf)
            acl.mdl.destroy_dataset(dataset)
        self.load_input_dataset = None
        self.load_output_dataset = None
        for item in self.input_data + self.output_data:
            acl.rt.free(item["buffer"])
        self.input_data = []
        self.output_data = []
        if self.model_id is not None:
            acl.mdl.unload(self.model_id)
            self.model_id = None
        if self.model_desc is not None:
            acl.mdl.destroy_desc(self.model_desc)
            self.model_desc = None
        acl.rt.destroy_context(self.context)
        acl.rt.reset_device(self.device_id)
        acl.finalize()

    def preprocessing(self, img):
        """Letterbox an image and convert it to the model input layout.

        Args:
            img (numpy.ndarray): HWC image; the channel axis is reversed, so a
                BGR input (as read by OpenCV) becomes RGB.

        Returns:
            tuple: ``(img_process, ratio, (pad_w, pad_h))`` where
            ``img_process`` is a ``(1, C, H, W)`` array of ``self.ndtype``
            scaled to [0, 1], ``ratio`` is the (w, h) resize ratio and
            ``pad_w`` / ``pad_h`` are the per-side paddings (floats).
        """
        # Resize and pad the input using letterbox() (borrowed from Ultralytics).
        shape = img.shape[:2]  # original (height, width)
        new_shape = (self.model_height, self.model_width)
        r = min(new_shape[0] / shape[0], new_shape[1] / shape[1])
        ratio = r, r
        new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r))  # (width, height)
        pad_w, pad_h = (new_shape[1] - new_unpad[0]) / 2, (new_shape[0] - new_unpad[1]) / 2
        if shape[::-1] != new_unpad:
            img = cv2.resize(img, new_unpad, interpolation=cv2.INTER_LINEAR)
        # The -0.1 / +0.1 offsets put the extra pixel of an odd padding on the
        # bottom/right side.
        top, bottom = int(round(pad_h - 0.1)), int(round(pad_h + 0.1))
        left, right = int(round(pad_w - 0.1)), int(round(pad_w + 0.1))
        if top or bottom or left or right:
            img = cv2.copyMakeBorder(img, top, bottom, left, right,
                                     cv2.BORDER_CONSTANT, value=(114, 114, 114))
        # HWC -> CHW, reverse channels (BGR -> RGB), cast, scale to [0, 1].
        img = np.ascontiguousarray(np.einsum('HWC->CHW', img)[::-1], dtype=self.ndtype) / 255.0
        img_process = img[None] if len(img.shape) == 3 else img
        return img_process, ratio, (pad_w, pad_h)

    def infer(self, img_):
        """Classify one image.

        Args:
            img_ (numpy.ndarray): HWC image as accepted by ``preprocessing``.

        Returns:
            dict: class name -> array holding that class's score for every
            batch entry (``preds[:, index]``).
        """
        img, _ratio, _pad = self.preprocessing(img_)
        time0 = time.time()
        self.load_input_data(img)
        print(f'data copy to device time cost:{time.time() - time0}')
        time1 = time.time()
        self.execute()
        print(f'device inference time cost:{time.time() - time1}')
        time2 = time.time()
        preds = self.process_output()[0]
        print(f'data copy to host time cost:{time.time() - time2}')
        # Post-processing: one score column per configured class name.
        return {classname: preds[:, index] for index, classname in enumerate(self.classes)}
#*************************检测**************************
class ACL_Yolov8_det(object):
    """YOLOv8 detection model executed through the pyACL (``acl``) runtime.

    Typical use: build the object from a config dict, call ``infer()`` once
    per image (the model is loaded on first use, or explicitly via
    ``init()``), and finally call ``destory()`` to release every resource.

    Config keys read by the constructor: ``"weights"``, ``"device_id"``,
    ``"img_size"`` (height, width), ``"conf_thres"``, ``"iou_thres"`` and
    ``"classes"``.
    """

    def __init__(self, config):
        """Resolve the model path and open the ACL device/context.

        Args:
            config (dict): see the class docstring for the keys that are read.
        """
        # A bare file name is looked up under <ROOT>/weights/.
        if os.path.isfile(config["weights"]):
            self.model_path = config["weights"]
        else:
            self.model_path = os.path.join(ROOT, 'weights', config["weights"])
        self.device_id = config["device_id"]
        # Return codes are deliberately not enforced here: a second acl.init()
        # in the same process reports an error although the runtime is usable.
        acl.init()
        acl.rt.set_device(self.device_id)
        self.context, _ = acl.rt.create_context(self.device_id)
        self.ACL_MEMCPY_HOST_TO_DEVICE = 1
        self.ACL_MEMCPY_DEVICE_TO_HOST = 2
        # NOTE(review): in the ACL headers the value 2 appears to be
        # ACL_MEM_MALLOC_NORMAL_ONLY rather than HUGE_ONLY - confirm. The
        # attribute name and value are kept for backward compatibility.
        self.ACL_MEM_MALLOC_HUGE_ONLY = 2
        self.model_id = None
        self.model_desc = None
        self.load_input_dataset = None
        self.load_output_dataset = None
        self.input_data = []
        self.output_data = []
        self.ndtype = np.single
        self.imgsz = config["img_size"]
        self.model_height, self.model_width = self.imgsz[0], self.imgsz[1]  # letterbox target size
        self.conf_threshold = config["conf_thres"]
        self.iou_threshold = config["iou_thres"]
        self.classes = config["classes"]

    @staticmethod
    def _check(ret, what):
        """Raise RuntimeError when an ACL call reports a non-zero return code."""
        if ret != 0:
            raise RuntimeError(f"{what} failed with ACL error code {ret}")

    @staticmethod
    def _nms_indices(cxcywh, scores, conf_threshold, iou_threshold):
        """Run OpenCV NMS on centre-format boxes and return a flat index array.

        cv2.dnn.NMSBoxes expects (left, top, width, height) rectangles, so the
        centre coordinates are shifted first. The result is flattened because
        its shape differs between OpenCV versions (tuple, 1-D or Nx1).
        """
        rects = np.array(cxcywh, dtype=np.float64)
        rects[:, :2] -= rects[:, 2:4] / 2
        keep = cv2.dnn.NMSBoxes(rects.tolist(), np.asarray(scores, dtype=np.float64).tolist(),
                                conf_threshold, iou_threshold)
        return np.asarray(keep, dtype=int).reshape(-1)

    def init(self, model_path=None):
        """Load the offline model and allocate its input/output datasets.

        Args:
            model_path (str | None): path of the ``.om`` file. Defaults to the
                path resolved by the constructor.

        Raises:
            RuntimeError: if the model cannot be loaded or described.
        """
        if model_path is None:
            model_path = self.model_path
        model_id, ret = acl.mdl.load_from_file(model_path)
        self._check(ret, f"acl.mdl.load_from_file({model_path!r})")
        self.model_id = model_id
        self.model_desc = acl.mdl.create_desc()
        self._check(acl.mdl.get_desc(self.model_desc, self.model_id), "acl.mdl.get_desc")
        self.gen_input_dataset()
        self.gen_output_dataset()

    def gen_output_dataset(self):
        """Allocate one device buffer per model output and wrap them in a dataset."""
        self.load_output_dataset = acl.mdl.create_dataset()
        for i in range(acl.mdl.get_num_outputs(self.model_desc)):
            buffer_size = acl.mdl.get_output_size_by_index(self.model_desc, i)
            buffer, ret = acl.rt.malloc(buffer_size, self.ACL_MEM_MALLOC_HUGE_ONLY)
            self._check(ret, "acl.rt.malloc")
            data = acl.create_data_buffer(buffer, buffer_size)
            _, ret = acl.mdl.add_dataset_buffer(self.load_output_dataset, data)
            self._check(ret, "acl.mdl.add_dataset_buffer")
            self.output_data.append({"buffer": buffer, "size": buffer_size})

    def gen_input_dataset(self):
        """Allocate one device buffer per model input and wrap them in a dataset."""
        self.load_input_dataset = acl.mdl.create_dataset()
        for i in range(acl.mdl.get_num_inputs(self.model_desc)):
            buffer_size = acl.mdl.get_input_size_by_index(self.model_desc, i)
            print("buffer_size",buffer_size)
            buffer, ret = acl.rt.malloc(buffer_size, self.ACL_MEM_MALLOC_HUGE_ONLY)
            self._check(ret, "acl.rt.malloc")
            data = acl.create_data_buffer(buffer, buffer_size)
            _, ret = acl.mdl.add_dataset_buffer(self.load_input_dataset, data)
            self._check(ret, "acl.mdl.add_dataset_buffer")
            self.input_data.append({"buffer": buffer, "size": buffer_size})

    def process_output(self):
        """Copy every model output from device to host.

        Returns:
            list[numpy.ndarray]: one float32 array per model output, reshaped
            to the dimensions reported by the model description.
        """
        inference_result = []
        for i, item in enumerate(self.output_data):
            # get_output_dims returns (dims_dict, ret); the dict is always at
            # position 0, whatever the output index is.
            dims, ret = acl.mdl.get_output_dims(self.model_desc, i)
            self._check(ret, "acl.mdl.get_output_dims")
            shape = tuple(dims["dims"])
            buffer_host, ret = acl.rt.malloc_host(item["size"])
            self._check(ret, "acl.rt.malloc_host")
            try:
                # Transfer the inference output from device to host.
                ret = acl.rt.memcpy(buffer_host, item["size"], item["buffer"],
                                    item["size"], self.ACL_MEMCPY_DEVICE_TO_HOST)
                self._check(ret, "acl.rt.memcpy(device->host)")
                bytes_out = acl.util.ptr_to_bytes(buffer_host, item["size"])
                # copy() so the result never refers to the host buffer freed below.
                data = np.frombuffer(bytes_out, dtype=np.float32).reshape(shape).copy()
            finally:
                acl.rt.free_host(buffer_host)
            inference_result.append(data)
        return inference_result

    def load_input_data(self, img):
        """Copy a preprocessed image into the first model input buffer.

        Args:
            img (numpy.ndarray): tensor returned by ``preprocessing``.
        """
        bytes_data = img.tobytes()
        np_ptr = acl.util.bytes_to_ptr(bytes_data)
        # Copy exactly the bytes we own; memcpy itself rejects a source that is
        # larger than the device buffer.
        ret = acl.rt.memcpy(self.input_data[0]["buffer"], self.input_data[0]["size"], np_ptr,
                            len(bytes_data), self.ACL_MEMCPY_HOST_TO_DEVICE)
        self._check(ret, "acl.rt.memcpy(host->device)")

    def execute(self):
        """Run one synchronous inference on the loaded model."""
        ret = acl.mdl.execute(self.model_id, self.load_input_dataset, self.load_output_dataset)
        self._check(ret, "acl.mdl.execute")

    def destory(self):
        """Release datasets, device buffers, the model, the context and the device.

        Cleanup is best effort: return codes are ignored so that one failing
        release does not prevent the remaining ones.
        """
        for dataset in (self.load_input_dataset, self.load_output_dataset):
            if dataset is None:
                continue
            for i in range(acl.mdl.get_dataset_num_buffers(dataset)):
                data_buf = acl.mdl.get_dataset_buffer(dataset, i)
                if data_buf:
                    acl.destroy_data_buffer(data_buf)
            acl.mdl.destroy_dataset(dataset)
        self.load_input_dataset = None
        self.load_output_dataset = None
        for item in self.input_data + self.output_data:
            acl.rt.free(item["buffer"])
        self.input_data = []
        self.output_data = []
        if self.model_id is not None:
            acl.mdl.unload(self.model_id)
            self.model_id = None
        if self.model_desc is not None:
            acl.mdl.destroy_desc(self.model_desc)
            self.model_desc = None
        acl.rt.destroy_context(self.context)
        acl.rt.reset_device(self.device_id)
        acl.finalize()

    def preprocessing(self, img):
        """Letterbox an image and convert it to the model input layout.

        Args:
            img (numpy.ndarray): HWC image; the channel axis is reversed, so a
                BGR input (as read by OpenCV) becomes RGB.

        Returns:
            tuple: ``(img_process, ratio, (pad_w, pad_h))`` where
            ``img_process`` is a ``(1, C, H, W)`` array of ``self.ndtype``
            scaled to [0, 1], ``ratio`` is the (w, h) resize ratio and
            ``pad_w`` / ``pad_h`` are the per-side paddings (floats).
        """
        # Resize and pad the input using letterbox() (borrowed from Ultralytics).
        shape = img.shape[:2]  # original (height, width)
        new_shape = (self.model_height, self.model_width)
        r = min(new_shape[0] / shape[0], new_shape[1] / shape[1])
        ratio = r, r
        new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r))  # (width, height)
        pad_w, pad_h = (new_shape[1] - new_unpad[0]) / 2, (new_shape[0] - new_unpad[1]) / 2
        if shape[::-1] != new_unpad:
            img = cv2.resize(img, new_unpad, interpolation=cv2.INTER_LINEAR)
        # The -0.1 / +0.1 offsets put the extra pixel of an odd padding on the
        # bottom/right side.
        top, bottom = int(round(pad_h - 0.1)), int(round(pad_h + 0.1))
        left, right = int(round(pad_w - 0.1)), int(round(pad_w + 0.1))
        if top or bottom or left or right:
            img = cv2.copyMakeBorder(img, top, bottom, left, right,
                                     cv2.BORDER_CONSTANT, value=(114, 114, 114))
        # HWC -> CHW, reverse channels (BGR -> RGB), cast, scale to [0, 1].
        img = np.ascontiguousarray(np.einsum('HWC->CHW', img)[::-1], dtype=self.ndtype) / 255.0
        print("image_data",img.shape)
        img_process = img[None] if len(img.shape) == 3 else img
        return img_process, ratio, (pad_w, pad_h)

    def postprocess_v8(self, preds, im0, ratio, pad_w, pad_h, conf_threshold, iou_threshold):
        """Filter, de-duplicate and rescale raw predictions, then draw them on ``im0``.

        Args:
            preds (numpy.ndarray): raw model output laid out as
                (batch, 4 + num_classes, num_anchors), e.g. (1, 84, 8400).
            im0 (numpy.ndarray): [h, w, c] original image; boxes and labels are
                drawn on it in place.
            ratio (tuple): width, height ratios used by the letterbox.
            pad_w (float): width padding used by the letterbox.
            pad_h (float): height padding used by the letterbox.
            conf_threshold (float): minimum class score to keep a prediction.
            iou_threshold (float): IoU threshold for non-maximum suppression.

        Returns:
            tuple: ``(rois, im0, class_ids)`` - integer ``[x1, y1, x2, y2]``
            boxes in original-image pixels, the annotated image, and the class
            index of every box. Both lists are empty when nothing is detected.
        """
        color_palette = np.random.uniform(0, 255, size=(len(self.classes), 3))
        # (batch, xywh + cls, anchors) -> (batch, anchors, xywh + cls)
        x = np.einsum('bcn->bnc', preds)
        # Keep the anchors whose best class score passes the threshold.
        x = x[np.amax(x[..., 4:], axis=-1) > conf_threshold]
        if len(x) > 0:
            # Merge into rows of (cx, cy, w, h, score, class index).
            x = np.c_[x[..., :4], np.amax(x[..., 4:], axis=-1), np.argmax(x[..., 4:], axis=-1)]
            x = x[self._nms_indices(x[:, :4], x[:, 4], conf_threshold, iou_threshold)]
        if len(x) == 0:
            print("No bounding boxes detected.")
            return [], im0, []

        # Box format change: cxcywh -> xyxy.
        x[..., [0, 1]] -= x[..., [2, 3]] / 2
        x[..., [2, 3]] += x[..., [0, 1]]
        # Undo the letterbox: model input coordinates -> original image.
        x[..., :4] -= [pad_w, pad_h, pad_w, pad_h]
        x[..., :4] /= min(ratio)
        # Clamp to the image boundary.
        x[..., [0, 2]] = x[:, [0, 2]].clip(0, im0.shape[1])
        x[..., [1, 3]] = x[:, [1, 3]].clip(0, im0.shape[0])

        boxes = x[..., :6]
        rois = boxes[:, :4].astype(int).tolist()
        class_ids = boxes[:, 5].astype(int).tolist()
        # Draw the rectangles and their "<class>: <score>" labels.
        for (*box, conf, cls_) in boxes:
            cv2.rectangle(im0, (int(box[0]), int(box[1])), (int(box[2]), int(box[3])),
                          color_palette[int(cls_)].tolist(), 2, cv2.LINE_AA)
            cv2.putText(im0, f'{self.classes[int(cls_)]}: {conf:.3f}', (int(box[0]), int(box[1] - 9)),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 0, 255), 2, cv2.LINE_AA)
        return rois, im0, class_ids

    def infer(self, img_):
        """Detect objects in one image.

        The model is loaded on the first call unless ``init()`` was already
        called. Resources stay allocated between calls; call ``destory()``
        when the detector is no longer needed.

        Args:
            img_ (numpy.ndarray): HWC image as accepted by ``preprocessing``.

        Returns:
            tuple: ``(boxes, img_)`` - the integer ``[x1, y1, x2, y2]`` boxes
            and the input image with the detections drawn on it.
        """
        time_ = time.time()
        img, ratio, (pad_w, pad_h) = self.preprocessing(img_)
        print(f'image preprocess time cost:{time.time() - time_}')
        if self.model_id is None:
            self.init(self.model_path)
        time0 = time.time()
        self.load_input_data(img)
        print(f'data copy to device time cost:{time.time() - time0}')
        time1 = time.time()
        self.execute()
        print(f'device inference time cost:{time.time() - time1}')
        time2 = time.time()
        preds = self.process_output()[0]
        print(f'data copy to host time cost:{time.time() - time2}')
        # postprocess_v8 returns three values; class ids are not part of this
        # method's two-value result.
        boxes, img_, _class_ids = self.postprocess_v8(preds,
                                                      im0=img_,
                                                      ratio=ratio,
                                                      pad_w=pad_w,
                                                      pad_h=pad_h,
                                                      conf_threshold=self.conf_threshold,
                                                      iou_threshold=self.iou_threshold,
                                                      )
        return boxes, img_
#*************************分割**************************
class ACL_Yolov8_seg(object):
    """YOLOv8 segmentation model executed through the pyACL (``acl``) runtime.

    Typical use: build the object from a config dict, call ``init()`` once to
    load the model and allocate device buffers, call ``infer()`` once per
    image, and finally call ``destory()`` to release every resource.

    Config keys read by the constructor: ``"weights"``, ``"device_id"``,
    ``"img_size"`` (height, width), ``"conf_thres"``, ``"iou_thres"`` and
    ``"classes"``.
    """

    def __init__(self, config):
        """Resolve the model path and open the ACL device/context.

        Args:
            config (dict): see the class docstring for the keys that are read.
        """
        # A bare file name is looked up under <ROOT>/weights/.
        if os.path.isfile(config["weights"]):
            self.model_path = config["weights"]
        else:
            self.model_path = os.path.join(ROOT, 'weights', config["weights"])
        self.device_id = config["device_id"]
        # Return codes are deliberately not enforced here: a second acl.init()
        # in the same process reports an error although the runtime is usable.
        acl.init()
        acl.rt.set_device(self.device_id)
        self.context, _ = acl.rt.create_context(self.device_id)
        self.ACL_MEMCPY_HOST_TO_DEVICE = 1
        self.ACL_MEMCPY_DEVICE_TO_HOST = 2
        # NOTE(review): in the ACL headers the value 2 appears to be
        # ACL_MEM_MALLOC_NORMAL_ONLY rather than HUGE_ONLY - confirm. The
        # attribute name and value are kept for backward compatibility.
        self.ACL_MEM_MALLOC_HUGE_ONLY = 2
        self.model_id = None
        self.model_desc = None
        self.load_input_dataset = None
        self.load_output_dataset = None
        self.input_data = []
        self.output_data = []
        # NumPy dtype of the input tensor: np.single for FP32 models,
        # np.half for FP16 models.
        self.ndtype = np.single
        self.imgsz = config["img_size"]
        self.model_height, self.model_width = self.imgsz[0], self.imgsz[1]  # letterbox target size
        self.conf_threshold = config["conf_thres"]
        self.iou_threshold = config["iou_thres"]
        self.classes = config["classes"]

    @staticmethod
    def _check(ret, what):
        """Raise RuntimeError when an ACL call reports a non-zero return code."""
        if ret != 0:
            raise RuntimeError(f"{what} failed with ACL error code {ret}")

    @staticmethod
    def _nms_indices(cxcywh, scores, conf_threshold, iou_threshold):
        """Run OpenCV NMS on centre-format boxes and return a flat index array.

        cv2.dnn.NMSBoxes expects (left, top, width, height) rectangles, so the
        centre coordinates are shifted first. The result is flattened because
        its shape differs between OpenCV versions (tuple, 1-D or Nx1).
        """
        rects = np.array(cxcywh, dtype=np.float64)
        rects[:, :2] -= rects[:, 2:4] / 2
        keep = cv2.dnn.NMSBoxes(rects.tolist(), np.asarray(scores, dtype=np.float64).tolist(),
                                conf_threshold, iou_threshold)
        return np.asarray(keep, dtype=int).reshape(-1)

    def init(self, model_path=None):
        """Load the offline model and allocate its input/output datasets.

        Args:
            model_path (str | None): path of the ``.om`` file. Defaults to the
                path resolved by the constructor.

        Raises:
            RuntimeError: if the model cannot be loaded or described.
        """
        if model_path is None:
            model_path = self.model_path
        model_id, ret = acl.mdl.load_from_file(model_path)
        self._check(ret, f"acl.mdl.load_from_file({model_path!r})")
        self.model_id = model_id
        self.model_desc = acl.mdl.create_desc()
        self._check(acl.mdl.get_desc(self.model_desc, self.model_id), "acl.mdl.get_desc")
        self.gen_input_dataset()
        self.gen_output_dataset()

    def gen_output_dataset(self):
        """Allocate one device buffer per model output and wrap them in a dataset."""
        self.load_output_dataset = acl.mdl.create_dataset()
        for i in range(acl.mdl.get_num_outputs(self.model_desc)):
            buffer_size = acl.mdl.get_output_size_by_index(self.model_desc, i)
            buffer, ret = acl.rt.malloc(buffer_size, self.ACL_MEM_MALLOC_HUGE_ONLY)
            self._check(ret, "acl.rt.malloc")
            data = acl.create_data_buffer(buffer, buffer_size)
            _, ret = acl.mdl.add_dataset_buffer(self.load_output_dataset, data)
            self._check(ret, "acl.mdl.add_dataset_buffer")
            self.output_data.append({"buffer": buffer, "size": buffer_size})

    def gen_input_dataset(self):
        """Allocate one device buffer per model input and wrap them in a dataset."""
        self.load_input_dataset = acl.mdl.create_dataset()
        for i in range(acl.mdl.get_num_inputs(self.model_desc)):
            buffer_size = acl.mdl.get_input_size_by_index(self.model_desc, i)
            print("buffer_size",buffer_size)
            buffer, ret = acl.rt.malloc(buffer_size, self.ACL_MEM_MALLOC_HUGE_ONLY)
            self._check(ret, "acl.rt.malloc")
            data = acl.create_data_buffer(buffer, buffer_size)
            _, ret = acl.mdl.add_dataset_buffer(self.load_input_dataset, data)
            self._check(ret, "acl.mdl.add_dataset_buffer")
            self.input_data.append({"buffer": buffer, "size": buffer_size})

    def process_output(self):
        """Copy every model output from device to host.

        Returns:
            list[numpy.ndarray]: one float32 array per model output, reshaped
            to the dimensions reported by the model description.
        """
        inference_result = []
        for i, item in enumerate(self.output_data):
            # get_output_dims returns (dims_dict, ret); the dict is always at
            # position 0, whatever the output index is.
            dims, ret = acl.mdl.get_output_dims(self.model_desc, i)
            self._check(ret, "acl.mdl.get_output_dims")
            shape = tuple(dims["dims"])
            buffer_host, ret = acl.rt.malloc_host(item["size"])
            self._check(ret, "acl.rt.malloc_host")
            try:
                # Transfer the inference output from device to host.
                ret = acl.rt.memcpy(buffer_host, item["size"], item["buffer"],
                                    item["size"], self.ACL_MEMCPY_DEVICE_TO_HOST)
                self._check(ret, "acl.rt.memcpy(device->host)")
                bytes_out = acl.util.ptr_to_bytes(buffer_host, item["size"])
                # copy() so the result never refers to the host buffer freed below.
                data = np.frombuffer(bytes_out, dtype=np.float32).reshape(shape).copy()
            finally:
                acl.rt.free_host(buffer_host)
            inference_result.append(data)
        return inference_result

    def load_input_data(self, img):
        """Copy a preprocessed image into the first model input buffer.

        Args:
            img (numpy.ndarray): tensor returned by ``preprocessing``.
        """
        bytes_data = img.tobytes()
        np_ptr = acl.util.bytes_to_ptr(bytes_data)
        # Copy exactly the bytes we own; memcpy itself rejects a source that is
        # larger than the device buffer.
        ret = acl.rt.memcpy(self.input_data[0]["buffer"], self.input_data[0]["size"], np_ptr,
                            len(bytes_data), self.ACL_MEMCPY_HOST_TO_DEVICE)
        self._check(ret, "acl.rt.memcpy(host->device)")

    def execute(self):
        """Run one synchronous inference on the loaded model."""
        ret = acl.mdl.execute(self.model_id, self.load_input_dataset, self.load_output_dataset)
        self._check(ret, "acl.mdl.execute")

    def destory(self):
        """Release datasets, device buffers, the model, the context and the device.

        Cleanup is best effort: return codes are ignored so that one failing
        release does not prevent the remaining ones.
        """
        for dataset in (self.load_input_dataset, self.load_output_dataset):
            if dataset is None:
                continue
            for i in range(acl.mdl.get_dataset_num_buffers(dataset)):
                data_buf = acl.mdl.get_dataset_buffer(dataset, i)
                if data_buf:
                    acl.destroy_data_buffer(data_buf)
            acl.mdl.destroy_dataset(dataset)
        self.load_input_dataset = None
        self.load_output_dataset = None
        for item in self.input_data + self.output_data:
            acl.rt.free(item["buffer"])
        self.input_data = []
        self.output_data = []
        if self.model_id is not None:
            acl.mdl.unload(self.model_id)
            self.model_id = None
        if self.model_desc is not None:
            acl.mdl.destroy_desc(self.model_desc)
            self.model_desc = None
        acl.rt.destroy_context(self.context)
        acl.rt.reset_device(self.device_id)
        acl.finalize()

    def preprocessing(self, img):
        """Letterbox an image and convert it to the model input layout.

        Args:
            img (numpy.ndarray): HWC image; the channel axis is reversed, so a
                BGR input (as read by OpenCV) becomes RGB.

        Returns:
            tuple: ``(img_process, ratio, (pad_w, pad_h))`` where
            ``img_process`` is a ``(1, C, H, W)`` array of ``self.ndtype``
            scaled to [0, 1], ``ratio`` is the (w, h) resize ratio and
            ``pad_w`` / ``pad_h`` are the per-side paddings (floats).
        """
        # Resize and pad the input using letterbox() (borrowed from Ultralytics).
        shape = img.shape[:2]  # original (height, width)
        new_shape = (self.model_height, self.model_width)
        r = min(new_shape[0] / shape[0], new_shape[1] / shape[1])
        ratio = r, r
        new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r))  # (width, height)
        pad_w, pad_h = (new_shape[1] - new_unpad[0]) / 2, (new_shape[0] - new_unpad[1]) / 2
        if shape[::-1] != new_unpad:
            img = cv2.resize(img, new_unpad, interpolation=cv2.INTER_LINEAR)
        # The -0.1 / +0.1 offsets put the extra pixel of an odd padding on the
        # bottom/right side.
        top, bottom = int(round(pad_h - 0.1)), int(round(pad_h + 0.1))
        left, right = int(round(pad_w - 0.1)), int(round(pad_w + 0.1))
        if top or bottom or left or right:
            img = cv2.copyMakeBorder(img, top, bottom, left, right,
                                     cv2.BORDER_CONSTANT, value=(114, 114, 114))
        # HWC -> CHW, reverse channels (BGR -> RGB), cast, scale to [0, 1].
        img = np.ascontiguousarray(np.einsum('HWC->CHW', img)[::-1], dtype=self.ndtype) / 255.0
        print("image_data",img.shape)
        img_process = img[None] if len(img.shape) == 3 else img
        return img_process, ratio, (pad_w, pad_h)

    def postprocess_v8(self, preds, im0, ratio, pad_w, pad_h, conf_threshold, iou_threshold, nm=32):
        """Threshold filtering, NMS and mask decoding for YOLOv8-style outputs.

        Args:
            preds (Sequence[numpy.ndarray]): two model outputs - the detection
                head laid out as (batch, 4 + num_classes + nm, num_anchors),
                e.g. (1, 116, 8400), and the mask prototypes laid out as
                (batch, nm, mask_h, mask_w), e.g. (1, 32, 160, 160).
            im0 (numpy.ndarray): [h, w, c] original input image.
            ratio (tuple): width, height ratios used by the letterbox.
            pad_w (float): width padding used by the letterbox.
            pad_h (float): height padding used by the letterbox.
            conf_threshold (float): minimum class score to keep a prediction.
            iou_threshold (float): IoU threshold for non-maximum suppression.
            nm (int): number of mask coefficients per prediction.

        Returns:
            tuple: ``(boxes, masks, segments, status)``. ``boxes`` has rows of
            (x1, y1, x2, y2, score, class index) in original-image pixels,
            ``masks`` is a boolean [N, mask_h, mask_w] array, ``segments`` is a
            list with one contour per mask, and ``status`` is currently always
            1. The first three are empty lists when nothing is detected.
        """
        x, protos = preds[0], preds[1]
        status = 1
        # (batch, xywh + cls + nm, anchors) -> (batch, anchors, xywh + cls + nm)
        x = np.einsum('bcn->bnc', x)
        # Filter on the best class score; the trailing nm mask coefficients
        # are not scores and are excluded.
        x = x[np.amax(x[..., 4:-nm], axis=-1) > conf_threshold]
        if len(x) > 0:
            # Merge into rows of (cx, cy, w, h, score, class index, nm coefficients).
            x = np.c_[x[..., :4], np.amax(x[..., 4:-nm], axis=-1),
                      np.argmax(x[..., 4:-nm], axis=-1), x[..., -nm:]]
            x = x[self._nms_indices(x[:, :4], x[:, 4], conf_threshold, iou_threshold)]
        if len(x) == 0:
            return [], [], [], status

        # Box format change: cxcywh -> xyxy.
        x[..., [0, 1]] -= x[..., [2, 3]] / 2
        x[..., [2, 3]] += x[..., [0, 1]]
        # Undo the letterbox: model input coordinates -> original image.
        x[..., :4] -= [pad_w, pad_h, pad_w, pad_h]
        x[..., :4] /= min(ratio)
        # Clamp to the image boundary.
        x[..., [0, 2]] = x[:, [0, 2]].clip(0, im0.shape[1])
        x[..., [1, 3]] = x[:, [1, 3]].clip(0, im0.shape[0])
        # Decode the masks, then turn them into contours.
        masks = self.process_mask(protos[0], x[:, 6:], x[:, :4], im0.shape)
        segments = self.masks2segments(masks)
        return x[..., :6], masks, segments, status

    @staticmethod
    def masks2segments(masks):
        """Convert masks (n, h, w) into one contour per mask.

        Borrowed from
        https://github.com/ultralytics/ultralytics/blob/465df3024f44fa97d4fad9986530d5a13cdabdca/ultralytics/utils/ops.py#L750

        Args:
            masks (numpy.ndarray): [n, h, w] masks.

        Returns:
            list[numpy.ndarray]: for every mask, the (k, 2) float32 points of
            its longest external contour, or a (0, 2) array when none is found.
        """
        segments = []
        for mask in masks.astype('uint8'):
            contours = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)[0]
            if contours:
                # Keep only the longest external contour.
                contour = np.array(max(contours, key=len)).reshape(-1, 2)
            else:
                contour = np.zeros((0, 2))  # no segments found
            segments.append(contour.astype('float32'))
        return segments

    def process_mask(self, protos, masks_in, bboxes, im0_shape):
        """Combine prototypes and coefficients into per-box binary masks.

        The masks keep the prototype resolution. Because ``bboxes`` are given
        in original-image pixels, they are mapped onto the (letterboxed) mask
        grid before cropping, using the same gain/padding rule as
        ``scale_mask``.

        Args:
            protos (numpy.ndarray): [mask_dim, mask_h, mask_w].
            masks_in (numpy.ndarray): [n, mask_dim], n masks left after NMS.
            bboxes (numpy.ndarray): [n, 4] xyxy boxes in original-image pixels.
            im0_shape (tuple): shape of the original image (h, w, c).

        Returns:
            numpy.ndarray: boolean [n, mask_h, mask_w] masks, False outside
            each box.
        """
        c, mh, mw = protos.shape
        masks = np.matmul(masks_in, protos.reshape((c, -1))).reshape((-1, mh, mw))  # NHW
        # Original image -> mask grid: uniform gain plus centred padding.
        gain = min(mh / im0_shape[0], mw / im0_shape[1])
        pad_x = (mw - im0_shape[1] * gain) / 2
        pad_y = (mh - im0_shape[0] * gain) / 2
        grid_boxes = np.asarray(bboxes, dtype=np.float64)[:, :4] * gain
        grid_boxes = grid_boxes + np.array([pad_x, pad_y, pad_x, pad_y])
        masks = self.crop_mask(masks, grid_boxes)
        return np.greater(masks, 0.5)

    @staticmethod
    def scale_mask(masks, im0_shape, ratio_pad=None):
        """Strip the letterbox padding from masks and resize them to the original image.

        Borrowed from
        https://github.com/ultralytics/ultralytics/blob/465df3024f44fa97d4fad9986530d5a13cdabdca/ultralytics/utils/ops.py#L305

        Args:
            masks (numpy.ndarray): resized and padded masks/images,
                [h, w, num] or [h, w, 3].
            im0_shape (tuple): the original image shape.
            ratio_pad (tuple): optional; when given, ``ratio_pad[1]`` is used
                as the (w, h) padding instead of computing it.

        Returns:
            numpy.ndarray: [im0_h, im0_w, num] masks.

        Raises:
            ValueError: if ``masks`` has fewer than two dimensions.
        """
        im1_shape = masks.shape[:2]
        if ratio_pad is None:  # calculate from im0_shape
            gain = min(im1_shape[0] / im0_shape[0], im1_shape[1] / im0_shape[1])  # gain = old / new
            pad = (im1_shape[1] - im0_shape[1] * gain) / 2, (im1_shape[0] - im0_shape[0] * gain) / 2
        else:
            pad = ratio_pad[1]
        # Top-left / bottom-right corners of the un-padded region.
        top, left = int(round(pad[1] - 0.1)), int(round(pad[0] - 0.1))  # y, x
        bottom, right = int(round(im1_shape[0] - pad[1] + 0.1)), int(round(im1_shape[1] - pad[0] + 0.1))
        if len(masks.shape) < 2:
            raise ValueError(f'"len of masks shape" should be 2 or 3, but got {len(masks.shape)}')
        masks = masks[top:bottom, left:right]
        masks = cv2.resize(masks, (im0_shape[1], im0_shape[0]),
                           interpolation=cv2.INTER_LINEAR)  # INTER_CUBIC would be better
        if len(masks.shape) == 2:
            masks = masks[:, :, None]
        return masks

    @staticmethod
    def crop_mask(masks, boxes):
        """Zero every mask outside its bounding box.

        Borrowed from
        https://github.com/ultralytics/ultralytics/blob/465df3024f44fa97d4fad9986530d5a13cdabdca/ultralytics/utils/ops.py#L599

        Args:
            masks (numpy.ndarray): [n, h, w] masks.
            boxes (numpy.ndarray): [n, 4] xyxy boxes in mask pixel coordinates.

        Returns:
            numpy.ndarray: the masks multiplied by the inside-box indicator
            (x1 <= col < x2 and y1 <= row < y2).
        """
        n, h, w = masks.shape
        x1, y1, x2, y2 = np.split(boxes[:, :, None], 4, 1)
        cols = np.arange(w, dtype=x1.dtype)[None, None, :]
        rows = np.arange(h, dtype=x1.dtype)[None, :, None]
        return masks * ((cols >= x1) * (cols < x2) * (rows >= y1) * (rows < y2))

    def infer(self, img_):
        """Segment one image.

        Args:
            img_ (numpy.ndarray): HWC image as accepted by ``preprocessing``.

        Returns:
            tuple: ``(boxes, masks, segments, status)`` exactly as returned by
            ``postprocess_v8``.
        """
        time_ = time.time()
        img, ratio, (pad_w, pad_h) = self.preprocessing(img_)
        print(f'image preprocess time cost:{time.time() - time_}')
        time0 = time.time()
        self.load_input_data(img)
        print(f'data copy to device time cost:{time.time() - time0}')
        time1 = time.time()
        self.execute()
        print(f'device inference time cost:{time.time() - time1}')
        time2 = time.time()
        preds = self.process_output()
        print(f'data copy to host time cost:{time.time() - time2}')
        # Unpack in the order postprocess_v8 returns: boxes, masks, segments, status.
        boxes, masks, segments, statu_ = self.postprocess_v8(preds,
                                                             im0=img_,
                                                             ratio=ratio,
                                                             pad_w=pad_w,
                                                             pad_h=pad_h,
                                                             conf_threshold=self.conf_threshold,
                                                             iou_threshold=self.iou_threshold,
                                                             )
        return boxes, masks, segments, statu_
if __name__ == "__main__":
    # ---- YOLOv8 classification config ----
    cfg_cls = {
        "weights":'./weights/best_cls.om',
        "img_size": [640, 640],
        "device_id": 0,
        'classes': ['coal',"mohu"]
    }
    # ---- YOLOv8 detection config ----
    cfg_det= {
        "weights":'/mnt/data/yz/yolov8/weights/digital-number.om',
        "conf_thres": 0.5,
        "iou_thres": 0.4,
        "img_size": [640, 640],
        "device_id": 0,
        'classes': ['dial_3', 'dial_4']
    }
    # ---- YOLOv8 segmentation config ----
    cfg_seg= {
        "weights":'/mnt/data/yz/yolov8/weights/coalseg_0108_jhw.om',
        "conf_thres": 0.5,
        "iou_thres": 0.4,
        "img_size": [640, 640],
        "device_id": 0,
        'classes': ['dial_3', 'dial_4']
    }

    image_path= "./test_img/001.png"
    img_ = cv2.imread(image_path)
    # cv2.imread signals an unreadable/missing file by returning None.
    if img_ is None:
        raise FileNotFoundError(f"Cannot read image: {image_path}")

    # ---- classification example ----
    # model_cls=ACL_Yolov8_cls(cfg_cls)
    # model_cls.init(cfg_cls["weights"])
    # result_cls=model_cls.infer(img_)   # dict {classname: confidence, ...}

    # ---- detection example ----
    # model_det=ACL_Yolov8_det(cfg_det)
    # model_det.init(cfg_det["weights"])
    # result_det,img_res=model_det.infer(img_)   # detections and the annotated image

    # ---- segmentation ----
    # The name `model_seg` is kept at module level on purpose.
    model_seg=ACL_Yolov8_seg(cfg_seg)
    try:
        # Use the path resolved by the constructor so that a bare file name is
        # also found under <ROOT>/weights/.
        model_seg.init(model_seg.model_path)
        boxes, masks ,segments,_ =model_seg.infer(img_)  # boxes, masks, mask contours, status
        print("done")
    finally:
        # Always give the device back, even when inference fails.
        model_seg.destory()

    # Drawing example, kept for reference.
    # NOTE(review): masks/segments are produced at the mask-prototype
    # resolution - confirm whether they must be rescaled to the image first.
    # color_palette = np.random.uniform(0, 255, size=(len(cfg_seg["classes"]), 3))  # one colour per class
    # im_canvas = img_.copy()
    # for (*box, conf, cls_), segment in zip(boxes, segments):
    #     # draw contour and fill mask
    #     cv2.polylines(img_, np.int32([segment]), True, (255, 255, 255), 2)  # white borderline
    #     cv2.fillPoly(im_canvas, np.int32([segment]), (255, 0, 0))
    #     # draw bbox rectangle
    #     cv2.rectangle(img_, (int(box[0]), int(box[1])), (int(box[2]), int(box[3])),
    #                   color_palette[int(cls_)], 1, cv2.LINE_AA)
    #     cv2.putText(img_, f'{cfg_seg["classes"][int(cls_)]}: {conf:.3f}', (int(box[0]), int(box[1] - 9)),
    #                 cv2.FONT_HERSHEY_SIMPLEX, 0.7, color_palette[int(cls_)], 2, cv2.LINE_AA)
    # # Mix image
    # img_ = cv2.addWeighted(im_canvas, 0.3, img_, 0.7, 0)
    # cv2.imwrite("aaa.jpg", img_)