Download the GRCNN project: https://github.com/skumra/robotic-grasping.git
Export the ONNX model:
import torch

# Load the full GRCNN module saved with torch.save (run from the repo root so the model classes unpickle)
net = torch.load("trained-models/jacquard-rgbd-grconvnet3-drop0-ch32/epoch_42_iou_0.93")
net.eval()  # switch to inference mode before export

# Dummy RGB-D input: 1 depth + 3 color channels at 300x300
x = torch.rand(1, 4, 300, 300).cuda()
torch.onnx.export(net, x, "./grcnn.onnx", opset_version=13)
The exported ONNX model structure is as follows (structure figure omitted): one (1, 4, 300, 300) input and four output maps (quality, cos, sin, width).
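To verify the export before moving on, a quick structural check with the onnx package (a minimal sketch; the exact output names depend on the repo's forward() signature):

import onnx

model = onnx.load("grcnn.onnx")
onnx.checker.check_model(model)  # raises if the exported graph is malformed
print([i.name for i in model.graph.input])   # expect one (1, 4, 300, 300) input
print([o.name for o in model.graph.output])  # expect four outputs: quality, cos, sin, width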

ONNX Runtime inference
import cv2
import onnxruntime
import numpy as np
from skimage.feature import peak_local_max


def process_data(rgb, depth, width, height, output_size):
    # Center-crop both images to output_size x output_size
    left = (width - output_size) // 2
    top = (height - output_size) // 2
    right = (width + output_size) // 2
    bottom = (height + output_size) // 2

    # Depth: zero-center and clip to [-1, 1], then HWC -> CHW
    depth_img = depth[top:bottom, left:right]
    depth_img = np.clip(depth_img - depth_img.mean(), -1, 1).astype(np.float32)  # ensure float32 for the ONNX input
    depth_img = depth_img.transpose(2, 0, 1)

    # RGB: scale to [0, 1], zero-center, then HWC -> CHW
    # (note: cv2.imread returns BGR; swap channels if the training pipeline used RGB)
    rgb_img = rgb[top:bottom, left:right]
    rgb_img = rgb_img.astype(np.float32) / 255.0
    rgb_img -= rgb_img.mean()
    rgb_img = rgb_img.transpose(2, 0, 1)

    # Stack into a (1, 4, 300, 300) tensor: 1 depth channel + 3 color channels
    return np.concatenate((np.expand_dims(depth_img, 0), np.expand_dims(rgb_img, 0)), axis=1)
if __name__ == '__main__':
    # Jacquard sample: 8-bit RGB (PNG) and float depth (TIFF), both 1024x1024
    rgb = cv2.imread('data/Jacquard/e35c7e8c9f85cac42a2f0bc2931a19e/0_e35c7e8c9f85cac42a2f0bc2931a19e_RGB.png', -1)
    depth = cv2.imread('data/Jacquard/e35c7e8c9f85cac42a2f0bc2931a19e/0_e35c7e8c9f85cac42a2f0bc2931a19e_perfect_depth.tiff', -1)
    depth = np.expand_dims(depth, axis=2)  # (H, W) -> (H, W, 1)
    input_data = process_data(rgb=rgb, depth=depth, width=1024, height=1024, output_size=300)

    onnx_session = onnxruntime.InferenceSession("grcnn.onnx", providers=['CPUExecutionProvider'])
    # The model has a single input; build the feed dict from the graph's input names
    inputs = {node.name: input_data for node in onnx_session.get_inputs()}
    outputs = onnx_session.run(None, inputs)

    # Outputs: 0 = quality, 1 = cos(2θ), 2 = sin(2θ), 3 = normalized width
    q_img = outputs[0].squeeze()
    ang_img = (np.arctan2(outputs[2], outputs[1]) / 2.0).squeeze()
    width_img = outputs[3].squeeze() * 150.0

    # Smooth the maps before peak picking
    q_img = cv2.GaussianBlur(q_img, (0, 0), 2)
    ang_img = cv2.GaussianBlur(ang_img, (0, 0), 2)
    width_img = cv2.GaussianBlur(width_img, (0, 0), 1)

    # Best grasp = highest-quality local maximum
    local_max = peak_local_max(q_img, min_distance=20, threshold_abs=0.2, num_peaks=1)
    for grasp_point_array in local_max:
        grasp_point = tuple(grasp_point_array)  # (row, col)
        grasp_angle = ang_img[grasp_point]
        width = width_img[grasp_point] / 2
        print(grasp_point, grasp_angle, width)
Output:
(184, 213) -0.23662478 30.98381233215332
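To sanity-check these numbers visually, the grasp can be drawn on the 300x300 center crop. A minimal sketch, assuming rgb from the script above is still in scope; draw_grasp is a hypothetical helper, and the sign of the angle may need flipping depending on the image-axis convention:

import cv2
import numpy as np

def draw_grasp(img, row, col, angle, grasp_width):
    # Grasp line of length grasp_width centered on (row, col) at the given angle;
    # image rows grow downward, hence the minus sign on the y component.
    dx = (grasp_width / 2) * np.cos(angle)
    dy = (grasp_width / 2) * np.sin(angle)
    p1 = (int(col - dx), int(row + dy))
    p2 = (int(col + dx), int(row - dy))
    cv2.line(img, p1, p2, (0, 0, 255), 2)
    cv2.circle(img, (col, row), 3, (0, 255, 0), -1)
    return img

crop = rgb[362:662, 362:662].copy()  # the same 300x300 center crop as process_data
cv2.imwrite("grasp_vis.png", draw_grasp(crop, 184, 213, -0.23662478, 2 * 30.98))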
TensorRT inference
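The script below assumes a serialized engine grcnn.engine already exists. A minimal engine-building sketch, assuming TensorRT 8.x (to match the binding-based API used below); trtexec --onnx=grcnn.onnx --saveEngine=grcnn.engine achieves the same from the command line:

import tensorrt as trt

logger = trt.Logger(trt.Logger.WARNING)
builder = trt.Builder(logger)
network = builder.create_network(1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH))
parser = trt.OnnxParser(network, logger)
with open("grcnn.onnx", "rb") as f:
    if not parser.parse(f.read()):
        for i in range(parser.num_errors):
            print(parser.get_error(i))
        raise RuntimeError("failed to parse grcnn.onnx")
config = builder.create_builder_config()
config.max_workspace_size = 1 << 30  # 1 GiB; TensorRT 8.x API
engine_bytes = builder.build_serialized_network(network, config)
with open("grcnn.engine", "wb") as f:
    f.write(engine_bytes)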
import cv2
import numpy as np
import tensorrt as trt
import pycuda.autoinit  # creates a CUDA context for this process
import pycuda.driver as cuda
from skimage.feature import peak_local_max


def process_data(rgb, depth, width, height, output_size):
    # Identical preprocessing to the ONNX Runtime script above
    left = (width - output_size) // 2
    top = (height - output_size) // 2
    right = (width + output_size) // 2
    bottom = (height + output_size) // 2

    depth_img = depth[top:bottom, left:right]
    depth_img = np.clip(depth_img - depth_img.mean(), -1, 1).astype(np.float32)
    depth_img = depth_img.transpose(2, 0, 1)

    rgb_img = rgb[top:bottom, left:right]
    rgb_img = rgb_img.astype(np.float32) / 255.0
    rgb_img -= rgb_img.mean()
    rgb_img = rgb_img.transpose(2, 0, 1)

    return np.concatenate((np.expand_dims(depth_img, 0), np.expand_dims(rgb_img, 0)), axis=1)
if __name__ == '__main__':
    logger = trt.Logger(trt.Logger.WARNING)
    with open("grcnn.engine", "rb") as f, trt.Runtime(logger) as runtime:
        engine = runtime.deserialize_cuda_engine(f.read())
    context = engine.create_execution_context()

    # Page-locked host buffers and device buffers for 1 input + 4 outputs
    # (binding 0 = input; bindings 1-4 = quality, cos, sin, width)
    inputs_host = cuda.pagelocked_empty(trt.volume(context.get_binding_shape(0)), dtype=np.float32)
    output0_host = cuda.pagelocked_empty(trt.volume(context.get_binding_shape(1)), dtype=np.float32)
    output1_host = cuda.pagelocked_empty(trt.volume(context.get_binding_shape(2)), dtype=np.float32)
    output2_host = cuda.pagelocked_empty(trt.volume(context.get_binding_shape(3)), dtype=np.float32)
    output3_host = cuda.pagelocked_empty(trt.volume(context.get_binding_shape(4)), dtype=np.float32)
    inputs_device = cuda.mem_alloc(inputs_host.nbytes)
    output0_device = cuda.mem_alloc(output0_host.nbytes)
    output1_device = cuda.mem_alloc(output1_host.nbytes)
    output2_device = cuda.mem_alloc(output2_host.nbytes)
    output3_device = cuda.mem_alloc(output3_host.nbytes)
    stream = cuda.Stream()

    rgb = cv2.imread('0_e35c7e8c9f85cac42a2f0bc2931a19e_RGB.png', -1)
    depth = cv2.imread('0_e35c7e8c9f85cac42a2f0bc2931a19e_perfect_depth.tiff', -1)
    depth = np.expand_dims(depth, axis=2)
    input_data = process_data(rgb=rgb, depth=depth, width=1024, height=1024, output_size=300)
    np.copyto(inputs_host, input_data.ravel())

    # Async H2D copy -> inference -> D2H copies, all on one stream
    # (reuse the execution context created above; no need for a second one)
    cuda.memcpy_htod_async(inputs_device, inputs_host, stream)
    context.execute_async_v2(bindings=[int(inputs_device), int(output0_device), int(output1_device),
                                       int(output2_device), int(output3_device)],
                             stream_handle=stream.handle)
    cuda.memcpy_dtoh_async(output0_host, output0_device, stream)
    cuda.memcpy_dtoh_async(output1_host, output1_device, stream)
    cuda.memcpy_dtoh_async(output2_host, output2_device, stream)
    cuda.memcpy_dtoh_async(output3_host, output3_device, stream)
    stream.synchronize()

    # Same post-processing as the ONNX Runtime script
    q_img = output0_host.reshape(context.get_binding_shape(1)).squeeze()
    ang_img = (np.arctan2(output2_host.reshape(context.get_binding_shape(3)),
                          output1_host.reshape(context.get_binding_shape(2))) / 2.0).squeeze()
    width_img = output3_host.reshape(context.get_binding_shape(4)).squeeze() * 150.0
    q_img = cv2.GaussianBlur(q_img, (0, 0), 2)
    ang_img = cv2.GaussianBlur(ang_img, (0, 0), 2)
    width_img = cv2.GaussianBlur(width_img, (0, 0), 1)
    local_max = peak_local_max(q_img, min_distance=20, threshold_abs=0.2, num_peaks=1)
    for grasp_point_array in local_max:
        grasp_point = tuple(grasp_point_array)  # (row, col)
        grasp_angle = ang_img[grasp_point]
        width = width_img[grasp_point] / 2
        print(grasp_point, grasp_angle, width)
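As an optional sanity check, the TensorRT quality map can be compared against ONNX Runtime on the same tensor; an FP32 engine typically agrees to within about 1e-4. A minimal sketch, assuming input_data and output0_host from the script above are still in scope:

import onnxruntime

sess = onnxruntime.InferenceSession("grcnn.onnx", providers=["CPUExecutionProvider"])
ref = sess.run(None, {sess.get_inputs()[0].name: input_data})
print(np.abs(ref[0].ravel() - output0_host).max())  # max abs difference on the quality map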

