Preparation: obtain the extrinsic calibration data from LiDAR-camera joint calibration, together with the camera intrinsic parameters. Both can be produced by following a joint calibration tutorial.
Method:
The original 3D detection gives each box as a centre point plus its length, width, and height. From this information the 3D box is converted into its 8-vertex representation, and the 8 vertices are projected onto the image plane with cv2.projectPoints using the extrinsic rotation and translation. Taking the minimum and maximum of the projected 2D coordinates (xmin, ymin, xmax, ymax) then gives the top-left and bottom-right corners of the 2D box.
image_points, _ = cv2.projectPoints(corners, rvec, tvec, camera_matrix, None)
Here corners holds the 8 vertex coordinates of the 3D box, rvec is the rotation vector (the Rodrigues form of the rotation matrix), tvec is the translation vector, camera_matrix is the camera intrinsic matrix, and the output is the projected 2D pixel coordinates.
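To make the projection concrete, here is a minimal hand-check of what cv2.projectPoints computes when no distortion coefficients are passed. The calibration values are the ones used in the function below; the sample LiDAR point is hypothetical, and the small skew term of the intrinsic matrix is dropped for this check.

import numpy as np
import cv2

R = np.array([[-0.0061, -0.9998,  0.0196],
              [ 0.0104, -0.0197, -0.9998],
              [ 0.9999, -0.0059,  0.0105]])
t = np.array([-0.0024, -0.0719, -0.1814])
K = np.array([[811.54, 0.0, 321.62],
              [0.0, 812.21, 232.24],
              [0.0, 0.0, 1.0]])

p_lidar = np.array([5.0, 0.5, -0.3])      # hypothetical point in the LiDAR frame (metres)
p_cam = R @ p_lidar + t                   # LiDAR frame -> camera frame
uv = K @ p_cam
uv = uv[:2] / uv[2]                       # perspective division gives the pixel (u, v)

rvec, _ = cv2.Rodrigues(R)                # projectPoints expects the Rodrigues vector
img_pts, _ = cv2.projectPoints(p_lidar.reshape(1, 1, 3), rvec, t, K, None)
print(uv, img_pts[0, 0])                  # the two results agree up to rounding in the calibration values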
# The methods below assume "import cv2" and "import numpy as np" at module level,
# and that self carries the image size as self.image_width / self.image_height.
def lidar_box2camera_box(self, score, position, dimensions):
    threshold = 0.6
    method = 'Min'
    # LiDAR-to-camera extrinsics obtained from joint calibration
    lidar_to_camera_rotation = np.array([[-0.0061, -0.9998,  0.0196],
                                         [ 0.0104, -0.0197, -0.9998],
                                         [ 0.9999, -0.0059,  0.0105]])
    lidar_to_camera_translation = np.array([-0.0024, -0.0719, -0.1814])
    # Translation vector and rotation vector (Rodrigues form) for cv2.projectPoints
    tvec = lidar_to_camera_translation
    rvec = cv2.Rodrigues(lidar_to_camera_rotation)[0].squeeze()
    # Camera intrinsic matrix
    camera_matrix = np.array([[811.54, -0.4069, 321.62],
                              [0,       812.21, 232.24],
                              [0,       0,      1]])
    # Build the 8 corners of the 3D box from its centre point and dimensions
    half_size = dimensions / 2.0
    center = np.array([position[0], position[1], position[2]])
    corners = np.array([
        center + [-half_size[0], -half_size[1], -half_size[2]],
        center + [-half_size[0], -half_size[1],  half_size[2]],
        center + [ half_size[0], -half_size[1],  half_size[2]],
        center + [ half_size[0], -half_size[1], -half_size[2]],
        center + [-half_size[0],  half_size[1], -half_size[2]],
        center + [-half_size[0],  half_size[1],  half_size[2]],
        center + [ half_size[0],  half_size[1],  half_size[2]],
        center + [ half_size[0],  half_size[1], -half_size[2]]
    ])
    # Project the 8 corners onto the image plane
    image_points, _ = cv2.projectPoints(corners, rvec, tvec, camera_matrix, None)
    image_points_2d = image_points[:, 0, :]
    # The axis-aligned 2D box is the min/max of the projected corners,
    # clipped to the image boundaries (top-left and bottom-right corners)
    xmin = max(0, int(np.min(image_points_2d[:, 0])))
    ymin = max(0, int(np.min(image_points_2d[:, 1])))
    xmax = min(int(np.max(image_points_2d[:, 0])), self.image_width - 1)
    ymax = min(int(np.max(image_points_2d[:, 1])), self.image_height - 1)
    # Optional sanity limits on box size (e.g. maximum width/height close to the image
    # size, minimum width ~40 px, minimum height ~30 px) can be applied here if needed.
    boxes = [{'xmin': xmin, 'ymin': ymin, 'xmax': xmax, 'ymax': ymax}]
    boxes_array = np.array([[box['xmin'], box['ymin'], box['xmax'], box['ymax']] for box in boxes])
    boxes_array = np.column_stack((boxes_array, score))
    # Non-maximum suppression: nms returns the indices of the boxes to keep
    keep = self.nms(boxes_array, threshold, method)
    boxes_array = boxes_array[keep]
    # Build the list of dictionaries returned to the caller
    after_boxes = [{'xmin': int(b[0]), 'ymin': int(b[1]), 'xmax': int(b[2]), 'ymax': int(b[3])}
                   for b in boxes_array]
    return after_boxes
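A hypothetical call, assuming the class instance is available as node and already holds image_width and image_height; the score, position, and dimensions values below are made up.

import numpy as np

score = np.array([0.91])                    # detection confidence from the 3D detector
position = np.array([6.2, -0.4, -0.5])      # box centre in the LiDAR frame (metres)
dimensions = np.array([3.8, 1.6, 1.5])      # box size along x, y, z (metres)
boxes_2d = node.lidar_box2camera_box(score, position, dimensions)
# -> a list like [{'xmin': ..., 'ymin': ..., 'xmax': ..., 'ymax': ...}]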
After projection there are usually many overlapping 2D boxes, so non-maximum suppression (NMS) is applied to keep only the best ones.
def nms(self, boxes, threshold, method):
    # boxes: N x 5 array of [x1, y1, x2, y2, score]; returns the indices of the boxes to keep
    if len(boxes) == 0:
        return np.empty((0,), dtype=np.int16)
    x1 = boxes[:, 0]
    y1 = boxes[:, 1]
    x2 = boxes[:, 2]
    y2 = boxes[:, 3]
    s = boxes[:, 4]
    area = (x2 - x1 + 1) * (y2 - y1 + 1)
    s_sort = np.argsort(s)                  # ascending by score; the best box is last
    pick = np.zeros_like(s, dtype=np.int16)
    counter = 0
    while s_sort.size > 0:
        i = s_sort[-1]                      # index of the highest-scoring remaining box
        pick[counter] = i
        counter += 1
        idx = s_sort[0:-1]
        # Intersection of box i with all remaining boxes
        xx1 = np.maximum(x1[i], x1[idx])
        yy1 = np.maximum(y1[i], y1[idx])
        xx2 = np.minimum(x2[i], x2[idx])
        yy2 = np.minimum(y2[i], y2[idx])
        w = np.maximum(0.0, xx2 - xx1 + 1)
        h = np.maximum(0.0, yy2 - yy1 + 1)
        inter = w * h
        if method == 'Min':
            # Overlap relative to the smaller box
            o = inter / np.minimum(area[i], area[idx])
        else:
            # Standard IoU
            o = inter / (area[i] + area[idx] - inter)
        # Keep only the boxes whose overlap with box i is below the threshold
        s_sort = s_sort[np.where(o <= threshold)]
    pick = pick[0:counter]
    return pick
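Because nms returns indices rather than boxes, the caller filters the (x1, y1, x2, y2, score) array with them. A small hypothetical example, again writing the class instance as node:

import numpy as np

boxes_scores = np.array([[100., 80., 220., 200., 0.92],
                         [105., 85., 225., 205., 0.80],
                         [400., 120., 500., 260., 0.75]])
keep = node.nms(boxes_scores, 0.6, 'Min')   # indices of the boxes that survive
kept = boxes_scores[keep]                   # the low-score box overlapping the first one is dropped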
The 2D boxes obtained above can be visualised with the following code.
def draw_detection_results(self, image, detections):
    # Draw the projected 2D boxes and class labels on a copy of the image
    cv_image = image.copy()
    color_person = (128, 0, 128)
    color_car = (0, 128, 0)
    color_text = (0, 0, 0)
    for detection in detections:
        # Box coordinates of this detection
        xmin = int(detection.xmin)
        ymin = int(detection.ymin)
        xmax = int(detection.xmax)
        ymax = int(detection.ymax)
        label = f"{detection.Class}"
        if detection.Class == "pedestrian" or detection.Class == "person":
            color = color_person
        else:
            color = color_car
        cv2.rectangle(cv_image, (xmin, ymin), (xmax, ymax), color, 2)
        cv2.putText(cv_image, label, (xmin, ymin - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color_text, 2)
    return cv_image
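A hypothetical way to call the drawing helper, assuming each detection object exposes xmin, ymin, xmax, ymax, and Class fields (in the original pipeline these would typically be ROS bounding-box messages):

import cv2
import numpy as np
from collections import namedtuple

# Stand-in for the detection messages used by the node
Detection = namedtuple('Detection', ['xmin', 'ymin', 'xmax', 'ymax', 'Class'])
detections = [Detection(120, 90, 240, 310, 'person'),
              Detection(300, 150, 520, 330, 'car')]

frame = np.zeros((480, 640, 3), dtype=np.uint8)        # placeholder camera frame
vis = node.draw_detection_results(frame, detections)
cv2.imshow('projected boxes', vis)
cv2.waitKey(0)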