Preparation: obtain the extrinsic calibration data from LiDAR-camera joint calibration, together with the camera intrinsic parameters. Both can be produced by following a joint calibration tutorial.
Method:
The original 3D detection gives each box as a centre point plus its length, width, and height. From this information the 3D box is converted into its 8-vertex representation, and the 8 vertices are projected onto the image plane with cv2.projectPoints using the extrinsic rotation and translation. Taking the minimum and maximum of the projected 2D coordinates (xmin, ymin, xmax, ymax) then gives the top-left and bottom-right corners of the 2D box.
image_points, _ = cv2.projectPoints(corners, rvec, tvec, camera_matrix, None)
Here corners holds the 8 vertex coordinates of the 3D box, rvec is the rotation vector (the Rodrigues form of the rotation matrix), tvec is the translation vector, camera_matrix is the camera intrinsic matrix, and the output is the projected 2D pixel coordinates.
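To make the projection concrete, here is a minimal hand-check of what cv2.projectPoints computes when no distortion coefficients are passed. The calibration values are the ones used in the function below; the sample LiDAR point is hypothetical, and the small skew term of the intrinsic matrix is dropped for this check.

import numpy as np
import cv2

R = np.array([[-0.0061, -0.9998,  0.0196],
              [ 0.0104, -0.0197, -0.9998],
              [ 0.9999, -0.0059,  0.0105]])
t = np.array([-0.0024, -0.0719, -0.1814])
K = np.array([[811.54, 0.0, 321.62],
              [0.0, 812.21, 232.24],
              [0.0, 0.0, 1.0]])

p_lidar = np.array([5.0, 0.5, -0.3])      # hypothetical point in the LiDAR frame (metres)
p_cam = R @ p_lidar + t                   # LiDAR frame -> camera frame
uv = K @ p_cam
uv = uv[:2] / uv[2]                       # perspective division gives the pixel (u, v)

rvec, _ = cv2.Rodrigues(R)                # projectPoints expects the Rodrigues vector
img_pts, _ = cv2.projectPoints(p_lidar.reshape(1, 1, 3), rvec, t, K, None)
print(uv, img_pts[0, 0])                  # the two results agree up to rounding in the calibration values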
# The methods below assume "import cv2" and "import numpy as np" at module level,
# and that self carries the image size as self.image_width / self.image_height.
def lidar_box2camera_box(self, score, position, dimensions):
    threshold = 0.6
    method = 'Min'
    # LiDAR-to-camera extrinsics obtained from joint calibration
    lidar_to_camera_rotation = np.array([[-0.0061, -0.9998,  0.0196],
                                         [ 0.0104, -0.0197, -0.9998],
                                         [ 0.9999, -0.0059,  0.0105]])
    lidar_to_camera_translation = np.array([-0.0024, -0.0719, -0.1814])
    # Translation vector and rotation vector (Rodrigues form) for cv2.projectPoints
    tvec = lidar_to_camera_translation
    rvec = cv2.Rodrigues(lidar_to_camera_rotation)[0].squeeze()
    # Camera intrinsic matrix
    camera_matrix = np.array([[811.54, -0.4069, 321.62],
                              [0,       812.21, 232.24],
                              [0,       0,      1]])
    # Build the 8 corners of the 3D box from its centre point and dimensions
    half_size = dimensions / 2.0
    center = np.array([position[0], position[1], position[2]])
    corners = np.array([
        center + [-half_size[0], -half_size[1], -half_size[2]],
        center + [-half_size[0], -half_size[1],  half_size[2]],
        center + [ half_size[0], -half_size[1],  half_size[2]],
        center + [ half_size[0], -half_size[1], -half_size[2]],
        center + [-half_size[0],  half_size[1], -half_size[2]],
        center + [-half_size[0],  half_size[1],  half_size[2]],
        center + [ half_size[0],  half_size[1],  half_size[2]],
        center + [ half_size[0],  half_size[1], -half_size[2]]
    ])
    # Project the 8 corners onto the image plane
    image_points, _ = cv2.projectPoints(corners, rvec, tvec, camera_matrix, None)
    image_points_2d = image_points[:, 0, :]
    # The axis-aligned 2D box is the min/max of the projected corners,
    # clipped to the image boundaries (top-left and bottom-right corners)
    xmin = max(0, int(np.min(image_points_2d[:, 0])))
    ymin = max(0, int(np.min(image_points_2d[:, 1])))
    xmax = min(int(np.max(image_points_2d[:, 0])), self.image_width - 1)
    ymax = min(int(np.max(image_points_2d[:, 1])), self.image_height - 1)
    # Optional sanity limits on box size (e.g. maximum width/height close to the image
    # size, minimum width ~40 px, minimum height ~30 px) can be applied here if needed.
    boxes = [{'xmin': xmin, 'ymin': ymin, 'xmax': xmax, 'ymax': ymax}]
    boxes_array = np.array([[box['xmin'], box['ymin'], box['xmax'], box['ymax']] for box in boxes])
    boxes_array = np.column_stack((boxes_array, score))
    # Non-maximum suppression: nms returns the indices of the boxes to keep
    keep = self.nms(boxes_array, threshold, method)
    boxes_array = boxes_array[keep]
    # Build the list of dictionaries returned to the caller
    after_boxes = [{'xmin': int(b[0]), 'ymin': int(b[1]), 'xmax': int(b[2]), 'ymax': int(b[3])}
                   for b in boxes_array]
    return after_boxes
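A hypothetical call, assuming the class instance is available as node and already holds image_width and image_height; the score, position, and dimensions values below are made up.

import numpy as np

score = np.array([0.91])                    # detection confidence from the 3D detector
position = np.array([6.2, -0.4, -0.5])      # box centre in the LiDAR frame (metres)
dimensions = np.array([3.8, 1.6, 1.5])      # box size along x, y, z (metres)
boxes_2d = node.lidar_box2camera_box(score, position, dimensions)
# -> a list like [{'xmin': ..., 'ymin': ..., 'xmax': ..., 'ymax': ...}]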
After projection there are usually many overlapping 2D boxes, so non-maximum suppression (NMS) is applied to keep only the best ones.
def nms(self, boxes, threshold, method):
    # boxes: N x 5 array of [x1, y1, x2, y2, score]; returns the indices of the boxes to keep
    if len(boxes) == 0:
        return np.empty((0,), dtype=np.int16)
    x1 = boxes[:, 0]
    y1 = boxes[:, 1]
    x2 = boxes[:, 2]
    y2 = boxes[:, 3]
    s = boxes[:, 4]
    area = (x2 - x1 + 1) * (y2 - y1 + 1)
    s_sort = np.argsort(s)                  # ascending by score; the best box is last
    pick = np.zeros_like(s, dtype=np.int16)
    counter = 0
    while s_sort.size > 0:
        i = s_sort[-1]                      # index of the highest-scoring remaining box
        pick[counter] = i
        counter += 1
        idx = s_sort[0:-1]
        # Intersection of box i with all remaining boxes
        xx1 = np.maximum(x1[i], x1[idx])
        yy1 = np.maximum(y1[i], y1[idx])
        xx2 = np.minimum(x2[i], x2[idx])
        yy2 = np.minimum(y2[i], y2[idx])
        w = np.maximum(0.0, xx2 - xx1 + 1)
        h = np.maximum(0.0, yy2 - yy1 + 1)
        inter = w * h
        if method == 'Min':
            # Overlap relative to the smaller box
            o = inter / np.minimum(area[i], area[idx])
        else:
            # Standard IoU
            o = inter / (area[i] + area[idx] - inter)
        # Keep only the boxes whose overlap with box i is below the threshold
        s_sort = s_sort[np.where(o <= threshold)]
    pick = pick[0:counter]
    return pick
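Because nms returns indices rather than boxes, the caller filters the (x1, y1, x2, y2, score) array with them. A small hypothetical example, again writing the class instance as node:

import numpy as np

boxes_scores = np.array([[100., 80., 220., 200., 0.92],
                         [105., 85., 225., 205., 0.80],
                         [400., 120., 500., 260., 0.75]])
keep = node.nms(boxes_scores, 0.6, 'Min')   # indices of the boxes that survive
kept = boxes_scores[keep]                   # the low-score box overlapping the first one is dropped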
The 2D boxes obtained above can be visualised with the following code.
def draw_detection_results(self, image, detections):
    # Draw the projected 2D boxes and class labels on a copy of the image
    cv_image = image.copy()
    color_person = (128, 0, 128)
    color_car = (0, 128, 0)
    color_text = (0, 0, 0)
    for detection in detections:
        # Box coordinates of this detection
        xmin = int(detection.xmin)
        ymin = int(detection.ymin)
        xmax = int(detection.xmax)
        ymax = int(detection.ymax)
        label = f"{detection.Class}"
        if detection.Class == "pedestrian" or detection.Class == "person":
            color = color_person
        else:
            color = color_car
        cv2.rectangle(cv_image, (xmin, ymin), (xmax, ymax), color, 2)
        cv2.putText(cv_image, label, (xmin, ymin - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color_text, 2)
    return cv_image
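A hypothetical way to call the drawing helper, assuming each detection object exposes xmin, ymin, xmax, ymax, and Class fields (in the original pipeline these would typically be ROS bounding-box messages):

import cv2
import numpy as np
from collections import namedtuple

# Stand-in for the detection messages used by the node
Detection = namedtuple('Detection', ['xmin', 'ymin', 'xmax', 'ymax', 'Class'])
detections = [Detection(120, 90, 240, 310, 'person'),
              Detection(300, 150, 520, 330, 'car')]

frame = np.zeros((480, 640, 3), dtype=np.uint8)        # placeholder camera frame
vis = node.draw_detection_results(frame, detections)
cv2.imshow('projected boxes', vis)
cv2.waitKey(0)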