Cascade R-CNN Python测试脚本

最新推荐文章于 2022-10-01 21:01:29 发布

原创最新推荐文章于 2022-10-01 21:01:29 发布 · 1.3k 阅读

7 ·

CC 4.0 BY-SA版权

程序设计同时被 2 个专栏收录

40 篇文章

订阅专栏

机器学习

34 篇文章

订阅专栏

本文档提供了一份Cascade R-CNN的Python测试脚本，详细介绍了如何使用该脚本进行对象检测。脚本来源于论文《Cascade R-CNN: Delving into High Quality Object Detection》的实现，并进行了相应的修改。

部署运行你感兴趣的模型镜像

传送门：

1. 前言

之前的博客中讲到了Cascade R-CNN的原理与训练数据的准备，这里贴出其Python测试的脚本。
下面这份代码取自该地址（代码地址），并在其基础上做了一些修改。

2. demo代码

# -*- coding=utf-8 -*-
import os
import sys
import argparse
import numpy as np
from PIL import Image, ImageDraw
import cv2
import time
caffe_root = '/home/xxxxx/codes/cascade-rcnn'
sys.path.insert(0, os.path.join(caffe_root, 'python'))
import caffe

# from google.protobuf import text_format
# from caffe.proto import caffe_pb2

class CaffeDetection:
    """Run a (Cascade) R-CNN Caffe model on single images and post-process its outputs.

    For every configured output head the class reads the box, class-probability
    and proposal blobs of the network, rescales boxes back to the original image,
    applies per-class score thresholding and NMS, and keeps at most
    ``max_per_img`` detections.
    """

    def __init__(self, gpu_id, model_def, model_weights, cascade=0, FPN=0, num_cls=2):
        """Load the network and set up blob names and detection parameters.

        Args:
            gpu_id: GPU device index; a negative value selects CPU mode.
            model_def: path to the deploy prototxt describing the network.
            model_weights: path to the trained ``.caffemodel`` file.
            cascade: values > 0 select the cascade blob layout (five output
                heads); otherwise only the first-stage head is read.
            FPN: values > 0 select the FPN proposal blob names and the larger
                input size.
            num_cls: number of foreground classes (background excluded).
        """
        if gpu_id < 0:
            caffe.set_mode_cpu()
        else:
            caffe.set_device(gpu_id)
            caffe.set_mode_gpu()

        # Load the net in the test phase for inference.
        self.net = caffe.Net(model_def,  # defines the structure of the model
                             model_weights,  # contains the trained weights
                             caffe.TEST)  # use test mode (e.g., don't perform dropout)

        self.cascade = cascade > 0
        self.FPN = FPN > 0
        print(cascade, FPN)

        # Blob names must match those in the deploy prototxt.
        proposal_base = 'proposals_to_all' if self.FPN else 'proposals'
        if self.cascade:
            # cascade-rcnn model: three stage heads plus two "_avg" heads that
            # reuse the 2nd/3rd stage proposals and boxes with averaged scores.
            stage_suffixes = ['', '_2nd', '_3rd', '_2nd', '_3rd']
            self.bbox_blob_names = ['output_bbox_1st', 'output_bbox_2nd', 'output_bbox_3rd',
                                    'output_bbox_2nd', 'output_bbox_3rd']
            self.cls_prob_blob_names = ['cls_prob_1st', 'cls_prob_2nd', 'cls_prob_3rd',
                                        'cls_prob_2nd_avg', 'cls_prob_3rd_avg']
            self.output_names = ['1st', '2nd', '3rd', '2nd_avg', '3rd_avg']
        else:
            # baseline model: a single head
            stage_suffixes = ['']
            self.bbox_blob_names = ['output_bbox_1st']
            self.cls_prob_blob_names = ['cls_prob_1st']
            self.output_names = ['1st']
        self.proposal_blob_names = [proposal_base + suffix for suffix in stage_suffixes]

        self.num_outputs = len(self.proposal_blob_names)
        assert (self.num_outputs == len(self.bbox_blob_names))
        assert (self.num_outputs == len(self.cls_prob_blob_names))
        assert (self.num_outputs == len(self.output_names))

        # detection configuration
        self.det_thr = 0.001  # minimum class score for a box to be considered
        self.max_per_img = 100  # max number of detections kept per head
        self.nms_thresh = 0.5  # IoU threshold used by NMS
        # Use the normalized flag so the input size always agrees with the
        # blob-name selection above.
        if self.FPN:
            self.shortSize = 800
            self.longSize = 1312
        else:
            self.shortSize = 608
            self.longSize = 832

        # Per-channel mean subtracted from the input; kept as float so the
        # subtraction can produce negative values.
        self.PIXEL_MEANS = np.array([127, 127, 127], dtype=np.float32)
        self.num_cls = num_cls  # number of foreground classes, background excluded

    def _to_network_input(self, image):
        """Return the mean-subtracted float32 C x H x W array for an H x W x C image."""
        # Convert before subtracting: on uint8 data the subtraction would wrap
        # around for every pixel darker than the mean.
        blob = image.astype(np.float32) - np.asarray(self.PIXEL_MEANS, dtype=np.float32)
        return np.transpose(blob, (2, 0, 1))

    def _keep_top_scoring(self, detect_boxes):
        """Keep the ``max_per_img`` highest-scoring rows (score in column 5), in original order."""
        if self.max_per_img <= 0 or len(detect_boxes) <= self.max_per_img:
            return detect_boxes
        # Stable descending sort so ties are resolved by original position.
        best = np.argsort(-detect_boxes[:, 5], kind='mergesort')[:self.max_per_img]
        return detect_boxes[np.sort(best), :]

    def detect(self, image_file):
        """Run detection on one image file.

        Args:
            image_file: path of the image to read with OpenCV.

        Returns:
            A list with one entry per output head (same order as
            ``self.output_names``). Each entry is a list of
            ``[cls_id, x, y, w, h, score]`` rows in original-image pixels,
            where ``cls_id`` starts at 1 (0 is background).

        Raises:
            IOError: if the image cannot be read.
        """
        image = cv2.imread(image_file)  # BGR, default is cv2.IMREAD_COLOR 3-channel
        if image is None:
            raise IOError("cannot read image file: {}".format(image_file))
        orgH, orgW = image.shape[:2]
        print("image shape:", image.shape)

        # Scale so the short side matches shortSize, cap both sides at longSize,
        # then snap to a multiple of 32 as required by the network.
        rzRatio = float(self.shortSize) / min(orgH, orgW)
        imgH = min(rzRatio * orgH, self.longSize)
        imgW = min(rzRatio * orgW, self.longSize)
        resized_h = int(round(imgH / 32.0) * 32)
        resized_w = int(round(imgW / 32.0) * 32)
        # Height / width scale factors used to map boxes back to the original image.
        hwRatios = [float(resized_h) / orgH, float(resized_w) / orgW]
        print('resized -> ', (resized_w, resized_h))
        image = cv2.resize(image, (resized_w, resized_h), interpolation=cv2.INTER_CUBIC)
        transformed_image = self._to_network_input(image)

        # set net to batch size of 1 and feed the image
        self.net.blobs['data'].reshape(1, 3, resized_h, resized_w)
        self.net.blobs['data'].data[...] = transformed_image

        start = time.time()
        # Forward pass.
        blobs_out = self.net.forward()
        print('output_bbox_1st---', blobs_out['output_bbox_1st'].shape)
        end = time.time()
        cost_millis = int((end - start) * 1000)
        print("detection cost ms: ", cost_millis)

        detect_final_boxes = []
        for bbox_name, prob_name, proposal_name in zip(
                self.bbox_blob_names, self.cls_prob_blob_names, self.proposal_blob_names):
            # Copy because the box coordinates are modified in place below.
            boxes = self.net.blobs[bbox_name].data.copy()
            print(bbox_name, boxes.shape)
            boxes = boxes[:, :, 0, 0]
            # Columns 1..4 are x1, y1, x2, y2 in resized-image pixels; column 0
            # is dropped below (presumably the batch index - confirm against the prototxt).
            boxes[:, 1] /= hwRatios[1]
            boxes[:, 3] /= hwRatios[1]
            boxes[:, 2] /= hwRatios[0]
            boxes[:, 4] /= hwRatios[0]

            # clip boxes to the image borders
            boxes[:, 1] = np.maximum(0, boxes[:, 1])
            boxes[:, 2] = np.maximum(0, boxes[:, 2])
            boxes[:, 3] = np.minimum(orgW, boxes[:, 3])
            boxes[:, 4] = np.minimum(orgH, boxes[:, 4])
            # convert (x1, y1, x2, y2) to (x, y, w, h)
            boxes[:, 3] = boxes[:, 3] - boxes[:, 1] + 1
            boxes[:, 4] = boxes[:, 4] - boxes[:, 2] + 1
            output_bboxs = boxes[:, 1:]

            probs = self.net.blobs[prob_name].data
            print(prob_name, probs.shape)
            cls_prob = probs.reshape((-1, self.num_cls + 1))

            # Drop rows whose proposal has a non-positive width or height.
            proposals = self.net.blobs[proposal_name].data
            print(proposal_name, proposals.shape)
            prop_w = proposals[:, 3] - proposals[:, 1] + 1
            prop_h = proposals[:, 4] - proposals[:, 2] + 1
            keep_id = np.where((prop_w > 0) & (prop_h > 0))[0]
            output_bboxs = output_bboxs[keep_id, :]
            cls_prob = cls_prob[keep_id, :]

            detect_boxes = []
            for cls_id in range(1, self.num_cls + 1):  # 0 is background
                prob = cls_prob[:, cls_id][:, np.newaxis]
                bbset = np.hstack([output_bboxs, prob])
                if self.det_thr > 0:
                    keep_id = np.where(prob >= self.det_thr)[0]
                    bbset = bbset[keep_id, :]

                keep = self.cpu_nms_single_cls(bbset, self.nms_thresh)
                if not keep:
                    continue
                bbset = bbset[keep, :]
                cls_ids = np.array([cls_id] * len(bbset))[:, np.newaxis]
                detect_boxes.extend(np.hstack([cls_ids, bbset]).tolist())
            print("detected box num: ", len(detect_boxes))
            detect_boxes = self._keep_top_scoring(np.asarray(detect_boxes))
            detect_final_boxes.append(detect_boxes.tolist())

        return detect_final_boxes

    def cpu_nms_single_cls(self, dets, thresh):
        """Greedy non-maximum suppression for the boxes of a single class.

        Args:
            dets: array of shape (N, 5) with rows ``[x, y, w, h, score]``.
            thresh: boxes whose IoU with an already kept box exceeds this
                value are suppressed.

        Returns:
            List of kept row indices into ``dets``, highest score first.
        """
        x1 = dets[:, 0]
        y1 = dets[:, 1]
        widths = dets[:, 2]
        heights = dets[:, 3]
        scores = dets[:, 4]

        x2 = x1 + widths - 1
        y2 = y1 + heights - 1
        areas = widths * heights
        order = scores.argsort()[::-1]

        keep = []
        while order.size > 0:
            i = order[0]
            keep.append(i)
            rest = order[1:]
            # intersection of the best remaining box with all the others
            xx1 = np.maximum(x1[i], x1[rest])
            yy1 = np.maximum(y1[i], y1[rest])
            xx2 = np.minimum(x2[i], x2[rest])
            yy2 = np.minimum(y2[i], y2[rest])

            inter_w = np.maximum(0.0, xx2 - xx1 + 1)
            inter_h = np.maximum(0.0, yy2 - yy1 + 1)
            inter = inter_w * inter_h
            ovr = inter / (areas[i] + areas[rest] - inter)

            # +1 because ovr is indexed relative to order[1:]
            inds = np.where(ovr <= thresh)[0]
            order = order[inds + 1]

        return keep


# Draw the detection results onto the image and save it
def draw_detect_res(results, img_src_path, img_save_path, img_name, label_name):
    """Draw every detection of every output head on an image and write it to disk.

    Args:
        results: list with one entry per output head; each entry is a list of
            ``[cls_id, x, y, w, h, score]`` rows.
        img_src_path: directory containing the source image.
        img_save_path: directory the annotated image is written to.
        img_name: file name of the image, used for both reading and writing.
        label_name: sequence mapping a class id to its display label.
    """
    canvas = cv2.imread(os.path.join(img_src_path, img_name))
    text_font = cv2.FONT_HERSHEY_SIMPLEX

    # Boxes from all output heads are drawn on the same image.
    for head_dets in results:
        for det in head_dets:
            # rows store (x, y, w, h); convert to the two corner points
            left = int(round(det[1]))
            top = int(round(det[2]))
            right = int(round(det[1] + det[3] - 1))
            bottom = int(round(det[2] + det[4] - 1))
            cls_id = int(det[0])
            cv2.rectangle(canvas, (left, top), (right, bottom), (0, 0, 255), 2)
            cv2.putText(canvas, str(label_name[cls_id]), (left, top), text_font, 1.5, (255, 0, 0), 2)
            # log the box with its score rounded to three decimals
            print([cls_id, left, top, right, bottom, round(det[-1] * 1000) / 1000])

    cv2.imwrite(os.path.join(img_save_path, img_name), canvas)


def main():
    """Run the detector on every file in the test directory and save annotated copies."""
    # Display labels indexed by class id.
    label_name = ['blank', 'gq', 'sz', 'ss', 'ts', 'div']

    # Model configuration
    gpu_id = 0  # GPU device index to use
    model_def = "deploy.prototxt"  # network definition file
    model_weights = "./snapshot/_iter_50000.caffemodel"  # trained weights
    cascade = 1
    FPN = 0

    # Input / output locations
    image_test_path = "./test_data/"  # directory of test images
    res_save_path = "./detection_res/"
    if not os.path.exists(res_save_path):
        os.makedirs(res_save_path)

    # Build the network once, then reuse it for every image.
    detection = CaffeDetection(gpu_id, model_def, model_weights, cascade=cascade, FPN=FPN)
    for img_name in os.listdir(image_test_path):
        print("forward img:{}".format(img_name))
        img_path = os.path.join(image_test_path, img_name)
        results = detection.detect(img_path)  # detections of all output heads
        draw_detect_res(results, image_test_path, res_save_path, img_name, label_name)


# Run the demo only when this file is executed as a script, not when it is imported.
if __name__ == '__main__':
    main()