Faster Rcnn 源码解析(四)—— proposals_targte_layer.py

功能:

     根据GTbox和topN proposals选择满足要求的128个proposals(包括fg和bg),然后加上物体类别标签和bbox的回归目标,只有在该类别的对应位置上面才会有位置信息),并计算权重weights。(这128个proposals是包含了Gtbox的?)

输入:

bottom[0]: rpn_rois,从proposal_layer提取到的proposals
bottom[1]: gt_boxes:  GroundTruth boxes

输出:

top[0]: 'rois':包括所有roi的左上和右下角坐标
top[1]: 'labels':所有提取出的roi的标签,bg = 0
top[2]: 'bbox_targets':所有roi相对于与其有最大IOU的GTboxes的偏移量,是一个[4*classes]的vector,偏移量存放在对应label位置
top[3]: 'bbox_inside_weights': = 1
top[4]: 'bbox_outside_weights':代码里面没有出现

源码:

# --------------------------------------------------------
# Faster R-CNN
# Copyright (c) 2015 Microsoft
# Licensed under The MIT License [see LICENSE for details]
# Written by Ross Girshick and Sean Bell
# --------------------------------------------------------

import caffe
import yaml
import numpy as np
import numpy.random as npr
from fast_rcnn.config import cfg
from fast_rcnn.bbox_transform import bbox_transform
from utils.cython_bbox import bbox_overlaps

DEBUG = False
class ProposalTargetLayer(caffe.Layer):
    """
    Assign object detection proposals to ground-truth targets. Produces proposal
    classification labels and bounding-box regression targets.
    """

    def setup(self, bottom, top):
        layer_params = yaml.load(self.param_str_)
        self._num_classes = layer_params['num_classes']#获取总的分类数量

        # sampled rois (0, x1, y1, x2, y2)
        top[0].reshape(1, 5)
        # labels
        top[1].reshape(1, 1)
        # bbox_targets
        top[2].reshape(1, self._num_classes * 4)
        # bbox_inside_weights
        top[3].reshape(1, self._num_classes * 4)
        # bbox_outside_weights
        top[4].reshape(1, self._num_classes * 4)

    def forward(self, bottom, top):
        # Proposal ROIs (0, x1, y1, x2, y2) coming from RPN
        # (i.e., rpn.proposal_layer.ProposalLayer), or any other source
        all_rois = bottom[0].data
        # GT boxes (x1, y1, x2, y2, label)
        # TODO(rbg): it's annoying that sometimes I have extra info before
        # and other times after box coordinates -- normalize to one format
        gt_boxes = bottom[1].data

        # Include ground-truth boxes in the set of candidate rois
        # 产生了一个M*1的0矩阵
        zeros = np.zeros((gt_boxes.shape[0], 1), dtype=gt_boxes.dtype)
        # 由于gt_boxes是有5列信息的(x1,y1,x2,y2,标签),
        # 此时只取前4个(gt_boxes[:, :-1])即位置信息,存入all_rois
        # 即将rois和gt_boxes在0维拼合在一起,数据还是五列,第一列全0,后四列是box坐标;
        # 所以此时的all_rois存放了所有的gt和proposals的坐标
        all_rois = np.vstack(
            (all_rois, np.hstack((zeros, gt_boxes[:, :-1])))
        )

        # Sanity check: single batch only
        assert np.all(all_rois[:, 0] == 0), \
                'Only single item batches are supported'

        num_images = 1
        #cfg.TRAIN.BATCH_SIZE:感兴趣区域的个数
        rois_per_image = cfg.TRAIN.BATCH_SIZE / num_images #128/1 = 128
        # cfg.TRAIN.FG_FRACTION :fg的比例
        fg_rois_per_image = np.round(cfg.TRAIN.FG_FRACTION * rois_per_image) #0.25*128=32

        # Sample rois with classification labels and bounding box regression
        # targets
        labels, rois, bbox_targets, bbox_inside_weights = _sample_rois(
            all_rois, gt_boxes, fg_rois_per_image,
            rois_per_image, self._num_classes)

        if DEBUG:
            print 'num fg: {}'.format((labels > 0).sum())
            print 'num bg: {}'.format((labels == 0).sum())
            self._count += 1
            self._fg_num += (labels > 0).sum()
            self._bg_num += (labels == 0).sum()
            print 'num fg avg: {}'.format(self._fg_num / self._count)
            print 'num bg avg: {}'.format(self._bg_num / self._count)
            print 'ratio: {:.3f}'.format(float(self._fg_num) / float(self._bg_num))

        # sampled rois
        top[0].reshape(*rois.shape)
        top[0].data[...] = rois

        # classification labels
        top[1].reshape(*labels.shape)
        top[1].data[...] = labels

        # bbox_targets
        top[2].reshape(*bbox_targets.shape)
        top[2].data[...] = bbox_targets

        # bbox_inside_weights
        top[3].reshape(*bbox_inside_weights.shape)
        top[3].data[...] = bbox_inside_weights

        # bbox_outside_weights
        top[4].reshape(*bbox_inside_weights.shape)
        top[4].data[...] = np.array(bbox_inside_weights > 0).astype(np.float32)

    def backward(self, top, propagate_down, bottom):
        """This layer does not propagate gradients."""
        pass

    def reshape(self, bottom, top):
        """Reshaping happens during the call to forward."""
        pass

#计算bbox_target_data向量
def _get_bbox_regression_labels(bbox_target_data, num_classes):
    """Bounding-box regression targets (bbox_target_data) are stored in a
    compact form N x (class, tx, ty, tw, th)

    This function expands those targets into the 4-of-4*K representation used
    by the network (i.e. only one class has non-zero targets).

    Returns:
        bbox_target (ndarray): N x 4K blob of regression targets
        bbox_inside_weights (ndarray): N x 4K blob of loss weights
    """

    clss = bbox_target_data[:, 0] #表示一共有clss 个 bbox。
    bbox_targets = np.zeros((clss.size, 4 * num_classes), dtype=np.float32)#4 * num_classes = 4*21列
    bbox_inside_weights = np.zeros(bbox_targets.shape, dtype=np.float32)
    inds = np.where(clss > 0)[0] #只要fg的
    for ind in inds: #只在该类别对应的那4个位置放值
        cls = clss[ind]
        start = 4 * cls
        end = start + 4
        bbox_targets[ind, start:end] = bbox_target_data[ind, 1:]
        bbox_inside_weights[ind, start:end] = cfg.TRAIN.BBOX_INSIDE_WEIGHTS#前景的inside_weights =1
    return bbox_targets, bbox_inside_weights

#将label和其与GT的偏移量放在一起得到[label,偏移量]
def _compute_targets(ex_rois, gt_rois, labels):
    """Compute bounding-box regression targets for an image."""

    assert ex_rois.shape[0] == gt_rois.shape[0]
    assert ex_rois.shape[1] == 4
    assert gt_rois.shape[1] == 4
    #计算ROI和GT的偏移量
    targets = bbox_transform(ex_rois, gt_rois)
    if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
        # Optionally normalize targets by a precomputed mean and stdev
        targets = ((targets - np.array(cfg.TRAIN.BBOX_NORMALIZE_MEANS))
                / np.array(cfg.TRAIN.BBOX_NORMALIZE_STDS))
    return np.hstack(
            (labels[:, np.newaxis], targets)).astype(np.float32, copy=False)
#从一张图片的rois里采样得到roi
def _sample_rois(all_rois, gt_boxes, fg_rois_per_image, rois_per_image, num_classes):
    """Generate a random sample of RoIs comprising foreground and background
    examples.
    """
    # overlaps: (rois x gt_boxes)
    #计算ROI和GT的IOU
    overlaps = bbox_overlaps(
        np.ascontiguousarray(all_rois[:, 1:5], dtype=np.float),
        np.ascontiguousarray(gt_boxes[:, :4], dtype=np.float))
    #每行的最大值,表示与该anchor具有最大IOU的GT的下标
    gt_assignment = overlaps.argmax(axis=1)
    #最大IOU
    max_overlaps = overlaps.max(axis=1)
    #提取与该ROI具有最大IOU的GT的label
    labels = gt_boxes[gt_assignment, 4]

    # Select foreground RoIs as those with >= FG_THRESH overlap
    #找到大于规定阈值的fg,
    # 当fg_inds的个数比fg_rois_per_image大时,就只筛选32个出来;否则,全部保留;
    fg_inds = np.where(max_overlaps >= cfg.TRAIN.FG_THRESH)[0]
    # Guard against the case when an image has fewer than fg_rois_per_image
    # foreground RoIs
    fg_rois_per_this_image = min(fg_rois_per_image, fg_inds.size)
    # Sample foreground regions without replacement

    if fg_inds.size > 0:
        fg_inds = npr.choice(fg_inds, size=fg_rois_per_this_image, replace=False)

    # Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI)
    #挑选满足条件的bg
    bg_inds = np.where((max_overlaps < cfg.TRAIN.BG_THRESH_HI) &
                       (max_overlaps >= cfg.TRAIN.BG_THRESH_LO))[0]
    # Compute number of background RoIs to take from this image (guarding
    # against there being fewer than desired)
    #bg个数 = 总ROI-fg个数,接下来的方法和fg相同
    bg_rois_per_this_image = rois_per_image - fg_rois_per_this_image
    bg_rois_per_this_image = min(bg_rois_per_this_image, bg_inds.size)
    # Sample background regions without replacement
    if bg_inds.size > 0:
        bg_inds = npr.choice(bg_inds, size=bg_rois_per_this_image, replace=False)

    # The indices that we're selecting (both fg and bg)
    #所有挑选出来的ROI的下标
    keep_inds = np.append(fg_inds, bg_inds)
    # Select sampled values from various arrays:
    #这些ROIs的标签
    labels = labels[keep_inds]
    # Clamp labels for the background RoIs to 0
    #将bg的label全部置为0.
    labels[fg_rois_per_this_image:] = 0
    rois = all_rois[keep_inds]
    #计算roi的gt的偏移量,返回为[label,四个偏移量]
    bbox_target_data = _compute_targets(
        rois[:, 1:5], gt_boxes[gt_assignment[keep_inds], :4], labels)

    bbox_targets, bbox_inside_weights = \
        _get_bbox_regression_labels(bbox_target_data, num_classes)

    return labels, rois, bbox_targets, bbox_inside_weights

以下是一个基于深度学习的目标检测代码示例,使用的是 TensorFlow 和 Keras 框架。这个代码示例使用的是 Faster R-CNN 模型,可以在 COCO 数据集上进行训练和测试,同时还包括了数据增强和模型评估等功能。 ```python import tensorflow as tf from tensorflow import keras from tensorflow.keras import layers from tensorflow.keras import models from tensorflow.keras import optimizers from tensorflow.keras import backend as K from tensorflow.keras.layers import Input from tensorflow.keras.applications import ResNet50 from tensorflow.keras.layers import Conv2D from tensorflow.keras.layers import MaxPooling2D from tensorflow.keras.layers import Flatten from tensorflow.keras.layers import Dense from tensorflow.keras.layers import Dropout from tensorflow.keras.layers import GlobalAveragePooling2D from tensorflow.keras.layers import GlobalMaxPooling2D from tensorflow.keras.layers import TimeDistributed from tensorflow.keras.layers import AveragePooling2D from tensorflow.keras.layers import BatchNormalization from tensorflow.keras.layers import Activation from tensorflow.keras.layers import Add from tensorflow.keras.layers import ZeroPadding2D from tensorflow.keras.layers import Cropping2D from tensorflow.keras.layers import Lambda from tensorflow.keras.layers import Reshape from tensorflow.keras.layers import Concatenate from tensorflow.keras.layers import Softmax from tensorflow.keras.models import Model from tensorflow.keras.callbacks import TensorBoard, ModelCheckpoint, ReduceLROnPlateau, EarlyStopping from tensorflow.keras.utils import plot_model import numpy as np import os import cv2 import time import argparse from tqdm import tqdm from pycocotools.coco import COCO from pycocotools import mask as maskUtils os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' np.random.seed(42) tf.random.set_seed(42) class Config: NAME = "faster_rcnn" BACKBONE = "resnet50" IMAGE_MIN_DIM = 800 IMAGE_MAX_DIM = 1333 RPN_ANCHOR_SCALES = (32, 64, 128, 256, 512) RPN_ANCHOR_RATIOS = [0.5, 1, 2] RPN_ANCHOR_STRIDE = 16 RPN_NMS_THRESHOLD = 0.7 RPN_TRAIN_ANCHORS_PER_IMAGE = 256 RPN_POSITIVE_RATIO = 0.5 DETECTION_MIN_CONFIDENCE = 0.7 DETECTION_NMS_THRESHOLD = 0.3 DETECTION_MAX_INSTANCES = 100 LEARNING_RATE = 0.001 WEIGHT_DECAY = 0.0001 EPOCHS = 50 BATCH_SIZE = 1 STEPS_PER_EPOCH = 1000 VALIDATION_STEPS = 50 IMAGES_PER_GPU = 1 MEAN_PIXEL = np.array([123.7, 116.8, 103.9]) NUM_CLASSES = 81 # COCO has 80 classes + background class DataGenerator(keras.utils.Sequence): def __init__(self, dataset, config, shuffle=True, augment=True): self.dataset = dataset self.config = config self.shuffle = shuffle self.augment = augment self.image_ids = np.copy(self.dataset.image_ids) self.on_epoch_end() def __len__(self): return int(np.ceil(len(self.dataset.image_ids) / self.config.BATCH_SIZE)) def __getitem__(self, idx): batch_image_ids = self.image_ids[idx * self.config.BATCH_SIZE:(idx + 1) * self.config.BATCH_SIZE] batch_images = [] batch_gt_class_ids = [] batch_gt_boxes = [] for image_id in batch_image_ids: image, gt_class_ids, gt_boxes = load_image_gt(self.dataset, self.config, image_id, augment=self.augment) batch_images.append(image) batch_gt_class_ids.append(gt_class_ids) batch_gt_boxes.append(gt_boxes) batch_images = np.array(batch_images) batch_gt_class_ids = np.array(batch_gt_class_ids) batch_gt_boxes = np.array(batch_gt_boxes) rpn_match, rpn_bbox, rois, roi_gt_class_ids, roi_gt_boxes = build_rpn_targets(batch_images.shape, self.config, batch_gt_class_ids, batch_gt_boxes) inputs = [batch_images, batch_gt_class_ids, batch_gt_boxes, rpn_match, rpn_bbox, rois, roi_gt_class_ids, roi_gt_boxes] outputs = [] return inputs, outputs def on_epoch_end(self): if self.shuffle: np.random.shuffle(self.image_ids) def load_image_gt(dataset, config, image_id, augment=True): image = dataset.load_image(image_id) mask, class_ids = dataset.load_mask(image_id) bbox = maskUtils.toBbox(mask) bbox = np.expand_dims(bbox, axis=-1) class_ids = np.expand_dims(class_ids, axis=-1) gt_boxes = np.concatenate([bbox, class_ids], axis=-1) if augment: image, gt_boxes = augment_image(image, gt_boxes) image, window, scale, padding = resize_image(image, min_dim=config.IMAGE_MIN_DIM, max_dim=config.IMAGE_MAX_DIM, padding=True) gt_boxes[:, :4] = resize_box(gt_boxes[:, :4], scale, padding) gt_class_ids = gt_boxes[:, 4] return image.astype(np.float32) - config.MEAN_PIXEL, gt_class_ids.astype(np.int32), gt_boxes[:, :4].astype(np.float32) def augment_image(image, gt_boxes): if np.random.rand() < 0.5: image = np.fliplr(image) gt_boxes[:, 0] = image.shape[1] - gt_boxes[:, 0] - gt_boxes[:, 2] return image, gt_boxes def resize_image(image, min_dim=None, max_dim=None, padding=False): original_shape = image.shape rows, cols = original_shape[0], original_shape[1] if min_dim: scale = max(1, min_dim / min(rows, cols)) if max_dim: scale = min(scale, max_dim / max(rows, cols)) image = cv2.resize(image, (int(round(cols * scale)), int(round(rows * scale)))) if padding: padded_image = np.zeros((max_dim, max_dim, 3), dtype=np.float32) padded_image[:image.shape[0], :image.shape[1], :] = image window = (0, 0, image.shape[1], image.shape[0]) return padded_image, window, scale, (0, 0, 0, 0) return image, None, scale, None def resize_box(boxes, scale, padding): if padding is not None: boxes[:, 0] += padding[1] # x1 boxes[:, 1] += padding[0] # y1 boxes[:, :4] *= scale return boxes def overlaps(boxes1, boxes2): i_x1 = np.maximum(boxes1[:, 0], boxes2[:, 0]) i_y1 = np.maximum(boxes1[:, 1], boxes2[:, 1]) i_x2 = np.minimum(boxes1[:, 2], boxes2[:, 2]) i_y2 = np.minimum(boxes1[:, 3], boxes2[:, 3]) i_area = np.maximum(i_x2 - i_x1 + 1, 0) * np.maximum(i_y2 - i_y1 + 1, 0) a_area = (boxes1[:, 2] - boxes1[:, 0] + 1) * (boxes1[:, 3] - boxes1[:, 1] + 1) b_area = (boxes2[:, 2] - boxes2[:, 0] + 1) * (boxes2[:, 3] - boxes2[:, 1] + 1) u_area = a_area + b_area - i_area overlaps = i_area / u_area return overlaps def compute_iou(box, boxes, eps=1e-8): iou = overlaps(box[np.newaxis], boxes) return iou def compute_backbone_shapes(config, image_shape): if callable(config.BACKBONE): return config.BACKBONE(image_shape) assert isinstance(config.BACKBONE, str) if config.BACKBONE in ["resnet50", "resnet101"]: if image_shape[0] >= 800: return np.array([[200, 256], [100, 128], [50, 64], [25, 32], [13, 16]]) else: return np.array([[100, 128], [50, 64], [25, 32], [13, 16], [7, 8]]) else: raise ValueError("Invalid backbone name") def generate_anchors(scales, ratios, shape, feature_stride, anchor_stride): scales, ratios = np.meshgrid(np.array(scales), np.array(ratios)) scales, ratios = scales.flatten(), ratios.flatten() heights = scales / np.sqrt(ratios) widths = scales * np.sqrt(ratios) shifts_y = np.arange(0, shape[0], anchor_stride) * feature_stride shifts_x = np.arange(0, shape[1], anchor_stride) * feature_stride shifts_x, shifts_y = np.meshgrid(shifts_x, shifts_y) box_widths, box_centers_x = np.meshgrid(widths, shifts_x) box_heights, box_centers_y = np.meshgrid(heights, shifts_y) box_centers = np.stack([box_centers_y, box_centers_x], axis=2) box_sizes = np.stack([box_heights, box_widths], axis=2) box_centers = np.reshape(box_centers, [-1, 2]) box_sizes = np.reshape(box_sizes, [-1, 2]) boxes = np.concatenate([box_centers - 0.5 * box_sizes, box_centers + 0.5 * box_sizes], axis=1) boxes = np.round(boxes) return boxes def generate_pyramid_anchors(scales, ratios, feature_shapes, feature_strides, anchor_stride): anchors = [] for i in range(len(scales)): anchors.append(generate_anchors(scales[i], ratios, feature_shapes[i], feature_strides[i], anchor_stride)) return np.concatenate(anchors, axis=0) def norm_boxes(boxes, shape): boxes = boxes.astype(np.float32) h, w = shape[:2] scale = np.array([h - 1, w - 1, h - 1, w - 1]) shift = np.array([0, 0, 1, 1]) boxes = np.divide(boxes - shift, scale) boxes = np.maximum(np.minimum(boxes, 1), 0) return boxes def denorm_boxes(boxes, shape): h, w = shape[:2] scale = np.array([h - 1, w - 1, h - 1, w - 1]) shift = np.array([0, 0, 1, 1]) boxes = boxes * scale + shift return boxes.astype(np.int32) def overlaps_graph(boxes1, boxes2): b1 = tf.reshape(tf.tile(tf.expand_dims(boxes1, 1), [1, 1, tf.shape(boxes2)[0]]), [-1, 4]) b2 = tf.tile(boxes2, [tf.shape(boxes1)[0], 1]) b2 = tf.reshape(tf.transpose(b2), [-1, 4]) overlaps = compute_iou(b1, b2) overlaps = tf.reshape(overlaps, [tf.shape(boxes1)[0], tf.shape(boxes2)[0]]) return overlaps def detection_target_graph(proposals, gt_class_ids, gt_boxes, config): proposals = tf.cast(proposals, tf.float32) gt_boxes = tf.cast(gt_boxes, tf.float32) gt_class_ids = tf.cast(gt_class_ids, tf.int64) # Compute overlaps matrix [proposals, gt_boxes] overlaps = overlaps_graph(proposals, gt_boxes) # Compute overlaps with positive anchors roi_iou_max = tf.reduce_max(overlaps, axis=1) positive_roi_bool = (roi_iou_max >= config.RPN_POSITIVE_RATIO) positive_indices = tf.where(positive_roi_bool)[:, 0] # Subsample ROIs. Aim for 33% positive # Positive ROIs positive_count = int(config.RPN_TRAIN_ANCHORS_PER_IMAGE * config.RPN_POSITIVE_RATIO) positive_indices = tf.random.shuffle(positive_indices)[:positive_count] positive_count = tf.shape(positive_indices)[0] # Negative ROIs. Add enough to maintain positive:negative ratio. r = 1.0 / config.RPN_POSITIVE_RATIO negative_count = tf.cast(r * tf.cast(positive_count, tf.float32), tf.int32) - positive_count negative_indices = tf.where(roi_iou_max < config.RPN_POSITIVE_RATIO)[:, 0] negative_count = tf.math.minimum(tf.shape(negative_indices)[0], negative_count) negative_indices = tf.random.shuffle(negative_indices)[:negative_count] # Gather selected ROIs positive_rois = tf.gather(proposals, positive_indices) negative_rois = tf.gather(proposals, negative_indices) # Assign positive ROIs to GT boxes. positive_overlaps = tf.gather(overlaps, positive_indices) roi_gt_box_assignment = tf.cond( tf.greater(tf.shape(positive_overlaps)[1], 0), true_fn=lambda: tf.argmax(positive_overlaps, axis=1), false_fn=lambda: tf.cast(tf.constant([]), tf.int64) ) roi_gt_boxes = tf.gather(gt_boxes, roi_gt_box_assignment) roi_gt_class_ids = tf.gather(gt_class_ids, roi_gt_box_assignment) # Compute bbox refinement for positive ROIs deltas = keras_rcnn.backend.boxutils.bbox_transform(positive_rois, roi_gt_boxes) deltas /= tf.constant(config.BBOX_STD_DEV, dtype=tf.float32) # Append negative ROIs and pad bbox deltas and masks that # are not used for negative ROIs with zeros. rois = tf.concat([positive_rois, negative_rois], axis=0) N = tf.shape(negative_rois)[0] P = tf.math.maximum(config.RPN_TRAIN_ANCHORS_PER_IMAGE - tf.shape(rois)[0], 0) rois = tf.pad(rois, [(0, P), (0, 0)]) roi_gt_boxes = tf.pad(roi_gt_boxes, [(0, N + P), (0, 0)]) roi_gt_class_ids = tf.pad(roi_gt_class_ids, [(0, N + P)]) deltas = tf.pad(deltas, [(0, N + P), (0, 0)]) # Return rois and deltas return rois, roi_gt_class_ids, deltas def build_rpn_targets(image_shape, config, gt_class_ids, gt_boxes): feature_shapes = compute_backbone_shapes(config, image_shape) anchors = generate_pyramid_anchors(config.RPN_ANCHOR_SCALES, config.RPN_ANCHOR_RATIOS, feature_shapes, config.BACKBONE_SHAPES, config.RPN_ANCHOR_STRIDE) rpn_match, rpn_bbox = keras_rcnn.backend.anchor.get_best_anchor(anchors, gt_boxes, config) rpn_match = tf.expand_dims(rpn_match, axis=-1) rpn_bbox = tf.reshape(rpn_bbox, [-1, 4]) rois, roi_gt_class_ids, deltas = tf.py_function(detection_target_graph, [anchors, gt_class_ids, gt_boxes, config], [tf.float32, tf.int64, tf.float32]) rois.set_shape([config.RPN_TRAIN_ANCHORS_PER_IMAGE, 4]) roi_gt_class_ids.set_shape([config.RPN_TRAIN_ANCHORS_PER_IMAGE]) deltas.set_shape([config.RPN_TRAIN_ANCHORS_PER_IMAGE, 4 * config.NUM_CLASSES]) rpn_match.set_shape([None, 1]) rpn_bbox.set_shape([None, 4]) rois = tf.stop_gradient(rois) roi_gt_class_ids = tf.stop_gradient(roi_gt_class_ids) deltas = tf.stop_gradient(deltas) rpn_match = tf.stop_gradient(rpn_match) rpn_bbox = tf.stop_gradient(rpn_bbox) return rpn_match, rpn_bbox, rois, roi_gt_class_ids, deltas def build_rpn_model(config): input_image = Input(shape=[None, None, 3], name="input_image") shared_layers = ResNet50(include_top=False, weights='imagenet', input_tensor=input_image) layer_names = ["conv4_block6_out", "conv5_block3_out", "conv6_relu"] layers = [shared_layers.get_layer(name).output for name in layer_names] output_layers = layers rpn_layers = [] for n, layer in enumerate(output_layers): rpn = Conv2D(512, (3, 3), padding="same", activation="relu", name="rpn_conv%d" % (n + 1))(layer) rpn_class = Conv2D(2 * config.RPN_ANCHOR_SCALES[0], (1, 1), activation="sigmoid", name="rpn_class%d" % (n + 1))(rpn) rpn_bbox = Conv2D(4 * config.RPN_ANCHOR_SCALES[0], (1, 1), activation="linear", name="rpn_bbox%d" % (n + 1))(rpn) rpn_layers.append(rpn_class) rpn_layers.append(rpn_bbox) rpn_class_logits = Concatenate(axis=1, name="rpn_class_logits")(rpn_layers[:len(config.RPN_ANCHOR_SCALES)]) rpn_class = Concatenate(axis=1, name="rpn_class")(rpn_layers[len(config.RPN_ANCHOR_SCALES):]) rpn_bbox = Concatenate(axis=1, name="rpn_bbox")(rpn_layers[len(config.R
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值