1.RPN的构建
对RPN的构建在FPN.py的add_fpn_rpn_outputs函数中
def add_fpn_rpn_outputs(model, blobs_in, dim_in, spatial_scales):
    """Add RPN on FPN specific outputs.

    blobs_in is ordered coarsest-to-finest, e.g.:
        [BlobReference("gpu_0/fpn_res5_2_sum_subsampled_2x"),   # P6
         BlobReference("gpu_0/fpn_res5_2_sum"),                 # P5
         BlobReference("gpu_0/fpn_res4_5_sum"),                 # P4
         BlobReference("gpu_0/fpn_res3_3_sum"),                 # P3
         BlobReference("gpu_0/fpn_res2_2_sum")]                 # P2

    Taking "gpu_0/fpn_res2_2_sum" (P2) as an example, this adds:
        conv_rpn_fpn2        3x3 conv hidden representation (256 channels)
        rpn_cls_logits_fpn2  per-anchor objectness logits
        rpn_bbox_pred_fpn2   per-anchor box regression deltas
    """
    # For FPN's RPN, each spatial position of each level only produces
    # len(RPN_ASPECT_RATIOS) anchors; the anchor area is fixed per level
    # and grows as the level gets coarser.
    num_anchors = len(cfg.FPN.RPN_ASPECT_RATIOS)
    dim_out = dim_in  # FPN feature dimension (typically 256)
    k_max = cfg.FPN.RPN_MAX_LEVEL  # coarsest level of pyramid (e.g. 6)
    k_min = cfg.FPN.RPN_MIN_LEVEL  # finest level of pyramid (e.g. 2)
    assert len(blobs_in) == k_max - k_min + 1
    # Starting from P2, add the RPN outputs level by level.
    for lvl in range(k_min, k_max + 1):
        bl_in = blobs_in[k_max - lvl]  # blobs_in is in reversed order
        sc = spatial_scales[k_max - lvl]  # in reversed order
        slvl = str(lvl)
        if lvl == k_min:
            # Create conv ops with randomly initialized weights and
            # zeroed biases for the first FPN level; these will be shared by
            # all other FPN levels.
            # RPN hidden representation: a 3x3 conv first.
            conv_rpn_fpn = model.Conv(
                bl_in,                       # input blob name
                'conv_rpn_fpn' + slvl,       # output blob name
                dim_in,                      # input channels: 256
                dim_out,                     # output channels: 256
                kernel=3,
                pad=1,
                stride=1,
                weight_init=gauss_fill(0.01),
                bias_init=const_fill(0.0)
            )
            model.Relu(conv_rpn_fpn, conv_rpn_fpn)
            # Proposal classification scores: one logit per anchor.
            rpn_cls_logits_fpn = model.Conv(
                conv_rpn_fpn,
                'rpn_cls_logits_fpn' + slvl,
                dim_in,
                num_anchors,
                kernel=1,
                pad=0,
                stride=1,
                weight_init=gauss_fill(0.01),
                bias_init=const_fill(0.0)
            )
            # Proposal bbox regression deltas: 4 values per anchor.
            rpn_bbox_pred_fpn = model.Conv(
                conv_rpn_fpn,
                'rpn_bbox_pred_fpn' + slvl,
                dim_in,
                4 * num_anchors,
                kernel=1,
                pad=0,
                stride=1,
                weight_init=gauss_fill(0.01),
                bias_init=const_fill(0.0)
            )
        else:
            # Share weights and biases: every level reuses the W and b
            # created for the first (k_min) level.
            sk_min = str(k_min)
            # RPN hidden representation
            conv_rpn_fpn = model.ConvShared(
                bl_in,
                'conv_rpn_fpn' + slvl,
                dim_in,
                dim_out,
                kernel=3,
                pad=1,
                stride=1,
                weight='conv_rpn_fpn' + sk_min + '_w',
                bias='conv_rpn_fpn' + sk_min + '_b'
            )
            model.Relu(conv_rpn_fpn, conv_rpn_fpn)
            # Proposal classification scores
            rpn_cls_logits_fpn = model.ConvShared(
                conv_rpn_fpn,
                'rpn_cls_logits_fpn' + slvl,
                dim_in,
                num_anchors,
                kernel=1,
                pad=0,
                stride=1,
                weight='rpn_cls_logits_fpn' + sk_min + '_w',
                bias='rpn_cls_logits_fpn' + sk_min + '_b'
            )
            # Proposal bbox regression deltas
            rpn_bbox_pred_fpn = model.ConvShared(
                conv_rpn_fpn,
                'rpn_bbox_pred_fpn' + slvl,
                dim_in,
                4 * num_anchors,
                kernel=1,
                pad=0,
                stride=1,
                weight='rpn_bbox_pred_fpn' + sk_min + '_w',
                bias='rpn_bbox_pred_fpn' + sk_min + '_b'
            )
        if not model.train or cfg.MODEL.FASTER_RCNN:
            # Proposals are needed during:
            # 1) inference (== not model.train) for RPN only and Faster R-CNN
            # OR
            # 2) training for Faster R-CNN
            # Otherwise (== training for RPN only), proposals are not needed
            lvl_anchors = generate_anchors(
                stride=2.**lvl,
                sizes=(cfg.FPN.RPN_ANCHOR_START_SIZE * 2.**(lvl - k_min), ),
                aspect_ratios=cfg.FPN.RPN_ASPECT_RATIOS
            )
            # Sigmoid activation on top of the classification logits.
            rpn_cls_probs_fpn = model.net.Sigmoid(
                rpn_cls_logits_fpn, 'rpn_cls_probs_fpn' + slvl
            )
            # The generated proposals are named rpn_rois_fpn<lvl>;
            # rpn_roi_probs_fpn<lvl> holds each proposal's score.
            model.GenerateProposals(
                [rpn_cls_probs_fpn, rpn_bbox_pred_fpn, 'im_info'],
                ['rpn_rois_fpn' + slvl, 'rpn_roi_probs_fpn' + slvl],
                anchors=lvl_anchors,
                spatial_scale=sc
            )
对于每一层FPN
层数 | 语义转换 | 逻辑分类(sigmoid激活后) | 回归层 | 输出rois | 输出roi分数 |
P2 | conv_rpn_fpn2 | rpn_cls_logits_fpn2(rpn_cls_probs_fpn2) | rpn_bbox_pred_fpn2 | rpn_rois_fpn2 | rpn_roi_probs_fpn2 |
P3 | conv_rpn_fpn3 | rpn_cls_logits_fpn3(rpn_cls_probs_fpn3) | rpn_bbox_pred_fpn3 | rpn_rois_fpn3 | rpn_roi_probs_fpn3 |
P4 | conv_rpn_fpn4 | rpn_cls_logits_fpn4(rpn_cls_probs_fpn4) | rpn_bbox_pred_fpn4 | rpn_rois_fpn4 | rpn_roi_probs_fpn4 |
P5 | conv_rpn_fpn5 | rpn_cls_logits_fpn5(rpn_cls_probs_fpn5) | rpn_bbox_pred_fpn5 | rpn_rois_fpn5 | rpn_roi_probs_fpn5 |
P6 | conv_rpn_fpn6 | rpn_cls_logits_fpn6(rpn_cls_probs_fpn6) | rpn_bbox_pred_fpn6 | rpn_rois_fpn6 | rpn_roi_probs_fpn6 |
其中rpn_cls_probs_fpn和rpn_bbox_pred_fpn是产生proposal(对应函数为model.GenerateProposals)的输入,rpn_rois_fpn和rpn_roi_probs_fpn是产生proposal的输出。
完成的内容是:
-
从约20000个anchors中选取概率较大的 12000 个 anchor
-
利用回归的位置参数,修正这 12000 个 anchor 的位置,得到 RoIs
-
利用非极大值(Non-maximum suppression, NMS)抑制,选出概率最大的 2000 个 RoIs
2.为RPN构建损失
def add_fpn_rpn_losses(model):
    """Add RPN on FPN specific losses.

    For each FPN level lvl in [RPN_MIN_LEVEL, RPN_MAX_LEVEL] this adds:
        loss_rpn_cls_fpn<lvl>   sigmoid cross-entropy classification loss
        loss_rpn_bbox_fpn<lvl>  smooth-L1 box regression loss
    and returns the dict of loss gradients accumulated over all levels.
    """
    loss_gradients = {}
    for lvl in range(cfg.FPN.RPN_MIN_LEVEL, cfg.FPN.RPN_MAX_LEVEL + 1):
        slvl = str(lvl)
        # Spatially narrow the full-sized RPN label arrays to match the
        # feature map shape at this level.
        model.net.SpatialNarrowAs(
            ['rpn_labels_int32_wide_fpn' + slvl, 'rpn_cls_logits_fpn' + slvl],
            'rpn_labels_int32_fpn' + slvl
        )
        for key in ('targets', 'inside_weights', 'outside_weights'):
            model.net.SpatialNarrowAs(
                [
                    'rpn_bbox_' + key + '_wide_fpn' + slvl,
                    'rpn_bbox_pred_fpn' + slvl
                ],
                'rpn_bbox_' + key + '_fpn' + slvl
            )
        # RPN classification loss for this level.
        loss_rpn_cls_fpn = model.net.SigmoidCrossEntropyLoss(
            ['rpn_cls_logits_fpn' + slvl, 'rpn_labels_int32_fpn' + slvl],
            'loss_rpn_cls_fpn' + slvl,
            normalize=0,
            scale=(
                model.GetLossScale() / cfg.TRAIN.RPN_BATCH_SIZE_PER_IM /
                cfg.TRAIN.IMS_PER_BATCH
            )
        )
        # Normalization by (1) RPN_BATCH_SIZE_PER_IM and (2) IMS_PER_BATCH is
        # handled by (1) setting bbox outside weights and (2) SmoothL1Loss
        # normalizes by IMS_PER_BATCH
        loss_rpn_bbox_fpn = model.net.SmoothL1Loss(
            [
                'rpn_bbox_pred_fpn' + slvl, 'rpn_bbox_targets_fpn' + slvl,
                'rpn_bbox_inside_weights_fpn' + slvl,
                'rpn_bbox_outside_weights_fpn' + slvl
            ],
            'loss_rpn_bbox_fpn' + slvl,
            beta=1. / 9.,
            scale=model.GetLossScale(),
        )
        loss_gradients.update(
            blob_utils.
            get_loss_gradients(model, [loss_rpn_cls_fpn, loss_rpn_bbox_fpn])
        )
        model.AddLosses(['loss_rpn_cls_fpn' + slvl, 'loss_rpn_bbox_fpn' + slvl])
    return loss_gradients
以P2层为例:
[u'rpn_labels_int32_wide_fpn2', u'rpn_cls_logits_fpn2'] ——> rpn_labels_int32_fpn2
[u'rpn_bbox_targets_wide_fpn2', u'rpn_bbox_pred_fpn2'] ——> rpn_bbox_targets_fpn2
[u'rpn_bbox_inside_weights_wide_fpn2', u'rpn_bbox_pred_fpn2'] ——> rpn_bbox_inside_weights_fpn2
[u'rpn_bbox_outside_weights_wide_fpn2', u'rpn_bbox_pred_fpn2'] ——> rpn_bbox_outside_weights_fpn2
[u'rpn_cls_logits_fpn2', u'rpn_labels_int32_fpn2'] ——> loss_rpn_cls_fpn2 #RPN的分类损失
[u'rpn_bbox_pred_fpn2', u'rpn_bbox_targets_fpn2', u'rpn_bbox_inside_weights_fpn2', u'rpn_bbox_outside_weights_fpn2'] ——> loss_rpn_bbox_fpn2 #RPN的边框损失