from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
from model.config import cfg
import numpy as np
import numpy.random as npr
from utils.bbox import bbox_overlaps
from model.bbox_transform import bbox_transform
import torch
"""
这个程序里的东西都是为训练服务的,而训练好后前向出来后topn,NMS等操作在proposal_layer.py
因为是为训练服务的,所以其中有赋label的过程,还有为了计算loss而提供的rpn_bbox_inside_weights等数据
rpn_bbox_inside_weights即loss中为了让loss只计算前景的框位置回归而设置的tensor
用法类似于(delta - gt)点乘rpn_bbox_inside_weights,,即得到了边框loss。
anchor_target_layer是入口,此时有2W个原始anchor
处理步骤为:
-->首先赋label:总的锚点为all_anchors,去除边框外的框
-->只算在边界内的框,这些框的下标inds_inside,框为anchors,分类为label=-1,三者行数都相等。
-->将threshhold>0.7的label=1,<0.3的label=0。
-->还需要把与gt的overlap最大的框赋label=1。gt_argmax_overlaps是gt依次对应的第几个anchor。
-->各从前背景label 1 0中选取128个,其余重新设置label=-1
-->内部权重bbox_inside_weights 为坐标前乘的系数,格式为[[0,0,0,0或1111],...刚开始为内部框个,后来是全部all_anchors个]
外部权重bbox_outside_weights 为正负区别的系数,格式同上,不过非0数字为1/N_sample,正负权重可以设置(focal Loss论文中会设置)
-->前面都是在边界内anchor上计算的,
然后用_unmap函数把label,bbox_targets, bbox_inside_weights,bbox_outside_weights这几个参数扩展到
全部anchor的shape尺度,对于边界外的anchor,label填充的是-1,其他三个填充的是0.(列数不变)
-->然后相当于有all_anchors个信息,这些信息的排序方式通过label.reshape((1, height, width, A))可以看出是
联系特征图,先是A(K=9)个anchor为一组,按行排列,排列完一行另起一行。这样height, width, A就解释通了。
数字1应该是1张图片
然后.transpose(0,3,1,2),可以把k理解为通道(只是假设理解),即先填充了一个通道的W×H吗,然后再填第二个通道
最后.reshape((1, 1, A * height, width))顺序应该没变,只是括号[]前后位置变了
其余3个返回值应该还是k-W-H-图的理解方式,只不过元素个数×4,也就是4-k-W-H-图
.reshape((1, height, width, A * 4))
"""
def anchor_target_layer(rpn_cls_score, gt_boxes, im_info, _feat_stride,
all_anchors, num_anchors):
"""Same as the anchor target layer in original Fast/er RCNN """
    A = num_anchors                           # anchors per feature-map location (e.g. 9)
    total_anchors = all_anchors.shape[0]      # K * A anchors over the whole feature map
    K = total_anchors // num_anchors          # number of feature-map locations (not used below)
    _allowed_border = 0                       # anchors may not cross the image border at all
    height, width = rpn_cls_score.shape[1:3]
    # keep only anchors that lie completely inside the image
    inds_inside = np.where(
        (all_anchors[:, 0] >= -_allowed_border) &
        (all_anchors[:, 1] >= -_allowed_border) &
        (all_anchors[:, 2] < im_info[1] + _allowed_border) &  # image width
        (all_anchors[:, 3] < im_info[0] + _allowed_border)    # image height
    )[0]
    anchors = all_anchors[inds_inside, :]

    # label: 1 = foreground, 0 = background, -1 = don't care
    labels = np.empty((len(inds_inside), ), dtype=np.float32)
    labels.fill(-1)
    # overlaps[i, j] = IoU between inside anchor i and gt box j
    overlaps = bbox_overlaps(
        np.ascontiguousarray(anchors, dtype=np.float64),
        np.ascontiguousarray(gt_boxes, dtype=np.float64))
    argmax_overlaps = overlaps.argmax(axis=1)      # best gt box for each anchor
    max_overlaps = overlaps[np.arange(len(inds_inside)), argmax_overlaps]
    gt_argmax_overlaps = overlaps.argmax(axis=0)   # best anchor for each gt box
    gt_max_overlaps = overlaps[gt_argmax_overlaps,
                               np.arange(overlaps.shape[1])]
    # every anchor that ties the best overlap of some gt box counts as that gt's best anchor
    gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]
    if not cfg.TRAIN.RPN_CLOBBER_POSITIVES:
        # assign background labels first so that positives can overwrite them
        labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0
    # foreground: for each gt box, the anchor(s) with the highest overlap ...
    labels[gt_argmax_overlaps] = 1
    # ... and every anchor whose overlap exceeds the positive threshold
    labels[max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = 1
    if cfg.TRAIN.RPN_CLOBBER_POSITIVES:
        # assign background labels last so that they can overwrite positives
        labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0
    # subsample foreground anchors if there are too many
    # (at most RPN_FG_FRACTION * RPN_BATCHSIZE of them)
    num_fg = int(cfg.TRAIN.RPN_FG_FRACTION * cfg.TRAIN.RPN_BATCHSIZE)
    fg_inds = np.where(labels == 1)[0]
    if len(fg_inds) > num_fg:
        disable_inds = npr.choice(
            fg_inds, size=(len(fg_inds) - num_fg), replace=False)
        labels[disable_inds] = -1

    # subsample background anchors so that fg + bg == RPN_BATCHSIZE
    num_bg = cfg.TRAIN.RPN_BATCHSIZE - np.sum(labels == 1)
    bg_inds = np.where(labels == 0)[0]
    if len(bg_inds) > num_bg:
        disable_inds = npr.choice(
            bg_inds, size=(len(bg_inds) - num_bg), replace=False)
        labels[disable_inds] = -1
    # regression targets: offsets from each inside anchor to its best-matching gt box
    bbox_targets = _compute_targets(anchors, gt_boxes[argmax_overlaps, :])

    # inside weights: only foreground anchors contribute to the box-regression loss
    bbox_inside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32)
    bbox_inside_weights[labels == 1, :] = np.array(
        cfg.TRAIN.RPN_BBOX_INSIDE_WEIGHTS)
    # outside weights: normalization of the box loss over the sampled anchors
    bbox_outside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32)
    if cfg.TRAIN.RPN_POSITIVE_WEIGHT < 0:
        # uniform weighting: 1 / (number of sampled anchors) for positives and negatives
        num_examples = np.sum(labels >= 0)
        positive_weights = np.ones((1, 4)) * 1.0 / num_examples
        negative_weights = np.ones((1, 4)) * 1.0 / num_examples
    else:
        # weight positives by p and negatives by (1 - p), each normalized by its own count
        assert ((cfg.TRAIN.RPN_POSITIVE_WEIGHT > 0) &
                (cfg.TRAIN.RPN_POSITIVE_WEIGHT < 1))
        positive_weights = (
            cfg.TRAIN.RPN_POSITIVE_WEIGHT / np.sum(labels == 1))
        negative_weights = (
            (1.0 - cfg.TRAIN.RPN_POSITIVE_WEIGHT) / np.sum(labels == 0))
    bbox_outside_weights[labels == 1, :] = positive_weights
    bbox_outside_weights[labels == 0, :] = negative_weights
    # map the per-inside-anchor results back onto the full set of anchors
    labels = _unmap(labels, total_anchors, inds_inside, fill=-1)
    bbox_targets = _unmap(bbox_targets, total_anchors, inds_inside, fill=0)
    bbox_inside_weights = _unmap(
        bbox_inside_weights, total_anchors, inds_inside, fill=0)
    bbox_outside_weights = _unmap(
        bbox_outside_weights, total_anchors, inds_inside, fill=0)
    # move the anchor index in front of the spatial dimensions, (1, A, height, width),
    # then fold it into the height dimension
    labels = labels.reshape((1, height, width, A)).transpose(0, 3, 1, 2)
    labels = labels.reshape((1, 1, A * height, width))
    rpn_labels = labels

    # the three box tensors are only reshaped to (1, height, width, A * 4)
    bbox_targets = bbox_targets \
        .reshape((1, height, width, A * 4))
    rpn_bbox_targets = bbox_targets

    bbox_inside_weights = bbox_inside_weights \
        .reshape((1, height, width, A * 4))
    rpn_bbox_inside_weights = bbox_inside_weights

    bbox_outside_weights = bbox_outside_weights \
        .reshape((1, height, width, A * 4))
    rpn_bbox_outside_weights = bbox_outside_weights

    return rpn_labels, rpn_bbox_targets, rpn_bbox_inside_weights, rpn_bbox_outside_weights
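# Illustrative sketch (hypothetical helper, not called anywhere in this module): how the
# returned weight tensors typically enter the RPN smooth-L1 box loss, i.e. the
# "(delta - gt) * rpn_bbox_inside_weights" idea from the docstring. The function name and
# the sigma value are assumptions for illustration, not this repository's loss code; all
# arguments are assumed to be torch tensors of identical shape.
def _example_rpn_box_loss(rpn_bbox_pred, rpn_bbox_targets,
                          rpn_bbox_inside_weights, rpn_bbox_outside_weights,
                          sigma=3.0):
    """Smooth-L1 box loss masked by inside weights and normalized by outside weights."""
    sigma2 = sigma ** 2
    # inside weights zero out the contribution of background / don't-care anchors
    diff = rpn_bbox_inside_weights * (rpn_bbox_pred - rpn_bbox_targets)
    abs_diff = torch.abs(diff)
    smooth_sign = (abs_diff < 1.0 / sigma2).float()
    loss = (diff ** 2) * (sigma2 / 2.0) * smooth_sign \
        + (abs_diff - 0.5 / sigma2) * (1.0 - smooth_sign)
    # outside weights carry the 1 / N_sample (or positive/negative) normalization
    return (rpn_bbox_outside_weights * loss).sum()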
def _unmap(data, count, inds, fill=0):
    """Unmap a subset of items (data) back to the original set of items (of
    size count), filling the missing entries with `fill`."""
if len(data.shape) == 1:
ret = np.empty((count, ), dtype=np.float32)
ret.fill(fill)
ret[inds] = data
else:
ret = np.empty((count, ) + data.shape[1:], dtype=np.float32)
ret.fill(fill)
ret[inds, :] = data
return ret
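# Usage example with toy numbers:
#   _unmap(np.array([7., 8.]), 5, np.array([1, 3]), fill=-1)
# returns array([-1.,  7., -1.,  8., -1.], dtype=float32).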
def _compute_targets(ex_rois, gt_rois):
"""Compute bounding-box regression targets for an image."""
assert ex_rois.shape[0] == gt_rois.shape[0]
assert ex_rois.shape[1] == 4
    assert gt_rois.shape[1] == 5  # 4 coordinates + class label

    # bbox_transform expects torch tensors; drop the class column from the gt boxes
    return bbox_transform(
        torch.from_numpy(ex_rois), torch.from_numpy(gt_rois[:, :4])).numpy()
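# For reference (an assumption about model/bbox_transform.py, not verified here):
# bbox_transform in this code family usually produces the standard R-CNN targets
#   dx = (gt_ctr_x - anchor_ctr_x) / anchor_w,   dy = (gt_ctr_y - anchor_ctr_y) / anchor_h,
#   dw = log(gt_w / anchor_w),                   dh = log(gt_h / anchor_h),
# so bbox_targets holds one (dx, dy, dw, dh) row per anchor.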