转载:https://blog.youkuaiyun.com/qiusuoxiaozi/article/details/79522136
先把整个函数体摘抄如下:
// Matches predicted boxes to ground truth boxes by Jaccard overlap.
//
// A greedy bipartite matching always runs first: it repeatedly picks the
// (prediction, ground truth) pair with the largest remaining overlap, so each
// ground truth is assigned to at most one prediction. When match_type is
// PER_PREDICTION, every prediction that is still unmatched is then assigned to
// the ground truth it overlaps most, provided that overlap is at least
// overlap_threshold.
//
// Parameters:
//   gt_bboxes      - ground truth boxes.
//   pred_bboxes    - predicted boxes to be matched.
//   label          - -1 to match against every ground truth box; otherwise only
//                    ground truth boxes whose label() equals this value are
//                    considered.
//   match_type     - BIPARTITE or PER_PREDICTION; any other value hits
//                    LOG(FATAL).
//   overlap_threshold - minimum overlap accepted in the PER_PREDICTION pass
//                    (not used by the bipartite pass).
//   ignore_cross_boundary_bbox - when true, predictions for which
//                    IsCrossBoundaryBBox() returns true are marked -2 and take
//                    no part in matching.
//   match_indices  - output, one entry per prediction: index into gt_bboxes of
//                    the matched ground truth, -1 if unmatched, -2 if ignored.
//   match_overlaps - output, one entry per prediction: overlap with the matched
//                    ground truth; for an unmatched prediction, its largest
//                    overlap with any candidate ground truth (0 if none).
void MatchBBox(const vector<NormalizedBBox>& gt_bboxes,
const vector<NormalizedBBox>& pred_bboxes, const int label,
const MatchType match_type, const float overlap_threshold,
const bool ignore_cross_boundary_bbox,
vector<int>* match_indices, vector<float>* match_overlaps) {
int num_pred = pred_bboxes.size();
// Reset both outputs: every prediction starts unmatched (-1) with overlap 0.
match_indices->clear();
match_indices->resize(num_pred, -1);
match_overlaps->clear();
match_overlaps->resize(num_pred, 0.);
// gt_indices maps a candidate position j (used throughout below) back to the
// index of that box in gt_bboxes.
int num_gt = 0;
vector<int> gt_indices;
if (label == -1) {
// label -1 means comparing against all ground truth.
num_gt = gt_bboxes.size();
for (int i = 0; i < num_gt; ++i) {
gt_indices.push_back(i);
}
} else {
// Count number of ground truth boxes which has the desired label.
for (int i = 0; i < gt_bboxes.size(); ++i) {
if (gt_bboxes[i].label() == label) {
num_gt++;
gt_indices.push_back(i);
}
}
}
// Nothing to match against: outputs keep their initial -1 / 0 values. Note
// that this returns before the cross-boundary check, so no prediction is
// marked -2 in this case.
if (num_gt == 0) {
return;
}
// Store the positive overlap between predictions and ground truth.
// Sparse table: overlaps[i][j] exists only when prediction i and candidate
// ground truth j (a position in gt_indices) overlap by more than 1e-6.
map<int, map<int, float> > overlaps;
for (int i = 0; i < num_pred; ++i) {
if (ignore_cross_boundary_bbox && IsCrossBoundaryBBox(pred_bboxes[i])) {
// Mark as ignored; the prediction gets no row in the overlaps table.
(*match_indices)[i] = -2;
continue;
}
for (int j = 0; j < num_gt; ++j) {
float overlap = JaccardOverlap(pred_bboxes[i], gt_bboxes[gt_indices[j]]);
if (overlap > 1e-6) {
// Track the best overlap seen for this prediction, matched or not.
(*match_overlaps)[i] = std::max((*match_overlaps)[i], overlap);
overlaps[i][j] = overlap;
}
}
}
// Bipartite matching. Each iteration of the while loop below takes the single
// largest overlap among all unmatched predictions and all ground truths still
// in gt_pool, commits that pair, and removes the ground truth from the pool.
vector<int> gt_pool;
for (int i = 0; i < num_gt; ++i) {
gt_pool.push_back(i);
}
while (gt_pool.size() > 0) {
// Find the most overlapped gt and corresponding predictions.
int max_idx = -1;
int max_gt_idx = -1;
float max_overlap = -1;
for (map<int, map<int, float> >::iterator it = overlaps.begin();
it != overlaps.end(); ++it) {
int i = it->first;
if ((*match_indices)[i] != -1) {
// The prediction already has matched ground truth or is ignored.
continue;
}
for (int p = 0; p < gt_pool.size(); ++p) {
int j = gt_pool[p];
if (it->second.find(j) == it->second.end()) {
// No overlap between the i-th prediction and j-th ground truth.
continue;
}
// Find the maximum overlapped pair.
// The strict '>' keeps the first pair encountered when overlaps tie.
if (it->second[j] > max_overlap) {
// If the prediction has not been matched to any ground truth,
// and the overlap is larger than maximum overlap, update.
max_idx = i;
max_gt_idx = j;
max_overlap = it->second[j];
}
}
}
if (max_idx == -1) {
// Cannot find good match.
// No unmatched prediction overlaps any remaining ground truth, so the
// ground truths left in gt_pool stay without a match.
break;
} else {
CHECK_EQ((*match_indices)[max_idx], -1);
// Translate the candidate position back to an index into gt_bboxes.
(*match_indices)[max_idx] = gt_indices[max_gt_idx];
// Overwrites the running best overlap with the overlap of the chosen pair.
(*match_overlaps)[max_idx] = max_overlap;
// Erase the ground truth.
gt_pool.erase(std::find(gt_pool.begin(), gt_pool.end(), max_gt_idx));
}
}
switch (match_type) {
case MultiBoxLossParameter_MatchType_BIPARTITE:
// Already done.
break;
case MultiBoxLossParameter_MatchType_PER_PREDICTION:
// Get most overlapped for the rest prediction bboxes.
// Unlike the bipartite pass, this searches all num_gt candidates (not just
// gt_pool), so one ground truth may end up matched to several predictions.
for (map<int, map<int, float> >::iterator it = overlaps.begin();
it != overlaps.end(); ++it) {
int i = it->first;
if ((*match_indices)[i] != -1) {
// The prediction already has matched ground truth or is ignored.
continue;
}
int max_gt_idx = -1;
float max_overlap = -1;
for (int j = 0; j < num_gt; ++j) {
if (it->second.find(j) == it->second.end()) {
// No overlap between the i-th prediction and j-th ground truth.
continue;
}
// Find the maximum overlapped pair.
float overlap = it->second[j];
if (overlap >= overlap_threshold && overlap > max_overlap) {
// If the prediction has not been matched to any ground truth,
// and the overlap is larger than maximum overlap, update.
max_gt_idx = j;
max_overlap = overlap;
}
}
if (max_gt_idx != -1) {
// Found a matched ground truth.
CHECK_EQ((*match_indices)[i], -1);
(*match_indices)[i] = gt_indices[max_gt_idx];
(*match_overlaps)[i] = max_overlap;
}
}
break;
default:
LOG(FATAL) << "Unknown matching type.";
break;
}
return;
}
- 解释:
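其实在使用SSD默认参数的情况下,整个函数体做了两次matching,分别是BIPARTITE matching和PER_PREDICTION matching(这两个matching对应switch语句块的两种case:BIPARTITE matching在switch之前已经无条件执行过了,所以BIPARTITE分支什么都不用做;PER_PREDICTION分支则在此基础上再做一次matching)。在BIPARTITE matching中,主要做的是,以ground truth为出发点,找到与每个ground truth bounding box的jaccard overlap最大的prediction bounding box,并且记录其indices(如果该prediction bounding box已经被matching了,那么就会掉入continue语句,另外寻找一个未被matching的prediction bounding box)。更准确地说,while循环的每一轮都会在所有尚未匹配的prediction bounding box与gt_pool中剩余的ground truth bounding box之间,找出overlap最大的一对,完成匹配后再把这个ground truth从gt_pool中移除。由于是以ground truth bounding box为出发点做的matching,BIPARTITE matching之后可以保证所有的ground truth bounding box都找到了对应的prediction bounding box(前提是每个ground truth都至少与一个尚未匹配的prediction有大于1e-6的overlap,否则循环会因为max_idx == -1而提前break),但是,却不能保证所有的prediction bounding box都被matching到了。也就是说,可能存在一些prediction bounding box没有被matching到。而PER_PREDICTION matching就是为了handle这些落单的prediction bounding box而生的。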
PER_PREDICTION matching在BIPARTITE matching之后的switch语句中被执行。PER_PREDICTION matching首先做了一个判断,来看第i个prediction box是否确实落单了。如果没有落单(已经被match,或者已被标记为-2而忽略),那么就continue略过这个prediction box。
if ((*match_indices)[i] != -1) {
// The prediction already has matched ground truth or is ignored.
continue;
}
在确认第i个prediction box确实落单后,PER_PREDICTION matching便通过下面的语句,在与该prediction box的jaccard overlap不小于overlap_threshold的ground truth bounding box中,找出jaccard overlap最大的那一个,来和这个落单的prediction box match。如果这样还是不能让这个prediction box“脱单”,那就没办法了,就只能让它“单”着了。
for (int j = 0; j < num_gt; ++j) {
if (it->second.find(j) == it->second.end()) {
// No overlap between the i-th prediction and j-th ground truth.
continue;
}
// Find the maximum overlapped pair.
float overlap = it->second[j];
if (overlap >= overlap_threshold && overlap > max_overlap) {
// If the prediction has not been matched to any ground truth,
// and the overlap is larger than maximum overlap, update.
max_gt_idx = j;
max_overlap = overlap;
}
}
总而言之,这样一来,MatchBBox通过BIPARTITE matching和PER_PREDICTION matching尽量为每个ground truth bounding box和每个prediction bounding box都找到一个matcher。
update: 2018/3/12
现在又有了一个问题就是,昨晚看的MatchBBox函数事实上在match的时候没有考虑prediction bounding box和ground truth bounding box的label的对应(看下面的代码就能发现:调用时传入的label参数为-1,因此走的是与所有ground truth比较的那个分支)
int num_gt = 0;
vector<int> gt_indices;
if (label == -1) {
// label -1 means comparing against all ground truth.
num_gt = gt_bboxes.size();
for (int i = 0; i < num_gt; ++i) {
gt_indices.push_back(i);
}
} else {
// Count number of ground truth boxes which has the desired label.
for (int i = 0; i < gt_bboxes.size(); ++i) {
if (gt_bboxes[i].label() == label) {
num_gt++;
gt_indices.push_back(i);
}
}
}
if (num_gt == 0) {
return;
}
那这岂不就乱套了??
其实并不会乱套,这里确实是不需要考虑label对应的,因为SSD中的localization prediction分支是class agnostic的,它只负责把物体(无论是什么类别)框出来,而confidence prediction才负责给bounding box分类!搞半天原来是自己把SSD的工作原理都弄混了。真是老了啊。