// multibox_target.h: MultiBoxTarget operator declarations.
namespace mxnet {
namespace op {
namespace mshadow_op {
/*!
 * \brief Elementwise division that yields 0 when the divisor is 0,
 * avoiding inf/nan in the output instead of propagating them.
 */
struct safe_divide {
  template<typename DType>
  MSHADOW_XINLINE static DType Map(DType a, DType b) {
    // Guard the zero divisor; otherwise perform the plain quotient.
    return (b == DType(0.0f)) ? DType(0.0f) : DType(a / b);
  }
};
}
// Index aliases into the operator's input/output/resource vectors.
namespace mboxtarget_enum {
// Inputs: anchor boxes, ground-truth labels, class predictions.
enum MultiBoxTargetOpInputs {kAnchor, kLabel, kClsPred};
// Outputs: location targets, location mask, class targets.
enum MultiBoxTargetOpOutputs {kLoc, kLocMask, kCls};
// Requested resources: temporary workspace.
enum MultiBoxTargetOpResource {kTempSpace};
}  // namespace mboxtarget_enum
/*!
 * \brief Hyper-parameters for the MultiBoxTarget operator: anchor/ground-truth
 * matching, negative mining, and box-regression target encoding.
 */
struct MultiBoxTargetParam : public dmlc::Parameter<MultiBoxTargetParam> {
// Anchor-GT overlap above which an anchor counts as a positive match.
float overlap_threshold;
// Class label written for anchors that should be ignored.
float ignore_label;
// Max negative:positive sample ratio; -1 disables negative mining.
float negative_mining_ratio;
// Threshold used when selecting hard negatives.
float negative_mining_thresh;
// Lower bound on the number of negative samples kept.
int minimum_negative_samples;
// Per-coordinate variances encoded into the box regression targets.
nnvm::Tuple<float> variances;
DMLC_DECLARE_PARAMETER(MultiBoxTargetParam) {
DMLC_DECLARE_FIELD(overlap_threshold).set_default(0.5f)
.describe("Anchor-GT overlap threshold to be regarded as a positive match.");
DMLC_DECLARE_FIELD(ignore_label).set_default(-1.0f)
.describe("Label for ignored anchors.");
DMLC_DECLARE_FIELD(negative_mining_ratio).set_default(-1.0f)
.describe("Max negative to positive samples ratio, use -1 to disable mining");
DMLC_DECLARE_FIELD(negative_mining_thresh).set_default(0.5f)
.describe("Threshold used for negative mining.");
DMLC_DECLARE_FIELD(minimum_negative_samples).set_default(0)
.describe("Minimum number of negative samples.");
// Default variances match the common SSD encoding — TODO confirm against callers.
DMLC_DECLARE_FIELD(variances).set_default({
0.1f, 0.1f, 0.2f, 0.2f})
.describe("Variances to be encoded in box regression target.");
}
};
template<typename xpu, typename DType>
class MultiBoxTargetOp : public Operator {
public:
/*!
 * \brief Construct the operator, storing its parsed hyper-parameters.
 * \param param the MultiBoxTargetParam settings to use in Forward.
 */
explicit MultiBoxTargetOp(MultiBoxTargetParam param) {
  param_ = param;
}
virtual void Forward(const OpContext &ctx,
const std::vector<TBlob> &in_data,
const std::vector<OpReqType> &req,
const std::vector<TBlob> &out_data,
const std::vector<TBlob> &aux_args) {
using namespace mshadow;
using namespace mshadow_op;
using namespace mshadow::expr;
CHECK_EQ(in_data.size(), 3);
CHECK_EQ(out_data.size(), 3);
Stream<xpu> *s = ctx.get_stream<xpu>();
Tensor<xpu, 2, DType> anchors = in_data[mboxtarget_enum::kAnchor]
.get_with_shape<xpu, 2, DType>(
Shape2(in_data[mboxtarget_enum::kAnchor].size(1), 4), s);
Tensor<xpu, 3, DType> labels = in_data[mboxtarget_enum::kLabel]
.get<xpu, 3, DType>(s);
Tensor<xpu, 3, DType> cls_preds = in_data[mboxtarget_enum::kClsPred]
.get<xpu, 3, DType>(s);
Tensor<xpu, 2, DType> loc_target = out_data[mboxtarget_enum::kLoc]
.get<xpu, 2, DType>(s);
Tensor<xpu, 2, DType> loc_mask = out_data[mboxtarget_enum::kLocMask]
.get<xpu, 2, DType>(s);
Tensor<xpu, 2, DType> cls_target = out_data[mboxtarget_enum::kCls]
.get<xpu, 2, DType>(s);
index_t num_batches = labels.size(0);
index_t num_anchors = anchors.size(0);
index_t num_labels = labels.size(1);
Shape<4> temp_shape = Shape4(11, num_batches, num_anchors, num_labels);
Tensor<xpu, 4, DType> temp_space = ctx.requested[mboxtarget_enum::kTempSpace]
.get_space_typed<xpu, 4, DType>(temp_shape, s);
loc_target = 0.f;
loc_mask = 0.0f;
cls_target = param_.ignore_label;
temp_space = -1.0f;
CHECK_EQ(anchors.CheckContiguous(), true);
CHECK_EQ(labels.CheckContiguous(), true);
CHECK_EQ(cls_preds.CheckContiguous(), true);
CHECK_EQ(loc_target.CheckContiguous(), true);
CHECK_EQ(loc_mask.CheckContiguous(), true);
CHECK_EQ(cls_target.CheckContiguous(), true);
CHECK_EQ(temp_space.CheckContiguous(), true);
temp_space[1] = broadcast_keepdim(broadcast_with_axis(slice<1>(anchors, 0, 1), -1,
num_batches), 2, num_labels);
temp_space[2] = broadcast_keepdim(broadcast_with_axis(slice<1>(anchors, 1, 2), -1,
num_batches), 2, num_labels);
temp_space[3] = broadcast_keepdim(broadcast_with_axis(slice<1>(anchors, 2, 3), -1,
num_batches), 2, num_labels);
temp_space[4] = broadcast_keepdim(broadcast_with_axis(slice<1>(anchors, 3, 4), -1,
num_batches), 2, num_labels);
Shape<3> t