该函数用于计算置信度、目标框和id的损失。训练时,该函数在models.py中的Darknet()类中的forward()函数中调用
将batchsize改为1,依次输出各变量的值
def forward(self, p_cat, img_size, targets=None, classifier=None, test_emb=False):
p, p_emb = p_cat[:, :24, ...], p_cat[:, 24:, ...]
nB, nGh, nGw = p.shape[0], p.shape[-2], p.shape[-1]
p_cat.shape=torch.Size([1, 536, 10, 18]),为模型的输出
p.shape=torch.Size([1, 24, 10, 18])
p_emb.shape=torch.Size([1, 512, 10, 18]),为embedding部分
nB=1,表示batchsize
nGh=10,即p.shape[-2],表示特征图的高度(网格行数)
nGw=18,即p.shape[-1],表示特征图的宽度(网格列数)
if self.img_size != img_size:
create_grids(self, img_size, nGh, nGw)
进入create_grids()函数后,create函数如下:
def create_grids(self, img_size, nGh, nGw):
    """Cache the stride, per-cell xy offsets and scaled anchors on ``self``.

    Args:
        self: Layer object providing ``anchors`` (a tensor that is viewed as
            ``(nA, 2)`` below) and ``nA``.
        img_size: Two-element sequence. Index 0 is divided by ``nGw`` and
            index 1 by ``nGh``, so it appears to be ordered (width, height)
            -- confirm against the caller.
        nGh: Number of grid rows.
        nGw: Number of grid columns.

    Side effects:
        Sets ``self.stride``, ``self.grid_xy`` (shape ``(1, 1, nGh, nGw, 2)``),
        ``self.anchor_vec`` and ``self.anchor_wh`` (shape ``(1, nA, 1, 1, 2)``).

    Raises:
        AssertionError: If the two image/grid ratios differ.
    """
    self.stride = img_size[0] / nGw
    assert self.stride == img_size[1] / nGh, \
        "{} v.s. {}/{}".format(self.stride, img_size[1], nGh)

    # build xy offsets: grid_x holds the column index of every cell,
    # grid_y holds the row index of every cell
    grid_x = torch.arange(nGw).repeat((nGh, 1)).view((1, 1, nGh, nGw)).float()
    grid_y = torch.arange(nGh).repeat((nGw, 1)).transpose(0, 1).view((1, 1, nGh, nGw)).float()
    self.grid_xy = torch.stack((grid_x, grid_y), 4)

    # build wh gains: anchors expressed in units of grid cells
    self.anchor_vec = self.anchors / self.stride
    self.anchor_wh = self.anchor_vec.view(1, self.nA, 1, 1, 2)
img_size={list: 2} [576, 320]
self.stride=576/18=32.0
grid_x.shape=torch.Size([1, 1, 10, 18])
grid_y.shape=torch.Size([1, 1, 10, 18])
self.grid_xy.shape = torch.Size([1, 1, 10, 18, 2])
self.anchors=tensor([ [ 85., 255.], [120., 360.], [170., 420.], [340., 320.]])
self.anchor_vec=tensor([ [ 2.6562, 7.9688],[ 3.7500, 11.2500],[ 5.3125, 13.1250],[10.6250, 10.0000] ])
self.nA=4
self.anchor_wh = self.anchor_vec.view(1, self.nA, 1, 1, 2)=tensor([[[[[ 2.6562, 7.9688]]],[[[ 3.7500, 11.2500]]],[[[ 5.3125, 13.1250]]],[[[10.6250, 10.0000]]]]])
if p.is_cuda:
self.grid_xy = self.grid_xy.cuda()
self.anchor_wh = self.anchor_wh.cuda()
把数据放到GPU上
# prediction
p = p.view(nB, self.nA, self.nC + 5, nGh, nGw).permute(0, 1, 3, 4, 2).contiguous()
p_emb = p_emb.permute(0,2,3,1).contiguous()# p_emb包含embedding信息
p_box = p[..., :4] # p_box包含检测框位置信息
p_conf = p[..., 4:6].permute(0, 4, 1, 2, 3) # 包含前景背景分类置信度的p_conf
self.nC=1
p.shape=torch.Size([1, 4, 10, 18, 6])
p_emb.shape = torch.Size([1, 10, 18, 512])
p_box.shape = torch.Size([1, 4, 10, 18, 4])
p_conf.shape =torch.Size([1, 2, 4, 10, 18])
if targets is not None:
if test_emb:
tconf, tbox, tids = build_targets_max(targets, self.anchor_vec.cuda(),
self.nA, self.nC, nGh, nGw)
else:
tconf, tbox, tids = build_targets_thres(targets, self.anchor_vec.cuda(),
self.nA, self.nC, nGh, nGw)
tconf, tbox, tids = tconf.cuda(), tbox.cuda(), tids.cuda()
mask = tconf > 0
这里调用了build_targets_thres()函数,build_targets_thres()函数的运行过程参考:Towards-Realtime-MOT源代码学习之build_targets_thres()函数_Ji_HON的博客-CSDN博客
tconf = tensor([[[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]],
[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]],
[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]],
[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]]]],device='cuda:0')
tbox = tensor([[[[[0., 0., 0., 0.],
...,
[0., 0., 0., 0.]],
...
[[0., 0., 0., 0.],
...,
[0., 0., 0., 0.]]]]], device='cuda:0')
tids = tensor([[[[[0., 0., 0., 0.],
...,
[0., 0., 0., 0.]],
[[0., 0., 0., 0.],
...,
[0., 0., 0., 0.]]]]], device='cuda:0')
然后将tconf, tbox, tids放到GPU上,并通过 mask = tconf > 0 得到正样本掩码:
mask.shape=torch.Size([1, 4, 10, 18]),里面的值全为False
# Compute losses计算检测框回归损失和前景背景分类损失
nT = sum([len(x) for x in targets]) # number of targets
nM = mask.sum().float() # number of anchors (assigned to targets)
nP = torch.ones_like(mask).sum().float()
if nM > 0:
lbox = self.SmoothL1Loss(p_box[mask], tbox[mask]) # 计算目标框的损失
else:
FT = torch.cuda.FloatTensor if p_conf.is_cuda else torch.FloatTensor
lbox, lconf = FT([0]), FT([0])
lconf = self.SoftmaxLoss(p_conf, tconf) # 计算置信度的损失
lid = torch.Tensor(1).fill_(0).squeeze().cuda()
emb_mask,_ = mask.max(1)
nT = 36
nM = tensor(0., device='cuda:0')
nP = tensor(720., device='cuda:0')
进入else条件语句:
FT=<class 'torch.cuda.FloatTensor'>
lbox = lconf = tensor([0.], device='cuda:0')
lconf = self.SoftmaxLoss(p_conf, tconf)=tensor(0.6107, device='cuda:0', grad_fn=<NllLoss2DBackward>),这里的self.SoftmaxLoss为交叉熵损失函数nn.CrossEntropyLoss
lid = tensor(0., device='cuda:0')
emb_mask.shape= torch.Size([1, 10, 18]),其中的值全为False
_.shape= torch.Size([1, 10, 18]),其中的值全为0
#embedding损失的计算 we use max(1) to decide the id, TODO: more reseanable strategy
tids,_ = tids.max(1)
tids = tids[emb_mask]
embedding = p_emb[emb_mask].contiguous()
embedding = self.emb_scale * F.normalize(embedding)
nI = emb_mask.sum().float()
if test_emb:
if np.prod(embedding.shape)==0 or np.prod(tids.shape) == 0:
return torch.zeros(0, self.emb_dim+1).cuda()
emb_and_gt = torch.cat([embedding, tids.float()], dim=1)
return emb_and_gt
if len(embedding) > 1:
logits = classifier(embedding).contiguous()
lid = self.IDLoss(logits, tids.squeeze())#计算id的损失
tids.shape=torch.Size([1, 10, 18, 1]),其中的值全为-1
tids,_ = tids.max(1)
tids = tids[emb_mask]=tensor([], device='cuda:0', size=(0, 1), dtype=torch.int64)
embedding = tensor([], device='cuda:0', size=(0, 512), grad_fn=<IndexBackward>)
self.emb_scale = 10.467784747196607
embedding = tensor([], device='cuda:0', size=(0, 512), grad_fn=<MulBackward0>)
nI = tensor(0., device='cuda:0')
这里不进入第一个if条件语句,也不进入第二个条件语句
# 求总的损失
loss = torch.exp(-self.s_r)*lbox + torch.exp(-self.s_c)*lconf + \
torch.exp(-self.s_id)*lid + (self.s_r + self.s_c + self.s_id)
loss *= 0.5
self.s_r = tensor([-4.8500], device='cuda:0')
self.s_c = tensor([-4.1500], device='cuda:0')
self.s_id = tensor([-2.3000], device='cuda:0')
torch.exp(-self.s_r) = torch.exp(torch.tensor([4.8500]))= tensor([127.7404]),即e^(4.8500)(注意self.s_r本身为负数,取负号后指数为正)
torch.exp(-self.s_c) = torch.exp(torch.tensor([4.1500]))= tensor([63.4340])
torch.exp(-self.s_id) = torch.exp(torch.tensor([2.3000]))= tensor([9.9742])
代入验证:127.7404×0 + 63.4340×0.6107 + 9.9742×0 + (-4.85-4.15-2.30) ≈ 27.44
loss = tensor([27.4388], device='cuda:0', grad_fn=<AddBackward0>)
loss * 0.5 = tensor([13.7194], device='cuda:0', grad_fn=<MulBackward0>)
return loss, loss.item(), lbox.item(), lconf.item(), lid.item(), nT
最后返回
else:
p_conf = torch.softmax(p_conf, dim=1)[:,1,...].unsqueeze(-1)
p_emb = F.normalize(p_emb.unsqueeze(1).repeat(1,self.nA,1,1,1).contiguous(),
dim=-1)
#p_emb_up = F.normalize(shift_tensor_vertically(p_emb, -self.shift[self.layer]),
dim=-1)
#p_emb_down = F.normalize(shift_tensor_vertically(p_emb, self.shift[self.layer]),
dim=-1)
p_cls = torch.zeros(nB,self.nA,nGh,nGw,1).cuda() # Temp
p = torch.cat([p_box, p_conf, p_cls, p_emb], dim=-1)
#p = torch.cat([p_box, p_conf, p_cls, p_emb, p_emb_up, p_emb_down], dim=-1)
p[..., :4] = decode_delta_map(p[..., :4], self.anchor_vec.to(p))
p[..., :4] *= self.stride
return p.view(nB, -1, p.shape[-1])
如果targets为None(即没有传入标签,处于推理/测试阶段),则进入else条件语句,对预测框解码后直接返回预测结果