Object Detection 2: Pikachu Detection with an SSD Model

1. Dataset Creation: Pikachu

import torch
import numpy as np
from PIL import Image
class PikaDataset(torch.utils.data.Dataset):
    def __init__(self, part):
        self.part = part
        self.data = np.loadtxt(r'./data/pika/%s.csv' % part, delimiter=',')
    def __getitem__(self, idx):   # read the image at index idx, return the image and its target box
        x = Image.open(r'./data/pika/%s/%s.jpg' % (self.part, idx)).convert('RGB')
        x = np.array(x)               # convert the image to an ndarray [256, 256, 3]
        x = x.transpose((2, 0, 1))    # PyTorch expects channels first: [3, 256, 256]
        x = torch.tensor(x)           # convert to a tensor
        x = x.float()
        y = torch.FloatTensor(self.data[idx])
        return x, y
    def __len__(self):
        return len(self.data)
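
As a quick sanity check, a single sample can be read directly from the dataset. This is a small sketch (it assumes the ./data/pika/ files above exist; the normalization check reflects the fact that draw_anchors later multiplies the target by 256, i.e. boxes are stored as normalized [x0, y0, x1, y1]):

# Minimal sketch: inspect one training sample (assumes ./data/pika/ exists)
dataset = PikaDataset(part='train')
x0, y0 = dataset[0]
print(len(dataset))      # number of rows in train.csv
print(x0.shape, y0)      # torch.Size([3, 256, 256]) and a 4-element box
print(float(y0.min()), float(y0.max()))  # should lie in [0, 1]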

2. Data Loaders

loader_train = torch.utils.data.DataLoader(dataset=PikaDataset(part='train'), 
                           batch_size=32,shuffle=True,drop_last=True)
loader_test = torch.utils.data.DataLoader(dataset=PikaDataset(part='test'), 
                           batch_size=32,shuffle=True,drop_last=True)
for x, y in loader_train:
    print(x.shape, y.shape, x.dtype, y.dtype)
    break
torch.Size([32, 3, 256, 256]) torch.Size([32, 4]) torch.float32 torch.float32

3. Anchor Generation

def get_anchor(image_size, anchor_size_small, anchor_size_big):
    step = (np.arange(image_size) + 0.5) / image_size  # image_size=2 gives step=[0.25, 0.75], i.e. two center coordinates per axis
    point = []   # generate the center points
    for i in range(image_size):
        for j in range(image_size):
            point.append([step[j], step[i]])   # image_size=2 gives 4 center points
    anchors = []   # generate all box coordinates from the center points
    for i in range(len(point)):
        # large square: top-left and bottom-right corners
        x0 = point[i][0] - anchor_size_big / 2
        y0 = point[i][1] - anchor_size_big / 2
        x1 = point[i][0] + anchor_size_big / 2
        y1 = point[i][1] + anchor_size_big / 2
        anchors.append([x0, y0, x1, y1])
        # small square: top-left and bottom-right corners
        x0 = point[i][0] - anchor_size_small / 2
        y0 = point[i][1] - anchor_size_small / 2
        x1 = point[i][0] + anchor_size_small / 2
        y1 = point[i][1] + anchor_size_small / 2
        anchors.append([x0, y0, x1, y1])
        # wide rectangle (aspect ratio sqrt(2)): top-left and bottom-right corners
        x0 = point[i][0] - anchor_size_small * (2.0 ** 0.5) / 2
        y0 = point[i][1] - anchor_size_small / (2.0 ** 0.5) / 2
        x1 = point[i][0] + anchor_size_small * (2.0 ** 0.5) / 2
        y1 = point[i][1] + anchor_size_small / (2.0 ** 0.5) / 2
        anchors.append([x0, y0, x1, y1])
        # tall rectangle (aspect ratio 1/sqrt(2)): top-left and bottom-right corners
        x0 = point[i][0] - anchor_size_small * (0.5 ** 0.5) / 2
        y0 = point[i][1] - anchor_size_small / (0.5 ** 0.5) / 2
        x1 = point[i][0] + anchor_size_small * (0.5 ** 0.5) / 2
        y1 = point[i][1] + anchor_size_small / (0.5 ** 0.5) / 2
        anchors.append([x0, y0, x1, y1])
    anchors = torch.FloatTensor(anchors)
    return anchors
anchor = get_anchor(image_size=2, anchor_size_small=0.1, anchor_size_big=0.4)
anchor.shape
torch.Size([16, 4])
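
Each center point contributes four boxes (one big square, one small square, and two rectangles), so get_anchor returns image_size * image_size * 4 anchors. A small check over the feature-map sizes the model uses later (the anchor sizes here are arbitrary, since they do not affect the count):

# 4 anchors per feature-map position: image_size * image_size * 4 in total
for size in [2, 32, 16, 8, 4, 1]:
    print(size, get_anchor(image_size=size, anchor_size_small=0.2, anchor_size_big=0.272).shape)
# 32 -> [4096, 4], 16 -> [1024, 4], 8 -> [256, 4], 4 -> [64, 4], 1 -> [4, 4]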

4. Anchor Visualization

import matplotlib.pyplot as plt
from PIL import ImageDraw
def draw_anchors(x, y, anchors):
    x = x.detach().numpy()
    x = x.astype(np.uint8)
    x = x.transpose([1, 2, 0])  # restore channel order: channels last
    y = y.detach().numpy()
    y = y * 256.0
    image = Image.fromarray(x)
    draw = ImageDraw.Draw(image)
    anchors = anchors.detach().numpy() * 256  # anchors are in [0, 1]; scale to the actual image size before drawing
    for i in range(len(anchors)):
        draw.rectangle(xy=anchors[i], outline='black', width=2)  # candidate/predicted boxes
    draw.rectangle(xy=y, outline='white', width=2)  # ground-truth box
    plt.figure(figsize=(5, 5))
    plt.imshow(image)
    plt.show()
anchor
tensor([[0.0500, 0.0500, 0.4500, 0.4500],
        [0.2000, 0.2000, 0.3000, 0.3000],
        [0.1793, 0.2146, 0.3207, 0.2854],
...
        [0.6793, 0.7146, 0.8207, 0.7854],
        [0.7146, 0.6793, 0.7854, 0.8207]])
x.shape
torch.Size([32, 3, 256, 256])

anchor = get_anchor(image_size=2, anchor_size_small=0.1, anchor_size_big=0.4)
draw_anchors(x[0], y[0], anchor)

5. Offset Computation

  • Each anchor that is assigned a target takes that target's label, and its offset is computed from the relative position of the target and anchor centers and the relative size of the two boxes. The offset is:
    \left( \frac{(target.x - anchor.x) / anchor.w}{0.1}, \frac{(target.y - anchor.y) / anchor.h}{0.1}, \frac{\log(target.w / anchor.w)}{0.2}, \frac{\log(target.h / anchor.h)}{0.2} \right)

def get_offset(anchor, target):
    anchor_w = anchor[2] - anchor[0]      # anchor width and height
    anchor_h = anchor[3] - anchor[1]
    anchor_cx = anchor[0] + anchor_w / 2  # anchor center
    anchor_cy = anchor[1] + anchor_h / 2
    target_w = target[2] - target[0]      # target width and height
    target_h = target[3] - target[1]
    target_cx = target[0] + target_w / 2  # target center
    target_cy = target[1] + target_h / 2
    offset_cx = (target_cx - anchor_cx) / anchor_w * 10   # center offsets
    offset_cy = (target_cy - anchor_cy) / anchor_h * 10
    offset_w = torch.log(1e-6 + target_w / anchor_w) * 5  # width/height offsets
    offset_h = torch.log(1e-6 + target_h / anchor_h) * 5
    offset = torch.tensor([offset_cx, offset_cy, offset_w, offset_h])
    return offset
anchor = torch.FloatTensor([0, 0, 10, 10])
target = torch.FloatTensor([10, 10, 20, 20])
get_offset(anchor, target)
tensor([1.0000e+01, 1.0000e+01, 4.7684e-06, 4.7684e-06])
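
These values match the formula: the target center (15, 15) is 10 away from the anchor center (5, 5) in each direction while the anchor is 10 wide and 10 high, so (10 / 10) / 0.1 = 10 for both center offsets; the widths and heights are equal, so log(10 / 10) / 0.2 is essentially 0 (the tiny nonzero value comes from the 1e-6 added inside the log and float32 precision).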

6. Utility Functions

# 6.1 IoU computation
def get_iou(anchor, target):
    anchor_w = anchor[:, 2] - anchor[:, 0]  # anchor: [N, 4], target: ground-truth box [4]
    anchor_h = anchor[:, 3] - anchor[:, 1]
    anchor_s = anchor_w * anchor_h          # anchor areas
    y_w = target[2] - target[0]             # target width, height and area
    y_h = target[3] - target[1]
    y_s = y_w * y_h
    cross = torch.empty(anchor.shape)       # coordinates of the intersection rectangles
    cross[:, 0] = torch.max(anchor[:, 0], target[0])  # max of the top-left coordinates = top-left of the intersection
    cross[:, 1] = torch.max(anchor[:, 1], target[1])
    cross[:, 2] = torch.min(anchor[:, 2], target[2])  # min of the bottom-right coordinates
    cross[:, 3] = torch.min(anchor[:, 3], target[3])
    # intersection width and height; clamp to 0 because non-overlapping boxes would give negative values
    cross_w = (cross[:, 2] - cross[:, 0]).clamp(min=0)
    cross_h = (cross[:, 3] - cross[:, 1]).clamp(min=0)
    cross_s = cross_w * cross_h         # intersection area
    union_s = anchor_s + y_s - cross_s  # union area
    return cross_s / union_s            # IoU
anchor = torch.FloatTensor([[0, 0, 10, 10], [20, 20, 30, 30], [10, 10, 20, 20], [5, 5, 15, 15]])
target = torch.FloatTensor([10, 10, 20, 20])
iou = get_iou(anchor, target)
iou
tensor([0.0000, 0.0000, 1.0000, 0.1429])
iou > 0.5
tensor([False, False,  True, False])
iou[iou > 0.5]
tensor([1.])
iou[[1, 2, 1, 2, 3, 0, 1, 2, ]]
tensor([0.0000, 1.0000, 0.0000, 1.0000, 0.1429, 0.0000, 0.0000, 1.0000])
torch.argmax(iou)
tensor(2)
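
If torchvision is available, its box_iou utility can be used to cross-check this implementation; this is an optional sketch, not part of the original pipeline:

# Optional cross-check (assumes torchvision is installed)
from torchvision.ops import box_iou
print(box_iou(anchor, target.unsqueeze(0)).squeeze(1))  # should match get_iou(anchor, target)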

# 6.2 Positive-sample assignment
# Decide whether each anchor is a positive or negative sample; positive anchors are "active" (marked 1), the rest 0
def get_active(anchor, target):
    active = torch.zeros(len(anchor), dtype=torch.long)  # anchor [16, 4], target [4]
    iou = get_iou(anchor, target)  # IoU between every anchor and the target
    active[iou > 0.5] = 1
    active[torch.argmax(iou)] = 1  # the best-matching anchor is always positive
    return active == 1
    
# 6.3 Positive-sample mask
def get_mask(active):  # active is a boolean tensor
    mask = torch.zeros(len(active), 4)
    mask[active, :] = 1  # rows of active anchors are set to 1
    return mask
    
# 6.4 Positive-sample class labels
def get_label(active):  # class label for each active anchor
    label = torch.zeros(len(active), dtype=torch.long)
    label[active] = 1  # there is only one class: Pikachu
    return label
    
# 6.5 Positive-sample offsets
def get_active_offset(active, anchor, target):  # offsets between the active anchors and the target
    offset = torch.zeros(len(active), 4)  # active [16], anchor [16, 4], target [4]
    for i in range(len(active)):
        if active[i]:
            offset[i, :] = get_offset(anchor[i], target)
    return offset
    
# 6.6 Putting it together: ground-truth labels, offsets and positive-sample masks
def get_truth(anchor, target):  # labels and offsets for the matched anchors
    labels = []  # anchor [16, 4], target [2, 4]
    offsets = []
    masks = []
    for i in range(len(target)):
        active = get_active(anchor, target[i])  # which anchors are active
        mask = get_mask(active)                 # convert the active flags to a 0/1 mask
        masks.append(mask.reshape(-1))
        label = get_label(active)               # class labels of the active anchors
        labels.append(label)
        offset = get_active_offset(active, anchor, target[i])
        offsets.append(offset.reshape(-1))
    labels = torch.stack(labels)   # [2, 16]
    offsets = torch.stack(offsets)  # [2, 64]
    masks = torch.stack(masks)
    return labels, offsets, masks
anchor = torch.FloatTensor([[0.0500, 0.0500, 0.4500, 0.4500],
                            [0.2000, 0.2000, 0.3000, 0.3000],
                            [0.1793, 0.2146, 0.3207, 0.2854],
                            [0.2146, 0.1793, 0.2854, 0.3207],
                            [0.5500, 0.0500, 0.9500, 0.4500],
                            [0.7000, 0.2000, 0.8000, 0.3000],
                            [0.6793, 0.2146, 0.8207, 0.2854],
                            [0.7146, 0.1793, 0.7854, 0.3207],
                            [0.0500, 0.5500, 0.4500, 0.9500],
                            [0.2000, 0.7000, 0.3000, 0.8000],
                            [0.1793, 0.7146, 0.3207, 0.7854],
                            [0.2146, 0.6793, 0.2854, 0.8207],
                            [0.5500, 0.5500, 0.9500, 0.9500],
                            [0.7000, 0.7000, 0.8000, 0.8000],
                            [0.6793, 0.7146, 0.8207, 0.7854],
                            [0.7146, 0.6793, 0.7854, 0.8207]])
target = torch.FloatTensor([[0.0500, 0.0500, 0.4500, 0.4500],
                            [0.7000, 0.2000, 0.8000, 0.3000]])
anchor.shape
torch.Size([16, 4])

labels, offsets, masks = get_truth(anchor, target)
labels.shape, offsets.shape, masks.shape
(torch.Size([2, 16]), torch.Size([2, 64]), torch.Size([2, 64]))
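
To see the assignment concretely, the positive anchor indices per target can be listed from labels; this small sketch reuses the anchor and target tensors defined above:

# Which anchors were matched to each target?
for i in range(len(target)):
    print(i, torch.nonzero(labels[i]).flatten().tolist())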

7. SSD Model Building Blocks

# 7.1 Backbone
class FirstModel(torch.nn.Module):  # the SSD model is split into three kinds of blocks: a backbone, several middle blocks, and a final block
    def __init__(self):
        super().__init__()
        self.cnn = torch.nn.Sequential(
            torch.nn.Conv2d(in_channels=3,
                           out_channels=16,
                           kernel_size=3,
                           padding=1),
            torch.nn.BatchNorm2d(num_features=16),
            torch.nn.ReLU(),
            torch.nn.Conv2d(in_channels=16,
                           out_channels=16,
                           kernel_size=3, 
                           padding=1),
            torch.nn.BatchNorm2d(num_features=16),
            torch.nn.ReLU(),
            torch.nn.MaxPool2d(kernel_size=2),
            
            torch.nn.Conv2d(in_channels=16,
                           out_channels=32,
                           kernel_size=3,
                           padding=1),
            torch.nn.BatchNorm2d(num_features=32),
            torch.nn.ReLU(),
            torch.nn.Conv2d(in_channels=32,
                           out_channels=32,
                           kernel_size=3, 
                           padding=1),
            torch.nn.BatchNorm2d(num_features=32),
            torch.nn.ReLU(),
            torch.nn.MaxPool2d(kernel_size=2),
            
            torch.nn.Conv2d(in_channels=32,
                           out_channels=64,
                           kernel_size=3,
                           padding=1),
            torch.nn.BatchNorm2d(num_features=64),
            torch.nn.ReLU(),
            torch.nn.Conv2d(in_channels=64,
                           out_channels=64,
                           kernel_size=3, 
                           padding=1),
            torch.nn.BatchNorm2d(num_features=64),
            torch.nn.ReLU(),
            torch.nn.MaxPool2d(kernel_size=2))
        self.label = torch.nn.Conv2d(in_channels=64,   # classification head
                                     out_channels=8, kernel_size=3, padding=1)
        self.offset = torch.nn.Conv2d(in_channels=64,  # box-regression head
                                      out_channels=16, kernel_size=3, padding=1)
    def forward(self, x):
        x = self.cnn(x)      # [2, 3, 256, 256] -> [2, 64, 32, 32]; anchors are generated at the 32x32 scale
        anchor = get_anchor(image_size=32, anchor_size_small=0.2, anchor_size_big=0.272)
        label = self.label(x)               # [2, 64, 32, 32] -> [2, 8, 32, 32]
        label = label.permute(0, 2, 3, 1)   # [2, 8, 32, 32] -> [2, 32, 32, 8]
        label = label.flatten(start_dim=1)  # flatten to 2-D [2, 8192]
        offset = self.offset(x)             # [2, 64, 32, 32] -> [2, 16 , 32, 32]
        offset = offset.permute(0, 2, 3, 1)
        offset = offset.flatten(start_dim=1)
        return x, anchor, label, offset
x = torch.zeros((2, 3, 256, 256))
x, anchor, label, offset = FirstModel()(x)
x.shape, anchor.shape, label.shape, offset.shape
(torch.Size([2, 64, 32, 32]),
 torch.Size([4096, 4]),
 torch.Size([2, 8192]),
 torch.Size([2, 16384]))
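
These shapes follow from the anchor layout: the 32x32 feature map yields 32 * 32 * 4 = 4096 anchors; the classification head outputs 4 anchors * 2 classes = 8 channels per position, i.e. 32 * 32 * 8 = 8192 values per image after flattening, and the regression head outputs 4 anchors * 4 offsets = 16 channels, i.e. 16384 values.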
 
# 7.2 Middle blocks
class MiddleModel(torch.nn.Module):
    def __init__(self, c_in, anchor_size_small, anchor_size_big):
        super().__init__()
        self.anchor_size_small = anchor_size_small
        self.anchor_size_big = anchor_size_big
        self.cnn = torch.nn.Sequential(
            torch.nn.Conv2d(in_channels=c_in, 
                           out_channels=128,
                           kernel_size=3,
                           padding=1),
            torch.nn.BatchNorm2d(num_features=128),
            torch.nn.ReLU(),
            torch.nn.Conv2d(in_channels=128, 
                           out_channels=128,
                           kernel_size=3,
                           padding=1),
            torch.nn.BatchNorm2d(num_features=128),
            torch.nn.ReLU(),
            torch.nn.MaxPool2d(kernel_size=2))
        self.label = torch.nn.Conv2d(in_channels=128,   # classification head
                                     out_channels=8, kernel_size=3, padding=1)
        self.offset = torch.nn.Conv2d(in_channels=128,  # box-regression head
                                      out_channels=16, kernel_size=3, padding=1)
    def forward(self, x):
        x = self.cnn(x)  # [2, 64, 32, 32] -> [2, 128, 16, 16]
        anchor = get_anchor(image_size=x.shape[-1],
                           anchor_size_small=self.anchor_size_small,
                           anchor_size_big=self.anchor_size_big)
        label = self.label(x)               # [2, 128, 16, 16] -> [2, 8, 16, 16]
        label = label.permute(0, 2, 3, 1)    # [2, 8, 16, 16] -> [2, 16, 16, 8]
        label = label.flatten(start_dim=1)   # flatten to 2-D [2, 2048]
        offset = self.offset(x)               # [2, 128, 16, 16] -> [2, 16 , 16, 16]
        offset = offset.permute(0, 2, 3, 1)
        offset = offset.flatten(start_dim=1)  # [2, 16 , 16, 16] -> [2, 4096]
        return x, anchor, label, offset
x = torch.zeros((2, 64, 32, 32))
x, anchor, label, offset = MiddleModel(c_in=64, anchor_size_small=0.37, anchor_size_big=0.447)(x)
x.shape, anchor.shape, label.shape, offset.shape
(torch.Size([2, 128, 16, 16]),
 torch.Size([1024, 4]),
 torch.Size([2, 2048]),
 torch.Size([2, 4096]))
 
# 7.3 Final block
class LastModel(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.cnn = torch.nn.AdaptiveMaxPool2d(output_size=(1, 1))
        self.label = torch.nn.Conv2d(in_channels=128,   # classification head
                                     out_channels=8, kernel_size=3, padding=1)
        self.offset = torch.nn.Conv2d(in_channels=128,  # box-regression head
                                      out_channels=16, kernel_size=3, padding=1)
    def forward(self, x):
        x = self.cnn(x)  # [2, 128, 4, 4] -> [2, 128, 1, 1]
        anchor = get_anchor(image_size=1, anchor_size_small=0.88,
                           anchor_size_big=0.961)
        label = self.label(x)                # [2, 128, 1, 1] -> [2, 8, 1, 1]
        label = label.permute(0, 2, 3, 1)     # [2, 8, 1, 1] -> [2, 1, 1, 8]
        label = label.flatten(start_dim=1)    # flatten to 2-D: [2, 1, 1, 8] -> [2, 8]
        offset = self.offset(x)               # [2, 128, 1, 1] -> [2, 16 , 1, 1]
        offset = offset.permute(0, 2, 3, 1)
        offset = offset.flatten(start_dim=1)   # [2, 1 , 1, 16] -> [2, 16]
        return x, anchor, label, offset
x = torch.zeros((2, 128, 4, 4))
x, anchor, label, offset = LastModel()(x)
x.shape, anchor.shape, label.shape, offset.shape
(torch.Size([2, 128, 1, 1]),
 torch.Size([4, 4]),
 torch.Size([2, 8]),
 torch.Size([2, 16]))

8. Full SSD Model

class Model(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.first = FirstModel()
        self.middle_1 = MiddleModel(c_in=64,anchor_size_small=0.37,anchor_size_big=0.447)
        self.middle_2 = MiddleModel(c_in=128,anchor_size_small=0.54,anchor_size_big=0.619)
        self.middle_3 = MiddleModel(c_in=128,anchor_size_small=0.71,anchor_size_big=0.79)
        self.last = LastModel()
    def forward(self, x):
        anchor = [None] * 5   # anchor boxes from each level
        label = [None] * 5    # class predictions from each level
        offset = [None] * 5   # offset predictions from each level
        # [2, 3, 256, 256]-> [2, 64, 32, 32], [4096, 4], [2, 8192], [2, 16384]
        x, anchor[0], label[0], offset[0] = self.first(x)
        # [2, 64, 32, 32] -> [2, 128, 16, 16], [1024, 4], [2, 2048], [2, 4096]
        x, anchor[1], label[1], offset[1] = self.middle_1(x)
        x, anchor[2], label[2], offset[2] = self.middle_2(x)
        x, anchor[3], label[3], offset[3] = self.middle_3(x)
        x, anchor[4], label[4], offset[4] = self.last(x)
        anchor = torch.cat(anchor, dim=0)  # [4096 + 1024 + 256 + 64 + 4 = 5444, 4]
        label = torch.cat(label, dim=1)    # [2, 8192 + 2048 + 512 + 128 + 8 = 10888]
        offset = torch.cat(offset, dim=1)  # [2, 16384 + 4096 + 1024 + 256 + 16 = 21776]
        return anchor, label, offset
x = torch.zeros((2, 3, 256, 256))
anchor, label, offset = Model()(x)
x.shape, anchor.shape, label.shape, offset.shape
(torch.Size([2, 3, 256, 256]),
 torch.Size([5444, 4]),
 torch.Size([2, 10888]),
 torch.Size([2, 21776]))
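
The totals can be verified from the per-level anchor counts (a small sketch; the per-level numbers come from the shape comments above):

# 32x32, 16x16, 8x8, 4x4 and 1x1 feature maps, 4 anchors each
counts = [32 * 32 * 4, 16 * 16 * 4, 8 * 8 * 4, 4 * 4 * 4, 1 * 1 * 4]
print(sum(counts))      # 5444 anchors
print(sum(counts) * 2)  # 10888 class scores per image
print(sum(counts) * 4)  # 21776 offset values per image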

9. Loss Computation

\text{loss}(x, class) = -\log\left(\frac{\exp(x[class])}{\sum_j \exp(x[j])}\right) = -x[class] + \log\left(\sum_j \exp(x[j])\right)
\ell(x, y) = L = \{l_1, \dots, l_N\}^\top, \quad l_n = \left| x_n - y_n \right|

get_loss_cls = torch.nn.CrossEntropyLoss(reduction='none')
get_loss_box = torch.nn.L1Loss(reduction='none')
def get_loss(label_pred, offset_pred, label, offset, masks):
    # label_pred  -> [32, 10888] (32 samples x 5444 anchors x 2 classes)
    # offset_pred -> [32, 21776]
    # label       -> [32, 5444]
    # offset      -> [32, 21776]
    # masks       -> [32, 21776]
    # [32, 10888] -> [174208, 2]
    label_pred = label_pred.reshape(-1, 2)
    label = label.reshape(-1)                   # [32, 5444] -> [174208]
    loss_cls = get_loss_cls(label_pred, label)  # [174208]
    loss_cls = loss_cls.reshape(32, -1)         # [32, 5444]
    loss_cls = loss_cls.mean(dim=1)             # mean classification loss per sample  [32]
    offset_pred = offset_pred * masks           # keep only the positive-sample offsets  [32, 21776] * [32, 21776] -> [32, 21776]
    offset *= masks                             # the regression loss is computed only on positive samples
    loss_box = get_loss_box(offset_pred, offset)  # [32, 21776]
    loss_box = loss_box.mean(dim=1)             # mean regression loss per sample
    loss = loss_box + loss_cls                  # total loss
    return loss
x = torch.zeros(32, 3, 256, 256)
y = torch.zeros(32, 4)
anchor, label_pred, offset_pred = Model()(x)  # [5444, 4], [32, 10888], [32, 21776]
label, offset, masks = get_truth(anchor, y)   # ground-truth labels, offsets and masks: [32, 5444], [32, 21776], [32, 21776]
loss = get_loss(label_pred, offset_pred, label, offset, masks)
loss
tensor([0.7000, 0.7000, 0.7000, 0.7000, 0.7000, 0.7000, 0.7000, 0.7000, 0.7000,
        0.7000, 0.7000, 0.7000, 0.7000, 0.7000, 0.7000, 0.7000, 0.7000, 0.7000,
        0.7000, 0.7000, 0.7000, 0.7000, 0.7000, 0.7000, 0.7000, 0.7000, 0.7000,
        0.7000, 0.7000, 0.7000, 0.7000, 0.7000], grad_fn=<AddBackward0>)
        
loss.shape
torch.Size([32])

10. Training

net = Model()
optimizer = torch.optim.SGD(net.parameters(), lr=0.1)
for epoch in range(20):
    net.train()
    for i, (x, y) in enumerate(loader_train):
        optimizer.zero_grad()
        anchor, label_pred, offset_pred = net(x)
        label, offset, masks = get_truth(anchor, y)   # ground truth corresponding to label_pred and offset_pred
        loss = get_loss(label_pred, offset_pred, label, offset, masks)
        loss.mean().backward()   # backward() needs a scalar, so reduce the per-sample losses with mean()
        optimizer.step()
        if i % 10 == 0:
            print(epoch, i, loss.mean().item()) 
0 0 0.7333654761314392
0 10 0.018896669149398804
...

19 10 0.002308905590325594
19 20 0.0019166001584380865
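
The loop above runs on the CPU. Below is a minimal sketch of moving it to a GPU, assuming a CUDA device is available; note that get_anchor and get_truth build their tensors on the CPU, so only the inputs and the ground-truth tensors passed to get_loss need to be moved:

# GPU variant of the training loop (a sketch; assumes torch.cuda.is_available())
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
net = Model().to(device)
optimizer = torch.optim.SGD(net.parameters(), lr=0.1)
for epoch in range(20):
    net.train()
    for i, (x, y) in enumerate(loader_train):
        optimizer.zero_grad()
        anchor, label_pred, offset_pred = net(x.to(device))  # anchor stays on the CPU
        label, offset, masks = get_truth(anchor, y)          # target matching runs on the CPU
        loss = get_loss(label_pred, offset_pred,
                        label.to(device), offset.to(device), masks.to(device))
        loss.mean().backward()
        optimizer.step()
        if i % 10 == 0:
            print(epoch, i, loss.mean().item())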

11. Saving the Model

torch.save(net, './ssd.pth')
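
torch.save(net, ...) pickles the whole module, so loading it later requires the Model/FirstModel/MiddleModel/LastModel class definitions to be importable. A commonly used alternative is to store only the parameters; this is a sketch, not what the rest of this post does:

# Alternative: save and restore only the state_dict
torch.save(net.state_dict(), './ssd_state.pth')
net2 = Model()
net2.load_state_dict(torch.load('./ssd_state.pth'))
net2.eval()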

12. Prediction

# 12.1 Inverting the offset transform: recover the predicted box center and width/height from the predicted offsets
def inverse_offset(anchor, offset):     # anchor [4], offset [4]
    anchor_center = torch.empty(4)   # convert corner coordinates to center coordinates plus width/height
    anchor_center[0] = (anchor[0] + anchor[2]) / 2
    anchor_center[1] = (anchor[1] + anchor[3]) / 2
    anchor_center[2] = anchor[2] - anchor[0]
    anchor_center[3] = anchor[3] - anchor[1]
    pred = torch.empty(4)
    # offset.x, offset.y = (target.x,y - anchor.x,y) / anchor.w,h * 10
    # offset.w, offset.h = log(target.w,h / anchor.w,h) * 5
    pred[0] = offset[0] * anchor_center[2] * 0.1 + anchor_center[0]
    pred[1] = offset[1] * anchor_center[3] * 0.1 + anchor_center[1]
    pred[2] = torch.exp(offset[2] / 5) * anchor_center[2]
    pred[3] = torch.exp(offset[3] / 5) * anchor_center[3]
    pred_corner = torch.empty(4)  # convert back to top-left / bottom-right corners
    pred_corner[0] = pred[0] - 0.5 * pred[2]
    pred_corner[1] = pred[1] - 0.5 * pred[3]
    pred_corner[2] = pred[0] + 0.5 * pred[2]
    pred_corner[3] = pred[1] + 0.5 * pred[3]
    return pred_corner
anchor = torch.FloatTensor([0, 0, 10, 10])
target = torch.FloatTensor([10, 10, 20, 20])
offset = get_offset(anchor, target)
offset, inverse_offset(anchor, offset)
(tensor([1.0000e+01, 1.0000e+01, 4.7684e-06, 4.7684e-06]),
 tensor([10.0000, 10.0000, 20.0000, 20.0000]))
 
# 12.2 Prediction
def predict(x):
    net.eval()
    x = x.unsqueeze(dim=0)  # [3, 256, 256] -> [1, 3, 256, 256]
    anchor, label_pred, offset_pred = net(x)  # [5444, 4], [1, 10888], [1, 21776]
    offset_pred = offset_pred.reshape(-1, 4)  # [1, 21776] -> [5444, 4]
    anchor_pred = torch.empty(5444, 4)
    for i in range(5444):
        anchor_pred[i] = inverse_offset(anchor[i], offset_pred[i])
    label_pred = label_pred.reshape(label_pred.shape[0], 5444, 2)  # softmax turns the predicted scores into class probabilities
    label_pred = torch.nn.functional.softmax(label_pred, dim=2)
    label_pred = label_pred[0, :, 1]  # probability of the Pikachu class: [1, 5444, 2] -> [5444]
    anchor_pred = anchor_pred[label_pred > 0.2]  # keep boxes whose Pikachu probability exceeds the threshold
    label_pred = label_pred[label_pred > 0.2]
    return anchor_pred, label_pred
net = torch.load('./ssd.pth')
for (x, y) in loader_train:
    break
for i in range(10):
    anchor_pred, label_pred = predict(x[i])
    if len(anchor_pred) == 0:
        print('not found')
        continue 
    draw_anchors(x[i], y[i], anchor_pred)
not found
not found
not found
not found

(Five detection-result images: the ground-truth box is drawn in white, the predicted boxes in black.)

not found
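
predict keeps every box whose Pikachu probability exceeds 0.2, so several overlapping boxes around the same Pikachu can be returned. A standard SSD pipeline adds non-maximum suppression on top; below is a minimal sketch using torchvision.ops.nms, which is an addition to the original code and assumes torchvision is installed:

# Optional: non-maximum suppression over the predicted boxes
from torchvision.ops import nms
with torch.no_grad():
    anchor_pred, label_pred = predict(x[0])
if len(anchor_pred) > 0:
    keep = nms(anchor_pred, label_pred, iou_threshold=0.5)  # indices of the boxes to keep
    draw_anchors(x[0], y[0], anchor_pred[keep])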