PyTorch Deep Learning Practice, Lecture 11: Convolutional Neural Networks (Advanced), Inception Module for Handwritten Digit Recognition

This post implements a deep learning model based on the Inception module. By defining an InceptionA class, the same network structure can be reused, and the model is then applied to the MNIST handwritten digit recognition task.


Video link: 《PyTorch深度学习实践》完结合集 (complete series) on Bilibili

This time we implement a more complex neural network. Although the model looks complicated, we can reduce code redundancy and improve reusability by defining the repeated network structure as its own class, which keeps the code concise.

The structure diagram of the network we will implement is shown in the figure below:

If you understand the figure above, writing the code is straightforward. Let me first explain the class that implements the Inception Module. Because the outputs of the four branches are concatenated along the channel dimension, everything except the channel count (batch_size, width, height) must stay the same. Therefore, every convolution inside the Inception Module is padded so that the width and height are unchanged after each convolution. Understanding this point is key.

Note: padding=2 means 2 pixels are added on each of the four sides of the input.
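As a quick sanity check (a minimal sketch of my own, not from the original lecture), you can verify that a 5x5 convolution with padding=2 leaves the spatial dimensions unchanged:

import torch

# dummy input: batch of 1, 16 channels, 28x28 pixels
x = torch.randn(1, 16, 28, 28)
conv5x5 = torch.nn.Conv2d(16, 24, kernel_size=5, padding=2)
# output size = (28 + 2*2 - 5) / 1 + 1 = 28, so only the channel count changes
print(conv5x5(x).shape)  # torch.Size([1, 24, 28, 28])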

Here is the code for the Inception Module (the part of the network outlined in red in the figure above):

In the code below, "branch 1" refers to the leftmost forward path of the Inception Module, "branch 2" to the next one, and so on.

import numpy as np
from torchvision.datasets import MNIST
from torchvision import transforms
from torch.utils.data import DataLoader
import torch
import torch.nn.functional as F
import matplotlib.pyplot as plt


# Inception Module
class InceptionA(torch.nn.Module):
    # batch_size, width, and height must stay fixed so the branch outputs can be concatenated; only the channel count changes
    def __init__(self, in_channels):
        super(InceptionA, self).__init__()
        # create the layers
        # branch 1 (leftmost): the pooling itself is applied in forward
        self.branch_pool = torch.nn.Conv2d(in_channels, 24, kernel_size=1)
        
        # branch 2
        self.branch1x1 = torch.nn.Conv2d(in_channels, 16, kernel_size=1)
        
        # branch 3
        self.branch5x5_1 = torch.nn.Conv2d(in_channels, 16, kernel_size=1)
        self.branch5x5_2 = torch.nn.Conv2d(16, 24, kernel_size=5, padding=2)
        
        # branch 4
        self.branch3x3_1 = torch.nn.Conv2d(in_channels, 16, kernel_size=1)
        self.branch3x3_2 = torch.nn.Conv2d(16, 24, kernel_size=3, padding=1)
        self.branch3x3_3 = torch.nn.Conv2d(24, 24, kernel_size=3, padding=1)

    def forward(self, x):
        # forward pass
        # branch 1: average pooling followed by a 1x1 convolution
        branch_pool = F.avg_pool2d(x, kernel_size=3, padding=1, stride=1)
        branch_pool = self.branch_pool(branch_pool)

        # branch 2
        branch1x1 = self.branch1x1(x)

        # branch 3
        branch5x5 = self.branch5x5_2(self.branch5x5_1(x))

        # branch 4
        branch3x3 = self.branch3x3_3(self.branch3x3_2(self.branch3x3_1(x)))
        
        # concatenate along the channel dimension
        outputs = [branch_pool, branch1x1, branch5x5, branch3x3]
        return torch.cat(outputs, dim=1)  # channels = 24 + 16 + 24 + 24 = 88
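
To convince yourself that the four branches really can be concatenated, here is a minimal shape check (my own sketch, using the InceptionA class defined above):

import torch

incep = InceptionA(in_channels=10)
# dummy batch shaped like the output of the first convolution + pooling stage
x = torch.randn(64, 10, 12, 12)
# every branch preserves the 12x12 spatial size, so only the channel count grows
print(incep(x).shape)  # torch.Size([64, 88, 12, 12]); 24 + 16 + 24 + 24 = 88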

Next, here is the implementation of the whole model:

# the full network
class Net(torch.nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = torch.nn.Conv2d(1, 10, kernel_size=5)
        self.conv2 = torch.nn.Conv2d(88, 20, kernel_size=5)

        self.incep1 = InceptionA(in_channels=10)
        self.incep2 = InceptionA(in_channels=20)

        self.mp = torch.nn.MaxPool2d(2)
        # final fully connected layer
        self.fc = torch.nn.Linear(1408, 10)  # 88 * 4 * 4 = 1408

    def forward(self, x):
        in_size = x.size(0)
        x = F.relu(self.mp(self.conv1(x)))
        x = self.incep1(x)
        x = F.relu(self.mp(self.conv2(x)))
        x = self.incep2(x)  # torch.Size([64, 88, 4, 4])
        # print(x.size())  # print the shape here first to determine the fully connected layer's input size: torch.Size([64, 88, 4, 4]) gives self.fc = torch.nn.Linear(1408, 10)
        # flatten to 2-D [batch_size, C*H*W] before the fully connected layer
        x = x.view(in_size, -1)
        x = self.fc(x)
        return x
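
Where does 1408 come from? For a 28x28 MNIST input: conv1 (kernel 5, no padding) gives 24x24, max pooling gives 12x12, incep1 keeps 12x12, conv2 (kernel 5) gives 8x8, pooling gives 4x4, and incep2 outputs 88 channels, hence 88 * 4 * 4 = 1408. You can also trace the shapes with a dummy input (a sketch of my own, mirroring the commented-out print in forward):

import torch
import torch.nn.functional as F

net = Net()
x = torch.randn(64, 1, 28, 28)    # one dummy MNIST-sized batch
x = F.relu(net.mp(net.conv1(x)))  # -> [64, 10, 12, 12]
x = net.incep1(x)                 # -> [64, 88, 12, 12]
x = F.relu(net.mp(net.conv2(x)))  # -> [64, 20, 4, 4]
x = net.incep2(x)                 # -> [64, 88, 4, 4]
print(x.shape, 88 * 4 * 4)        # confirms the fc input size of 1408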

Only the two parts above are a little tricky, and even those are not hard to write once you follow the network structure diagram. The rest of the code is the same as in the previous assignments.

Here is the complete implementation:

import numpy as np
from torchvision.datasets import MNIST
from torchvision import transforms
from torch.utils.data import DataLoader
import torch
import torch.nn.functional as F
import matplotlib.pyplot as plt


# Inception Module (as explained above)
class InceptionA(torch.nn.Module):
    # batch_size, width, and height must stay fixed so the branch outputs can be concatenated; only the channel count changes
    def __init__(self, in_channels):
        super(InceptionA, self).__init__()
        # create the convolution layers
        self.branch_pool = torch.nn.Conv2d(in_channels, 24, kernel_size=1)

        self.branch1x1 = torch.nn.Conv2d(in_channels, 16, kernel_size=1)

        self.branch5x5_1 = torch.nn.Conv2d(in_channels, 16, kernel_size=1)
        self.branch5x5_2 = torch.nn.Conv2d(16, 24, kernel_size=5, padding=2)

        self.branch3x3_1 = torch.nn.Conv2d(in_channels, 16, kernel_size=1)
        self.branch3x3_2 = torch.nn.Conv2d(16, 24, kernel_size=3, padding=1)
        self.branch3x3_3 = torch.nn.Conv2d(24, 24, kernel_size=3, padding=1)

    def forward(self, x):
        # branch 1: average pooling followed by a 1x1 convolution
        branch_pool = F.avg_pool2d(x, kernel_size=3, padding=1, stride=1)
        branch_pool = self.branch_pool(branch_pool)

        # branch 2
        branch1x1 = self.branch1x1(x)

        # branch 3
        branch5x5 = self.branch5x5_2(self.branch5x5_1(x))

        # branch 4
        branch3x3 = self.branch3x3_3(self.branch3x3_2(self.branch3x3_1(x)))

        outputs = [branch_pool, branch1x1, branch5x5, branch3x3]
        return torch.cat(outputs, dim=1)  # channels = 24 + 16 + 24 + 24 = 88


# the full network
class Net(torch.nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = torch.nn.Conv2d(1, 10, kernel_size=5)
        self.conv2 = torch.nn.Conv2d(88, 20, kernel_size=5)

        self.incep1 = InceptionA(in_channels=10)
        self.incep2 = InceptionA(in_channels=20)

        self.mp = torch.nn.MaxPool2d(2)
        # final fully connected layer
        self.fc = torch.nn.Linear(1408, 10)  # 88 * 4 * 4 = 1408

    def forward(self, x):
        in_size = x.size(0)
        x = F.relu(self.mp(self.conv1(x)))
        x = self.incep1(x)
        x = F.relu(self.mp(self.conv2(x)))
        x = self.incep2(x)  # torch.Size([64, 88, 4, 4])
        # print(x.size())  # print the shape here first to determine the fully connected layer's input size: torch.Size([64, 88, 4, 4]) gives self.fc = torch.nn.Linear(1408, 10)
        # flatten to 2-D [batch_size, C*H*W] before the fully connected layer
        x = x.view(in_size, -1)
        x = self.fc(x)
        return x


# Load the dataset
# 1. Prepare the dataset
# preprocessing
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,))  # mean and std of the MNIST training set
])
batch_size = 64
# training set
mnist_train = MNIST(root='../dataset/mnist', train=True, transform=transform, download=True)
train_loader = DataLoader(dataset=mnist_train, shuffle=True, batch_size=batch_size)
# test set
mnist_test = MNIST(root='../dataset/mnist', train=False, transform=transform, download=True)
test_loader = DataLoader(dataset=mnist_test, shuffle=True, batch_size=batch_size)

model = Net()
# 3. Construct the loss function and optimizer (step 2, defining the model, is above)
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.5)
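
# Note (my addition, not in the original code): CrossEntropyLoss combines
# LogSoftmax and NLLLoss, which is why Net.forward returns raw logits with no
# final softmax. A quick standalone check:
#   logits = torch.tensor([[2.0, 0.5, -1.0]]); target = torch.tensor([0])
#   criterion(logits, target).item()  # ~0.24 = -log(softmax(logits)[0, 0])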


# 4. Train and test
# one training epoch
def train(epoch):
    running_loss = 0.0
    for idx, (inputs, target) in enumerate(train_loader, 0):
        # this part is identical to the previous assignments
        # forward pass
        y_pred = model(inputs)
        loss = criterion(y_pred, target)
        # backward pass
        optimizer.zero_grad()
        loss.backward()
        # update the parameters
        optimizer.step()

        running_loss += loss.item()
        if idx % 300 == 299:  # print the average loss every 300 batches; idx starts at 0, so the 300th batch has idx 299
            print(f'epoch={epoch + 1},batch_idx={idx + 1},loss={running_loss / 300}')
            running_loss = 0.0


# accuracy per epoch, kept for plotting
accuracy_list = []


# one evaluation pass over the test set
def test():
    # number of correctly predicted samples
    correct_num = 0
    # total number of samples
    total = 0
    # no gradients are needed during evaluation, so disable gradient tracking
    with torch.no_grad():
        for images, labels in test_loader:
            # get the model outputs
            outputs = model(images)
            # the index of the maximum along dim=1 is the predicted label
            _, predicted = torch.max(outputs.data, dim=1)
            # accumulate the number of samples in each batch to get the total for this pass
            total += labels.size(0)
            # accumulate the number of correct predictions in each batch
            correct_num += (predicted == labels).sum().item()
        print(f'Accuracy on test set:{100 * correct_num / total}%')  # print the accuracy for this evaluation pass
        accuracy_list.append(100 * correct_num / total)
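
# Note (my addition, not in the original code): torch.max along dim=1 returns
# (values, indices) per row, and the indices are the predicted labels, e.g.:
#   outputs = torch.tensor([[0.1, 2.3, 0.5], [1.8, 0.2, 0.4]])
#   torch.max(outputs, dim=1).indices  # tensor([1, 0])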


if __name__ == '__main__':
    # (optional) probe the input size of the fully connected layer
    # for idx, (inputs, target) in enumerate(train_loader, 0):
    #     model(inputs)
    #     break
    # train for 10 epochs; each epoch runs over the full training set, then evaluates on the test set
    for epoch in range(10):
        train(epoch)
        test()
    # plot the accuracy curve
    plt.plot(np.arange(10), accuracy_list)
    plt.xlabel('epoch')
    plt.ylabel('accuracy %')
    plt.show()

The results are as follows:

epoch=1,batch_idx=300,loss=0.9408395903557539
epoch=1,batch_idx=600,loss=0.19775621948142846
epoch=1,batch_idx=900,loss=0.1403631071994702
Accuracy on test set:96.35%

.......

epoch=9,batch_idx=300,loss=0.03314510397089179
epoch=9,batch_idx=600,loss=0.03342989099541834
epoch=9,batch_idx=900,loss=0.03618314559746068
Accuracy on test set:98.92%
epoch=10,batch_idx=300,loss=0.029011620228023578
epoch=10,batch_idx=600,loss=0.03060439710029944
epoch=10,batch_idx=900,loss=0.03674590568523854
Accuracy on test set:99.1%

That's all. If you spot any problems, please point them out. Thanks!
