深度学习-卷积神经网络CNN

案例-图像分类

网络结构: 卷积+BN+激活+池化

数据集介绍

CIFAR-10数据集包含5万张训练图像和1万张测试图像, 共10个类别, 每个类别有6000张图像(训练5000张 + 测试1000张), 图像大小为32×32×3。下图列举了这10个类别, 每一类随机展示了10张图片:

特征图计算

在卷积层和池化层结束后, 需要将特征图展平成一行n列的数据, 再送入全连接层。因此要逐层计算特征图尺寸的变化: 第一个全连接层的输入特征数等于最后一层卷积经池化后特征图各维度(通道数×高×宽)的乘积。例如下面的网络最终得到256×2×2的特征图, 所以第一个全连接层的输入特征数为1024。

具体流程 (Acc: 0.728)

# 导包
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchsummary import summary
from torchvision.datasets import CIFAR10
from torchvision.transforms import ToTensor, Compose  # Compose: chains several transforms into one callable (it is not data augmentation by itself)
import time
import matplotlib.pyplot as plt
​
# Default mini-batch size; train() reads this module-level name when it builds its DataLoader.
batch_size = 16
​
​
# 创建数据集
def create_dataset(root='data', download=False):
    """Build the CIFAR-10 training and test datasets.

    Args:
        root: Directory that holds (or will receive) the CIFAR-10 files.
        download: Forwarded to ``CIFAR10``. Pass True to fetch the data when
            it is not yet present under ``root``. The default False keeps
            the previous behaviour of only reading existing files.

    Returns:
        A ``(train, test)`` tuple of ``CIFAR10`` datasets whose images are
        converted by ``ToTensor``.
    """
    # Seeds torch's global RNG as a side effect; kept so callers that rely
    # on the seeded random stream see no change.
    torch.manual_seed(21)
    train_set, test_set = (
        CIFAR10(
            root=root,
            train=is_train,
            transform=Compose([ToTensor()]),
            download=download,
        )
        for is_train in (True, False)
    )
    return train_set, test_set
​
​
# 创建模型
class ImgCls(nn.Module):
    """CNN classifier for 3x32x32 images with 10 output classes.

    Five ``Conv2d -> BatchNorm2d -> rrelu -> MaxPool2d`` stages feed six
    fully connected layers. No convolution uses padding, so for a 32x32
    input the spatial size evolves as
    32 -> 30 -> 15 -> 13 -> 12 -> 10 -> 9 -> 8 -> 4 -> 3 -> 2,
    which gives a flattened vector of 256 * 2 * 2 = 1024 features.

    Attribute names and creation order are part of the contract: they
    determine the ``state_dict`` keys of saved checkpoints and the order in
    which seeded random initialisation is consumed.
    """

    def __init__(self):
        super().__init__()
        # Stage 1: 3 -> 16 channels. BatchNorm2d arguments are the library
        # defaults (eps=1e-5, momentum=0.1, affine, running stats).
        self.conv1 = nn.Conv2d(3, 16, kernel_size=3, stride=1)
        self.batch_norm_layer1 = nn.BatchNorm2d(num_features=16)
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)

        # Stage 2: 16 -> 32 channels.
        self.conv2 = nn.Conv2d(16, 32, kernel_size=3, stride=1)
        self.batch_norm_layer2 = nn.BatchNorm2d(num_features=32)
        self.pool2 = nn.MaxPool2d(kernel_size=2, stride=1)

        # Stage 3: 32 -> 64 channels.
        self.conv3 = nn.Conv2d(32, 64, kernel_size=3, stride=1)
        self.batch_norm_layer3 = nn.BatchNorm2d(num_features=64)
        self.pool3 = nn.MaxPool2d(kernel_size=2, stride=1)

        # Stage 4: 64 -> 128 channels.
        self.conv4 = nn.Conv2d(64, 128, kernel_size=2, stride=1)
        self.batch_norm_layer4 = nn.BatchNorm2d(num_features=128)
        self.pool4 = nn.MaxPool2d(kernel_size=2, stride=2)

        # Stage 5: 128 -> 256 channels.
        self.conv5 = nn.Conv2d(128, 256, kernel_size=2, stride=1)
        self.batch_norm_layer5 = nn.BatchNorm2d(num_features=256)
        self.pool5 = nn.MaxPool2d(kernel_size=2, stride=1)

        # Classifier head; 1024 = 256 channels * 2 * 2 spatial positions.
        self.linear1 = nn.Linear(1024, 2048)
        self.linear2 = nn.Linear(2048, 1024)
        self.linear3 = nn.Linear(1024, 512)
        self.linear4 = nn.Linear(512, 256)
        self.linear5 = nn.Linear(256, 128)
        self.out = nn.Linear(128, 10)

    def forward(self, x):
        """Return the raw class scores (logits) for a batch of images.

        Args:
            x: Image batch; the layer sizes assume shape (batch, 3, 32, 32).

        Returns:
            Tensor of shape (batch, 10). No softmax is applied here.
        """
        stages = (
            (self.conv1, self.batch_norm_layer1, self.pool1),
            (self.conv2, self.batch_norm_layer2, self.pool2),
            (self.conv3, self.batch_norm_layer3, self.pool3),
            (self.conv4, self.batch_norm_layer4, self.pool4),
            (self.conv5, self.batch_norm_layer5, self.pool5),
        )
        # Each stage: convolution -> batch norm -> activation -> pooling.
        # NOTE(review): torch.rrelu is called without training=True, so the
        # slope is presumably fixed rather than randomly sampled - confirm
        # that this is the intended activation.
        for conv, norm, pool in stages:
            x = pool(torch.rrelu(norm(conv(x))))

        # Flatten every sample's feature maps into one row: (batch, n).
        x = x.reshape(x.size(0), -1)

        for fc in (self.linear1, self.linear2, self.linear3,
                   self.linear4, self.linear5):
            x = torch.rrelu(fc(x))
        return self.out(x)
​
​
# 训练
def train(model, train_dataset, epochs):
    """Train ``model`` with Adam and cross-entropy, then save its weights.

    The mini-batch size is taken from the module-level ``batch_size``.
    After the last epoch the state dict is written to
    ``model/img_cls_model.pth``; the directory is created if necessary.

    Args:
        model: Network to optimise in place. It is switched to training
            mode and left in that mode.
        train_dataset: Dataset yielding ``(input, label)`` pairs.
        epochs: Number of full passes over ``train_dataset``.
    """
    import os

    save_path = 'model/img_cls_model.pth'
    # Create the checkpoint directory up front: a missing folder would
    # otherwise make torch.save fail only after every epoch has run.
    os.makedirs(os.path.dirname(save_path), exist_ok=True)

    torch.manual_seed(21)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=1e-4)
    # Follow the device the model already lives on instead of relying on a
    # global `device` that only exists when the file runs as a script.
    target_device = next(model.parameters()).device
    # Make sure BatchNorm uses batch statistics even if the caller had
    # previously put the model in eval mode.
    model.train()

    # One loader is enough: with shuffle=True it reshuffles on each pass.
    dataloader = DataLoader(train_dataset, shuffle=True, batch_size=batch_size)
    for epoch in range(epochs):
        loss_total = 0.0
        num_batches = 0
        start_time = time.time()
        for x, y in dataloader:
            output = model(x.to(target_device))
            loss_value = criterion(output, y.to(target_device))
            optimizer.zero_grad()
            loss_value.backward()
            optimizer.step()
            loss_total += loss_value.item()
            num_batches += 1
        print(f'epoch:{epoch + 1:4d}, loss:{loss_total / num_batches:6.4f}, time:{time.time() - start_time:.2f}s')
    torch.save(model.state_dict(), save_path)
​
​
# 测试
def test(valid_dataset, model, batch_size):
    """Print the classification accuracy of ``model`` on ``valid_dataset``.

    The model is evaluated in eval mode with gradient tracking disabled;
    its previous training/eval mode is restored afterwards.

    Args:
        valid_dataset: Dataset yielding ``(input, label)`` pairs.
        model: Network producing one score per class for each input.
        batch_size: Number of samples per evaluation batch.

    Prints ``Acc: <fraction correct>``; an empty dataset reports 0.0.
    """
    dataloader = DataLoader(valid_dataset, batch_size=batch_size, shuffle=False)

    # Follow the device the model lives on instead of relying on a global
    # `device` that only exists when the file runs as a script.
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else torch.device('cpu')

    was_training = model.training
    # Eval mode keeps BatchNorm from using (and updating its running
    # statistics with) the evaluation batches.
    model.eval()
    total_correct = 0
    try:
        # No gradients are needed for scoring; skipping them saves memory.
        with torch.no_grad():
            for x, y in dataloader:
                output = model(x.to(target_device))
                y_pred = torch.argmax(output, dim=-1)
                total_correct += (y_pred == y.to(target_device)).sum()
    finally:
        model.train(was_training)

    num_samples = len(valid_dataset)
    # int() accepts both the initial 0 and the accumulated tensor.
    accuracy = int(total_correct) / num_samples if num_samples else 0.0
    print(f'Acc: {accuracy}')
​
​
if __name__ == '__main__':
    # batch_size and device stay at module scope on purpose: functions in
    # this file look them up as globals.
    batch_size = 16
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Load the datasets.
    train_data, test_data = create_dataset()

    # Optional dataset inspection (uncomment to use):
    # print(f'classes: {train_data.class_to_idx}')
    # print(f'train set: {train_data.data.shape}')
    # print(f'test set: {test_data.data.shape}')
    # print(f'number of classes: {len(np.unique(train_data.targets))}')
    # plt.figure(figsize=(8, 8))
    # plt.imshow(train_data.data[0])
    # plt.title(train_data.classes[train_data.targets[0]])
    # plt.show()

    # Build the model on the selected device.
    model = ImgCls().to(device)

    # Print the layer summary; pass the device that was actually selected
    # ('cuda' or 'cpu') rather than a hard-coded 'cuda'.
    summary(model, (3, 32, 32), device=device.type, batch_size=batch_size)

    # Train, then reload the saved weights before evaluating.
    train(model, train_data, epochs=60)
    # map_location lets the checkpoint load on the current device;
    # weights_only restricts unpickling to tensor data.
    model.load_state_dict(
        torch.load('model/img_cls_model.pth', map_location=device, weights_only=True)
    )
    model.eval()
    test(test_data, model, batch_size=16)   # Acc: 0.728
​

调整网络结构

第一次调整: 训练50轮, Acc: 0.71

第二次调整: 训练30轮, Acc:0.7351

第三次调整: batch_size=8, epoch=50 => Acc: 0.7644

# 导包
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchsummary import summary
from torchvision.datasets import CIFAR10
from torchvision.transforms import ToTensor, Compose  # Compose: chains several transforms into one callable (it is not data augmentation by itself)
import time
import matplotlib.pyplot as plt
​
# Default mini-batch size; the __main__ section rebinds this module-level name before train() reads it.
batch_size = 16
​
​
# 创建数据集
def create_dataset(root='data', download=False):
    """Build the CIFAR-10 training and test datasets.

    Args:
        root: Directory that holds (or will receive) the CIFAR-10 files.
        download: Forwarded to ``CIFAR10``. Pass True to fetch the data when
            it is not yet present under ``root``. The default False keeps
            the previous behaviour of only reading existing files.

    Returns:
        A ``(train, test)`` tuple of ``CIFAR10`` datasets whose images are
        converted by ``ToTensor``.
    """
    # Seeds torch's global RNG as a side effect; kept so callers that rely
    # on the seeded random stream see no change.
    torch.manual_seed(21)
    train_set, test_set = (
        CIFAR10(
            root=root,
            train=is_train,
            transform=Compose([ToTensor()]),
            download=download,
        )
        for is_train in (True, False)
    )
    return train_set, test_set
​
​
# 创建模型
class ImgCls(nn.Module):
    """CNN classifier for 3x32x32 images with 10 output classes.

    Five ``Conv2d -> BatchNorm2d -> activation -> MaxPool2d`` stages feed
    six fully connected layers. Convolutions 1-4 use padding=1 and keep the
    spatial size; convolution 5 is unpadded. For a 32x32 input the spatial
    size evolves as 32 -> 32 -> 16 -> 16 -> 8 -> 8 -> 7 -> 7 -> 6 -> 4 -> 2,
    which gives a flattened vector of 256 * 2 * 2 = 1024 features.

    Attribute names and creation order are part of the contract: they
    determine the ``state_dict`` keys of saved checkpoints and the order in
    which seeded random initialisation is consumed.
    """

    def __init__(self):
        super().__init__()
        # Stage 1: 3 -> 16 channels. BatchNorm2d arguments are the library
        # defaults (eps=1e-5, momentum=0.1, affine, running stats).
        self.conv1 = nn.Conv2d(3, 16, kernel_size=3, stride=1, padding=1)
        self.batch_norm_layer1 = nn.BatchNorm2d(num_features=16)
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)

        # Stage 2: 16 -> 32 channels.
        self.conv2 = nn.Conv2d(16, 32, kernel_size=3, stride=1, padding=1)
        self.batch_norm_layer2 = nn.BatchNorm2d(num_features=32)
        self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2)

        # Stage 3: 32 -> 64 channels.
        self.conv3 = nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1)
        self.batch_norm_layer3 = nn.BatchNorm2d(num_features=64)
        self.pool3 = nn.MaxPool2d(kernel_size=2, stride=1)

        # Stage 4: 64 -> 128 channels.
        self.conv4 = nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1)
        self.batch_norm_layer4 = nn.BatchNorm2d(num_features=128)
        self.pool4 = nn.MaxPool2d(kernel_size=2, stride=1)

        # Stage 5: 128 -> 256 channels, unpadded.
        self.conv5 = nn.Conv2d(128, 256, kernel_size=3, stride=1)
        self.batch_norm_layer5 = nn.BatchNorm2d(num_features=256)
        self.pool5 = nn.MaxPool2d(kernel_size=2, stride=2)

        # Classifier head; 1024 = 256 channels * 2 * 2 spatial positions.
        self.linear1 = nn.Linear(1024, 2048)
        self.linear2 = nn.Linear(2048, 1024)
        self.linear3 = nn.Linear(1024, 512)
        self.linear4 = nn.Linear(512, 256)
        self.linear5 = nn.Linear(256, 128)
        self.out = nn.Linear(128, 10)

    def forward(self, x):
        """Return the raw class scores (logits) for a batch of images.

        Args:
            x: Image batch; the layer sizes assume shape (batch, 3, 32, 32).

        Returns:
            Tensor of shape (batch, 10). No softmax is applied here.
        """
        # NOTE(review): stages 1-4 use relu while stage 5 and linear5 use
        # rrelu. The mix is reproduced exactly as written - confirm whether
        # it is intentional.
        stages = (
            (self.conv1, self.batch_norm_layer1, torch.relu, self.pool1),
            (self.conv2, self.batch_norm_layer2, torch.relu, self.pool2),
            (self.conv3, self.batch_norm_layer3, torch.relu, self.pool3),
            (self.conv4, self.batch_norm_layer4, torch.relu, self.pool4),
            (self.conv5, self.batch_norm_layer5, torch.rrelu, self.pool5),
        )
        # Each stage: convolution -> batch norm -> activation -> pooling.
        for conv, norm, activation, pool in stages:
            x = pool(activation(norm(conv(x))))

        # Flatten every sample's feature maps into one row: (batch, n).
        x = x.reshape(x.size(0), -1)

        head = (
            (self.linear1, torch.relu),
            (self.linear2, torch.relu),
            (self.linear3, torch.relu),
            (self.linear4, torch.relu),
            (self.linear5, torch.rrelu),
        )
        for fc, activation in head:
            x = activation(fc(x))
        return self.out(x)
​
​
# 训练
def train(model, train_dataset, epochs):
    """Train ``model`` with Adam and cross-entropy, then save its weights.

    The mini-batch size is taken from the module-level ``batch_size``.
    After the last epoch the state dict is written to
    ``model/img_cls_model1.pth``; the directory is created if necessary.

    Args:
        model: Network to optimise in place. It is switched to training
            mode and left in that mode.
        train_dataset: Dataset yielding ``(input, label)`` pairs.
        epochs: Number of full passes over ``train_dataset``.
    """
    import os

    save_path = 'model/img_cls_model1.pth'
    # Create the checkpoint directory up front: a missing folder would
    # otherwise make torch.save fail only after every epoch has run.
    os.makedirs(os.path.dirname(save_path), exist_ok=True)

    torch.manual_seed(21)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=1e-4)
    # Follow the device the model already lives on instead of relying on a
    # global `device` that only exists when the file runs as a script.
    target_device = next(model.parameters()).device
    # Make sure BatchNorm uses batch statistics even if the caller had
    # previously put the model in eval mode.
    model.train()

    # One loader is enough: with shuffle=True it reshuffles on each pass.
    dataloader = DataLoader(train_dataset, shuffle=True, batch_size=batch_size)
    for epoch in range(epochs):
        loss_total = 0.0
        num_batches = 0
        start_time = time.time()
        for x, y in dataloader:
            output = model(x.to(target_device))
            loss_value = criterion(output, y.to(target_device))
            optimizer.zero_grad()
            loss_value.backward()
            optimizer.step()
            loss_total += loss_value.item()
            num_batches += 1
        print(f'epoch:{epoch + 1:4d}, loss:{loss_total / num_batches:6.4f}, time:{time.time() - start_time:.2f}s')
    torch.save(model.state_dict(), save_path)
​
​
# 测试
def test(valid_dataset, model, batch_size):
    """Print the classification accuracy of ``model`` on ``valid_dataset``.

    The model is evaluated in eval mode with gradient tracking disabled;
    its previous training/eval mode is restored afterwards.

    Args:
        valid_dataset: Dataset yielding ``(input, label)`` pairs.
        model: Network producing one score per class for each input.
        batch_size: Number of samples per evaluation batch.

    Prints ``Acc: <fraction correct>``; an empty dataset reports 0.0.
    """
    dataloader = DataLoader(valid_dataset, batch_size=batch_size, shuffle=False)

    # Follow the device the model lives on instead of relying on a global
    # `device` that only exists when the file runs as a script.
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else torch.device('cpu')

    was_training = model.training
    # Eval mode keeps BatchNorm from using (and updating its running
    # statistics with) the evaluation batches.
    model.eval()
    total_correct = 0
    try:
        # No gradients are needed for scoring; skipping them saves memory.
        with torch.no_grad():
            for x, y in dataloader:
                output = model(x.to(target_device))
                y_pred = torch.argmax(output, dim=-1)
                total_correct += (y_pred == y.to(target_device)).sum()
    finally:
        model.train(was_training)

    num_samples = len(valid_dataset)
    # int() accepts both the initial 0 and the accumulated tensor.
    accuracy = int(total_correct) / num_samples if num_samples else 0.0
    print(f'Acc: {accuracy}')
​
​
if __name__ == '__main__':
    # batch_size and device stay at module scope on purpose: functions in
    # this file look them up as globals. Training uses batches of 8 here.
    batch_size = 8
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Load the datasets.
    train_data, test_data = create_dataset()

    # Optional dataset inspection (uncomment to use):
    # print(f'classes: {train_data.class_to_idx}')
    # print(f'train set: {train_data.data.shape}')
    # print(f'test set: {test_data.data.shape}')
    # print(f'number of classes: {len(np.unique(train_data.targets))}')
    # plt.figure(figsize=(8, 8))
    # plt.imshow(train_data.data[0])
    # plt.title(train_data.classes[train_data.targets[0]])
    # plt.show()

    # Build the model on the selected device.
    model = ImgCls().to(device)

    # Print the layer summary; pass the device that was actually selected
    # ('cuda' or 'cpu') rather than a hard-coded 'cuda'.
    summary(model, (3, 32, 32), device=device.type, batch_size=batch_size)

    # Train, then reload the saved weights before evaluating.
    train(model, train_data, epochs=50)
    # map_location lets the checkpoint load on the current device.
    model.load_state_dict(
        torch.load('model/img_cls_model1.pth', map_location=device, weights_only=True)
    )
    model.eval()
    test(test_data, model, batch_size=16)   # Acc: 0.7644
​

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值