Week P5: Sneaker Recognition

Table of Contents

I. Code and Results

1. Preparation

2. Building a Simple CNN

3. Training the Model

4. Visualizing the Results

5. Saving and Loading the Model

II. Summary

I. Code and Results

1. Preparation

import torch
import torch.nn as nn
import torchvision
from torchvision import transforms, datasets

import os, PIL, random, pathlib

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

data_dir = './5-data/'
data_dir = pathlib.Path(data_dir)

data_paths  = list(data_dir.glob('*'))
# Note: this lists the names of the subfolders directly under ./5-data/
# ('test' and 'train'), not the class names; it is only used later for its
# length, which happens to equal the number of classes.
classeNames = [str(path).split("/")[1] for path in data_paths]
print(classeNames)
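
Also note that str(path).split("/") depends on the "/" separator and breaks on Windows paths. A minimal, platform-independent sketch that lists the actual class names instead (train_dir and class_names are illustrative names, not used in the rest of the code):

train_dir = pathlib.Path('./5-data/train/')
class_names = sorted(p.name for p in train_dir.iterdir() if p.is_dir())
print(class_names)  # expected: ['adidas', 'nike']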

# For more on transforms.Compose, see: https://blog.youkuaiyun.com/qq_38251616/article/details/124878863
train_transforms = transforms.Compose([
    transforms.Resize([224, 224]),  # resize input images to a uniform size
    # transforms.RandomHorizontalFlip(), # random horizontal flip
    transforms.ToTensor(),          # convert PIL Image or numpy.ndarray to a tensor, scaled to [0, 1]
    transforms.Normalize(           # standardize each channel ((x - mean) / std) so the model converges more easily
        mean=[0.485, 0.456, 0.406], 
        std=[0.229, 0.224, 0.225])  # the standard ImageNet statistics, commonly reused for natural images
])

test_transform = transforms.Compose([
    transforms.Resize([224, 224]),  # resize input images to a uniform size
    transforms.ToTensor(),          # convert PIL Image or numpy.ndarray to a tensor, scaled to [0, 1]
    transforms.Normalize(           # standardize each channel ((x - mean) / std) so the model converges more easily
        mean=[0.485, 0.456, 0.406], 
        std=[0.229, 0.224, 0.225])  # the standard ImageNet statistics, commonly reused for natural images
])
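
transforms.Normalize standardizes each channel as x' = (x - mean) / std, so a transformed tensor no longer lies in [0, 1] and cannot be shown with matplotlib as-is. A minimal sketch of the inverse operation (denormalize is a hypothetical helper, not part of the original notebook):

IMAGENET_MEAN = torch.tensor([0.485, 0.456, 0.406]).view(3, 1, 1)
IMAGENET_STD  = torch.tensor([0.229, 0.224, 0.225]).view(3, 1, 1)

def denormalize(img_tensor):
    # invert (x - mean) / std channel-wise and clamp back to the displayable range
    return (img_tensor * IMAGENET_STD + IMAGENET_MEAN).clamp(0, 1)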

train_dataset = datasets.ImageFolder("./5-data/train/", transform=train_transforms)
test_dataset  = datasets.ImageFolder("./5-data/test/", transform=test_transform)  # use the test transform here, not train_transforms

print(train_dataset.class_to_idx)

batch_size = 32

train_dl = torch.utils.data.DataLoader(train_dataset,
                                       batch_size=batch_size,
                                       shuffle=True,
                                       num_workers=1)
test_dl = torch.utils.data.DataLoader(test_dataset,
                                      batch_size=batch_size,
                                      shuffle=False,  # no need to shuffle the test set
                                      num_workers=1)
for X, y in test_dl:
    print("Shape of X [N, C, H, W]: ", X.shape)
    print("Shape of y: ", y.shape, y.dtype)
    break
['test', 'train']
{'adidas': 0, 'nike': 1}
Shape of X [N, C, H, W]:  torch.Size([32, 3, 224, 224])
Shape of y:  torch.Size([32]) torch.int64

2. Building a Simple CNN

import torch.nn.functional as F

class Model(nn.Module):
    def __init__(self):
        super(Model, self).__init__()
        self.conv1=nn.Sequential(
            nn.Conv2d(3, 12, kernel_size=5, padding=0), # 12*220*220
            nn.BatchNorm2d(12),
            nn.ReLU())
        
        self.conv2=nn.Sequential(
            nn.Conv2d(12, 12, kernel_size=5, padding=0), # 12*216*216
            nn.BatchNorm2d(12),
            nn.ReLU())
        
        self.pool3=nn.Sequential(
            nn.MaxPool2d(2))                              # 12*108*108
        
        self.conv4=nn.Sequential(
            nn.Conv2d(12, 24, kernel_size=5, padding=0), # 24*104*104
            nn.BatchNorm2d(24),
            nn.ReLU())
        
        self.conv5=nn.Sequential(
            nn.Conv2d(24, 24, kernel_size=5, padding=0), # 24*100*100
            nn.BatchNorm2d(24),
            nn.ReLU())
        
        self.pool6=nn.Sequential(
            nn.MaxPool2d(2))                              # 24*50*50

        self.dropout = nn.Sequential(
            nn.Dropout(0.2))
        
        self.fc=nn.Sequential(
            nn.Linear(24*50*50, len(classeNames)))
        
    def forward(self, x):
        
        batch_size = x.size(0)
        x = self.conv1(x)  # conv -> BN -> ReLU
        x = self.conv2(x)  # conv -> BN -> ReLU
        x = self.pool3(x)  # max pooling
        x = self.conv4(x)  # conv -> BN -> ReLU
        x = self.conv5(x)  # conv -> BN -> ReLU
        x = self.pool6(x)  # max pooling
        x = self.dropout(x)
        x = x.view(batch_size, -1)  # flatten to the shape the fully connected layer expects: (batch, 24*50*50); the -1 is inferred as 24*50*50
        x = self.fc(x)
       
        return x

device = "cuda" if torch.cuda.is_available() else "cpu"
print("Using {} device".format(device))

model = Model().to(device)
model
Using cuda device

Model(
  (conv1): Sequential(
    (0): Conv2d(3, 12, kernel_size=(5, 5), stride=(1, 1))
    (1): BatchNorm2d(12, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
  )
  (conv2): Sequential(
    (0): Conv2d(12, 12, kernel_size=(5, 5), stride=(1, 1))
    (1): BatchNorm2d(12, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
  )
  (pool3): Sequential(
    (0): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (conv4): Sequential(
    (0): Conv2d(12, 24, kernel_size=(5, 5), stride=(1, 1))
    (1): BatchNorm2d(24, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
  )
  (conv5): Sequential(
    (0): Conv2d(24, 24, kernel_size=(5, 5), stride=(1, 1))
    (1): BatchNorm2d(24, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
  )
  (pool6): Sequential(
    (0): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (dropout): Sequential(
    (0): Dropout(p=0.2, inplace=False)
  )
  (fc): Sequential(
    (0): Linear(in_features=60000, out_features=2, bias=True)
  )
)
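
As a sanity check on the hand-computed feature-map sizes in the comments above, a dummy batch can be pushed through the convolutional stages (an assumed snippet, not part of the original run); the flattened size should match the 24*50*50 hard-coded in the Linear layer:

with torch.no_grad():
    x = torch.zeros(1, 3, 224, 224, device=device)  # dummy batch of one image
    x = model.pool3(model.conv2(model.conv1(x)))
    x = model.pool6(model.conv5(model.conv4(x)))
print(x.shape)    # expected: torch.Size([1, 24, 50, 50])
print(x.numel())  # expected: 60000, matching nn.Linear(24*50*50, len(classeNames))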

3. Training the Model

# training loop
def train(dataloader, model, loss_fn, optimizer):
    size = len(dataloader.dataset)  # size of the training set
    num_batches = len(dataloader)   # number of batches (size / batch_size, rounded up)

    train_loss, train_acc = 0, 0  # initialize training loss and accuracy
    
    for X, y in dataloader:  # fetch images and their labels
        X, y = X.to(device), y.to(device)
        
        # compute the prediction error
        pred = model(X)          # network output
        loss = loss_fn(pred, y)  # loss between the network output and the ground-truth labels
        
        # backpropagation
        optimizer.zero_grad()  # zero the gradients
        loss.backward()        # backpropagate
        optimizer.step()       # update the parameters
        
        # accumulate acc and loss
        train_acc  += (pred.argmax(1) == y).type(torch.float).sum().item()
        train_loss += loss.item()
            
    train_acc  /= size
    train_loss /= num_batches

    return train_acc, train_loss

def test(dataloader, model, loss_fn):
    size        = len(dataloader.dataset)  # size of the test set
    num_batches = len(dataloader)          # number of batches (size / batch_size, rounded up)
    test_loss, test_acc = 0, 0
    
    # disable gradient tracking when not training, saving compute and memory
    with torch.no_grad():
        for imgs, target in dataloader:
            imgs, target = imgs.to(device), target.to(device)
            
            # compute the loss
            target_pred = model(imgs)
            loss        = loss_fn(target_pred, target)
            
            test_loss += loss.item()
            test_acc  += (target_pred.argmax(1) == target).type(torch.float).sum().item()

    test_acc  /= size
    test_loss /= num_batches

    return test_acc, test_loss

def adjust_learning_rate(optimizer, epoch, start_lr):
    # decay the learning rate to 92% of its previous value every 2 epochs
    lr = start_lr * (0.92 ** (epoch // 2))
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr

learn_rate = 2e-4 # initial learning rate
optimizer  = torch.optim.SGD(model.parameters(), lr=learn_rate)

loss_fn    = nn.CrossEntropyLoss() # loss function
epochs     = 40
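
The hand-rolled adjust_learning_rate above reproduces what PyTorch's built-in StepLR scheduler does; a minimal sketch of the equivalent setup (this scheduler object is illustrative only and is not used by the loop below, which calls adjust_learning_rate instead):

from torch.optim.lr_scheduler import StepLR

# lr = start_lr * 0.92 ** (epoch // 2): multiply the lr by gamma every step_size epochs
scheduler = StepLR(optimizer, step_size=2, gamma=0.92)
# usage: call scheduler.step() once per epoch (see the commented-out line in the loop below)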

train_loss = []
train_acc  = []
test_loss  = []
test_acc   = []

for epoch in range(epochs):
    # update the learning rate (when using the hand-rolled schedule)
    adjust_learning_rate(optimizer, epoch, learn_rate)
    
    model.train()
    epoch_train_acc, epoch_train_loss = train(train_dl, model, loss_fn, optimizer)
    # scheduler.step() # update the learning rate (when using the official scheduler API instead)
    
    model.eval()
    epoch_test_acc, epoch_test_loss = test(test_dl, model, loss_fn)
    
    train_acc.append(epoch_train_acc)
    train_loss.append(epoch_train_loss)
    test_acc.append(epoch_test_acc)
    test_loss.append(epoch_test_loss)
    
    # read the current learning rate
    lr = optimizer.state_dict()['param_groups'][0]['lr']
    
    template = ('Epoch:{:2d}, Train_acc:{:.1f}%, Train_loss:{:.3f}, Test_acc:{:.1f}%, Test_loss:{:.3f}, Lr:{:.2E}')
    print(template.format(epoch+1, epoch_train_acc*100, epoch_train_loss, 
                          epoch_test_acc*100, epoch_test_loss, lr))
print('Done')

 

Epoch: 1, Train_acc:49.6%, Train_loss:1.058, Test_acc:50.0%, Test_loss:0.714, Lr:2.00E-04
Epoch: 2, Train_acc:60.6%, Train_loss:0.817, Test_acc:65.8%, Test_loss:0.643, Lr:2.00E-04
Epoch: 3, Train_acc:67.1%, Train_loss:0.635, Test_acc:69.7%, Test_loss:0.518, Lr:1.84E-04
Epoch: 4, Train_acc:70.3%, Train_loss:0.604, Test_acc:73.7%, Test_loss:0.586, Lr:1.84E-04
Epoch: 5, Train_acc:79.7%, Train_loss:0.462, Test_acc:72.4%, Test_loss:0.524, Lr:1.69E-04
Epoch: 6, Train_acc:80.3%, Train_loss:0.463, Test_acc:75.0%, Test_loss:0.492, Lr:1.69E-04
Epoch: 7, Train_acc:83.9%, Train_loss:0.408, Test_acc:77.6%, Test_loss:0.447, Lr:1.56E-04
Epoch: 8, Train_acc:86.5%, Train_loss:0.379, Test_acc:75.0%, Test_loss:0.484, Lr:1.56E-04
Epoch: 9, Train_acc:88.0%, Train_loss:0.348, Test_acc:76.3%, Test_loss:0.510, Lr:1.43E-04
Epoch:10, Train_acc:90.8%, Train_loss:0.314, Test_acc:78.9%, Test_loss:0.495, Lr:1.43E-04
Epoch:11, Train_acc:92.4%, Train_loss:0.297, Test_acc:76.3%, Test_loss:0.483, Lr:1.32E-04
Epoch:12, Train_acc:92.8%, Train_loss:0.287, Test_acc:78.9%, Test_loss:0.449, Lr:1.32E-04
Epoch:13, Train_acc:94.6%, Train_loss:0.277, Test_acc:80.3%, Test_loss:0.432, Lr:1.21E-04
Epoch:14, Train_acc:93.0%, Train_loss:0.272, Test_acc:77.6%, Test_loss:0.436, Lr:1.21E-04
Epoch:15, Train_acc:94.8%, Train_loss:0.259, Test_acc:78.9%, Test_loss:0.459, Lr:1.12E-04
Epoch:16, Train_acc:97.0%, Train_loss:0.240, Test_acc:80.3%, Test_loss:0.440, Lr:1.12E-04
Epoch:17, Train_acc:95.2%, Train_loss:0.239, Test_acc:78.9%, Test_loss:0.445, Lr:1.03E-04
Epoch:18, Train_acc:95.2%, Train_loss:0.237, Test_acc:78.9%, Test_loss:0.455, Lr:1.03E-04
Epoch:19, Train_acc:97.0%, Train_loss:0.219, Test_acc:78.9%, Test_loss:0.444, Lr:9.44E-05
Epoch:20, Train_acc:96.6%, Train_loss:0.220, Test_acc:78.9%, Test_loss:0.438, Lr:9.44E-05
Epoch:21, Train_acc:96.6%, Train_loss:0.207, Test_acc:78.9%, Test_loss:0.404, Lr:8.69E-05
Epoch:22, Train_acc:96.6%, Train_loss:0.202, Test_acc:78.9%, Test_loss:0.422, Lr:8.69E-05
Epoch:23, Train_acc:98.0%, Train_loss:0.206, Test_acc:78.9%, Test_loss:0.423, Lr:7.99E-05
Epoch:24, Train_acc:97.6%, Train_loss:0.194, Test_acc:78.9%, Test_loss:0.406, Lr:7.99E-05
Epoch:25, Train_acc:98.4%, Train_loss:0.191, Test_acc:77.6%, Test_loss:0.444, Lr:7.35E-05
Epoch:26, Train_acc:97.6%, Train_loss:0.191, Test_acc:81.6%, Test_loss:0.438, Lr:7.35E-05
Epoch:27, Train_acc:97.6%, Train_loss:0.195, Test_acc:80.3%, Test_loss:0.385, Lr:6.77E-05
Epoch:28, Train_acc:98.2%, Train_loss:0.190, Test_acc:80.3%, Test_loss:0.405, Lr:6.77E-05
Epoch:29, Train_acc:98.4%, Train_loss:0.175, Test_acc:81.6%, Test_loss:0.374, Lr:6.22E-05
Epoch:30, Train_acc:98.2%, Train_loss:0.182, Test_acc:80.3%, Test_loss:0.439, Lr:6.22E-05
Epoch:31, Train_acc:98.4%, Train_loss:0.177, Test_acc:80.3%, Test_loss:0.390, Lr:5.73E-05
Epoch:32, Train_acc:97.6%, Train_loss:0.177, Test_acc:80.3%, Test_loss:0.431, Lr:5.73E-05
Epoch:33, Train_acc:98.8%, Train_loss:0.174, Test_acc:80.3%, Test_loss:0.472, Lr:5.27E-05
Epoch:34, Train_acc:98.4%, Train_loss:0.165, Test_acc:80.3%, Test_loss:0.431, Lr:5.27E-05
Epoch:35, Train_acc:98.0%, Train_loss:0.163, Test_acc:80.3%, Test_loss:0.435, Lr:4.85E-05
Epoch:36, Train_acc:98.0%, Train_loss:0.167, Test_acc:80.3%, Test_loss:0.471, Lr:4.85E-05
Epoch:37, Train_acc:98.6%, Train_loss:0.166, Test_acc:80.3%, Test_loss:0.392, Lr:4.46E-05
Epoch:38, Train_acc:98.2%, Train_loss:0.163, Test_acc:81.6%, Test_loss:0.403, Lr:4.46E-05
Epoch:39, Train_acc:97.8%, Train_loss:0.159, Test_acc:81.6%, Test_loss:0.390, Lr:4.10E-05
Epoch:40, Train_acc:98.6%, Train_loss:0.156, Test_acc:81.6%, Test_loss:0.405, Lr:4.10E-05
Done
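
With the per-epoch history collected in the four lists above, the best test epoch can be read out directly; a small sketch:

best_epoch = max(range(epochs), key=lambda i: test_acc[i])
print('Best test accuracy: {:.1f}% (epoch {})'.format(test_acc[best_epoch] * 100, best_epoch + 1))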

4. Visualizing the Results

import matplotlib.pyplot as plt
# hide warnings
import warnings
warnings.filterwarnings("ignore")               # ignore warning messages
plt.rcParams['font.sans-serif']    = ['SimHei'] # display Chinese characters correctly in labels
plt.rcParams['axes.unicode_minus'] = False      # display minus signs correctly
plt.rcParams['figure.dpi']         = 100        # figure resolution

epochs_range = range(epochs)

plt.figure(figsize=(12, 3))
plt.subplot(1, 2, 1)

plt.plot(epochs_range, train_acc, label='Training Accuracy')
plt.plot(epochs_range, test_acc, label='Test Accuracy')
plt.legend(loc='lower right')
plt.title('Training and Test Accuracy')

plt.subplot(1, 2, 2)
plt.plot(epochs_range, train_loss, label='Training Loss')
plt.plot(epochs_range, test_loss, label='Test Loss')
plt.legend(loc='upper right')
plt.title('Training and Test Loss')
plt.show()

from PIL import Image 

classes = list(train_dataset.class_to_idx)

def predict_one_image(image_path, model, transform, classes):
    
    test_img = Image.open(image_path).convert('RGB')
    # plt.imshow(test_img)  # show the image being predicted

    test_img = transform(test_img)
    img = test_img.to(device).unsqueeze(0)  # add a batch dimension: (C, H, W) -> (1, C, H, W)
    
    model.eval()
    with torch.no_grad():
        output = model(img)

    _, pred = torch.max(output, 1)
    pred_class = classes[pred.item()]
    print(f'Predicted class: {pred_class}')
    
# predict one image from the test set
predict_one_image(image_path='./5-data/test/adidas/1.jpg', 
                  model=model, 
                  transform=train_transforms, 
                  classes=classes)

Predicted class: adidas
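
The same pattern extends from a single image to the whole test set; a sketch (assumed, not part of the original run) that reports per-class accuracy using the test_dl loader and classes list defined above:

def evaluate_per_class(dataloader, model, classes):
    # count correct predictions per class over the whole loader
    correct = {c: 0 for c in classes}
    total   = {c: 0 for c in classes}
    model.eval()
    with torch.no_grad():
        for imgs, target in dataloader:
            imgs, target = imgs.to(device), target.to(device)
            pred = model(imgs).argmax(1)
            for t, p in zip(target, pred):
                cls = classes[t.item()]
                total[cls]   += 1
                correct[cls] += int(t == p)
    for c in classes:
        print(f'{c}: {correct[c]}/{total[c]} correct')

evaluate_per_class(test_dl, model, classes)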

5. Saving and Loading the Model

# save the model weights
PATH = './model.pth'  # file name for the saved parameters
torch.save(model.state_dict(), PATH)

# load the parameters back into the model
model.load_state_dict(torch.load(PATH, map_location=device))
<All keys matched successfully>
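
Loading the weights back into the model that produced them only confirms that the keys match; the more typical round trip restores them into a fresh instance and re-checks accuracy. A sketch, reusing the test() helper and loss_fn defined above:

model2 = Model().to(device)                                   # brand-new, untrained instance
model2.load_state_dict(torch.load(PATH, map_location=device))
model2.eval()

acc, loss = test(test_dl, model2, loss_fn)
print(f'Restored model -- Test_acc: {acc*100:.1f}%, Test_loss: {loss:.3f}')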

II. Summary

A summary of the accuracy-improvement process and what I learned:

For this task I first trained with the reference code provided by K同学; the best test accuracy in that run was 78%.

I then tried two rounds of optimization:

  • First attempt: starting from K同学's reference model, I added a Dropout layer (drop rate 0.3) after each of the pooling layers and removed the Dropout before the fully connected layer (see the sketch after this list). After 40 epochs the test accuracy only reached 77%, lower than before the change.
  • Second attempt: I discarded the first attempt's changes and set the initial learning rate to 2e-4. After 40 epochs, training accuracy reached 98.6% and test accuracy reached 81.6%, a solid result.
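
A reconstruction (hedged, not the exact code that was run) of what the first attempt's pooling blocks might have looked like:

import torch.nn as nn

# first-attempt variant: Dropout(0.3) after each pooling stage,
# with the Dropout before the classifier removed
pool3_variant = nn.Sequential(
    nn.MaxPool2d(2),
    nn.Dropout(0.3))  # would replace self.pool3 in Model.__init__
pool6_variant = nn.Sequential(
    nn.MaxPool2d(2),
    nn.Dropout(0.3))  # would replace self.pool6 in Model.__init__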

The biggest takeaway from this exercise was the hands-on experience of adjusting the network structure and various hyperparameters, which gave me a deeper understanding of CNNs.

 
