- 🍨 本文为🔗365天深度学习训练营中的学习记录博客
- 🍖 原作者:K同学啊
本周任务:
1.根据TensorFlow代码,写出pytorch代码
2.了解ResNetV2与ResNetV的区别
3.改进思路是否可以迁移到其他地方
一、论文解读
1.ResNetV2结构与ResNet结构对比
改进点:
(a)表示原始的ResNet残差结构,(b)表示新的ResNet残差结构
主要差别:(a)先卷积后进行BN和激活函数计算,最后在执行addition后再进行ReLU计算,(b)先进行BN和激活函数计算后卷积,把addition后的ReLU计算放到残差结构内部。
改进结果:
作者使用两种不同结构在CIFAR-10数据集上测试,模型1001层的ResNet模型;
从图中结果得出,proposed模型的测试集错误率明显更低一些,达到了4.92%的错误率,而原始的结构的测试集错误率是7.61%
2.关于残差结构的不同尝试
(简化了插图,不显示BN层,所有单位均采用权值层后的BN层。)
根据结果,可以看出original结构最好,即identity mapping恒等映射最好。
3.关于激活的尝试
e结果最好,其次是a,再是d。
二.模型复现
这一步也从这个博客进行学习借鉴,感谢!
实验的数据集采用的是上周鸟类识别的数据,除了模型具体结构有变化,其余都和上周的代码一致
2.1数据导入
import torch
import torch.nn as nn
import torchvision.transforms as transforms
import torchvision
from torchvision import transforms, datasets
import os, PIL, pathlib, warnings
import torch.nn.functional as F
import matplotlib.pyplot as plt
import pandas as pd
from torchvision.io import read_image
from torch.utils.data import Dataset
import torch.utils.data as data
from PIL import Image
import copy
import numpy as np
# 一、导入数据
'''
1.1 设置GPU
'''
warnings.filterwarnings("ignore")
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)
'''
1.2 导入数据
'''
plt.rcParams['font.sans-serif'] = ['SimHei'] # 用来正常显示中文标签
plt.rcParams['axes.unicode_minus'] = False # 用来正常显示负号
data_dir = './第8天/bird_photos/'
data_dir = pathlib.Path(data_dir)
data_paths = list(data_dir.glob('*'))
def count_images(folder):
count = 0
for item in folder.iterdir():
if item.is_file():
count += 1
if item.is_dir():
count += count_images(item)
return count
image_count = count_images(data_dir)
print("图片总数为:", image_count)
classNames = [str(path).split('\\')[1] for path in data_paths]
# 利用split()函数对data_paths中的每个文件路径执行分割操作,获取各个文件所属的类别名称并储存在classNames中
# 4类天气,各300张图片
print(classNames)
# 二、数据预处理
'''
2.1 加载数据
'''
train_transforms = transforms.Compose([
transforms.Resize([224, 224]), # 将输入图片resize成统一尺寸
# transforms.RandomHorizontalFlip(), # 随机水平翻转
transforms.ToTensor(), # 将PIL Image或numpy.ndarray转换为tensor,并归一化到[0,1]之间
transforms.Normalize( # 标准化处理-->转换为标准正太分布(高斯分布),使模型更容易收敛
mean=[0.485, 0.456, 0.406],
std=[0.229, 0.224, 0.225]) # 其中 mean=[0.485,0.456,0.406]与std=[0.229,0.224,0.225] 从数据集中随机抽样计算得到的。
])
test_transform = transforms.Compose([
transforms.Resize([224, 224]), # 将输入图片resize成统一尺寸
transforms.ToTensor(), # 将PIL Image或numpy.ndarray转换为tensor,并归一化到[0,1]之间
transforms.Normalize( # 标准化处理-->转换为标准正太分布(高斯分布),使模型更容易收敛
mean=[0.485, 0.456, 0.406],
std=[0.229, 0.224, 0.225]) # 其中 mean=[0.485,0.456,0.406]与std=[0.229,0.224,0.225] 从数据集中随机抽样计算得到的。
])
total_data = datasets.ImageFolder("./第8天/bird_photos/", transform=train_transforms)
print(total_data)
print(total_data.class_to_idx)
'''
2.2 划分数据集
'''
train_size = int(0.8 * len(total_data))
test_size = len(total_data) - train_size
train_dataset, test_dataset = torch.utils.data.random_split(total_data, [train_size, test_size])
print(train_dataset, test_dataset)
'''
2.3 可视化数据
'''
batch_size = 8
train_dl = torch.utils.data.DataLoader(train_dataset,
batch_size=batch_size,
shuffle=True,
num_workers=0)
test_dl = torch.utils.data.DataLoader(test_dataset,
batch_size=batch_size,
shuffle=True,
num_workers=0)
for X, y in test_dl:
print("Shape of X [N, C, H, W]: ", X.shape)
print("Shape of y: ", y.shape, y.dtype)
break
image_folder = './第8天/bird_photos/Cockatoo/' # 指定图像文件夹路径
image_files = [f for f in os.listdir(image_folder) if f.endswith((".jpg", ".png", ".jpeg"))]
fig, axes = plt.subplots(2, 4, figsize=(16, 6))
for ax, img_file in zip(axes.flat, image_files):
img_path = os.path.join(image_folder, img_file)
img = Image.open(img_path)
ax.imshow(img)
ax.axis('off')
plt.tight_layout()
plt.show()
输出结果:
2.2模型构建
ResNet50V2、ResNet101V2与ResNet152V2的搭建方式完全一样,区别就在于堆叠的Residual Block的数量不同。
'''
3.1 residual block
'''
class Block2(nn.Module):
def __init__(self, in_channels, filters, kernel_size=3, stride=1, conv_shortcut=False):
super(Block2, self).__init__()
self.conv_shortcut = conv_shortcut
self.stride = stride
self.bn_preact = nn.BatchNorm2d(in_channels)
self.relu_preact = nn.ReLU(inplace=True)
if conv_shortcut:
self.shortcut = nn.Conv2d(in_channels, 4 * filters, kernel_size=1, stride=stride)
else:
self.shortcut = nn.MaxPool2d(kernel_size=1, stride=stride) if stride > 1 else nn.Identity()
self.conv1 = nn.Conv2d(in_channels, filters, kernel_size=1, stride=1, bias=False)
self.bn1 = nn.BatchNorm2d(filters)
self.relu1 = nn.ReLU(inplace=True)
self.pad2 = nn.ZeroPad2d(1)
self.conv2 = nn.Conv2d(filters, filters, kernel_size=kernel_size, stride=stride, bias=False)
self.bn2 = nn.BatchNorm2d(filters)
self.relu2 = nn.ReLU(inplace=True)
self.conv3 = nn.Conv2d(filters, 4 * filters, kernel_size=1)
def forward(self, x):
preact = self.bn_preact(x)
preact = self.relu_preact(preact)
shortcut = self.shortcut(preact)
x = self.conv1(preact)
x = self.bn1(x)
x = self.relu1(x)
x = self.pad2(x)
x = self.conv2(x)
x = self.bn2(x)
x = self.relu2(x)
x = self.conv3(x)
x += shortcut
return x
class Stack2(nn.Module):
def __init__(self, in_channels, filters, blocks, stride=2):
super(Stack2, self).__init__()
self.blocks = nn.ModuleList()
self.blocks.append(Block2(in_channels, filters, conv_shortcut=True))
for i in range(1, blocks - 1):
self.blocks.append(Block2(4 * filters, filters))
self.blocks.append(Block2(4 * filters, filters, stride=stride))
def forward(self, x):
for block in self.blocks:
x = block(x)
return x
class ResNet50V2(nn.Module):
def __init__(self,
include_top=True, # 是否包含位于网络顶部的全链接层
preact=True, # 是否使用预激活
use_bias=True, # 是否对卷积层使用偏置
input_shape=[224, 224, 3],
classes=1000,
pooling=None): # 用于分类图像的可选类数
super(ResNet50V2, self).__init__()
self.conv1 = nn.Sequential()
self.conv1.add_module('conv', nn.Conv2d(3, 64, 7, stride=2, padding=3, bias=use_bias, padding_mode='zeros'))
if not preact:
self.conv1.add_module('bn', nn.BatchNorm2d(64))
self.conv1.add_module('relu', nn.ReLU())
self.conv1.add_module('max_pool', nn.MaxPool2d(kernel_size=3, stride=2, padding=1))
self.conv2 = Stack2(64, 64, 3)
self.conv3 = Stack2(256, 128, 4)
self.conv4 = Stack2(512, 256, 6)
self.conv5 = Stack2(1024, 512, 3, stride=1)
self.post = nn.Sequential()
if preact:
self.post.add_module('bn', nn.BatchNorm2d(2048))
self.post.add_module('relu', nn.ReLU())
if include_top:
self.post.add_module('avg_pool', nn.AdaptiveAvgPool2d((1, 1)))
self.post.add_module('flatten', nn.Flatten())
self.post.add_module('fc', nn.Linear(2048, classes))
else:
if pooling == 'avg':
self.post.add_module('avg_pool', nn.AdaptiveAvgPool2d((1, 1)))
elif pooling == 'max':
self.post.add_module('max_pool', nn.AdaptiveMaxPool2d((1, 1)))
def forward(self, x):
x = self.conv1(x)
x = self.conv2(x)
x = self.conv3(x)
x = self.conv4(x)
x = self.conv5(x)
x = self.post(x)
return x
model = ResNet50V2().to(device)
print(model)
(conv1): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu1): ReLU(inplace=True)
(pad2): ZeroPad2d((1, 1, 1, 1))
(conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), bias=False)
(bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu2): ReLU(inplace=True)
(conv3): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1))
)
(3): Block2(
(bn_preact): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu_preact): ReLU(inplace=True)
(shortcut): MaxPool2d(kernel_size=1, stride=2, padding=0, dilation=1, ceil_mode=False)
(conv1): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu1): ReLU(inplace=True)
(pad2): ZeroPad2d((1, 1, 1, 1))
(conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(2, 2), bias=False)
(bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu2): ReLU(inplace=True)
(conv3): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1))
)
)
)
(conv4): Stack2(
(blocks): ModuleList(
(0): Block2(
(bn_preact): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu_preact): ReLU(inplace=True)
(shortcut): Conv2d(512, 1024, kernel_size=(1, 1), stride=(1, 1))
(conv1): Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu1): ReLU(inplace=True)
(pad2): ZeroPad2d((1, 1, 1, 1))
(conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), bias=False)
(bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu2): ReLU(inplace=True)
(conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1))
)
(1-4): 4 x Block2(
(bn_preact): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu_preact): ReLU(inplace=True)
(shortcut): Identity()
(conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu1): ReLU(inplace=True)
(pad2): ZeroPad2d((1, 1, 1, 1))
(conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), bias=False)
(bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu2): ReLU(inplace=True)
(conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1))
)
(5): Block2(
(bn_preact): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu_preact): ReLU(inplace=True)
(shortcut): MaxPool2d(kernel_size=1, stride=2, padding=0, dilation=1, ceil_mode=False)
(conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu1): ReLU(inplace=True)
(pad2): ZeroPad2d((1, 1, 1, 1))
(conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(2, 2), bias=False)
(bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu2): ReLU(inplace=True)
(conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1))
)
)
)
(conv5): Stack2(
(blocks): ModuleList(
(0): Block2(
(bn_preact): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu_preact): ReLU(inplace=True)
(shortcut): Conv2d(1024, 2048, kernel_size=(1, 1), stride=(1, 1))
(conv1): Conv2d(1024, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu1): ReLU(inplace=True)
(pad2): ZeroPad2d((1, 1, 1, 1))
(conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), bias=False)
(bn2): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu2): ReLU(inplace=True)
(conv3): Conv2d(512, 2048, kernel_size=(1, 1), stride=(1, 1))
)
(1-2): 2 x Block2(
(bn_preact): BatchNorm2d(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu_preact): ReLU(inplace=True)
(shortcut): Identity()
(conv1): Conv2d(2048, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu1): ReLU(inplace=True)
(pad2): ZeroPad2d((1, 1, 1, 1))
(conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), bias=False)
(bn2): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu2): ReLU(inplace=True)
(conv3): Conv2d(512, 2048, kernel_size=(1, 1), stride=(1, 1))
)
)
)
(post): Sequential(
(bn): BatchNorm2d(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU()
(avg_pool): AdaptiveAvgPool2d(output_size=(1, 1))
(flatten): Flatten(start_dim=1, end_dim=-1)
(fc): Linear(in_features=2048, out_features=1000, bias=True)
)
)
三、模型训练
'''
4.1 编写训练函数
'''
def train(dataloader, model, optimizer, loss_fn):
size = len(dataloader.dataset)
num_batches = len(dataloader)
train_acc, train_loss = 0, 0
for X, y in dataloader:
X, y = X.to(device), y.to(device)
pred = model(X)
loss = loss_fn(pred, y)
optimizer.zero_grad()
loss.backward()
optimizer.step()
train_loss += loss.item()
train_acc += (pred.argmax(1) == y).type(torch.float).sum().item()
train_loss /= num_batches
train_acc /= size
return train_acc, train_loss
'''
4.2 编写测试函数
'''
def test(dataloader, model, loss_fn):
size = len(dataloader.dataset) # 测试集的大小
num_batches = len(dataloader) # 批次数目, (size/batch_size,向上取整)
test_loss, test_acc = 0, 0
# 当不进行训练时,停止梯度更新,节省计算内存消耗
with torch.no_grad():
for imgs, target in dataloader:
imgs, target = imgs.to(device), target.to(device)
# 计算loss
target_pred = model(imgs)
loss = loss_fn(target_pred, target)
test_loss += loss.item()
test_acc += (target_pred.argmax(1) == target).type(torch.float).sum().item()
test_acc /= size
test_loss /= num_batches
return test_acc, test_loss
'''
4.3 正式训练
'''
loss_fn = nn.CrossEntropyLoss() #交叉熵函数
learn_rate = 1e-3
opt = torch.optim.Adam(model.parameters(), lr=learn_rate)
epochs = 30
train_loss = []
train_acc = []
test_loss = []
test_acc = []
best_acc = 0
# 开始训练
for epoch in range(epochs):
model.train()
epoch_train_acc, epoch_train_loss = train(train_dl, model, opt, loss_fn)
model.eval()
epoch_test_acc, epoch_test_loss = test(test_dl, model, loss_fn)
if epoch_test_acc > best_acc:
best_acc = epoch_test_acc
best_model = copy.deepcopy(model)
train_acc.append(epoch_train_acc)
train_loss.append(epoch_train_loss)
test_acc.append(epoch_test_acc)
test_loss.append(epoch_test_loss)
lr = opt.state_dict()['param_groups'][0]['lr']
template = ('Epoch:{:2d}, Train_acc:{:.1f}%, Train_loss:{:.3f}, Test_acc:{:.1f}%, Test_loss:{:.3f}, Lr:{:.2E}')
print(template.format(epoch + 1, epoch_train_acc * 100, epoch_train_loss,
epoch_test_acc * 100, epoch_test_loss, lr))
print('Done')
结果输出:
四、结果可视化
#五、结果可视化
import warnings
warnings.filterwarnings("ignore") #忽略警告信息
plt.rcParams['font.sans-serif'] = ['SimHei'] #用来正常显示中文标签
plt.rcParams['axes.unicode_minus'] = False #用来正常显示负号
plt.rcParams['figure.dpi'] = 100 #分辨率
epochs_range = range(epochs)
plt.figure(figsize=(12, 3))
plt.subplot(1, 2, 1)
plt.plot(epochs_range, train_acc, label='Training Accuracy')
plt.plot(epochs_range, test_acc, label='Test Accuracy')
plt.legend(loc='lower right')
plt.title('Training and Validation Accuracy')
plt.subplot(1, 2, 2)
plt.plot(epochs_range, train_loss, label='Training Loss')
plt.plot(epochs_range, test_loss, label='Test Loss')
plt.legend(loc='upper right')
plt.title('Training and Validation Loss')
plt.show()
五、指定图片预测
#六、指定图片预测
classes = list(total_data.class_to_idx)
def predict_one_image(image_path, model, transform, classes):
test_img = Image.open(image_path).convert('RGB')
plt.imshow(test_img) # 展示预测的图片
test_img = transform(test_img)
img = test_img.to(device).unsqueeze(0)
model.eval()
output = model(img)
_, pred = torch.max(output, 1)
pred_class = classes[pred]
print(f'预测结果是:{pred_class}')
# 预测训练集中的某张照片
predict_one_image(image_path='./bird_photos/Bananaquit/008.jpg',
model=model,
transform=train_transforms,
classes=classes)
六、总结
- 了解ResNetV2的结构,并对比与ResNet的不同
- 将tensflow代码转换为pytorch
- 具体的细节还得再去读一下论文, 有些专有名词还是不太懂