>- **🍨 本文为[🔗365天深度学习训练营]中的学习记录博客**
>- **🍖 原作者:[K同学啊]**
本人往期文章可查阅: 深度学习总结
本周任务:
1.请根据本文TensorFlow代码,编写出相应的pytorch代码
2.了解残差结构
3.是否可以将残差模块融入到C3当中(自由探索)
🏡 我的环境:
- 语言环境:Python3.8
- 编译器:Jupyter Notebook
- 深度学习环境:Pytorch
-
- torch==2.3.1+cu118
-
- torchvision==0.18.1+cu118
本文完全根据 第J1周:ResNet-50算法实战与解析(TensorFlow版)中的内容转换为pytorch版本,所以前述性的内容不再一一重复,仅就pytorch版本中的内容进行叙述。
一、 前期准备
1. 设置GPU
如果设备上支持GPU就使用GPU,否则使用CPU
import warnings
warnings.filterwarnings("ignore")  # suppress warning output
import torch

# Use the GPU when available, otherwise fall back to the CPU.
# BUG FIX: the device string is case-sensitive -- torch.device("CPU")
# raises RuntimeError, so the original uppercase "CPU" fallback would
# crash on any machine without CUDA. It must be lowercase "cpu".
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device
运行结果:
device(type='cuda')
2. 导入数据
同时查看数据集中图片的数量
import pathlib

# Root folder of the bird-photo dataset.
data_dir = pathlib.Path(r"D:\THE MNIST DATABASE\J-series\J1\bird_photos")

# Count every file matching <class folder>/<image file>.
image_count = sum(1 for _ in data_dir.glob('*/*'))
print("图片总数为:", image_count)
运行结果:
图片总数为: 565
3. 查看数据集分类
# Each class is a sub-folder of the dataset root; its folder name is
# the class label.
# BUG FIX: the original used str(path).split("\\")[5], which is
# Windows-only and silently breaks whenever the dataset root sits at a
# different directory depth. Path.name is OS- and depth-independent.
data_paths = list(data_dir.glob('*'))
classeNames = [path.name for path in data_paths]
classeNames
运行结果:
['Bananaquit', 'Black Skimmer', 'Black Throated Bushtiti', 'Cockatoo']
4. 随机查看图片
随机抽取数据集中的20张图片进行查看
import random,PIL
import matplotlib.pyplot as plt
from PIL import Image

# Show 20 randomly chosen dataset images in a 2x10 grid,
# each titled with its class-folder name.
data_paths2 = list(data_dir.glob('*/*'))
plt.figure(figsize=(20, 4))
for idx in range(20):
    ax = plt.subplot(2, 10, idx + 1)
    ax.axis('off')
    sample = random.choice(data_paths2)   # pick one image at random
    ax.set_title(sample.parts[-2])        # parent folder name == class label
    ax.imshow(Image.open(str(sample)))    # render the image
运行结果:
5. 图片预处理
from torchvision import transforms, datasets
# (the duplicate `import torchvision.transforms as transforms` was redundant)

# Training-time transforms: resize, random augmentation, normalisation.
train_transforms = transforms.Compose([
    transforms.Resize([224, 224]),       # unify image size
    transforms.RandomHorizontalFlip(),   # augmentation: random horizontal flip
    transforms.ToTensor(),               # PIL image -> float tensor in [0, 1]
    transforms.Normalize(                # ImageNet channel statistics; eases convergence
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225]
    )
])

# Evaluation-time transforms: NO random augmentation.
# BUG FIX: the original also applied RandomHorizontalFlip here, which
# makes every evaluation pass non-deterministic and slightly degrades
# measured accuracy for no benefit.
test_transforms = transforms.Compose([
    transforms.Resize([224, 224]),       # unify image size
    transforms.ToTensor(),               # PIL image -> float tensor in [0, 1]
    transforms.Normalize(                # same statistics as training
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225]
    )
])

total_data = datasets.ImageFolder(
    r"D:\THE MNIST DATABASE\J-series\J1\bird_photos",
    transform=train_transforms
)
# NOTE(review): every sample -- including the later test split -- is
# loaded through train_transforms; ideally the test subset would be
# rebuilt with test_transforms before evaluation.
total_data
运行结果:
Dataset ImageFolder
Number of datapoints: 565
Root location: D:\THE MNIST DATABASE\J-series\J1\bird_photos
StandardTransform
Transform: Compose(
Resize(size=[224, 224], interpolation=bilinear, max_size=None, antialias=True)
RandomHorizontalFlip(p=0.5)
ToTensor()
Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
)
将数据集分类情况进行映射输出:
total_data.class_to_idx
运行结果:
{'Bananaquit': 0,
'Black Skimmer': 1,
'Black Throated Bushtiti': 2,
'Cockatoo': 3}
6. 划分数据集
# Split the dataset 80% / 20% into train and test subsets.
n_total = len(total_data)
train_size = int(0.8 * n_total)
test_size = n_total - train_size  # remainder goes to the test set

train_dataset, test_dataset = torch.utils.data.random_split(
    total_data, [train_size, test_size]
)
train_dataset, test_dataset
运行结果:
(<torch.utils.data.dataset.Subset at 0x2195b60dd50>,
<torch.utils.data.dataset.Subset at 0x219508d5910>)
查看训练集和测试集的数据数量:
train_size,test_size
运行结果:
(452, 113)
7. 加载数据集
batch_size = 8

# Training loader: reshuffle every epoch so batches vary.
train_dl = torch.utils.data.DataLoader(
    train_dataset,
    batch_size=batch_size,
    shuffle=True,
    num_workers=1
)
# Test loader: no shuffling.
# FIX: the original shuffled the test set too; evaluation order is
# irrelevant to the metrics, and a fixed order keeps runs reproducible.
test_dl = torch.utils.data.DataLoader(
    test_dataset,
    batch_size=batch_size,
    shuffle=False,
    num_workers=1
)

# Inspect the shape/dtype of one training batch.
for x, y in train_dl:
    print("Shape of x [N,C,H,W]:", x.shape)
    print("Shape of y:", y.shape, y.dtype)
    break
运行结果:
Shape of x [N,C,H,W]: torch.Size([8, 3, 224, 224])
Shape of y: torch.Size([8]) torch.int64
二、手动搭建ResNet50模型
ResNet-50有两个基本的块,分别名为Conv Block和Identity Block
Conv Block结构:
1. 搭建模型
import torch.nn as nn
#定义使得网络前后一致的padding
def autopad(k, p=None):
    """Return the padding that keeps spatial size unchanged for a stride-1 conv.

    k: kernel size, an int or a sequence of ints (one per dimension).
    p: explicit padding; when supplied it is returned unchanged.
    """
    if p is not None:
        return p
    # "same" padding for odd kernels: half the kernel size, per dimension.
    return k // 2 if isinstance(k, int) else [v // 2 for v in k]
class IdentityBlock(nn.Module):
    """ResNet identity block: 1x1 -> kxk -> 1x1 bottleneck plus a
    parameter-free skip connection.

    Input and output share spatial size and channel count, so
    `in_channel` must equal `filters[2]` for the residual add to work.

    Args:
        in_channel: number of input channels.
        kernel_size: kernel size of the middle convolution.
        filters: (filters1, filters2, filters3) channel widths of the
            three convolutions.
    """

    def __init__(self, in_channel, kernel_size, filters):
        super(IdentityBlock, self).__init__()
        filters1, filters2, filters3 = filters
        # conv block 1: 1x1 conv reduces channel width
        self.conv1 = nn.Sequential(
            nn.Conv2d(in_channel, filters1, 1, stride=1, padding=0, bias=False),
            nn.BatchNorm2d(filters1),
            nn.ReLU(True)
        )
        # conv block 2: kxk conv at reduced width; autopad keeps spatial size
        self.conv2 = nn.Sequential(
            nn.Conv2d(filters1, filters2, kernel_size, stride=1,
                      padding=autopad(kernel_size), bias=False),
            nn.BatchNorm2d(filters2),
            nn.ReLU(True)
        )
        # conv block 3: 1x1 conv restores width; no ReLU before the residual add
        self.conv3 = nn.Sequential(
            nn.Conv2d(filters2, filters3, 1, stride=1, padding=0, bias=False),
            nn.BatchNorm2d(filters3)
        )
        self.relu = nn.ReLU(True)

    def forward(self, x):
        residual = self.conv1(x)
        residual = self.conv2(residual)
        residual = self.conv3(residual)
        out = x + residual
        # FIX: the original discarded the return of self.relu(...); it
        # only worked because ReLU(inplace=True) mutates its argument.
        # Assign explicitly so correctness no longer hinges on inplace.
        out = self.relu(out)
        return out
class ConvBlock(nn.Module):
    """ResNet conv block: 1x1 -> kxk -> 1x1 bottleneck whose shortcut is
    a 1x1 strided convolution, so input and output may differ in both
    channel count and spatial size.

    Args:
        in_channel: number of input channels.
        kernel_size: kernel size of the middle convolution.
        filters: (filters1, filters2, filters3) channel widths of the
            three convolutions; the output has filters3 channels.
        stride: stride of the first conv and of the shortcut conv
            (spatial downsampling factor, default 2).
    """

    def __init__(self, in_channel, kernel_size, filters, stride=2):
        super(ConvBlock, self).__init__()
        filters1, filters2, filters3 = filters
        # conv block 1: strided 1x1 conv reduces width and downsamples
        self.conv1 = nn.Sequential(
            nn.Conv2d(in_channel, filters1, 1, stride=stride, padding=0, bias=False),
            nn.BatchNorm2d(filters1),
            nn.ReLU(True)
        )
        # conv block 2: kxk conv at reduced width; autopad keeps spatial size
        self.conv2 = nn.Sequential(
            nn.Conv2d(filters1, filters2, kernel_size, stride=1,
                      padding=autopad(kernel_size), bias=False),
            nn.BatchNorm2d(filters2),
            nn.ReLU(True)
        )
        # conv block 3: 1x1 conv restores width; no ReLU before the residual add
        self.conv3 = nn.Sequential(
            nn.Conv2d(filters2, filters3, 1, stride=1, padding=0, bias=False),
            nn.BatchNorm2d(filters3)
        )
        # projection shortcut: matches the main path's channels and stride
        self.shortcut = nn.Sequential(
            nn.Conv2d(in_channel, filters3, 1, stride=stride, padding=0, bias=False),
            nn.BatchNorm2d(filters3)
        )
        self.relu = nn.ReLU(True)

    def forward(self, x):
        main = self.conv1(x)
        main = self.conv2(main)
        main = self.conv3(main)
        projected = self.shortcut(x)
        out = main + projected
        # FIX: the original discarded the return of self.relu(...); it
        # only worked because ReLU(inplace=True) mutates its argument.
        # Assign explicitly so correctness no longer hinges on inplace.
        out = self.relu(out)
        return out
class ResNet50(nn.Module):
def __init__(self,classes=4):
super(ResNet50,self).__init__()
#初始操作
self.first=nn.Sequential(
nn.Conv2d(3,64,7,stride=2,padding=3,bias=False,padding_mode='zeros'),
nn.BatchNorm2d(64),
nn.ReLU(),
nn.MaxPool2d(kernel_size=3,stride