pytorch入门(三)卷积神经网络
建立CNN网络进行训练
定义网络
nn.Conv2d
nn.Conv2d(in_channels,out_channels, kernel_size, stride, padding)
- in_channels表示输入通道数,这里就是3(输入图像为3×64×64)
- out_channels表示输出通道数量
- kernel_size表示卷积核大小
- stride表示每次滑动步长
- padding表示边缘填充长度,如果不赋值那么如果出现边缘没有足够数量元素进行卷积,这一个部分则会被丢弃。
卷积输出大小公式:
$W_2=\frac{W_1-F+2P}{S}+1$
W:宽 F:卷积核大小 S:步长 P:padding
除不尽时向下取整。
Pooling
pooling结合卷积层有效减少参数并加快收敛速度。
这里我们用的是max pooling。pytorch同样包含average pooling。
Dropout
其使得网络在训练时随机drop掉一些神经元不参与训练。
nn.Sequential()
通过nn.Sequential()可以定义一个层链。可以将网络分解成更有逻辑的排列。
这里我们将设置一个特征提取层和分类层。
以输入图片大小为64×64、batch size=64为例。
class CNNNet(nn.Module):
    """AlexNet-style CNN: a convolutional feature extractor, an adaptive
    average pool that fixes the spatial size, and a fully connected
    classifier head.

    The per-layer spatial sizes in the comments assume a 3x64x64 input.

    Args:
        num_classes: number of output classes (default 2).
    """

    def __init__(self, num_classes=2):
        super(CNNNet, self).__init__()
        self.features = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=11, stride=4, padding=2),  # -> 64 ch, 15x15
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=3, stride=2),                  # -> 64 ch, 7x7
            nn.Conv2d(64, 192, kernel_size=5, padding=2),           # -> 192 ch, 7x7
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=3, stride=2),                  # -> 192 ch, 3x3
            nn.Conv2d(192, 384, kernel_size=3, padding=1),          # -> 384 ch, 3x3
            nn.ReLU(),
            nn.Conv2d(384, 256, kernel_size=3, padding=1),          # -> 256 ch, 3x3
            nn.ReLU(),
            nn.Conv2d(256, 256, kernel_size=3, padding=1),          # -> 256 ch, 3x3
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=3, stride=2),                  # -> 256 ch, 1x1
        )
        # Pools/stretches whatever the feature map is to a fixed 6x6, so the
        # classifier input size (256*6*6) does not depend on the image size.
        self.avgpool = nn.AdaptiveAvgPool2d((6, 6))
        self.classifier = nn.Sequential(
            nn.Dropout(),
            nn.Linear(256 * 6 * 6, 4096),
            nn.ReLU(),
            nn.Dropout(),
            nn.Linear(4096, 4096),
            nn.ReLU(),
            nn.Linear(4096, num_classes),
        )

    def forward(self, x):
        """Run the network; returns raw class scores of shape (N, num_classes)."""
        feats = self.features(x)         # (N, 256, 1, 1) for 64x64 input
        pooled = self.avgpool(feats)     # (N, 256, 6, 6)
        flat = torch.flatten(pooled, 1)  # (N, 9216)
        return self.classifier(flat)
以下是完整代码。
import torch
import torch.nn as nn
import torch.optim as optim
import torch.utils.data
import torch.nn.functional as F
import torchvision
from torchvision import transforms
from PIL import Image
# 定义神经网络
class net(nn.Module):
    """Fully connected baseline classifier for 64x64 RGB images.

    Flattens the input to 12288 features (64*64*3) and passes it through
    an 84-30-84 MLP ending in a 2-way output layer (raw scores, no softmax).
    """

    def __init__(self):
        super(net, self).__init__()
        self.fc1 = nn.Linear(12288, 84)  # 12288 = 64 * 64 * 3
        self.fc2 = nn.Linear(84, 30)
        self.fc3 = nn.Linear(30, 84)
        self.fc4 = nn.Linear(84, 2)

    def forward(self, x):
        x = x.view(-1, 12288)
        # ReLU after every hidden layer; the output layer stays linear.
        for hidden in (self.fc1, self.fc2, self.fc3):
            x = F.relu(hidden(x))
        return self.fc4(x)
# convnet
class CNNNet(nn.Module):
def __init__(self, num_classes=2):
super(CNNNet, self).__init__()
self.features = nn.Sequential(
nn.Conv2d(3, 64, kernel_size=11, stride=4, padding=2), # 输出:通道数:64 输出大小:15
nn.ReLU(),
nn.MaxPool2d(kernel_size=3, stride=2), # 输出:通道数:64,输出大小:7
nn.Conv2d(64, 192, kernel_size=5, padding=2), # 输出: 通道数:192,输出大小:7
nn.ReLU(),
nn.MaxPool2d(kernel_size=3, stride=2), # 输出: 通道数:192,输出大小:3
nn.Conv2d(192, 384, kernel_size=3, padding=1), # 输出: 通道数:384,输出大小:3
nn.ReLU(),
nn.Conv2d(384, 256, kernel_size=3, padding=1), # 输出: 通道数:256,输出大小:3
nn.ReLU(),
nn.Conv2d(256, 256, kernel_size=3, padding=1), # 输出: 通道数:256,输出大小:3
nn.ReLU(),
nn.MaxPool2d(kernel_size=3, stride=2), # 输出: 通道数:256,输出大小:1 // batch_size = 64, channel = 265 outsize:1,1
)
self.avgpool = nn.AdaptiveAvgPool2d((6, 6))