纯小白,纯记录
一、环境
- ubuntu 18.04
- CUDA 9.0
- cuDNN 7.0
- OpenCV
- conda3
- PyCharm
- PyTorch
二、简介
使用 AlexNet 网络对猫狗图片进行分类。由于机器性能有限,只使用了 22 张图片,epoch 只迭代了 10 次,并且只实现了训练代码,纯粹用于学习 PyTorch 和 AlexNet。
三、网络结构图
四、代码
代码和数据:https://download.youkuaiyun.com/download/liuchaohs/10733644
百度云盘:
https://pan.baidu.com/s/1xTRs0GLEdMr3ZlAxQn7g9A
import math
import torch
import torch.backends.cudnn as cudnn
import cv2
from torch.autograd import Variable
from torchvision import transforms
from torch.utils.data import Dataset, DataLoader
from PIL import Image
import numpy as np
import os
def opencvLoad(imgPath, resizeH, resizeW):
    """Load an image with OpenCV and return it as a float32 CHW tensor.

    Args:
        imgPath: Path of the image file to read.
        resizeH: Target height in pixels.
        resizeW: Target width in pixels.

    Returns:
        A ``torch.float32`` tensor of shape ``(channels, resizeH, resizeW)``.
        Pixel values are converted to float but not rescaled or normalised,
        and the channel order is whatever ``cv2.imread`` produced.

    Raises:
        OSError: If OpenCV cannot read ``imgPath``.
    """
    image = cv2.imread(imgPath)
    if image is None:
        # cv2.imread reports failure by returning None rather than raising,
        # which would otherwise surface as an obscure error inside cv2.resize.
        raise OSError('cannot read image: {}'.format(imgPath))
    # cv2.resize expects dsize as (width, height), not (height, width).
    image = cv2.resize(image, (resizeW, resizeH), interpolation=cv2.INTER_CUBIC)
    image = image.astype(np.float32)
    # HWC -> CHW; the previous (2, 1, 0) permutation swapped the two spatial
    # axes and therefore fed the network a transposed picture.
    image = np.transpose(image, (2, 0, 1))
    return torch.from_numpy(image)
class LoadPartDataset(Dataset):
    """Two-class image dataset read from ``<path>/0`` and ``<path>/1``.

    Every entry of sub-directory ``0`` is labelled 0 and every entry of
    sub-directory ``1`` is labelled 1. Images are only decoded on access.

    Attributes are kept as a plain list so callers can inspect them:
    ``images`` holds ``(image_path, label)`` tuples, class 0 first.
    """

    def __init__(self, path):
        """Collect ``(image_path, label)`` pairs for both classes.

        Args:
            path: Root directory containing the ``0`` and ``1`` sub-directories.

        Raises:
            OSError: Propagated from ``os.listdir`` when a class directory
                cannot be listed.
        """
        self.images = []
        for image_label in (0, 1):
            class_dir = os.path.join(path, str(image_label))
            # os.listdir returns entries in arbitrary order; sorting keeps the
            # index -> sample mapping identical from run to run.
            for file_name in sorted(os.listdir(class_dir)):
                image_path = os.path.join(class_dir, file_name)
                self.images.append((image_path, image_label))

    def __getitem__(self, item):
        """Return ``(image_tensor, label)`` for the sample at index ``item``."""
        image_path, label = self.images[item]
        # 227x227 is the input size the network in this file is built for.
        img = opencvLoad(image_path, 227, 227)
        return img, label

    def __len__(self):
        """Return the number of collected samples."""
        return len(self.images)
class Net(torch.nn.Module):
    """AlexNet-style CNN producing two-class logits for 3x227x227 inputs.

    Five convolutional stages (``conv1`` .. ``conv5``) are followed by a
    three-layer fully connected head (``dense``). The spatial sizes quoted in
    the comments assume a 227x227 input.
    """

    def __init__(self):
        super().__init__()
        nn = torch.nn

        def overlap_pool():
            # 3x3 max-pooling window moved with stride 2
            return nn.MaxPool2d(kernel_size=3, stride=2, padding=0)

        # 3x227x227 -> conv: (227 - 11) / 4 + 1 = 55 -> pool: (55 - 3) / 2 + 1 = 27
        # result: 96x27x27
        self.conv1 = nn.Sequential(
            nn.Conv2d(3, 96, kernel_size=11, stride=4, padding=0),
            nn.ReLU(),
            overlap_pool(),
        )
        # conv: (27 - 5 + 2 * 2) / 1 + 1 = 27 -> pool: (27 - 3) / 2 + 1 = 13
        # result: 256x13x13
        self.conv2 = nn.Sequential(
            nn.Conv2d(96, 256, kernel_size=5, stride=1, padding=2),
            nn.ReLU(),
            overlap_pool(),
        )
        # conv: (13 - 3 + 1 * 2) / 1 + 1 = 13, result: 384x13x13
        self.conv3 = nn.Sequential(
            nn.Conv2d(256, 384, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
        )
        # conv: (13 - 3 + 1 * 2) / 1 + 1 = 13, result: 384x13x13
        self.conv4 = nn.Sequential(
            nn.Conv2d(384, 384, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
        )
        # conv: (13 - 3 + 1 * 2) / 1 + 1 = 13 -> pool: (13 - 3) / 2 + 1 = 6
        # result: 256x6x6
        self.conv5 = nn.Sequential(
            nn.Conv2d(384, 256, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            overlap_pool(),
        )
        # Classifier head on the flattened 256 * 6 * 6 = 9216 features.
        self.dense = nn.Sequential(
            nn.Linear(9216, 4096),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(4096, 4096),
            nn.ReLU(),
            nn.Dropout(0.5),
            # two outputs because there are only two classes
            nn.Linear(4096, 2),
        )

    def forward(self, x):
        """Run the five conv stages, flatten per sample, apply the dense head."""
        features = x
        for stage in (self.conv1, self.conv2, self.conv3, self.conv4, self.conv5):
            features = stage(features)
        flat = features.view(features.size(0), -1)
        return self.dense(flat)
# Training script: fits Net on the images under ./datas with plain SGD and
# prints the mean loss and the accuracy after every epoch.
trainSet = LoadPartDataset(path='./datas')
if len(trainSet) == 0:
    # Fail early with a clear message instead of dividing by zero below.
    raise RuntimeError('no training images found under ./datas/0 and ./datas/1')
train_loader = DataLoader(dataset=trainSet, batch_size=1, shuffle=True)

# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = Net()
model.to(device)
# TODO: custom weight initialisation is not implemented yet.

lr = 1e-5
loss_func = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=lr, momentum=0.9)

for epoch in range(100):
    print('epoch {}'.format(epoch))
    train_loss = 0.0
    train_acc = 0.0
    for X, y in train_loader:
        optimizer.zero_grad()
        X = X.to(device)
        y = y.to(device)
        y_ = model(X)
        loss = loss_func(y_, y)
        # .item() extracts the Python scalar; indexing a 0-dim tensor with [0]
        # is an error on current PyTorch releases.
        train_loss += loss.item()
        # index of the largest logit = predicted class
        pred = torch.max(y_, 1)[1]
        train_acc += (pred == y).sum().item()
        loss.backward()
        optimizer.step()
    # batch_size is 1, so the summed loss divided by the sample count is the
    # mean per-sample loss; accuracy is the fraction of correct predictions.
    print('loss : {:.6f}, ACC : {:.6f}'.format(train_loss / len(trainSet),
                                               train_acc / len(trainSet)))
五、不足
- 不会使用多GPU实现代码,等有机子,再试试。
- 还不会初始化网络权重、调整超参数,太小白了,慢慢学习。
六、思考
- 除第一层卷积(stride=4,227→55)外,其余卷积层都只改变通道数,特征图的大小由 maxpool 改变。还不清楚这样设计的意义。