CNN实现图像分类
对于本教程,我们将使用CIFAR10数据集,它包含十个类别:‘airplane’, ‘automobile’, ‘bird’, ‘cat’,
‘deer’, ‘dog’, ‘frog’, ‘horse’, ‘ship’, ‘truck’。CIFAR-10 中的图像尺寸为3×32×32,也就是RGB的3层颜色
通道,每层通道内的尺寸为32*32。
构建CNN 网络:
卷积 -> 激活函数 -> 池化 -> 卷积 -> 激活函数 -> 池化 -> 线性变换 -> 线性变换 -> 线性变换
import torch
import torchvision
import torchvision.transforms as transforms
import torch.utils.data as Data
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
class Net(nn.Module):
    """Small LeNet-style CNN that classifies 3x32x32 images into 10 classes.

    Layer pipeline: conv -> ReLU -> pool -> conv -> ReLU -> pool ->
    fc -> ReLU -> fc -> ReLU -> fc.

    The module also owns its loss criterion (cross entropy) and its SGD
    optimizer, so a single call to ``LossFunc`` performs a full update step.

    Args:
        lr: Learning rate for the SGD optimizer. The original code read an
            undefined global ``LR`` here, which raised ``NameError`` as soon
            as ``Net()`` was constructed; it is now a keyword with a default.
    """

    def __init__(self, lr=0.001):
        super().__init__()
        # [3, 32, 32] -> [6, 28, 28]: a 5x5 kernel without padding removes 4
        # pixels from each spatial dimension.
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=6, kernel_size=5)
        # 2x2 max pooling with stride 2 halves each spatial dimension; the
        # same layer is reused after both convolutions.
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        # [6, 14, 14] -> [16, 10, 10]; after pooling again -> [16, 5, 5].
        self.conv2 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5)
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)
        self.criterion = nn.CrossEntropyLoss()
        # Must be created after all layers so self.parameters() sees them.
        self.optimizer = optim.SGD(self.parameters(), lr=lr, momentum=0.9)

    def forward(self, x):
        """Compute class scores for a batch of images.

        Args:
            x: Input tensor; the layer sizes assume shape [batch, 3, 32, 32].

        Returns:
            Tensor of shape [batch, 10] holding unnormalized class scores.
        """
        # [batch, 3, 32, 32] -> [batch, 6, 28, 28] -> [batch, 6, 14, 14]
        x = self.pool(F.relu(self.conv1(x)))
        # [batch, 6, 14, 14] -> [batch, 16, 10, 10] -> [batch, 16, 5, 5]
        x = self.pool(F.relu(self.conv2(x)))
        # Flatten the feature maps: [batch, 16 * 5 * 5]
        x = x.view(-1, 16 * 5 * 5)
        # [batch, 400] -> [batch, 120]
        x = F.relu(self.fc1(x))
        # [batch, 120] -> [batch, 84]
        x = F.relu(self.fc2(x))
        # [batch, 84] -> [batch, 10]; no softmax because CrossEntropyLoss
        # takes raw scores.
        x = self.fc3(x)
        return x

    def LossFunc(self, predict, BathY):
        """Run one optimization step and return the loss.

        Clears the accumulated gradients, computes the cross-entropy loss,
        back-propagates it and applies one optimizer step.

        Args:
            predict: Output of ``forward`` for the current batch.
            BathY: Target class indices for the same batch.

        Returns:
            The loss tensor computed before the parameter update.
        """
        self.optimizer.zero_grad()
        loss = self.criterion(predict, BathY)
        loss.backward()
        self.optimizer.step()
        return loss
加载数据:
# Mini-batch size shared by the training and test loaders.
BATHSIZE = 4
# True: train the network and save its weights; False: load saved weights.
# Also selects which CIFAR-10 split backs trainSet.
TRAIN = True
# Whether torchvision should download CIFAR-10 into ./data.
DOWNLOAD = False
# Compose chains the preprocessing steps: convert to a tensor, then normalize
# every channel with mean 0.5 and std 0.5, mapping [0, 1] to [-1, 1].
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
])
# Training data, shuffled on every pass.
trainSet = torchvision.datasets.CIFAR10(root="./data", train=TRAIN, download=DOWNLOAD, transform=transform)
trainLoader = Data.DataLoader(trainSet, batch_size=BATHSIZE, shuffle=True)
# Test data. This must always be the held-out split (train=False); the
# original passed train=TRAIN, which evaluated the model on the images it
# had just been trained on.
testSet = torchvision.datasets.CIFAR10(root="./data", train=False, download=DOWNLOAD, transform=transform)
testLoader = Data.DataLoader(testSet, batch_size=BATHSIZE, shuffle=False)
# Human-readable class names, indexed by the integer label.
classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck')
## 展示图像
import numpy as np
import matplotlib.pyplot as plt
def ImageShow(img):
    """Display a normalized image tensor in a matplotlib window.

    Args:
        img: Image tensor with values normalized to [-1, 1]; assumed to be
            3-D and channel-first (C, H, W), as the axis permutation below
            requires three axes.
    """
    # Undo the normalization: [-1, 1] -> [0, 1].
    restored = img / 2 + 0.5
    pixels = restored.numpy()
    # Reorder axes (0, 1, 2) -> (1, 2, 0), i.e. channel-first to
    # channel-last, before handing the array to imshow.
    channel_last = pixels.transpose((1, 2, 0))
    plt.imshow(channel_last)
    plt.show()
# Preview one mini-batch from the training loader.
dataiter = iter(trainLoader)
# Use the built-in next(): the Python-2 style .next() method is not available
# on DataLoader iterators in current PyTorch releases.
images, labels = next(dataiter)
print(images.shape)
ImageShow(torchvision.utils.make_grid(images))
# Print one class name per image actually present in the batch instead of
# assuming the batch always holds exactly four images.
print(' '.join('%5s' % classes[labels[j]] for j in range(len(labels))))
使用神经网络训练数据 展示结果
# Build the network, then either train it or restore previously saved weights.
net = Net()
if TRAIN:
    for epoch in range(10):
        running_loss = 0.0
        for step, (batch_x, batch_y) in enumerate(trainLoader, start=1):
            # Forward pass, then one optimizer step inside LossFunc.
            batch_loss = net.LossFunc(net(batch_x), batch_y)
            running_loss += batch_loss.item()
            # Report the mean loss once every 2000 mini-batches.
            if step % 2000 == 0:
                print('[%d, %5d] loss: %.3f' %
                      (epoch + 1, step, running_loss / 2000))
                running_loss = 0.0
    # NOTE(review): the original indentation was lost; saving is assumed to
    # happen once after the last epoch - confirm.
    torch.save(net.state_dict(), "d:/mnist/imag.pkl")
else:
    net.load_state_dict(torch.load("d:/mnist/imag.pkl"))
from torch.autograd import Variable
# Per-class accuracy of the network over the test loader.
class_correct = [0.0] * len(classes)
class_total = [0.0] * len(classes)
# Switch to inference mode; standard practice before evaluation.
net.eval()
# Gradients are not needed for evaluation, so skip building the autograd graph.
with torch.no_grad():
    for images, labels in testLoader:
        outPut = net(images)
        # The index of the highest score along dim 1 is the predicted class.
        _, predicted = torch.max(outPut, 1)
        # Iterate over the samples actually in the batch: indexing by
        # BATHSIZE fails on a short final batch, and squeeze() breaks when
        # the batch holds a single sample.
        for label, guess in zip(labels.tolist(), predicted.tolist()):
            class_correct[label] += float(guess == label)
            class_total[label] += 1
for i in range(len(classes)):
    if class_total[i] == 0:
        # Avoid dividing by zero when a class never occurs in the test data.
        print('Accuracy of %5s : N/A (no samples)' % classes[i])
        continue
    print('Accuracy of %5s : %2d %%' % (classes[i], 100 * class_correct[i] / class_total[i]))