tool.py
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import sys
import time
"""
** 1/4 卷积(互相关运算) **
"""
def corr2d(X, K):
    """
    2-D cross-correlation (book p. 107).
    X: 2-D input array
    K: 2-D convolution kernel, kernel_size
    X = torch.tensor([[0, 1, 2],
                      [3, 4, 5],
                      [6, 7, 8]])
    K = torch.tensor([[0, 1],
                      [2, 3]])
    print(corr2d(X, K))
    >> tensor([[19., 25.],
               [37., 43.]])
    """
    k_h, k_w = K.shape
    x_h, x_w = X.shape
    X, K = X.float(), K.float()
    # Each output element is the elementwise product of the kernel and the
    # window it covers, summed up.
    Y = torch.zeros((x_h - k_h + 1, x_w - k_w + 1))
    for i in range(Y.shape[0]):
        for j in range(Y.shape[1]):
            Y[i, j] = (X[i: i + k_h, j: j + k_w] * K).sum()
    return Y
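# Sanity check (a minimal sketch): corr2d should agree with PyTorch's own
# F.conv2d once inputs are reshaped to (batch, channel, h, w). The 3x3
# input and 2x2 kernel are made up for illustration.
def _check_corr2d():
    X = torch.arange(9, dtype=torch.float).view(3, 3)
    K = torch.arange(4, dtype=torch.float).view(2, 2)
    ref = F.conv2d(X.view(1, 1, 3, 3), K.view(1, 1, 2, 2)).view(2, 2)
    assert torch.allclose(corr2d(X, K), ref)  # both give [[19., 25.], [37., 43.]]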
class Conv2D(nn.Module):
    """
    Custom 2-D convolutional layer: run the 2-D input x through the
    cross-correlation above with the weight parameter w, then add the
    bias parameter.
    """
    def __init__(self, kernel_size):
        super(Conv2D, self).__init__()
        self.weight = nn.Parameter(torch.randn(kernel_size))
        self.bias = nn.Parameter(torch.randn(1))

    def forward(self, x):
        return corr2d(x, self.weight) + self.bias
def edge_detection():
    """
    Image edge-detection experiment (book p. 108):
    a convolution kernel as a task-specific tool.
    X:
    tensor([[1., 1., 0., 0., 0., 0., 1., 1.],
            [1., 1., 0., 0., 0., 0., 1., 1.],
            [1., 1., 0., 0., 0., 0., 1., 1.],
            [1., 1., 0., 0., 0., 0., 1., 1.],
            [1., 1., 0., 0., 0., 0., 1., 1.],
            [1., 1., 0., 0., 0., 0., 1., 1.]])
    Y:
    tensor([[ 0., 1., 0., 0., 0., -1., 0.],
            [ 0., 1., 0., 0., 0., -1., 0.],
            [ 0., 1., 0., 0., 0., -1., 0.],
            [ 0., 1., 0., 0., 0., -1., 0.],
            [ 0., 1., 0., 0., 0., -1., 0.],
            [ 0., 1., 0., 0., 0., -1., 0.]])
    """
    X = torch.ones(6, 8)
    X[:, 2:6] = 0
    K = torch.tensor([[1, -1]])
    Y = corr2d(X, K)
    return X, Y
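# The kernel [[1, -1]] only responds to vertical edges: on the transposed
# image, where the edges run horizontally, every output is zero. A quick
# sketch of that observation:
def _edge_detection_transposed():
    X, _ = edge_detection()
    K = torch.tensor([[1, -1]])
    print(corr2d(X.t(), K))  # all zeros: horizontal edges go undetected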
def learn_from_data():
    """
    Learning a kernel array from data (book p. 109, section 5.1.4).
    The training data comes from the edge-detection experiment above;
    this can be viewed as a tiny CNN training loop.
    Target kernel: [[1, -1]]
    Learned result:
    weight: tensor([[ 1.0015, -1.0014]])
    bias: tensor([-7.0750e-05])
    """
    X, Y = edge_detection()
    conv2d = Conv2D(kernel_size=(1, 2))
    lr = 0.01
    for i in range(30):
        Y_hat = conv2d(X)
        l = ((Y_hat - Y) ** 2).sum()
        l.backward()
        # Manual gradient-descent update, then reset the gradients.
        conv2d.weight.data -= lr * conv2d.weight.grad
        conv2d.bias.data -= lr * conv2d.bias.grad
        conv2d.weight.grad.fill_(0)
        conv2d.bias.grad.fill_(0)
        if (i + 1) % 5 == 0:
            print('Step %d, loss %.3f' % (i + 1, l.item()))
    print("weight: ", conv2d.weight.data)
    print("bias: ", conv2d.bias.data)
"""
** 2/4 填充padding 步幅stride **
"""
def comp_conv2d(conv2d, X):
    """
    Helper that computes the size of the convolution output Y.
    """
    # Add batch and channel dims of size 1, then strip them from the result.
    X = X.view((1, 1) + X.shape)
    Y = conv2d(X)
    return Y.view(Y.shape[2:])

def padding_test(x_shape, k_shape, p_shape, s_shape):
    """
    Padding experiment (book p. 112).
    padding: pad `padding` columns on the left and right (and rows on the
    top and bottom). With padding, Y.shape relates to the shapes of X and
    K as (book p. 111):
        y_h = x_h - k_h + p_h + 1,  where p_h = 2 * padding_h
        y_w = x_w - k_w + p_w + 1
    If p_h = k_h - 1, then y_h = x_h: input and output sizes match.
    stride: the number of rows and columns the kernel slides per step.
    With stride the formula becomes (book p. 113):
        y_h = floor( (x_h - k_h + p_h + s_h) / s_h )
        y_w likewise
    padding_test((14, 12), (5, 3), (2, 1), (2, 1))
    >> torch.Size([7, 12])
    """
    X = torch.rand(x_shape)
    conv2d = nn.Conv2d(in_channels=1, out_channels=1, kernel_size=k_shape, padding=p_shape, stride=s_shape)
    print(comp_conv2d(conv2d, X).shape)
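# Quick illustration of the shape-preserving case p_h = k_h - 1 above
# (the 8x8 size is made up): a 3x3 kernel with padding 1 and stride 1
# keeps an 8x8 input at 8x8.
def _padding_preserves_shape():
    conv2d = nn.Conv2d(in_channels=1, out_channels=1, kernel_size=3, padding=1)
    X = torch.rand(8, 8)
    assert comp_conv2d(conv2d, X).shape == torch.Size([8, 8])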
"""
** 3/4 多输入通道 in_channel 多输出通道out_channel **
"""
def corr2d_multi_in(X, K):
    """
    Cross-correlate X and K along dim 0 (the channel dim) separately,
    then sum the results (book p. 114).
    X = torch.tensor([[[0, 1, 2],
                       [3, 4, 5],
                       [6, 7, 8]],
                      [[1, 2, 3],
                       [4, 5, 6],
                       [7, 8, 9]]])
    K = torch.tensor([[[0, 1],
                       [2, 3]],
                      [[1, 2],
                       [3, 4]]])
    print(corr2d_multi_in(X, K))
    >> tensor([[ 56., 72.],
               [104., 120.]])
    """
    res = 0.0
    for i in range(0, X.shape[0]):
        res += corr2d(X[i, :, :], K[i, :, :])
    return res
def corr2d_multi_in_out(X, K):
    """
    Iterate over dim 0 of K, cross-correlating each kernel with the full
    input X; stack all results together.
    K = torch.stack([K, K + 1, K + 2])  # broadcasting: K + 1 adds 1 to every element
    print(K.shape) >> torch.Size([3, 2, 2, 2])
    corr2d_multi_in_out(X, K)
    tensor([[[ 56., 72.],
             [104., 120.]],
            [[ 76., 100.],
             [148., 172.]],
            [[ 96., 128.],
             [192., 224.]]])
    """
    return torch.stack([corr2d_multi_in(X, k) for k in K])
def corr2d_multi_in_out_1x1(X, K):
    """
    1x1 convolution as a matrix multiplication.
    X = torch.rand(3, 3, 3)     # (c_i, h, w) --> (c_i, h*w)
    K = torch.rand(2, 3, 1, 1)  # (c_o, c_i, k_h(1), k_w(1)) --> (c_o, c_i)
    Y1 = corr2d_multi_in_out_1x1(X, K)
    Y2 = corr2d_multi_in_out(X, K)
    (Y1 - Y2).norm().item() < 1e-6  # equivalent
    >> True
    """
    c_i, h, w = X.shape
    c_o = K.shape[0]
    X = X.view(c_i, h * w)
    K = K.view(c_o, c_i)
    Y = torch.mm(K, X)
    return Y.view(c_o, h, w)
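# A 1x1 convolution is a per-pixel fully connected layer across channels,
# so the matmul version should also match nn.Conv2d with kernel_size=1
# (a minimal sketch; the 3-channel 5x5 input is made up, bias disabled):
def _check_1x1():
    X = torch.rand(3, 5, 5)
    conv = nn.Conv2d(in_channels=3, out_channels=2, kernel_size=1, bias=False)
    Y1 = corr2d_multi_in_out_1x1(X, conv.weight.data)
    Y2 = conv(X.unsqueeze(0)).squeeze(0)
    assert (Y1 - Y2).norm().item() < 1e-6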
"""卷积层总结:
输入X (batch_size, c_i, x_h, x_w) / (c_i, x_h, x_w)
卷积核 (c_o, c_i, k_w, k_h)
卷积层: conv2d = nn.Conv2d( in_channels=c_i, # 输入通道维 X.shape[0]=K.shape[1]
out_channels=c_o, # 输出通道维 K.shape[0]
kernel_size=(k_h, k_w), # 卷积核大小
stride=(s_h, s_w) # 步幅
padding=(p_h, p_w), # 填充
)
使用 Y = conv2d(X) # Y.shape: [c_o, y_h, y_w]
y_h = [ (x_h - k_h + p_h(2*padding) + s_h(1*stride) / s_h ] (下取整)
y_w 同理
"""
"""
** 4/4 池化 **
"""
def pool2d(X, pool_size, mode='max'):
    """
    Pooling computation (book p. 118).
    Similar to the cross-correlation corr2d, except there are no parameters.
    X = torch.tensor([[0, 1, 2],
                      [3, 4, 5],
                      [6, 7, 8]])
    pool2d(X, (2, 2))  # maximum over each 2x2 window
    >> tensor([[4., 5.],
               [7., 8.]])
    pool2d(X, (2, 2), 'avg')  # mean over each 2x2 window
    >> tensor([[2., 3.],
               [5., 6.]])
    """
    X = X.float()
    p_h, p_w = pool_size
    Y = torch.zeros(X.shape[0] - p_h + 1, X.shape[1] - p_w + 1)
    for i in range(Y.shape[0]):
        for j in range(Y.shape[1]):
            if mode == 'max':
                Y[i, j] = X[i: i + p_h, j: j + p_w].max()
            elif mode == 'avg':
                Y[i, j] = X[i: i + p_h, j: j + p_w].mean()
    return Y
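# pool2d should agree with PyTorch's pooling ops over the same windows
# (a minimal sketch; the 3x3 input is made up, stride 1 to match the
# sliding window above):
def _check_pool2d():
    X = torch.arange(9, dtype=torch.float).view(3, 3)
    ref_max = F.max_pool2d(X.view(1, 1, 3, 3), kernel_size=2, stride=1).view(2, 2)
    ref_avg = F.avg_pool2d(X.view(1, 1, 3, 3), kernel_size=2, stride=1).view(2, 2)
    assert torch.allclose(pool2d(X, (2, 2)), ref_max)
    assert torch.allclose(pool2d(X, (2, 2), 'avg'), ref_avg)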
def pad_test():
    """
    Pooling padding/stride experiment (book p. 119).
    """
    X = torch.arange(16, dtype=torch.float).view((1, 1, 4, 4))
    pool2d = nn.MaxPool2d(3, padding=1, stride=2)
    print(pool2d(X))
    pool2d = nn.MaxPool2d((2, 4), padding=(1, 2), stride=(2, 3))
    print(pool2d(X))
def chann_test():
    """
    Multi-channel pooling experiment (book p. 120).
    """
    X = torch.arange(16, dtype=torch.float).view((1, 1, 4, 4))
    X = torch.cat((X, X + 1), dim=1)  # pooling acts on each channel separately
    pool2d = nn.MaxPool2d(3, padding=1, stride=2)
    print(pool2d(X))
"""池化层
输入X (c_i, x_h, x_w)
池化层 pool2d = nn.MaxPool2d(kernel_size = (m_h, m_w), # 池化窗口
stride=(s_h, s_w)) # 步幅
padding=(p_h, p_w), # 填充
使用 Y = pool2d(X) Y.shape: [c_i, y_h, y_w]
y_h = [(x_h + 2*p_h - 1*(k_h - 1) - 1) / s_h + 1 ] 下取整
y_w同理
"""
def evaluate_accuracy(data_iter, net):
    """Compute the classification accuracy of `net` over `data_iter`."""
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    acc_sum, n = 0.0, 0
    with torch.no_grad():
        for X, y in data_iter:
            if isinstance(net, torch.nn.Module):
                net.eval()  # switch off dropout etc. during evaluation
                acc_sum += (net(X.to(device)).argmax(dim=1) == y.to(device)).float().sum().cpu().item()
                net.train()
            else:
                # Custom (non-nn.Module) model: pass is_training if supported.
                if 'is_training' in net.__code__.co_varnames:
                    acc_sum += (net(X, is_training=False).argmax(dim=1) == y).float().sum().item()
                else:
                    acc_sum += (net(X).argmax(dim=1) == y).float().sum().item()
            n += y.shape[0]
    return acc_sum / n
def train_ch5(net, train_iter, test_iter, optimizer, device, num_epochs=5):
    """Train `net` on `train_iter`, reporting loss and accuracy each epoch."""
    net = net.to(device)
    print("training on ", device)
    loss = torch.nn.CrossEntropyLoss()
    batch_count = 0
    for epoch in range(num_epochs):
        train_l_sum, train_acc_sum, n, start = 0.0, 0.0, 0, time.time()
        for X, y in train_iter:
            X = X.to(device)
            y = y.to(device)
            y_hat = net(X)
            l = loss(y_hat, y)
            optimizer.zero_grad()
            l.backward()
            optimizer.step()
            train_l_sum += l.cpu().item()
            train_acc_sum += (y_hat.argmax(dim=1) == y).sum().cpu().item()
            n += y.shape[0]
            batch_count += 1
        test_acc = evaluate_accuracy(test_iter, net)
        print('epoch %d, loss %.4f, train acc %.3f, test acc %.3f, time %.1f sec'
              % (epoch + 1, train_l_sum / batch_count, train_acc_sum / n, test_acc, time.time() - start))
def load_data_fashion_mnist(batch_size, resize=None, root='~/Datasets/FashionMNIST'):
    """Download the Fashion-MNIST dataset and load it into memory."""
    trans = []
    if resize:
        trans.append(torchvision.transforms.Resize(size=resize))
    trans.append(torchvision.transforms.ToTensor())
    transform = torchvision.transforms.Compose(trans)
    mnist_train = torchvision.datasets.FashionMNIST(root=root, train=True, download=True, transform=transform)
    mnist_test = torchvision.datasets.FashionMNIST(root=root, train=False, download=True, transform=transform)
    # Multi-process data loading is flaky on Windows; fall back to 0 workers.
    if sys.platform.startswith('win'):
        num_workers = 0
    else:
        num_workers = 4
    train_iter = torch.utils.data.DataLoader(mnist_train, batch_size=batch_size, shuffle=True, num_workers=num_workers)
    test_iter = torch.utils.data.DataLoader(mnist_test, batch_size=batch_size, shuffle=False, num_workers=num_workers)
    return train_iter, test_iter
model.py
import os
import torch
from torch import nn

os.environ["CUDA_VISIBLE_DEVICES"] = "0"
class LeNet(nn.Module):
    """
    the network:
    input      1*28*28
    Conv2d     6*24*24
    MaxPool2d  6*12*12
    Conv2d     16*8*8
    MaxPool2d  16*4*4
    Linear     120
    Linear     84
    Linear     10
    Softmax    (folded into CrossEntropyLoss during training)
    """
    def __init__(self):
        super(LeNet, self).__init__()
        self.conv = nn.Sequential(
            nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5),
            nn.Sigmoid(),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5),
            nn.Sigmoid(),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )
        self.fc = nn.Sequential(
            nn.Linear(16 * 4 * 4, 120),
            nn.Sigmoid(),
            nn.Linear(120, 84),
            nn.Sigmoid(),
            nn.Linear(84, 10)
        )

    def forward(self, img):
        x = self.conv(img)
        x = x.view(img.shape[0], -1)  # flatten to (batch_size, 16*4*4)
        x = self.fc(x)
        return x
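# Tracing a dummy batch through the conv stack reproduces the shape table
# in the docstring (a quick sketch; batch size 1 is arbitrary):
if __name__ == '__main__':
    X = torch.rand(1, 1, 28, 28)
    net = LeNet()
    for layer in net.conv:
        X = layer(X)
        print(type(layer).__name__, 'output shape:', X.shape)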
run.py
import torch
from model import LeNet
import tool

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
batch_size = 256
train_iter, test_iter = tool.load_data_fashion_mnist(batch_size=batch_size)
net = LeNet()
optimizer = torch.optim.Adam(net.parameters(), lr=0.001)
tool.train_ch5(net, train_iter, test_iter, optimizer, device)