Hands-On PyTorch: CNN

This post works through the fundamentals of convolutional neural networks: the convolution (cross-correlation) operation, padding and stride, multi-channel convolutions, and pooling, with worked examples showing how to implement each concept in PyTorch.


tool.py

#coding=utf-8
import torch
import torch.nn as nn
import torch.nn.functional as F

import torchvision
import torchvision.transforms as transforms

import numpy as np

import collections
import math
import os
import random
import sys
import time

"""
    ** 1/4 卷积(互相关运算) **
    """

def corr2d(X, K):
    """
    2D cross-correlation (book p107).
    X: 2D input array
    K: 2D convolution kernel (kernel_size)

    X = torch.tensor([[0, 1, 2],
                      [3, 4, 5],
                      [6, 7, 8]])

    K = torch.tensor([[0, 1],
                      [2, 3]])

    print(corr2d(X, K))
    >> tensor([[19., 25.],
               [37., 43.]])
    """
    k_h, k_w = K.shape   # (height, width) = (shape[0], shape[1])
    x_h, x_w = X.shape
    X, K = X.float(), K.float()
    Y = torch.zeros((x_h - k_h + 1, x_w - k_w + 1))  # key formula: Y.shape = (x_h - k_h + 1, x_w - k_w + 1)
    for i in range(Y.shape[0]):
        for j in range(Y.shape[1]):
            Y[i, j] = (X[i: i + k_h, j: j + k_w] * K).sum()
    return Y


class Conv2D(nn.Module):
    """
    A custom 2D convolutional layer: cross-correlate the 2D input x with the
    weight parameter w (as above), then add the bias parameter.
    """
    def __init__(self, kernel_size):
        super(Conv2D, self).__init__()
        self.weight = nn.Parameter(torch.randn(kernel_size))
        self.bias = nn.Parameter(torch.randn(1))

    def forward(self, x):
        return corr2d(x, self.weight) + self.bias


def edge_detection():
    """
    Image edge detection experiment (book p108).
    A convolution kernel as a task-specific tool.

    X :
    tensor([[1., 1., 0., 0., 0., 0., 1., 1.],
            [1., 1., 0., 0., 0., 0., 1., 1.],
            [1., 1., 0., 0., 0., 0., 1., 1.],
            [1., 1., 0., 0., 0., 0., 1., 1.],
            [1., 1., 0., 0., 0., 0., 1., 1.],
            [1., 1., 0., 0., 0., 0., 1., 1.]])

    Y :
    tensor([[ 0.,  1.,  0.,  0.,  0., -1.,  0.],
            [ 0.,  1.,  0.,  0.,  0., -1.,  0.],
            [ 0.,  1.,  0.,  0.,  0., -1.,  0.],
            [ 0.,  1.,  0.,  0.,  0., -1.,  0.],
            [ 0.,  1.,  0.,  0.,  0., -1.,  0.],
            [ 0.,  1.,  0.,  0.,  0., -1.,  0.]])

    """
    X = torch.ones(6, 8)
    X[:, 2:6] = 0

    K = torch.tensor([[1, -1]])  # a task-specific kernel: detects vertical edges
    Y = corr2d(X, K)
    return X, Y
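
# A quick follow-up check (my addition, a sketch in the spirit of the same book
# section): the kernel [[1, -1]] only responds to vertical edges. Transposing X
# turns its edges horizontal, and the cross-correlation output becomes all zeros.
def edge_detection_transposed():
    X, _ = edge_detection()
    K = torch.tensor([[1, -1]])
    print(corr2d(X.t(), K))  # an 8x5 tensor of zeros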


def learn_from_data():
    """
    Learning the kernel array from data (book p109, section 5.1.4).
    The training data come from the edge-detection experiment above;
    this can be seen as a miniature CNN training loop.

    True kernel: [[1, -1]]
    Learned result:
    weight:  tensor([[ 1.0015, -1.0014]])
    bias:  tensor([-7.0750e-05])

    """
    X, Y = edge_detection()

    conv2d = Conv2D(kernel_size=(1, 2))

    lr = 0.01
    for i in range(30):
        Y_hat = conv2d(X)
        l = ((Y_hat - Y) ** 2).sum()
        l.backward()

        # gradient descent update
        conv2d.weight.data -= lr * conv2d.weight.grad
        conv2d.bias.data -= lr * conv2d.bias.grad

        # zero the gradients
        conv2d.weight.grad.fill_(0)
        conv2d.bias.grad.fill_(0)
        if (i + 1) % 5 == 0:
            print('Step %d, loss %.3f' % (i + 1, l.item()))

    print("weight: ", conv2d.weight.data)
    print("bias: ", conv2d.bias.data)


"""
    ** 2/4 填充padding 步幅stride **
    """

def comp_conv2d(conv2d, X):
    """
    函数工具,用来计算卷积结果Y的大小
    """
    # (1, 1)代表批量大小和通道数均为1
    X = X.view((1, 1) + X.shape)
    Y = conv2d(X)
    return Y.view(Y.shape[2:])  # 排除不关心的前两维:批量和通道

def padding_test(x_shape, k_shape, p_shape, s_shape):
    """
    Padding experiment (book p112).
    padding: number of columns added on each side (or rows, top and bottom).
    With padding, Y.shape relates to the shapes of X and K as (book p111):
    y_h = x_h - k_h + p_h + 1, where p_h = 2 * padding_h
    y_w = x_w - k_w + p_w + 1
    If p_h = k_h - 1, then y_h = x_h: input and output have the same size.

    stride: number of rows and columns the kernel slides per step.
    With stride, the formula becomes (book p113):
    y_h = floor((x_h - k_h + p_h + s_h) / s_h)
    y_w likewise

    padding_test((14, 12), (5, 3), (2, 1), (2, 1))
    >> torch.Size([7, 12])
    """
    X = torch.rand(x_shape)

    # # Note: 1 row/column is padded on each side, so 2 rows/columns in total
    # conv2d = nn.Conv2d(in_channels=1, out_channels=1, kernel_size=3, padding=1)
    # print(comp_conv2d(conv2d, X).shape)
    #
    # # A kernel of height 5 and width 3, with padding 2 (height) and 1 (width) per side
    # conv2d = nn.Conv2d(in_channels=1, out_channels=1, kernel_size=(5, 3), padding=(2, 1))
    # print(comp_conv2d(conv2d, X).shape)

    conv2d = nn.Conv2d(in_channels=1, out_channels=1, kernel_size=k_shape, padding=p_shape, stride=s_shape)
    print(comp_conv2d(conv2d, X).shape)
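
# A direct numeric check of the formula above (my addition): compute y_h and
# y_w from the formula and compare with what nn.Conv2d actually produces,
# using the example configuration from the docstring.
def check_output_size_formula():
    x_h, x_w, k_h, k_w = 14, 12, 5, 3
    p_h, p_w, s_h, s_w = 2, 1, 2, 1   # padding and stride per dimension
    y_h = (x_h - k_h + 2 * p_h + s_h) // s_h   # floor division rounds down
    y_w = (x_w - k_w + 2 * p_w + s_w) // s_w
    conv2d = nn.Conv2d(1, 1, kernel_size=(k_h, k_w), padding=(p_h, p_w), stride=(s_h, s_w))
    print(comp_conv2d(conv2d, torch.rand(x_h, x_w)).shape, (y_h, y_w))  # both (7, 12)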


"""
    ** 3/4 多输入通道 in_channel  多输出通道out_channel **
    """
def corr2d_multi_in(X, K):
    """
    # Cross-correlate along dim 0 (the channel dim) of X and K separately, then sum (book p114)
    X = torch.tensor([[[0, 1, 2],
                       [3, 4, 5],
                       [6, 7, 8]],

                     [[1, 2, 3],
                      [4, 5, 6],
                      [7, 8, 9]]])

    K = torch.tensor([[[0, 1],
                       [2, 3]],

                      [[1, 2],
                       [3, 4]]])

    print(corr2d_multi_in(X, K))
    >>  tensor([[ 56.,  72.],
                [104., 120.]])
   """
    res = 0.0
    for i in range(0, X.shape[0]):
        res += corr2d(X[i, :, :], K[i, :, :])
    return res


def corr2d_multi_in_out(X, K):
    """
    # Iterate over dim 0 of K; cross-correlate each slice with the full input X,
    # then merge all results with torch.stack.

    K = torch.stack([K, K + 1, K + 2])   # broadcasting: K + 1 adds 1 to every element; stacking gives 3 output channels
    print(K.shape) >> torch.Size([3, 2, 2, 2])

    corr2d_multi_in_out(X, K)
    tensor([[[ 56.,  72.],
             [104., 120.]],

            [[ 76., 100.],
             [148., 172.]],

            [[ 96., 128.],
             [192., 224.]]])
    """
    return torch.stack([corr2d_multi_in(X, k) for k in K])


def corr2d_multi_in_out_1x1(X, K):
    """
        X = torch.rand(3, 3, 3)    # (c_i, h, w) --> (c_i, h*w)
        K = torch.rand(2, 3, 1, 1) # (c_o, c_i, k_h(1), k_w(1)) --> (c_o, c_i)

        Y1 = corr2d_multi_in_out_1x1(X, K)
        Y2 = corr2d_multi_in_out(X, K)

        (Y1 - Y2).norm().item() < 1e-6  # equivalent
        >> True
    """
    c_i, h, w = X.shape
    c_o = K.shape[0]

    # Flatten: the elements in each 1x1 convolution window form a vector, and
    # the channel dim acts as the feature dim
    X = X.view(c_i, h * w)    # X.shape  (c_i, h, w) --> (c_i, h*w)
    K = K.view(c_o, c_i)      # K.shape  (c_o, c_i, k_h(1), k_w(1)) --> (c_o, c_i)

    Y = torch.mm(K, X)  # matrix multiplication, as in a fully connected layer
    return Y.view(c_o, h, w)
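
# Runnable version of the equivalence check sketched in the docstring above
# (my addition): a 1x1 convolution acts as a fully connected layer applied
# independently at each pixel, with channels as features.
def check_1x1_equivalence():
    X = torch.rand(3, 3, 3)
    K = torch.rand(2, 3, 1, 1)
    Y1 = corr2d_multi_in_out_1x1(X, K)
    Y2 = corr2d_multi_in_out(X, K)
    print((Y1 - Y2).norm().item() < 1e-6)  # True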

"""卷积层总结:
输入X  (batch_size, c_i, x_h, x_w)  /  (c_i, x_h, x_w)   
卷积核 (c_o, c_i, k_w, k_h)
卷积层: conv2d = nn.Conv2d( in_channels=c_i,         # 输入通道维  X.shape[0]=K.shape[1]
                             out_channels=c_o,       # 输出通道维  K.shape[0]
                             kernel_size=(k_h, k_w), # 卷积核大小 
                             stride=(s_h, s_w)       # 步幅 
                             padding=(p_h, p_w),     # 填充
                         )

使用  Y = conv2d(X)    # Y.shape: [c_o, y_h, y_w]
                        y_h = [ (x_h - k_h + p_h(2*padding) + s_h(1*stride) / s_h ] (下取整)
                        y_w 同理
"""


"""
    ** 4/4 池化 **
    """

def pool2d(X, pool_size, mode='max'):
    """
    Pooling (book p118).
    Similar to the cross-correlation corr2d, except there are no learnable parameters.

    X = torch.tensor([[0, 1, 2],
                      [3, 4, 5],
                      [6, 7, 8]])

    pool2d(X, (2, 2))  # max over each 2x2 window
        >> tensor([[4., 5.],
                   [7., 8.]])

    pool2d(X, (2, 2), 'avg')  # mean over each 2x2 window
        >> tensor([[2., 3.],
                   [5., 6.]])
    """
    X = X.float()
    p_h, p_w = pool_size
    Y = torch.zeros(X.shape[0] - p_h + 1, X.shape[1] - p_w + 1)
    for i in range(Y.shape[0]):
        for j in range(Y.shape[1]):
            if mode == 'max':
                Y[i, j] = X[i: i + p_h, j: j + p_w].max()
            elif mode == 'avg':
                Y[i, j] = X[i: i + p_h, j: j + p_w].mean()
    return Y
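
# Cross-check (my addition): pool2d above slides its window with stride 1,
# so it should agree with nn.MaxPool2d(kernel_size=2, stride=1) on the
# docstring example (note nn.MaxPool2d defaults to stride == kernel_size).
def check_pool2d():
    X = torch.arange(9, dtype=torch.float).view(3, 3)
    print(pool2d(X, (2, 2)))                              # tensor([[4., 5.], [7., 8.]])
    print(nn.MaxPool2d(2, stride=1)(X.view(1, 1, 3, 3)))  # same values, shape (1, 1, 2, 2)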


def pad_test():
    """
    Padding and stride for pooling (book p119).
    """
    X = torch.arange(16, dtype=torch.float).view((1, 1, 4, 4))

    pool2d = nn.MaxPool2d(3, padding=1, stride=2)
    print(pool2d(X))

    pool2d = nn.MaxPool2d((2, 4), padding=(1, 2), stride=(2, 3))
    print(pool2d(X))

def chann_test():
    """
    Multi-channel pooling (book p120).
    """
    X = torch.arange(16, dtype=torch.float).view((1, 1, 4, 4))
    X = torch.cat((X, X + 1), dim=1)

    pool2d = nn.MaxPool2d(3, padding=1, stride=2)
    print(pool2d(X))

"""池化层
输入X  (c_i, x_h, x_w)
池化层  pool2d = nn.MaxPool2d(kernel_size = (m_h, m_w),           # 池化窗口
                              stride=(s_h, s_w))    # 步幅
                              padding=(p_h, p_w),  # 填充
                              
使用 Y = pool2d(X)  Y.shape:  [c_i, y_h, y_w]    
    y_h = [(x_h + 2*p_h - 1*(k_h - 1) - 1) / s_h + 1 ] 下取整
    y_w同理
"""


def evaluate_accuracy(data_iter, net):
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    acc_sum, n = 0.0, 0
    with torch.no_grad():
        for X, y in data_iter:
            if isinstance(net, torch.nn.Module):
                net.eval() # eval mode: disables dropout
                acc_sum += (net(X.to(device)).argmax(dim=1) == y.to(device)).float().sum().cpu().item()
                net.train() # back to train mode
            else: # custom model (not used after section 3.13); GPU not considered
                if('is_training' in net.__code__.co_varnames): # if the function takes an is_training argument
                    # set is_training to False
                    acc_sum += (net(X, is_training=False).argmax(dim=1) == y).float().sum().item()
                else:
                    acc_sum += (net(X).argmax(dim=1) == y).float().sum().item()
            n += y.shape[0]
    return acc_sum / n


def train_ch5(net, train_iter, test_iter, optimizer, device, batch_size=256, num_epochs=5):
    net = net.to(device)
    print("training on ", device)
    loss = torch.nn.CrossEntropyLoss()
    for epoch in range(num_epochs):
        # reset the running sums (including batch_count) at the start of every epoch
        train_l_sum, train_acc_sum, n, batch_count, start = 0.0, 0.0, 0, 0, time.time()
        for X, y in train_iter:
            X = X.to(device)
            y = y.to(device)
            y_hat = net(X)
            l = loss(y_hat, y)
            optimizer.zero_grad()
            l.backward()
            optimizer.step()
            train_l_sum += l.cpu().item()
            train_acc_sum += (y_hat.argmax(dim=1) == y).sum().cpu().item()
            n += y.shape[0]
            batch_count += 1
        test_acc = evaluate_accuracy(test_iter, net)
        print('epoch %d, loss %.4f, train acc %.3f, test acc %.3f, time %.1f sec'
              % (epoch + 1, train_l_sum / batch_count, train_acc_sum / n, test_acc, time.time() - start))


# Load the Fashion-MNIST dataset
def load_data_fashion_mnist(batch_size, resize=None, root='~/Datasets/FashionMNIST'):
    """Download the Fashion-MNIST dataset and load it into memory."""
    trans = []
    if resize:
        trans.append(torchvision.transforms.Resize(size=resize))
    trans.append(torchvision.transforms.ToTensor())

    transform = torchvision.transforms.Compose(trans)
    mnist_train = torchvision.datasets.FashionMNIST(root=root, train=True, download=True, transform=transform)
    mnist_test = torchvision.datasets.FashionMNIST(root=root, train=False, download=True, transform=transform)
    if sys.platform.startswith('win'):
        num_workers = 0  # 0 means no extra worker processes for data loading
    else:
        num_workers = 4
    train_iter = torch.utils.data.DataLoader(mnist_train, batch_size=batch_size, shuffle=True, num_workers=num_workers)
    test_iter = torch.utils.data.DataLoader(mnist_test, batch_size=batch_size, shuffle=False, num_workers=num_workers)
    return train_iter, test_iter


model.py

import os
import time
import torch
from torch import nn, optim

import sys
sys.path.append("..")  # extend the path before importing tool, or the append has no effect
import tool

os.environ["CUDA_VISIBLE_DEVICES"] = "0"


# print(torch.__version__) # 1.2.0+cpu
# print(device)            # cpu


class LeNet(nn.Module):
    """
    the network:
        input      1*28*28
        Conv2d     6*24*24
        MaxPool2d  6*12*12
        Conv2d     16*8*8
        MaxPool2d  16*4*4
        MLP        120
        MLP        84
        MLP        10
        Softmax
        """
    def __init__(self):
        super(LeNet, self).__init__()
        self.conv = nn.Sequential(
            nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5),
            nn.Sigmoid(),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5),
            nn.Sigmoid(),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )
        self.fc = nn.Sequential(
            nn.Linear(16*4*4, 120),
            nn.Sigmoid(),
            nn.Linear(120, 84),
            nn.Sigmoid(),
            nn.Linear(84, 10)
        )

    def forward(self, img):
        x = self.conv(img)
        x = x.view(img.shape[0], -1)
        x = self.fc(x)
        return x
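
# Shape trace (my addition): push a dummy batch through the conv stage to
# confirm the per-layer sizes in the docstring. Sigmoid preserves shapes.
def trace_lenet_shapes():
    net = LeNet()
    x = torch.rand(1, 1, 28, 28)
    for layer in net.conv:
        x = layer(x)
        print(type(layer).__name__, tuple(x.shape))
    # Conv2d (1, 6, 24, 24), MaxPool2d (1, 6, 12, 12),
    # Conv2d (1, 16, 8, 8),  MaxPool2d (1, 16, 4, 4)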

run.py

import torch
from model import LeNet
import tool

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
batch_size = 256
train_iter, test_iter = tool.load_data_fashion_mnist(batch_size=batch_size)
net = LeNet()
optimizer = torch.optim.Adam(net.parameters(), lr=0.001)
tool.train_ch5(net, train_iter, test_iter, optimizer, device)