CS231n assignment2

This post walks through how the key layers of a neural network are implemented, covering the forward and backward passes of Batch Normalization, ReLU, Dropout, convolution and pooling layers, with detailed code examples.



All of the code below comes from: https://github.com/sharedeeply/cs231n-assignment-solution

Batch Normalization

Forward pass of batch normalization. The mean and variance are computed over the values at the same feature position across all samples in the batch.

import numpy as np


def batchnorm_forward(x, gamma, beta, bn_param):
    mode = bn_param['mode']
    eps = bn_param.get('eps', 1e-5)
    momentum = bn_param.get('momentum', 0.9)

    N, D = x.shape
    running_mean = bn_param.get('running_mean', np.zeros(D, dtype=x.dtype))
    running_var = bn_param.get('running_var', np.zeros(D, dtype=x.dtype))
    if mode == 'train':
        mu = np.mean(x, axis=0)
        sigma2 = np.var(x, axis=0)
        x_hat = (x - mu) / np.sqrt(sigma2 + eps)
        out = gamma * x_hat + beta
        # Keep running estimates of the mean and variance; these are the
        # statistics used at test time
        running_mean = momentum * running_mean + (1 - momentum) * mu
        running_var = momentum * running_var + (1 - momentum) * sigma2
        inv_sigma = 1. / np.sqrt(sigma2 + eps)
        cache = (x, x_hat, gamma, mu, inv_sigma)
    elif mode == 'test':
        # Normalize with the running statistics accumulated during training
        x_hat = (x - running_mean) / np.sqrt(running_var + eps)
        out = gamma * x_hat + beta
        cache = None
    else:
        raise ValueError('Invalid forward batchnorm mode "%s"' % mode)

    # Store the updated running means back into bn_param
    bn_param['running_mean'] = running_mean
    bn_param['running_var'] = running_var

    return out, cache
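
As a quick sanity check (a minimal sketch; the shapes and values below are made up for illustration), the train-mode output should have roughly zero mean and unit variance per feature when gamma = 1 and beta = 0:

np.random.seed(0)
x = 5 * np.random.randn(200, 3) + 12       # N = 200 samples, D = 3 features
gamma, beta = np.ones(3), np.zeros(3)
bn_param = {'mode': 'train'}

out, _ = batchnorm_forward(x, gamma, beta, bn_param)
print(out.mean(axis=0))   # close to 0 for every feature
print(out.std(axis=0))    # close to 1 for every feature

# At test time the running statistics stored in bn_param are used instead
bn_param['mode'] = 'test'
out_test, _ = batchnorm_forward(x, gamma, beta, bn_param)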

Backward pass of batch normalization. When taking derivatives, note that the batch mean and variance are themselves functions of x.

def batchnorm_backward(dout, cache):
    x, x_hat, gamma, mu, inv_sigma = cache
    N = x.shape[0]
    dbeta = np.sum(dout, axis=0)
    dgamma = np.sum(x_hat * dout, axis=0)
    # The batch mean and variance both depend on x, so their gradients also
    # contribute to dx
    dvar = np.sum(-0.5 * inv_sigma ** 3 * (x - mu) * gamma * dout, axis=0)
    dmu = np.sum(-1 * inv_sigma * gamma * dout, axis=0)
    dx = gamma * dout * inv_sigma + (2 / N) * (x - mu) * dvar + (1 / N) * dmu

    return dx, dgamma, dbeta
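
A simple way to verify these formulas is a numerical gradient check. Below is a minimal sketch (the helper num_grad and all shapes and values are made up for illustration) that compares the analytic dx against centred finite differences:

def num_grad(f, x, h=1e-5):
    # Centred finite-difference estimate of the gradient of a scalar function f
    grad = np.zeros_like(x)
    it = np.nditer(x, flags=['multi_index'], op_flags=['readwrite'])
    while not it.finished:
        ix = it.multi_index
        old = x[ix]
        x[ix] = old + h
        fp = f(x)
        x[ix] = old - h
        fm = f(x)
        x[ix] = old
        grad[ix] = (fp - fm) / (2 * h)
        it.iternext()
    return grad

np.random.seed(1)
x = np.random.randn(8, 4)
gamma, beta = np.random.randn(4), np.random.randn(4)
dout = np.random.randn(8, 4)
bn_param = {'mode': 'train'}

_, cache = batchnorm_forward(x, gamma, beta, bn_param)
dx, dgamma, dbeta = batchnorm_backward(dout, cache)
dx_num = num_grad(lambda x_: np.sum(batchnorm_forward(x_, gamma, beta, bn_param)[0] * dout), x)
print(np.max(np.abs(dx - dx_num)))   # should be very small, on the order of 1e-8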

ReLU

def relu_forward(x):
    # Keep the positive values and zero out the rest
    out = x * (x > 0)
    cache = x
    return out, cache


def relu_backward(dout, cache):
    x = cache
    # Gradient flows only through the positions where the input was positive
    dx = dout * (x > 0)
    return dx
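
For example (toy values), the forward pass zeroes out the negative entries and the backward pass lets gradient through only where the input was positive:

x = np.array([[-2.0, 0.5], [3.0, -1.0]])
out, cache = relu_forward(x)                 # negative entries become 0
dx = relu_backward(np.ones_like(x), cache)   # [[0., 1.], [1., 0.]]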

Dropout

def dropout_forward(x, dropout_param):

    p, mode = dropout_param['p'], dropout_param['mode']
    if 'seed' in dropout_param:
        np.random.seed(dropout_param['seed'])

    mask = None
    out = None

    if mode == 'train':

        # Inverted dropout: divide by the keep probability p at training time
        # so nothing extra needs to be done at test time (the alternative is
        # to skip the division here and multiply by p at test time)
        mask = (np.random.rand(*x.shape) < p) / p
        out = x * mask
        
    elif mode == 'test':
        
        out = x
        

    cache = (dropout_param, mask)
    out = out.astype(x.dtype, copy=False)

    return out, cache
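
Because of the division by p, the expected activation at training time matches the test-time activation. A rough check (made-up shapes; here p is the keep probability):

np.random.seed(0)
x = np.ones((10000, 1))
train_out, _ = dropout_forward(x, {'mode': 'train', 'p': 0.5})
test_out, _ = dropout_forward(x, {'mode': 'test', 'p': 0.5})
print(train_out.mean())   # roughly 1.0
print(test_out.mean())    # exactly 1.0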


def dropout_backward(dout, cache):

    dropout_param, mask = cache
    mode = dropout_param['mode']

    dx = None
    if mode == 'train':
        
        # Only the units that were kept receive a gradient
        dx = dout * mask
        
    elif mode == 'test':
        dx = dout
    return dx

Convolutional Networks

The convolution layer here is implemented with plain for loops. It is a bit crude, but it helps us understand exactly what the operation does.

import numbers


def conv_forward_naive(x, w, b, conv_param):
    out = None
    
    stride = conv_param['stride']
    padding = conv_param['pad']
    if isinstance(stride, numbers.Number):
        stride = (stride, stride)  # allow a scalar stride for both directions
    if isinstance(padding, numbers.Number):
        padding = [(padding, padding), (padding, padding)]
    else:
        padding = [(i,) * 2 for i in padding]
    pad = [(0, 0), (0, 0)]
    pad.extend(padding)
    x_pad = np.pad(x, pad_width=pad, mode='constant', constant_values=0)
    n, c, pad_h, pad_w = x_pad.shape
    f, w_c, hh, ww = w.shape
    assert c == w_c, 'input channels must equal filter channels'
    out_h = (pad_h - hh) // stride[0] + 1
    out_w = (pad_w - ww) // stride[1] + 1
    out = np.zeros(shape=(n, f, out_h, out_w))
    for i in range(n):  # each sample
        for j in range(f):  # each filter
            for _w in range(out_w):  # horizontal direction
                for _h in range(out_h):  # vertical direction
                    # Each output position maps to a window in the padded
                    # input whose offset is a multiple of the stride
                    out[i, j, _h, _w] = np.sum(
                        x_pad[i, :, _h*stride[0]: _h*stride[0]+hh, _w*stride[1]: _w*stride[1]+ww] * w[j]) + b[j]
    
    cache = (x, w, b, conv_param)
    return out, cache
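
A quick shape check (illustrative numbers only): with a 4-image batch of 3x32x32 inputs, 8 filters of size 3x3, stride 1 and padding 1, the spatial size is preserved, matching out_h = (H + 2*pad - HH) // stride + 1:

np.random.seed(0)
x = np.random.randn(4, 3, 32, 32)   # (N, C, H, W)
w = np.random.randn(8, 3, 3, 3)     # (F, C, HH, WW)
b = np.zeros(8)
out, _ = conv_forward_naive(x, w, b, {'stride': 1, 'pad': 1})
print(out.shape)   # (4, 8, 32, 32), since (32 + 2*1 - 3) // 1 + 1 = 32
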
def conv_backward_naive(dout, cache):
   
    dx, dw, db = None, None, None
    
    x, w, b, conv_param = cache
    stride = conv_param['stride']
    padding = conv_param['pad']
    if isinstance(stride, numbers.Number):
        # isinstance checks whether the value is an instance of the given type
        stride = (stride, stride)  # allow a scalar stride for both directions
    if isinstance(padding, numbers.Number):
        padding = [(padding, padding), (padding, padding)]
    else:
        padding = [(i,) * 2 for i in padding]
    pad = [(0, 0), (0, 0)]
    pad.extend(padding)
    x_pad = np.pad(x, pad_width=pad, mode='constant', constant_values=0)
    n, c, pad_h, pad_w = x_pad.shape
    f, w_c, hh, ww = w.shape
    assert c == w_c, 'input channels must equal filter channels'
    out_h = (pad_h - hh) // stride[0] + 1
    out_w = (pad_w - ww) // stride[1] + 1
    dw = np.zeros_like(w)
    db = np.zeros_like(b)
    dx_pad = np.zeros_like(x_pad)
    for i in range(n):  # each sample
        for j in range(f):  # each filter
            for _w in range(out_w):  # horizontal direction
                for _h in range(out_h):  # vertical direction
                    # Accumulate the filter gradient over every window position
                    dw[j] += dout[i, j, _h, _w] * x_pad[i, :, _h*stride[0]: _h*stride[0]+hh, _w*stride[1]: _w*stride[1]+ww]
                    db[j] += dout[i, j, _h, _w]
                    dx_pad[i, :, _h*stride[0]: _h*stride[0]+hh, _w*stride[1]: _w*stride[1]+ww] += \
                        dout[i, j, _h, _w] * w[j]
    # Strip the padding to recover the gradient w.r.t. the original input
    # (the explicit end indices also handle a padding of 0 correctly)
    dx = dx_pad[:, :, pad[2][0]:pad_h - pad[2][1], pad[3][0]:pad_w - pad[3][1]]
    
    return dx, dw, db

From the implementation above we can see that the gradient of a filter's weights is the sum of the per-position gradients accumulated while sliding over the whole image (and over the whole batch).
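
For instance (tiny made-up shapes), the bias gradient reduces to a plain sum of dout over all samples and spatial positions, and dw is the analogous sum of dout-weighted input patches:

np.random.seed(2)
x = np.random.randn(2, 3, 7, 7)
w = np.random.randn(4, 3, 3, 3)
b = np.random.randn(4)
out, cache = conv_forward_naive(x, w, b, {'stride': 2, 'pad': 1})
dout = np.random.randn(*out.shape)

dx, dw, db = conv_backward_naive(dout, cache)
print(np.allclose(db, dout.sum(axis=(0, 2, 3))))   # True: db is just the summed upstream gradient
print(dw.shape, dx.shape)                          # same shapes as w and x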

Pooling

def max_pool_forward_naive(x, pool_param):

    out = None
    
    n, c, h, w = x.shape
    pool_h = pool_param['pool_height']
    pool_w = pool_param['pool_width']
    stride = pool_param['stride']
    out_h = 1 + (h - pool_h) // stride
    out_w = 1 + (w - pool_w) // stride
    out = np.zeros(shape=(n, c, out_h, out_w))
    for i in range(n):
        for j in range(c):
            for _h in range(out_h):
                for _w in range(out_w):
                    out[i, j, _h, _w] = np.max(x[i, j, _h*stride: _h*stride+pool_h, _w*stride: _w*stride+pool_w])
    
    cache = (x, pool_param)
    return out, cache


def max_pool_backward_naive(dout, cache):

    dx = None
    
    x, pool_param = cache
    n, c, h, w = x.shape
    pool_h = pool_param['pool_height']
    pool_w = pool_param['pool_width']
    stride = pool_param['stride']
    out_h = 1 + (h - pool_h) // stride
    out_w = 1 + (w - pool_w) // stride
    dx = np.zeros_like(x)
    for i in range(n):
        for j in range(c):
            for _h in range(out_h):
                for _w in range(out_w):
                    # np.argmax returns the position of the maximum within the
                    # flattened window; np.unravel_index converts it back to a
                    # (row, col) coordinate inside the pooling window, i.e. the
                    # position that receives the upstream gradient
                    indices = np.unravel_index(np.argmax(x[i, j, _h*stride: _h*stride+pool_h,
                                                         _w*stride: _w*stride+pool_w]), (pool_h, pool_w))
                    dx[i, j, _h*stride+indices[0], _w*stride+indices[1]] += dout[i, j, _h, _w]
    
    return dx
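
A tiny worked example (one 4x4 feature map, 2x2 pooling with stride 2): the forward pass keeps the maximum of each window, and the backward pass routes the upstream gradient back only to those maximum positions:

x = np.arange(16, dtype=float).reshape(1, 1, 4, 4)
pool_param = {'pool_height': 2, 'pool_width': 2, 'stride': 2}
out, cache = max_pool_forward_naive(x, pool_param)
print(out[0, 0])    # [[ 5.  7.]
                    #  [13. 15.]]
dout = np.ones_like(out)
dx = max_pool_backward_naive(dout, cache)
print(dx[0, 0])     # ones at the positions of 5, 7, 13 and 15, zeros elsewhere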