Andrew Ng's Machine Learning Course: Programming Exercises | (4) ex4-NN Back Propagation

This post implements handwritten digit recognition with the backpropagation (BP) algorithm, walking through how the neural network is built, including the core steps of forward propagation and backpropagation, and showing how training improves the model's accuracy.


The BP algorithm is a bit tricky to implement, and the course videos don't explain it very clearly, so here is a link to a plain-language walkthrough of the BP algorithm: 大白话讲解BP算法.
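For reference, here is a sketch of the formulas the code below implements (notation matched to the implementation's variable names; $g$ is the sigmoid):

$$J(\Theta) = -\frac{1}{m}\sum_{i=1}^{m}\sum_{k=1}^{10}\left[y_k^{(i)}\log h_k^{(i)} + (1-y_k^{(i)})\log(1-h_k^{(i)})\right]$$

$$\delta^{(3)} = h - y,\qquad \delta^{(2)} = \delta^{(3)}\,\Theta_2[:,1:]\odot g'(z^{(2)}),\qquad D^{(l)} = \frac{1}{m}\,(\delta^{(l+1)})^{T}a^{(l)}$$

where $g'(z) = g(z)(1 - g(z))$ and $\odot$ denotes element-wise multiplication.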

"""
神经网络-反向传播多分类问题
案例: 手写数字识别
"""

import numpy as np
import scipy.io as sio
import matplotlib
import matplotlib.pyplot as plt
from scipy.optimize import minimize
from sklearn.metrics import classification_report  # per-class evaluation report


def load_data(path, transpose=True):
    data = sio.loadmat(path)
    y = data.get('y')  # (5000,1)
    y = y.reshape(y.shape[0])  # flatten (5000, 1) into a 1-D array of shape (5000,)

    X = data.get('X')  # (5000, 400): each row is one 20×20 digit image

    if transpose:
        # for this dataset, you need a transpose to get the orientation right
        X = np.array([im.reshape((20, 20)).T for im in X])  # restore each row to a 20×20 image and transpose it
        # then flatten each image again to preserve the vector representation
        X = np.array([im.reshape(400) for im in X])

    return X, y


def load_weight(path):
    data = sio.loadmat(path)
    return data["Theta1"], data["Theta2"]


def expand_y(y):
    """expand 5000*1 into 5000*10
         where y=10 -> [0, 0, 0, 0, 0, 0, 0, 0, 0, 1]: ndarray

    from sklearn.preprocessing import OneHotEncoder
    encoder = OneHotEncoder(sparse=False)
    y_onehot = encoder.fit_transform(y)
    y_onehot.shape #这个函数与expand_y(y)一致
    """
    res = []
    for i in y:  # class labels range from 1 to 10
        y_array = np.zeros(10)
        y_array[i - 1] = 1

        res.append(y_array)

    return np.array(res)
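# Example (illustrative input): expand_y(np.array([1, 10])) returns
# [[1, 0, 0, 0, 0, 0, 0, 0, 0, 0],
#  [0, 0, 0, 0, 0, 0, 0, 0, 0, 1]]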


def serialize(a, b):
    return np.append(a.flatten(), b.flatten())


def deserialize(theta_serialize):
    """into ndarray of (25, 401), (10, 26)"""
    theta1 = theta_serialize[:25 * 401].reshape(25, 401)
    theta2 = theta_serialize[25 * 401:].reshape(10, 26)
    return theta1, theta2


def sigmoid(z):
    return 1 / (1 + np.exp(-z))


def feed_forward(theta_serialize, X):  # forward propagation
    theta1, theta2 = deserialize(theta_serialize)
    a1 = X
    z2 = a1 @ theta1.T
    a2 = sigmoid(z2)
    a2 = np.insert(a2, 0, values=1, axis=1)
    z3 = a2 @ theta2.T
    h = sigmoid(z3)
    return a1, z2, a2, z3, h
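# Shape walk-through for feed_forward on the ex4 data (X already includes the bias column):
# a1: (5000, 401) -> z2 = a1 @ theta1.T: (5000, 25) -> a2 with bias: (5000, 26)
# -> z3 = a2 @ theta2.T: (5000, 10) -> h: (5000, 10), one score per class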


def cost(theta_serialize, X, y):  # cost function without regularization
    a1, z2, a2, z3, h = feed_forward(theta_serialize, X)
    J = -np.sum(y * np.log(h) + (1 - y) * np.log(1 - h)) / len(X)
    return J


def regularized_cost(theta_serialize, X, y, λ=1):  # cost function with regularization
    """the first column of t1 and t2 is intercept theta, ignore them when you do regularization"""
    t1, t2 = deserialize(theta_serialize)  # t1: (25,401) t2: (10,26)
    m = X.shape[0]

    reg_t1 = (λ / (2 * m)) * np.power(t1[:, 1:], 2).sum()  # this is how you ignore first col
    reg_t2 = (λ / (2 * m)) * np.power(t2[:, 1:], 2).sum()

    return cost(theta_serialize, X, y) + reg_t1 + reg_t2


def sigmoid_gradient(z):  # derivative of the sigmoid function
    return sigmoid(z) * (1 - sigmoid(z))
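# Quick sanity check: sigmoid_gradient(0) == 0.25, since g(0) = 0.5 and g'(z) = g(z) * (1 - g(z)).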


def gradient(theta_serialize, X, y):  # gradient without regularization
    theta1, theta2 = deserialize(theta_serialize)
    a1, z2, a2, z3, h = feed_forward(theta_serialize, X)
    d3 = h - y
    d2 = d3 @ theta2[:, 1:] * sigmoid_gradient(z2)
    D2 = (d3.T @ a2) / len(X)
    D1 = (d2.T @ a1) / len(X)
    return serialize(D1, D2)
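# Backprop shape check: d3 is (5000, 10); theta2[:, 1:] drops the bias column so that
# d2 = d3 @ theta2[:, 1:] * g'(z2) comes out (5000, 25), matching z2.
# D2 = d3.T @ a2 / m is (10, 26) and D1 = d2.T @ a1 / m is (25, 401); these are the same
# shapes as theta2 and theta1, which is what minimize() expects from jac.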


def reg_gradient(theta_serialize, X, y, λ=1):  # gradient with regularization
    D = gradient(theta_serialize, X, y)
    D1, D2 = deserialize(D)

    theta1, theta2 = deserialize(theta_serialize)
    D1[:, 1:] = D1[:, 1:] + theta1[:, 1:] * λ / len(X)
    D2[:, 1:] = D2[:, 1:] + theta2[:, 1:] * λ / len(X)

    return serialize(D1, D2)


def nn_training(X, y, λ=1):  # train the model
    init_theta = np.random.uniform(-0.12, 0.12, 10285)  # random initialization of theta
    res = minimize(fun=regularized_cost,
                   x0=init_theta,
                   args=(X, y, λ),
                   method='TNC',
                   jac=reg_gradient,
                   options={'maxiter': 400})

    return res
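# Note: the ±0.12 range follows the exercise's heuristic epsilon_init = sqrt(6 / (L_in + L_out)),
# which is about 0.12 for the 400-25-10 network; initializing randomly (rather than with zeros)
# breaks symmetry so the hidden units can learn different features.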


def plot_hidden_layer(theta):  # visualize the hidden layer
    theta1, _ = deserialize(theta)
    hidden_layer = theta1[:, 1:]  # (25, 400): drop the bias unit

    fig, ax = plt.subplots(ncols=5, nrows=5, figsize=(8, 8), sharex=True, sharey=True)

    for r in range(5):
        for c in range(5):
            ax[r, c].matshow(hidden_layer[5 * r + c].reshape((20, 20)), cmap=matplotlib.cm.binary)

    plt.xticks([])
    plt.yticks([])


# ------- gradient checking ---------
def expand_array(arr):
    """replicate array into matrix
    [1, 2, 3]

    [[1, 2, 3],
     [1, 2, 3],
     [1, 2, 3]]
    """
    # replicate the 1-D array into an (n, n) matrix (avoids the deprecated np.mat)
    return np.tile(arr, (arr.shape[0], 1))


def gradient_checking(theta, X, y, epsilon, regularized=False):
    def a_numeric_grad(plus, minus, regularized=False):
        """calculate a partial gradient with respect to 1 theta"""
        if regularized:
            return (regularized_cost(plus, X, y) - regularized_cost(minus, X, y)) / (epsilon * 2)
        else:
            return (cost(plus, X, y) - cost(minus, X, y)) / (epsilon * 2)

    theta_matrix = expand_array(theta)  # expand to (10285, 10285)
    epsilon_matrix = np.identity(len(theta)) * epsilon

    plus_matrix = theta_matrix + epsilon_matrix
    minus_matrix = theta_matrix - epsilon_matrix

    # calculate numerical gradient with respect to all theta
    numeric_grad = np.array([a_numeric_grad(plus_matrix[i], minus_matrix[i], regularized)
                             for i in range(len(theta))])

    # analytical grad will depend on if you want it to be regularized or not
    analytic_grad = reg_gradient(theta, X, y) if regularized else gradient(theta, X, y)

    # If you have a correct implementation, and assuming you used EPSILON = 0.0001
    # the diff below should be less than 1e-9
    # this is how the original Matlab code does gradient checking
    diff = np.linalg.norm(numeric_grad - analytic_grad) / np.linalg.norm(numeric_grad + analytic_grad)

    print('If your backpropagation implementation is correct,\n'
          'the relative difference will be smaller than 1e-8 (assuming epsilon=0.0001).\n'
          'Relative Difference: {}\n'.format(diff))
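# Note: the original Matlab exercise runs gradient checking on a small debug network rather
# than the full model; checking all 10285 parameters here materializes a (10285, 10285)
# matrix, which is why the (commented-out) call in __main__ below is slow and memory-hungry.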


if __name__ == '__main__':
    X_raw, y_raw = load_data('ex4data1.mat', transpose=False)
    X = np.insert(X_raw, 0, np.ones(X_raw.shape[0]), axis=1)  # add a column of ones (bias term)
    theta1, theta2 = load_weight('ex4weights.mat')
    y = expand_y(y_raw)  # y --> (5000, 10); raw labels run 1-10 (10 stands for the digit 0)
    theta = serialize(theta1, theta2)  # flatten the parameters: 25*401 + 10*26 = 10285, shape (10285,)
    print(theta.shape)
    print(regularized_cost(theta, X, y))

    # backpropagation training
    res = nn_training(X, y, 1)
    _, _, _, _, h = feed_forward(res.x, X)

    y_pred = np.argmax(h, axis=1) + 1
    print('Accuracy={}'.format(np.mean(y_raw == y_pred)))
    plot_hidden_layer(res.x)  # visualizes the first layer of θ values (theta1)
    # gradient_checking(res.x, X, y, epsilon=0.0001)  # very slow and memory-hungry; run with caution
    # If your backpropagation implementation is correct,
    # the relative difference will be smaller than 1e-8 (assuming epsilon=0.0001).
    # Relative Difference: 2.1455623285988868e-09

    # per-class precision/recall evaluation
    print(classification_report(y_raw, y_pred))

    plt.show()
