Code
import numpy as np
import matplotlib.pyplot as plt
from scipy.io import loadmat
import math
import scipy.optimize as op
def displayData(X, example_width=None):
    """Display the rows of X as a grid of grayscale images."""
    if example_width is None:
        example_width = round(np.sqrt(X.shape[1]))
    m, n = X.shape
    rows = math.floor(np.sqrt(m))
    cols = math.ceil(m / rows)
    fig, ax_array = plt.subplots(
        nrows=rows, ncols=cols, sharey=True, sharex=True, figsize=(8, 8))
    for row in range(rows):
        for column in range(cols):
            ax_array[row, column].matshow(
                X[cols * row + column].reshape((example_width, example_width)),
                cmap='gray_r')
    plt.xticks([])
    plt.yticks([])
    plt.show()
def sigmoid(z):
    return 1 / (1 + np.exp(-z))

def sigmoidGradient(z):
    return sigmoid(z) * (1 - sigmoid(z))
def nnCostFunction(nn_params, input_layer_size, hidden_layer_size, num_labels, X, y, Lambda):
    # Recover Theta1 and Theta2 from the unrolled parameter vector
    Theta1 = nn_params[:hidden_layer_size * (input_layer_size + 1)]
    Theta1 = Theta1.reshape(hidden_layer_size, input_layer_size + 1)
    Theta2 = nn_params[hidden_layer_size * (input_layer_size + 1):]
    Theta2 = Theta2.reshape(num_labels, hidden_layer_size + 1)
    m, n = X.shape
    Theta1_grad = np.zeros_like(Theta1)
    Theta2_grad = np.zeros_like(Theta2)
    # The part students are asked to complete starts here
    ########################
    #        part 1        #
    ########################
    # Map y onto a one-hot matrix of 0s and 1s
    Y = np.zeros((X.shape[0], num_labels))  # (5000, 10)
    # y must be flattened here; otherwise broadcasting fills entire rows with 1
    Y[np.arange(m), y.flatten() - 1] = 1
    a1 = np.hstack((np.ones((m, 1)), X))    # (5000, 401)
    z2 = a1.dot(Theta1.T)                   # (5000, 25)
    a2 = sigmoid(z2)                        # (5000, 25)
    a2 = np.hstack((np.ones((m, 1)), a2))   # (5000, 26)
    z3 = a2.dot(Theta2.T)                   # (5000, 10)
    a3 = sigmoid(z3)
    h = a3                                  # (5000, 10)
    J = (1 / m) * np.sum(np.sum(-Y * np.log(h) - (1 - Y) * np.log(1 - h), axis=1))
    temp1 = Theta1.copy()  # (25, 401)
    temp2 = Theta2.copy()  # (10, 26)
    # The bias column is not regularized, so zero it out
    temp1[:, 0] = 0
    temp2[:, 0] = 0
    reg_item = Lambda / (2 * m) * (np.sum(temp1 ** 2) + np.sum(temp2 ** 2))
    J += reg_item
    ########################
    #        part 2        #
    ########################
    D1 = np.zeros_like(Theta1)  # (25, 401)
    D2 = np.zeros_like(Theta2)  # (10, 26)
    for i in range(m):
        a1 = X[i, :]
        a1 = np.hstack((1, a1))                 # (401,)
        a2 = sigmoid(Theta1.dot(a1))            # (25,)
        a2 = np.hstack((1, a2))                 # (26,)
        a3 = sigmoid(Theta2.dot(a2))            # (10,)
        delta3 = a3 - Y[i, :]                   # (10,)
        delta2 = Theta2.T.dot(delta3)
        delta2 = delta2[1:] * sigmoidGradient(Theta1.dot(a1))  # (25,)
        D2 += np.dot(delta3.reshape(-1, 1), a2.reshape(1, -1))
        D1 += np.dot(delta2.reshape(-1, 1), a1.reshape(1, -1))
    Theta1_grad = (1 / m) * D1
    Theta2_grad = (1 / m) * D2
    # Regularization terms; the bias column stays unregularized
    grad_item1 = Lambda / m * np.hstack((np.zeros((Theta1.shape[0], 1)), Theta1[:, 1:]))
    grad_item2 = Lambda / m * np.hstack((np.zeros((Theta2.shape[0], 1)), Theta2[:, 1:]))
    Theta1_grad += grad_item1
    Theta2_grad += grad_item2
    grad = np.vstack((Theta1_grad.reshape(-1, 1), Theta2_grad.reshape(-1, 1)))
    return J, grad
def randInitializeWeights(L_in, L_out):
    # Randomly initialize the weights of a layer with L_in inputs and L_out
    # outputs, breaking symmetry with small values in [-epsilon, epsilon]
    epsilon_init = 0.12
    W = np.random.rand(L_out, L_in + 1) * 2 * epsilon_init - epsilon_init
    return W
def debugInitializeWeights(fan_out, fan_in):
    # Deterministic "pseudo-random" weights so gradient checking is reproducible
    W = np.zeros((fan_out, 1 + fan_in))
    W = np.reshape(np.sin(np.arange(1, W.size + 1)), W.shape) / 10
    return W
def checkNNGradients(Lambda):
    input_layer_size = 3
    hidden_layer_size = 5
    num_labels = 3
    m = 5
    # We generate some 'random' test data
    Theta1 = debugInitializeWeights(hidden_layer_size, input_layer_size)
    Theta2 = debugInitializeWeights(num_labels, hidden_layer_size)
    # Reusing debugInitializeWeights to generate X
    X = debugInitializeWeights(m, input_layer_size - 1)
    y = 1 + np.mod(np.arange(1, m + 1), num_labels)
    # Unroll parameters
    nn_params = np.vstack((Theta1.reshape(-1, 1), Theta2.reshape(-1, 1)))
    cost, grad = nnCostFunction(nn_params, input_layer_size, hidden_layer_size,
                                num_labels, X, y, Lambda)
    # Numerically estimate the gradient with central differences
    numgrad = np.zeros(nn_params.shape)
    perturb = np.zeros(nn_params.shape)
    e = 1e-4
    for p in range(nn_params.size):
        # Perturb one parameter at a time
        perturb[p] = e
        loss1, grad1 = nnCostFunction(
            nn_params - perturb, input_layer_size, hidden_layer_size, num_labels, X, y, Lambda)
        loss2, grad2 = nnCostFunction(
            nn_params + perturb, input_layer_size, hidden_layer_size, num_labels, X, y, Lambda)
        numgrad[p] = (loss2 - loss1) / (2 * e)
        perturb[p] = 0
    for i in range(numgrad.size):
        print('{}----{}'.format(numgrad[i, 0], grad[i, 0]))
    print('\nThe above two columns you get should be very similar.')
    print('Left-Your Numerical Gradient, Right-Analytical Gradient\n')
    diff = np.linalg.norm(numgrad - grad) / np.linalg.norm(numgrad + grad)
    print('If your backpropagation implementation is correct, then')
    print('the relative difference will be small (less than 1e-9).')
    print('Relative Difference: {}\n'.format(diff))
def nnCost(nn_params, input_layer_size, hidden_layer_size, num_labels, X, y, Lambda):
    # Cost-only version of nnCostFunction, used as f for fmin_cg
    Theta1 = nn_params[:hidden_layer_size * (input_layer_size + 1)]
    Theta1 = Theta1.reshape(hidden_layer_size, input_layer_size + 1)
    Theta2 = nn_params[hidden_layer_size * (input_layer_size + 1):]
    Theta2 = Theta2.reshape(num_labels, hidden_layer_size + 1)
    m, n = X.shape
    # Map y onto a one-hot matrix of 0s and 1s (y must be flattened, see above)
    Y = np.zeros((X.shape[0], num_labels))  # (5000, 10)
    Y[np.arange(m), y.flatten() - 1] = 1
    a1 = np.hstack((np.ones((m, 1)), X))    # (5000, 401)
    z2 = a1.dot(Theta1.T)                   # (5000, 25)
    a2 = sigmoid(z2)                        # (5000, 25)
    a2 = np.hstack((np.ones((m, 1)), a2))   # (5000, 26)
    z3 = a2.dot(Theta2.T)                   # (5000, 10)
    a3 = sigmoid(z3)
    h = a3                                  # (5000, 10)
    J = (1 / m) * np.sum(np.sum(-Y * np.log(h) - (1 - Y) * np.log(1 - h), axis=1))
    temp1 = Theta1.copy()  # (25, 401)
    temp2 = Theta2.copy()  # (10, 26)
    # The bias column is not regularized, so zero it out
    temp1[:, 0] = 0
    temp2[:, 0] = 0
    reg_item = Lambda / (2 * m) * (np.sum(temp1 ** 2) + np.sum(temp2 ** 2))
    J += reg_item
    return J
def nnGrad(nn_params, input_layer_size, hidden_layer_size, num_labels, X, y, Lambda):
    # Gradient-only version of nnCostFunction, used as fprime for fmin_cg
    Theta1 = nn_params[:hidden_layer_size * (input_layer_size + 1)]
    Theta1 = Theta1.reshape(hidden_layer_size, input_layer_size + 1)
    Theta2 = nn_params[hidden_layer_size * (input_layer_size + 1):]
    Theta2 = Theta2.reshape(num_labels, hidden_layer_size + 1)
    m, n = X.shape
    Theta1_grad = np.zeros_like(Theta1)
    Theta2_grad = np.zeros_like(Theta2)
    # Map y onto a one-hot matrix of 0s and 1s
    Y = np.zeros((X.shape[0], num_labels))  # (5000, 10)
    Y[np.arange(m), y.flatten() - 1] = 1
    D1 = np.zeros_like(Theta1)  # (25, 401)
    D2 = np.zeros_like(Theta2)  # (10, 26)
    for i in range(m):
        a1 = X[i, :]
        a1 = np.hstack((1, a1))                 # (401,)
        a2 = sigmoid(Theta1.dot(a1))            # (25,)
        a2 = np.hstack((1, a2))                 # (26,)
        a3 = sigmoid(Theta2.dot(a2))            # (10,)
        delta3 = a3 - Y[i, :]                   # (10,)
        delta2 = Theta2.T.dot(delta3)
        delta2 = delta2[1:] * sigmoidGradient(Theta1.dot(a1))  # (25,)
        D2 += np.dot(delta3.reshape(-1, 1), a2.reshape(1, -1))
        D1 += np.dot(delta2.reshape(-1, 1), a1.reshape(1, -1))
    Theta1_grad = (1 / m) * D1
    Theta2_grad = (1 / m) * D2
    # Regularization terms; the bias column stays unregularized
    grad_item1 = Lambda / m * np.hstack((np.zeros((Theta1.shape[0], 1)), Theta1[:, 1:]))
    grad_item2 = Lambda / m * np.hstack((np.zeros((Theta2.shape[0], 1)), Theta2[:, 1:]))
    Theta1_grad += grad_item1
    Theta2_grad += grad_item2
    # fmin_cg expects a flat 1-D gradient vector
    grad = np.concatenate((Theta1_grad.flatten(), Theta2_grad.flatten()))
    return grad
def predict(Theta1, Theta2, X):
    m, n = X.shape
    num_labels = Theta2.shape[0]
    h1 = sigmoid(np.hstack((np.ones((m, 1)), X)).dot(Theta1.T))   # (5000, 25)
    h2 = sigmoid(np.hstack((np.ones((m, 1)), h1)).dot(Theta2.T))  # (5000, 10)
    p = np.argmax(h2, axis=1)  # (5000,)
    return p
if __name__ == '__main__':
    # Setup the parameters we will use for this exercise
    input_layer_size = 400
    hidden_layer_size = 25
    num_labels = 10

    # =========== Part 1: Loading and Visualizing Data =============
    # Load Training Data
    print('Loading and Visualizing Data ...')
    file = 'ex4data1.mat'
    data = loadmat(file)
    X = data['X']
    y = data['y']
    m, n = X.shape  # m: number of training examples, n: number of features
    mask = np.random.choice(m, size=100, replace=False)  # pick 100 random examples
    # displayData(X[mask])
    print('=' * 40)

    # ================ Part 2: Loading Parameters ================
    # In this part of the exercise, we load some pre-initialized neural network parameters.
    print('Loading Saved Neural Network Parameters ...')
    # Load the weights into variables Theta1 and Theta2
    pre_weights = loadmat('ex4weights.mat')
    Theta1 = pre_weights['Theta1']  # 25x401
    Theta2 = pre_weights['Theta2']  # 10x26
    nn_params = np.vstack((np.reshape(Theta1, (-1, 1)),
                           np.reshape(Theta2, (-1, 1))))
    print('=' * 40)

    # ================ Part 3: Compute Cost (Feedforward) ================
    print('Feedforward Using Neural Network ...')
    # Weight regularization parameter (we set this to 0 here)
    Lambda = 0
    J, grad = nnCostFunction(nn_params, input_layer_size, hidden_layer_size,
                             num_labels, X, y, Lambda)
    print('Cost at parameters (loaded from ex4weights):')
    print('(this value should be about 0.287629)\n', J)
    print('=' * 40)

    # =============== Part 4: Implement Regularization ===============
    print('Checking Cost Function (w/ Regularization) ...')
    Lambda = 1
    J, grad = nnCostFunction(nn_params, input_layer_size, hidden_layer_size,
                             num_labels, X, y, Lambda)
    print('Cost at parameters (loaded from ex4weights):')
    print('(this value should be about 0.383770)\n', J)
    print('=' * 40)

    # ================ Part 5: Sigmoid Gradient ================
    print('Evaluating sigmoid gradient...')
    test_num = np.array([-1, -0.5, 0, 0.5, 1])
    g = sigmoidGradient(test_num)
    print('Sigmoid gradient evaluated at [-1 -0.5 0 0.5 1]:\n', g)
    print('=' * 40)

    # ================ Part 6: Initializing Parameters ================
    print('Initializing Neural Network Parameters ...')
    initial_Theta1 = randInitializeWeights(input_layer_size, hidden_layer_size)
    initial_Theta2 = randInitializeWeights(hidden_layer_size, num_labels)
    # Unroll parameters
    initial_nn_params = np.vstack(
        (initial_Theta1.reshape(-1, 1), initial_Theta2.reshape(-1, 1)))

    # =============== Part 7: Implement Backpropagation ===============
    print('Checking Backpropagation...')
    # Check gradients by running checkNNGradients
    checkNNGradients(Lambda)
    print('=' * 40)

    # =============== Part 8: Implement Regularization ===============
    # Once your backpropagation implementation is correct, you should now
    # continue to implement the regularization with the cost and gradient
    print('Checking Backpropagation (w/ Regularization) ... ')
    # Check gradients by running checkNNGradients
    Lambda = 3
    checkNNGradients(Lambda)
    # Also output the costFunction debugging values
    debug_J, _ = nnCostFunction(nn_params, input_layer_size, hidden_layer_size,
                                num_labels, X, y, Lambda)
    print('Cost at (fixed) debugging parameters (w/ lambda = {}):\n{}\n'
          '(for lambda = 3, this value should be about 0.576051)'.format(Lambda, debug_J))
    print('=' * 40)

    # =================== Part 8: Training NN ===================
    # You have now implemented all the code necessary to train a neural network.
    print('Training Neural Network...\n(this may take a few minutes, please be patient)')
    # Solve for the parameters with the conjugate gradient method
    nnParam = op.fmin_cg(f=nnCost, x0=initial_nn_params, fprime=nnGrad,
                         args=(input_layer_size, hidden_layer_size,
                               num_labels, X, y, Lambda),
                         maxiter=50, disp=True)
    # To make later debugging easier, the trained parameters can be saved
    # and reloaded instead of retraining every time:
    # np.save('Training_Result', nnParam)
    # print(nnParam)
    Theta1 = np.reshape(nnParam[:hidden_layer_size * (input_layer_size + 1)],
                        (hidden_layer_size, input_layer_size + 1))
    Theta2 = np.reshape(nnParam[hidden_layer_size * (input_layer_size + 1):],
                        (num_labels, hidden_layer_size + 1))
    print('=' * 40)

    # ================= Part 9: Visualize Weights =================
    print('Visualizing Neural Network...')
    displayData(Theta1[:, 1:])
    print('=' * 40)

    # ================= Part 10: Implement Predict =================
    pred = predict(Theta1, Theta2, X) + 1  # labels are 1..10, argmax gives 0..9
    acc = np.mean(pred == y.flatten())
    print('The Accuracy is about:', acc)
    print('=' * 40)
    print('END')
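A side note on the commented-out np.save call in the training section: once the parameters have been saved, later runs can reload them instead of retraining. The following is only a minimal sketch, assuming the file Training_Result.npy produced by that call exists (np.save appends the .npy extension automatically):

# Reload previously trained parameters saved with np.save('Training_Result', nnParam)
nnParam = np.load('Training_Result.npy')
Theta1 = np.reshape(nnParam[:hidden_layer_size * (input_layer_size + 1)],
                    (hidden_layer_size, input_layer_size + 1))
Theta2 = np.reshape(nnParam[hidden_layer_size * (input_layer_size + 1):],
                    (num_labels, hidden_layer_size + 1))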
Worth noting
Unlike the earlier exercises, the optimization method used here to solve for the network parameters is the conjugate gradient method, scipy.optimize.fmin_cg(), as shown below:
nnParam = op.fmin_cg(f=nnCost, x0=initial_nn_params, fprime=nnGrad,
                     args=(input_layer_size, hidden_layer_size,
                           num_labels, X, y, Lambda),
                     maxiter=50, disp=True)
The network weights Theta obtained with this optimizer turn out to be quite accurate. It is another useful optimization function I have picked up while porting these MATLAB exercises to Python.
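For reference, the same training step can also be expressed through SciPy's generic scipy.optimize.minimize interface with method='CG'. This is only an equivalent sketch of the call above, not what the script actually uses, and it assumes nnCost, nnGrad, initial_nn_params and the other variables are in scope exactly as defined earlier:

import scipy.optimize as op

# Sketch: conjugate-gradient training via the generic minimize() front end.
# Assumes the nnCost/nnGrad functions and variables from the script above.
res = op.minimize(fun=nnCost,
                  x0=initial_nn_params.flatten(),  # minimize expects a 1-D x0
                  jac=nnGrad,
                  method='CG',
                  args=(input_layer_size, hidden_layer_size,
                        num_labels, X, y, Lambda),
                  options={'maxiter': 50, 'disp': True})
nnParam = res.x  # unrolled, trained parameter vector

Unlike fmin_cg, minimize returns an OptimizeResult object, so the trained weights come back in res.x rather than directly.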
Afterword
I actually finished this Python implementation of ex4 about half a month ago, but a heavy study load and various other chores kept me from uploading it until now. Overall, re-implementing this MATLAB code in Python by hand has been well worth it; the biggest benefit, of course, is getting much more comfortable with numpy.
This post walked through building a neural network model end to end: loading the data, initializing parameters, computing the cost with forward propagation, computing gradients with backpropagation, gradient checking, regularization, optimization with the conjugate gradient method, and evaluating prediction accuracy.