Error Backpropagation Illustrated with a Personnel-Recruitment Example (Question 3)

import numpy as np
import matplotlib.pyplot as plt

# Input data
X = np.array([[1.0, 0.1], [0.1, 1.0], [0.1, 0.1], [1.0, 1.0]])
# Labels
T = np.array([[1], [0], [0], [1]])

# A neural network with two hidden layers: 2-2-2-1
W1 = np.array([[0.8, 0.2], [0.2, 0.8]])
W2 = np.array([[0.5, 0.0], [0.5, 1.0]])
W3 = np.array([[0.5], [0.5]])
b1 = np.array([[-1.0, 0.3]])
b2 = np.array([[0.1, -0.1]])
b3 = np.array([[-0.6]])

lr = 0.1        # learning rate
epochs = 10000  # number of training epochs
report = 1000   # report the loss every `report` epochs

# Sigmoid activation and its derivative
def sigmoid(x):
    return 1 / (1 + np.exp(-x))

def dsigmoid(x):
    # Takes the activation a = sigmoid(z), not z itself: sigma'(z) = a * (1 - a)
    return x * (1 - x)

# One backpropagation step: update the weights and biases on a batch
def update(batch_X, batch_T):
    global W1, W2, W3, b1, b2, b3
    # Forward pass
    Z1 = np.dot(batch_X, W1) + b1
    A1 = sigmoid(Z1)
    Z2 = np.dot(A1, W2) + b2
    A2 = sigmoid(Z2)
    Z3 = np.dot(A2, W3) + b3
    A3 = sigmoid(Z3)
    # Backward pass (the deltas carry the negative gradient, hence the += below)
    delta_A3 = batch_T - A3
    delta_Z3 = delta_A3 * dsigmoid(A3)
    delta_W3 = A2.T.dot(delta_Z3) / batch_X.shape[0]
    delta_B3 = np.sum(delta_Z3, axis=0) / batch_X.shape[0]
    delta_A2 = delta_Z3.dot(W3.T)
    delta_Z2 = delta_A2 * dsigmoid(A2)
    delta_W2 = A1.T.dot(delta_Z2) / batch_X.shape[0]
    delta_B2 = np.sum(delta_Z2, axis=0) / batch_X.shape[0]
    delta_A1 = delta_Z2.dot(W2.T)
    delta_Z1 = delta_A1 * dsigmoid(A1)
    delta_W1 = batch_X.T.dot(delta_Z1) / batch_X.shape[0]
    delta_B1 = np.sum(delta_Z1, axis=0) / batch_X.shape[0]
    # Gradient step
    W3 += lr * delta_W3
    W2 += lr * delta_W2
    W1 += lr * delta_W1
    b3 += lr * delta_B3
    b2 += lr * delta_B2
    b1 += lr * delta_B1

# Train the model with the chosen gradient descent variant
def train(gd_method='batch', batch_size=1):
    loss = []
    for idx_epoch in range(epochs):
        if gd_method == 'stochastic':
            # Stochastic GD: shuffle, then update on one sample at a time
            indices = np.random.permutation(X.shape[0])
            X_shuffled = X[indices]
            T_shuffled = T[indices]
            for i in range(X.shape[0]):
                batch_X = X_shuffled[i:i + 1]
                batch_T = T_shuffled[i:i + 1]
                update(batch_X, batch_T)
        elif gd_method == 'mini-batch':
            # Mini-batch GD: shuffle, then update on batches of `batch_size` samples
            indices = np.random.permutation(X.shape[0])
            X_shuffled = X[indices]
            T_shuffled = T[indices]
            max_batch = X.shape[0] // batch_size
            for idx_batch in range(max_batch):
                batch_X = X_shuffled[idx_batch * batch_size:(idx_batch + 1) * batch_size, :]
                batch_T = T_shuffled[idx_batch * batch_size:(idx_batch + 1) * batch_size, :]
                update(batch_X, batch_T)
        else:
            # Batch GD: one update per epoch on the full data set
            update(X, T)
        if idx_epoch % report == 0:
            # Evaluate the mean squared error on the whole data set
            A1 = sigmoid(np.dot(X, W1) + b1)
            A2 = sigmoid(np.dot(A1, W2) + b2)
            A3 = sigmoid(np.dot(A2, W3) + b3)
            mse = np.mean(np.square(T - A3) / 2)
            print(f'Epochs: {idx_epoch}, Loss: {mse}')
            loss.append(mse)
    plt.plot(range(0, epochs, report), loss)
    plt.xlabel('epochs')
    plt.ylabel('loss')
    plt.title(f'{gd_method.capitalize()} Gradient Descent')
    plt.show()

# Compare the three gradient descent methods, resetting the weights before each run
methods = ['batch', 'stochastic', 'mini-batch']
for method in methods:
    print(f'\nTraining with {method} gradient descent:')
    W1 = np.array([[0.8, 0.2], [0.2, 0.8]])
    W2 = np.array([[0.5, 0.0], [0.5, 1.0]])
    W3 = np.array([[0.5], [0.5]])
    b1 = np.array([[-1.0, 0.3]])
    b2 = np.array([[0.1, -0.1]])
    b3 = np.array([[-0.6]])
    train(method, batch_size=2 if method == 'mini-batch' else None)

# Prediction with the most recently trained weights
def predict(x):
    A1 = sigmoid(np.dot(x, W1) + b1)
    A2 = sigmoid(np.dot(A1, W2) + b2)
    A3 = sigmoid(np.dot(A2, W3) + b3)
    return 1 if A3[0, 0] >= 0.5 else 0

print('\nPredictions:')
for i, sample in enumerate(X):
    print(f'Sample {i + 1}: {predict(sample)}')
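For reference, the quantities computed in update() are the standard backpropagation equations for a sigmoid network trained on the mean squared error $E = \frac{1}{2N}\sum_{n}(t_n - a_{3,n})^2$, the same loss the training loop reports. With $N$ the batch size and $\odot$ denoting element-wise multiplication:

$$\delta_3 = (T - A_3) \odot A_3(1 - A_3), \qquad \Delta W_3 = \frac{1}{N} A_2^{\top}\delta_3, \qquad \Delta b_3 = \frac{1}{N}\sum_{n}\delta_{3,n}$$

$$\delta_2 = (\delta_3 W_3^{\top}) \odot A_2(1 - A_2), \qquad \delta_1 = (\delta_2 W_2^{\top}) \odot A_1(1 - A_1)$$

with the same $\Delta W$, $\Delta b$ pattern for layers 2 and 1. Because $\delta_3$ is built from $T - A_3$ (the negative derivative of $E$ with respect to $A_3$), the code applies the updates with a plus sign, e.g. W3 += lr * delta_W3, which is equivalent to gradient descent on $E$.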