Error Backpropagation Illustrated with a Personnel-Recruitment Example (Question 3)

import numpy as np
import matplotlib.pyplot as plt

# Input data
X = np.array([[1.0, 0.1], [0.1, 1.0], [0.1, 0.1], [1.0, 1.0]])
# Labels
T = np.array([[1], [0], [0], [1]])

# A neural network with two hidden layers: 2-2-2-1
W1 = np.array([[0.8, 0.2], [0.2, 0.8]])
W2 = np.array([[0.5, 0.0], [0.5, 1.0]])
W3 = np.array([[0.5], [0.5]])
b1 = np.array([[-1.0, 0.3]])
b2 = np.array([[0.1, -0.1]])
b3 = np.array([[-0.6]])

lr = 0.1        # learning rate
epochs = 10000  # number of training epochs
report = 1000   # report the loss every `report` epochs

# Sigmoid activation and its derivative
def sigmoid(x):
    return 1 / (1 + np.exp(-x))

def dsigmoid(x):
    # Takes the activation a = sigmoid(z), not z itself: sigma'(z) = a * (1 - a)
    return x * (1 - x)

# One backpropagation step: update the weights and biases on a batch
def update(batch_X, batch_T):
    global W1, W2, W3, b1, b2, b3
    # Forward pass
    Z1 = np.dot(batch_X, W1) + b1
    A1 = sigmoid(Z1)
    Z2 = np.dot(A1, W2) + b2
    A2 = sigmoid(Z2)
    Z3 = np.dot(A2, W3) + b3
    A3 = sigmoid(Z3)
    # Backward pass (the deltas carry the negative gradient, hence the += below)
    delta_A3 = batch_T - A3
    delta_Z3 = delta_A3 * dsigmoid(A3)
    delta_W3 = A2.T.dot(delta_Z3) / batch_X.shape[0]
    delta_B3 = np.sum(delta_Z3, axis=0) / batch_X.shape[0]
    delta_A2 = delta_Z3.dot(W3.T)
    delta_Z2 = delta_A2 * dsigmoid(A2)
    delta_W2 = A1.T.dot(delta_Z2) / batch_X.shape[0]
    delta_B2 = np.sum(delta_Z2, axis=0) / batch_X.shape[0]
    delta_A1 = delta_Z2.dot(W2.T)
    delta_Z1 = delta_A1 * dsigmoid(A1)
    delta_W1 = batch_X.T.dot(delta_Z1) / batch_X.shape[0]
    delta_B1 = np.sum(delta_Z1, axis=0) / batch_X.shape[0]
    # Gradient step
    W3 += lr * delta_W3
    W2 += lr * delta_W2
    W1 += lr * delta_W1
    b3 += lr * delta_B3
    b2 += lr * delta_B2
    b1 += lr * delta_B1

# Train the model with the chosen gradient descent variant
def train(gd_method='batch', batch_size=1):
    loss = []
    for idx_epoch in range(epochs):
        if gd_method == 'stochastic':
            # Stochastic GD: shuffle, then update on one sample at a time
            indices = np.random.permutation(X.shape[0])
            X_shuffled = X[indices]
            T_shuffled = T[indices]
            for i in range(X.shape[0]):
                batch_X = X_shuffled[i:i + 1]
                batch_T = T_shuffled[i:i + 1]
                update(batch_X, batch_T)
        elif gd_method == 'mini-batch':
            # Mini-batch GD: shuffle, then update on batches of `batch_size` samples
            indices = np.random.permutation(X.shape[0])
            X_shuffled = X[indices]
            T_shuffled = T[indices]
            max_batch = X.shape[0] // batch_size
            for idx_batch in range(max_batch):
                batch_X = X_shuffled[idx_batch * batch_size:(idx_batch + 1) * batch_size, :]
                batch_T = T_shuffled[idx_batch * batch_size:(idx_batch + 1) * batch_size, :]
                update(batch_X, batch_T)
        else:
            # Batch GD: one update per epoch on the full data set
            update(X, T)
        if idx_epoch % report == 0:
            # Evaluate the mean squared error on the whole data set
            A1 = sigmoid(np.dot(X, W1) + b1)
            A2 = sigmoid(np.dot(A1, W2) + b2)
            A3 = sigmoid(np.dot(A2, W3) + b3)
            mse = np.mean(np.square(T - A3) / 2)
            print(f'Epochs: {idx_epoch}, Loss: {mse}')
            loss.append(mse)
    plt.plot(range(0, epochs, report), loss)
    plt.xlabel('epochs')
    plt.ylabel('loss')
    plt.title(f'{gd_method.capitalize()} Gradient Descent')
    plt.show()

# Compare the three gradient descent methods, resetting the weights before each run
methods = ['batch', 'stochastic', 'mini-batch']
for method in methods:
    print(f'\nTraining with {method} gradient descent:')
    W1 = np.array([[0.8, 0.2], [0.2, 0.8]])
    W2 = np.array([[0.5, 0.0], [0.5, 1.0]])
    W3 = np.array([[0.5], [0.5]])
    b1 = np.array([[-1.0, 0.3]])
    b2 = np.array([[0.1, -0.1]])
    b3 = np.array([[-0.6]])
    train(method, batch_size=2 if method == 'mini-batch' else None)

# Prediction with the most recently trained weights
def predict(x):
    A1 = sigmoid(np.dot(x, W1) + b1)
    A2 = sigmoid(np.dot(A1, W2) + b2)
    A3 = sigmoid(np.dot(A2, W3) + b3)
    return 1 if A3[0, 0] >= 0.5 else 0

print('\nPredictions:')
for i, sample in enumerate(X):
    print(f'Sample {i + 1}: {predict(sample)}')
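For reference, the quantities computed in update() are the standard backpropagation equations for a sigmoid network trained on the mean squared error $E = \frac{1}{2N}\sum_{n}(t_n - a_{3,n})^2$, the same loss the training loop reports. With $N$ the batch size and $\odot$ denoting element-wise multiplication:

$$\delta_3 = (T - A_3) \odot A_3(1 - A_3), \qquad \Delta W_3 = \frac{1}{N} A_2^{\top}\delta_3, \qquad \Delta b_3 = \frac{1}{N}\sum_{n}\delta_{3,n}$$

$$\delta_2 = (\delta_3 W_3^{\top}) \odot A_2(1 - A_2), \qquad \delta_1 = (\delta_2 W_2^{\top}) \odot A_1(1 - A_1)$$

with the same $\Delta W$, $\Delta b$ pattern for layers 2 and 1. Because $\delta_3$ is built from $T - A_3$ (the negative derivative of $E$ with respect to $A_3$), the code applies the updates with a plus sign, e.g. W3 += lr * delta_W3, which is equivalent to gradient descent on $E$.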