Code
import numpy as np
import matplotlib.pyplot as plt
from scipy.io import loadmat
import math
import scipy.optimize as op
def displayData(X, example_width=None):
    """Display the rows of X as a grid of grayscale images."""
    if example_width is None:
        example_width = round(np.sqrt(X.shape[1]))
    m, n = X.shape
    rows = math.floor(np.sqrt(m))
    cols = math.ceil(m / rows)
    fig, ax_array = plt.subplots(
        nrows=rows, ncols=cols, sharey=True, sharex=True, figsize=(8, 8))
    for row in range(rows):
        for column in range(cols):
            ax_array[row, column].matshow(
                X[cols * row + column].reshape((example_width, example_width)),
                cmap='gray_r')
    plt.xticks([])
    plt.yticks([])
    plt.show()
def sigmoid(z):
    return 1 / (1 + np.exp(-z))

def sigmoidGradient(z):
    return sigmoid(z) * (1 - sigmoid(z))
def nnCostFunction(nn_params, input_layer_size, hidden_layer_size, num_labels, X, y, Lambda):
    # Recover Theta1 and Theta2 from the unrolled parameter vector
    Theta1 = nn_params[:hidden_layer_size * (input_layer_size + 1)]
    Theta1 = Theta1.reshape(hidden_layer_size, input_layer_size + 1)
    Theta2 = nn_params[hidden_layer_size * (input_layer_size + 1):]
    Theta2 = Theta2.reshape(num_labels, hidden_layer_size + 1)
    m, n = X.shape
    Theta1_grad = np.zeros_like(Theta1)
    Theta2_grad = np.zeros_like(Theta2)
    # The part students are asked to complete starts here
    ########################
    #        part 1        #
    ########################
    # Map y onto a one-hot matrix of 0s and 1s
    Y = np.zeros((X.shape[0], num_labels))  # (5000, 10)
    # y must be flattened here; otherwise broadcasting fills entire rows with 1
    Y[np.arange(m), y.flatten() - 1] = 1
    a1 = np.hstack((np.ones((m, 1)), X))    # (5000, 401)
    z2 = a1.dot(Theta1.T)                   # (5000, 25)
    a2 = sigmoid(z2)                        # (5000, 25)
    a2 = np.hstack((np.ones((m, 1)), a2))   # (5000, 26)
    z3 = a2.dot(Theta2.T)                   # (5000, 10)
    a3 = sigmoid(z3)
    h = a3                                  # (5000, 10)
    J = (1 / m) * np.sum(np.sum(-Y * np.log(h) - (1 - Y) * np.log(1 - h), axis=1))
    temp1 = Theta1.copy()  # (25, 401)
    temp2 = Theta2.copy()  # (10, 26)
    # The bias column is not regularized, so zero it out
    temp1[:, 0] = 0
    temp2[:, 0] = 0
    reg_item = Lambda / (2 * m) * (np.sum(temp1 ** 2) + np.sum(temp2 ** 2))
    J += reg_item
    ########################
    #        part 2        #
    ########################
    D1 = np.zeros_like(Theta1)  # (25, 401)
    D2 = np.zeros_like(Theta2)  # (10, 26)
    for i in range(m):
        a1 = X[i, :]
        a1 = np.hstack((1, a1))                 # (401,)
        a2 = sigmoid(Theta1.dot(a1))            # (25,)
        a2 = np.hstack((1, a2))                 # (26,)
        a3 = sigmoid(Theta2.dot(a2))            # (10,)
        delta3 = a3 - Y[i, :]                   # (10,)
        delta2 = Theta2.T.dot(delta3)
        delta2 = delta2[1:] * sigmoidGradient(Theta1.dot(a1))  # (25,)
        D2 += np.dot(delta3.reshape(-1, 1), a2.reshape(1, -1))
        D1 += np.dot(delta2.reshape(-1, 1), a1.reshape(1, -1))
    Theta1_grad = (1 / m) * D1
    Theta2_grad = (1 / m) * D2
    # Regularization terms; the bias column stays unregularized
    grad_item1 = Lambda / m * np.hstack((np.zeros((Theta1.shape[0], 1)), Theta1[:, 1:]))
    grad_item2 = Lambda / m * np.hstack((np.zeros((Theta2.shape[0], 1)), Theta2[:, 1:]))
    Theta1_grad += grad_item1
    Theta2_grad += grad_item2
    grad = np.vstack((Theta1_grad.reshape(-1, 1), Theta2_grad.reshape(-1, 1)))
    return J, grad
def randInitializeWeights(L_in, L_out):
    # Randomly initialize the weights of a layer with L_in inputs and L_out
    # outputs, breaking symmetry with small values in [-epsilon, epsilon]
    epsilon_init = 0.12
    W = np.random.rand(L_out, L_in + 1) * 2 * epsilon_init - epsilon_init
    return W
def debugInitializeWeights(fan_out, fan_in):
    # Deterministic "pseudo-random" weights so gradient checking is reproducible
    W = np.zeros((fan_out, 1 + fan_in))
    W = np.reshape(np.sin(np.arange(1, W.size + 1)), W.shape) / 10
    return W
def checkNNGradients(Lambda):
    input_layer_size = 3
    hidden_layer_size = 5
    num_labels = 3
    m = 5
    # We generate some 'random' test data
    Theta1 = debugInitializeWeights(hidden_layer_size, input_layer_size)
    Theta2 = debugInitializeWeights(num_labels, hidden_layer_size)
    # Reusing debugInitializeWeights to generate X
    X = debugInitializeWeights(m, input_layer_size - 1)
    y = 1 + np.mod(np.arange(1, m + 1), num_labels)
    # Unroll parameters
    nn_params = np.vstack((Theta1.reshape(-1, 1), Theta2.reshape(-1, 1)))
    cost, grad = nnCostFunction(nn_params, input_layer_size, hidden_layer_size,
                                num_labels, X, y, Lambda)
    # Numerically estimate the gradient with central differences
    numgrad = np.zeros(nn_params.shape)
    perturb = np.zeros(nn_params.shape)
    e = 1e-4
    for p in range(nn_params.size):
        # Perturb one parameter at a time
        perturb[p] = e
        loss1, grad1 = nnCostFunction(
            nn_params - perturb, input_layer_size, hidden_layer_size, num_labels, X, y, Lambda)
        loss2, grad2 = nnCostFunction(
            nn_params + perturb, input_layer_size, hidden_layer_size, num_labels, X, y, Lambda)
        numgrad[p] = (loss2 - loss1) / (2 * e)
        perturb[p] = 0
    for i in range(numgrad.size):
        print('{}----{}'.format(numgrad[i, 0], grad[i, 0]))
    print('\nThe above two columns you get should be very similar.')
    print('Left-Your Numerical Gradient, Right-Analytical Gradient\n')
    diff = np.linalg.norm(numgrad - grad) / np.linalg.norm(numgrad + grad)
    print('If your backpropagation implementation is correct, then')
    print('the relative difference will be small (less than 1e-9).')
    print('Relative Difference: {}\n'.format(diff))
def nnCost(nn_params, input_layer_size, hidden_layer_size, num_labels, X, y, Lambda):
    # Cost-only version of nnCostFunction, used as f for fmin_cg
    Theta1 = nn_params[:hidden_layer_size * (input_layer_size + 1)]
    Theta1 = Theta1.reshape(hidden_layer_size, input_layer_size + 1)
    Theta2 = nn_params[hidden_layer_size * (input_layer_size + 1):]
    Theta2 = Theta2.reshape(num_labels, hidden_layer_size + 1)
    m, n = X.shape
    # Map y onto a one-hot matrix of 0s and 1s (y must be flattened, see above)
    Y = np.zeros((X.shape[0], num_labels))  # (5000, 10)
    Y[np.arange(m), y.flatten() - 1] = 1
    a1 = np.hstack((np.ones((m, 1)), X))    # (5000, 401)
    z2 = a1.dot(Theta1.T)                   # (5000, 25)
    a2 = sigmoid(z2)                        # (5000, 25)
    a2 = np.hstack((np.ones((m, 1)), a2))   # (5000, 26)
    z3 = a2.dot(Theta2.T)                   # (5000, 10)
    a3 = sigmoid(z3)
    h = a3                                  # (5000, 10)
    J = (1 / m) * np.sum(np.sum(-Y * np.log(h) - (1 - Y) * np.log(1 - h), axis=1))
    temp1 = Theta1.copy()  # (25, 401)
    temp2 = Theta2.copy()  # (10, 26)
    # The bias column is not regularized, so zero it out
    temp1[:, 0] = 0
    temp2[:, 0] = 0
    reg_item = Lambda / (2 * m) * (np.sum(temp1 ** 2) + np.sum(temp2 ** 2))
    J += reg_item
    return J
def nnGrad(nn_params, input_layer_size, hidden_layer_size, num_labels, X, y, Lambda):
    # Gradient-only version of nnCostFunction, used as fprime for fmin_cg
    Theta1 = nn_params[:hidden_layer_size * (input_layer_size + 1)]
    Theta1 = Theta1.reshape(hidden_layer_size, input_layer_size + 1)
    Theta2 = nn_params[hidden_layer_size * (input_layer_size + 1):]
    Theta2 = Theta2.reshape(num_labels, hidden_layer_size + 1)
    m, n = X.shape
    Theta1_grad = np.zeros_like(Theta1)
    Theta2_grad = np.zeros_like(Theta2)
    # Map y onto a one-hot matrix of 0s and 1s
    Y = np.zeros((X.shape[0], num_labels))  # (5000, 10)
    Y[np.arange(m), y.flatten() - 1] = 1
    D1 = np.zeros_like(Theta1)  # (25, 401)
    D2 = np.zeros_like(Theta2)  # (10, 26)
    for i in range(m):
        a1 = X[i, :]
        a1 = np.hstack((1, a1))                 # (401,)
        a2 = sigmoid(Theta1.dot(a1))            # (25,)
        a2 = np.hstack((1, a2))                 # (26,)
        a3 = sigmoid(Theta2.dot(a2))            # (10,)
        delta3 = a3 - Y[i, :]                   # (10,)
        delta2 = Theta2.T.dot(delta3)
        delta2 = delta2[1:] * sigmoidGradient(Theta1.dot(a1))  # (25,)
        D2 += np.dot(delta3.reshape(-1, 1), a2.reshape(1, -1))
        D1 += np.dot(delta2.reshape(-1, 1), a1.reshape(1, -1))
    Theta1_grad = (1 / m) * D1
    Theta2_grad = (1 / m) * D2
    # Regularization terms; the bias column stays unregularized
    grad_item1 = Lambda / m * np.hstack((np.zeros((Theta1.shape[0], 1)), Theta1[:, 1:]))
    grad_item2 = Lambda / m * np.hstack((np.zeros((Theta2.shape[0], 1)), Theta2[:, 1:]))
    Theta1_grad += grad_item1
    Theta2_grad += grad_item2
    # fmin_cg expects a flat 1-D gradient vector
    grad = np.concatenate((Theta1_grad.flatten(), Theta2_grad.flatten()))
    return grad
def predict(Theta1, Theta2, X):
    m, n = X.shape
    num_labels = Theta2.shape[0]
    h1 = sigmoid(np.hstack((np.ones((m, 1)), X)).dot(Theta1.T))   # (5000, 25)
    h2 = sigmoid(np.hstack((np.ones((m, 1)), h1)).dot(Theta2.T))  # (5000, 10)
    p = np.argmax(h2, axis=1)  # (5000,)
    return p
if __name__ == '__main__':
    # Setup the parameters we will use for this exercise
    input_layer_size = 400
    hidden_layer_size = 25
    num_labels = 10

    # =========== Part 1: Loading and Visualizing Data =============
    # Load Training Data
    print('Loading and Visualizing Data ...')
    file = 'ex4data1.mat'
    data = loadmat(file)
    X = data['X']
    y = data['y']
    m, n = X.shape  # m: number of training examples, n: number of features
    mask = np.random.choice(m, size=100, replace=False)  # pick 100 random examples
    # displayData(X[mask])
    print('=' * 40)

    # ================ Part 2: Loading Parameters ================
    # In this part of the exercise, we load some pre-initialized neural network parameters.
    print('Loading Saved Neural Network Parameters ...')
    # Load the weights into variables Theta1 and Theta2
    pre_weights = loadmat('ex4weights.mat')
    Theta1 = pre_weights['Theta1']  # 25x401
    Theta2 = pre_weights['Theta2']  # 10x26
    nn_params = np.vstack((np.reshape(Theta1, (-1, 1)),
                           np.reshape(Theta2, (-1, 1))))
    print('=' * 40)

    # ================ Part 3: Compute Cost (Feedforward) ================
    print('Feedforward Using Neural Network ...')
    # Weight regularization parameter (we set this to 0 here)
    Lambda = 0
    J, grad = nnCostFunction(nn_params, input_layer_size, hidden_layer_size,
                             num_labels, X, y, Lambda)
    print('Cost at parameters (loaded from ex4weights):')
    print('(this value should be about 0.287629)\n', J)
    print('=' * 40)

    # =============== Part 4: Implement Regularization ===============
    print('Checking Cost Function (w/ Regularization) ...')
    Lambda = 1
    J, grad = nnCostFunction(nn_params, input_layer_size, hidden_layer_size,
                             num_labels, X, y, Lambda)
    print('Cost at parameters (loaded from ex4weights):')
    print('(this value should be about 0.383770)\n', J)
    print('=' * 40)

    # ================ Part 5: Sigmoid Gradient ================
    print('Evaluating sigmoid gradient...')
    test_num = np.array([-1, -0.5, 0, 0.5, 1])
    g = sigmoidGradient(test_num)
    print('Sigmoid gradient evaluated at [-1 -0.5 0 0.5 1]:\n', g)
    print('=' * 40)

    # ================ Part 6: Initializing Parameters ================
    print('Initializing Neural Network Parameters ...')
    initial_Theta1 = randInitializeWeights(input_layer_size, hidden_layer_size)
    initial_Theta2 = randInitializeWeights(hidden_layer_size, num_labels)
    # Unroll parameters
    initial_nn_params = np.vstack(
        (initial_Theta1.reshape(-1, 1), initial_Theta2.reshape(-1, 1)))

    # =============== Part 7: Implement Backpropagation ===============
    print('Checking Backpropagation...')
    # Check gradients by running checkNNGradients
    checkNNGradients(Lambda)
    print('=' * 40)

    # =============== Part 8: Implement Regularization ===============
    # Once your backpropagation implementation is correct, you should now
    # continue to implement the regularization with the cost and gradient
    print('Checking Backpropagation (w/ Regularization) ... ')
    # Check gradients by running checkNNGradients
    Lambda = 3
    checkNNGradients(Lambda)
    # Also output the costFunction debugging values
    debug_J, _ = nnCostFunction(nn_params, input_layer_size, hidden_layer_size,
                                num_labels, X, y, Lambda)
    print('Cost at (fixed) debugging parameters (w/ lambda = {}):\n{}\n'
          '(for lambda = 3, this value should be about 0.576051)'.format(Lambda, debug_J))
    print('=' * 40)

    # =================== Part 8: Training NN ===================
    # You have now implemented all the code necessary to train a neural network.
    print('Training Neural Network...\n(this may take a few minutes, please be patient)')
    # Solve for the parameters with the conjugate gradient method
    nnParam = op.fmin_cg(f=nnCost, x0=initial_nn_params, fprime=nnGrad,
                         args=(input_layer_size, hidden_layer_size,
                               num_labels, X, y, Lambda),
                         maxiter=50, disp=True)
    # To make later debugging easier, the trained parameters can be saved
    # and reloaded instead of retraining every time:
    # np.save('Training_Result', nnParam)
    # print(nnParam)
    Theta1 = np.reshape(nnParam[:hidden_layer_size * (input_layer_size + 1)],
                        (hidden_layer_size, input_layer_size + 1))
    Theta2 = np.reshape(nnParam[hidden_layer_size * (input_layer_size + 1):],
                        (num_labels, hidden_layer_size + 1))
    print('=' * 40)

    # ================= Part 9: Visualize Weights =================
    print('Visualizing Neural Network...')
    displayData(Theta1[:, 1:])
    print('=' * 40)

    # ================= Part 10: Implement Predict =================
    pred = predict(Theta1, Theta2, X) + 1  # labels are 1..10, argmax gives 0..9
    acc = np.mean(pred == y.flatten())
    print('The Accuracy is about:', acc)
    print('=' * 40)
    print('END')
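A side note on the commented-out np.save call in the training section: once the parameters have been saved, later runs can reload them instead of retraining. The following is only a minimal sketch, assuming the file Training_Result.npy produced by that call exists (np.save appends the .npy extension automatically):

# Reload previously trained parameters saved with np.save('Training_Result', nnParam)
nnParam = np.load('Training_Result.npy')
Theta1 = np.reshape(nnParam[:hidden_layer_size * (input_layer_size + 1)],
                    (hidden_layer_size, input_layer_size + 1))
Theta2 = np.reshape(nnParam[hidden_layer_size * (input_layer_size + 1):],
                    (num_labels, hidden_layer_size + 1))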
Worth noting
Unlike the earlier exercises, the optimization method used here to solve for the network parameters is the conjugate gradient method, scipy.optimize.fmin_cg(), as shown below:
nnParam = op.fmin_cg(f=nnCost, x0=initial_nn_params, fprime=nnGrad,
                     args=(input_layer_size, hidden_layer_size,
                           num_labels, X, y, Lambda),
                     maxiter=50, disp=True)
The network weights Theta obtained with this optimizer turn out to be quite accurate. It is another useful optimization function I have picked up while porting these MATLAB exercises to Python.
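For reference, the same training step can also be expressed through SciPy's generic scipy.optimize.minimize interface with method='CG'. This is only an equivalent sketch of the call above, not what the script actually uses, and it assumes nnCost, nnGrad, initial_nn_params and the other variables are in scope exactly as defined earlier:

import scipy.optimize as op

# Sketch: conjugate-gradient training via the generic minimize() front end.
# Assumes the nnCost/nnGrad functions and variables from the script above.
res = op.minimize(fun=nnCost,
                  x0=initial_nn_params.flatten(),  # minimize expects a 1-D x0
                  jac=nnGrad,
                  method='CG',
                  args=(input_layer_size, hidden_layer_size,
                        num_labels, X, y, Lambda),
                  options={'maxiter': 50, 'disp': True})
nnParam = res.x  # unrolled, trained parameter vector

Unlike fmin_cg, minimize returns an OptimizeResult object, so the trained weights come back in res.x rather than directly.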
Afterword
I actually finished this Python implementation of ex4 about half a month ago, but a heavy study load and various other chores kept me from uploading it until now. Overall, re-implementing this MATLAB code in Python by hand has been well worth it; the biggest benefit, of course, is getting much more comfortable with numpy.
This post walked through building a neural network model end to end: loading the data, initializing parameters, computing the cost with forward propagation, computing gradients with backpropagation, gradient checking, regularization, optimization with the conjugate gradient method, and evaluating prediction accuracy.