Building the Neural Network Model
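With the dataset and helpers from the earlier sections in place (the globals X, y, num_examples, nn_input_dim, nn_output_dim, epsilon, and reg_lambda, plus the calculate_loss and plot_decision_boundary functions), we can now write the full training function. build_model runs batch gradient descent: a tanh hidden layer feeds a softmax output, the cross-entropy gradient is backpropagated, and L2 regularization is applied to the weight matrices.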
# Full training / model-building function
def build_model(nn_hdim, num_passes=20000, print_loss=False):
    '''
    Arguments:
    1) nn_hdim: number of nodes in the hidden layer
    2) num_passes: number of gradient-descent iterations
    3) print_loss: if True, print the current loss every 1000 iterations
    '''
    # Initialize the parameters to random values
    np.random.seed(0)
    W1 = np.random.randn(nn_input_dim, nn_hdim) / np.sqrt(nn_input_dim)
    b1 = np.zeros((1, nn_hdim))
    W2 = np.random.randn(nn_hdim, nn_output_dim) / np.sqrt(nn_hdim)
    b2 = np.zeros((1, nn_output_dim))

    # This is what we return at the end: the learned model
    model = {}

    # Gradient descent
    for i in range(0, num_passes):
        # Forward propagation
        z1 = X.dot(W1) + b1
        a1 = np.tanh(z1)
        z2 = a1.dot(W2) + b2
        exp_scores = np.exp(z2)
        probs = exp_scores / np.sum(exp_scores, axis=1, keepdims=True)  # softmax

        # Backpropagation
        delta3 = probs
        delta3[range(num_examples), y] -= 1  # gradient of cross-entropy w.r.t. z2
        dW2 = (a1.T).dot(delta3)
        db2 = np.sum(delta3, axis=0, keepdims=True)
        delta2 = delta3.dot(W2.T) * (1 - np.power(a1, 2))  # tanh'(z1) = 1 - a1^2
        dW1 = np.dot(X.T, delta2)
        db1 = np.sum(delta2, axis=0)

        # Add the regularization terms (b1 and b2 are not regularized)
        dW2 += reg_lambda * W2
        dW1 += reg_lambda * W1

        # Gradient-descent parameter update
        W1 += -epsilon * dW1
        b1 += -epsilon * db1
        W2 += -epsilon * dW2
        b2 += -epsilon * db2

        # The model is really nothing more than these weights
        model = {'W1': W1, 'b1': b1, 'W2': W2, 'b2': b2}

        # Optionally print the loss so we can track progress
        if print_loss and i % 1000 == 0:
            print("Loss after iteration %i: %f" % (i, calculate_loss(model)))

    return model
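build_model reports progress through calculate_loss, which is defined earlier in the post. For readers looking at this section in isolation, here is a minimal sketch of a compatible loss function; the helper name calculate_loss_sketch and the exact regularization scaling are assumptions, not necessarily identical to the original helper.

# A minimal sketch of the cross-entropy loss that build_model reports.
# Assumes the same globals (X, y, num_examples, reg_lambda) used above;
# the actual calculate_loss defined earlier may differ in detail.
def calculate_loss_sketch(model):
    W1, b1, W2, b2 = model['W1'], model['b1'], model['W2'], model['b2']
    # Forward propagation, identical to build_model
    z1 = X.dot(W1) + b1
    a1 = np.tanh(z1)
    z2 = a1.dot(W2) + b2
    exp_scores = np.exp(z2)
    probs = exp_scores / np.sum(exp_scores, axis=1, keepdims=True)
    # Cross-entropy: negative log-probability of the true class, summed
    correct_logprobs = -np.log(probs[range(num_examples), y])
    data_loss = np.sum(correct_logprobs)
    # L2 regularization on the weight matrices (not the biases)
    data_loss += reg_lambda / 2 * (np.sum(np.square(W1)) + np.sum(np.square(W2)))
    return 1. / num_examples * data_loss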
# Prediction function: returns the most likely class
def predict(model, x):
    W1, b1, W2, b2 = model['W1'], model['b1'], model['W2'], model['b2']
    # Forward propagation
    z1 = x.dot(W1) + b1
    a1 = np.tanh(z1)
    z2 = a1.dot(W2) + b2
    exp_scores = np.exp(z2)
    # Softmax probabilities; output the class with the highest probability
    probs = exp_scores / np.sum(exp_scores, axis=1, keepdims=True)
    return np.argmax(probs, axis=1)
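Two details worth noting: since the softmax preserves the ordering of the scores, np.argmax(z2, axis=1) would return exactly the same labels, so the explicit probabilities here simply mirror the training code; and x must be a 2-D array of shape (n_samples, nn_input_dim), even when classifying a single point.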
# Build a neural network with a 3-node (3-neuron) hidden layer
model = build_model(3, print_loss=True)

# Then plot the decision boundary
plot_decision_boundary(lambda x: predict(model, x))
plt.title("Decision Boundary for hidden layer size 3")
plt.show()
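The plot_decision_boundary helper also comes from the earlier setup code. If you are following along without it, a minimal sketch of a compatible version is below; the name plot_decision_boundary_sketch, the grid step h, and the Spectral color map are illustrative assumptions.

# A minimal sketch of a compatible plot_decision_boundary helper.
# Assumes the globals X (2-D features) and y (integer labels) used above;
# the step size h and the color map are assumptions, not the original's choices.
def plot_decision_boundary_sketch(pred_func):
    # Pad the plotting area slightly beyond the data range
    x_min, x_max = X[:, 0].min() - .5, X[:, 0].max() + .5
    y_min, y_max = X[:, 1].min() - .5, X[:, 1].max() + .5
    h = 0.01
    xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))
    # Predict a class for every grid point and draw the resulting regions
    Z = pred_func(np.c_[xx.ravel(), yy.ravel()])
    Z = Z.reshape(xx.shape)
    plt.contourf(xx, yy, Z, cmap=plt.cm.Spectral)
    plt.scatter(X[:, 0], X[:, 1], c=y, cmap=plt.cm.Spectral)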