# Python gradient descent code
import numpy as np
import random
# Two-layer neural network: n input features, k neurons in the first
# (hidden) layer and a single neuron in the second (output) layer.
# The network output is computed with vectorised forward propagation.
# The hidden layer uses ReLU and the output layer uses sigmoid.
# Vanishing or exploding gradients may occur.

# Number of input features.
n = 3
# Width of the hidden layer (k is n1).
k = 4
# Decay rate of the exponentially weighted average of squared gradients that
# adapts the learning rate; it works remarkably well. Setting it to 0 disables
# the averaging (only the current squared gradient is then used).
beta_2 = 0.999
# beta_2 = 0
# Batch size. Both randint bounds are 100, so m is always 100.
m = random.randint(100, 100)

# A "teacher" network with the same architecture generates the training set.
w_t_1 = np.random.randn(n, k)
b_t_1 = 0.1 * np.random.rand(1, k)
w_t_2 = np.random.randn(k, 1)
b_t_2 = 0.1 * np.random.rand(1, 1)
def network_function(fa_0, fw_t_1, fb_t_1, fw_t_2, fb_t_2):
    """Run the two-layer network forward and return its output activations.

    Args:
        fa_0: Input batch, one sample per row.
        fw_t_1: First-layer weights; ``fa_0`` is multiplied by it.
        fb_t_1: First-layer bias, added to the hidden pre-activations.
        fw_t_2: Second-layer weights; the hidden activations are multiplied
            by it.
        fb_t_2: Second-layer bias, added to the output pre-activations.

    Returns:
        The sigmoid of the second-layer pre-activations.
    """
    fz_t_1 = np.dot(fa_0, fw_t_1) + fb_t_1
    # ReLU: negative pre-activations become 0.
    fa_t_1 = np.where(fz_t_1 < 0, 0, fz_t_1)
    fz_t_2 = np.dot(fa_t_1, fw_t_2) + fb_t_2
    # Sigmoid written in terms of exp(-|z|), which lies in (0, 1] and therefore
    # cannot overflow:  z >= 0 -> 1 / (1 + e),  z < 0 -> e / (1 + e).
    exp_neg_abs = np.exp(-np.abs(fz_t_2))
    fa_t_2 = np.where(fz_t_2 >= 0, 1.0, exp_neg_abs) / (1.0 + exp_neg_abs)
    return fa_t_2
def loss_function(la1, la2):
    """Return the mean binary cross-entropy of predictions against labels.

    Args:
        la1: Target labels, one sample per row.
        la2: Predicted probabilities, same shape as ``la1``.

    Returns:
        The summed element-wise cross-entropy divided by the number of rows
        in ``la1``.
    """
    # Small offset that keeps log() away from log(0).
    eps = 0.00000001
    la1 = np.asarray(la1)
    la2 = np.asarray(la2)
    l_mat = -la1 * np.log(la2 + eps) - (1 - la1) * np.log(1 - la2 + eps)
    # Normalise by the size of the batch actually passed in rather than by a
    # module-level constant, so the mean is right for any batch size.
    batch_size = la1.shape[0] if la1.ndim else 1
    return l_mat.sum() / batch_size
# Learning rate.
p = 0.001
# Number of training iterations.
epoch = 2_000_000

# Initialise the trainable parameters: small random weights, zero biases, and
# zero running averages of the squared weight gradients (s_w_*).
w_1 = 0.01 * np.random.randn(n, k)
s_w_1 = np.zeros((n, k))
b_1 = np.zeros((1, k))
w_2 = 0.01 * np.random.randn(k, 1)
s_w_2 = np.zeros((k, 1))
b_2 = np.zeros((1, 1))

# Alternative start: begin from the parameters that generate the data.
# w_1 = w_t_1
# b_1 = b_t_1
# w_2 = w_t_2
# b_2 = b_t_2

# Becomes True once the reported loss has dropped below the stop threshold.
b_k = False
# Training loop: each step draws a fresh random batch, labels it with the
# data-generating network (w_t_*, b_t_*), and takes one gradient step on the
# trainable parameters (w_*, b_*).
for i in range(epoch):
    # Build a random batch of m samples with 0/1 targets.
    a_0 = np.random.randn(m, n)
    assert a_0.shape == (m, n)
    a_t_2 = network_function(a_0, w_t_1, b_t_1, w_t_2, b_t_2)
    # Decision threshold; other values such as 0.4 or 0.3 can be used.
    a_t_2 = np.where(a_t_2 > 0.5, 1.0, 0.0)

    # Forward pass.
    z_1 = np.dot(a_0, w_1) + b_1
    assert z_1.shape == (m, k)
    # ReLU
    a_1 = np.where(z_1 < 0, 0, z_1)
    # The output layer is still a sigmoid. It is evaluated through exp(-|z|),
    # which lies in (0, 1], so a large negative z_2 cannot overflow exp().
    z_2 = np.dot(a_1, w_2) + b_2
    exp_neg_abs = np.exp(-np.abs(z_2))
    a_2 = np.where(z_2 >= 0, 1.0, exp_neg_abs) / (1.0 + exp_neg_abs)

    # Backward pass. All gradients are computed before any parameter is
    # changed, so the hidden-layer gradient sees the same w_2 that produced
    # a_2 in the forward pass.
    # Second layer: for sigmoid + cross-entropy, dL/dz_2 = a_2 - target.
    da_2 = a_2 - a_t_2
    assert da_2.shape == (m, 1)
    dw2 = np.dot(a_1.T, da_2) / m
    db2 = da_2.sum() / m
    # First layer: propagate through w_2 and the ReLU derivative.
    dg_1 = (a_1 > 0).astype(int)
    dz_1 = np.dot(da_2, w_2.T) * dg_1
    dw1 = np.dot(a_0.T, dz_1) / m
    db1 = dz_1.sum(axis=0, keepdims=True) / m

    # Exponentially weighted averages of the squared weight gradients.
    s_w_2 = beta_2 * s_w_2 + (1 - beta_2) * dw2 * dw2
    s_w_1 = beta_2 * s_w_1 + (1 - beta_2) * dw1 * dw1

    # Parameter update: weights use the per-element adaptive step size,
    # biases use the plain learning rate.
    w_2 -= p / np.sqrt(s_w_2 + 0.00000001) * dw2
    b_2 -= p * db2
    w_1 -= p / np.sqrt(s_w_1 + 0.00000001) * dw1
    b_1 -= p * db1

    # Print the loss every 10000 steps. Once a printed loss is below the
    # threshold, run one more batch, print its loss, and stop.
    # NOTE(review): the nesting of this stop logic was reconstructed from
    # source that had lost its indentation -- confirm it matches the intent.
    if b_k or i % 10000 == 0:
        llff = loss_function(a_t_2, a_2)
        print(llff)
    if b_k:
        break
    if llff < 0.0000001:
        b_k = True
# After training, the final a_2 is close to a_t_2.