import h5py
import matplotlib.pyplot as plt
import numpy as np
def load_dataset(train_path='./train_catvnoncat.h5', test_path='./test_catvnoncat.h5'):
    """Load the cat / non-cat HDF5 datasets and flatten them for a dense network.

    Each file is expected to hold an image dataset (``train_set_x`` /
    ``test_set_x``) and a label dataset (``train_set_y`` / ``test_set_y``).
    According to the notes kept with the original code, the training file
    contains 209 images of shape 64x64x3 (uint8) and 209 int64 labels.

    Args:
        train_path: Path of the training HDF5 file.
        test_path: Path of the test HDF5 file.

    Returns:
        A tuple ``(train_set_x, train_set_y, test_set_x, test_set_y)`` where
        the ``x`` arrays have shape ``(features, samples)`` (one flattened
        image per column) and the ``y`` arrays have shape ``(1, samples)``.
    """
    # The context managers close the HDF5 handles; slicing with [:] copies
    # the data into memory before the file goes away.
    with h5py.File(train_path, 'r') as train_dataset:
        train_set_x = np.array(train_dataset["train_set_x"][:])
        train_set_y = np.array(train_dataset["train_set_y"][:])
    with h5py.File(test_path, 'r') as test_dataset:
        test_set_x = np.array(test_dataset["test_set_x"][:])
        test_set_y = np.array(test_dataset["test_set_y"][:])

    # Flatten every image to one row (-1 multiplies the remaining axes), then
    # transpose so that each column is one sample.
    train_set_x = train_set_x.reshape(train_set_x.shape[0], -1).T
    # Each split must be reshaped by its OWN sample count; the training array
    # has already been transposed at this point, so its shape[0] is the
    # feature count, not a sample count.
    test_set_x = test_set_x.reshape(test_set_x.shape[0], -1).T

    # Turn the label vectors into (1, samples) row vectors.
    train_set_y = train_set_y.reshape(train_set_y.shape[0], -1).T
    # The test labels come from the test file, not from the training labels.
    test_set_y = test_set_y.reshape(test_set_y.shape[0], -1).T
    return train_set_x, train_set_y, test_set_x, test_set_y
def init_parameters(fc_net):
    """Create the weight matrices and bias vectors of a fully connected net.

    Args:
        fc_net: Sequence of layer sizes, input layer first.

    Returns:
        Dict with keys ``"W1", "b1", "W2", "b2", ...``. ``W<k>`` is drawn from
        a standard normal distribution with shape
        ``(fc_net[k], fc_net[k-1])``; ``b<k>`` is a zero column vector of
        shape ``(fc_net[k], 1)``.
    """
    parameters = {}
    # Pair each layer size with the size of the layer feeding into it.
    size_pairs = zip(fc_net, fc_net[1:])
    for layer, (fan_in, fan_out) in enumerate(size_pairs, start=1):
        parameters[f"W{layer}"] = np.random.randn(fan_out, fan_in)
        parameters[f"b{layer}"] = np.zeros((fan_out, 1))
    return parameters
def sigmoid(Z):
    """Apply the logistic function ``1 / (1 + e^-Z)`` element-wise."""
    exp_neg = np.exp(-Z)
    return 1 / (1 + exp_neg)
def forward_pass(A0, parameters):
    """Run the input through every layer, applying a sigmoid after each one.

    Args:
        A0: Input activations; multiplied on the left by ``W1``.
        parameters: Dict holding ``W<k>`` / ``b<k>`` for layers ``1..N``
            (two entries per layer, so ``N = len(parameters) // 2``).

    Returns:
        ``(AL, cache)`` where ``AL`` is the activation of the last layer and
        ``cache`` maps ``"A0"``, ``"Z1"``, ``"A1"``, ... to the intermediate
        values, for use by the backward pass.
    """
    depth = len(parameters) // 2
    cache = {"A0": A0}
    activation = A0
    for layer in range(1, depth + 1):
        weights = parameters[f"W{layer}"]
        bias = parameters[f"b{layer}"]
        # The bias has a single column; broadcasting spreads it over all
        # columns of the matrix product.
        pre_activation = np.dot(weights, activation) + bias
        activation = sigmoid(pre_activation)
        cache[f"Z{layer}"] = pre_activation
        cache[f"A{layer}"] = activation
    return activation, cache
def compute_loss(AL, Y):
    """Return the mean over samples of the halved squared error.

    Args:
        AL: Network output, same shape as ``Y``.
        Y: Targets; the sample count is taken from ``Y.shape[1]``.

    Returns:
        ``(1/m) * sum(0.5 * (AL - Y)**2)`` with ``m = Y.shape[1]``.
    """
    sample_count = Y.shape[1]
    error = AL - Y
    half_squared = (1 / 2) * error * error
    return (1 / sample_count) * np.sum(half_squared)
def backward_pass(AL, parameters, cache, Y):
    """Back-propagate the squared-error loss through an all-sigmoid network.

    Args:
        AL: Output of the last layer, as returned by the forward pass.
        parameters: Dict holding ``W<k>`` / ``b<k>`` for layers ``1..N``
            (``N = len(parameters) // 2``).
        cache: Dict holding the activations ``"A0"`` .. ``"A<N-1>"`` saved
            during the forward pass.
        Y: Targets, shaped like ``AL``; the sample count is ``Y.shape[1]``.

    Returns:
        Dict with keys ``"dW<k>"`` and ``"db<k>"`` for every layer, each
        averaged over the samples. Empty when ``parameters`` has no layers.
    """
    m = Y.shape[1]
    gradients = {}
    layer_count = len(parameters) // 2

    # Error at the output layer: dJ/dA * sigmoid'(Z), with
    # sigmoid'(Z) = A * (1 - A).
    dZ = (AL - Y) * (AL * (1 - AL))

    # Walk from the last layer down to the first.
    for layer in range(layer_count, 0, -1):
        A_prev = cache[f"A{layer - 1}"]
        gradients[f"dW{layer}"] = (1 / m) * np.dot(dZ, A_prev.T)
        # axis=1 sums over samples; keepdims keeps the column-vector shape.
        gradients[f"db{layer}"] = (1 / m) * np.sum(dZ, axis=1, keepdims=True)
        if layer > 1:
            # Push the error one layer down. The sigmoid derivative must be
            # taken at THAT layer's own activation (A_prev), not at the
            # network output AL.
            dZ = np.dot(parameters[f"W{layer}"].T, dZ) * (A_prev * (1 - A_prev))
    return gradients
def update_parameters(gredient, parameters, LearnRate):
    """Apply one gradient-descent step to every weight and bias.

    For each layer: ``W := W - LearnRate * dW`` and
    ``b := b - LearnRate * db``.

    Args:
        gredient: Dict holding ``"dW<k>"`` / ``"db<k>"`` for every layer.
        parameters: Dict holding ``"W<k>"`` / ``"b<k>"``; its entries are
            rebound to the updated arrays.
        LearnRate: Step size.

    Returns:
        The same ``parameters`` dict, after the update.
    """
    depth = len(parameters) // 2
    for layer in range(1, depth + 1):
        for prefix in ("W", "b"):
            key = f"{prefix}{layer}"
            step = LearnRate * gredient["d" + key]
            parameters[key] = parameters[key] - step
    return parameters
if __name__ == '__main__':
    # 1. Load the flattened image data and the labels.
    train_x, train_y, test_x, test_y = load_dataset()

    # 2. Scale the pixel values by 1/255.
    train_x = train_x / 255.0
    test_x = test_x / 255.0

    # 3. Layer sizes of the fully connected net; 12288 is the number of
    #    input pixels per image.
    layer_sizes = [12288, 4, 3, 2, 1]

    # 4. Initialise the weights and biases.
    parameters = init_parameters(layer_sizes)

    # 5. Training settings: number of gradient updates and the step size.
    total_steps = 500
    step_size = 0.01

    # Loss values sampled every 10th iteration, for plotting.
    costs = []
    for step in range(total_steps):
        # Forward pass, then the loss over all training samples.
        AL, cache = forward_pass(train_x, parameters)
        loss = compute_loss(AL, train_y)
        if not step % 10:
            costs.append(loss)
            print("loss == ", loss)
        # Backward pass followed by one gradient-descent update.
        gradients = backward_pass(AL, parameters, cache, train_y)
        parameters = update_parameters(gradients, parameters, step_size)

    plt.plot(costs, 'p')
    plt.xlabel("opooc-iteration")
    plt.ylabel("opooc-cost")
    plt.show()
# Code practice: the back-propagation algorithm for an MLP.
# (Scraped page footer: "latest recommended article published 2024-09-04 19:53:00".)