手写实现bpnn神经网络

最新推荐文章于 2024-04-20 23:22:57 发布

thereblue

最新推荐文章于 2024-04-20 23:22:57 发布

阅读量493

点赞数

CC 4.0 BY-SA版权

文章标签：深度学习 python 神经网络 机器学习

本文链接：https://blog.youkuaiyun.com/thereblue/article/details/117908000

Numpy实现BPNN:bpnn.py

import numpy as np

class bpnn(object):
    """Single-hidden-layer feed-forward network trained with backpropagation.

    Architecture: input -> tanh hidden layer -> softmax output layer, scored
    with categorical cross-entropy. This class never updates its own
    parameters: ``lossFun`` only returns gradients, ordered like
    ``self.weights``, so that an external optimizer can apply them.
    """

    def __init__(self, in_size, out_size, hidden_size, fc_size):
        """Create randomly initialised weights and zero biases.

        Args:
            in_size: Number of input features.
            out_size: Number of output classes.
            hidden_size: Number of hidden units.
            fc_size: Unused; kept so existing callers keep working.
        """
        # Weights: input -> hidden and hidden -> output.
        self.Wih = np.random.randn(hidden_size, in_size)
        self.Who = np.random.randn(out_size, hidden_size)

        # Biases of the hidden layer and of the output layer.
        self.bih = np.zeros((hidden_size, 1))
        self.bho = np.zeros((out_size, 1))

        # The list holds the very same array objects as the attributes, so an
        # optimizer that updates the list entries in place also updates the
        # network.
        self.weights = [self.Wih, self.Who, self.bih, self.bho]

        # Parameter names in the same order as ``self.weights``; only useful
        # for checking that no parameter was forgotten.
        self.names = ["Wih", "Who", "bih", "bho"]

    @staticmethod
    def _as_column(values):
        """Return ``values`` as a float ndarray of shape (n, 1)."""
        return np.asarray(values, dtype=float).reshape(-1, 1)

    def softmax_np(self, x):
        """Return the softmax of ``x`` taken over all of its elements."""
        # Subtracting the maximum keeps exp() from overflowing.
        shift_x = x - np.max(x)
        exp_x = np.exp(shift_x)
        return exp_x / np.sum(exp_x)

    def tanh_derivate(self, x, one):
        """Return ``one - x**2``: the tanh derivative, given the tanh OUTPUT.

        Args:
            x: Values of ``tanh(z)`` (not ``z`` itself).
            one: Array of ones broadcastable against ``x``.
        """
        return one - np.square(x)

    def lossFun(self, inputs, targets):
        """Run a forward and backward pass over a batch of samples.

        Args:
            inputs: Sequence of samples, each a flat sequence of ``in_size``
                numbers.
            targets: Sequence of target vectors (e.g. one-hot), one per
                sample, each with ``out_size`` entries.

        Returns:
            A tuple ``(loss, deltas, yo_softmax)`` where ``loss`` is the
            cross-entropy summed over all samples, ``deltas`` is
            ``[dWih, dWho, dbih, dbho]`` (gradients of that summed loss, in
            the order of ``self.weights``) and ``yo_softmax`` maps the sample
            index to its (out_size, 1) softmax output.
        """
        xs, ys, hidden, yo_softmax = {}, {}, {}, {}
        loss = 0.0

        # Forward pass.
        for t, sample in enumerate(inputs):
            xs[t] = self._as_column(sample)
            ys[t] = self._as_column(targets[t])

            hidden[t] = np.tanh(np.dot(self.Wih, xs[t]) + self.bih)
            logits = np.dot(self.Who, hidden[t]) + self.bho
            yo_softmax[t] = self.softmax_np(logits)

            # Log-softmax computed directly from the logits: this avoids
            # log(0) = -inf (and 0 * -inf = nan) when a probability underflows.
            shifted = logits - np.max(logits)
            log_probs = shifted - np.log(np.sum(np.exp(shifted)))

            # Accumulate, so the loss matches the gradients summed below.
            loss += -np.sum(np.multiply(ys[t], log_probs))

        dWih = np.zeros_like(self.Wih)
        dWho = np.zeros_like(self.Who)
        dbih = np.zeros_like(self.bih)
        dbho = np.zeros_like(self.bho)

        # Backward pass.
        for t in reversed(range(len(xs))):
            # Gradient of softmax + cross-entropy w.r.t. the output logits.
            d_out = yo_softmax[t] - ys[t]
            dbho += d_out
            dWho += np.dot(d_out, hidden[t].T)

            # Propagate through the hidden -> output weights and the tanh.
            d_hidden = np.multiply(
                np.dot(self.Who.T, d_out),
                self.tanh_derivate(hidden[t], np.ones_like(hidden[t])),
            )
            dWih += np.dot(d_hidden, xs[t].T)
            dbih += d_hidden

        # Parameters are intentionally not updated here; the optimizer does it.
        deltas = [dWih, dWho, dbih, dbho]

        return loss, deltas, yo_softmax

优化器部分：optimizers.py

import numpy as np


class Adagrad(object):
    """Adagrad optimizer that updates NumPy parameter arrays in place."""

    def __init__(self, weights, learning_rate):
        """Allocate one squared-gradient accumulator per parameter.

        Args:
            weights: Iterable of parameter arrays; only their shapes and
                dtypes are used here.
            learning_rate: Base step size.
        """
        self.lr = learning_rate
        self.mems = [np.zeros_like(tensor) for tensor in weights]

    def update_weights(self, params, dparams):
        """Apply one Adagrad step to every parameter, in place.

        Args:
            params: Parameter arrays, in the same order as the ``weights``
                passed to the constructor. They are modified in place.
            dparams: Gradients matching ``params`` one-to-one.
        """
        for param, dparam, mem in zip(params, dparams, self.mems):
            # Clip so that gradients don't explode.
            dparam = np.clip(dparam, -1, 1)
            # np.square keeps the accumulation element-wise for any array type.
            mem += np.square(dparam)
            # Adagrad update; the small constant avoids division by zero.
            param += -self.lr / np.sqrt(mem + 1e-7) * dparam

写在最后

我没有加入train.py文件，实际上如果对上述的两个文件有所了解，那么最后的train.py文件很容易写，仅仅是循环。
对初学者来说，在神经网络的实现过程中，比较麻烦的可能是反向传播的部分。但是，这个过程其实可以仅仅看作是求微分的过程：按照链式法则从后向前逐层求微分，得到各个参数的梯度，最终用优化器更新整体的权重。
softmax也很有意思。在本篇文章中，对经过softmax激活的fc层使用的损失函数是多分类的交叉熵。注意这里的多分类交叉熵和二分类交叉熵是有区别的，但它们的设计确实很巧妙：即使公式看起来十分复杂，经过微分后却十分简单（对应代码中的 dy = softmax输出 − 目标值）。这个地方我推荐一篇文章：https://zhuanlan.zhihu.com/p/99923080 ，有兴趣的可以看看，文章中对交叉熵、MSE的微分写得十分详细。当然，我不推荐对交叉熵或者MSE本身了解不多的人去看这篇文章，因为它对交叉熵本质的介绍并不多，更多的是公式和微分的推导。
后续可能还会更新些别的手写深度神经网络，具体的看心情吧。