A few concepts
1. A BP (back-propagation) neural network is a multi-layer feed-forward network trained with the error back-propagation algorithm. Its typical structure consists of an input layer, one or more hidden layers, and an output layer.
2. In the forward pass, an input sample enters through the input layer, is processed layer by layer by the hidden layers, and is then passed on to the output layer. If the actual output of the output layer does not match the desired output, the algorithm moves to the error back-propagation phase.
3. In the backward pass, the output error is propagated back layer by layer through the hidden layers toward the input layer, and the error is apportioned to all the units in each layer. This gives each unit an error signal, which is then used as the basis for adjusting that unit's weights.
4. The S-shaped nonlinear activation function (sigmoid) has a range between 0 and 1 (so its output can be read as a probability), is monotonically increasing, and is continuous and differentiable. (The code below actually uses tanh, whose output lies between -1 and 1; it plays the same role, and its derivative is checked in the short sketch after this list.)
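The derivative in point 4 is exactly what back-propagation needs. Here is a minimal numerical sketch (my own, not part of the original code) checking the identity that the dsigmoid() function below relies on: the derivative of tanh can be written purely in terms of its output y as 1 - y**2.

import math

x = 0.3
y = math.tanh(x)
h = 1e-6
numeric = (math.tanh(x + h) - math.tanh(x - h)) / (2 * h)  # central-difference estimate
analytic = 1.0 - y ** 2                                     # same formula as dsigmoid(y)
print(numeric, analytic)  # the two values agree to many decimal places
# for the logistic sigmoid 1/(1+e^-x), the analogous identity would be y * (1 - y)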
Code walkthrough
import math
import random

random.seed(0)

# calculate a random number where: a <= rand < b
def rand(a, b):
    return (b - a) * random.random() + a

# build an I x J matrix (NumPy would be faster); here it holds the weights
# on the connections between two layers
def makeMatrix(I, J, fill=0.0):
    m = []
    for i in range(I):
        m.append([fill] * J)
    return m

# our nonlinear activation function; tanh is a little nicer than the
# standard logistic sigmoid 1/(1+e^-x)
def sigmoid(x):
    return math.tanh(x)

# derivative of our activation function, in terms of the output (i.e. y)
def dsigmoid(y):
    return 1.0 - y ** 2

class NN:
    def __init__(self, ni, nh, no):
        # number of input, hidden, and output nodes
        self.ni = ni + 1  # +1 for bias node
        self.nh = nh
        self.no = no

        # activations for nodes
        self.ai = [1.0] * self.ni
        self.ah = [1.0] * self.nh
        self.ao = [1.0] * self.no

        # create weight matrices and fill them with random values
        self.wi = makeMatrix(self.ni, self.nh)
        self.wo = makeMatrix(self.nh, self.no)
        for i in range(self.ni):
            for j in range(self.nh):
                self.wi[i][j] = rand(-0.2, 0.2)
        for j in range(self.nh):
            for k in range(self.no):
                self.wo[j][k] = rand(-2.0, 2.0)

        # last change in weights, used for the momentum term
        self.ci = makeMatrix(self.ni, self.nh)
        self.co = makeMatrix(self.nh, self.no)

    def update(self, inputs):  # forward pass
        if len(inputs) != self.ni - 1:
            raise ValueError('wrong number of inputs')

        # input activations
        for i in range(self.ni - 1):
            self.ai[i] = inputs[i]

        # hidden activations
        for j in range(self.nh):
            total = 0.0
            for i in range(self.ni):
                total = total + self.ai[i] * self.wi[i][j]
            self.ah[j] = sigmoid(total)

        # output activations
        for k in range(self.no):
            total = 0.0
            for j in range(self.nh):
                total = total + self.ah[j] * self.wo[j][k]
            self.ao[k] = sigmoid(total)

        return self.ao[:]

    def backPropagate(self, targets, N, M):  # back-propagation
        if len(targets) != self.no:
            raise ValueError('wrong number of target values')

        # calculate error terms (deltas) for the output layer
        output_deltas = [0.0] * self.no
        for k in range(self.no):
            error = targets[k] - self.ao[k]
            output_deltas[k] = dsigmoid(self.ao[k]) * error

        # calculate error terms (deltas) for the hidden layer
        hidden_deltas = [0.0] * self.nh
        for j in range(self.nh):
            error = 0.0
            for k in range(self.no):
                error = error + output_deltas[k] * self.wo[j][k]
            hidden_deltas[j] = dsigmoid(self.ah[j]) * error

        # update hidden-to-output weights
        for j in range(self.nh):
            for k in range(self.no):
                change = output_deltas[k] * self.ah[j]
                self.wo[j][k] = self.wo[j][k] + N * change + M * self.co[j][k]
                self.co[j][k] = change

        # update input-to-hidden weights
        for i in range(self.ni):
            for j in range(self.nh):
                change = hidden_deltas[j] * self.ai[i]
                self.wi[i][j] = self.wi[i][j] + N * change + M * self.ci[i][j]
                self.ci[i][j] = change

        # calculate the total squared error for this pattern
        error = 0.0
        for k in range(len(targets)):
            error = error + 0.5 * (targets[k] - self.ao[k]) ** 2
        return error

    def predict(self, pattern):
        return self.update(pattern)

    def weights(self):
        # these weights could be saved so the network does not have to be
        # retrained next time
        print('Input weights:')
        for i in range(self.ni):
            print(self.wi[i])
        print()
        print('Output weights:')
        for j in range(self.nh):
            print(self.wo[j])

    def train(self, patterns, iterations=10000, N=0.5, M=0.1):
        # N: learning rate
        # M: momentum factor
        for i in range(iterations):
            error = 0.0
            for p in patterns:
                inputs = p[0]
                targets = p[1]
                self.update(inputs)
                error = error + self.backPropagate(targets, N, M)
            if i % 100 == 0:
                print('error %-.5f' % error)

def demo():
    # teach the network the XOR function
    pat = [
        [[0, 0], [0]],
        [[0, 1], [1]],
        [[1, 0], [1]],
        [[1, 1], [0]],
    ]

    # create a network with two input, two hidden, and one output node
    n = NN(2, 2, 1)
    # train it with some patterns
    n.train(pat)
    # check its predictions
    for p in pat:
        print(p[0], '->', n.predict(p[0]))

if __name__ == '__main__':
    demo()
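The comment on weights() points out that the trained weights could be saved so that training need not be repeated next time. One possible way to do this, sketched with pickle (the helper names and file name are my own assumptions, not part of the original code):

import pickle

def save_weights(net, path='bp_weights.pkl'):
    # store both weight matrices of a trained NN instance
    with open(path, 'wb') as f:
        pickle.dump((net.wi, net.wo), f)

def load_weights(net, path='bp_weights.pkl'):
    # restore the weights into a network built with the same (ni, nh, no) sizes
    with open(path, 'rb') as f:
        net.wi, net.wo = pickle.load(f)

# usage sketch:
#   n = NN(2, 2, 1); n.train(pat); save_weights(n)
#   later: m = NN(2, 2, 1); load_weights(m); m.predict([0, 1])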
Training and run results
C:\Users\jin.yan\Desktop>python bp.py
error 0.94250
error 0.04287
error 0.00348
error 0.00164
error 0.00106
error 0.00078
error 0.00092
error 0.00053
error 0.00044
error 0.00038
error 0.00105
error 0.00033
error 0.00028
error 0.00026
error 0.00078
error 0.00030
error 0.00020
error 0.00021
error 0.00039
error 0.00025
error 0.00017
error 0.00017
error 0.00016
error 0.00023
error 0.00019
error 0.00013
error 0.00012
error 0.00014
error 0.00026
error 0.00012
error 0.00012
error 0.00011
error 0.00010
error 0.00021
error 0.00012
error 0.00012
error 0.00009
error 0.00011
error 0.00011
error 0.00022
error 0.00010
error 0.00008
error 0.00009
error 0.00008
error 0.00016
error 0.00010
error 0.00008
error 0.00008
error 0.00008
error 0.00007
error 0.00010
error 0.00013
error 0.00007
error 0.00006
error 0.00007
error 0.00007
error 0.00007
error 0.00006
error 0.00012
error 0.00012
error 0.00008
error 0.00006
error 0.00005
error 0.00005
error 0.00007
error 0.00009
error 0.00007
error 0.00006
error 0.00005
error 0.00005
error 0.00005
error 0.00006
error 0.00007
error 0.00007
error 0.00006
error 0.00005
error 0.00004
error 0.00004
error 0.00004
error 0.00004
error 0.00005
error 0.00006
error 0.00008
error 0.00008
error 0.00007
error 0.00006
error 0.00005
error 0.00005
error 0.00005
error 0.00005
error 0.00004
error 0.00004
error 0.00004
error 0.00004
error 0.00004
error 0.00004
error 0.00004
error 0.00004
error 0.00003
error 0.00003
[0, 0] -> [0.0019514089062884685]
[0, 1] -> [0.9947972032147019]
[1, 0] -> [0.9947921631365915]
[1, 1] -> [0.0013874327009247476]
C:\Users\jin.yan\Desktop>
As the output shows, over the 10,000 training iterations the error keeps shrinking: it falls steeply in the first few hundred iterations and then levels off near zero, with only occasional small upticks. After training, the actual outputs look like this:
[0, 0] -> [0.0019514089062884685]
[0, 1] -> [0.9947972032147019]
[1, 0] -> [0.9947921631365915]
[1, 1] -> [0.0013874327009247476]
The expected outputs are listed below; the actual outputs above differ from them only by a very small error.
[[0,0] -> [0]]
[[0,1] -> [1]]
[[1,0] -> [1]]
[[1,1] -> [0]]
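Since the targets are 0 and 1, one simple way (my own addition, not in the original code) to turn these continuous outputs into hard labels is to threshold them at 0.5:

# hypothetical post-processing of the trained network's outputs
for p in pat:
    raw = n.predict(p[0])
    label = [1 if v >= 0.5 else 0 for v in raw]
    print(p[0], '->', label)  # gives [0,0]->[0], [0,1]->[1], [1,0]->[1], [1,1]->[0]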
Summary
To see the advantage of a neural network, consider the following question.
Suppose every value can only be 0 or 1. The example above has just 2 inputs and 1 output, so there are 2^2 = 4 possible inputs and 2 possible outputs. With 100 inputs and 1 output, there would be 2^100 = 1,267,650,600,228,229,401,496,703,205,376 possible inputs.
Without a neural network, an ordinary program would have to enumerate and decide all 1,267,650,600,228,229,401,496,703,205,376 cases, and it would first require collecting that many data samples.
With a neural network, we only need to collect a fraction of the samples, say 703,205,376 of them, and train the network on those. For the remaining inputs that were never collected, the trained network will still output a probability-like value with only a small error.
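A small illustration of this idea, reusing the NN class above (the 3-input majority function and the particular train/held-out split are my own choices, not from the original post):

# Hypothetical generalization sketch, assuming the NN class defined earlier.
# Target concept: 3-input "majority" (output 1 iff at least two inputs are 1).
# Train on 7 of the 2**3 = 8 possible inputs and hold one back.
train_pat = [
    [[0, 0, 0], [0]],
    [[0, 0, 1], [0]],
    [[0, 1, 0], [0]],
    [[0, 1, 1], [1]],
    [[1, 0, 1], [1]],
    [[1, 1, 0], [1]],
    [[1, 1, 1], [1]],
]
net = NN(3, 4, 1)
net.train(train_pat)
# [1, 0, 0] was never seen during training; its correct majority label is 0,
# and in practice the trained network tends to output a value close to 0 for it.
print([1, 0, 0], '->', net.predict([1, 0, 0]))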