正向传播
比较简单,为了更好理解,直接举例子:
$$X_4 = W_{14} X_1 + W_{24} X_2 + W_{34} X_3 + B_4$$
$$X_5 = W_{15} X_1 + W_{25} X_2 + W_{35} X_3 + B_5$$
$$O_4 = sigmoid(X_4), \quad O_5 = sigmoid(X_5)$$
$$X_6 = W_{46} O_4 + W_{56} O_5 + B_6$$
$$O_6 = sigmoid(X_6)$$
说明
- $O_6$ 即为预测值。
- 隐藏层的单元个数一般都要比输入层单元个数多,并且每个隐藏层的单元个数都相等。
- 理论上的隐藏层的单元个数越多、隐藏层的层数越多,该神经网络模型就会越好,但是计算量会增大,在训练数据少的时候,会出现过拟合。
- 初始化权重W和偏向B: 随机初始化在-1到1之间,或者-0.5到0.5之间,每个单元有一个偏向。
- 对于输入层 $X_1$ ~ $X_3$ 这几个特征值,都必须标准化到 0 到 1 之间。
- 激活函数(sigmoid函数)可以为logistic function或者其他。
其中 logistic function 为 $y = \dfrac{1}{1+e^{-x}}$
图像如下:
反向传播
稍微复杂,看公式就懂了。
对于输出层:$E_i = O_i (1 - O_i)(T_i - O_i)$
对于隐藏层:$E_i = O_i (1 - O_i) \sum_j E_j W_{ij}$
权重更新:$W_{ij} = W_{ij} + l \, E_j O_i$
偏向更新:$B_i = B_i + E_i$
说明
- l 为学习率。l 取值过大,会导致代价函数 J(θ) 的值上下跳动或者越来越大;l 取值过小,会导致梯度下降算法收敛过慢。一般按约 3 倍递增地尝试 l 的取值(…、0.001、0.003、0.01、0.03、0.1、0.3、1、3、10、…)。
训练终止条件
- 权重的更新低于某个阈值
- 预测的错误率低于某个阈值
- 达到预设一定的循环次数
代码(python)
NeuralNetwork代码:
# coding=gbk
import numpy as np
from distributed.worker import weight
def tanh(x):
    """Hyperbolic-tangent activation (element-wise)."""
    y = np.tanh(x)
    return y
def tanh_derivative(x):
    """Derivative of tanh: 1 - tanh(x)^2.

    Evaluates np.tanh once (the original computed it twice).
    """
    t = np.tanh(x)
    return 1 - t * t
# sigmoid activation
def logistic(x):
    """Logistic (sigmoid) function: 1 / (1 + e^-x)."""
    denom = 1 + np.exp(-x)
    return 1 / denom
# derivative of the sigmoid
def logistic_derivative(x):
    """Derivative of the logistic function: s(x) * (1 - s(x)).

    Evaluates logistic(x) once; the original called it twice,
    doing the exp() work redundantly.
    """
    s = logistic(x)
    return s * (1 - s)
class NeuralNetwork:
    """Fully-connected feed-forward network trained by stochastic
    back-propagation.

    layers: unit counts per layer, e.g. [2, 3, 1] = 2 inputs, one
            hidden layer of 3 units, 1 output unit.
    activation: 'tanh' (default) or 'logistic'.
    """

    def __init__(self, layers, activation='tanh'):
        if activation == 'logistic':
            self.activation = logistic
            self.activation_deriv = logistic_derivative
        elif activation == 'tanh':
            self.activation = tanh
            self.activation_deriv = tanh_derivative
        else:
            # The original silently ignored unknown names, leaving the
            # instance half-built; fail fast instead.
            raise ValueError("activation must be 'tanh' or 'logistic'")

        # Random weights in (-0.25, 0.25).  Hidden-layer matrices carry an
        # extra row AND column for the bias unit; the output matrix only
        # needs the extra input row.  The original appended the output
        # matrix inside the loop as well, creating spurious extra matrices
        # whenever there was more than one hidden layer.
        self.weights = []
        for i in range(1, len(layers) - 1):
            self.weights.append(
                (2 * np.random.random((layers[i - 1] + 1, layers[i] + 1)) - 1) * 0.25)
        self.weights.append(
            (2 * np.random.random((layers[-2] + 1, layers[-1])) - 1) * 0.25)

    def fit(self, x, y, learning_rate=0.2, epochs=10000):
        """Train on (x, y): each epoch back-propagates one random sample."""
        x = np.atleast_2d(x)
        # Append a constant-1 column so the bias is learned as a weight.
        augmented = np.ones([x.shape[0], x.shape[1] + 1])
        augmented[:, 0:-1] = x
        x = augmented
        y = np.array(y)

        for _ in range(epochs):
            i = np.random.randint(x.shape[0])   # pick one random sample
            a = [x[i]]                          # activations, layer by layer
            # Forward pass: a[l+1] = f(a[l] . W[l]).
            for l in range(len(self.weights)):
                a.append(self.activation(np.dot(a[l], self.weights[l])))

            # Output-layer delta: (target - output) * f'.
            # NOTE(review): the derivative is evaluated at the activated
            # output a[l] rather than the pre-activation; kept exactly as
            # in the original — confirm before "fixing".
            error = y[i] - a[-1]
            deltas = [error * self.activation_deriv(a[-1])]
            # Hidden-layer deltas, propagated back to front.
            for l in range(len(a) - 2, 0, -1):
                deltas.append(deltas[-1].dot(self.weights[l].T)
                              * self.activation_deriv(a[l]))
            deltas.reverse()

            # Gradient step.  (Use j, not i: the original shadowed the
            # sample index chosen above.)
            for j in range(len(self.weights)):
                layer = np.atleast_2d(a[j])
                delta = np.atleast_2d(deltas[j])
                self.weights[j] += learning_rate * layer.T.dot(delta)

    def predict(self, x):
        """Forward one sample; returns the output-layer activations."""
        x = np.array(x)
        a = np.ones(x.shape[0] + 1)   # trailing 1 is the bias input
        a[0:-1] = x
        for w in self.weights:
            a = self.activation(np.dot(a, w))
        return a
异或运算
# XOR — the classic problem a single-layer network cannot solve.
from NeuralNetwork import NeuralNetwork
import numpy as np

network = NeuralNetwork([2, 3, 1], 'tanh')
samples = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
targets = np.array([0, 1, 1, 0])
network.fit(samples, targets)

# Print the network's output for each of the four input patterns.
for pattern in [[0, 0], [0, 1], [1, 0], [1, 1]]:
    print(pattern, network.predict(pattern))
手写数字识别
import numpy as np
from sklearn.datasets import load_digits
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.preprocessing import LabelBinarizer
from NeuralNetwork import NeuralNetwork
# sklearn.cross_validation was removed in scikit-learn 0.20;
# train_test_split now lives in sklearn.model_selection.
from sklearn.model_selection import train_test_split

digits = load_digits()   # load the handwritten-digit sample set
X = digits.data          # feature vectors (64 pixel intensities each)
y = digits.target        # labels 0-9

# Scale every feature into [0, 1] — required by the neural network.
X -= X.min()
X /= X.max()

nn = NeuralNetwork([64, 100, 10], "logistic")
X_train, X_test, y_train, y_test = train_test_split(X, y)
# One-hot encode the labels so each output unit corresponds to one class.
labels_train = LabelBinarizer().fit_transform(y_train)
labels_test = LabelBinarizer().fit_transform(y_test)

nn.fit(X_train, labels_train, epochs=3000)  # train the model

# Predict each test sample; argmax picks the most confident output unit.
predictions = [np.argmax(nn.predict(X_test[i])) for i in range(X_test.shape[0])]
print("predictions:\n" + str(predictions))
# Confusion-matrix rows are true labels, columns are predictions.
print("confusion_matrix:\n" + str(confusion_matrix(y_test, predictions)))
# Per-class precision, recall and F1 score.
print(classification_report(y_test, predictions))
可视化手写数字的图片,用于理解程序
# Visualise one handwritten digit to understand the data layout.
from sklearn.datasets import load_digits
import pylab as pl

digits = load_digits()
print(digits.data.shape)

pl.gray()                      # grayscale colour map
pl.matshow(digits.images[0])   # 8x8 pixel matrix of the first sample
pl.show()