1.基本组成
输入层(特征向量)
隐藏层
输出层
每层由神经元或单元组成;
输入层由训练集的样本特征向量传入;
经过连接点的权重传入下一层,上一层的加权求和,然后根据非线性方程转化为下一层的输入;
对于多层神经网络,理论上如果有足够多的隐藏层和足够多的的训练样本,可以拟合出任意方程
2.设计神经网络结构
包括确定网络层数,每层的节点个数;
特征向量在被传入输入层之前要进行标准化到0-1之间,为了加速训练过程;
神经网络既可以用来解决分类问题,也可以解决回归问题;
输出层的单元数十类别的个数;
一般通过交叉验证的方法确定隐藏层的个数和节点的个数
3.反向传播
通过迭代处理训练集中的样本;
对比经过神经网络后输出层的预测值和真实值;
从输出层经过隐藏层反向传播到输入层,以最小化误差更新连接权重
4.算法过程
初始化权重和偏置:随机初始化在-1到1之间的值,每个节点有一个偏向;
每个训练样本执行以下步骤:
有输入层前向传播,根据初始化的参数及激活函数,计算出节点的输出,直到输出层;
反向传播计算误差,更新权重和偏置;
终止条件:权重的更新低于某个阈值、预测的错误率低于某个阈值、达到预设的循环次数
5.代码及注释
# -*- coding: utf-8 -*-
from neural_network import NeuralNetwork
def run_main():
"""
主函数
"""
# 输入层:2个节点
# 隐含层:2个节点
# 输出层:2个节点
# 初始化各层的权重和偏置
nn = NeuralNetwork(2, 2, 2,
hidden_layer_weights=[0.15, 0.2, 0.25, 0.3],
hidden_layer_bias=0.35,
output_layer_weights=[0.4, 0.45, 0.5, 0.55],
output_layer_bias=0.6)
# 训练网络(这里只用一个样本作为训练作为示例)
sample_input = [0.05, 0.1]
sample_output = [0.01, 0.99]
# 迭代10000次训练网络
print('训练神经网络')
for i in range(10000):
# 提供输入和输出
nn.fit(sample_input, sample_output)
# 输出误差
# 会看到随着迭代的增多,误差是变小的
print('{}次迭代,误差:{}'.format(i + 1, nn.calculate_total_error([[sample_input, sample_output]])))
# 测试网络(这里选取一个和训练样本接近的样本)
print('测试神经网络:')
test_sample = [0.049, 0.11]
print('训练样本:', sample_input, sample_output)
print()
print('输入样本:', test_sample)
print('预测结果:', nn.feed_forward(test_sample))
if __name__ == '__main__':
run_main()
neural_network.py
# -*- coding: utf-8 -*-
import random
import math
#
# "pd_" : 偏导数
# "d_" : 导数
# "_wrt_" : "with respect to" 对于变量
# "w_ho" : 由隐含层(h)到输出层(o)的weights的索引
# "w_ih" : 由输入层(i)到隐含层(h)的weights的索引
class NeuralNetwork(object):
"""
神经网络类
"""
LEARNING_RATE = 0.5 # 学习率
def __init__(self,
num_inputs, # 输入层神经元个数
num_hidden, # 隐含层神经元个数
num_outputs, # 输出层神经元个数
hidden_layer_weights=None, # 隐含层神经元权重
hidden_layer_bias=None, # 隐含层神经元偏置
output_layer_weights=None, # 输出层神经元权重
output_layer_bias=None # 输出层神经元偏置
):
# 输入层
self.num_inputs = num_inputs
# 隐含层
self.hidden_layer = Layer(num_hidden, hidden_layer_bias)
# 输出层
self.output_layer = Layer(num_outputs, output_layer_bias)
# 初始化各层权重
self.init_weights_from_inputs_to_hidden_layer_neurons(hidden_layer_weights)
self.init_weights_from_hidden_layer_neurons_to_output_layer_neurons(output_layer_weights)
def init_weights_from_inputs_to_hidden_layer_neurons(self, hidden_layer_weights):
"""
初始化从输入层到隐含层的权重
"""
weight_num = 0
for h in range(len(self.hidden_layer.neurons)):
for i in range(self.num_inputs):
if not hidden_layer_weights:
self.hidden_layer.neurons[h].weights.append(random.random())
else:
self.hidden_layer.neurons[h].weights.append(hidden_layer_weights[weight_num])
weight_num += 1
def init_weights_from_hidden_layer_neurons_to_output_layer_neurons(self, output_layer_weights):
"""
初始化从隐含层到输出层的权重
"""
weight_num = 0
for o in range(len(self.output_layer.neurons)):
for h in range(len(self.hidden_layer.neurons)):
if not output_layer_weights:
self.output_layer.neurons[o].weights.append(random.random())
else:
self.output_layer.neurons[o].weights.append(output_layer_weights[weight_num])
weight_num += 1
def inspect(self):
"""
信息输出
"""
print('------')
print('* 输入层: {}'.format(self.num_inputs))
print('------')
print('隐含层')
self.hidden_layer.inspect()
print('------')
print('* 输出层')
self.output_layer.inspect()
print('------')
def feed_forward(self, inputs):
"""
前向传播
"""
hidden_layer_outputs = self.hidden_layer.feed_forward(inputs)
return self.output_layer.feed_forward(hidden_layer_outputs)
def fit(self, training_inputs, training_outputs):
"""
根据输入输出训练网络
"""
# 前向传播
self.feed_forward(training_inputs)
# 1. 输出层神经元error计算
pd_errors_wrt_output_neuron_total_net_input = [0] * len(self.output_layer.neurons)
for o in range(len(self.output_layer.neurons)):
# 求偏导
pd_errors_wrt_output_neuron_total_net_input[o] = \
self.output_layer.neurons[o].calculate_pd_error_wrt_total_net_input(training_outputs[o])
# 2. 隐含层神经元erro计算
pd_errors_wrt_hidden_neuron_total_net_input = [0] * len(self.hidden_layer.neurons)
for h in range(len(self.hidden_layer.neurons)):
d_error_wrt_hidden_neuron_output = 0
for o in range(len(self.output_layer.neurons)):
d_error_wrt_hidden_neuron_output += \
pd_errors_wrt_output_neuron_total_net_input[o] * self.output_layer.neurons[o].weights[h]
pd_errors_wrt_hidden_neuron_total_net_input[h] = \
d_error_wrt_hidden_neuron_output * self.hidden_layer.neurons[h].calculate_pd_total_net_input_wrt_input()
# 3. 更新输出层权重
for o in range(len(self.output_layer.neurons)):
for w_ho in range(len(self.output_layer.neurons[o].weights)):
pd_error_wrt_weight = \
pd_errors_wrt_output_neuron_total_net_input[o] * self.output_layer.neurons[o].calculate_pd_total_net_input_wrt_weight(w_ho)
self.output_layer.neurons[o].weights[w_ho] -= self.LEARNING_RATE * pd_error_wrt_weight
# 4. 更新隐含层权重
for h in range(len(self.hidden_layer.neurons)):
for w_ih in range(len(self.hidden_layer.neurons[h].weights)):
pd_error_wrt_weight = pd_errors_wrt_hidden_neuron_total_net_input[h] * self.hidden_layer.neurons[h].calculate_pd_total_net_input_wrt_weight(w_ih)
self.hidden_layer.neurons[h].weights[w_ih] -= self.LEARNING_RATE * pd_error_wrt_weight
def calculate_total_error(self, training_sets):
"""
计算整体误差
"""
total_error = 0
for t in range(len(training_sets)):
training_inputs, training_outputs = training_sets[t]
self.feed_forward(training_inputs)
for o in range(len(training_outputs)):
total_error += self.output_layer.neurons[o].calculate_error(training_outputs[o])
return total_error
class Layer:
"""
网络层类
"""
def __init__(self, num_neurons, bias):
# 同一层神经元的偏置都一致
self.bias = bias if bias else random.random()
self.neurons = []
for i in range(num_neurons):
self.neurons.append(Neuron(self.bias))
def inspect(self):
"""
信息输出
"""
print('神经元:', len(self.neurons))
for n in range(len(self.neurons)):
print(' 神经元', n)
for w in range(len(self.neurons[n].weights)):
print(' 权重:', self.neurons[n].weights[w])
print(' 偏置:', self.bias)
def feed_forward(self, inputs):
"""
前向传播
"""
outputs = []
for neuron in self.neurons:
outputs.append(neuron.calculate_output(inputs))
return outputs
def get_outputs(self):
"""
获取输出结果
"""
outputs = []
for neuron in self.neurons:
outputs.append(neuron.output)
return outputs
class Neuron(object):
"""
神经元节点
"""
def __init__(self, bias):
self.bias = bias
self.weights = []
def calculate_output(self, inputs):
"""
计算输出
"""
self.inputs = inputs
# 激活函数
self.output = self.activate(self.calculate_total_net_input())
return self.output
def calculate_total_net_input(self):
"""
计算整体输入
"""
total = 0
for i in range(len(self.inputs)):
total += self.inputs[i] * self.weights[i]
return total + self.bias
def activate(self, total_net_input):
"""
逻辑回归激活函数
"""
return 1 / (1 + math.exp(-total_net_input))
def calculate_pd_error_wrt_total_net_input(self, target_output):
"""
计算相对于整体网络输入的误差偏导数
"""
return self.calculate_pd_error_wrt_output(target_output) * self.calculate_pd_total_net_input_wrt_input();
# The error for each neuron is calculated by the Mean Square Error method:
def calculate_error(self, target_output):
"""
计算每个神经元节点的误差(均方误差)
"""
return 0.5 * (target_output - self.output) ** 2
def calculate_pd_error_wrt_output(self, target_output):
"""
计算相对于输出的误差偏导数
"""
return -(target_output - self.output)
def calculate_pd_total_net_input_wrt_input(self):
"""
计算相对于输入的总体偏导数
"""
return self.output * (1 - self.output)
def calculate_pd_total_net_input_wrt_weight(self, index):
"""
计算相对于权重的整体输入的偏导数
"""
return self.inputs[index]