The code comes from Mr. Min's series "日撸 Java 三百行" (Write 300 Lines of Java per Day), Days 71-80:
Write 300 Lines of Java per Day (Days 71-80, BP Neural Networks), Min Fan's blog on CSDN
SimpleAnn extends the abstract class GeneralAnn, so it must implement the abstract methods declared in that class.
The core of the code is the implementation of two methods: forward propagation and back propagation.
Mobp: momentum coefficient. SGD generally descends quickly, but its updates can be unstable and prone to oscillation. Adding "inertia" means that each update considers not only the current descent direction but also the direction of the previous update; weighting the two together can, in some cases, suppress the oscillation and let the search escape a local basin and settle into the global one. The momentum is the weight assigned to the previous update direction. It also lets SGD learn faster across flat regions of the error surface, which makes momentum a commonly used acceleration technique for gradient descent.
Learning rate: the learning rate determines how fast the weights are updated. Setting it too large makes the result overshoot the optimum; setting it too small makes the descent too slow.
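In code, the momentum update blends the previous weight change with the current gradient step. Below is a minimal, self-contained sketch of the rule (the class name MomentumDemo and the gradient values are made up for illustration; the same rule appears later in backPropagation, with mobp = 0.6 and learningRate = 0.01 as in main):

public class MomentumDemo {
    public static void main(String[] args) {
        double mobp = 0.6;          // momentum coefficient
        double learningRate = 0.01; // step size
        double weight = 0.0;        // a single weight being trained
        double delta = 0.0;         // previous update, initially zero

        // Pretend gradients from three successive training steps.
        double[] gradients = {0.5, -0.4, 0.45};
        for (double g : gradients) {
            // Blend the previous update with the current gradient step.
            delta = mobp * delta + learningRate * g;
            weight += delta;
            System.out.printf("delta = %+.5f, weight = %+.5f%n", delta, weight);
        }
    }
}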
package machinelearning.ann;
/**
* Back-propagation neural networks.
*
* @author WX873
*
*/
public class SimpleAnn extends GeneralAnn {
/**
* The value of each node that changes during the forward process. The first
* dimension stands for the layer, and the second stands for the node.
*/
public double[][] layerNodeValues;
/**
* The error on each node that changes during the back-propagation process.
* The first dimension stands for the layer, and the second stands for the nodes.
*/
public double[][] layerNodeErrors;
/**
* The weights of edges. The first dimension stands for the layer, the
* second stands for the node index of the layer, and the third dimension
* stands for the node index of the next layer.
*/
public double[][][] edgeWeights;
/**
* The change of edge weights. It has the same size as edgeWeights.
*/
public double[][][] edgeWeightsDelta;
/**
* ************************************************************************
* The first constructor.
*
* @param paraFilename The arff filename.
* @param paraLayerNumNodes The number of nodes for each layer (may be different).
* @param paraLearningRate Learning rate.
* @param paraMobp Momentum coefficient.
* ************************************************************************
*/
public SimpleAnn(String paraFilename, int[] paraLayerNumNodes, double paraLearningRate, double paraMobp) {
super(paraFilename, paraLayerNumNodes, paraLearningRate, paraMobp);
// Step 1. Across layer initialization.
layerNodeValues = new double[numLayers][];
layerNodeErrors = new double[numLayers][];
edgeWeights = new double[numLayers - 1][][];
edgeWeightsDelta = new double[numLayers - 1][][];
// Step 2. Inner layer initialization.
for (int l = 0; l < numLayers; l++) {
layerNodeValues[l] = new double[layerNumNodes[l]];
layerNodeErrors[l] = new double[layerNumNodes[l]];
// One less layer because each edge crosses two layers.
if (l + 1 == numLayers) {
break;
}//of if
// In layerNumNodes[l] + 1, the last one is reserved for the offset (bias).
edgeWeights[l] = new double[layerNumNodes[l] + 1][layerNumNodes[l + 1]];
// The first dimension is this layer's nodes plus one bias node; the second is the number of nodes in the next layer.
edgeWeightsDelta[l] = new double[layerNumNodes[l] + 1][layerNumNodes[l + 1]];
for (int j = 0; j < layerNumNodes[l] + 1; j++) {
for (int i = 0; i < layerNumNodes[l + 1]; i++) {
// Initialize weights uniformly in [0, 1); random is inherited from GeneralAnn.
edgeWeights[l][j][i] = random.nextDouble();
}//of for i
}//of for j
}//of for l
}// Of the constructor
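// For example, with layerNumNodes = {4, 8, 8, 3} (as in main below),
// edgeWeights[0] is a 5 x 8 matrix (4 input nodes plus 1 bias node, each
// connected to 8 hidden nodes), and edgeWeights[2] is 9 x 3.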
/**
* ***********************************************************
* Forward prediction.
*
* @param paraInput The input data of one instance.
* @return The data at the output end.
* ***********************************************************
*/
public double[] forward(double[] paraInput) {
// Initialize the input layer.
for (int i = 0; i < layerNodeValues[0].length; i++) {
layerNodeValues[0][i] = paraInput[i];
}//of for i
// Calculate the node values of each layer.
double z;
for (int l = 1; l < numLayers; l++) {
for (int j = 0; j < layerNodeValues[l].length; j++) {
// Initialize with the offset (bias), whose input is always +1:
// the last node of layer (l - 1) is the bias node pointing to node j of
// layer l, so z starts as the weight of that edge.
z = edgeWeights[l - 1][layerNodeValues[l - 1].length][j];
// Weighted sum on all edges for this node.
for (int i = 0; i < layerNodeValues[l - 1].length; i++) {
z += edgeWeights[l - 1][i][j] * layerNodeValues[l - 1][i];
}//of for i (loop over the previous layer's nodes)
// Sigmoid activation.
// This line should be changed for other activation functions.
layerNodeValues[l][j] = 1 / (1 + Math.exp(-z));
}//of for j (loop over the current layer's nodes being computed)
}//of for l
return layerNodeValues[numLayers - 1];
}//of forward
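// Note: back-propagation below relies on the derivative of the sigmoid,
// sigma'(z) = sigma(z) * (1 - sigma(z)), so every error term has the form
// value * (1 - value) * (propagated error), reusing the node values cached
// by forward(). For another activation function, both the activation line
// in forward() and these error formulas must be changed together.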
/**
* ******************************************************
* Back propagation and change the edge weights.
*
* @param paraTarget For 3-class data, it is [0, 0, 1], [0, 1, 0] or [1, 0, 0].
* ******************************************************
*/
public void backPropagation(double[] paraTarget) {
// Step 1. Initialize the output layer error.
int l = numLayers - 1; // Layer l is the output layer.
for (int j = 0; j < layerNodeErrors[l].length; j++) {
layerNodeErrors[l][j] = layerNodeValues[l][j] * (1 - layerNodeValues[l][j])
* (paraTarget[j] - layerNodeValues[l][j]); // The output-layer error formula.
}//of for j
// Step 2. Back-propagation even for l == 0
while (l > 0) {
l--;
// Layer l, for each node.
for (int j = 0; j < layerNumNodes[l]; j++) {
double z = 0.0;
// For each node of the next layer.
for (int i = 0; i < layerNumNodes[l + 1]; i++) {
if (l > 0) {
z += layerNodeErrors[l + 1][i] * edgeWeights[l][j][i];
// The error of node i in layer (l + 1), times the weight of the edge from
// node j in layer l to node i in the next layer.
}//of if
// Weight adjusting.
edgeWeightsDelta[l][j][i] = mobp * edgeWeightsDelta[l][j][i] +
learningRate * layerNodeErrors[l + 1][i] * layerNodeValues[l][j];
edgeWeights[l][j][i] += edgeWeightsDelta[l][j][i];
if (j == layerNumNodes[l] - 1) {
// Weight adjusting for the offset part.
edgeWeightsDelta[l][j + 1][i] = mobp * edgeWeightsDelta[l][j + 1][i]
+ learningRate * layerNodeErrors[l + 1][i];
edgeWeights[l][j + 1][i] += edgeWeightsDelta[l][j + 1][i];
}//of if
}//of for i
// Record the error according to the differential of Sigmoid.
// This line should be changed for other activation functions.
layerNodeErrors[l][j] = layerNodeValues[l][j] * (1 - layerNodeValues[l][j]) * z;
}//of for j
}//of while
}//of backPropagation
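// Note: because the error term uses (paraTarget - output), the sign of the
// descent step is already folded into the error, so the update adds
// edgeWeightsDelta with +=; with mobp = 0 this reduces to plain delta-rule SGD.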
public static void main(String[] args) {
int[] tempLayerNodes = {4, 8, 8, 3};
SimpleAnn tempNetwork = new SimpleAnn("E:/Datasets/UCIdatasets/其他数据集/iris.arff",
tempLayerNodes, 0.01, 0.6);
for (int round = 0; round < 5000; round++) {
System.out.println("The round is: " + round);
tempNetwork.train();
}//of for round
double tempAccuracy = tempNetwork.test();
System.out.println("The accuracy is: " + tempAccuracy);
}//of main
}//of SimpleAnn
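Note that train() and test() called in main are not defined here, so they come from the parent class GeneralAnn (presumably covered in the earlier installment of the series), and the hard-coded path to iris.arff must be adjusted to wherever the dataset sits on your machine.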