Spark MLlib Deep Learning Neural Net(深度学习-神经网络)1.1
http://blog.youkuaiyun.com/sunbow0
Spark MLlib Deep Learning工具箱,是根据现有深度学习教程《UFLDL教程》中的算法,在Spark MLlib中的实现。具体Spark MLlib Deep Learning(深度学习)目录结构如下:
第一章Neural Net(NN)
1、源码
2、源码解析
3、实例
第二章Deep Belief Nets(DBNs)
第三章Convolution Neural Network(CNN)
第四章 Stacked Auto-Encoders(SAE)
第五章CAE
第一章Neural Net(神经网络)
1源码
目前Spark MLlib Deep Learning工具箱源码的github地址为:
https://github.com/sunbow1/SparkMLlibDeepLearn
1.1 NeuralNet代码
package NN
import org.apache.spark._
import org.apache.spark.SparkContext._
import org.apache.spark.rdd.RDD
import org.apache.spark.Logging
import org.apache.spark.mllib.regression.LabeledPoint
import org.apache.spark.mllib.linalg._
import org.apache.spark.mllib.linalg.distributed.RowMatrix
import breeze.linalg.{
Matrix => BM,
CSCMatrix => BSM,
DenseMatrix => BDM,
Vector => BV,
DenseVector => BDV,
SparseVector => BSV,
axpy => brzAxpy,
svd => brzSvd
}
import breeze.numerics.{
exp => Bexp,
tanh => Btanh
}
import scala.collection.mutable.ArrayBuffer
import java.util.Random
import scala.math._
/**
 * Per-sample record carried through the network computation.
 *
 * label: target (expected output) matrix for this sample
 * nna:   output values of every network layer for this sample: a(0), a(1), a(2), ...
 * error: error matrix between the output layer and the target values
 */
case class NNLabel(label: BDM[Double], nna: ArrayBuffer[BDM[Double]], error: BDM[Double]) extends Serializable
/**
 * Immutable configuration (hyper-parameters) of the neural network.
 *
 * @param size                    number of units per layer, e.g. Array(input, hidden..., output)
 * @param layer                   total number of layers (length of `size`)
 * @param activation_function     hidden-layer activation: 'sigm' (sigmoid) or 'tanh_opt' (optimal tanh)
 * @param learningRate            learning rate; typically lower with 'sigm' and non-normalized inputs
 * @param momentum                momentum term for weight updates
 * @param scaling_learningRate    factor applied to the learning rate after each epoch
 * @param weightPenaltyL2         L2 regularization coefficient
 * @param nonSparsityPenalty      penalty weight for the sparsity constraint
 * @param sparsityTarget          target average activation for the sparsity constraint
 * @param inputZeroMaskedFraction fraction of inputs zeroed as noise (denoising auto-encoders)
 * @param dropoutFraction         fraction of hidden units randomly dropped each mini-batch
 * @param testing                 internal flag; set to 1 during testing (nntest)
 * @param output_function         output unit: 'sigm' (logistic), 'softmax' or 'linear'
 */
case class NNConfig(
size: Array[Int],
layer: Int,
activation_function: String,
learningRate: Double,
momentum: Double,
scaling_learningRate: Double,
weightPenaltyL2: Double,
nonSparsityPenalty: Double,
sparsityTarget: Double,
inputZeroMaskedFraction: Double,
dropoutFraction: Double,
testing: Double,
output_function: String) extends Serializable
/**
* NN(neural network)
*/
class NeuralNet(
private var size: Array[Int],
private var layer: Int,
private var activation_function: String,
private var learningRate: Double,
private var momentum: Double,
private var scaling_learningRate: Double,
private var weightPenaltyL2: Double,
private var nonSparsityPenalty: Double,
private var sparsityTarget: Double,
private var inputZeroMaskedFraction: Double,
private var dropoutFraction: Double,
private var testing: Double,
private var output_function: String) extends Serializable with Logging {
// var size=Array(5, 7, 1)
// var layer=3
// var activation_function="tanh_opt"
// var learningRate=2.0
// var momentum=0.5
// var scaling_learningRate=1.0
// var weightPenaltyL2=0.0
// var nonSparsityPenalty=0.0
// var sparsityTarget=0.05
// var inputZeroMaskedFraction=0.0
// var dropoutFraction=0.0
// var testing=0.0
// var output_function="sigm"
/**
 * Auxiliary constructor: builds a NeuralNet with the library defaults.
 *
 * size = architecture;
 * n = numel(nn.size);
 * activation_function = sigm; hidden-layer activation: 'sigm' (sigmoid) or 'tanh_opt' (optimal tanh).
 * learningRate = 2; learning rate. Note: typically needs to be lower when using 'sigm'
 *                   activation function and non-normalized inputs.
 * momentum = 0.5; momentum term.
 * scaling_learningRate = 1; scaling factor for the learning rate (applied each epoch).
 * weightPenaltyL2 = 0; L2 regularization.
 * nonSparsityPenalty = 0; penalty weight for the sparsity constraint.
 * sparsityTarget = 0.05; sparsity target.
 * inputZeroMaskedFraction = 0; input corruption noise, used for denoising auto-encoders.
 * dropoutFraction = 0; fraction of hidden units randomly dropped per mini-batch
 *                      (http://www.cs.toronto.edu/~hinton/absps/dropout.pdf).
 * testing = 0; internal variable; nntest sets this to one.
 * output = 'sigm'; output unit: 'sigm' (=logistic), 'softmax' or 'linear'.
 */
def this() = this(NeuralNet.Architecture, 3, NeuralNet.Activation_Function, 2.0, 0.5, 1.0, 0.0, 0.0, 0.05, 0.0, 0.0, 0.0, NeuralNet.Output)
/** Sets the network architecture (units per layer). Default: [10, 5, 1]. */
def setSize(size: Array[Int]): this.type = { this.size = size; this }
/** Sets the number of network layers. Default: 3. */
def setLayer(layer: Int): this.type = { this.layer = layer; this }
/** Sets the hidden-layer activation function. Default: sigm. */
def setActivation_function(activation_function: String): this.type = { this.activation_function = activation_function; this }
/** Sets the learning rate. Default: 2. */
def setLearningRate(learningRate: Double): this.type = { this.learningRate = learningRate; this }
/** Sets the momentum term. Default: 0.5. */
def setMomentum(momentum: Double): this.type = { this.momentum = momentum; this }
/** Sets the per-epoch learning-rate scaling factor. Default: 1. */
def setScaling_learningRate(scaling_learningRate: Double): this.type = { this.scaling_learningRate = scaling_learningRate; this }
/** Sets the L2 regularization coefficient. Default: 0. */
def setWeightPenaltyL2(weightPenaltyL2: Double): this.type = { this.weightPenaltyL2 = weightPenaltyL2; this }
/** Sets the sparsity penalty weight. Default: 0. */
def setNonSparsityPenalty(nonSparsityPenalty: Double): this.type = { this.nonSparsityPenalty = nonSparsityPenalty; this }
/** Sets the sparsity target (desired average activation). Default: 0.05. */
def setSparsityTarget(sparsityTarget: Double): this.type = { this.sparsityTarget = sparsityTarget; this }
/** 设置权重加入噪声因子. Default: 0. */
def setInputZeroMaskedFraction(inputZeroMaskedFraction: Double): this.type = {
this.inputZeroMaskedFraction = inputZeroMaskedFracti