3、Spark MLlib Deep Learning Convolution Neural Network (深度学习-卷积神经网络)3.1
http://blog.youkuaiyun.com/sunbow0
Spark MLlib Deep Learning工具箱,是根据现有深度学习教程《UFLDL教程》中的算法,在Spark MLlib中的实现。Spark MLlib Deep Learning(深度学习)的具体目录结构如下:
第一章Neural Net(NN)
1、源码
2、源码解析
3、实例
第二章Deep Belief Nets(DBNs)
1、源码
2、源码解析
3、实例
第三章Convolution Neural Network(CNN)
1、源码
2、源码解析
3、实例
第四章 Stacked Auto-Encoders(SAE)
第五章CAE
第三章Convolution Neural Network (卷积神经网络)
1 源码
目前Spark MLlib Deep Learning工具箱源码的GitHub地址为:
https://github.com/sunbow1/SparkMLlibDeepLearn
1.1 CNN代码
package CNN
import org.apache.spark._
import org.apache.spark.SparkContext._
import org.apache.spark.rdd.RDD
import org.apache.spark.Logging
import org.apache.spark.mllib.regression.LabeledPoint
import org.apache.spark.mllib.linalg._
import org.apache.spark.mllib.linalg.distributed.RowMatrix
import breeze.linalg.{
Matrix => BM,
CSCMatrix => BSM,
DenseMatrix => BDM,
Vector => BV,
DenseVector => BDV,
SparseVector => BSV,
axpy => brzAxpy,
svd => brzSvd,
accumulate => Accumulate,
rot90 => Rot90,
sum => Bsum
}
import breeze.numerics.{
exp => Bexp,
tanh => Btanh
}
import scala.collection.mutable.ArrayBuffer
import java.util.Random
import scala.math._
/**
 * Parameters and gradients of a single CNN layer.
 *
 * @param types      layer type tag
 * @param outputmaps number of feature maps
 * @param kernelsize size of the convolution kernel
 * @param scale      pooling size
 * @param k          convolution kernels
 * @param b          biases
 * @param dk         partial derivatives of the convolution kernels
 * @param db         partial derivatives of the biases
 */
case class CNNLayers(
    types: String,
    outputmaps: Double,
    kernelsize: Double,
    scale: Double,
    k: Array[Array[BDM[Double]]],
    b: Array[Double],
    dk: Array[Array[BDM[Double]]],
    db: Array[Double])
  extends Serializable
/**
* CNN(convolution neural network)卷积神经网络
*/
class CNN(
private var mapsize: BDM[Double],
private var types: Array[String],
private var layer: Int,
private var onum: Int,
private var outputmaps: Array[Double],
private var kernelsize: Array[Double],
private var scale: Array[Double],
private var alpha: Double,
private var batchsize: Double,
private var numepochs: Double) extends Serializable with Logging {
// var mapsize = new BDM(1, 2, Array(28.0, 28.0))
// var types = Array("i", "c", "s", "c", "s")
// var layer = 5
// var onum = 10
// var outputmaps = Array(0.0, 6.0, 0.0, 12.0, 0.0)
// var kernelsize = Array(0.0, 5.0, 0.0, 5.0, 0.0)
// var scale = Array(0.0, 0.0, 2.0, 0.0, 2.0)
// var alpha = 1.0
// var batchsize = 50.0
// var numepochs = 1.0
/**
 * Builds a CNN with the default configuration: a 1x2 `mapsize` matrix
 * holding (28.0, 28.0), five layers tagged "i", "c", "s", "c", "s",
 * an output dimension of 10, a learning parameter `alpha` of 1.0,
 * a batch size of 50.0 and a single epoch.
 */
def this() = this(
  mapsize = new BDM(1, 2, Array(28.0, 28.0)),
  types = Array("i", "c", "s", "c", "s"),
  layer = 5,
  onum = 10,
  outputmaps = Array(0.0, 6.0, 0.0, 12.0, 0.0),
  kernelsize = Array(0.0, 5.0, 0.0, 5.0, 0.0),
  scale = Array(0.0, 0.0, 2.0, 0.0, 2.0),
  alpha = 1.0,
  batchsize = 50.0,
  numepochs = 1.0)
/**
 * Sets the size of the input layer. Default: [28, 28].
 *
 * @param mapsize new input map size
 * @return this instance, so that setter calls can be chained
 */
def setMapsize(mapsize: BDM[Double]): this.type = {
  this.mapsize = mapsize
  this
}
/**
 * Sets the type tag of every network layer. Default: ["i", "c", "s", "c", "s"].
 *
 * @param types layer type tags, one per layer
 * @return this instance, so that setter calls can be chained
 */
def setTypes(types: Array[String]): this.type = {
  this.types = types
  this
}
/**
 * Sets the number of network layers. Default: 5.
 *
 * @param layer number of layers
 * @return this instance, so that setter calls can be chained
 */
def setLayer(layer: Int): this.type = {
  this.layer = layer
  this
}
/**
 * Sets the output dimension. Default: 10.
 *
 * @param onum output dimension
 * @return this instance, so that setter calls can be chained
 */
def setOnum(onum: Int): this.type = {
  this.onum = onum
  this
}
/** 设置特征map数量. Default: [0.0, 6.0, 0.0, 12.0, 0.0]. */
def setOutputm