自编码使得训练出的y直接对着自己的输入层x
自编码是由编码器和解码器组成的,编码器是输入层映射到隐藏层,解码器是隐藏层映射到输出层。编码器将输入层向量通过。。函数转移至隐藏层转换成新功能。解码器转移这些功能回到原始的输入层。
require(autoencoder)
require(ripa)
data(logo)
image(logo)
> logo size: 77 x 101 type: grey
> x_train<-t(logo) > x_train size: 101 x 77 type: grey
set.seed(2016)
fit<-autoencode(X.train=x_train,X.test = NULL,nl=3,N.hidden = 60,
unit.type = "logistic",lambda = 1e-5,beta=1e-5,rho=0.3,
epsilon = 0.1,max.iterations = 100,optim.method = c("BFGS"),
rel.tol = 0.01,rescale.flag = TRUE,rescaling.offset = 0.001)
#将x_train图像转化为函数,nl=3设置神经网络分为3层,logistic函数的隐藏节点数为60
#lambda是衰减参数,这是数值比较小
#beta是稀疏惩罚的权重,同样比较小
#稀疏度rho设置为0.3,服从正态分布
#最大步长为100
#rescale.flag表示x_train矩阵的缩放范围允许在0-1之间
autoencoding... Optimizer counts: function gradient 88 86 Optimizer: successful convergence. Optimizer: convergence = 0, message = J.init = 5.513073, J.final = 0.1928874, mean(rho.hat.final) = 0.627708
> attributes(fit) $names [1] "W" "b" "unit.type" [4] "rescaling" "nl" "sl" [7] "N.input" "N.hidden" "mean.error.training.set" [10] "mean.error.test.set" $class [1] "autoencoder"
> fit$mean.error.training.set [1] 0.3489713我们已经看出,在隐藏层抽取特征非常有用,简单的获取方式是使用predict函数:hidden.output=TRUE
features<-predict(fit,X.input = x_train,hidden.output = TRUE)
image(t(features$X.output))
> pred<-predict(fit,X.input = x_train,hidden.output = FALSE) > pred$mean.error [1] 0.3503714
aburl='http://archive.ics.uci.edu/ml/machine-learning-databases/abalone/abalone.data'
names=c('sex','length','diameter','height','whole.weight','shucked.weight',
'viscera.weight','shell.weight','rings')
data=read.table(aburl,header=F,sep=',',col.names = names)
summary(data)
sex length diameter height whole.weight F:1307 Min. :0.075 Min. :0.0550 Min. :0.0000 Min. :0.0020 I:1342 1st Qu.:0.450 1st Qu.:0.3500 1st Qu.:0.1150 1st Qu.:0.4415 M:1528 Median :0.545 Median :0.4250 Median :0.1400 Median :0.7995 Mean :0.524 Mean :0.4079 Mean :0.1395 Mean :0.8287 3rd Qu.:0.615 3rd Qu.:0.4800 3rd Qu.:0.1650 3rd Qu.:1.1530 Max. :0.815 Max. :0.6500 Max. :1.1300 Max. :2.8255 shucked.weight viscera.weight shell.weight rings Min. :0.0010 Min. :0.0005 Min. :0.0015 Min. : 1.000 1st Qu.:0.1860 1st Qu.:0.0935 1st Qu.:0.1300 1st Qu.: 8.000 Median :0.3360 Median :0.1710 Median :0.2340 Median : 9.000 Mean :0.3594 Mean :0.1806 Mean :0.2388 Mean : 9.934 3rd Qu.:0.5020 3rd Qu.:0.2530 3rd Qu.:0.3290 3rd Qu.:11.000 Max. :1.4880 Max. :0.7600 Max. :1.0050 Max. :29.000
> data[data$height==0,] sex length diameter height whole.weight shucked.weight viscera.weight 1258 I 0.430 0.34 0 0.428 0.2065 0.0860 3997 I 0.315 0.23 0 0.134 0.0575 0.0285 shell.weight rings 1258 0.1150 8 3997 0.3505 6
#找缺失值
data[data$height==0,]
#删除缺失值
data$height[data$height==0]=NA
data<-na.omit(data)
data$sex<-NULL
summary(data)
length diameter height whole.weight shucked.weight Min. :0.0750 Min. :0.0550 Min. :0.0100 Min. :0.0020 Min. :0.0010 1st Qu.:0.4500 1st Qu.:0.3500 1st Qu.:0.1150 1st Qu.:0.4422 1st Qu.:0.1862 Median :0.5450 Median :0.4250 Median :0.1400 Median :0.8000 Median :0.3360 Mean :0.5241 Mean :0.4079 Mean :0.1396 Mean :0.8290 Mean :0.3595 3rd Qu.:0.6150 3rd Qu.:0.4800 3rd Qu.:0.1650 3rd Qu.:1.1535 3rd Qu.:0.5020 Max. :0.8150 Max. :0.6500 Max. :1.1300 Max. :2.8255 Max. :1.4880 viscera.weight shell.weight rings Min. :0.0005 Min. :0.0015 Min. : 1.000 1st Qu.:0.0935 1st Qu.:0.1300 1st Qu.: 8.000 Median :0.1710 Median :0.2340 Median : 9.000 Mean :0.1807 Mean :0.2388 Mean : 9.935 3rd Qu.:0.2530 3rd Qu.:0.3287 3rd Qu.:11.000 Max. :0.7600 Max. :1.0050 Max. :29.000
data1<-t(data)
data1<-as.matrix(data1)
require(autoencoder)
set.seed(2016)
n=nrow(data)
train<-sample(1:n,10,FALSE)
fit<-autoencode(X.train = data1[,train],X.test = NULL,nl=3,N.hidden = 5,
unit.type = "logistic",lambda = 1e-5,beta=1e-5,rho=0.07,
epsilon = 0.1,max.iterations = 100,optim.method = c("BFGS"),
rel.tol = 0.01,rescale.flag = TRUE,rescaling.offset = 0.001)
autoencoding... Optimizer counts: function gradient 34 32 Optimizer: successful convergence. Optimizer: convergence = 0, message = J.init = 1.038595, J.final = 0.01098651, mean(rho.hat.final) = 0.5697456
> fit$mean.error.training.set [1] 0.01654644 > features<-predict(fit,X.input = data1[,train],hidden.output = TRUE) > features$X.output [,1] [,2] [,3] [,4] [,5] length 6.572353e-01 7.459814e-01 4.025650e-01 5.306412e-01 6.325756e-01 diameter 7.149880e-01 7.907795e-01 4.670981e-01 5.899761e-01 6.929413e-01 height 8.119831e-01 8.628111e-01 5.986132e-01 7.009952e-01 7.956291e-01 whole.weight 4.901213e-01 6.072550e-01 2.545919e-01 3.770476e-01 4.642603e-01 shucked.weight 7.437931e-01 8.128396e-01 5.023596e-01 6.205615e-01 7.235000e-01 viscera.weight 7.893552e-01 8.464983e-01 5.645063e-01 6.734824e-01 7.718709e-01 shell.weight 7.721268e-01 8.337987e-01 5.403436e-01 6.532020e-01 7.534961e-01 rings 1.855435e-09 1.038285e-08 1.111354e-09 9.623515e-09 1.471309e-09
> pred<-predict(fit,X.input = data1[,train],hidden.output = FALSE)
> pred $X.output [,1] [,2] [,3] [,4] [,5] [,6] length 0.5843936 0.5338661 0.6988651 0.5914587 0.017422624 0.16787938 diameter 0.3886484 0.3786442 0.5063712 0.3952463 0.012960351 0.10031409 height 0.1891073 0.2051164 0.2854515 0.1935582 0.008717783 0.04205945 whole.weight 1.7357320 1.3348344 1.6519252 1.7294214 0.047866685 0.70391258 shucked.weight 0.3154349 0.3174224 0.4292071 0.3215069 0.011363892 0.07750871 viscera.weight 0.2245608 0.2376717 0.3275103 0.2295557 0.009443400 0.05144013 shell.weight 0.2556035 0.2654247 0.3631185 0.2610123 0.010089224 0.06003454 rings 12.1850860 9.4368786 9.9615860 12.0389287 1.413263792 12.10066454 [,7] [,8] [,9] [,10] length 0.4133904 0.7215291 0.4240386 0.3911886 diameter 0.2878643 0.5106162 0.2686229 0.2690845 height 0.1516875 0.2759206 0.1203867 0.1383446 whole.weight 1.0948191 1.8092411 1.4439660 1.0658803 shucked.weight 0.2392124 0.4275806 0.2128008 0.2221398 viscera.weight 0.1768079 0.3197935 0.1457233 0.1622826 shell.weight 0.1983995 0.3572845 0.1683064 0.1829217 rings 9.1683347 10.9723027 12.5646378 9.2603170 $hidden.output [1] FALSE $mean.error [1] 3.727675
下一步绘制雷达图
#第一步,设置各个变量的取值范围(即最小值和最大值)
maxmin<-data.frame(length=c(1,0),diameter=c(1,0),height=c(1,0),whole.weight=c(5,0),
shucked.weight=c(1,0),viscera.weight=c(1,0),shell.weight=c(1,0),rings=c(20,0))
#第二步,设置准备绘图的指标类型的数据,如x1的8种observed和reconstructed的贡献率
dat.A<-data.frame(length=c(0.455,0.584),diameter=c(0.365,0.389),height=c(0.095,0.189),whole.weight=c(0.514,1.736),
shucked.weight=c(0.2245,0.315),viscera.weight=c(0.101,0.225),shell.weight=c(0.15,0.257),rings=c(15,12.19))
#第三步,把数据集合并到一起
dat.B<-rbind(maxmin,dat.A)
#第四步,绘图,安装fmsb包
#install.packages("fmsb")
library(fmsb)
radarchart(dat.B,axistype = 0,seg = 8,centerzero = T)