MNIST
Code
library(keras)
# loading the keras built-in MNIST dataset
data<-dataset_mnist()
# separating the training and test sets
train_x<-data$train$x
train_y<-data$train$y
test_x<-data$test$x
test_y<-data$test$y
rm(data)
# flattening each 28x28 image into a 784-element vector and normalising pixel values to [0, 1]
train_x <- array(train_x, dim = c(dim(train_x)[1], prod(dim(train_x)[-1]))) / 255
test_x <- array(test_x, dim = c(dim(test_x)[1], prod(dim(test_x)[-1]))) / 255
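The keras package also provides array_reshape(), which reshapes in row-major (C-style) order rather than base R's column-major order. A sketch of the equivalent alternative, replacing the two lines above rather than following them:
# alternative to the two lines above: row-major reshape via keras' array_reshape();
# the element order differs from base R's column-major array(), but it is applied
# consistently to train and test, so either choice works for this model
train_x <- array_reshape(train_x, c(nrow(train_x), 784)) / 255
test_x <- array_reshape(test_x, c(nrow(test_x), 784)) / 255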
# converting the target variable to one-hot encoded vectors using keras' built-in to_categorical()
train_y<-to_categorical(train_y,10)
test_y<-to_categorical(test_y,10)
#defining a keras sequential model
model <- keras_model_sequential()
# defining the model with 1 input layer [784 neurons], 1 hidden layer [784 neurons] with dropout rate 0.4,
# and 1 output layer [10 neurons], i.e. the number of digits from 0 to 9
model %>%
  layer_dense(units = 784, input_shape = 784) %>%
  layer_dropout(rate = 0.4) %>%
  layer_activation(activation = 'relu') %>%
  layer_dense(units = 10) %>%
  layer_activation(activation = 'softmax')
# compiling the model with accuracy as the metric and adam as the optimizer
model %>% compile(
  loss = 'categorical_crossentropy',
  optimizer = 'adam',
  metrics = c('accuracy')
)
# fitting the model on the training dataset
history <- model %>% fit(
  train_x,
  train_y,
  epochs = 100,
  batch_size = 128
)
plot(history)
# evaluating the model on the held-out test set
loss_and_metrics <- model %>% evaluate(test_x, test_y, batch_size = 128)
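To turn the softmax probabilities into digit predictions, take the arg-max of each output row; a minimal sketch using the model trained above:
# predict class probabilities for the test images and decode the arg-max
probs <- model %>% predict(test_x)
predicted_digits <- max.col(probs) - 1  # columns 1..10 correspond to digits 0..9
head(predicted_digits)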
IMDB
Code
library(keras)
# load the built-in IMDB dataset, keeping only the 10,000 most frequent words
imdb <- dataset_imdb(num_words = 10000)
c(c(train_data, train_labels), c(test_data, test_labels)) %<-% imdb
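Each review is stored as a sequence of word indices. For inspection, a review can be decoded back to words with dataset_imdb_word_index(); a sketch (indices are offset by 3 because 0, 1, and 2 are reserved for padding, start-of-sequence, and unknown):
# decode the first training review back into words
word_index <- dataset_imdb_word_index()
reverse_word_index <- names(word_index)
names(reverse_word_index) <- word_index
decoded_review <- sapply(train_data[[1]], function(i) {
  word <- if (i >= 3) reverse_word_index[[as.character(i - 3)]]
  if (!is.null(word)) word else "?"
})
cat(decoded_review, sep = " ")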
#数据预处理
vectorize_sequences <- function(sequences, dimension = 10000) {
results <- matrix(0, nrow = length(sequences), ncol = dimension)
for (i in 1:length(sequences))
results[i, sequences[[i]]] <- 1
results
}
x_train <- vectorize_sequences(train_data)
x_test <- vectorize_sequences(test_data)
str(x_train[1,])
# label preprocessing: convert the labels to numeric 0/1 vectors
y_train <- as.numeric(train_labels)
y_test <- as.numeric(test_labels)
# build the model
model <- keras_model_sequential() %>%
  layer_dense(units = 16, activation = "relu", input_shape = c(10000)) %>%
  layer_dense(units = 16, activation = "relu") %>%
  layer_dense(units = 1, activation = "sigmoid")
# optimizer and loss function
# a configured optimizer can be passed instead, e.g. optimizer = optimizer_rmsprop(lr = 0.001); see the sketch below
model %>% compile(
  optimizer = "rmsprop",
  loss = "binary_crossentropy",
  metrics = c("accuracy")
)
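As the comment above suggests, a configured optimizer object can replace the string; a sketch (the argument is lr in older versions of the keras package and learning_rate in newer ones):
# equivalent compile call with an explicit optimizer object
model %>% compile(
  optimizer = optimizer_rmsprop(lr = 0.001),
  loss = "binary_crossentropy",
  metrics = c("accuracy")
)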
# hold out the first 10,000 samples as a validation set
val_indices <- 1:10000
x_val <- x_train[val_indices,]
partial_x_train <- x_train[-val_indices,]
y_val <- y_train[val_indices]
partial_y_train <- y_train[-val_indices]
# train the model
history <- model %>%
  fit(
    partial_x_train,
    partial_y_train,
    epochs = 20,
    batch_size = 512,
    validation_data = list(x_val, y_val)
  )
str(history)
# visualize the training history
plot(history)
# predict on new data
model %>% predict(x_test[1:10,])
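predict() returns sigmoid probabilities; to get hard 0/1 class labels, threshold at 0.5. A minimal sketch:
# convert predicted probabilities into class labels
probs <- model %>% predict(x_test[1:10,])
pred_class <- as.integer(probs > 0.5)  # > 0.5 means the review is predicted positive
data.frame(probability = as.vector(probs), predicted = pred_class)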
Tips
- Raw data usually needs a fair amount of preprocessing before it can be fed to a network.
- Stacks of dense layers with relu activations can solve a wide range of problems.
- In a binary classification problem, the network should end with a dense layer with one unit and a sigmoid activation.
- With such a scalar sigmoid output, the loss function to use is binary_crossentropy.
- Whatever the problem, rmsprop is usually a good enough choice of optimizer.
Reuters
Code
library(keras)
# load the built-in Reuters dataset, keeping only the 10,000 most frequent words
reuters <- dataset_reuters(num_words = 10000)
c(c(train_data, train_labels), c(test_data, test_labels)) %<-% reuters
# data preprocessing: multi-hot encode each newswire as a 10,000-dimensional 0/1 vector
vectorize_sequences <- function(sequences, dimension = 10000) {
  results <- matrix(0, nrow = length(sequences), ncol = dimension)
  for (i in 1:length(sequences))
    results[i, sequences[[i]]] <- 1
  results
}
x_train <- vectorize_sequences(train_data)
x_test <- vectorize_sequences(test_data)
# label preprocessing: one-hot encode the 46 topic labels
one_hot_train_labels <- to_categorical(train_labels)
one_hot_test_labels <- to_categorical(test_labels)
# build the model
model <- keras_model_sequential() %>%
  layer_dense(units = 64, activation = "relu", input_shape = c(10000)) %>%
  layer_dense(units = 64, activation = "relu") %>%
  layer_dense(units = 46, activation = "softmax")
# optimizer and loss function
# for integer labels, use loss = "sparse_categorical_crossentropy" instead (see the sketch below)
model %>% compile(
  optimizer = "rmsprop",
  loss = "categorical_crossentropy",
  metrics = c("accuracy")
)
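As noted in the comment above, one-hot encoding can be skipped entirely by keeping the integer labels and switching the loss; a sketch of the alternative:
# alternative: keep train_labels/test_labels as integers and use the sparse loss
model %>% compile(
  optimizer = "rmsprop",
  loss = "sparse_categorical_crossentropy",
  metrics = c("accuracy")
)
# fit() would then take the raw integer labels instead of one_hot_train_labels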
# hold out the first 1,000 samples as a validation set
val_indices <- 1:1000
x_val <- x_train[val_indices,]
partial_x_train <- x_train[-val_indices,]
y_val <- one_hot_train_labels[val_indices,]
partial_y_train <- one_hot_train_labels[-val_indices,]
# train the model
history <- model %>% fit(
  partial_x_train,
  partial_y_train,
  epochs = 20,
  batch_size = 512,
  validation_data = list(x_val, y_val)
)
# visualize the training history
plot(history)
# the network starts overfitting after about 9 epochs, so retrain a fresh model for 9 epochs
model <- keras_model_sequential() %>%
  layer_dense(units = 64, activation = "relu", input_shape = c(10000)) %>%
  layer_dense(units = 64, activation = "relu") %>%
  layer_dense(units = 46, activation = "softmax")
model %>% compile(
  optimizer = "rmsprop",
  loss = "categorical_crossentropy",
  metrics = c("accuracy")
)
history <- model %>% fit(
  partial_x_train,
  partial_y_train,
  epochs = 9,
  batch_size = 512,
  validation_data = list(x_val, y_val)
)
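To check the retrained model on the test set and decode its 46-way softmax output into topic indices, something like this sketch works:
# evaluate on the test set and decode predictions to topic indices
results <- model %>% evaluate(x_test, one_hot_test_labels)
predictions <- model %>% predict(x_test)
predicted_topics <- max.col(predictions) - 1  # topic indices 0..45
head(predicted_topics)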
Tips
- In a single-label, multi-class classification problem, the network should end with a softmax activation.
- categorical_crossentropy is almost always the loss to use: it minimizes the distance between the probability distribution output by the network and the true distribution of the targets.
- There are two ways to handle labels in multi-class classification: one-hot encode them and use categorical_crossentropy as the loss, or encode them as integers and use sparse_categorical_crossentropy.
Boston
Code
library(keras)
# load the built-in Boston housing dataset
dataset <- dataset_boston_housing()
c(c(train_data, train_targets), c(test_data, test_targets)) %<-% dataset
# data preprocessing
# feature-wise normalization: subtract each feature's mean and divide by its standard deviation,
# so every feature is centred on 0 with unit standard deviation
# R's scale() function makes this easy; note the test data is scaled with the training statistics
mean <- apply(train_data, 2, mean)
std <- apply(train_data, 2, sd)
train_data <- scale(train_data, center = mean, scale = std)
test_data <- scale(test_data, center = mean, scale = std)
# build the model
# the network ends with a single unit and no activation (a linear layer), the typical setup
# for scalar regression (predicting a single continuous value)
build_model <- function() {
  model <- keras_model_sequential() %>%
    layer_dense(units = 64, activation = "relu",
                input_shape = dim(train_data)[[2]]) %>%
    layer_dense(units = 64, activation = "relu") %>%
    layer_dense(units = 1)
  # optimizer and loss function
  # compile the network with the mse loss
  model %>% compile(
    optimizer = "rmsprop",
    loss = "mse",
    metrics = c("mae")
  )
}
# training and validation sets
# with so little data, use K-fold cross-validation
k <- 4
indices <- sample(1:nrow(train_data))
folds <- cut(indices, breaks = k, labels = FALSE)
num_epochs <- 200
all_mae_histories <- NULL
for (i in 1:k) {
  cat("processing fold #", i, "\n")
  val_indices <- which(folds == i, arr.ind = TRUE)
  val_data <- train_data[val_indices,]
  val_targets <- train_targets[val_indices]
  partial_train_data <- train_data[-val_indices,]
  partial_train_targets <- train_targets[-val_indices]
  model <- build_model()
  history <- model %>% fit(
    partial_train_data, partial_train_targets,
    validation_data = list(val_data, val_targets),
    epochs = num_epochs, batch_size = 1, verbose = 0
  )
  # depending on the keras version, this metric may be named val_mae instead
  mae_history <- history$metrics$val_mean_absolute_error
  all_mae_histories <- rbind(all_mae_histories, mae_history)
}
# build the history of successive mean K-fold validation scores
average_mae_history <- data.frame(
  epoch = seq_len(ncol(all_mae_histories)),
  validation_mae = apply(all_mae_histories, 2, mean)
)
# visualization
library(ggplot2)
ggplot(average_mae_history, aes(x = epoch, y = validation_mae)) + geom_line()
ggplot(average_mae_history, aes(x = epoch, y = validation_mae)) + geom_smooth()
# according to the plot, validation MAE stops improving significantly after about 125 epochs;
# beyond that the model starts overfitting
# train the final model on all the training data
model <- build_model()
model %>% fit(
  train_data,
  train_targets,
  epochs = 80,
  batch_size = 16,
  verbose = 0
)
result <- model %>% evaluate(test_data, test_targets)
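To make the reported metric concrete, the test-set MAE can also be computed by hand from the model's predictions; a sketch (targets are median home prices in thousands of dollars):
# recompute mean absolute error directly from the predictions
preds <- model %>% predict(test_data)
mean(abs(as.vector(preds) - test_targets))  # should match the mae reported by evaluate()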
Tips
回归使用不同于分类的损失函数,均方误差(MSE)是回归常用的损失函数。 当几乎没有可用的数据时,使用K-fold验证是可靠地评估模型的一种方法。 当可用的训练数据很少时,最好使用一个隐藏层较少的小网络(通常只有一个或两个),以避免严重的过拟合。