# 运用支持向量机实现多分类预测任务,并采用重采样的方法确保模型稳定性。
# 1. Find the hyper-parameters.
# Tunes a class-weighted radial-basis SVM ("svmRadialWeights") through
# caret::train() with 10-fold cross-validation. caret must already be attached
# and `trainset` must already exist; neither is set up in this section.
# Seed the RNG so the random CV fold assignment is reproducible, in line with
# the seeded steps further down.
set.seed(7777)
# Predictors: every column except the first, and never the outcome `y` itself
# (guards against leaking the label into the predictors should `y` not be the
# first column).
fit.svm <- train(
  x = trainset[, setdiff(names(trainset)[-1], "y"), drop = FALSE],
  y = trainset$y,
  method = "svmRadialWeights",
  trControl = trainControl(method = "cv", number = 10)
)
# Explicit print() so the output also appears when the script is source()d.
print(fit.svm)
# Show the variable-importance ranking of the predictors.
print(varImp(fit.svm))
# Plot the tuning results.
print(plot(fit.svm))
# 2. Fit the model with kernlab::ksvm.
# library() stops with an error if kernlab is missing; require() would only
# return FALSE and let the script fail later.
library(kernlab)
set.seed(7777)
# Model data: the outcome `y` plus every column except the first. Selecting by
# name keeps `y` available to the formula even when it is the first column
# (which `trainset[, -1]` would have dropped).
ksvm.train <- ksvm(
  y ~ .,
  data = trainset[, union("y", names(trainset)[-1]), drop = FALSE],
  kernel = "rbfdot",
  # ksvm() takes kernel hyper-parameters through `kpar`. A bare `sigma = ...`
  # is not a documented ksvm() argument, so the tuned width was presumably
  # ignored in favour of the automatic estimate.
  kpar = list(sigma = 0.01164937),
  # `weight = 1` was likewise not a ksvm() argument; unit weights correspond to
  # the default of no class weighting, so it is omitted.
  C = 1,
  shrinking = TRUE
)
# 3. Predict on the test set and derive precision / recall / F-score.
set.seed(77)
ksvm.test <- predict(ksvm.train, newdata = testset)
# Cross-tabulation: rows are predicted labels, columns are observed labels.
ksvm.t <- table(ksvm.test, testset$y)
# Index 2 is treated as the positive class:
#   [2, 2] true positives, [2, 1] false positives, [1, 2] false negatives.
ksvm.Precision <- ksvm.t[2, 2] / sum(ksvm.t[2, c(2, 1)])
ksvm.Recall <- ksvm.t[2, 2] / sum(ksvm.t[c(2, 1), 2])
# F-score: harmonic mean of precision and recall.
ksvm.fscore <- 2 * (ksvm.Precision * ksvm.Recall) /
  (ksvm.Precision + ksvm.Recall)
# 4. Confusion matrix
set.seed(77)
# Compare the predicted labels (data) with the observed labels (reference).
# NOTE(review): no `positive` class is passed here, whereas the precision /
# recall code in step 3 indexes row/column 2 as the positive class -- confirm
# both refer to the same class.
ksvm.cf <- confusionMatrix(data = ksvm.test, reference = testset$y)
# 5. ROC, AUC
set.seed(77)
# Build the ROC curve from the SVM's continuous decision values rather than
# from the hard class labels in `ksvm.test`: labels give only one operating
# point, so the resulting "AUC" would not reflect the model's ranking ability.
# Assumes a two-class problem (one decision value per test row) -- TODO confirm.
roc.ksvm <- roc(
  response = testset$y,
  predictor = as.numeric(
    predict(ksvm.train, newdata = testset, type = "decision")
  )
)
# 6. 10-fold CV and summary table
set.seed(77) # the cross-validation split is random
# Cross-validate an e1071 SVM on the training data. Predictors are every
# column except the first, and never the outcome `y` itself.
# NOTE(review): no gamma/cost is passed, so this CV error is for e1071::svm()'s
# default settings rather than the sigma/C used for `ksvm.train` -- confirm
# that is intended. The fold count is presumably tune.control()'s default.
tuned.svm <- tune.svm(
  x = trainset[, setdiff(names(trainset)[-1], "y"), drop = FALSE],
  y = as.factor(trainset$y)
)
# One-row summary of the test-set metrics plus the CV error. Built in a single
# data.frame() call with plain numeric columns: as.numeric() strips the names
# and the pROC "auc" class/attributes that would otherwise be stored in the
# frame, and `best.performance` is always a single value.
result.svm1 <- data.frame(
  accuracy = as.numeric(ksvm.cf$overall["Accuracy"]),
  sensitivity = as.numeric(ksvm.cf$byClass["Sensitivity"]),
  specificity = as.numeric(ksvm.cf$byClass["Specificity"]),
  auc = as.numeric(roc.ksvm$auc),
  cv = tuned.svm$best.performance
)
# 7. Plot
# NOTE(review): only the `cancer` column is selected below, so the formula
# `as.factor(cancer) ~ .` has no predictor columns to expand to -- confirm
# which predictor columns were meant to be kept for the plot.
data.svm <- subset(trainset, select = cancer)
# Sigmoid-kernel SVM with fixed gamma and cost.
svm.train4 <- svm(
  as.factor(cancer) ~ .,
  data = data.svm,
  kernel = "sigmoid",
  gamma = 0.01,
  cost = 100
)
summary(svm.train4)
# Draw the fitted model over the data.
plot(
  svm.train4,
  data = data.svm,
  svSymbol = 1,
  dataSymbol = 2,
  symbolPalette = rainbow(2),
  color.palette = terrain.colors
)