iris数据集预测

iris数据集预测(对比随机森林、逻辑回归和XGBoost算法)

 

随机森林

library(randomForest)

# Select the response classes: drop "setosa" so that only versicolor and
# virginica remain, then discard the now-empty factor level.
index <- iris[iris$Species != "setosa", ]
ir <- droplevels(index)

# Reproducible random split: each row is assigned to group 1 (train) with
# probability 0.7 or to group 2 (test) with probability 0.3.
set.seed(1)
ind <- sample(x = 2, size = nrow(ir), replace = TRUE, prob = c(0.7, 0.3))

train <- ir[ind == 1, ]
test <- ir[ind == 2, ]

 rf<-randomForest(Species~.,data=train,ntree=100)

rf

Call:
 randomForest(formula = Species ~ ., data = train, ntree = 100)
               Type of random forest: classification
                     Number of trees: 100
No. of variables tried at each split: 2

        OOB estimate of  error rate: 5.88%
Confusion matrix:
           versicolor virginica class.error
versicolor         32         2  0.05882353
virginica           2        32  0.05882353

# Error rate of the random forest.
plot(x = rf)

# Variable importance (reported as MeanDecreaseGini for this fit).
importance(x = rf)

importance(rf)
             MeanDecreaseGini
Sepal.Length        1.4398647
Sepal.Width         0.7037353
Petal.Length       11.1734509
Petal.Width        20.1025569

# Plot of the variable-importance measure of the fitted forest.
varImpPlot(x = rf)

 

 

# Inspect the predictions: classify the held-out rows and cross-tabulate the
# predicted class (rows) against the true class (columns).
pred <- predict(object = rf, newdata = test)

table(pred, test$Species)

pred         versicolor virginica
  versicolor         15         2
  virginica           1        14

# Prediction margin on the held-out test set.
# The margin of an observation is the proportion of votes for its true class
# minus the largest proportion of votes for any other class, so a positive
# margin means a correct classification.
# The vote matrix is computed for `test` so that its rows line up with
# test$Species; `rf` itself only stores votes for the training rows, so
# passing the test labels together with `rf` cannot match them up.
test_votes <- predict(rf, newdata = test, type = "vote")

plot(margin(test_votes, test$Species))

 

 

逻辑回归

library(pROC)

# Binomial logistic regression of Species on all other columns of `train`.
# For a factor response glm() treats the first level as failure and the
# remaining level as success, so the fitted probabilities are P(virginica).
# (The invisible zero-width-space characters that trailed the library() and
# glm() lines have been removed; they are invalid input to the R parser.)
g1 <- glm(Species ~ ., family = binomial(link = "logit"), data = train)

# Fitted probabilities for the training rows (no newdata supplied).
pre1 <- predict(g1, type = "response")

g1

Call:  glm(formula = Species ~ ., family = binomial(link = "logit"), 
    data = train)

Coefficients:
 (Intercept)  Sepal.Length   Sepal.Width  Petal.Length   Petal.Width  
   -32.01349      -3.85855      -0.02084       6.65355      14.08817  

Degrees of Freedom: 67 Total (i.e. Null);  63 Residual
Null Deviance:        94.27 
Residual Deviance: 8.309     AIC: 18.31

# Coefficient table with standard errors, z values and p-values, plus the
# deviance summary of the logistic model.
summary(object = g1)

Call:
glm(formula = Species ~ ., family = binomial(link = "logit"),
    data = train)

Deviance Residuals:
     Min        1Q    Median        3Q       Max  
-1.73457  -0.02241  -0.00011   0.03691   1.76243  

Coefficients:
              Estimate Std. Error z value Pr(>|z|)  
(Intercept)  -32.01349   28.51193  -1.123   0.2615  
Sepal.Length  -3.85855    3.16430  -1.219   0.2227  
Sepal.Width   -0.02084    4.85883  -0.004   0.9966  
Petal.Length   6.65355    5.47953   1.214   0.2246  
Petal.Width   14.08817    7.32507   1.923   0.0544 .
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for binomial family taken to be 1)

    Null deviance: 94.268  on 67  degrees of freedom
Residual deviance:  8.309  on 63  degrees of freedom
AIC: 18.309

Number of Fisher Scoring iterations: 9

 #方差分析

anova(g1,test="Chisq")
Analysis of Deviance Table

Model: binomial, link: logit

Response: Species

Terms added sequentially (first to last)


             Df Deviance Resid. Df Resid. Dev  Pr(>Chi)    
NULL                            67     94.268              
Sepal.Length  1   14.045        66     80.223 0.0001785 ***
Sepal.Width   1    0.782        65     79.441 0.3764212    
Petal.Length  1   62.426        64     17.015 2.766e-15 ***
Petal.Width   1    8.706        63      8.309 0.0031715 ** 
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

# Compute the optimal classification threshold from the training-set ROC
# curve; "virginica" is coded as the positive class (1).
modelroc1 <- roc(as.factor(ifelse(train$Species == "virginica", 1, 0)), pre1)

plot(modelroc1, print.thres = TRUE)

# Take the "best" threshold (the one print.thres = TRUE annotates on the
# plot) from the ROC object instead of hard-coding the value read off the
# figure (previously 0.804), so the cut-off stays in sync with the fitted
# model when the train/test split changes. unlist()[1] copes with both the
# data-frame and the vector return shapes of coords() and with ties.
best_threshold <- unlist(
  coords(modelroc1, "best", ret = "threshold"),
  use.names = FALSE
)[1]

# Evaluate the model's predictive performance on the held-out test set.
# The probabilities are stored under their own name so that the predict()
# generic is not masked by a variable called `predict`.
test_prob <- predict(g1, type = "response", newdata = test)

predict.results <- ifelse(
  test_prob > best_threshold, "virginica", "versicolor"
)

misClasificError <- mean(predict.results != test$Species)

print(paste("Accuracy:", 1 - misClasificError))
[1] "Accuracy: 0.90625"

 

XGBoost

 

# xgboost is not attached anywhere earlier in this script, so attach it
# here, following the per-section library() convention used above.
library(xgboost)

# 0/1 label vector taken from the factor codes:
# versicolor -> 0, virginica -> 1.
y <- as.integer(train$Species) - 1L

# Feature matrix: every column except the fifth one (Species).
x <- data.matrix(train[-5])

# Two boosting rounds of depth-2 trees with learning rate 1. `nrounds` is
# spelled out in full instead of relying on partial matching of `nround`.
bst <- xgboost(
  data = x,
  label = y,
  max_depth = 2,
  eta = 1,
  nrounds = 2,
  objective = "binary:logistic"
)

[1]    train-error:0.029412
[2]    train-error:0.029412 

# Score the test rows with the same four predictor columns the booster was
# trained on. The Species label (column 5) is dropped: passing it would hand
# the model a fifth feature that was never part of the training matrix.
p <- predict(bst, newdata = data.matrix(test[-5]))

# Test-set ROC curve with "virginica" coded as the positive class (1).
modelroc2 <- roc(as.factor(ifelse(test$Species == "virginica", 1, 0)), p)

plot(modelroc2)

 

# Turn the predicted probabilities into class labels with a fixed cut-off
# and report the share of test rows classified correctly.
# NOTE(review): 0.11 is hard-coded and its origin is not shown here; if it
# was read off the test-set ROC curve the accuracy below is optimistic —
# confirm, and consider deriving it from training data instead.
predict.results <- ifelse(p > 0.11, "virginica", "versicolor")

misClasificError <- mean(predict.results != test$Species)

print(as.character(1 - misClasificError))
[1] "0.90625"

 

转载于:https://www.cnblogs.com/aongao/p/7665891.html

目录列表: 2dplanes.arff abalone.arff ailerons.arff Amazon_initial_50_30_10000.arff anneal.arff anneal.ORIG.arff arrhythmia.arff audiology.arff australian.arff auto93.arff autoHorse.arff autoMpg.arff autoPrice.arff autos.arff auto_price.arff balance-scale.arff bank.arff bank32nh.arff bank8FM.arff baskball.arff bodyfat.arff bolts.arff breast-cancer.arff breast-w.arff breastTumor.arff bridges_version1.arff bridges_version2.arff cal_housing.arff car.arff cholesterol.arff cleveland.arff cloud.arff cmc.arff colic.arff colic.ORIG.arff contact-lenses.arff cpu.arff cpu.with.vendor.arff cpu_act.arff cpu_small.arff credit-a.arff credit-g.arff cylinder-bands.arff delta_ailerons.arff delta_elevators.arff dermatology.arff detroit.arff diabetes.arff diabetes_numeric.arff echoMonths.arff ecoli.arff elevators.arff elusage.arff eucalyptus.arff eye_movements.arff fishcatch.arff flags.arff fried.arff fruitfly.arff gascons.arff glass.arff grub-damage.arff heart-c.arff heart-h.arff heart-statlog.arff hepatitis.arff house_16H.arff house_8L.arff housing.arff hungarian.arff hypothyroid.arff ionosphere.arff iris.2D.arff iris.arff kdd_coil_test-1.arff kdd_coil_test-2.arff kdd_coil_test-3.arff kdd_coil_test-4.arff kdd_coil_test-5.arff kdd_coil_test-6.arff kdd_coil_test-7.arff kdd_coil_train-1.arff kdd_coil_train-3.arff kdd_coil_train-4.arff kdd_coil_train-5.arff kdd_coil_train-6.arff kdd_coil_train-7.arff kdd_el_nino-small.arff kdd_internet_usage.arff kdd_ipums_la_97-small.arff kdd_ipums_la_98-small.arff kdd_ipums_la_99-small.arff kdd_JapaneseVowels_test.arff kdd_JapaneseVowels_train.arff kdd_synthetic_control.arff kdd_SyskillWebert-Bands.arff kdd_SyskillWebert-BioMedical.arff kdd_SyskillWebert-Goats.arff kdd_SyskillWebert-Sheep.arff kdd_UNIX_user_data.arff kin8nm.arff kr-vs-kp.arff labor.arff landsat_test.arff landsat_train.arff letter.arff liver-disorders.arff longley.arff lowbwt.arff lung-cancer.arff lymph.arff machine_cpu.arff mbagrade.arff meta.arff mfeat-factors.arff mfeat-fourier.arff 
mfeat-karhunen.arff mfeat-morphological.arff mfeat-pixel.arff mfeat-zernike.arff molecular-biology_promoters.arff monks-problems-1_test.arff monks-problems-1_train.arff monks-problems-2_test.arff monks-problems-2_train.arff monks-problems-3_test.arff monks-problems-3_train.arff mushroom.arff mv.arff nursery.arff optdigits.arff page-blocks.arff pasture.arff pbc.arff pendigits.arff pharynx.arff pol.arff pollution.arff postoperative-patient-data.arff primary-tumor.arff puma32H.arff puma8NH.arff pwLinear.arff pyrim.arff quake.arff ReutersCorn-test.arff ReutersCorn-train.arff ReutersGrain-test.arff ReutersGrain-train.arff schlvote.arff segment-challenge.arff segment-test.arff segment.arff sensory.arff servo.arff sick.arff sleep.arff solar-flare_1.arff solar-flare_2.arff sonar.arff soybean.arff spambase.arff spectf_test.arff spectf_train.arff spectrometer.arff spect_test.arff spect_train.arff splice.arff sponge.arff squash-stored.arff squash-unstored.arff stock.arff strike.arff supermarket.arff triazines.arff unbalanced.arff vehicle.arff veteran.arff vineyard.arff vote.arff vowel.arff water-treatment.arff waveform-5000.arff weather.nominal.arff weather.numeric.arff white-clover.arff wine.arff wisconsin.arff zoo.arff
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值