R语言常用命令总结
之前做过一次数学建模竞赛,使用的分析工具为R语言,这里总结一些R语言常用的指令以备自用。
# `$` selects a column of a table. Here every entry of beforedel$vicinity that
# equals -9 is overwritten with 0. which() keeps only the positions where the
# comparison is TRUE, so NA entries are left untouched.
beforedel$vicinity <- replace(
  beforedel$vicinity,
  which(beforedel$vicinity == -9),
  0
)
# Pick a handful of columns out of the table by name.
feature_table <- beforedel[
  ,
  c(
    "targtype1",
    "weapontype1",
    "nkill",
    "property",
    "success",
    "vicinity"
  )
]
# Assemble a data.frame; each argument becomes one column named after the
# variable that was passed in (row1 ... row6).
targetdata <- data.frame(
  row1,
  row2,
  row3,
  row4,
  row5,
  row6
)
# Sum across the columns of each row, giving one total per row.
# na.rm = FALSE is the default, spelled out here: a row containing NA sums to NA.
damage <- rowSums(targetdata, na.rm = FALSE)
# Sort the rows by `damage` in descending order.
# NOTE(review): `damage` is a free-standing vector here; this assumes it lines
# up row-for-row with feature_table_damage -- confirm.
feature_table_damage_order <- feature_table_damage[order(-damage), ]
# Write the sorted table to disk only after it has been created. R reads and
# writes CSV files quickly, so prefer CSV over xlsx whenever possible.
write.csv(feature_table_damage_order, "feature_table_damage_order.csv")
# Bin the damage score into five grades. cut() sorts its break points, so the
# labels are applied from the lowest interval upward:
# (0, 0.2] -> "E", (0.2, 0.3] -> "D", (0.3, 0.4] -> "C",
# (0.4, 0.6] -> "B", (0.6, 1] -> "A".
# Intervals are right-closed by default, so a score of exactly 0 (or anything
# outside (0, 1]) becomes NA.
rank <- cut(
  feature_table_damage_order$damage,
  breaks = c(0, 0.2, 0.3, 0.4, 0.6, 1),
  labels = c("E", "D", "C", "B", "A")
)
# Attach the grade as an extra column named "rank". The table and the vector
# must have the same number of rows for data.frame() to combine them.
feature_table_damage_order_rank <- data.frame(
  feature_table_damage_order,
  rank = rank
)
# Keep only the rows whose event id is one of the listed ids.
# NOTE(review): this relies on a data-frame filter() with bare column names,
# presumably dplyr::filter() -- dplyr must be attached; confirm.
questionone_answer <- filter(
  feature_table_damage_order_rank,
  beforedel.eventid %in% c(
    200108110012,
    200511180002,
    200901170021,
    201402110015,
    201405010071,
    201411070002,
    201412160041,
    201508010015,
    201705080012
  )
)
# Frequency count: how many rows fall into each of the grades A-E.
table(rank = rank)
# Split test1 into a training set and a test set. Each row independently draws
# label 1 (probability 0.8) or label 2 (probability 0.2), so the 80/20 split is
# approximate rather than exact.
ind <- sample.int(
  2L,
  size = nrow(test1),
  replace = TRUE,
  prob = c(0.8, 0.2)
)
trainData <- test1[ind == 1, ]
testData <- test1[ind == 2, ]
# Confusion matrix and accuracy (in percent) of tree.model on the training data.
# NOTE(review): assumes predict() returns one class label per row of trainData
# -- confirm for the model type in use.
predicted <- predict(tree.model, trainData)
# deparse.level = 0 keeps the table's dimension names unlabeled.
confusion <- table(trainData$result.cluster, predicted, deparse.level = 0)
# Count correct predictions by comparing labels pairwise instead of taking
# diag(confusion): the diagonal only holds the correct counts when the table is
# square with identically ordered labels, which fails when a class is never
# predicted. Pairs containing NA are dropped, matching what table() counts.
correct <- as.character(trainData$result.cluster) == as.character(predicted)
accuracy <- sum(correct, na.rm = TRUE) * 100 / sum(confusion)
# Inspect the structure of a table.
# NOTE(review): the snippet looks incomplete -- `str` is normally called with
# the object to inspect, e.g. str(some_table); confirm the intended argument.
str