#' Build the toy training set used by the decision-tree example.
#'
#' @return A 5 x 4 character matrix with columns "a", "b", "c" and "labels".
#'   Because numbers and strings are combined in one `c()` call, every cell
#'   is stored as a character string (e.g. "1", "0", "yes", "no").
loadData <- function() {
  # Use TRUE rather than T: T is an ordinary variable that callers can
  # reassign, which would silently switch the matrix to column-major fill.
  dataSet <- matrix(
    c(
      1, 1, 0, "yes",
      1, 1, 1, "yes",
      1, 0, 1, "no",
      0, 1, 0, "no",
      0, 1, 1, "no"
    ),
    nrow = 5,
    byrow = TRUE,
    dimnames = list(NULL, c("a", "b", "c", "labels"))
  )
  dataSet
}
#' Compute the Shannon entropy (in bits) of the class labels in a data set.
#'
#' @param dataSet A matrix with one row per observation and a column named
#'   "labels" holding the class of each row. May be NULL or have zero rows.
#' @return A single numeric value: -sum(p * log2(p)) over the distinct label
#'   values, where p is the label's count divided by the number of rows.
#'   Returns 0 when `dataSet` is NULL or has no rows.
calShannonEnt <- function(dataSet) {
  # An empty split carries no uncertainty; returning early also avoids the
  # 1:0 pitfall that made the loop-based version fail on empty input.
  if (is.null(dataSet) || nrow(dataSet) == 0) {
    return(0)
  }
  numEntries <- nrow(dataSet)
  # factor() keeps only the labels that actually occur (and drops NA), so no
  # zero counts reach log2() and produce NaN.
  labelCount <- table(factor(dataSet[, "labels"]))
  # The denominator stays the full row count, matching the original formula.
  prob <- as.numeric(labelCount) / numEntries
  -sum(prob * log2(prob))
}
#划分数据集 split DataSet
splitDataSet <- function(dataSet,axis,value){
retDataSet = NULL
for(i in 1:nrow(dataSet)){
if(dataSet[i,axis] == value){
tempDataSet = dataSe
R语言完整决策树代码
最新推荐文章于 2025-09-25 11:05:44 发布
本文通过R语言实现了一个完整的决策树算法，包括数据加载、计算香农熵、划分数据集、选择最佳特征和递归创建决策树的步骤。通过示例数据集展示了如何从头构建一个决策树，并对数据进行分类。

最低0.47元/天 解锁文章
1777

被折叠的 条评论
为什么被折叠?



