from numpy import *
from math import log
import operator
def calcShannonEnt(dataset):
    """Return the Shannon entropy (in bits) of the class labels in ``dataset``.

    The last element of every row is treated as that row's class label; the
    remaining elements are ignored.

    Args:
        dataset: A sized iterable of rows. Each row must be a non-empty
            sequence whose final element is a hashable label.

    Returns:
        ``-sum(p * log(p, 2))`` over the relative frequency ``p`` of each
        distinct label, as a float. An empty ``dataset`` yields ``0.0``.

    Raises:
        IndexError: If a row is an empty sequence (no last element).
    """
    numdataset = len(dataset)

    # Tally how many rows carry each class label.
    labelCount = {}
    for data in dataset:
        curlabel = data[-1]
        labelCount[curlabel] = labelCount.get(curlabel, 0) + 1

    shannonEnt = 0.0
    for count in labelCount.values():
        # float() keeps this a true division even where int / int truncates.
        prob = float(count) / numdataset
        shannonEnt -= prob * log(prob, 2)
    return shannonEnt
def creatDataSet():
dataset=[['青年','否','否','一般','否'],
['青年','否','否','好','否'],
['青年','是','否','好','是'],
['青年','是','是','一般','是'],
['青年','否','否','一般','否'],
['中年','否','否','一般',
李航 统计学习 采用C4.5算法构建决策树
最新推荐文章于 2023-07-02 20:14:57 发布