from numpy import *
from math import log
import operator
def calcShannonEnt(dataset):
    """Return the Shannon entropy (in bits) of the class labels in *dataset*.

    Args:
        dataset: Sequence of samples; the last element of each sample
            (``sample[-1]``) is used as its class label.

    Returns:
        ``-sum(p * log2(p))`` over the relative label frequencies, as a
        float. An empty dataset yields ``0.0`` because no label is counted.
    """
    numdataset = len(dataset)

    # Tally how many samples carry each class label.
    labelCount = {}
    for data in dataset:
        curlabel = data[-1]
        labelCount[curlabel] = labelCount.get(curlabel, 0) + 1

    shannonEnt = 0.0
    for count in labelCount.values():
        # float() keeps the division exact even under integer-division rules.
        prob = float(count) / numdataset
        shannonEnt -= prob * log(prob, 2)
    return shannonEnt
def creatDataSet():
dataset=[['青年','否','否','一般','否'],
['青年','否','否','好','否'],
['青年','是','否','好','是'],
['青年','是','是','一般','是'],
['青年','否','否','一般','否'],
['中年','否','否','一般','否'],
['中年','否','否','好','否'],
['中年','是','是','好','是'],
['中年','否','是','非常好','是'],
['中年','否','是','非常好','是'],
['老年','否','是','非常好','是'],
['老年','否','是','好','是'],
['老年','是','否','好','是'],
李航《统计学习方法》例5.3：利用ID3算法建立决策树
最新推荐文章于 2023-02-27 14:01:19 发布
