调用trees.py(计算给定数据集的香农熵+数据划分)
注:代码选自《Machine Learning in Action机器学习实战》
#trees.py
from math import log
def calcShannonEnt(dataSet):
    """Compute the Shannon entropy (base 2) of the class labels in dataSet.

    The last element of every record is treated as that record's class
    label; all other elements are ignored here.

    Args:
        dataSet: A sequence of records (indexable sequences) whose last
            item is the class label. Labels must be hashable.

    Returns:
        The entropy as a float. An empty dataSet yields 0.0 because no
        label is ever counted.
    """
    numEntries = len(dataSet)

    # Tally how many records carry each class label.
    labelCounts = {}
    for featVec in dataSet:
        currentLabel = featVec[-1]
        labelCounts[currentLabel] = labelCounts.get(currentLabel, 0) + 1

    # H = -sum(p * log2(p)) over the observed label frequencies.
    shannonEnt = 0.0
    for count in labelCounts.values():
        prob = float(count) / numEntries
        shannonEnt -= prob * log(prob, 2)
    return shannonEnt
def createDateSet():
    """Build the small hard-coded sample data set and its feature names.

    Returns:
        A tuple ``(dataSet, labels)``. ``dataSet`` is a list of five
        records, each holding two 0/1 feature values followed by a
        'yes'/'no' class label. ``labels`` is a list of two names
        corresponding to the two feature columns.
    """
    # Each row must be separated by a comma; without the commas Python
    # parses ``[1,1,'yes'][1,1,'yes']`` as subscripting a list with a
    # tuple and raises "TypeError: list indices must be integers or
    # slices, not tuple".
    dataSet = [
        [1, 1, 'yes'],
        [1, 1, 'yes'],
        [1, 0, 'yes'],
        [0, 1, 'no'],
        [0, 1, 'no'],
    ]
    labels = ['no surfacing', 'flippers']
    return dataSet, labels
# Driver script: call trees.py to build the sample data set and show it.
import trees

# createDateSet() returns the records together with the feature names.
sample = trees.createDateSet()
myDat, labels = sample
print(myDat)
TypeError
Traceback (most recent call last):
File "E:/PyCharm/untitled/shang.py", line 2, in <module>
myDat,labels=trees.createDateSet()
File "E:\PyCharm\untitled\trees.py", line 17, in createDateSet
[1,1,'yes']
TypeError: list indices must be integers or slices, not tuple
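修改:出错原因是 dataSet 中各行列表之间缺少逗号,Python 会把 `[1,1,'yes'][1,1,'yes']` 解析为用元组 `(1,1,'yes')` 对前一个列表取下标,因此抛出上面的 TypeError。在每一行末尾补上逗号即可: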
# Fix: only dataSet needs to change -- separate the row lists with commas.
dataSet = [
    [1, 1, 'yes'],
    [1, 1, 'yes'],
    [1, 0, 'yes'],
    [0, 1, 'no'],
    [0, 1, 'no'],
]