区别在于 CART 采用基尼指数(Gini index)来选择最优划分特征。
def CART_chooseBestFeatureToSplit(dataset):
    """Return the index of the feature with the lowest weighted Gini impurity.

    Each row of ``dataset`` is a sequence whose last element is read as the
    class label; every preceding column is treated as a candidate feature.
    For each feature the rows are grouped by that feature's value, the Gini
    impurity ``1 - sum(p_k ** 2)`` is computed over the class labels of each
    group, and the impurities are summed weighted by group size.

    The impurity is computed over whatever labels actually occur, so it
    works for any label values and any number of classes (for two classes
    it reduces to ``1 - p**2 - (1 - p)**2``).

    Args:
        dataset: Sequence of rows; feature values and labels must be
            hashable. The column count is taken from the first row.

    Returns:
        The zero-based index of the best feature. Ties go to the lowest
        index. Returns -1 when ``dataset`` is empty or its rows contain
        only the label column.

    Side effects:
        Prints the weighted Gini value of every feature to stdout.
    """
    if not dataset:
        # Nothing to split on; reuse the "no feature found" sentinel.
        return -1

    total = float(len(dataset))
    numFeatures = len(dataset[0]) - 1
    bestGini = float("inf")
    bestFeature = -1
    for i in range(numFeatures):
        # Single pass over the rows: feature value -> {label: row count}.
        labelCounts = {}
        for example in dataset:
            counts = labelCounts.setdefault(example[i], {})
            counts[example[-1]] = counts.get(example[-1], 0) + 1

        gini = 0.0
        for counts in labelCounts.values():
            subsetSize = float(sum(counts.values()))
            impurity = 1.0 - sum((n / subsetSize) ** 2 for n in counts.values())
            # Weight each subset's impurity by the share of rows it holds.
            gini += (subsetSize / total) * impurity
        print(u"CART中第%d个特征的基尼值为:%.3f" % (i, gini))
        # Strict comparison keeps the earliest feature when values tie.
        if gini < bestGini:
            bestGini = gini
            bestFeature = i
    return bestFeature