Posting the code first; I'll come back and fill in the write-up when I have time.
1. This code uses the information gain criterion (entropy).
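As a quick refresher: with criterion='entropy', the tree chooses at every node the split that maximizes information gain, i.e. the drop in the Shannon entropy H(S) = -Σ p_i * log2(p_i) of the class labels. A tiny standalone snippet, written only for illustration (it is not part of the script below), that computes this entropy:

import math

def entropy(labels):
    ## Shannon entropy (in bits) of a list of class labels
    n = len(labels)
    counts = {}
    for label in labels:
        counts[label] = counts.get(label, 0) + 1
    return -sum((c / n) * math.log2(c / n) for c in counts.values())

print(entropy(['yes'] * 9 + ['no'] * 5))  ## 9 positive vs. 5 negative labels -> about 0.940 bits

The full script: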
from sklearn.feature_extraction import DictVectorizer
import csv
from sklearn import preprocessing
from sklearn import tree
from io import StringIO  ## sklearn.externals.six was removed in newer scikit-learn; StringIO is only needed for exporting the tree
allElectronicsData = open(r'E:\decision tree\wyt.csv', 'r')  ## raw string so the backslashes in the Windows path are not treated as escapes
reader = csv.reader(allElectronicsData)  ## read the file row by row
headers = next(reader)  ## header row with the column names
featureList = []
labelList = []
for row in reader:
    labelList.append(row[len(row) - 1])  ## the last column is the class label
    rowDict = {}
    for i in range(1, len(row) - 1):  ## the middle columns hold the feature values (column 0 is skipped)
        rowDict[headers[i]] = row[i]
    featureList.append(rowDict)
print(featureList)
## convert the feature dictionaries into one-hot encoded numeric vectors
vec = DictVectorizer()
dummyX = vec.fit_transform(featureList).toarray()
print("dummyX: " + str(dummyX))
print(vec.get_feature_names_out())  ## on scikit-learn < 1.0 this is vec.get_feature_names()
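## Note: DictVectorizer one-hot encodes the categorical features, so every distinct
## feature value gets its own 0/1 column. For example, a hypothetical feature "age"
## with values youth / middle_aged / senior would become three columns
## age=youth, age=middle_aged, age=senior (illustrative names only; the real
## columns depend on what is in wyt.csv).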
print("labelList:" + str(labelList))
## convert the class labels into numeric values
lb = preprocessing.LabelBinarizer()
dummyY = lb.fit_transform(labelList)
print("dummyX" + str(dummyY))
## build the decision tree classifier
clf = tree.DecisionTreeClassifier(criterion='entropy')  ## split on information gain (entropy)
clf.fit(dummyX, dummyY)
print("clf:" + str(clf))
oneRowX = dummyX[0, :]  ## take the first training sample as a template
print("oneRowX:" + str(oneRowX))
newRowX = oneRowX.copy()  ## copy it, otherwise editing newRowX would also modify dummyX
newRowX[0] = 1  ## flip two feature values to create a new sample
newRowX[2] = 0
print("newRowX:" + str(newRowX))
predictedY = clf.predict([newRowX])  ## predict the class of the new sample
print("predictedY:" + str(predictedY))