本次练习模仿上次练习(使用了泰坦尼克号幸存者数据),这次改用鸢尾花(iris)数据集训练决策树分类器。
下面直接上代码:
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn import tree
import numpy as np
# 1. Load the data: the iris dataset bundled with scikit-learn.
iris = datasets.load_iris()
x = iris.data
y = iris.target
print(x.shape)

# 2. Preprocessing: hold out 20% of the samples as a test set.
# random_state seeds the shuffling; passing the same seed again reproduces
# exactly the same train/test split on the next run.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=1
)

# 3. Fit a decision tree on the training split and predict the test split.
# NOTE(review): the classifier itself is not seeded, so the fitted tree may
# differ slightly between runs -- pass random_state here if that matters.
clf = DecisionTreeClassifier()
clf.fit(x_train, y_train)
y_predict = clf.predict(x_test)

# 4. Report the results.
print('Accuracy:', clf.score(x_test, y_test))
# classification_report takes (y_true, y_pred) in that order; swapping them
# exchanges precision and recall and counts support over the predictions.
print(classification_report(
    y_test,
    y_predict,
    target_names=['setosa', 'versicolor', 'virginica'],
))

# 5. Save the fitted tree in Graphviz DOT format.
# export_graphviz writes into the open handle and returns None when out_file
# is given, so its result is deliberately not assigned.
with open("Decision_tree_iris.dot", "w") as f:
    tree.export_graphviz(clf, out_file=f)

# 6. Render the tree with Graphviz:
#    open a terminal (cmd), change to the directory containing the .dot file,
#    then run:  dot -Tpdf Decision_tree_iris.dot -o output2.pdf