使用泰坦尼克数据,用sklearn决策树和随机森林进行预测比对
1.决策树(准确率:0.7811550)
# Imports used by the decision-tree section of this script.
import pandas as pd
from sklearn.feature_extraction import DictVectorizer
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier

# 1) Load the data
data = pd.read_csv("titanic.csv")

# 2) Select the feature columns and the target column.
# Take an explicit copy so the imputation below operates on an independent
# frame instead of a selection derived from `data`.
x = data[["pclass", "age", "sex"]].copy()
y = data["survived"]

# 3) Data processing
# Missing-value handling: replace missing ages with the mean age.
# The result is assigned back to the column rather than calling
# fillna(inplace=True) on x["age"]; the chained in-place form acts on a
# temporary Series under pandas Copy-on-Write and would leave the NaNs in x.
x["age"] = x["age"].fillna(x["age"].mean())
# Convert the features to a list of per-row dicts (the input format used for
# dict-based feature extraction below).
x = x.to_dict(orient="records")

# 4) Split into training and test sets (fixed seed for reproducibility)
x_train, x_test, y_train, y_test = train_test_split(x, y, random_state=22)

# Dict feature extraction: fit the vectorizer on the training set only, then
# apply the same fitted mapping to the test set.
transfer = DictVectorizer()
x_train = transfer.fit_transform(x_train)
x_test = transfer.transform(x_test)

# 5) Decision-tree estimator
estimator = DecisionTreeClassifier(criterion="entropy",max_depth&#