# A fairly simple decision tree: decide from a customer's existing information whether a loan can be approved. Kept here as a note.
#-*- coding:utf-8 -*-
import pandas as pd
import numpy as np
# Raise pandas' display limits so wide/long frames are not truncated when printed.
pd.set_option('display.max_columns', 200)
pd.set_option('display.max_rows', 500)
# Load the whitespace-delimited loan table and index it by the `nameid` column.
# The separator is a regular expression, so it is written as a raw string:
# in a plain string literal '\s' is an invalid escape sequence, which newer
# Python versions warn about.
data = pd.read_csv(
    '/Users/shaling/Downloads/loan_data.txt',
    encoding='utf-8',
    sep=r'\s+',
).set_index('nameid')
# Sanity check: show the first rows together with the (rows, columns) shape.
print(data.head(), data.shape)
# Feature matrix: every column except the `approve` label, as a NumPy array.
x = data.drop(columns=['approve']).values
# Label vector: the `approve` column, as a NumPy array.
y = data['approve'].values
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import GridSearchCV, train_test_split, cross_val_score
from sklearn import metrics
# Split into training and test sets by position: the first 700 rows are the
# training portion and every remaining row is the test portion (no shuffling).
# NOTE: the `text_*` names hold the *test* data; the spelling is kept because
# later statements refer to these module-level names.
train_x, text_x = x[:700], x[700:]
train_y, text_y = y[:700], y[700:]
# Build the model.
# Carve a validation split out of the training rows: 70% is used for fitting
# and the rest for validation; random_state=1 seeds the split.
x1, x2, y1, y2 = train_test_split(
    train_x, train_y, random_state=1, train_size=0.7)
# Decision tree with scikit-learn's default hyperparameters.
dtf = DecisionTreeClassifier()
dtf.fit(x1, y1)
# Report the score on the fitting split, the validation split and the
# held-out test rows, in that order.
fit_score = dtf.score(x1, y1)
print('模型训练分数:', fit_score)
valid_score = dtf.score(x2, y2)
print('模型验证分数:', valid_score)  # author's note: overfitting
test_score = dtf.score(text_x, text_y)
print('模型测试集分数:', test_score)
# Hyperparameter tuning
param = {'max_depth': np.arange(1, 50, 5), 'min_samples_lea