import pandas as pd

# Load the full feature table; 'status' is the binary target column.
data_all = pd.read_csv(r'C:\Users\lxy\Desktop\input\data_all.csv')

# 70/30 train-test split with a fixed seed for reproducibility.
from sklearn.model_selection import train_test_split

features = [col for col in data_all.columns if col != 'status']
x = data_all[features]
y = data_all['status']
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.3, random_state=2018
)
# --- Logistic regression baseline ---
from sklearn.linear_model import LogisticRegression

lr = LogisticRegression(random_state=2018)
lr.fit(x_train, y_train)
# Capture the accuracy instead of discarding it; the pasted REPL output
# was previously left in the file as a bare no-op literal.
lr_acc = lr.score(x_test, y_test)
print('LogisticRegression accuracy:', lr_acc)  # observed: 0.7484232655921513
# --- Linear SVM baseline ---
from sklearn.svm import LinearSVC

svm = LinearSVC(random_state=2018)
svm.fit(x_train, y_train)
# Capture the accuracy instead of discarding it (the original left the
# pasted REPL output as a bare no-op literal).
svm_acc = svm.score(x_test, y_test)
print('LinearSVC accuracy:', svm_acc)  # observed: 0.7484232655921513
# --- Decision tree baseline ---
from sklearn.tree import DecisionTreeClassifier

tree = DecisionTreeClassifier(random_state=2018)
tree.fit(x_train, y_train)
# Capture the accuracy instead of discarding it (the original left the
# pasted REPL output as a bare no-op literal).
tree_acc = tree.score(x_test, y_test)
print('DecisionTree accuracy:', tree_acc)  # observed: 0.6846531184302733
# --- LightGBM classifier ---
import lightgbm as lgb
from sklearn.metrics import roc_auc_score

# max_depth: any negative value means "no depth limit" in LightGBM; use
# the conventional -1 instead of the confusing original -7 (same behavior).
lgb_model = lgb.LGBMClassifier(
    boosting_type='gbdt', num_leaves=100, reg_alpha=3, reg_lambda=5,
    max_depth=-1, n_estimators=5000, objective='binary', subsample=0.9,
    colsample_bytree=0.77, subsample_freq=1, learning_rate=0.03,
    random_state=2018, n_jobs=16, min_child_weight=4,
    min_child_samples=5, min_split_gain=0,
)
lgb_model.fit(x_train, y_train)

# ROC-AUC must be computed from the positive-class probability, not from
# hard 0/1 predict() labels: with hard labels the original reported only
# 0.6514115364151357, which understates the model's ranking quality.
# NOTE: `best_iteration_` is only set when fitting with early stopping,
# so the original `num_iteration=lgb_model.best_iteration_` was a no-op
# (None) and has been dropped.
y_score = lgb_model.predict_proba(x_test)[:, 1]
auc_score = roc_auc_score(y_test, y_score)
print('LightGBM AUC:', auc_score)
# --- XGBoost classifier ---
from xgboost.sklearn import XGBClassifier
from sklearn.metrics import precision_score, roc_auc_score

xgbc = XGBClassifier(random_state=2018)
xgbc.fit(x_train, y_train)
# ROC-AUC must be computed from the positive-class probability, not from
# hard predict() labels: with hard labels the original reported only
# 0.6431606209508309, which understates the model's ranking quality.
xgb_score = xgbc.predict_proba(x_test)[:, 1]
auc_score = roc_auc_score(y_test, xgb_score)
print('XGBoost AUC:', auc_score)
# 小结: 这道题是简单的二分类问题。数据接近线性可分, SVM 和 LR 原理相近,
# 得分也相近, 效果较好; 而未调参的提升树算法效果并不理想。
# 所以简单情况下还是应多考虑简单模型。
# (Summary: a simple binary-classification task — near-linearly-separable,
# so SVM and LR score similarly and well, while untuned boosted trees
# underperform; prefer simple models for simple problems.)