数据挖掘项目(四)
目标任务:
记录5个模型(逻辑回归、SVM、决策树、随机森林、XGBoost)在 accuracy、precision、recall、F1-score 和 AUC 上的评分表格,并画出 ROC 曲线。
思路汇总:先做归一化处理,再应用对应算法并画出图像。
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC, LinearSVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from xgboost import XGBClassifier
# Candidate classifiers to compare. All use library defaults except the
# random forest, which is limited to 25 trees.
models = [
    LogisticRegression(),
    LinearSVC(),
    DecisionTreeClassifier(),
    RandomForestClassifier(n_estimators=25),
    XGBClassifier(),
]
# Display labels, index-aligned with `models`.
names = ['LogisticRegression', 'LinearSVC', 'DecisionTree', 'RandomForest', 'Xgboost']

# Fit every model on the training features and print its test-set score.
# X_selected, X_test_selected, y_train and y_test are not defined in this
# part of the file; they are expected to exist before this point.
for i, model in enumerate(models):
    model.fit(X_selected, y_train)
    # score() on a scikit-learn style classifier reports mean accuracy.
    accu = model.score(X_test_selected, y_test)
    print(names[i], accu)
from sklearn.metrics import roc_curve, auc
def plot_auc(model_name, y_pred, y_test):
fpr, tpr, threshold = roc_curve(y_pred, y_test)
roc_auc = auc(fpr, tpr)
plt.plot(fpr, tpr, label=f'{names[i]}(area={roc_auc:.2f})', )
plt.plot([0, 1], [0, 1], color='navy', linestyle='--')
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1