一、红酒数据集分类
1. 实验代码
from sklearn.datasets import load_wine
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay, roc_curve, auc
from sklearn.preprocessing import label_binarize
import matplotlib.pyplot as plt
import numpy as np
# Configure Matplotlib for Chinese text: use SimHei as the sans-serif font and
# disable the Unicode minus sign (presumably because SimHei lacks that glyph).
plt.rcParams.update({
    'font.sans-serif': ['SimHei'],
    'axes.unicode_minus': False,
})
# Load the scikit-learn wine dataset and pull out the pieces used below:
# the feature matrix, the target labels, and the feature / class names.
wine = load_wine()
X, y = wine.data, wine.target
class_names = wine.target_names.tolist()
feature_names = wine.feature_names
# Split the data, holding out 30% as the test set; random_state is fixed at 42.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)
# Parameter combinations to test: each dict is unpacked as keyword arguments
# into DecisionTreeClassifier (split criterion and maximum tree depth).
params = [
    {'criterion': criterion, 'max_depth': depth}
    for criterion, depth in (('gini', 3), ('gini', None), ('entropy', 3))
]
# For every parameter combination in `params`: fit a decision tree on the
# training split, display its confusion matrix on the test split, then plot one
# ROC curve per class.
# NOTE(review): this listing carries no indentation, so loop nesting is not
# marked. Presumably everything below belongs to the `for param` body, with the
# two `for i` loops nested inside it — restore the indentation before running.
for param in params:
# Train the model (random_state is fixed at 42)
clf = DecisionTreeClassifier(**param, random_state=42)
clf.fit(X_train, y_train)
# Confusion matrix of the test-set predictions, titled with the parameter dict
y_pred = clf.predict(X_test)
cm = confusion_matrix(y_test, y_pred)
disp = ConfusionMatrixDisplay(cm, display_labels=class_names)
disp.plot()
plt.title(f"参数设置:{param}")
plt.show()
# ROC curves: binarize the true labels into one indicator column per class and
# score each column against the matching column of predicted probabilities.
y_proba = clf.predict_proba(X_test)
# NOTE(review): classes=[0, 1, 2] and range(3) below hard-code three classes.
y_test_bin = label_binarize(y_test, classes=[0, 1, 2])
# Per-class results, keyed by class index
fpr = dict()
tpr = dict()
roc_auc = dict()
for i in range(3):
fpr[i], tpr[i], _ = roc_curve(y_test_bin[:, i], y_proba[:, i])
roc_auc[i] = auc(fpr[i], tpr[i])
# Draw all class curves on a fresh figure, labelled with class name and AUC
plt.figure()
for i in range(3):
plt.plot(fpr[i], tpr[i], label=f'{class_names[i]} (AUC={roc_auc[i]:.2f})')
# Black dashed diagonal from (0, 0) to (1, 1) as a reference line
plt.plot([0, 1], [0, 1], 'k--')
# Axis labels: false positive rate (x) and true positive rate (y)
plt.xlabel('假正率')
plt.ylabel('真正率')
# NOTE(review): no plt.legend() / plt.show() for the ROC figure appears in the
# visible lines — presumably they follow this excerpt; confirm.