Day10:机器学习建模与评估

最新推荐文章于 2026-01-07 08:55:54 发布

原创 · 最新推荐文章于 2026-01-07 08:55:54 发布 · 264 阅读

5 ·

CC 4.0 BY-SA版权

文章标签：

#机器学习 #人工智能 #python

Python60天打卡专栏收录该内容

40 篇文章

订阅专栏

作业：尝试对心脏病数据集采用机器学习模型建模和评估

1. 数据集划分

使用sklearn划分数据集

import lightgbm as lgb
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import xgboost as xgb
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import (
    accuracy_score,
    classification_report,
    confusion_matrix,
    f1_score,
    precision_score,
    recall_score,
)
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier

from sklearn.model_selection import train_test_split

# Matplotlib setup: use the SimHei font and turn off the Unicode minus glyph.
plt.rcParams.update({
    'font.sans-serif': ['SimHei'],
    'axes.unicode_minus': False,
})

# Load the heart-disease dataset from the local data directory.
df = pd.read_csv('./data/heart.csv')

# Split the frame into the label column (`target`) and the remaining
# feature columns.
y = df['target']
X = df.drop(columns=['target'])

# Hold out 20% of the rows as the test set; the fixed random_state makes
# the split reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)
print(f'训练集形状：{X_train.shape}, 测试集形状: {X_test.shape}')

训练集形状：(242, 13), 测试集形状: (61, 13)

2. 建模预测

SVM

# SVM: fit a support vector classifier on the training split and predict
# the labels of the test split.
svm_model = SVC(random_state=42).fit(X_train, y_train)
svm_pred = svm_model.predict(X_test)

# Classification report, then the confusion matrix.
print("\nSVM 分类报告：", classification_report(y_test, svm_pred), sep="\n")
print("SVM 混淆矩阵：", confusion_matrix(y_test, svm_pred), sep="\n")

# Summary metrics for the SVM; with their default arguments precision,
# recall and F1 describe the positive class.
svm_accuracy, svm_precision, svm_recall, svm_f1 = (
    score(y_test, svm_pred)
    for score in (accuracy_score, precision_score, recall_score, f1_score)
)
print(
    "SVM 模型评估指标：",
    f"准确率: {svm_accuracy:.4f}",
    f"精确率: {svm_precision:.4f}",
    f"召回率: {svm_recall:.4f}",
    f"F1 值: {svm_f1:.4f}",
    sep="\n",
)

在这里插入图片描述

逻辑回归（Logistic Regression）

# Logistic regression: fit on the training split and predict the labels
# of the test split.
logreg_model = LogisticRegression(random_state=42).fit(X_train, y_train)
logreg_pred = logreg_model.predict(X_test)

# Classification report, then the confusion matrix.
print("\n逻辑回归 分类报告：", classification_report(y_test, logreg_pred), sep="\n")
print("逻辑回归 混淆矩阵：", confusion_matrix(y_test, logreg_pred), sep="\n")

# Summary metrics, computed in the order accuracy, precision, recall, F1.
logreg_accuracy, logreg_precision, logreg_recall, logreg_f1 = (
    score(y_test, logreg_pred)
    for score in (accuracy_score, precision_score, recall_score, f1_score)
)
print(
    "逻辑回归 模型评估指标：",
    f"准确率: {logreg_accuracy:.4f}",
    f"精确率: {logreg_precision:.4f}",
    f"召回率: {logreg_recall:.4f}",
    f"F1 值: {logreg_f1:.4f}",
    sep="\n",
)

朴素贝叶斯

# Naive Bayes: fit a Gaussian naive Bayes classifier on the training split
# and predict the labels of the test split.
nb_model = GaussianNB().fit(X_train, y_train)
nb_pred = nb_model.predict(X_test)

# Classification report, then the confusion matrix.
print("\n朴素贝叶斯 分类报告：", classification_report(y_test, nb_pred), sep="\n")
print("朴素贝叶斯 混淆矩阵：", confusion_matrix(y_test, nb_pred), sep="\n")

# Summary metrics, computed in the order accuracy, precision, recall, F1.
nb_accuracy, nb_precision, nb_recall, nb_f1 = (
    score(y_test, nb_pred)
    for score in (accuracy_score, precision_score, recall_score, f1_score)
)
print(
    "朴素贝叶斯 模型评估指标：",
    f"准确率: {nb_accuracy:.4f}",
    f"精确率: {nb_precision:.4f}",
    f"召回率: {nb_recall:.4f}",
    f"F1 值: {nb_f1:.4f}",
    sep="\n",
)

在这里插入图片描述

决策树

# Decision tree: fit on the training split (seeded for reproducibility)
# and predict the labels of the test split.
dt_model = DecisionTreeClassifier(random_state=42).fit(X_train, y_train)
dt_pred = dt_model.predict(X_test)

# Classification report, then the confusion matrix.
print("\n决策树 分类报告：", classification_report(y_test, dt_pred), sep="\n")
print("决策树 混淆矩阵：", confusion_matrix(y_test, dt_pred), sep="\n")

# Summary metrics, computed in the order accuracy, precision, recall, F1.
dt_accuracy, dt_precision, dt_recall, dt_f1 = (
    score(y_test, dt_pred)
    for score in (accuracy_score, precision_score, recall_score, f1_score)
)
print(
    "决策树 模型评估指标：",
    f"准确率: {dt_accuracy:.4f}",
    f"精确率: {dt_precision:.4f}",
    f"召回率: {dt_recall:.4f}",
    f"F1 值: {dt_f1:.4f}",
    sep="\n",
)

随机森林

# Random forest: fit on the training split (seeded for reproducibility)
# and predict the labels of the test split.
rf_model = RandomForestClassifier(random_state=42).fit(X_train, y_train)
rf_pred = rf_model.predict(X_test)

# Classification report, then the confusion matrix.
print("\n随机森林 分类报告：", classification_report(y_test, rf_pred), sep="\n")
print("随机森林 混淆矩阵：", confusion_matrix(y_test, rf_pred), sep="\n")

# Summary metrics, computed in the order accuracy, precision, recall, F1.
rf_accuracy, rf_precision, rf_recall, rf_f1 = (
    score(y_test, rf_pred)
    for score in (accuracy_score, precision_score, recall_score, f1_score)
)
print(
    "随机森林 模型评估指标：",
    f"准确率: {rf_accuracy:.4f}",
    f"精确率: {rf_precision:.4f}",
    f"召回率: {rf_recall:.4f}",
    f"F1 值: {rf_f1:.4f}",
    sep="\n",
)

在这里插入图片描述

XGBoost

# XGBoost: fit a gradient-boosted classifier on the training split and
# predict the labels of the test split.
xgb_model = xgb.XGBClassifier(random_state=42).fit(X_train, y_train)
xgb_pred = xgb_model.predict(X_test)

# Classification report, then the confusion matrix.
print("\nXGBoost 分类报告：", classification_report(y_test, xgb_pred), sep="\n")
print("XGBoost 混淆矩阵：", confusion_matrix(y_test, xgb_pred), sep="\n")

# Summary metrics, computed in the order accuracy, precision, recall, F1.
xgb_accuracy, xgb_precision, xgb_recall, xgb_f1 = (
    score(y_test, xgb_pred)
    for score in (accuracy_score, precision_score, recall_score, f1_score)
)
print(
    "XGBoost 模型评估指标：",
    f"准确率: {xgb_accuracy:.4f}",
    f"精确率: {xgb_precision:.4f}",
    f"召回率: {xgb_recall:.4f}",
    f"F1 值: {xgb_f1:.4f}",
    sep="\n",
)

在这里插入图片描述

LightGBM

# LightGBM: fit a gradient-boosted classifier on the training split and
# predict the labels of the test split.
lgb_model = lgb.LGBMClassifier(random_state=42).fit(X_train, y_train)
lgb_pred = lgb_model.predict(X_test)

# Classification report, then the confusion matrix.
print("\nLightGBM 分类报告：", classification_report(y_test, lgb_pred), sep="\n")
print("LightGBM 混淆矩阵：", confusion_matrix(y_test, lgb_pred), sep="\n")

# Summary metrics, computed in the order accuracy, precision, recall, F1.
lgb_accuracy, lgb_precision, lgb_recall, lgb_f1 = (
    score(y_test, lgb_pred)
    for score in (accuracy_score, precision_score, recall_score, f1_score)
)
print(
    "LightGBM 模型评估指标：",
    f"准确率: {lgb_accuracy:.4f}",
    f"精确率: {lgb_precision:.4f}",
    f"召回率: {lgb_recall:.4f}",
    f"F1 值: {lgb_f1:.4f}",
    sep="\n",
)