已收到!你的两个文件位置从原来的桌面改为了:
```
F:/333333333/data_HeartDisease.xlsx
F:/333333333/student_HD.xlsx
```
---
### ✅ 下面是**完全更新路径后的完整 R 代码**,可直接运行:
```r
# ===================================================
# 心脏病预测建模作业 —— 路径更新版(F:/333333333)
# 使用 mlr3 框架训练并评估模型
# ===================================================
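# === 1. 安装必要包(首次运行时取消注释)===
# 注意:mlr3extralearners 不在 CRAN 上,需要从 mlr-org 的 R-universe 安装;
# randomForest 和 xgboost 是对应学习器所依赖的后端包,也需要一并安装。
# install.packages(c("mlr3", "mlr3learners", "readxl", "dplyr", "ggplot2", "randomForest", "xgboost"))
# install.packages("mlr3extralearners", repos = c("https://mlr-org.r-universe.dev", "https://cloud.r-project.org"))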
# === 2. 加载包 ===
library(mlr3)
library(mlr3learners) # 基础学习器
library(mlr3extralearners) # ✅ 提供 classif.randomForest
library(readxl)
library(dplyr)
library(ggplot2)
# === 3. Input file locations ===
# Both workbooks sit in the same folder, so the folder is named once and the
# full paths are assembled from it.
data_dir <- "F:/333333333"
train_file <- file.path(data_dir, "data_HeartDisease.xlsx")
test_file <- file.path(data_dir, "student_HD.xlsx")
# === 4. 数据准备函数 ===
#' Read one Excel workbook and clean it for modelling.
#'
#' Rows whose `HeartDisease` value is missing or equals the literal text
#' "HeartDisease" are dropped (the latter are presumably repeated header
#' rows -- confirm against the workbook). Columns whose names start with
#' "X__", "..." or ".rnn" are removed, `BMI` is coerced to numeric, and
#' `HeartDisease` becomes a factor with levels "No" then "Yes"; every value
#' other than "Yes" is mapped to "No".
#'
#' @param path Path of the Excel file handed to `read_excel()`.
#' @return The cleaned data frame.
prepare_data <- function(path) {
  raw <- read_excel(path)
  labelled <- filter(raw, !is.na(HeartDisease), HeartDisease != "HeartDisease")
  trimmed <- select(
    labelled,
    -starts_with("X__"), -starts_with("..."), -starts_with(".rnn")
  )
  mutate(
    trimmed,
    BMI = as.numeric(as.character(BMI)),
    HeartDisease = factor(
      ifelse(HeartDisease == "Yes", "Yes", "No"),
      levels = c("No", "Yes")
    )
  )
}
# === 5. Load data ===
train_data <- prepare_data(train_file)
test_data <- prepare_data(test_file)

# === 6. Feature set definitions ===
# Candidate column names for the two hand-picked subsets; only those that are
# actually present in the training data are kept.
clinical_candidates <- c(
  "BMI", "Smoking", "Stroke", "DiffWalking",
  "PhysicalHealth", "MentalHealth",
  "Diabetic", "GenHealth", "AgeCategory"
)
basic_candidates <- c(
  "Smoking", "AlcoholDrinking", "PhysicalActivity",
  "AgeCategory", "Sex", "BMI"
)

# Every training column except the target.
features_all <- setdiff(names(train_data), "HeartDisease")
features_clinical <- intersect(clinical_candidates, features_all)
features_basic <- intersect(basic_candidates, features_all)

feature_sets <- list(features_all, features_clinical, features_basic)
names(feature_sets) <- c("F1: All Features", "F2: Clinical", "F3: Basic")
# === 7. 创建任务函数(安全方式)===
#' Build an mlr3 classification task from selected columns.
#'
#' @param data Data frame holding the feature columns and the target column.
#' @param feats Character vector of feature column names to keep.
#' @param target Name of the target column.
#' @return A `TaskClassif` with id "heart" backed by the selected columns.
create_task <- function(data, feats, target = "HeartDisease") {
  data_sub <- data[, c(feats, target)]
  # Character columns are converted to factors; all other columns are left
  # untouched.
  is_chr <- vapply(data_sub, is.character, logical(1))
  for (chr_col in names(data_sub)[is_chr]) {
    data_sub[[chr_col]] <- as.factor(data_sub[[chr_col]])
  }
  TaskClassif$new(id = "heart", backend = data_sub, target = target)
}
# === 8. 定义学习器 ===
# Display name -> mlr3 learner key, kept side by side so the learner list and
# its labels cannot drift apart.
learner_keys <- c(
  "Logistic Regression" = "classif.log_reg",
  "Decision Tree" = "classif.rpart",
  "Random Forest" = "classif.randomForest", # needs mlr3extralearners
  "XGBoost" = "classif.xgboost"
)
# `learners` stays an unnamed list and `learner_names` a plain character
# vector, in matching order.
learners <- unname(lapply(learner_keys, lrn))
learner_names <- names(learner_keys)
# === 9. 模型评估循环 ===
# Fit every learner on every feature set and record its accuracy on the test
# data. One row per successful (learner, feature set) pair is collected in a
# list and bound into `results` once, after the loops.
acc_measure <- msr("classif.acc")
result_rows <- list()
for (i in seq_along(learners)) {
  learner <- learners[[i]]
  name <- learner_names[i]
  for (fs_name in names(feature_sets)) {
    feats <- feature_sets[[fs_name]]
    # Nothing to train on if none of the requested columns exist.
    if (length(feats) == 0) next
    row <- tryCatch(
      {
        train_task <- create_task(train_data, feats)
        test_task <- create_task(test_data, feats)
        learner$train(train_task)
        pred <- learner$predict(test_task)
        # $score() returns a named numeric vector, so the value has to be
        # extracted with [[ ]]; `$` is invalid on atomic vectors.
        acc <- pred$score(acc_measure)[["classif.acc"]]
        data.frame(
          Model = name,
          Features = fs_name,
          Accuracy = round(acc, 4)
        )
      },
      error = function(e) {
        # The handler is a separate function, not part of the loop body, so
        # `next` cannot be used here. Report the failure and yield NULL; the
        # loop then simply moves on to the next combination.
        message("跳过 ", name, " / ", fs_name, ": ", conditionMessage(e))
        NULL
      }
    )
    if (!is.null(row)) {
      result_rows[[length(result_rows) + 1L]] <- row
    }
  }
}
# Keep the three result columns even when every combination failed, so that
# code reading `results$Accuracy` still finds the column.
results <- if (length(result_rows) > 0) {
  do.call(rbind, result_rows)
} else {
  data.frame(
    Model = character(0),
    Features = character(0),
    Accuracy = numeric(0)
  )
}
# === 10. 输出结果 ===
# Print the accuracy table (best first) and the single best combination.
# The summary is skipped when `results` has no rows: sorting by `Accuracy`
# would fail on an empty result and there is no best model to report.
if (nrow(results) > 0) {
  cat("\n📊 模型性能汇总表(按准确率排序):\n")
  print(results %>% arrange(desc(Accuracy)))
  best <- results[which.max(results$Accuracy), ]
  cat("\n🏆 最高预测准确率模型:\n")
  cat("算法: ", best$Model, "\n")
  cat("特征组合: ", best$Features, "\n")
  cat("准确率: ", best$Accuracy, "\n\n")
} else {
  cat("\n没有任何模型成功完成训练与评估,无结果可汇总。\n")
}
# === 11. 可视化 ===
# Grouped bar chart of accuracy per model, one bar per feature set.
if (nrow(results) > 0) {
  accuracy_plot <- ggplot(results, aes(x = Model, y = Accuracy, fill = Features)) +
    geom_col(position = "dodge") +
    labs(title = "心脏病预测模型准确率对比", y = "准确率", x = "模型") +
    ylim(0, 1) +
    theme_minimal() +
    theme(axis.text.x = element_text(angle = 30, hjust = 1))
  # A ggplot object is only drawn when it is printed. Auto-printing does not
  # happen when the script is run through source() or Rscript, so print it
  # explicitly.
  print(accuracy_plot)
}
```
---
✅ **本代码已做以下更新:**
| 修改项 | 内容 |
|-------|------|
| ✅ 文件路径 | 改为 `F:/333333333/...` |
| ✅ 包依赖 | 确保 `mlr3extralearners` 已加载(解决 randomForest 找不到问题) |
| ✅ 错误防护 | 增加 `tryCatch` 和列检查 |
---
📌 **你现在只需:**
1. 确保文件确实在:
- `F:/333333333/data_HeartDisease.xlsx`
- `F:/333333333/student_HD.xlsx`
2. 第一次运行前执行:
```r
# mlr3extralearners is not published on CRAN; install it from the mlr-org
# R-universe repository (CRAN is listed as well so its dependencies resolve).
install.packages(
  "mlr3extralearners",
  repos = c("https://mlr-org.r-universe.dev", "https://cloud.r-project.org")
)
# Backend packages required by the random forest and XGBoost learners.
install.packages(c("randomForest", "xgboost"))
```
3. 直接运行整段脚本即可!
---
需要我帮你把预测结果保存成 Excel 或 CSV 吗?随时告诉我。