- 模块化设计:将不同功能封装成独立函数,提高代码复用性和可读性
- 错误处理:增加了数据有效性检查和异常处理机制
- 灵活性:支持自定义变量名和数据预处理
- 图形优化:改进了ROC曲线的绘制方法
- 结果管理:统一返回一个结果列表,便于后续分析
使用时,只需调用perform_survival_analysis()
函数,传入相应的数据和参数即可。代码还包含了详细的注释和使用示例。
# 生存分析与时间依赖ROC曲线分析脚本
# 加载必要的库
library(rms)
library(timeROC)
library(pROC)
# 数据预处理函数
prepare_survival_data <- function(data, categorical_vars = list()) {
# 转换分类变量
for (var_name in names(categorical_vars)) {
data[[var_name]] <- factor(
data[[var_name]],
levels = categorical_vars[[var_name]]$levels,
labels = categorical_vars[[var_name]]$labels
)
}
return(data)
}
# 构建Cox比例风险模型函数
build_cox_model <- function(data, time_var, status_var, predictors) {
# 检查数据有效性
stopifnot(
all(c(time_var, status_var, predictors) %in% names(data)),
is.numeric(data[[time_var]]),
is.numeric(data[[status_var]])
)
# 构建公式
formula <- as.formula(paste("Surv(", time_var, ",", status_var, ") ~",
paste(predictors, collapse = " + ")))
# 设置数据分布
options(datadist = NULL)
ddist <- datadist(data)
options(datadist = 'ddist')
# 拟合Cox回归模型
model <- tryCatch(
rms::cph(formula, data = data, surv = TRUE, x = TRUE, y = TRUE),
error = function(e) {
stop("模型拟合失败:", e$message)
}
)
return(model)
}
# 绘制时间依赖ROC曲线函数
plot_time_roc <- function(roc_obj, times, output_file = NULL) {
# 创建多面板图形
par(mfrow = c(1, length(times)), mar = c(4, 4, 2, 1))
for (i in seq_along(times)) {
plot(roc_obj,
time = times[i],
lty = 1,
lwd = 2,
xlab = '1-特异性',
ylab = '敏感性',
col = rainbow(length(times))[i],
main = paste(times[i]/365.25, "年ROC曲线"))
}
# 如果提供了输出文件,保存图形
if (!is.null(output_file)) {
dev.copy(png, filename = output_file)
dev.off()
}
}
# 计算时间依赖ROC曲线
calculate_time_roc <- function(time, status, pred_values, times, cause = 1) {
roc_result <- timeROC::timeROC(
T = time,
delta = status,
marker = pred_values,
cause = cause,
weighting = 'marginal',
times = times,
iid = TRUE
)
return(roc_result)
}
# 主分析函数
perform_survival_analysis <- function(train_data,
validation_data = NULL,
external_data = NULL,
time_var = 'time',
status_var = 'status',
predictors = c(),
categorical_vars = list()) {
# 数据预处理
train_data <- prepare_survival_data(train_data, categorical_vars)
# 构建模型
model <- build_cox_model(train_data, time_var, status_var, predictors)
# 定义时间点
time_points <- c(365.25 * 1, 365.25 * 3, 365.25 * 5)
# 训练集ROC分析
train_pred_values <- predict(model, newdata = train_data)
train_roc <- calculate_time_roc(
train_data[[time_var]],
train_data[[status_var]],
train_pred_values,
time_points
)
# 绘制训练集ROC曲线
plot_time_roc(train_roc, time_points, "train_roc_curves.png")
# 存储结果
results <- list(
model = model,
train_roc = train_roc,
train_auc = train_roc$AUC,
train_ci_auc = confint(train_roc)$CI_AUC
)
# 如果有验证数据集
if (!is.null(validation_data)) {
validation_data <- prepare_survival_data(validation_data, categorical_vars)
validation_pred_values <- predict(model, newdata = validation_data)
validation_roc <- calculate_time_roc(
validation_data[[time_var]],
validation_data[[status_var]],
validation_pred_values,
time_points
)
plot_time_roc(validation_roc, time_points, "validation_roc_curves.png")
results$validation_roc <- validation_roc
results$validation_auc <- validation_roc$AUC
results$validation_ci_auc <- confint(validation_roc)$CI_AUC
}
# 如果有外部验证数据集
if (!is.null(external_data)) {
external_data <- prepare_survival_data(external_data, categorical_vars)
external_pred_values <- predict(model, newdata = external_data)
external_roc <- calculate_time_roc(
external_data[[time_var]],
external_data[[status_var]],
external_pred_values,
time_points
)
plot_time_roc(external_roc, time_points, "external_roc_curves.png")
results$external_roc <- external_roc
results$external_auc <- external_roc$AUC
results$external_ci_auc <- confint(external_roc)$CI_AUC
}
return(results)
}
# 使用示例(需要根据实际数据替换)
# results <- perform_survival_analysis(
# train_data,
# validation_data,
# external_data,
# predictors = c('predictor1', 'predictor2'),
# categorical_vars = list(
# predictor1 = list(levels = c(0, 1), labels = c('No', 'Yes')),
# predictor2 = list(levels = c(1, 2, 3), labels = c('Stage I', 'Stage II', 'Stage III'))
# )
# )