精简代码:FAS_TB <- function(Va, Vb, Vc = NULL, Vd = NULL, data1, by = NULL, condition = NULL) {
# 加载必要的包
if (!requireNamespace("openxlsx", quietly = TRUE)) {
install.packages("openxlsx")
}
library(openxlsx)
# 定义计算置信区间的函数
calc_ci <- function(p, n) {
if (n == 0) return(c(NA, NA))
se <- sqrt(p * (1 - p) / n)
lower <- max(0, p - 1.96 * se)
upper <- min(1, p + 1.96 * se)
return(c(lower, upper))
}
# 处理没有分组的情况
if (is.null(Vc) && is.null(Vd) && is.null(by)) {
# 提取数据
x1 <- data1[[Va]]
x2 <- data1[[Vb]]
# 构建四格表
a <- sum(x1 == "阳性" & x2 == "阳性", na.rm = TRUE)
b <- sum(x1 == "阳性" & x2 == "阴性", na.rm = TRUE)
c_val <- sum(x1 == "阴性" & x2 == "阳性", na.rm = TRUE)
d <- sum(x1 == "阴性" & x2 == "阴性", na.rm = TRUE)
n <- a + b + c_val + d
# 创建四格表data.frame
table1 <- data.frame(
row.names = c(paste0(Va, "阳性"), paste0(Va, "阴性"), "合计"),
阳性 = c(a, c_val, a + c_val),
阴性 = c(b, d, b + d),
合计 = c(a + b, c_val + d, n)
)
names(table1) <- c(paste0(Vb, "阳性"), paste0(Vb, "阴性"), "合计")
# 计算Kappa值
PA <- (a + d) / n
Pe <- ((a + b) * (a + c_val) + (c_val + d) * (b + d)) / n^2
kappa_val <- (PA - Pe) / (1 - Pe)
# 使用您指定的公式计算Kappa的标准误和置信区间
kappa_se <- sqrt(PA * (1 - PA) / (n * (1 - Pe)^2))
kappa_ci_lower <- kappa_val - 1.96 * kappa_se
kappa_ci_upper <- kappa_val + 1.96 * kappa_se
# 输出计算结果
cat("Kappa 标准误 =", round(kappa_se, 4), "\n")
cat("95% CI =", round(kappa_ci_lower, 4), ",", round(kappa_ci_upper, 4), "\n")
# 确保置信区间在[-1, 1]范围内
kappa_ci_lower <- max(-1, kappa_ci_lower)
kappa_ci_upper <- min(1, kappa_ci_upper)
# 计算其他指标的置信区间
positive_agreement <- a / (a + c_val)
negative_agreement <- d / (b + d)
total_agreement <- (a + d) / n
PPV <- a / (a + b)
NPV <- d / (c_val + d)
positive_agreement_ci <- calc_ci(positive_agreement, a + c_val)
negative_agreement_ci <- calc_ci(negative_agreement, b + d)
total_agreement_ci <- calc_ci(total_agreement, n)
PPV_ci <- calc_ci(PPV, a + b)
NPV_ci <- calc_ci(NPV, c_val + d)
# 创建结果表
table2 <- data.frame(
指标 = c("阳性符合率", "阴性符合率", "总符合率", "阳性预测值", "阴性预测值", "Kappa值"),
估计值 = c(
sprintf("%.2f%%(%.2f%%,%.2f%%)", positive_agreement * 100, positive_agreement_ci[1] * 100, positive_agreement_ci[2] * 100),
sprintf("%.2f%%(%.2f%%,%.2f%%)", negative_agreement * 100, negative_agreement_ci[1] * 100, negative_agreement_ci[2] * 100),
sprintf("%.2f%%(%.2f%%,%.2f%%)", total_agreement * 100, total_agreement_ci[1] * 100, total_agreement_ci[2] * 100),
sprintf("%.2f%%(%.2f%%,%.2f%%)", PPV * 100, PPV_ci[1] * 100, PPV_ci[2] * 100),
sprintf("%.2f%%(%.2f%%,%.2f%%)", NPV * 100, NPV_ci[1] * 100, NPV_ci[2] * 100),
sprintf("%.4f(%.4f,%.4f)", kappa_val, kappa_ci_lower, kappa_ci_upper)
)
)
return(list(四格表 = table1, 有效性分析 = table2))
}
# 处理有二分类分组变量Vc的情况
else if (!is.null(Vc) && Vc %in% names(data1)) {
# 检查Vc是否为二分类变量
x3 <- data1[[Vc]]
unique_vals <- unique(na.omit(x3))
# 判断是否为二分类变量
if (length(unique_vals) == 2 &&
all(unique_vals %in% c("阳性", "阴性", "是", "否", "1", "0"))) {
results <- list()
# 处理阳性组
if (any(x3 %in% c("阳性", "是", "1"))) {
sub_data <- data1[x3 %in% c("阳性", "是", "1"), ]
x1 <- sub_data[[Va]]
x2 <- sub_data[[Vb]]
a1 <- sum(x1 == "阳性" & x2 == "阳性", na.rm = TRUE)
a2 <- sum(x1 == "阴性" & x2 == "阳性", na.rm = TRUE)
b1 <- sum(x1 == "阳性" & x2 == "阴性", na.rm = TRUE)
b2 <- sum(x1 == "阴性" & x2 == "阴性", na.rm = TRUE)
n <- a1 + a2 + b1 + b2
c1 <- a1 + b1
c2 <- a2 + b2
# 计算阳性检出率和置信区间
pos_detect <- a1 / (a1 + a2)
pos_detect_ci <- calc_ci(pos_detect, a1 + a2)
neg_detect <- b1 / (b1 + b2)
neg_detect_ci <- calc_ci(neg_detect, b1 + b2)
total_detect <- c1 / n
total_detect_ci <- calc_ci(total_detect, n)
table_positive <- data.frame(
X3 = c(paste0(Vc, "阳性"), "", ""),
X2结果 = c("阳性", "阴性", "合计"),
例数 = c(
paste0(a1 + a2, "(", round((a1 + a2)/n * 100, 2), "%)"),
paste0(b1 + b2, "(", round((b1 + b2)/n * 100, 2), "%)"),
paste0(n, "(100.00%)")
),
X1阳性 = c(a1, b1, c1),
X1阴性 = c(a2, b2, c2),
检出率 = c(
sprintf("%.2f%%(%.2f%%,%.2f%%)", pos_detect * 100, pos_detect_ci[1] * 100, pos_detect_ci[2] * 100),
sprintf("%.2f%%(%.2f%%,%.2f%%)", neg_detect * 100, neg_detect_ci[1] * 100, neg_detect_ci[2] * 100),
sprintf("%.2f%%(%.2f%%,%.2f%%)", total_detect * 100, total_detect_ci[1] * 100, total_detect_ci[2] * 100)
)
)
results[[paste0(Vc, "阳性")]] <- table_positive
}
# 处理阴性组
if (any(x3 %in% c("阴性", "否", "0"))) {
sub_data <- data1[x3 %in% c("阴性", "否", "0"), ]
x1 <- sub_data[[Va]]
x2 <- sub_data[[Vb]]
a1 <- sum(x1 == "阳性" & x2 == "阳性", na.rm = TRUE)
a2 <- sum(x1 == "阴性" & x2 == "阳性", na.rm = TRUE)
b1 <- sum(x1 == "阳性" & x2 == "阴性", na.rm = TRUE)
b2 <- sum(x1 == "阴性" & x2 == "阴性", na.rm = TRUE)
n <- a1 + a2 + b1 + b2
c1 <- a1 + b1
c2 <- a2 + b2
# 计算阴性检出率和置信区间
pos_detect <- a2 / (a1 + a2)
pos_detect_ci <- calc_ci(pos_detect, a1 + a2)
neg_detect <- b2 / (b1 + b2)
neg_detect_ci <- calc_ci(neg_detect, b1 + b2)
total_detect <- c2 / n
total_detect_ci <- calc_ci(total_detect, n)
table_negative <- data.frame(
X3 = c(paste0(Vc, "阴性"), "", ""),
X2结果 = c("阳性", "阴性", "合计"),
例数 = c(
paste0(a1 + a2, "(", round((a1 + a2)/n * 100, 2), "%)"),
paste0(b1 + b2, "(", round((b1 + b2)/n * 100, 2), "%)"),
paste0(n, "(100.00%)")
),
X1阳性 = c(a1, b1, c1),
X1阴性 = c(a2, b2, c2),
检出率 = c(
sprintf("%.2f%%(%.2f%%,%.2f%%)", pos_detect * 100, pos_detect_ci[1] * 100, pos_detect_ci[2] * 100),
sprintf("%.2f%%(%.2f%%,%.2f%%)", neg_detect * 100, neg_detect_ci[1] * 100, neg_detect_ci[2] * 100),
sprintf("%.2f%%(%.2f%%,%.2f%%)", total_detect * 100, total_detect_ci[1] * 100, total_detect_ci[2] * 100)
)
)
results[[paste0(Vc, "阴性")]] <- table_negative
}
return(results)
} else {
# Vc是多分类变量
if (is.null(by)) {
stop("对于多分类变量Vc,必须提供by参数来指定条件")
}
# 解析by参数,例如"Vc=A"
condition_parts <- strsplit(by, "=")[[1]]
if (length(condition_parts) != 2) {
stop("by参数格式不正确,应为'变量名=值'")
}
condition_var <- condition_parts[1]
condition_val <- condition_parts[2]
if (condition_var != Vc) {
stop("by参数中的变量名必须与Vc相同")
}
# 筛选数据
sub_data <- data1[data1[[Vc]] == condition_val, ]
x1 <- sub_data[[Va]]
x2 <- sub_data[[Vb]]
a1 <- sum(x1 == "阳性" & x2 == "阳性", na.rm = TRUE)
a2 <- sum(x1 == "阴性" & x2 == "阳性", na.rm = TRUE)
b1 <- sum(x1 == "阳性" & x2 == "阴性", na.rm = TRUE)
b2 <- sum(x1 == "阴性" & x2 == "阴性", na.rm = TRUE)
n <- a1 + a2 + b1 + b2
c1 <- a1 + b1
c2 <- a2 + b2
# 根据Vd的值决定计算阳性还是阴性检出率
if (is.null(Vd)) {
stop("对于多分类变量Vc,必须提供Vd参数来指定输出类型('P'或'N')")
}
if (Vd == "P") {
# 计算阳性检出率和置信区间
pos_detect <- a1 / (a1 + a2)
pos_detect_ci <- calc_ci(pos_detect, a1 + a2)
neg_detect <- b1 / (b1 + b2)
neg_detect_ci <- calc_ci(neg_detect, b1 + b2)
total_detect <- c1 / n
total_detect_ci <- calc_ci(total_detect, n)
table_result <- data.frame(
X4 = c(paste0(Vc, "=", condition_val), "", ""),
X2结果 = c("阳性", "阴性", "合计"),
例数 = c(
paste0(a1 + a2, "(", round((a1 + a2)/n * 100, 2), "%)"),
paste0(b1 + b2, "(", round((b1 + b2)/n * 100, 2), "%)"),
paste0(n, "(100.00%)")
),
X1阳性 = c(a1, b1, c1),
X1阴性 = c(a2, b2, c2),
阳性检出率 = c(
sprintf("%.2f%%(%.2f%%,%.2f%%)", pos_detect * 100, pos_detect_ci[1] * 100, pos_detect_ci[2] * 100),
sprintf("%.2f%%(%.2f%%,%.2f%%)", neg_detect * 100, neg_detect_ci[1] * 100, neg_detect_ci[2] * 100),
sprintf("%.2f%%(%.2f%%,%.2f%%)", total_detect * 100, total_detect_ci[1] * 100, total_detect_ci[2] * 100)
)
)
} else if (Vd == "N") {
# 计算阴性检出率和置信区间
pos_detect <- a2 / (a1 + a2)
pos_detect_ci <- calc_ci(pos_detect, a1 + a2)
neg_detect <- b2 / (b1 + b2)
neg_detect_ci <- calc_ci(neg_detect, b1 + b2)
total_detect <- c2 / n
total_detect_ci <- calc_ci(total_detect, n)
table_result <- data.frame(
X4 = c(paste0(Vc, "=", condition_val), "", ""),
X2结果 = c("阳性", "阴性", "合计"),
例数 = c(
paste0(a1 + a2, "(", round((a1 + a2)/n * 100, 2), "%)"),
paste0(b1 + b2, "(", round((b1 + b2)/n * 100, 2), "%)"),
paste0(n, "(100.00%)")
),
X1阳性 = c(a1, b1, c1),
X1阴性 = c(a2, b2, c2),
阴性检出率 = c(
sprintf("%.2f%%(%.2f%%,%.2f%%)", pos_detect * 100, pos_detect_ci[1] * 100, pos_detect_ci[2] * 100),
sprintf("%.2f%%(%.2f%%,%.2f%%)", neg_detect * 100, neg_detect_ci[1] * 100, neg_detect_ci[2] * 100),
sprintf("%.2f%%(%.2f%%,%.2f%%)", total_detect * 100, total_detect_ci[1] * 100, total_detect_ci[2] * 100)
)
)
} else {
stop("Vd参数必须是'P'(阳性检出率)或'N'(阴性检出率)")
}
return(list(table_result))
}
} else {
stop("参数配置不正确,请检查输入参数")
}
}