no_recur.c

 
# 加载必要的包 library(readxl) library(GSVA) library(limma) library(tidyverse) library(edgeR) library(BiocParallel) # 步骤1:读取病人信息并预处理 patient_info <- read_excel("patient_info.xlsx") %>% select(RNAID, Recurrence) %>% filter(!is.na(RNAID)) # 步骤2:读取基因表达矩阵并匹配样本 gene_exp <- read.delim("Gene_exp.txt", row.names = 1, check.names = FALSE) # 匹配表达矩阵列名与RNAID matched_samples <- intersect(colnames(gene_exp), patient_info$RNAID) gene_exp_matched <- gene_exp[, matched_samples, drop = FALSE] # 获取对应的分组信息 group_info <- patient_info %>% filter(RNAID %in% matched_samples) %>% arrange(match(RNAID, colnames(gene_exp_matched))) %>% mutate(Group = factor(Recurrence, levels = c(0, 1), labels = c("NonRecur", "Recur"))) # 步骤3:数据转换 data_type <- "counts" # 请根据实际数据类型修改:"counts" 或 "fpkm" if(data_type == "counts") { dge <- DGEList(counts = gene_exp_matched) dge <- calcNormFactors(dge, method = "TMM") expr_matrix <- cpm(dge, log = TRUE, prior.count = 1) cat("数据转换完成:原始counts -> log2(CPM+1)\n") } else if(data_type == "fpkm") { expr_matrix <- as.matrix(log2(gene_exp_matched + 1)) cat("数据转换完成:FPKM/TPM -> log2(FPKM+1)\n") } else { stop("请指定正确的数据类型:'counts' 或 'fpkm'") } # 步骤4:读取SASP通路基因集 sasp_gmt <- readLines("SASP.gmt") sasp_gene_sets <- list() for (line in sasp_gmt) { elements <- unlist(strsplit(line, "\t")) pathway_name <- elements[1] genes <- elements[3:length(elements)] genes <- genes[genes != ""] # 移除空基因名 sasp_gene_sets[[pathway_name]] <- genes } # 步骤5:创建GSVA参数对象 param <- gsvaParam( exprData = expr_matrix, geneSets = sasp_gene_sets, minSize = 5, # 最小基因集大小 maxSize = 500, # 最大基因集大小 kcdf = "Gaussian", # 适用于log转换的数据 absRanking = FALSE ) # 步骤6:稳健执行GSVA分析(带错误处理和回退机制) run_gsva <- function() { # 尝试并行计算(如果可用) if (requireNamespace("BiocParallel", quietly = TRUE)) { tryCatch({ # 尝试自动检测最佳并行方法 bpparam <- bpparam() # 设置进度条 if (interactive()) { bpparam$progressbar <- TRUE } cat("尝试并行计算...\n") return(gsva(param, verbose = TRUE, BPPARAM = bpparam)) }, error = function(e) { cat("并行计算失败,错误信息:", e$message, "\n") cat("尝试单线程计算...\n") return(gsva(param, verbose = TRUE)) }) } else { cat("BiocParallel不可用,使用单线程计算...\n") return(gsva(param, verbose = TRUE)) } } # 执行GSVA分析 gsva_results <- run_gsva() # 步骤7:差异分析 design <- model.matrix(~ 0 + group_info$Group) colnames(design) <- levels(group_info$Group) fit <- lmFit(gsva_results, design) cont_matrix <- makeContrasts( Recur_vs_Non = Recur - NonRecur, levels = design ) fit2 <- contrasts.fit(fit, cont_matrix) fit2 <- eBayes(fit2) # 获取差异结果 diff_results <- topTable(fit2, number = Inf, adjust.method = "BH", coef = "Recur_vs_Non") # 步骤8:结果可视化 # 火山图 volcano_data <- as.data.frame(diff_results) %>% rownames_to_column("Pathway") %>% mutate(Significant = adj.P.Val < 0.05) p_volcano <- ggplot(volcano_data, aes(x = logFC, y = -log10(adj.P.Val), color = Significant)) + geom_point(alpha = 0.6) + geom_hline(yintercept = -log10(0.05), linetype = "dashed") + geom_vline(xintercept = 0, linetype = "dashed") + scale_color_manual(values = c("grey", "red")) + labs(title = "SASP Pathways Differential Enrichment", x = "Log2 Fold Change (Recur vs NonRecur)", y = "-Log10(Adjusted P-value)") + theme_minimal() # 添加标签(仅当有显著结果时) if (sum(volcano_data$Significant) > 0) { p_volcano <- p_volcano + ggrepel::geom_text_repel( data = subset(volcano_data, Significant), aes(label = Pathway), size = 3, max.overlaps = 20 ) } ggsave("GSVA_volcano_plot.pdf", plot = p_volcano, width = 10, height = 8) # 热图(仅显示显著通路) significant_pathways <- rownames(diff_results)[diff_results$adj.P.Val < 0.05] if (length(significant_pathways) > 0) { heatmap_data <- gsva_results[significant_pathways, ] # 创建注释数据框 annotation_col <- data.frame( Group = group_info$Group, row.names = colnames(heatmap_data) ) # 绘制热图 pheatmap::pheatmap( heatmap_data, annotation_col = annotation_col, show_colnames = FALSE, scale = "row", clustering_method = "complete", color = colorRampPalette(c("blue", "white", "red"))(100), main = "Differential SASP Pathways (adj.P.Val < 0.05)", filename = "GSVA_heatmap.pdf", width = 10, height = max(6, length(significant_pathways) * 0.5) ) } else { cat("未发现显著差异的通路(adj.P.Val < 0.05)\n") } # 步骤9:保存结果 write.csv(diff_results, "GSVA_Differential_Results.csv") write.csv(as.data.frame(gsva_results), "GSVA_Enrichment_Scores.csv") saveRDS(gsva_results, "GSVA_Scores.rds") # 生成报告摘要 report <- list( analysis_date = Sys.Date(), input_data = c( expression_matrix = paste(nrow(expr_matrix), "genes ×", ncol(expr_matrix), "samples"), gene_sets = paste(length(sasp_gene_sets), "pathways"), samples = paste(nrow(group_info), "samples (", sum(group_info$Group == "NonRecur"), "NonRecur, ", sum(group_info$Group == "Recur"), "Recur)") ), gsva_results = paste(nrow(gsva_results), "pathways ×", ncol(gsva_results), "samples"), significant_pathways = if(length(significant_pathways) > 0) significant_pathways else "None" ) cat("\n===== 分析摘要 =====\n") cat("分析日期:", report$analysis_date, "\n") cat("输入数据:\n -", report$input_data["expression_matrix"], "\n") cat(" -", report$input_data["gene_sets"], "\n") cat(" -", report$input_data["samples"], "\n") cat("GSVA结果维度:", report$gsva_results, "\n") cat("显著差异通路数:", if(is.character(report$significant_pathways)) report$significant_pathways else length(report$significant_pathways), "\n") cat("结果已保存到工作目录\n") 可视化部分我只需要热图,复发一列、非复发一列,只看SASP通路,需要标记哪一列是复发还是非复发,样本名也需要写出
最新发布
07-08
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值