首页/编程语言
蛋白测序代码问题(SCTmodel.list包含多个SCT模型无法统一)
r语言
本人用Seurat v5做多样本的单细胞分析,用SCT归一化,然后用harmony消除批次效应,一直做到降为聚类和umap图都没什么问题,接下来想做细胞注释,在寻找各簇的差异基因的时候报错显示:
警告: No DE genes identified
警告: The following tests were not performed:
警告: When testing 0 versus all: Object contains multiple models with unequal library sizes. Run PrepSCTFindMarkers() before running FindMarkers()
在尝试运行PrepSCTFindMarkers()后,又显示minimum UMI unchanged. Skiped re-correct.
现在的问题是PrepSCTFindMarkers()无法运行导致SCTmodel.list里包含的多喝SCT模型无法被统一,因此不能进行FindMakers。请求解决方法。
library(remotes)
library(Seurat)
library(tidyverse)
library(DoubletFinder)
library(harmony)
library(SingleR)
library(celldex)
library(ggplot2)
library(patchwork)
library(glmGamPoi)
library(devtools)
# -----------------------------
# 1. 数据读取与元数据构建
# -----------------------------
data_dir <- "D:/GSEXXXX_RAW"
h5_files <- list.files(data_dir, pattern = "\\.h5$", full.names = TRUE
# 提取样本ID和实验条件
meta_data <- data.frame(
file = h5_files,
sample_id = str_extract(basename(h5_files), "GSM\\d+"),
condition = str_extract(basename(h5_files), "(Ctrl|EcP|EcO)"),
stringsAsFactors = FALSE
)
# -----------------------------
# 2. 分样本创建Seurat对象及初步QC
# -----------------------------
seurat_list <- lapply(seq_along(h5_files), function(i) {
# 读取数据并创建对象
counts <- Read10X_h5(meta_data$file[i])
obj <- CreateSeuratObject(
counts = counts,
project = meta_data$sample_id[i],
min.cells = 3,
min.features = 200
)
# 添加元数据
obj$condition <- meta_data$condition[i]
obj$sample_id <- meta_data$sample_id[i]
# 计算QC指标
obj[["percent.mt"]] <- PercentageFeatureSet(obj, pattern = "^MT-")
obj[["percent.rb"]] <- PercentageFeatureSet(obj, pattern = "^RP[SL]")
obj[["percent.hb"]] <- PercentageFeatureSet(obj, pattern = "^HB[AB]")
# 分样本初步QC过滤
obj <- subset(obj,
subset = nFeature_RNA > 500 &
nFeature_RNA < 6000 &
percent.mt < 20 &
percent.rb < 50 &
percent.hb < 0.5
)
return(obj)
})
# 为列表命名确保可追溯性
names(seurat_list) <- meta_data$sample_id
saveRDS(seurat_list,file = "seurat_list.qc.rds"
# -----------------------------
# 3. 双细胞检测(分样本SCTransform标准化)
# -----------------------------
seurat_list <- lapply(seurat_list, function(obj) {
# 使用SCTransform标准化
obj <- SCTransform(
obj,
vars.to.regress = c("percent.mt")
)
# 运行PCA(基于SCTransform生成的SCT assay)
obj <- RunPCA(obj, assay = "SCT", npcs = 20)
# 自动检测双细胞
sweep.res <- paramSweep(obj, PCs = 1:20, sct = TRUE)
sweep.stats <- summarizeSweep(sweep.res)
bcmvn <- find.pK(sweep.stats)
optimal_pK <- as.numeric(bcmvn$pK[which.max(bcmvn$BCmetric)])
# 动态计算双细胞率(按10x公式)
n_cells <- ncol(obj)
doublet_rate <- 0.008 * (n_cells / 1000)
nExp <- round(doublet_rate * n_cells)
nExp <- max(nExp, 50)
# 运行DoubletFinder
obj <- doubletFinder(
obj,
PCs = 1:20,
pN = 0.25,
pK = optimal_pK,
nExp = nExp,
sct = TRUE
)
# 过滤双细胞
df_col <- grep("DF.classifications", colnames(obj@meta.data), value = TRUE)
obj <- subset(obj, cells = colnames(obj)[obj[[df_col]] == "Singlet"])
return(obj)
})
saveRDS(seurat_list,file = "seurat_list.qc.DF.rds")
# -----------------------------
# 5. 跨样本整合(Harmony校正批次效应)
# -----------------------------
# 合并所有样本为一个Seurat对象
merged_seurat <- merge(seurat_list[[1]], seurat_list[-1])
# 整合全局可变基因
var_features <- SelectIntegrationFeatures(object.list = seurat_list, nfeatures = 3000)
VariableFeatures(merged_seurat) <- var_features
# 运行PCA
DefaultAssay(merged_seurat) <- "SCT"
merged_seurat <- RunPCA(merged_seurat, npcs = 20, verbose = FALSE)
# 运行Harmony校正批次效应
merged_seurat <- RunHarmony(
merged_seurat,
group.by.vars = "sample_id",
assay.use = "SCT",
project.dim = FALSE,
plot_convergence = TRUE
)
# 保存批次校正后的对象
saveRDS(merged_seurat, file = "merged_seurat.harmony.rds")
merged_seurat = readRDS("merged_seurat.harmony.rds")
# -----------------------------
# (1) 计算KNN图(基于Harmony嵌入)
merged_seurat <- FindNeighbors(
merged_seurat,
reduction = "harmony",
dims = 1:20,
assay = "SCT",
graph.name = "harmony_snn" )
# (2) 基于Leiden或Louvain算法聚类(分辨率决定簇数量)
merged_seurat <- FindClusters(
merged_seurat,
graph.name = "harmony_snn",
resolution = seq(0.2, 1.2, 0.2),
algorithm = 1,
verbose = FALSE
)
# (3) 可视化聚类结果(UMAP)
merged_seurat <- RunUMAP(
merged_seurat,
reduction = "harmony",
dims = 1:20,
reduction.name = "umap_harmony"
)
# 绘制UMAP图(按聚类和样本分组)
p1 <- DimPlot(merged_seurat, reduction = "umap_harmony", group.by = "seurat_clusters", label = TRUE)
p2 <- DimPlot(merged_seurat, reduction = "umap_harmony", group.by = "sample_id")
p1 | p2
# --------------------------------
# 6. 差异基因分析(识别各簇的标记基因)
# --------------------------------
# (1) 设置默认Assay为RNA或SCT(根据标准化方法选择)
DefaultAssay(merged_seurat) <- "SCT" # 若使用SCTransform
# 步骤 2: 整合 SCT 模型
merged_seurat <- PrepSCTFindMarkers(
merged_seurat,
assay = "SCT")
# (2) 寻找各簇的差异基因
markers <- FindAllMarkers(
merged_seurat,
only.pos = TRUE,
min.pct = 0.25,
logfc.threshold = 0.5,
test.use = "wilcox",
assay = "SCT"
)
# 过滤并排序(按平均logFC和p值)
top_markers <- markers %>%
group_by(cluster) %>%
filter(p_val_adj < 0.05) %>%
slice_max(n = 5, order_by = avg_log2FC) # 每簇取top5基因
# 查看标记基因
print(top_markers)
saveRDS(merged_seurat, file = "top_markers.rds")
# (2) 下载参考数据集(以Human Primary Cell Atlas为例)
ref <- celldex::HumanPrimaryCellAtlasData()
# (3) 提取单细胞表达矩阵(需与参考数据基因符号匹配)
counts <- GetAssayData(merged_seurat, assay = "SCT", slot = "data") # 使用log-normalized数据
# (4) 运行SingleR预测
pred <- SingleR(
test = counts,
ref = ref,
labels = ref$label.main,
clusters = merged_seurat$seurat_clusters
)
# (5) 将预测结果添加到metadata
merged_seurat$singleR_labels <- pred$labels[match(merged_seurat$seurat_clusters, rownames(pred))]
# -----------------------------
# 7. 结果保存与输出
# -----------------------------
# 保存完整对象
saveRDS(merged_seurat, file = "integrated_seurat_final.rds")
# 导出标记基因表格
markers <- FindAllMarkers(merged_seurat, assay = "SCT", only.pos = TRUE)
write.csv(markers, "cluster_markers.csv", row.names = FALSE)
# 生成PDF报告
pdf("QC_plots.pdf")
VlnPlot(merged_seurat, features = c("nFeature_RNA", "percent.mt"), group.by = "sample_id")
DimPlot(merged_seurat, group.by = "celltype", label = TRUE)
FeaturePlot(merged_seurat, features = c("CD3D", "CD19", "EPCAM"))
dev.off()
最新发布