单细胞测序数据分析全流程解析#Single-Cell RNA Sequencing Reveals Vascular Heterogeneity and Immune Crosstalk

原创已于 2025-11-17 14:26:20 修改 · 99 阅读
CC 4.0 BY-SA版权
文章标签：
于 2025-11-17 14:22:31 首次发布

###################################01.数据前期处理与矫正###################################
#引用包
library(ggplot2)
library(limma)
library(Seurat)
library(dplyr)
library(magrittr)
library(celldex)
library(SingleR)
library(tidyverse)
library(ggthemes)
library(ggrepel)
library(harmony)
library(patchwork)
library(tidyr)
library(ggplot2)

load('normal.rda')



logFCfilter=1               #logFC的过滤条件
adjPvalFilter=0.05          #校正后的pvalue过滤条件


#将矩阵转换为Seurat对象，并对数据进行过滤
pbmc=CreateSeuratObject(counts = normal,
                        min.cells=5,      #基因至少在多少个细胞中有表达
                        min.features=500) #至少表达多少个基因

pbmc@assays$RNA@layers$counts@Dimnames <- list(rownames(pbmc@assays$RNA@features@.Data),rownames(pbmc@assays$RNA@cells@.Data))

#使用PercentageFeatureSet函数计算线粒体基因百分比
pbmc[["percent.mt"]] <- PercentageFeatureSet(object = pbmc, pattern = "^MT-")
#绘制特征基因小提琴图
pdf(file="01.featureViolin.pdf", width=10, height=6)
VlnPlot(object = pbmc, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3)
dev.off()

#过滤细胞
pbmc=subset(x = pbmc, subset = nFeature_RNA > 0 & nFeature_RNA < 7000 & percent.mt < 25)    #对数据进行过滤
pbmc@assays$RNA@layers$counts@Dimnames <- list(rownames(pbmc@assays$RNA@features@.Data),rownames(pbmc@assays$RNA@cells@.Data))

#测序深度相关性图
pdf(file="01.featureCor.pdf",width=10,height=6)
plot1 <- FeatureScatter(object = pbmc, feature1 = "nCount_RNA", feature2 = "percent.mt",pt.size=1.5)
plot2 <- FeatureScatter(object = pbmc, feature1 = "nCount_RNA", feature2 = "nFeature_RNA",,pt.size=1.5)
CombinePlots(plots = list(plot1, plot2))
dev.off()




#对数据进行标准化
pbmc <- NormalizeData(pbmc)
#提取细胞间变异系数较大的基因
pbmc <- FindVariableFeatures(object = pbmc, selection.method = "vst", nfeatures = 2000)
#输出特征方差图
top10 <- head(VariableFeatures(pbmc), 10)
pdf(file="01.featureVar.pdf",width=10,height=6)
plot1 <- VariableFeaturePlot(object = pbmc)
plot2 <- LabelPoints(plot = plot1, points = top10, repel = TRUE)
CombinePlots(plots = list(plot1, plot2))
dev.off()

###################################02.PCA主成分分析###################################
##PCA分析
pbmc=ScaleData(pbmc)          #PCA降维之前的标准处理步骤
pbmc=RunPCA(object= pbmc,npcs = 30 ,pc.genes=VariableFeatures(object = pbmc))     #PCA分析

# #绘制每个PCA成分的特征基因
# pdf(file="02.pcaGene.pdf",width=10,height=8)
# VizDimLoadings(object = pbmc, dims = 1:4, reduction = "pca",nfeatures = 20)
# dev.off()
# 
# #绘制主成分分析图
# pdf(file="02.PCA.pdf",width=6.5,height=6)
# DimPlot(object = pbmc, reduction = "pca")
# dev.off()

# #主成分分析热图
# pdf(file="02.pcaHeatmap.pdf",width=10,height=8)
# DimHeatmap(object = pbmc, dims = 1:4, cells = 500, balanced = TRUE,nfeatures = 30,ncol=2)
# dev.off()

# #得到每个PC的p值分布
# pbmc <- JackStraw(object = pbmc, num.replicate = 100)
# pbmc <- ScoreJackStraw(object = pbmc, dims = 1:10)
# pdf(file="02.pcaJackStraw.pdf",width=8,height=6)
# JackStrawPlot(object = pbmc, dims = 1:10)
# dev.off()

pdf(file="02.pcaElbowPlot.pdf",width=8,height=6)
ElbowPlot(pbmc)
dev.off()

# save(pbmc,file = '02.pbmc.rda')
load('02.pbmc.rda')
logFCfilter=1               #logFC的过滤条件
adjPvalFilter=0.05          #校正后的pvalue过滤条件
###################################03.TSNE聚类分析和Marker基因###################################
##TSNE聚类分析
pbmc <- FindNeighbors(pbmc, dims = 1:20)       #前10个维度
pbmc <- FindClusters(object = pbmc, resolution = 0.5)   #对细胞分组，对细胞标准模块化，resolution越小cluster越少
head(Idents(pbmc),5)
# pbmc <- RunTSNE(object = pbmc, dims = 1:10)             ##TSNE聚类
# 
# pdf(file="03.TSNE.pdf",width=6.5,height=6)
# TSNEPlot(object = pbmc, pt.size = 0.2, label = TRUE)    ##TSNE可视化
# dev.off()
# write.table(pbmc$seurat_clusters,file="03.tsneCluster.txt",quote=F,sep="\t",col.names=F)

##############################################################################
pbmc <- RunUMAP(object = pbmc, dims = 1:20)          #UMAP聚类
pdf(file="03.UMAP.pdf",width=17,height=14)
DimPlot(object = pbmc, reduction = "umap", pt.size = 0.2, label = TRUE,raster=FALSE)   #可视化
dev.off()
write.table(pbmc$seurat_clusters,file="03.umapCluster.txt",quote=F,sep="\t",col.names=F)


##查找每个cluster差异基因
pbmc.markers <- FindAllMarkers(object = pbmc,
                               only.pos = FALSE,
                               min.pct = 0.25,
                               logfc.threshold = logFCfilter)
sig.markers=pbmc.markers[(abs(as.numeric(as.vector(pbmc.markers$avg_log2FC)))>logFCfilter & as.numeric(as.vector(pbmc.markers$p_val_adj))<adjPvalFilter),]
write.table(sig.markers,file="03.clusterMarkers.txt",sep="\t",row.names=F,quote=F)


#marker
showGenes1 = c('FA2H', 'MAG',                # Oligodendrocyte
               'GFAP','AQP4',                # Astrocyte
               'PDGFRB','CSPG4',             # pericyteS
               'PLN','MYH11',                # Smooth muscle cell
               'KCNMA1','SLC26A2',           # Meningeal fibroblast
               'CLDN5','VWF'                 # Endothelial cells
)   


FeaturePlot(object = pbmc, features = 'KCNMA1', cols = c("#F2F3F4", "blue"),raster=FALSE)

#绘制Marker在各个cluster的散点图
pdf(file="03.markerScatter.pdf",width=15,height=14)
FeaturePlot(object = pbmc, features = showGenes1, cols = c("#F2F3F4", "blue"),raster=FALSE)
dev.off()

top10 <- pbmc.markers %>% group_by(cluster) %>% top_n(n = 10, wt = avg_log2FC)
#绘制Marker在各个cluster的热图
pdf(file="03.umapHeatmap.pdf",width=12,height=9)
DoHeatmap(object = pbmc, features = top10$gene) + NoLegend()
dev.off()

#绘制marker小提琴图
pdf(file="03.markerViolin.pdf",width=10,height=6)
VlnPlot(object = pbmc, features = row.names(sig.markers)[1:2])
dev.off()


#绘制Marker在各个cluster的气泡图
pdf(file="03.markerBubble.pdf",width=20,height=12)
cluster10Marker=showGenes1
DotPlot(object = pbmc, features = cluster10Marker)
dev.off()

# save(list = ls(),file = '03.singleR.rda')
library(ggplot2)
library(limma)
library(Seurat)
library(dplyr)
library(magrittr)
library(celldex)
library(SingleR)
library(BiocParallel)
library(monocle)
library(cols4all)
load('03.singleR.rda')

#####个性标记####个性标记####个性标记####个性标记####个性标记####个性标记####个性标记####个性标记####
celltype=data.frame(ClusterID=0:21,
                    celltype= 0:21) 

# 'FA2H', 'MAG',                # Oligodendrocyte
# 'GFAP','AQP4',                # Astrocyte
# 'PDGFRB','CSPG4',             # pericyteS
# 'PLN','MYH11',                # Smooth muscle cell
# 'KCNMA1','SLC26A2',           # Meningeal fibroblast
# 'CLDN5','VWF'                 # Endothelial cells

FeaturePlot(object = pbmc, features = 'EPCAM', cols = c("#F2F3F4", "blue"))
FeaturePlot(object = pbmc, features = 'SFTPB', cols = c( "blue",'red'))

celltype[celltype$ClusterID %in% c(6,10),2]='Oligodendrocyte'
celltype[celltype$ClusterID %in% c(18),2]='Astrocyte'
celltype[celltype$ClusterID %in% c(0),2]='Pericytes'
celltype[celltype$ClusterID %in% c(8),2]='Smooth muscle cell'
celltype[celltype$ClusterID %in% c(7,19),2]='Meningeal fibroblast'
celltype[celltype$ClusterID %in% c(1,2,3,4,5,9,11,12,13,14,15,16,17,20,21),2]='Endothelial cells'

write.table(celltype,file="04.clusterAnn.txt",quote=F,sep="\t", row.names=F)





# colors <-c('#E5D2DD', '#53A85F', '#F1BB72', '#F3B1A0', '#D6E7A3', '#57C3F3',
#              '#E95C59', '#E59CC4', '#AB3282',  '#BD956A', 
#              '#9FA3A8', '#E0D4CA',  '#C5DEBA', '#F7F398',
#              '#C1E6F3', '#6778AE', '#91D0BE', '#B53E2B',
#              '#712820', '#DCC1DD', '#CCE0F5',  '#CCC9E6', '#625D9E', '#68A180', '#3A6963',
#              '#968175')

colors <-c("#3176B7","#F78000","#3FA116","#CE2820","#9265C1",
           "#885649","#DD76C5",'#EEAD0E',"#BBBE00","#41BED1",
           '#E5D2DD','#53A85F','#F1BB72','#F3B1A0','#D6E7A3',
           '#57C3F3','#E59CC4','#AB3282','#BD956A','#91D0BE',
           '#6778AE','#B53E2B','#712820','#DCC1DD','#F7F398',
           '#CCE0F5','#9FA3A8','#E0D4CA','#C5DEBA','#C1E6F3')

table(celltype$celltype)
mycolors =c("#F78000","#3176B7","#3FA116","#CE2820","#9265C1",
            "#885649","#DD76C5",'#EEAD0E',"#BBBE00","#41BED1", '#CCE0F5')



#cluster注释结果可视化
newLabels <- celltype$celltype
names(newLabels) <- levels(pbmc)
pbmc <- RenameIdents(pbmc, newLabels)

# celltype[celltype$ClusterID %in% c(0,1),2]='T_cells 1'
# celltype[celltype$ClusterID %in% c(15,16),2]='T_cells 2'
# celltype[celltype$ClusterID %in% c(23),2]='T_cells 3'
# celltype[celltype$ClusterID %in% c(3),2]='B_cells 1'
# celltype[celltype$ClusterID %in% c(6),2]='B_cells 2'
# celltype[celltype$ClusterID %in% c(0,12,14),2]='NK_cells'
# celltype[celltype$ClusterID %in% c(4,7,8,11,20),2]='Macrophage_cells'
# celltype[celltype$ClusterID %in% c(19),2]='Mast_cells'
# celltype[celltype$ClusterID %in% c(9),2]='Neutrophils'
# celltype[celltype$ClusterID %in% c(18),2]='Fibroblast_cells'
# celltype[celltype$ClusterID %in% c(13),2]='Epithelial_cells 1'
# celltype[celltype$ClusterID %in% c(2,5,17),2]='Epithelial_cells 2'
# celltype[celltype$ClusterID %in% c(21),2]='Epithelial_cells 3'
# celltype[celltype$ClusterID %in% c(10),2]='Epithelial_cells 4'
# celltype[celltype$ClusterID %in% c(22),2]='Endothelial_cells'




#细胞类型主观排序
cell_type <- c('Pericytes',
               'Endothelial cells',
               'Oligodendrocyte',
               'Smooth muscle cell',
               'Meningeal fibroblast',
               'Astrocyte')

TB <- subset(pbmc,idents = cell_type)
levels(TB) <- cell_type

plot1 <- UMAPPlot(object = TB,group.by = "ident",cols = mycolors, pt.size =0.5, label = TRUE,raster=FALSE) +
  theme(legend.position = "bottom") +
  theme(text = element_text(size = 30))#UMAP可视化

pdf(file="04.UMAP-1.pdf",width=20,height=22)
plot1
dev.off()

color2 <- c('#77d3ec','#ffc5d0','#de597c')

plot2 <- UMAPPlot(object = pbmc,group.by = "orig.ident",cols = color2, pt.size =0.5, label = TRUE,raster=FALSE) +
  theme(legend.position = "bottom") +
  theme(text = element_text(size = 30))#UMAP可视化

pdf(file="04.UMAP-2.pdf",width=20,height=22)
plot2
dev.off()

pdf(file="04.UMAP.pdf",width=40,height=22)
plot1+plot2
dev.off()

write.table(TB@active.ident,file="04.umapCluster.txt",quote=F,sep="\t",col.names=F)

# df_group <- as.data.frame(colnames(df))
# write.csv(df_group,'df_group.csv')

##cluster注释结果差异化
pbmc.markers=FindAllMarkers(object = TB,
                            only.pos = FALSE,
                            min.pct = 0.25,
                            logfc.threshold = logFCfilter)
sig.cellMarkers=pbmc.markers[(abs(as.numeric(as.vector(pbmc.markers$avg_log2FC)))>logFCfilter & as.numeric(as.vector(pbmc.markers$p_val_adj))<adjPvalFilter),]
write.table(sig.cellMarkers,file="04.cellMarkers.txt",sep="\t",row.names=F,quote=F)

#绘制Marker在各个cluster的散点图
pdf(file="04.markerScatter.pdf",width=15,height=14)
FeaturePlot(object = TB, features = showGenes1, cols = c("snow2", "blue"),raster=FALSE)
dev.off()

top10 <- pbmc.markers %>% group_by(cluster) %>% top_n(n = 10, wt = avg_log2FC)

#绘制Marker在各个cluster的热图
pdf(file="04.umapHeatmap.pdf",width=12,height=9)
DoHeatmap(object = TB, features = top10$gene) + NoLegend()
dev.off()

pdf(file="04.markerViolin.pdf",width=20,height=25)
VlnPlot(object = TB,fill.by = "ident",cols = mycolors, 
        features = showGenes1,pt.size = 0,stack = T,flip = T) + 
  theme(text = element_text(size = 30))+
  NoLegend()
dev.off()

#绘制Marker在各个cluster的气泡图
pdf(file="04.markerBubble.pdf",width=10,height=15)
cluster10Marker=showGenes1
DotPlot(object = TB, features = cluster10Marker,dot.scale = 10)+
  theme(axis.text.x = element_text(face = "bold",angle = 90,color = 'black'),
        panel.background = element_rect(fill = 'transparent',color = 'black',size = 1))+
  coord_flip()
dev.off()

#############################################################################
############取出一部分cluster用来亚分群######################################
#############################################################################
library(ggplot2)
library(limma)
library(Seurat)
library(dplyr)
library(magrittr)
library(celldex)
library(SingleR)
library(monocle)
library(cols4all)

# save(list=ls(),file='subset_cluster_input.RData')
load('subset_cluster_input.RData')

###################################01.数据前期处理与矫正###################################
#引用包
library(ggplot2)
library(limma)
library(Seurat)
library(dplyr)
library(magrittr)
library(celldex)
library(SingleR)
library(tidyverse)
library(ggthemes)
library(ggrepel)
library(harmony)
library(patchwork)
library(tidyr)
library(ggplot2)

load('tumor.rda')



logFCfilter=1               #logFC的过滤条件
adjPvalFilter=0.05          #校正后的pvalue过滤条件


#将矩阵转换为Seurat对象，并对数据进行过滤
pbmc=CreateSeuratObject(counts = tumor,
                        min.cells=5,      #基因至少在多少个细胞中有表达
                        min.features=500) #至少表达多少个基因

pbmc@assays$RNA@layers$counts@Dimnames <- list(rownames(pbmc@assays$RNA@features@.Data),rownames(pbmc@assays$RNA@cells@.Data))

#使用PercentageFeatureSet函数计算线粒体基因百分比
pbmc[["percent.mt"]] <- PercentageFeatureSet(object = pbmc, pattern = "^MT-")
#绘制特征基因小提琴图
pdf(file="01.featureViolin.pdf", width=10, height=6)
VlnPlot(object = pbmc, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3)
dev.off()

#过滤细胞
pbmc=subset(x = pbmc, subset = nFeature_RNA > 0 & nFeature_RNA < 7000 & percent.mt < 25)    #对数据进行过滤
pbmc@assays$RNA@layers$counts@Dimnames <- list(rownames(pbmc@assays$RNA@features@.Data),rownames(pbmc@assays$RNA@cells@.Data))

#测序深度相关性图
pdf(file="01.featureCor.pdf",width=10,height=6)
plot1 <- FeatureScatter(object = pbmc, feature1 = "nCount_RNA", feature2 = "percent.mt",pt.size=1.5)
plot2 <- FeatureScatter(object = pbmc, feature1 = "nCount_RNA", feature2 = "nFeature_RNA",,pt.size=1.5)
CombinePlots(plots = list(plot1, plot2))
dev.off()




#对数据进行标准化
pbmc <- NormalizeData(pbmc)
#提取细胞间变异系数较大的基因
pbmc <- FindVariableFeatures(object = pbmc, selection.method = "vst", nfeatures = 2000)
#输出特征方差图
top10 <- head(VariableFeatures(pbmc), 10)
pdf(file="01.featureVar.pdf",width=10,height=6)
plot1 <- VariableFeaturePlot(object = pbmc)
plot2 <- LabelPoints(plot = plot1, points = top10, repel = TRUE)
CombinePlots(plots = list(plot1, plot2))
dev.off()

###################################02.PCA主成分分析###################################
##PCA分析
pbmc=ScaleData(pbmc)          #PCA降维之前的标准处理步骤
pbmc=RunPCA(object= pbmc,npcs = 30 ,pc.genes=VariableFeatures(object = pbmc))     #PCA分析

# #绘制每个PCA成分的特征基因
# pdf(file="02.pcaGene.pdf",width=10,height=8)
# VizDimLoadings(object = pbmc, dims = 1:4, reduction = "pca",nfeatures = 20)
# dev.off()
# 
# #绘制主成分分析图
# pdf(file="02.PCA.pdf",width=6.5,height=6)
# DimPlot(object = pbmc, reduction = "pca")
# dev.off()

# #主成分分析热图
# pdf(file="02.pcaHeatmap.pdf",width=10,height=8)
# DimHeatmap(object = pbmc, dims = 1:4, cells = 500, balanced = TRUE,nfeatures = 30,ncol=2)
# dev.off()

# #得到每个PC的p值分布
# pbmc <- JackStraw(object = pbmc, num.replicate = 100)
# pbmc <- ScoreJackStraw(object = pbmc, dims = 1:10)
# pdf(file="02.pcaJackStraw.pdf",width=8,height=6)
# JackStrawPlot(object = pbmc, dims = 1:10)
# dev.off()

pdf(file="02.pcaElbowPlot.pdf",width=8,height=6)
ElbowPlot(pbmc)
dev.off()

# save(pbmc,file = '02.pbmc.rda')
load('02.pbmc.rda')
logFCfilter=1               #logFC的过滤条件
adjPvalFilter=0.05          #校正后的pvalue过滤条件
###################################03.TSNE聚类分析和Marker基因###################################
##TSNE聚类分析
pbmc <- FindNeighbors(pbmc, dims = 1:20)       #前10个维度
pbmc <- FindClusters(object = pbmc, resolution = 0.5)   #对细胞分组，对细胞标准模块化，resolution越小cluster越少
head(Idents(pbmc),5)
# pbmc <- RunTSNE(object = pbmc, dims = 1:10)             ##TSNE聚类
# 
# pdf(file="03.TSNE.pdf",width=6.5,height=6)
# TSNEPlot(object = pbmc, pt.size = 0.2, label = TRUE)    ##TSNE可视化
# dev.off()
# write.table(pbmc$seurat_clusters,file="03.tsneCluster.txt",quote=F,sep="\t",col.names=F)

##############################################################################
pbmc <- RunUMAP(object = pbmc, dims = 1:20)          #UMAP聚类
pdf(file="03.UMAP.pdf",width=17,height=14)
DimPlot(object = pbmc, reduction = "umap", pt.size = 0.2, label = TRUE,raster=FALSE)   #可视化
dev.off()
write.table(pbmc$seurat_clusters,file="03.umapCluster.txt",quote=F,sep="\t",col.names=F)


##查找每个cluster差异基因
pbmc.markers <- FindAllMarkers(object = pbmc,
                               only.pos = FALSE,
                               min.pct = 0.25,
                               logfc.threshold = logFCfilter)
sig.markers=pbmc.markers[(abs(as.numeric(as.vector(pbmc.markers$avg_log2FC)))>logFCfilter & as.numeric(as.vector(pbmc.markers$p_val_adj))<adjPvalFilter),]
write.table(sig.markers,file="03.clusterMarkers.txt",sep="\t",row.names=F,quote=F)


#marker
showGenes1 = c('FA2H', 'MAG',                # Oligodendrocyte
               'GFAP','AQP4',                # Astrocyte
               'PDGFRB','CSPG4',             # Pericytes
               'PLN','MYH11',                # Smooth muscle cell
               'KCNMA1','SLC26A2',           # Meningeal fibroblast
               'CLDN5','VWF'                 # Endothelial cells
)   


FeaturePlot(object = pbmc, features = 'KCNMA1', cols = c("#F2F3F4", "blue"),raster=FALSE)

#绘制Marker在各个cluster的散点图
pdf(file="03.markerScatter.pdf",width=15,height=14)
FeaturePlot(object = pbmc, features = showGenes1, cols = c("#F2F3F4", "blue"),raster=FALSE)
dev.off()

top10 <- pbmc.markers %>% group_by(cluster) %>% top_n(n = 10, wt = avg_log2FC)
#绘制Marker在各个cluster的热图
pdf(file="03.umapHeatmap.pdf",width=12,height=9)
DoHeatmap(object = pbmc, features = top10$gene) + NoLegend()
dev.off()

#绘制marker小提琴图
pdf(file="03.markerViolin.pdf",width=10,height=6)
VlnPlot(object = pbmc, features = row.names(sig.markers)[1:2])
dev.off()


#绘制Marker在各个cluster的气泡图
pdf(file="03.markerBubble.pdf",width=20,height=12)
cluster10Marker=showGenes1
DotPlot(object = pbmc, features = cluster10Marker)
dev.off()

# save(list = ls(),file = '03.singleR.rda')
library(ggplot2)
library(limma)
library(Seurat)
library(dplyr)
library(magrittr)
library(celldex)
library(SingleR)
library(BiocParallel)
library(monocle)
library(cols4all)
load('03.singleR.rda')

#####个性标记####个性标记####个性标记####个性标记####个性标记####个性标记####个性标记####个性标记####
celltype=data.frame(ClusterID=0:21,
                    celltype= 0:21) 

# 'FA2H', 'MAG',                # Oligodendrocyte
# 'GFAP','AQP4',                # Astrocyte
# 'PDGFRB','CSPG4',             # pericyteS
# 'PLN','MYH11',                # Smooth muscle cell
# 'KCNMA1','SLC26A2',           # Meningeal fibroblast
# 'CLDN5','VWF'                 # Endothelial cells

FeaturePlot(object = pbmc, features = 'EPCAM', cols = c("#F2F3F4", "blue"))
FeaturePlot(object = pbmc, features = 'SFTPB', cols = c( "blue",'red'))

celltype[celltype$ClusterID %in% c(7,8,9),2]='Oligodendrocyte'
celltype[celltype$ClusterID %in% c(16,19),2]='Astrocyte'
celltype[celltype$ClusterID %in% c(12,17),2]='Pericytes'
celltype[celltype$ClusterID %in% c(13),2]='Smooth muscle cell'
celltype[celltype$ClusterID %in% c(11),2]='Meningeal fibroblast'
celltype[celltype$ClusterID %in% c(0,1,2,3,4,5,6,10,14,15,18,20,21),2]='Endothelial cells'

write.table(celltype,file="04.clusterAnn.txt",quote=F,sep="\t", row.names=F)





# colors <-c('#E5D2DD', '#53A85F', '#F1BB72', '#F3B1A0', '#D6E7A3', '#57C3F3',
#              '#E95C59', '#E59CC4', '#AB3282',  '#BD956A', 
#              '#9FA3A8', '#E0D4CA',  '#C5DEBA', '#F7F398',
#              '#C1E6F3', '#6778AE', '#91D0BE', '#B53E2B',
#              '#712820', '#DCC1DD', '#CCE0F5',  '#CCC9E6', '#625D9E', '#68A180', '#3A6963',
#              '#968175')

colors <-c("#3176B7","#F78000","#3FA116","#CE2820","#9265C1",
           "#885649","#DD76C5",'#EEAD0E',"#BBBE00","#41BED1",
           '#E5D2DD','#53A85F','#F1BB72','#F3B1A0','#D6E7A3',
           '#57C3F3','#E59CC4','#AB3282','#BD956A','#91D0BE',
           '#6778AE','#B53E2B','#712820','#DCC1DD','#F7F398',
           '#CCE0F5','#9FA3A8','#E0D4CA','#C5DEBA','#C1E6F3')

table(celltype$celltype)
mycolors =c("#F78000","#3176B7","#3FA116","#CE2820","#9265C1",
            "#885649","#DD76C5",'#EEAD0E',"#BBBE00","#41BED1", '#CCE0F5')




#cluster注释结果可视化
newLabels <- celltype$celltype
names(newLabels) <- levels(pbmc)
pbmc <- RenameIdents(pbmc, newLabels)

# celltype[celltype$ClusterID %in% c(0,1),2]='T_cells 1'
# celltype[celltype$ClusterID %in% c(15,16),2]='T_cells 2'
# celltype[celltype$ClusterID %in% c(23),2]='T_cells 3'
# celltype[celltype$ClusterID %in% c(3),2]='B_cells 1'
# celltype[celltype$ClusterID %in% c(6),2]='B_cells 2'
# celltype[celltype$ClusterID %in% c(0,12,14),2]='NK_cells'
# celltype[celltype$ClusterID %in% c(4,7,8,11,20),2]='Macrophage_cells'
# celltype[celltype$ClusterID %in% c(19),2]='Mast_cells'
# celltype[celltype$ClusterID %in% c(9),2]='Neutrophils'
# celltype[celltype$ClusterID %in% c(18),2]='Fibroblast_cells'
# celltype[celltype$ClusterID %in% c(13),2]='Epithelial_cells 1'
# celltype[celltype$ClusterID %in% c(2,5,17),2]='Epithelial_cells 2'
# celltype[celltype$ClusterID %in% c(21),2]='Epithelial_cells 3'
# celltype[celltype$ClusterID %in% c(10),2]='Epithelial_cells 4'
# celltype[celltype$ClusterID %in% c(22),2]='Endothelial_cells'




#细胞类型主观排序
cell_type <- c('Pericytes',
               'Endothelial cells',
               'Oligodendrocyte',
               'Smooth muscle cell',
               'Meningeal fibroblast',
               'Astrocyte')

TB <- subset(pbmc,idents = cell_type)
levels(TB) <- cell_type

plot1 <- UMAPPlot(object = pbmc,group.by = "ident",cols = colors, pt.size =0.5, label = TRUE,raster=FALSE) +
  theme(legend.position = "bottom") +
  theme(text = element_text(size = 30))#UMAP可视化

pdf(file="04.UMAP-1.pdf",width=20,height=22)
plot1
dev.off()

color2 <- c('#77d3ec','#ffc5d0','#de597c')

plot2 <- UMAPPlot(object = pbmc,group.by = "orig.ident",cols = color2, pt.size =0.5, label = TRUE,raster=FALSE) +
  theme(legend.position = "bottom") +
  theme(text = element_text(size = 30))#UMAP可视化

pdf(file="04.UMAP-2.pdf",width=20,height=22)
plot2
dev.off()

pdf(file="04.UMAP.pdf",width=40,height=22)
plot1+plot2
dev.off()

write.table(TB@active.ident,file="04.umapCluster.txt",quote=F,sep="\t",col.names=F)

# df_group <- as.data.frame(colnames(df))
# write.csv(df_group,'df_group.csv')

##cluster注释结果差异化
pbmc.markers=FindAllMarkers(object = TB,
                            only.pos = FALSE,
                            min.pct = 0.25,
                            logfc.threshold = logFCfilter)
sig.cellMarkers=pbmc.markers[(abs(as.numeric(as.vector(pbmc.markers$avg_log2FC)))>logFCfilter & as.numeric(as.vector(pbmc.markers$p_val_adj))<adjPvalFilter),]
write.table(sig.cellMarkers,file="04.cellMarkers.txt",sep="\t",row.names=F,quote=F)

#绘制Marker在各个cluster的散点图
pdf(file="04.markerScatter.pdf",width=15,height=14)
FeaturePlot(object = TB, features = showGenes1, cols = c("snow2", "blue"),raster=FALSE)
dev.off()

top10 <- pbmc.markers %>% group_by(cluster) %>% top_n(n = 10, wt = avg_log2FC)

#绘制Marker在各个cluster的热图
pdf(file="04.umapHeatmap.pdf",width=12,height=9)
DoHeatmap(object = TB, features = top10$gene) + NoLegend()
dev.off()

pdf(file="04.markerViolin.pdf",width=20,height=25)
VlnPlot(object = TB,fill.by = "ident",cols = mycolors, 
        features = showGenes1,pt.size = 0,stack = T,flip = T) + 
  theme(text = element_text(size = 30))+
  NoLegend()
dev.off()

#绘制Marker在各个cluster的气泡图
pdf(file="04.markerBubble.pdf",width=10,height=15)
cluster10Marker=showGenes1
DotPlot(object = TB, features = cluster10Marker,dot.scale = 10)+
  theme(axis.text.x = element_text(face = "bold",angle = 90,color = 'black'),
        panel.background = element_rect(fill = 'transparent',color = 'black',size = 1))+
  coord_flip()
dev.off()

#############################################################################
############取出一部分cluster用来亚分群######################################
#############################################################################
library(ggplot2)
library(limma)
library(Seurat)
library(dplyr)
library(magrittr)
library(celldex)
library(SingleR)
library(monocle)
library(cols4all)

# save(list=ls(),file='subset_cluster_input.RData')
load('subset_cluster_input.RData')


library(limma)
library(tidyverse)
library(org.Hs.eg.db)
library(tidyverse)
library(msigdbr)
library(GSEABase)
library(GSVA)
library(dplyr)
library(pheatmap)
library(RColorBrewer)
library(ComplexHeatmap)
library(CellChat)

# load('subset_cluster_input.RData')
# save(TB,file='TB.rda')
load('TB.rda')
#####（一）创建cellchat对象####################################################
#pbmc3k里的seurat_annotations有一些NA注释，过滤掉
data.input = TB@assays$RNA@layers$data
meta.data =  as.data.frame(TB@active.ident)
meta.data$seurat_annotations <- meta.data$`TB@active.ident`
data.input = data.input[,row.names(meta.data)]

load('tumor.rda')
data.input = tumor[,meta.data$seurat_annotations]
data.input <- as.matrix(data.input)
#######################################################################################
#######################################################################################
#######################################################################################
library(ggplot2)
library(limma)
library(Seurat)
library(dplyr)
library(magrittr)
library(celldex)
library(SingleR)
library(SeuratData)
library(patchwork) #最强大的拼图包
library(CellChat)
library(ggalluvial)
library(svglite)
library(Matrix)
library(ComplexHeatmap)
library(patchwork)

#构建CellChat数据对象
cellchat <- createCellChat(object = data.input, 
                           meta = meta.data, 
                           group.by = "seurat_annotations")


cellchat
summary(cellchat)
str(cellchat)


levels(cellchat@idents) #查看celltype和factor顺序
table(cellchat@idents) #每个celltype中的细胞数

#把metadata信息加到CellChat对象
identity = data.frame(group =meta.data$seurat_annotations, 
                      row.names = row.names(meta.data)) 

cellchat <- addMeta(cellchat, meta = identity, meta.name = "labels")
cellchat <- setIdent(cellchat, ident.use = "labels") ##将label设置为显示的默认顺序
levels(cellchat@idents) #查看celltype和factor顺序
table(cellchat@idents) #每个celltype中的细胞数


#设置配受体数据库(CellChatDB):
CellChatDB<- CellChatDB.human #有人类(CellChatDB.human)和小鼠(CellChatDB.mouse)

showDatabaseCategory(CellChatDB) #查看描述该数据库组成的饼状图

dplyr::glimpse(CellChatDB$interaction) #查看数据库结构


#直接使用CellChatDB全库进行细胞通讯分析：
CellChatDB.use <- CellChatDB # simply use the default CellChatDB

#选择数据库中特定子集进行细胞通讯分析:
CellChatDB.use <- subsetDB(CellChatDB, search = 'Secreted Signaling') #可选择Secreted Signaling、ECM-Receptor或Cell-Cell Contact

#将数据库添加到CellChat对象中(DB)
cellchat@DB <- CellChatDB.use


#将信号基因的表达矩阵子集化，节省计算成本
cellchat<- subsetData(cellchat) #必选的step,取上一步CellChatDB.use中信号基因的表达矩阵子集,赋值到cellchat@data.Signaling

#鉴定与每个细胞亚群相关的过表达信号基因：
##基于表达该基因的细胞比例、差异倍数和p值判定。
cellchat<- identifyOverExpressedGenes(cellchat,
                                      only.pos = TRUE, #仅返回positive markers
                                      thresh.pc = 0, #细胞比例阈值
                                      thresh.fc = 0, #差异倍数
                                      thresh.p = 0.05) #P-Value

#计算结果赋值到cellchat@var.features：
head(cellchat@var.features$features) #过表达信号基因名
head(cellchat@var.features$features.info) #差异计算结果表



#识别过表达基因配体-受体互作：
cellchat<- identifyOverExpressedInteractions(cellchat) 
head(cellchat@LR$LRsig) #计算结果赋值位置

# 
# #将基因表达数据映射到PPI网络(可跳过)：
# cellchat<- projectData(cellchat, PPI.human) #返回结果:cellchat@data.project

# 1）计算细胞通讯概率：
cellchat<- computeCommunProb(cellchat, raw.use = TRUE) #返回结果:cellchat@options$parameter
##默认使用原始表达数据(cellchat@data.Signaling)，若想使用上一步PPI矫正数据,设置raw.use = TALSE
cellchat<- filterCommunication(cellchat, min.cells = 10) #细胞通讯过滤(设置每个亚群中进行细胞间通讯所需的最小细胞数)


# 2）提取配受体对细胞通讯结果表：
df.net <- subsetCommunication(cellchat, slot.name = 'net')
head(df.net) #得到配受体对细胞通讯结果表

write.csv(df.net,file='df_net.csv')
#或访问其它感兴趣/特定的细胞通讯结果：
df.net1 <- subsetCommunication(cellchat, 
                               sources.use = c('spp1+_mac'), 
                               targets.use = c('T cells')) #访问特定细胞对子集
head(df.net1)

#或访问其它感兴趣/特定的细胞通讯结果：
df.net2 <- subsetCommunication(cellchat, 
                               sources.use = c('spp1+_mac'), 
                               targets.use = c('Epithelial cells')) #访问特定细胞对子集
head(df.net2)

#或访问其它感兴趣/特定的细胞通讯结果：
df.net3 <- subsetCommunication(cellchat, 
                               sources.use = c('spp1+_mac'), 
                               targets.use = c('Fibroblast cells')) #访问特定细胞对子集
head(df.net3)



# 3）提取信号通路水平的细胞通讯表：
cellchat<- computeCommunProbPathway(cellchat) #计算信号通路水平上的通讯概率
df.netp <- subsetCommunication(cellchat, slot.name = 'netP') #得到信号通路水平细胞通讯表
head(df.netp)


# 4）细胞互作关系展示：
#计算细胞对间通讯的数量和概率强度
cellchat<- aggregateNet(cellchat)

#不同细胞亚群间的互作数量与概率/强度可视化：
##细胞亚群间配受体数目网络图：
groupSize<- as.numeric(table(cellchat@idents))

par(mfrow = c(1,1), xpd = TRUE)
netVisual_circle(cellchat@net$count, 
                 vertex.weight = groupSize, 
                 weight.scale = T, 
                 label.edge = F, 
                 title.name = 'Number of interactions')



# 2.1 细胞通讯差异网络图
par(mfrow = c(1,2), xpd = TRUE)
netVisual_diffInteraction(cellchat, weight.scale = T)
netVisual_diffInteraction(cellchat, weight.scale = T, measure = "weight")



# 2.2 细胞通讯差异热图
p3 <- netVisual_heatmap(cellchat)
p4 <- netVisual_heatmap(cellchat, measure = "weight")
p3 + p4


# 3.1 组间富集信号通路差异条形图
##基于信息流或互作数对信号通路进行排序
p5 <- rankNet(cellchat, mode = "comparison", stacked = T, do.stat = TRUE) #堆叠
p6 <- rankNet(cellchat, mode = "comparison", stacked = F, do.stat = TRUE) #不堆叠
p5 + p6


# 3.2 传出信号通路水平热图
i = 1
object.list <- list(cellchat) 
cellchat <- netAnalysis_computeCentrality(cellchat)

ht1 = netAnalysis_signalingRole_heatmap(cellchat,
                                        pattern = "outgoing", #传出
                                        signaling = cellchat@netP$pathways,
                                        title = names(object.list)[i],
                                        width = 5,
                                        height = 15)

draw(ht1, ht_gap = unit(0.5, "cm"))


# 3.3 传入信号通路水平热图
ht2 = netAnalysis_signalingRole_heatmap(cellchat,
                                        pattern = "incoming", #传入
                                        signaling = cellchat@netP$pathways,
                                        title = names(object.list)[i],
                                        width = 5, height = 15,
                                        color.heatmap = "GnBu")


draw(ht1+ht2, ht_gap = unit(0.5, "cm"))


# 3.4 总体信号通路水平热图
ht5 = netAnalysis_signalingRole_heatmap(cellchat,
                                        pattern = "all", #总体
                                        signaling = cellchat@netP$pathways,
                                        title = names(object.list)[i],
                                        width = 5, height = 15,
                                        color.heatmap = "OrRd")

draw(ht5, ht_gap = unit(0.5, "cm"))


# 4.1 总配受体对概率差异气泡图
levels(cellchat@idents) #查看细胞亚群
netVisual_bubble(cellchat,
                 sources.use = 7,  #序号 是根据上一步查看细胞亚群顺序来的
                 targets.use = c(8,1,3,4),
                 comparison = NULL,
                 angle.x = 45)

# 5.单个/特定信号通路水平差异可视化
#使用网络图：
pathways.show <- c("CXCL") #选择目标信号通路
weight.max <- getMaxWeight(object.list,
                           slot.name = c("netP"),
                           attribute = pathways.show) #控制不同数据集的边权重

par(mfrow = c(1,2), xpd = TRUE)
for (i in 1:length(object.list)) {
  netVisual_aggregate(object.list[[i]],
                      signaling = pathways.show,
                      layout = "circle",
                      edge.weight.max = weight.max[1],
                      edge.width.max = 10,
                      signaling.name = paste(pathways.show, names(object.list)[i]))
}


save(list = ls(),file = 'cell_communication.rda')

load('cell_communication.rda')
library(limma)
library(tidyverse)
library(org.Hs.eg.db)
library(tidyverse)
library(msigdbr)
library(GSEABase)
library(GSVA)
library(dplyr)
library(pheatmap)
library(RColorBrewer)
library(ComplexHeatmap)
library(CellChat)



###################################01.数据前期处理与矫正###################################
#引用包
library(ggplot2)
library(limma)
library(Seurat)
library(dplyr)
library(magrittr)
library(celldex)
library(SingleR)
library(tidyverse)
library(ggthemes)
library(ggrepel)
library(harmony)
library(patchwork)
library(tidyr)
library(ggplot2)

load('zong.rda')

cell_n <- read.table("umapCluster_normal.txt", header = TRUE, row.names = 1, sep = "\t")
cell_t <- read.table("umapCluster_tumor.txt", header = TRUE, row.names = 1, sep = "\t")


cell <- rbind(cell_n,cell_t)
# 将行名转换为新的一列
cell$cell <- rownames(cell)


# 提取 type 为 "Endothelial cells" 的行
Pericytes_cells <- cell[cell$type == 'Pericytes', ]

logFCfilter=1               #logFC的过滤条件
adjPvalFilter=0.05          #校正后的pvalue过滤条件

data_Pericytes <- zong[,Pericytes_cells$cell]
#将矩阵转换为Seurat对象，并对数据进行过滤
pbmc=CreateSeuratObject(counts = data_Pericytes,
                        min.cells=5,      #基因至少在多少个细胞中有表达
                        min.features=500) #至少表达多少个基因

pbmc@assays$RNA@layers$counts@Dimnames <- list(rownames(pbmc@assays$RNA@features@.Data),rownames(pbmc@assays$RNA@cells@.Data))

#使用PercentageFeatureSet函数计算线粒体基因百分比
pbmc[["percent.mt"]] <- PercentageFeatureSet(object = pbmc, pattern = "^MT-")
#绘制特征基因小提琴图
pdf(file="01.featureViolin.pdf", width=10, height=6)
VlnPlot(object = pbmc, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3)
dev.off()

#过滤细胞
pbmc=subset(x = pbmc, subset = nFeature_RNA > 0 & nFeature_RNA < 7000 & percent.mt < 25)    #对数据进行过滤
pbmc@assays$RNA@layers$counts@Dimnames <- list(rownames(pbmc@assays$RNA@features@.Data),rownames(pbmc@assays$RNA@cells@.Data))

#测序深度相关性图
pdf(file="01.featureCor.pdf",width=10,height=6)
plot1 <- FeatureScatter(object = pbmc, feature1 = "nCount_RNA", feature2 = "percent.mt",pt.size=1.5)
plot2 <- FeatureScatter(object = pbmc, feature1 = "nCount_RNA", feature2 = "nFeature_RNA",,pt.size=1.5)
CombinePlots(plots = list(plot1, plot2))
dev.off()




#对数据进行标准化
pbmc <- NormalizeData(pbmc)
#提取细胞间变异系数较大的基因
pbmc <- FindVariableFeatures(object = pbmc, selection.method = "vst", nfeatures = 2000)
#输出特征方差图
top10 <- head(VariableFeatures(pbmc), 10)
pdf(file="01.featureVar.pdf",width=10,height=6)
plot1 <- VariableFeaturePlot(object = pbmc)
plot2 <- LabelPoints(plot = plot1, points = top10, repel = TRUE)
CombinePlots(plots = list(plot1, plot2))
dev.off()

###################################02.PCA主成分分析###################################
##PCA分析
pbmc=ScaleData(pbmc)          #PCA降维之前的标准处理步骤
pbmc=RunPCA(object= pbmc,npcs = 30 ,pc.genes=VariableFeatures(object = pbmc))     #PCA分析

# #绘制每个PCA成分的特征基因
# pdf(file="02.pcaGene.pdf",width=10,height=8)
# VizDimLoadings(object = pbmc, dims = 1:4, reduction = "pca",nfeatures = 20)
# dev.off()
# 
# #绘制主成分分析图
# pdf(file="02.PCA.pdf",width=6.5,height=6)
# DimPlot(object = pbmc, reduction = "pca")
# dev.off()

# #主成分分析热图
# pdf(file="02.pcaHeatmap.pdf",width=10,height=8)
# DimHeatmap(object = pbmc, dims = 1:4, cells = 500, balanced = TRUE,nfeatures = 30,ncol=2)
# dev.off()

# #得到每个PC的p值分布
# pbmc <- JackStraw(object = pbmc, num.replicate = 100)
# pbmc <- ScoreJackStraw(object = pbmc, dims = 1:10)
# pdf(file="02.pcaJackStraw.pdf",width=8,height=6)
# JackStrawPlot(object = pbmc, dims = 1:10)
# dev.off()

pdf(file="02.pcaElbowPlot.pdf",width=8,height=6)
ElbowPlot(pbmc)
dev.off()

# save(pbmc,file = '02.pbmc.rda')
load('02.pbmc.rda')
logFCfilter=1               #logFC的过滤条件
adjPvalFilter=0.05          #校正后的pvalue过滤条件
###################################03.TSNE聚类分析和Marker基因###################################
##TSNE聚类分析
pbmc <- FindNeighbors(pbmc, dims = 1:20)       #前10个维度
pbmc <- FindClusters(object = pbmc, resolution = 0.5)   #对细胞分组，对细胞标准模块化，resolution越小cluster越少
head(Idents(pbmc),5)
# pbmc <- RunTSNE(object = pbmc, dims = 1:10)             ##TSNE聚类
# 
# pdf(file="03.TSNE.pdf",width=6.5,height=6)
# TSNEPlot(object = pbmc, pt.size = 0.2, label = TRUE)    ##TSNE可视化
# dev.off()
# write.table(pbmc$seurat_clusters,file="03.tsneCluster.txt",quote=F,sep="\t",col.names=F)

##############################################################################
pbmc <- RunUMAP(object = pbmc, dims = 1:20)          #UMAP聚类
pdf(file="03.UMAP.pdf",width=17,height=14)
DimPlot(object = pbmc, reduction = "umap", pt.size = 0.2, label = TRUE,raster=FALSE)   #可视化
dev.off()
write.table(pbmc$seurat_clusters,file="03.umapCluster.txt",quote=F,sep="\t",col.names=F)


##查找每个cluster差异基因
pbmc.markers <- FindAllMarkers(object = pbmc,
                               only.pos = FALSE,
                               min.pct = 0.25,
                               logfc.threshold = logFCfilter)
sig.markers=pbmc.markers[(abs(as.numeric(as.vector(pbmc.markers$avg_log2FC)))>logFCfilter & as.numeric(as.vector(pbmc.markers$p_val_adj))<adjPvalFilter),]
write.table(sig.markers,file="03.clusterMarkers.txt",sep="\t",row.names=F,quote=F)


#marker
showGenes1 = c('PDGFRB','CSPG4',             # pericyteS
               'PLN','MYH11',                # Smooth muscle cell
               'CLDN5','VWF'                 # Endothelial cells
)   


FeaturePlot(object = pbmc, features = 'KCNMA1', cols = c("#F2F3F4", "blue"),raster=FALSE)


colors <-c("#3176B7","#F78000","#3FA116","#CE2820","#9265C1",
           "#885649","#DD76C5",'#EEAD0E',"#BBBE00","#41BED1",
           '#E5D2DD','#53A85F','#F1BB72','#F3B1A0','#D6E7A3',
           '#57C3F3','#E59CC4','#AB3282','#BD956A','#91D0BE',
           '#6778AE','#B53E2B','#712820','#DCC1DD','#F7F398',
           '#CCE0F5','#9FA3A8','#E0D4CA','#C5DEBA','#C1E6F3')


plot1 <- UMAPPlot(object = pbmc,group.by = "ident",cols = colors, pt.size =0.5, label = TRUE,raster=FALSE) +
  theme(legend.position = "bottom") +
  theme(text = element_text(size = 30))#UMAP可视化

pdf(file="04.UMAP-1.pdf",width=20,height=22)
plot1
dev.off()

color2 <- c('#77d3ec','#ffc5d0','#de597c')

# 构建plot2的分组数据# 构建plot2的分组数据# 构建plot2的分组数据# 构建plot2的分组数据
normal <- data.frame(ID = 1:16032) 
tumor <- data.frame(ID = 1:3933) 
normal$type <- "normal"
tumor$type <- "tumor"
# 合并两个数据框
group <- rbind(normal, tumor)
Pericytes_cells$group <- group$type

pbmc@meta.data$orig.ident <- Pericytes_cells$group

plot2 <- UMAPPlot(object = pbmc,group.by = "orig.ident",cols = color2, pt.size =0.5, label = TRUE,raster=FALSE) +
  theme(legend.position = "bottom") +
  theme(text = element_text(size = 30))#UMAP可视化

pdf(file="04.UMAP-2.pdf",width=20,height=22)
plot2
dev.off()

pdf(file="04.UMAP.pdf",width=40,height=22)
plot1+plot2
dev.off()

#绘制Marker在各个cluster的散点图
pdf(file="03.markerScatter.pdf",width=15,height=14)
FeaturePlot(object = pbmc, features = showGenes1, cols = c("#F2F3F4", "blue"),raster=FALSE)
dev.off()

top10 <- pbmc.markers %>% group_by(cluster) %>% top_n(n = 10, wt = avg_log2FC)
#绘制Marker在各个cluster的热图
pdf(file="03.umapHeatmap.pdf",width=12,height=9)
DoHeatmap(object = pbmc, features = top10$gene) + NoLegend()
dev.off()

#绘制marker小提琴图
pdf(file="03.markerViolin.pdf",width=10,height=6)
VlnPlot(object = pbmc, features = row.names(sig.markers)[1:2])
dev.off()


#绘制Marker在各个cluster的气泡图
pdf(file="03.markerBubble.pdf",width=20,height=12)
cluster10Marker=showGenes1
DotPlot(object = pbmc, features = cluster10Marker)
dev.off()

# save(list = ls(),file = '03.singleR.rda')
library(ggplot2)
library(limma)
library(Seurat)
library(dplyr)
library(magrittr)
library(celldex)
library(SingleR)
library(BiocParallel)
library(monocle)
library(cols4all)
load('03.singleR.rda')

#####个性标记####个性标记####个性标记####个性标记####个性标记####个性标记####个性标记####个性标记####
celltype=data.frame(ClusterID=0:21,
                    celltype= 0:21) 

# 'FA2H', 'MAG',                # Oligodendrocyte
# 'GFAP','AQP4',                # Astrocyte
# 'PDGFRB','CSPG4',             # pericyteS
# 'PLN','MYH11',                # Smooth muscle cell
# 'KCNMA1','SLC26A2',           # Meningeal fibroblast
# 'CLDN5','VWF'                 # Endothelial cells

FeaturePlot(object = pbmc, features = 'EPCAM', cols = c("#F2F3F4", "blue"))
FeaturePlot(object = pbmc, features = 'SFTPB', cols = c( "blue",'red'))

celltype[celltype$ClusterID %in% c(7,8,9),2]='Oligodendrocyte'
celltype[celltype$ClusterID %in% c(16,19),2]='Astrocyte'
celltype[celltype$ClusterID %in% c(12,17),2]='Pericytes'
celltype[celltype$ClusterID %in% c(13),2]='Smooth muscle cell'
celltype[celltype$ClusterID %in% c(11),2]='Meningeal fibroblast'
celltype[celltype$ClusterID %in% c(0,1,2,3,4,5,6,10,14,15,18,20,21),2]='Endothelial cells'

write.table(celltype,file="04.clusterAnn.txt",quote=F,sep="\t", row.names=F)





# colors <-c('#E5D2DD', '#53A85F', '#F1BB72', '#F3B1A0', '#D6E7A3', '#57C3F3',
#              '#E95C59', '#E59CC4', '#AB3282',  '#BD956A', 
#              '#9FA3A8', '#E0D4CA',  '#C5DEBA', '#F7F398',
#              '#C1E6F3', '#6778AE', '#91D0BE', '#B53E2B',
#              '#712820', '#DCC1DD', '#CCE0F5',  '#CCC9E6', '#625D9E', '#68A180', '#3A6963',
#              '#968175')

colors <-c("#3176B7","#F78000","#3FA116","#CE2820","#9265C1",
           "#885649","#DD76C5",'#EEAD0E',"#BBBE00","#41BED1",
           '#E5D2DD','#53A85F','#F1BB72','#F3B1A0','#D6E7A3',
           '#57C3F3','#E59CC4','#AB3282','#BD956A','#91D0BE',
           '#6778AE','#B53E2B','#712820','#DCC1DD','#F7F398',
           '#CCE0F5','#9FA3A8','#E0D4CA','#C5DEBA','#C1E6F3')

table(celltype$celltype)
mycolors =c("#3176B7","#F78000","#3FA116","#CE2820","#9265C1",
            "#885649","#DD76C5",'#EEAD0E',"#BBBE00","#41BED1", '#CCE0F5')



#cluster注释结果可视化
newLabels <- celltype$celltype
names(newLabels) <- levels(pbmc)
pbmc <- RenameIdents(pbmc, newLabels)

# celltype[celltype$ClusterID %in% c(0,1),2]='T_cells 1'
# celltype[celltype$ClusterID %in% c(15,16),2]='T_cells 2'
# celltype[celltype$ClusterID %in% c(23),2]='T_cells 3'
# celltype[celltype$ClusterID %in% c(3),2]='B_cells 1'
# celltype[celltype$ClusterID %in% c(6),2]='B_cells 2'
# celltype[celltype$ClusterID %in% c(0,12,14),2]='NK_cells'
# celltype[celltype$ClusterID %in% c(4,7,8,11,20),2]='Macrophage_cells'
# celltype[celltype$ClusterID %in% c(19),2]='Mast_cells'
# celltype[celltype$ClusterID %in% c(9),2]='Neutrophils'
# celltype[celltype$ClusterID %in% c(18),2]='Fibroblast_cells'
# celltype[celltype$ClusterID %in% c(13),2]='Epithelial_cells 1'
# celltype[celltype$ClusterID %in% c(2,5,17),2]='Epithelial_cells 2'
# celltype[celltype$ClusterID %in% c(21),2]='Epithelial_cells 3'
# celltype[celltype$ClusterID %in% c(10),2]='Epithelial_cells 4'
# celltype[celltype$ClusterID %in% c(22),2]='Endothelial_cells'




#细胞类型主观排序
cell_type <- c('Oligodendrocyte',
               'Astrocyte',
               'Pericytes',
               'Smooth muscle cell',
               'Meningeal fibroblast',
               'Endothelial cells')

TB <- subset(pbmc,idents = cell_type)
levels(TB) <- cell_type

plot1 <- UMAPPlot(object = pbmc,group.by = "ident",cols = colors, pt.size =0.5, label = TRUE,raster=FALSE) +
  theme(legend.position = "bottom") +
  theme(text = element_text(size = 30))#UMAP可视化

pdf(file="04.UMAP-1.pdf",width=20,height=22)
plot1
dev.off()

color2 <- c('#77d3ec','#ffc5d0','#de597c')

plot2 <- UMAPPlot(object = pbmc,group.by = "orig.ident",cols = color2, pt.size =0.5, label = TRUE,raster=FALSE) +
  theme(legend.position = "bottom") +
  theme(text = element_text(size = 30))#UMAP可视化

pdf(file="04.UMAP-2.pdf",width=20,height=22)
plot2
dev.off()

pdf(file="04.UMAP.pdf",width=40,height=22)
plot1+plot2
dev.off()

write.table(TB@active.ident,file="04.umapCluster.txt",quote=F,sep="\t",col.names=F)

# df_group <- as.data.frame(colnames(df))
# write.csv(df_group,'df_group.csv')

##cluster注释结果差异化
pbmc.markers=FindAllMarkers(object = TB,
                            only.pos = FALSE,
                            min.pct = 0.25,
                            logfc.threshold = logFCfilter)
sig.cellMarkers=pbmc.markers[(abs(as.numeric(as.vector(pbmc.markers$avg_log2FC)))>logFCfilter & as.numeric(as.vector(pbmc.markers$p_val_adj))<adjPvalFilter),]
write.table(sig.cellMarkers,file="04.cellMarkers.txt",sep="\t",row.names=F,quote=F)

#绘制Marker在各个cluster的散点图
pdf(file="04.markerScatter.pdf",width=15,height=14)
FeaturePlot(object = TB, features = showGenes1, cols = c("snow2", "blue"),raster=FALSE)
dev.off()

top10 <- pbmc.markers %>% group_by(cluster) %>% top_n(n = 10, wt = avg_log2FC)

#绘制Marker在各个cluster的热图
pdf(file="04.umapHeatmap.pdf",width=12,height=9)
DoHeatmap(object = TB, features = top10$gene) + NoLegend()
dev.off()

pdf(file="04.markerViolin.pdf",width=20,height=25)
VlnPlot(object = TB,fill.by = "ident",cols = colors, 
        features = showGenes1,pt.size = 0,stack = T,flip = T) + 
  theme(text = element_text(size = 30))+
  NoLegend()
dev.off()

#绘制Marker在各个cluster的气泡图
pdf(file="04.markerBubble.pdf",width=10,height=15)
cluster10Marker=showGenes1
DotPlot(object = TB, features = cluster10Marker,dot.scale = 10)+
  theme(axis.text.x = element_text(face = "bold",angle = 90,color = 'black'),
        panel.background = element_rect(fill = 'transparent',color = 'black',size = 1))+
  coord_flip()
dev.off()



###################################################################################
##该代码10.monocle2对???取的细胞类型进行轨迹绘???###################################
##该代码10.monocle2对???取的细胞类型进行轨迹绘???###################################
##该代码10.monocle2对???取的细胞类型进行轨迹绘???###################################
###################################################################################
# if (!requireNamespace("BiocManager", quietly = TRUE))
#   install.packages("BiocManager")
# BiocManager::install("monocle")


# install.packages("densityClust")
library(Seurat)
library(ggplot2)
library(clustree)
library(cowplot)
library(dplyr)
library(monocle)

# sub_pbmc <- pbmc
# save(sub_pbmc,file = 'sub_pbmc.rda')
load('sub_pbmc.rda')

expr_matrix <- as(as.matrix(sub_pbmc@assays$RNA@layers$counts), 'sparseMatrix')
p_data <- sub_pbmc@meta.data 
p_data$celltype <- sub_pbmc@active.ident  
f_data <- data.frame(gene_short_name = row.names(sub_pbmc),row.names = row.names(sub_pbmc))
pd <- new('AnnotatedDataFrame', data = p_data) 
fd <- new('AnnotatedDataFrame', data = f_data)
cds <- newCellDataSet(expr_matrix, 
                      phenoData = pd,
                      featureData = fd,
                      expressionFamily = negbinomial.size())

# 3. 估计size factor和离散度
# size facotr帮助我们标准化细胞之间的mRNA的差异。离散度值可以帮助我们进行后续的差异分析。（类似于seurat的数据归一化处理）
cds <- estimateSizeFactors(cds)
cds <- estimateDispersions(cds)

# 4. 过滤低质量的细胞
# 大多数单细胞工作流程至少会包含一些由死细胞或空孔组成的库。同样重要的是要删除doublets:由两个或多个细胞意外生成的库。这些细胞可以破坏下游步骤，如伪时间排序或聚类。要知道一个特定的基因有多少个表达，或者一个给定的细胞有多少个基因表达，通常是很方便的。Monocle提供了一个简单的函数来计算这些统计数据。
# 因为Seurat已经完成细胞过滤，此步可省略。
# 但由于Seurat是通过基因表达量来对细胞进行过滤。因此，我们可以通过下面的代码用表达某基因的细胞的数目对基因进行过滤，从而得到后续操作需要的基因。

#筛选基因,这里可以根据自己的需要筛选特定的基因
disp_table <- dispersionTable(cds)
#unsup_clustering_genes <- subset(disp_table, mean_expression >= 0.1)
unsup_clustering_genes <- subset(disp_table, mean_expression >= 0.1)

cds <- setOrderingFilter(cds, unsup_clustering_genes$gene_id)

#用DDRtree 进行降维分析
cds <- reduceDimension(
  cds,
  max_components = 2,
  method = 'DDRTree')

#计算psudotime值,若变换根orderCells(cds,root_state = 4)
cds <- orderCells(cds)
head(pData(cds))
# save(list = ls(),file = '10.monocle2.rda')
library(Seurat)
library(ggplot2)
library(clustree)
library(cowplot)
library(dplyr)
library(monocle)
load('10.monocle2.rda')

plot_cell_trajectory(cds,color_by="celltype", size=1,show_backbone=TRUE)
ggsave("拟时序.pdf",width = 7,height = 6)

plot_cell_trajectory(cds,color_by="Pseudotime", size=1,show_backbone=TRUE)
ggsave("Pseudotime.pdf",width = 7,height = 6)

plot_cell_trajectory(cds,color_by="State", size=1,show_backbone=TRUE)
ggsave("State.pdf",width = 7,height = 6)

colors <-c("#3176B7","#F78000","#3FA116","#CE2820","#9265C1",
           "#885649","#DD76C5",'#EEAD0E',"#BBBE00","#41BED1",
           '#E5D2DD','#53A85F','#F1BB72','#F3B1A0','#D6E7A3',
           '#57C3F3','#E59CC4','#AB3282','#BD956A','#91D0BE',
           '#6778AE','#B53E2B','#712820','#DCC1DD','#F7F398',
           '#CCE0F5','#9FA3A8','#E0D4CA','#C5DEBA','#C1E6F3')

plot_cell_trajectory(cds, x = 1, y = 2, color_by = "celltype",show_backbone=TRUE) + 
  theme(panel.border = element_blank()) + 
  scale_color_manual(values = colors) 
ggsave("拟时序P1.pdf",width = 7,height = 6)

#这里是把排序基因提取出来做回归分析，来找它们是否跟拟时间有显著的关系,如果不设置，就会用所有基因来做它们与拟时间的相关性
Time_diff <-differentialGeneTest(cds[unsup_clustering_genes$gene_id,], cores = 8,
                                 fullModelFormulaStr = "~sm.ns(Pseudotime)")
Time_diff <-Time_diff[,c(5,2,3,4,1,6,7)]     #把gene放前面，也可以不改
write.csv(Time_diff,"Time_diff_all.csv", row.names = F)

sig_Time_gene <- row.names(subset(Time_diff, qval < 0.0001))

plot_pseudotime_heatmap(cds[sig_Time_gene,],num_clusters=4, show_rownames=T, return_heatmap=T)

BEAM_res <- BEAM(cds[sig_Time_gene,],branch_point = 1, cores = 8)

#这里用的是sig_Time_gene，也就是第六步dpFeature找出来的基因。如果前面用的是seurat的marker基因，记得改成express_genes
# BEAM_res <- BEAM(cds, branch_point = 2,cores = 2) #对所有基因进行排序，运行慢
BEAM_res <-BEAM_res[order(BEAM_res$qval),]
BEAM_res <-BEAM_res[,c("gene_short_name", "pval", "qval")]
head(BEAM_res)
write.csv(BEAM_res,"BEAM_res.csv", row.names = F)

# save(list = ls(),file = 'monocle.rda')
load('monocle.rda')

#分支热图可视化,前100
pdf(file="branched_heatmap.pdf",width=7,height=15)
plot_genes_branched_heatmap(cds[row.names(subset(BEAM_res,qval < 1e-4))[1:100],],
                            branch_point = 1, num_clusters = 4, cores = 8,
                            use_gene_short_name= T,show_rownames = T)
dev.off()

#也可以选前100个基因可视化
BEAM_genes <- top_n(BEAM_res, n = 200,order(BEAM_res$qval)) %>% pull(gene_short_name) %>% as.character()

p <- plot_genes_branched_heatmap(cds[BEAM_genes,], branch_point = 1,
                                 num_clusters =4, show_rownames = T, return_heatmap = T)
ggsave("BEAM_heatmap.pdf",p$ph_res, width = 6.5, height = 20)


#也可以选前100个基因可视化#指定gene
BEAM_genes <- c('GDI2','TUSC3','CAPNS1', 'UBE2L3', 'RSL1D1','DAP','RAP1B','SNF8', 'NTMT1')

p <- plot_genes_branched_heatmap(cds[BEAM_genes,], branch_point = 1,
                                 num_clusters =4, show_rownames = T, return_heatmap = T)
ggsave("BEAM_heatmap_filter.pdf",p$ph_res, width = 6.5, height = 5)


##拟时序展示单个基因表达量，查看分支表达
library(ggpubr)
library(ggsci)
colnames(pData(cds))
pData(cds)$RAP1B = log2(exprs(cds)['RAP1B',]+1)
plot_cell_trajectory(cds, color_by ="RAP1B")  + scale_color_gsea()
ggsave("11.RAP1B.pdf", width = 6.5, height = 7)

pData(cds)$NTMT1 = log2(exprs(cds)['NTMT1',]+1)
plot_cell_trajectory(cds, color_by ="NTMT1")  + scale_color_gsea()
ggsave("11.NTMT1.pdf",width = 6.5, height = 7)

pData(cds)$CHMP4B = log2(exprs(cds)['CHMP4B',]+1)
plot_cell_trajectory(cds, color_by ="CHMP4B")  + scale_color_gsea()
ggsave("11.CHMP4B.pdf", width = 6.5, height = 7)

pData(cds)$ACTN4 = log2(exprs(cds)['ACTN4',]+1)
plot_cell_trajectory(cds, color_by ="ACTN4")  + scale_color_gsea()
ggsave("11.ACTN4.pdf", width = 6.5, height = 7)

#如果要所有的差异基因，就把前面所有基因的热图存为p
hp.genes <- p$ph_res$tree_row$labels[p$ph_res$tree_row$order]
BEAM_sig <- BEAM_res[hp.genes, c("gene_short_name", "pval", "qval")]
write.csv(BEAM_sig, "BEAM_sig.csv", row.names = F)

#选择上面热图中或显著差异基因中感兴趣的基因进行可视化
head(BEAM_sig)
genes <- row.names(subset(fData(cds),gene_short_name %in% c( 'GDI2','TUSC3','CAPNS1','RSL1D1','DAP','RAP1B','SNF8', 'NTMT1')))
plot_genes_branched_pseudotime(cds[genes,],branch_point = 1,color_by = "celltype",ncol = 1) + 
  theme(panel.border = element_blank()) + 
  scale_color_manual(values = colors) 
ggsave("11.总和.pdf", width = 6.5, height = 20)

head(BEAM_sig)
genes <- row.names(subset(fData(cds),gene_short_name %in% c( 'GDI2','TUSC3','CAPNS1')))
plot_genes_branched_pseudotime(cds[genes,],branch_point = 1,color_by = "celltype",ncol = 1) + 
  theme(panel.border = element_blank()) + 
  scale_color_manual(values = colors) 
ggsave("11.总和_TUSC3.pdf", width = 6.5, height = 20)

# save(list = ls(),file = 'all.rda')
load('all.rda')