A single-cell map of intratumoral changes during anti-PD1 treatment of patients with breast cancer
文章目录
前言
Seurat-单细胞文献复现第二弹-01
Seurat-单细胞文献复现第二弹-02
讲在前面的废话
勘误:一开始把文章中的
P** 与数据中的 BIOKEY** 误认为是对等的,结果导致有两幅图是错误的。
Code
Fig 1-Plot
### ---------------
###
### Create: Yuan.Sh
### Date: 2021-10-15 10:48:20
### Email: yuansh3354@163.com
### Blog: https://blog.youkuaiyun.com/qq_40966210
### Fujian Medical University
###
### ---------------
# options
rm(list=ls())
gc()
options(stringsAsFactors = F)
options(as.is = T)
# packages
library(stringr)
library(magrittr)
library(Seurat)
library(patchwork)
library(ggplot2)
library(ggpubr)
#library(ggthemes)
#library(scales)
# set work-dir
if(file.exists('/media')){
setwd('/media/yuansh/1THHD/Yuansh_Share/单细胞 PDL1 响应预测')
}else{setwd('/Volumes/Yuansh_Share/单细胞 PDL1 响应预测/')}
getwd()
# load data
tcr.info = read.csv('scTCR/1879-BIOKEY_barcodes_vdj_combined_cohort1.csv')
tcr.meta = read.csv('scTCR/1881-BIOKEY_clonotypes_combined_cohort1.csv')
load('output/my_cell_annotion.rdata')
load('output/meta_NEs.rdata')
mycolors = c("#1a476f","#90353b","#55752f","#e37e00","#6e8e84",
"#c10534","#938dd2","#cac27e","#a0522d","#7b92a8",
"#2d6d66","#9c8847","#bfa19c","#ffd200","#d9e6eb")
# Corrigendum
meta = sce@meta.data
meta = meta[which(meta$expansion !='n/a'),]
pre = meta[which(meta$timepoint == 'Pre'),]
on = meta[which(meta$timepoint == 'On'),]
ids1 = table(pre$cell_annotion) %>% as.data.frame()
ids2 = table(on$cell_annotion) %>% as.data.frame()
ids1$group = 'pre'
ids2$group = 'on'
df = rbind(ids1,ids2)
E = meta[which(meta$expansion == 'E'),'patient_id'] %>% unique()
# boxplot
df = pre
ids = table(df$patient_id) %>% as.data.frame()
df.plot = table(df[,c('patient_id','cell_annotion')]) %>% as.data.frame()
df.plot$prop = 0
for(i in 1:dim(ids)[1]){
prop.temp = df.plot[which(df.plot$patient_id %in% ids[i,1]),'Freq'] / ids[i,2]
df.plot[which(df.plot$patient_id %in% ids[i,1]),'prop'] = prop.temp
}
df.plot$cell_annotion = factor(df.plot$cell_annotion,levels = c("T_cell","Fibroblast","Myeloid_cell","Cancer_cell","B_cell","Endothelial_cell","Mast_cell","pDC"))
df.plot$Ne_type = ifelse(df.plot$patient_id %in% E,'E','NEs')
df.plot$Ne_type = factor(df.plot$Ne_type,levels = c('NEs','E'))
df.plot
ggplot(data=df.plot, mapping=aes(x=cell_annotion,y=prop,color=Ne_type))+
geom_boxplot()+theme_bw()+scale_color_manual(values = mycolors[7:6])
ggsave('plot/(Corrigendum)_Cohort1_pre_box_plot.png', width = 8.24, height = 2.84)
P_val 计算
my_comparisons = 'T_cell'
ids1 = df.plot[which((df.plot$cell_annotion == my_comparisons) & (df.plot$Ne_type == 'E')),]
ids2 = df.plot[which((df.plot$cell_annotion == my_comparisons) & (df.plot$Ne_type == 'NEs')),]
t.test(ids1$prop,ids2$prop)$p.value
my_comparisons = 'Fibroblast'
ids1 = df.plot[which((df.plot$cell_annotion == my_comparisons) & (df.plot$Ne_type == 'E')),]
ids2 = df.plot[which((df.plot$cell_annotion == my_comparisons) & (df.plot$Ne_type == 'NEs')),]
t.test(ids1$prop,ids2$prop)$p.value
my_comparisons = 'Cancer_cell'
ids1 = df.plot[which((df.plot$cell_annotion == my_comparisons) & (df.plot$Ne_type == 'E')),]
ids2 = df.plot[which((df.plot$cell_annotion == my_comparisons) & (df.plot$Ne_type == 'NEs')),]
t.test(ids1$prop,ids2$prop)$p.value
输出结果:
- T_cell : 0.03777869
- Fibroblast : 0.03929499
- Cancer_cell : 0.4323733
上述显著性与文献一致
Fig 2-Plot
df = on
ids = table(df$patient_id) %>% as.data.frame()
df.plot = table(df[,c('patient_id','cell_annotion')]) %>% as.data.frame()
df.plot$prop = 0
for(i in 1:dim(ids)[1]){
prop.temp = df.plot[which(df.plot$patient_id %in% ids[i,1]),'Freq'] / ids[i,2]
df.plot[which(df.plot$patient_id %in% ids[i,1]),'prop'] = prop.temp
}
df.plot$cell_annotion = factor(df.plot$cell_annotion,levels = c("T_cell","Fibroblast","Myeloid_cell","Cancer_cell","B_cell","Endothelial_cell","Mast_cell","pDC"))
df.plot$Ne_type = ifelse(df.plot$patient_id %in% E,'E','NEs')
df.plot$Ne_type = factor(df.plot$Ne_type,levels = c('NEs','E'))
df.plot
ggplot(data=df.plot, mapping=aes(x=cell_annotion,y=prop,color=Ne_type))+
geom_boxplot()+theme_bw()+scale_color_manual(values = mycolors[7:6])
ggsave('plot/(Corrigendum)_Cohort1_on_box_plot.png', width = 8.24, height = 2.84)
P_val 计算
my_comparisons = 'T_cell'
ids1 = df.plot[which((df.plot$cell_annotion == my_comparisons) & (df.plot$Ne_type == 'E')),]
ids2 = df.plot[which((df.plot$cell_annotion == my_comparisons) & (df.plot$Ne_type == 'NEs')),]
t.test(ids1$prop,ids2$prop)$p.value
my_comparisons = 'Fibroblast'
ids1 = df.plot[which((df.plot$cell_annotion == my_comparisons) & (df.plot$Ne_type == 'E')),]
ids2 = df.plot[which((df.plot$cell_annotion == my_comparisons) & (df.plot$Ne_type == 'NEs')),]
t.test(ids1$prop,ids2$prop)$p.value
my_comparisons = 'Cancer_cell'
ids1 = df.plot[which((df.plot$cell_annotion == my_comparisons) & (df.plot$Ne_type == 'E')),]
ids2 = df.plot[which((df.plot$cell_annotion == my_comparisons) & (df.plot$Ne_type == 'NEs')),]
t.test(ids1$prop,ids2$prop)$p.value
输出结果:
- T_cell : 3.470728e-5
- Fibroblast : 0.002274304
- Cancer_cell : 0.01048942
上述显著性与文献一致
免疫组库
其实文章给的数据已经是处理过后的终末数据了。没什么好分析的,直接上图可视化吧
# options
rm(list=ls())
gc()
options(stringsAsFactors = F)
options(as.is = T)
# packages
library(stringr)
library(magrittr)
library(Seurat)
library(patchwork)
library(ggplot2)
library(ggpubr)
library(reshape2)
#library(ggthemes)
#library(scales)
# set work-dir
if(file.exists('/media')){
setwd('/media/yuansh/1THHD/Yuansh_Share/单细胞 PDL1 响应预测')
}else{setwd('/Volumes/Yuansh_Share/单细胞 PDL1 响应预测/')}
getwd()
# load data
load('output/my_cell_annotion.rdata')
load('output/meta_NEs.rdata')
tcr.info = read.csv('scTCR/1879-BIOKEY_barcodes_vdj_combined_cohort1.csv')
tcr.meta = read.csv('scTCR/1881-BIOKEY_clonotypes_combined_cohort1.csv')
mycolors = c("#1a476f","#90353b","#55752f","#e37e00","#6e8e84",
"#c10534","#938dd2","#cac27e","#a0522d","#7b92a8",
"#2d6d66","#9c8847","#bfa19c","#ffd200","#d9e6eb")
# Step.01 Clean data
tcr.meta$patient = gsub('(_On)|(_Pre)','',tcr.meta$SAMPLE_ID)
tcr.info$patient = gsub('(_On)|(_Pre)','',tcr.info$SAMPLE_ID)
# Statistics information : >2 cells (n=12,531) or >5 cells (n = 7,793)
ids = tcr.info$clonotype
clon = which(table(ids)>2) %>% names
tcr.info[which(tcr.info$clonotype %in% clon),] %>% dim
clon = which(table(ids)>5) %>% names
tcr.info[which(tcr.info$clonotype %in% clon),] %>% dim
# Step.02 follow the article, define NEs / E type
### get on- / pre-
df = tcr.meta[which(tcr.meta$frequency > 2),]
tcr.meta.on = df[grep('On',df$clonotype_id),]
tcr.meta.pre = df[grep('Pre',df$clonotype_id),]
tcr.meta.on$clonotype_id = gsub('_On','',tcr.meta.on$clonotype_id)
tcr.meta.pre$clonotype_id = gsub('_Pre','',tcr.meta.pre$clonotype_id)
which(table(tcr.meta.on$patient) > 35) %>% length
ids = intersect(tcr.meta.on$clonotype_id,tcr.meta.pre$clonotype_id)
ids1 = tcr.meta.on[which(tcr.meta.on$clonotype_id %in% ids),]
ids2 = tcr.meta.pre[which(tcr.meta.pre$clonotype_id %in% ids),]
ids = ids1$frequency - ids2$frequency
ids = ids1[which(ids < 0),'clonotype_id']
ids = tcr.meta.on[-which(tcr.meta.on$clonotype_id %in% ids),]
which(table(tcr.meta.on$patient) > 35)%>% length()
df1 = as.data.frame(table(tcr.meta.on$patient))
df = tcr.meta[which(tcr.meta$frequency > 2),]
tcr.meta.on = df[grep('On',df$clonotype_id),]
tcr.meta.pre = df[grep('Pre',df$clonotype_id),]
tcr.meta.on$clonotype_id = gsub('_On','',tcr.meta.on$clonotype_id)
tcr.meta.pre$clonotype_id = gsub('_Pre','',tcr.meta.pre$clonotype_id)
which(table(tcr.meta.on$patient) > 35) %>% length
ids = intersect(tcr.meta.on$clonotype_id,tcr.meta.pre$clonotype_id)
ids1 = tcr.meta.on[which(tcr.meta.on$clonotype_id %in% ids),]
ids2 = tcr.meta.pre[which(tcr.meta.pre$clonotype_id %in% ids),]
ids = ids1$proportion - ids2$proportion
ids = ids1[which(ids < 0),'clonotype_id']
ids = tcr.meta.on[-which(tcr.meta.on$clonotype_id %in% ids),]
which(table(tcr.meta.on$patient) > 35)%>% length()
df2 = as.data.frame(table(tcr.meta.on$patient))
df = tcr.meta[which(tcr.meta$frequency > 5),]
tcr.meta.on = df[grep('On',df$clonotype_id),]
tcr.meta.pre = df[grep('Pre',df$clonotype_id),]
tcr.meta.on$clonotype_id = gsub('_On','',tcr.meta.on$clonotype_id)
tcr.meta.pre$clonotype_id = gsub('_Pre','',tcr.meta.pre$clonotype_id)
which(table(tcr.meta.on$patient) > 35) %>% length
ids = intersect(tcr.meta.on$clonotype_id,tcr.meta.pre$clonotype_id)
ids1 = tcr.meta.on[which(tcr.meta.on$clonotype_id %in% ids),]
ids2 = tcr.meta.pre[which(tcr.meta.pre$clonotype_id %in% ids),]
ids = ids1$frequency - ids2$frequency
ids = ids1[which(ids < 0),'clonotype_id']
ids = tcr.meta.on[-which(tcr.meta.on$clonotype_id %in% ids),]
which(table(tcr.meta.on$patient) > 35)%>% length()
df3 = as.data.frame(table(tcr.meta.on$patient))
df = merge(df1,df2,by='Var1')
df = merge(df,df3,by='Var1',all.x=T)
df[which(is.na(df$Freq)),'Freq']=0
colnames(df) = c('Var1','Freq ↑ and freq > 2','Prop ↑ and freq > 2','Freq ↑ and freq > 5')
df = melt(
df, #待转换的数据集名称
id.vars='Var1', #要保留的主字段
variable.name="Group", #转换后的分类字段名称(维度)
value.name="Freq" #转换后的度量值名称
)
ids = df1[order(df1$Freq),]$Var1 %>% as.character()
df$Var1 = factor(df$Var1, levels = ids)
ggplot(data=df, mapping=aes(x=Var1,y=Freq,fill=Group))+
geom_bar(width=0.75,stat="identity",position="dodge")+theme_classic()+ theme(axis.text.x = element_text(angle = 45, hjust = 1))
ggsave('plot/Cohort1_scTCR_bar_plot.png', width = 8.24, height = 2.84)
文章的后面分析其实就是围绕上面的内容进行不断的提取细胞,然后分群注释,找差异。没什么好说的了。
由于这篇文章的数据实在太大,个人电脑带不动速率分析,所以这部分的图没办法画,导致后续的内容没办法继续下去了。
讲实话其实这边文章的内容一点也不难。。。感觉没啥意思
### ---------------
###
### Create: Yuan.Sh
### Date: 2021-10-16 23:03:26
### Email: yuansh3354@163.com
### Blog: https://blog.youkuaiyun.com/qq_40966210
### Fujian Medical University
###
### ---------------
> 课题项目合作以及咨询请联系:yuansh3354@163.com
After advisement, if you still have questions, you can send me an E-mail asking for help
Best Regards,
Yuan.SH
---------------------------------------
please contact me via the following ways:
(a) E-mail: yuansh3354@gmail/163/outlook.com
(b) QQ: 1044532817
(c) WeChat: YuanSh181014
(d) Address: School of Basic Medical Sciences,
Fujian Medical University, Fuzhou,
Fujian 350108, China
---------------------------------------