Seurat-单细胞文献复现第二弹-02_单细胞生信复现-优快云博客

本文链接：https://blog.youkuaiyun.com/qq_40966210/article/details/120804976

A single-cell map of intratumoral changes during anti-PD1 treatment of patients with breast cancer

https://doi.org/10.1038/s41591-021-01323-8

文章目录

A single-cell map of intratumoral changes during anti-PD1 treatment of patients with breast cancer
前言
讲在前面的废话
Code
免疫组库

前言

Seurat-单细胞文献复现第二弹-01
Seurat-单细胞文献复现第二弹-02

讲在前面的废话

勘误：一开始把文章中的
P** 与数据中的 BIOKEY** 误认为是对等的，结果导致有两幅图是错误的。

Code

Fig 1-Plot

### ---------------
###
### Create: Yuan.Sh
### Date: 2021-10-15 10:48:20
### Email: yuansh3354@163.com
### Blog: https://blog.youkuaiyun.com/qq_40966210
### Fujian Medical University
###
### ---------------

# options
rm(list=ls())
gc()
options(stringsAsFactors = F)
options(as.is = T)

# packages 
library(stringr)
library(magrittr)
library(Seurat)
library(patchwork)
library(ggplot2)
library(ggpubr)
#library(ggthemes)
#library(scales)

# set work-dir

if(file.exists('/media')){
  setwd('/media/yuansh/1THHD/Yuansh_Share/单细胞 PDL1 响应预测')
}else{setwd('/Volumes/Yuansh_Share/单细胞 PDL1 响应预测/')}
getwd()

# load data 
tcr.info = read.csv('scTCR/1879-BIOKEY_barcodes_vdj_combined_cohort1.csv')
tcr.meta = read.csv('scTCR/1881-BIOKEY_clonotypes_combined_cohort1.csv')
load('output/my_cell_annotion.rdata')
load('output/meta_NEs.rdata')
mycolors = c("#1a476f","#90353b","#55752f","#e37e00","#6e8e84",
             "#c10534","#938dd2","#cac27e","#a0522d","#7b92a8",
             "#2d6d66","#9c8847","#bfa19c","#ffd200","#d9e6eb")
# Corrigendum
meta = sce@meta.data
meta = meta[which(meta$expansion !='n/a'),]
pre = meta[which(meta$timepoint == 'Pre'),]
on = meta[which(meta$timepoint == 'On'),]
ids1 = table(pre$cell_annotion) %>% as.data.frame()
ids2 = table(on$cell_annotion) %>% as.data.frame()
ids1$group = 'pre'
ids2$group = 'on'
df = rbind(ids1,ids2)
E = meta[which(meta$expansion == 'E'),'patient_id'] %>% unique()
# boxplot 
df = pre
ids = table(df$patient_id) %>% as.data.frame()
df.plot = table(df[,c('patient_id','cell_annotion')]) %>% as.data.frame()
df.plot$prop = 0
for(i in 1:dim(ids)[1]){
  prop.temp = df.plot[which(df.plot$patient_id %in% ids[i,1]),'Freq'] / ids[i,2]
  df.plot[which(df.plot$patient_id %in% ids[i,1]),'prop'] = prop.temp
}
df.plot$cell_annotion = factor(df.plot$cell_annotion,levels = c("T_cell","Fibroblast","Myeloid_cell","Cancer_cell","B_cell","Endothelial_cell","Mast_cell","pDC"))
df.plot$Ne_type = ifelse(df.plot$patient_id %in% E,'E','NEs')
df.plot$Ne_type = factor(df.plot$Ne_type,levels = c('NEs','E'))
df.plot

ggplot(data=df.plot, mapping=aes(x=cell_annotion,y=prop,color=Ne_type))+
  geom_boxplot()+theme_bw()+scale_color_manual(values = mycolors[7:6])
ggsave('plot/(Corrigendum)_Cohort1_pre_box_plot.png', width = 8.24, height = 2.84)

请添加图片描述

P_val 计算

my_comparisons = 'T_cell'
ids1 = df.plot[which((df.plot$cell_annotion == my_comparisons) & (df.plot$Ne_type == 'E')),]
ids2 = df.plot[which((df.plot$cell_annotion == my_comparisons) & (df.plot$Ne_type == 'NEs')),]
t.test(ids1$prop,ids2$prop)$p.value

my_comparisons = 'Fibroblast'
ids1 = df.plot[which((df.plot$cell_annotion == my_comparisons) & (df.plot$Ne_type == 'E')),]
ids2 = df.plot[which((df.plot$cell_annotion == my_comparisons) & (df.plot$Ne_type == 'NEs')),]
t.test(ids1$prop,ids2$prop)$p.value

my_comparisons = 'Cancer_cell'
ids1 = df.plot[which((df.plot$cell_annotion == my_comparisons) & (df.plot$Ne_type == 'E')),]
ids2 = df.plot[which((df.plot$cell_annotion == my_comparisons) & (df.plot$Ne_type == 'NEs')),]
t.test(ids1$prop,ids2$prop)$p.value

输出结果：

T_cell : 0.03777869
Fibroblast : 0.03929499
Cancer_cell : 0.4323733

上述显著性与文献一致

Fig 2-Plot

df = on
ids = table(df$patient_id) %>% as.data.frame()
df.plot = table(df[,c('patient_id','cell_annotion')]) %>% as.data.frame()
df.plot$prop = 0
for(i in 1:dim(ids)[1]){
  prop.temp = df.plot[which(df.plot$patient_id %in% ids[i,1]),'Freq'] / ids[i,2]
  df.plot[which(df.plot$patient_id %in% ids[i,1]),'prop'] = prop.temp
}
df.plot$cell_annotion = factor(df.plot$cell_annotion,levels = c("T_cell","Fibroblast","Myeloid_cell","Cancer_cell","B_cell","Endothelial_cell","Mast_cell","pDC"))
df.plot$Ne_type = ifelse(df.plot$patient_id %in% E,'E','NEs')
df.plot$Ne_type = factor(df.plot$Ne_type,levels = c('NEs','E'))
df.plot

ggplot(data=df.plot, mapping=aes(x=cell_annotion,y=prop,color=Ne_type))+
  geom_boxplot()+theme_bw()+scale_color_manual(values = mycolors[7:6])
ggsave('plot/(Corrigendum)_Cohort1_on_box_plot.png', width = 8.24, height = 2.84)

请添加图片描述

P_val 计算

my_comparisons = 'T_cell'
ids1 = df.plot[which((df.plot$cell_annotion == my_comparisons) & (df.plot$Ne_type == 'E')),]
ids2 = df.plot[which((df.plot$cell_annotion == my_comparisons) & (df.plot$Ne_type == 'NEs')),]
t.test(ids1$prop,ids2$prop)$p.value

my_comparisons = 'Fibroblast'
ids1 = df.plot[which((df.plot$cell_annotion == my_comparisons) & (df.plot$Ne_type == 'E')),]
ids2 = df.plot[which((df.plot$cell_annotion == my_comparisons) & (df.plot$Ne_type == 'NEs')),]
t.test(ids1$prop,ids2$prop)$p.value

my_comparisons = 'Cancer_cell'
ids1 = df.plot[which((df.plot$cell_annotion == my_comparisons) & (df.plot$Ne_type == 'E')),]
ids2 = df.plot[which((df.plot$cell_annotion == my_comparisons) & (df.plot$Ne_type == 'NEs')),]
t.test(ids1$prop,ids2$prop)$p.value

输出结果：

T_cell : 3.470728e-5
Fibroblast : 0.002274304
Cancer_cell : 0.01048942

上述显著性与文献一致

免疫组库

其实文章给的数据已经是处理过后的终末数据了。没什么好分析的，直接上图可视化吧

# options
rm(list=ls())
gc()
options(stringsAsFactors = F)
options(as.is = T)

# packages 
library(stringr)
library(magrittr)
library(Seurat)
library(patchwork)
library(ggplot2)
library(ggpubr)
library(reshape2)
#library(ggthemes)
#library(scales)

# set work-dir

if(file.exists('/media')){
  setwd('/media/yuansh/1THHD/Yuansh_Share/单细胞 PDL1 响应预测')
}else{setwd('/Volumes/Yuansh_Share/单细胞 PDL1 响应预测/')}
getwd()

# load data 
load('output/my_cell_annotion.rdata')
load('output/meta_NEs.rdata')

tcr.info = read.csv('scTCR/1879-BIOKEY_barcodes_vdj_combined_cohort1.csv')
tcr.meta = read.csv('scTCR/1881-BIOKEY_clonotypes_combined_cohort1.csv')

mycolors = c("#1a476f","#90353b","#55752f","#e37e00","#6e8e84",
             "#c10534","#938dd2","#cac27e","#a0522d","#7b92a8",
             "#2d6d66","#9c8847","#bfa19c","#ffd200","#d9e6eb")


# Step.01 Clean data
tcr.meta$patient = gsub('(_On)|(_Pre)','',tcr.meta$SAMPLE_ID)
tcr.info$patient = gsub('(_On)|(_Pre)','',tcr.info$SAMPLE_ID)

# Statistics information : >2 cells (n=12,531) or >5 cells (n = 7,793)
ids = tcr.info$clonotype
clon = which(table(ids)>2) %>% names
tcr.info[which(tcr.info$clonotype %in% clon),] %>% dim
clon = which(table(ids)>5) %>% names
tcr.info[which(tcr.info$clonotype %in% clon),] %>% dim

# Step.02 follow the article, define NEs / E type
### get on- / pre-
df = tcr.meta[which(tcr.meta$frequency > 2),]
tcr.meta.on = df[grep('On',df$clonotype_id),]
tcr.meta.pre = df[grep('Pre',df$clonotype_id),]
tcr.meta.on$clonotype_id = gsub('_On','',tcr.meta.on$clonotype_id)
tcr.meta.pre$clonotype_id = gsub('_Pre','',tcr.meta.pre$clonotype_id)
which(table(tcr.meta.on$patient) > 35) %>% length
ids = intersect(tcr.meta.on$clonotype_id,tcr.meta.pre$clonotype_id)
ids1 = tcr.meta.on[which(tcr.meta.on$clonotype_id %in% ids),]
ids2 = tcr.meta.pre[which(tcr.meta.pre$clonotype_id %in% ids),]
ids = ids1$frequency - ids2$frequency
ids = ids1[which(ids < 0),'clonotype_id']
ids = tcr.meta.on[-which(tcr.meta.on$clonotype_id %in% ids),]
which(table(tcr.meta.on$patient) > 35)%>% length()
df1 = as.data.frame(table(tcr.meta.on$patient))

df = tcr.meta[which(tcr.meta$frequency > 2),]
tcr.meta.on = df[grep('On',df$clonotype_id),]
tcr.meta.pre = df[grep('Pre',df$clonotype_id),]
tcr.meta.on$clonotype_id = gsub('_On','',tcr.meta.on$clonotype_id)
tcr.meta.pre$clonotype_id = gsub('_Pre','',tcr.meta.pre$clonotype_id)
which(table(tcr.meta.on$patient) > 35) %>% length
ids = intersect(tcr.meta.on$clonotype_id,tcr.meta.pre$clonotype_id)
ids1 = tcr.meta.on[which(tcr.meta.on$clonotype_id %in% ids),]
ids2 = tcr.meta.pre[which(tcr.meta.pre$clonotype_id %in% ids),]
ids = ids1$proportion - ids2$proportion
ids = ids1[which(ids < 0),'clonotype_id']
ids = tcr.meta.on[-which(tcr.meta.on$clonotype_id %in% ids),]
which(table(tcr.meta.on$patient) > 35)%>% length()
df2 = as.data.frame(table(tcr.meta.on$patient))

df = tcr.meta[which(tcr.meta$frequency > 5),]
tcr.meta.on = df[grep('On',df$clonotype_id),]
tcr.meta.pre = df[grep('Pre',df$clonotype_id),]
tcr.meta.on$clonotype_id = gsub('_On','',tcr.meta.on$clonotype_id)
tcr.meta.pre$clonotype_id = gsub('_Pre','',tcr.meta.pre$clonotype_id)
which(table(tcr.meta.on$patient) > 35) %>% length
ids = intersect(tcr.meta.on$clonotype_id,tcr.meta.pre$clonotype_id)
ids1 = tcr.meta.on[which(tcr.meta.on$clonotype_id %in% ids),]
ids2 = tcr.meta.pre[which(tcr.meta.pre$clonotype_id %in% ids),]
ids = ids1$frequency - ids2$frequency
ids = ids1[which(ids < 0),'clonotype_id']
ids = tcr.meta.on[-which(tcr.meta.on$clonotype_id %in% ids),]
which(table(tcr.meta.on$patient) > 35)%>% length()
df3 = as.data.frame(table(tcr.meta.on$patient))

df = merge(df1,df2,by='Var1')
df = merge(df,df3,by='Var1',all.x=T)
df[which(is.na(df$Freq)),'Freq']=0
colnames(df) = c('Var1','Freq ↑ and freq > 2','Prop ↑ and freq > 2','Freq ↑ and freq > 5')
df = melt(
  df,                       #待转换的数据集名称
  id.vars='Var1',  #要保留的主字段
  variable.name="Group",         #转换后的分类字段名称（维度）
  value.name="Freq"             #转换后的度量值名称
)
ids = df1[order(df1$Freq),]$Var1 %>% as.character()
df$Var1 = factor(df$Var1, levels = ids)
ggplot(data=df, mapping=aes(x=Var1,y=Freq,fill=Group))+
  geom_bar(width=0.75,stat="identity",position="dodge")+theme_classic()+ theme(axis.text.x = element_text(angle = 45, hjust = 1))
ggsave('plot/Cohort1_scTCR_bar_plot.png', width = 8.24, height = 2.84)

请添加图片描述

文章的后面分析其实就是围绕上面的内容进行不断的提取细胞，然后分群注释，找差异。没什么好说的了。
由于这篇文章的数据实在太大，个人电脑带不动速率分析，所以这部分的图没办法画，导致后续的内容没办法继续下去了。
讲实话其实这边文章的内容一点也不难。。。感觉没啥意思

### ---------------
###
### Create: Yuan.Sh
### Date: 2021-10-16 23:03:26
### Email: yuansh3354@163.com
### Blog: https://blog.youkuaiyun.com/qq_40966210
### Fujian Medical University
###
### ---------------

> 课题项目合作以及咨询请联系：yuansh3354@163.com

After advisement, if you still have questions, you can send me an E-mail asking for help
Best Regards,
Yuan.SH
---------------------------------------
please contact me via the following ways:
(a) E-mail: yuansh3354@gmail/163/outlook.com
(b) QQ: 1044532817
(c) WeChat: YuanSh181014
(d) Address: School of Basic Medical Sciences, 
Fujian Medical University, Fuzhou, 
Fujian 350108, China
---------------------------------------