task7b-TP53突变与否的TNBC病人基因表达相关性改变

作业链接

作业内容

重复这个散点图
在这里插入图片描述

背景知识

TNBC-三阴性乳腺癌
三阴乳腺癌是指乳腺癌免疫组化结果:雌激素受体(ER)、孕激素受体(PR)和人表皮生长因子受体2(Her-2)均为阴性的乳腺癌。
三阴性乳腺癌容易早期发生广泛转移,如脑转移、骨转移等情况。所以,化疗在三阴性乳腺癌当中地位很重要。当然,复发高峰在手术以后头三年,过了复发高峰以后,预后也还是非常好的一种恶性肿瘤,具体要看分期如何、病人身体状况,以及接受什么样治疗等。
在这里插入图片描述

下载数据并提取

1.从TCGA.BRCA.sampleMap%2FBRCA_clinicalMatrix中提取TNBC的TCGA ID,发现一共123个
#https://tcga-xena-hub.s3.us-east-1.amazonaws.com/download/TCGA.BRCA.sampleMap%2FBRCA_clinicalMatrix;
#phenotype;存储BRCA的样品临床信息
#TCGA.BRCA.sampleMap%2FBRCA_clinicalMatrix;
cut -f 1,8,10,22 TCGA.BRCA.sampleMap%2FBRCA_clinicalMatrix |awk '$2=="Negative" && $3=="Negative" && $4=="Negative"' |cut -f 1 >TNBC_id
TCGA-A1-A0SK-01,TCGA-A1-A0SO-01,TCGA-A1-A0SP-01,TCGA-A2-A04P-01,TCGA-A2-A04Q-01,TCGA-A2-A04T-01,TCGA-A2-A04U-01,TCGA-A2-A0CM-01,TCGA-A2-A0D0-01,TCGA-A2-A0D2-01,TCGA-A2-A0ST-01,TCGA-A2-A0SX-01,TCGA-A2-A0T0-01,TCGA-A2-A0T2-01,TCGA-A2-A0YE-01,TCGA-A2-A0YM-01,TCGA-A2-A1G6-01,TCGA-A7-A0CE-01,TCGA-A7-A0DA-01,TCGA-A7-A26F-01,TCGA-A7-A26G-01,TCGA-A7-A26I-01,TCGA-A8-A07C-01,TCGA-A8-A07O-01,TCGA-A8-A08R-01,TCGA-A8-A09X-01,TCGA-AN-A04D-01,TCGA-AN-A0AL-01,TCGA-AN-A0AR-01,TCGA-AN-A0AT-01,TCGA-AN-A0FL-01,TCGA-AN-A0FX-01,TCGA-AN-A0G0-01,TCGA-AN-A0XU-01,TCGA-AO-A03U-01,TCGA-AO-A0J2-01,TCGA-AO-A0J4-01,TCGA-AO-A0J6-01,TCGA-AO-A0JL-01,TCGA-AO-A124-01,TCGA-AO-A128-01,TCGA-AO-A129-01,TCGA-AO-A12F-01,TCGA-AQ-A04J-01,TCGA-AR-A0TS-01,TCGA-AR-A0TU-01,TCGA-AR-A0U0-01,TCGA-AR-A0U1-01,TCGA-AR-A0U4-01,TCGA-AR-A1AI-01,TCGA-AR-A1AQ-01,TCGA-AR-A1AR-01,TCGA-AR-A1AY-01,TCGA-AR-A256-01,TCGA-B6-A0IE-01,TCGA-B6-A0IK-01,TCGA-B6-A0IQ-01,TCGA-B6-A0RE-01,TCGA-B6-A0RG-01,TCGA-B6-A0RN-01,TCGA-B6-A0RS-01,TCGA-B6-A0RT-01,TCGA-B6-A0RU-01,TCGA-B6-A0WX-01,TCGA-BH-A0AV-01,TCGA-BH-A0B3-01,TCGA-BH-A0B9-01,TCGA-BH-A0BG-01,TCGA-BH-A0BL-01,TCGA-BH-A0BW-01,TCGA-BH-A0E0-01,TCGA-BH-A0E6-01,TCGA-BH-A0RX-01,TCGA-BH-A0WA-01,TCGA-BH-A18G-01,TCGA-BH-A18Q-01,TCGA-BH-A18T-01,TCGA-BH-A18V-01,TCGA-BH-A1EW-01,TCGA-BH-A1FC-01,TCGA-C8-A12V-01,TCGA-C8-A131-01,TCGA-C8-A134-01,TCGA-C8-A1HJ-01,TCGA-C8-A26X-01,TCGA-C8-A26Y-01,TCGA-C8-A27B-01,TCGA-D8-A13Z-01,TCGA-D8-A142-01,TCGA-D8-A143-01,TCGA-D8-A147-01,TCGA-D8-A1JF-01,TCGA-D8-A1JG-01,TCGA-D8-A1JL-01,TCGA-D8-A1XK-01,TCGA-D8-A1XQ-01,TCGA-D8-A27F-01,TCGA-D8-A27H-01,TCGA-D8-A27M-01,TCGA-E2-A14N-01,TCGA-E2-A14R-01,TCGA-E2-A14X-01,TCGA-E2-A150-01,TCGA-E2-A158-01,TCGA-E2-A159-01,TCGA-E2-A1AZ-01,TCGA-E2-A1B6-01,TCGA-E2-A1L7-01,TCGA-E2-A1LG-01,TCGA-E2-A1LH-01,TCGA-E2-A1LI-01,TCGA-E2-A1LK-01,TCGA-E2-A1LL-01,TCGA-E2-A1LS-01,TCGA-E9-A1ND-01,TCGA-E9-A22G-01,TCGA-EW-A1OV-01,TCGA-EW-A1OW-01,TCGA-EW-A1P1-01,TCGA-EW-A1P4-01,TCGA-EW-A1P7-01,TCGA-EW-A1P8-01,TCGA-EW-A1PB-01
#R语言操作,效果和上面一致
library(data.table)
BRCA_info = fread('TCGA.BRCA.sampleMap%2FBRCA_clinicalMatrix',na.strings="NA")[,c(1,8,10,22)]
BRCA_TNBC_info = dplyr::filter(BRCA_info,ER_Status_nature2012=="Negative",HER2_Final_Status_nature2012=="Negative",PR_Status_nature2012=="Negative")
TNBC_id = unique(BRCA_TNBC_info$sampleID)
> dim(BRCA_TNBC_info)
[1] 123   4
> head(BRCA_TNBC_info)
          sampleID ER_Status_nature2012 HER2_Final_Status_nature2012 PR_Status_nature2012
1: TCGA-A1-A0SK-01             Negative                     Negative             Negative
2: TCGA-A1-A0SO-01             Negative                     Negative             Negative
3: TCGA-A1-A0SP-01             Negative                     Negative             Negative
4: TCGA-A2-A04P-01             Negative                     Negative             Negative
5: TCGA-A2-A04Q-01             Negative                     Negative             Negative
6: TCGA-A2-A04T-01             Negative                     Negative             Negative
2.从mc3%2FBRCA_mc3.txt.gz文件中提取 tp53突变 且突变类型为 Missense_Mutation的TCGA ID
#https://tcga-xena-hub.s3.us-east-1.amazonaws.com/download/mc3%2FBRCA_mc3.txt.gz; 
#mc3%2FBRCA_mc3.txt.gz;somatic mutation (SNP and INDEL),保存的是BRCA样品的突变信息

zcat mc3%2FBRCA_mc3.txt.gz |awk '$7=="TP53" && $8=="Missense_Mutation"' |cut -f 1 > tp53_Missense_Mutation
#R语言版本
BRCA_mutation_info = fread('mc3%2FBRCA_mc3.txt.gz')
BRCA_TP53_Missense_Mutation = dplyr::filter(BRCA_mutation_info,gene=="TP53",effect=="Missense_Mutation")
TP53_MMT_id = unique(BRCA_TP53_Missense_Mutation$sample)
#结果展示
> dim(BRCA_TP53_Missense_Mutation)
[1] 166  12
> head(BRCA_TP53_Missense_Mutation)
            sample chr   start     end reference alt gene            effect Amino_Acid_Change DNA_VAF            SIFT                 PolyPhen
1: TCGA-A1-A0SI-01  17 7578406 7578406         C   T TP53 Missense_Mutation           p.R175H    0.30 tolerated(0.11)            benign(0.308)
2: TCGA-A1-A0SK-01  17 7578532 7578532         A   T TP53 Missense_Mutation           p.M133K    0.98  deleterious(0)            benign(0.122)
3: TCGA-A1-A0SO-01  17 7578190 7578190         T   C TP53 Missense_Mutation           p.Y220C    0.87  deleterious(0)     probably_damaging(1)
4: TCGA-A2-A04W-01  17 7577124 7577124         C   T TP53 Missense_Mutation           p.V272M    0.34  deleterious(0) probably_damaging(0.997)
5: TCGA-A2-A0CL-01  17 7577120 7577120         C   A TP53 Missense_Mutation           p.R273L    0.18  deleterious(0) probably_damaging(0.993)
6: TCGA-A2-A0ST-01  17 7578442 7578442         T   C TP53 Missense_Mutation           p.Y163C    0.09  deleterious(0) probably_damaging(0.999)
> length(TP53_MMT_id)
[1] 165
#TNBC中TP53 missense mutation subgroups样品id个数为38
BRCA_TNBC_mmtp53 = intersect(TNBC_id, TP53_MMT_id)
> length(BRCA_TNBC_mmtp53)
[1] 38

#TNBC中TP53 wild type subgroups样品id个数为
TNBC_TP53_WT = setdiff(TNBC_id, unique(BRCA_mutation_info$sample))
> length(TNBC_TP53_WT)
[1] 34
3.分别提取 TP53 missense mutation subgroups, TP53 missense mutation subgroups 的表达量矩阵
#一共1218个样品的gene expression RNAseq;
#log2(norm_count+1)
#nohup wget -c 'https://tcga-xena-hub.s3.us-east-1.amazonaws.com/download/TCGA.BRCA.sampleMap%2FHiSeqV2.gz' &

TCGA_sample_expression = fread('TCGA.BRCA.sampleMap%2FHiSeqV2.gz')
TCGA_sample_expression = as.data.frame(TCGA_sample_expression)
rownames(TCGA_sample_expression) = TCGA_sample_expression$sample 

#TP53 wild type subgroups
TNBC_TP53_WT_expression = TCGA_sample_expression[,TNBC_TP53_WT]

#TP53 missense mutation subgroups
BRCA_TNBC_mmtp53_expression = TCGA_sample_expression[,BRCA_TNBC_mmtp53] 
4.对上述得到的两个表达量矩阵计算相关性

计算方法-表达量矩阵中所有基因与TP53基因计算相关性

TNBC_TP53_WT_expression = as.data.frame( t(TNBC_TP53_WT_expression) )
BRCA_TNBC_mmtp53_expression = as.data.frame(t(BRCA_TNBC_mmtp53_expression))

sink('gene_tp53_cor.txt')
for(gene in colnames(TNBC_TP53_WT_expression) ){
  cat( paste0(gene,'\t', cor(TNBC_TP53_WT_expression[,gene], TNBC_TP53_WT_expression$TP53)),'\t',cor(BRCA_TNBC_mmtp53_expression[,gene], BRCA_TNBC_mmtp53_expression$TP53) ,"\n")
  }
sink()

tp53_correlation = read.table('gene_tp53_cor.txt',header = F)
colnames(tp53_correlation) = c('gene', 'tnbc_wt_tp53', 'tnbc_mt_tp53')
tp53_correlation = na.omit(tp53_correlation)

rownames(tp53_correlation) = tp53_correlation$gene
tp53_correlation = tp53_correlation[, 2:3]

> head(tp53_correlation)
          tnbc_wt_tp53 tnbc_mt_tp53
ARHGEF10L   0.28587482  0.169284500
HIF3A       0.16243489  0.115790900
RNF17      -0.19910027  0.039481480
RNF10       0.21917671 -0.310539300
RNF11       0.18367476 -0.486836800
RNF13       0.02319396  0.005537374
library(dplyr)
del_gene_id = rownames(tp53_correlation) %>% grep('^\\?', . ,perl = T)
> del_gene_id %>% rownames(tp53_correlation)[.]
 [1] "?|26823"     "?|100133144" "?|90288"     "?|645851"    "?|653553"   
 [6] "?|10357"     "?|388795"    "?|100134869" "?|729884"    "?|57714"    
[11] "?|652919"    "?|553137"    "?|390284"    "?|728603"    "?|155060"   
[16] "?|340602"    "?|10431"     "?|317712"    "?|8225"      "?|391714"   
[21] "?|280660"    "?|391343"    "?|728788"  

tp53_correlation = tp53_correlation[-del_gene_id,]
tp53_correlation = tp53_correlation[sort( rownames(tp53_correlation) ),]
> head(tp53_correlation,30)
         tnbc_wt_tp53 tnbc_mt_tp53
A1BG      0.013355972  0.065416980
A1CF     -0.096826609  0.033157820
A2BP1    -0.163385210  0.010062910
A2LD1     0.246807729  0.212874200
A2M       0.147999058 -0.129446500
A2ML1     0.341158546 -0.005538465
A4GALT    0.004229104  0.127372200
A4GNT     0.105848555  0.115709900
AAAS     -0.064430839  0.296455500
AACS     -0.137412948 -0.432603200
AACSL     0.049008076 -0.474494000
AADAC     0.076812779 -0.308284300
AADACL2   0.191125198  0.233111900
AADACL3  -0.191370449  0.033848400
AADACL4   0.230454755  0.121381600
AADAT    -0.052448739 -0.164513200
AAGAB     0.278612704 -0.045500850
AAK1      0.093400379 -0.362386100
AAMP     -0.079047101  0.095738200
AANAT     0.074936621 -0.009138630
AARS     -0.013959717  0.032320990
AARS2     0.099614889 -0.015314100
AARSD1   -0.245608837  0.428401600
AASDH    -0.091453632 -0.189170200
AASDHPPT -0.572624010 -0.003107184
AASS     -0.109802198 -0.115977400
AATF     -0.310146852  0.262189300
AATK     -0.179822766  0.235807200
ABAT      0.289620484 -0.513028100
ABCA1     0.109771473 -0.249502800
select_gene = c("CCL1","CCL18","CCL28","CCL4","CCL7","CCL8","CX3CL1","CXCL10","CXCL11","IFIT1","IFIT2","IFIT3","IFNB1","IFNG","IL27","IL29")
tp53_correlation[select_gene,]
> tp53_correlation[select_gene,]
       tnbc_wt_tp53 tnbc_mt_tp53
CCL1      0.2514942  -0.12348200
CCL18    -0.0649739  -0.09928178
CCL28     0.2138325  -0.13331330
CCL4      0.1815558  -0.08629248
CCL7      0.1008837  -0.14959010
CCL8      0.1171175  -0.15947400
CX3CL1    0.1885921   0.25851010
CXCL10    0.2533631  -0.05307441
CXCL11    0.2534424  -0.15895260
IFIT1     0.1210111  -0.10309130
IFIT2     0.1538091  -0.09959147
IFIT3     0.2242451  -0.17122550
IFNB1     0.1161510  -0.14915090
IFNG      0.1704638  -0.14866620
IL27      0.3561543   0.06605169
IL29     -0.1146054  -0.22295270

散点图

scatter_data = tp53_correlation[select_gene,]

if(T){
plot(scatter_data$tnbc_wt_tp53, scatter_data$tnbc_mt_tp53,pch=19, cex=1.5, cex.lab =1.3,xlab="Correlation coefficient in WTp53", ylab="Correlation coefficient in mtp53",main="A scatterplot")
abline(h=0.1,lty=3)
abline(v=0.1,lty=3)
points(0.116151,-0.1491509,pch=15,col='red',cex=1.6)
text(0.133,-0.145, 'IFNB1' ,col = 'red')
}

在这里插入图片描述

ps:欢迎批评指正

参考

https://www.youlai.cn/ask/5C971DMTn00.html
笔记15-TCGA三阴乳腺癌样本的下载

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值