count转化为TPM

最新推荐文章于 2024-08-27 08:56:50 发布

原创最新推荐文章于 2024-08-27 08:56:50 发布 · 6.6k 阅读

16 ·

CC 4.0 BY-SA版权

文章标签：

#r语言 #其他

library(biomaRt)
#查看基因组参数
mart = useMart('ensembl')
listDatasets(mart)

#你需要哪个基因组，就复制它在dataset列里的词，放在下面这行的`dataset = `参数里
#此处以人类为例，植物参考注一
bmart <- biomaRt::useMart(biomart = "ENSEMBL_MART_ENSEMBL", 
                          dataset = "hsapiens_gene_ensembl",
                          host = "www.ensembl.org")

# 从输入数据里提取基因名
feature_ids <- rownames(expMatrix)##就是你需要ENSEMBL的例如ENSG00000000003


attributes = c(
  "ensembl_gene_id",
  #"hgnc_symbol",
  "chromosome_name",
  "start_position",
  "end_position"
)
filters = "ensembl_gene_id"

feature_info <- biomaRt::getBM(attributes = attributes, 
                               filters = filters, 
                               values = feature_ids, mart = bmart)
mm <- match(feature_ids, feature_info[[filters]])
feature_info_full <- feature_info[mm, ]
rownames(feature_info_full) <- feature_ids

# 计算基因的有效长度
eff_length <- abs(feature_info_full$end_position - feature_info_full$start_position)
feature_info_full <- cbind(feature_info_full,eff_length)

eff_length2 <- feature_info_full[,c(1,5)]

x <- expMatrix / eff_length2$eff_length

tpm = t( t(x) / colSums(x) ) * 1e6