一、翻译效率计算
(1)准备文件:
转录组和翻译组测序的表达量表(经过标准化的TPM/FPKM)矩阵如图
(2)实现代码
## get TPM值
import pandas as pd
import numpy as np
import sys
tpm = sys.argv[1] ##输入TPM表达量表 如TPM.gene.xls
outfile = sys.argv[2] ##输出文件名字,如TE.xls
df = pd.read_csv(tpm, index_col=0, sep="\t")
rna_cols = [col for col in df.columns if 'RNA' in col]
ribo_cols = [col for col in df.columns if 'Ribo' in col]
##配对列名去除前缀
def get_suffix(col):
return "_".join(col.split("_")[1:])
pairs_name = []
for rna_col in rna_cols:
suffix = get_suffix(rna_col)
ribo_col = "Ribo_" + suffix
if ribo_col in ribo_cols:
pairs_name.append((rna_col, ribo_col))
##创建TE的数据框
te_df = pd.DataFrame(index=df.index)
for rna, ribo in pairs_name:
te_name = get_suffix(ribo)
te_df[te_name] = df[ribo]/df[rna].replace(0, np.nan)
##保存结果
te_df.to_csv(outfile, sep="\t", index=True)
def main():
print("你传入的参数是:", sys.argv)
if __name__ == "__main__":
main()
(3)翻译表达量与转录表达量相关性分析
library(ggplot2)
library(reshape2)
theme_set(theme_bw())
group <- read.table("sample.txt", header = T)
data <- read.table("TPM.gene.xls", header=T, check.names=F, stringsAsFactors=F, row.names=1)
data2 <- data[which(rowSums(data[, group$RNA])> 0),]
for (i in 1:nrow(group)) {
samplename <- sub("RNA_", "", group$RNA[i])
df <- data2[, c(group$RNA[i], group$Ribo[i])]
df <- log10(df + 1)
r <- cor(df[,1], df[,2])
r <- round(r, digits=3)
x_title <- paste0(group$RNA[i], " log10(TPM+1)")
y_title <- paste0(group$Ribo[i], " log10(TPM+1)")
p <- ggplot(df, aes(x=df[,1], y=df[,2])) +
geom_point(alpha=0.5, size=0.8)+
geom_smooth(method="lm", colour="red", linewidth=0.8)+
labs(x=x_title, y= y_title)+
scale_y_continuous(expand=c(0,0),limits=c(0,NA))+
scale_x_continuous(expand=c(0,0),limits=c(0,NA))+
annotate("text", x=0.2, y=Inf, label=paste0("R = ", r), vjust=3, hjust=0, color="red")
pngfile <- paste0("cor.rna_ribo.", samplename, ".png")
pdffile <- paste0("cor.rna_ribo.", samplename, ".pdf")
ggsave(pngfile, plot=p, width=6, height=6, dpi=400)
ggsave(pdffile, plot=p, width=6, height=6, dpi=400)
}
展示图结果如下(可以自己设置外观,美化自己的图)