get rRNA.gtf

从GTF文件中取出想要的特殊类型的gtf文件

example:ensembl

$ grep -w 'rRNA\|Mt_tRNA\|MT' Mus_musculus.GRCm38.75.gtf > mask_file.gtf

OR

$ egrep -wi 'rRNA|Mt_tRNA|MT' Mus_musculus.GRCm38.75.gtf > mask_file.gtf

#!/usr/bin/env Rscript args = commandArgs(trailingOnly=TRUE) if (length(args) < 3){ stop("No enough input parameters!\n", call.=FALSE) } library("ggplot2") library("data.table") library("grid") library(stringr) rRNA.stack <- function(file.address, file.name, rRNA.length){ length.dis <- read.table(paste(file.address , file.name , "_result/" , file.name , "_length_distribution.txt", sep = ""), header = TRUE, sep = "\t", col.names = c("name", "length", "reads") ) length.dis <- length.dis[!grepl("antisense", length.dis$name, ignore.case = TRUE), ] len.min <- min(length.dis$length) len.max <- max(length.dis$length) len <- len.min : len.max sum.clean <- sum(length.dis[which(length.dis$name == "Clean_Reads"), 3]) length.dis <- data.frame(length.dis[ ,1:2], reads = length.dis$reads * 1000000 / sum.clean) dis.clean <- length.dis[which(length.dis$name == "Clean_Reads"), 2:3] stack.all <- c() dis.total.rRNA <- length.dis[grep("-rRNA|_rRNA", length.dis$name, ignore.case = TRUE), ] dis.total.rRNA <- dis.total.rRNA[!grepl("antisense|S-rRNA|other-rRNA", dis.total.rRNA$name, ignore.case = TRUE), 2:3] dis.other.rRNA <- data.frame(name = "other.rRNA", length.combine(dis.total.rRNA, len)) rRNA.length <- unlist(strsplit(rRNA.length, ",")) k <- 0 for(i in 1:length(rRNA.length)){ temp.length <- unlist(strsplit(rRNA.length[i], "=")) if (!grepl("RNY|other", temp.length[1])){ dis.rRNA <- length.dis[grep(paste("-", temp.length[1], sep = ""), length.dis$name), 2:3] dis.rRNA <- data.frame(name = paste(temp.length[1], ".rRNA", sep = ""), length.combine(dis.rRNA, len)) sum.rRNA <- sum(dis.rRNA$reads) if(sum.rRNA > 0){ k <- k+1 stack.all <- rbind(stack.all, dis.rRNA) dis.other.rRNA <- data.frame(dis.other.rRNA[1:2], dis.other.rRNA[3] - dis.rRNA[3]) } } } stack.all <- rbind(stack.all, dis.other.rRNA) stack.all$name <- factor(stack.all$name, levels = unique(stack.all$name)) stack.sep <- ggplot(stack.all, aes(x = length, y = reads, fill = name)) + geom_bar(stat = "identity") + facet_grid(. ~ name) + labs(x = "length", y = "RPM", title = "") + theme(axis.line = element_line() , panel.grid.minor = element_blank() , panel.grid.major = element_blank() , panel.background = element_rect(fill = "transparent", colour = NA) , plot.background = element_rect(fill = "transparent", colour = NA) , panel.border = element_rect(fill = "transparent") ) stack.pic <- ggplot(stack.all, aes(x = length, y = reads, fill = name)) + geom_bar(stat = "identity") + theme(axis.line = element_line() , panel.grid.minor = element_blank() , panel.grid.major = element_blank() , panel.background = element_rect(fill = "transparent", colour = NA) , plot.background = element_rect(fill = "transparent", colour = NA) , legend.title = element_blank() , legend.background = element_rect(fill = "transparent", colour = NA) , legend.box.background = element_rect(fill = "transparent", colour = NA) , legend.position = "top" , axis.text = element_text(size = 15, color = "black") , axis.title = element_text(size = 15) ) + labs(x = "length", y = "RPM", title = "") pdf(paste(file.address, file.name, "_result/", file.name, "_rsRNA_distribution.pdf", sep=""), width = 8+0.5 * k, height = 12) grid.newpage() pushViewport(viewport(layout = grid.layout(4, 1, heights = unit(c(1, 4, 4, 4), "null")))) pdf.title <- paste("rRNAs Length Distribution of ", file.name, sep = "") vplayout <- function(x,y) viewport(layout.pos.row = x, layout.pos.col = y) grid.text(pdf.title, vp = vplayout(1, 1), gp = gpar(fontsize = 20)) print(stack.pic, vp = vplayout(2:3, 1)) print(stack.sep, vp = vplayout(4, 1)) invisible(dev.off()) } length.combine <- function(distr, len){ if(is.data.frame(distr) && nrow(distr) == 0){ distr <- data.frame(length = len, reads = 0) } tmp <- data.table(distr) tmp <- as.data.frame(tmp[, lapply(.SD, sum), by=list(length)]) tmp <- tmp[order(as.numeric(tmp$length)), ] if(length(len[-c(match(distr[ ,1], len))]) != 0){ tmp <- rbind(tmp, data.frame(length = len[-c(match(distr[ ,1], len))], reads = 0)) } distr <- tmp[order(as.numeric(tmp$length)), ] } rRNA.stack(file.address = args[1], file.name = args[2], rRNA.length = args[3])
最新发布
09-05
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值