calculateRA2.pl
#!/usr/bin/perl -w
use strict;
use Getopt::Long;
my $usage =<<_USAGE_;
usage perl calculateRA.pl -i input -o output -f1 count for filter -f2 rate for filter -m calculate contig
_USAGE_
my $samtool ||= 0;
my ($input, $output, $f1, $f2);
GetOptions(
"i=s" => \$input,
"o=s" => \$output,
"f1=s" => \$f1,
"f2=s" => \$f2,
"m=s" => \$samtool
);
die $usage if (!$input || !$output);
$f1 = 1 if (!$f1);
$f2 = 0.0000001 if (!$f2);
my %genes;
open BAM,"samtools view $input | " or die $!;
if ($samtool) {
while(<BAM>){
chomp;next if ($_ eq '');s/\r//g;
my @entries =split/\t/;
if ($genes{$entries[2]}){
$genes{$entries[2]}++;
}else{
$genes{$entries[2]} = 1;
}
}
close BAM;
}else{
while(<BAM>){
chomp;next if ($_ eq '');s/\r//g;
my @entries =split/\t/;
my $fflag = $entries[1];
unless($fflag & 0x4){
next if($entries[4] < 1);
if ($genes{$entries[2]}){
$genes{$entries[2]}++;
}else{
$genes{$entries[2]} = 1;
}
}
}
close BAM;
}
my $total = 0;
open (my $out0, ">$output.count") || die "$!:$output.out\n";
my $x = 0;
my $y = 0;
my %genecount;
foreach (keys %genes){
my $n = $genes{$_};
if ($n > $f1){
#print $out0 "$_\t$n\n";
$_=~ /\|(\d+)/;
my $len = $1;
my $abundance = $n/$len;
$genes{$_} = $abundance;
$total += $abundance;
$genecount{$_} = $n;
}else{
delete $genes{$_};
}
}
#close $out0;
open (my $out1, ">$output.abundance") || die "$!:$output.abundance\n";
open (my $out2, ">$output.relative_abundance") || die "$!:$output.relative_abundance\n";
foreach (sort {$a cmp $b }keys %genes){
my $abundance = $genes{$_};
my $ra = $abundance/$total;
if ($ra >=$f2){
print $out2 "$_\t$ra\n";
print $out1 "$_\t$abundance\n";
print $out0 "$_\t$genecount{$_}\n";
}
}
close $out1;
close $out2;
close $out0;