根据网易云课程整理,禁转
测序文件读入写出
use Bio::SeqIO;
use Bio::Seq;
use Data::Dumper;
#fasta
$in = Bio::SeqIO->new(-file => "D:/test.fa", -format => 'Fasta', -alphabet =>"dna");
$out = Bio::SeqIO->new(-file => ">D:/aa.fa", -format => 'EMBL');
while (my$seqobj = $in->next_seq()){
my $id = $seqobj->id(); #the human read-able id of the sequence
my $seq = $seqobj->seq(); #string of sequence
my $desc = $seqobj->desc(); #a description of the sequence
my $alphabet = $seqobj->alphabet(); #one of 'dna', 'rna', 'protein'
my $len = $seqobj->length();
print $seqobj."\n";
print Dumper($seqobj);
$out->write_seq($seqobj); #实现格式转换
last; #只读一条序列
}
#fastq双端
$in1 = Bio::SeqIO -> new(-file =>"D:/test_1.fq", -format => 'fastq');
$in2 = Bio::SeqIO -> new(-file =>"D:/test_2.fq", -format => 'fastq');
$out = Bio::SeqIO->new(-file => ">D:/test_1.fa", -format => 'fasta');
while (my $seqobj1 = $in1 ->next_seq() and my $seqobj2 = $in2 -> next_seq()){
my ($id1, $id2) = ($seqobj1->id, $seqobj2->id);
my ($seq1,$seq2) = ($seqobj1->seq, $seqobj2->seq);
my ($desc1,$desc2) = ($seqobj1->desc, $seqobj2 ->desc);
my ($alphabet1, $alphabet2) = ($seqobj1->alphabet, $seqobj2->alphabet); #one of 'dna', 'rna', 'protein'
my ($len1,$len2) = ($seqobj1->length, $seqobj2 ->length);
my ($qual1, $qual2) = ($seqobj1->qual,$seqobj2->qual);
$out->write_seq($seqobj1,$seqobj2);
last;
}
write_seq()只能传入bioseq对象,接收字符串会报错
序列截取
#创建bioseq对象
$seqobj = Bio::Seq->new(-seq => "aaccggtt", -desc => "Sample Bio::Seq object", -id => "ID1");
$out = Bio::SeqIO->new(-file =>">./subseq.fa", -format=>"Fasta");
#subseq截取,返回字符串
my $subseq = $seqobj->subseq(3,5);
#注意:这里的索引从1开始,所以截取3-5获得的序列是: ccg
print $subseq."\n";
my$newseqobj = Bio::Seq ->new(-seq=$subseq, -desc=>"subseq 3-5", -id="ID2");
$out->write_seq($newseqobj);
#trunc截取,返回bioseq对象,desc和id信息不变
my$subseqobj = $seqobj->trunc(3,5);
$out->write_seq($subseqobj);
反向互补序列
my$revcom = $seqobj ->revcom;返回bioseq对象
蛋白质翻译
my$translate = $seqobj->translate(undef,undef,0);
translate第一个参数stop符号,默认*;第二个参数unknown amino acid (‘X’);第三个参数开始位置,比如0,1,2
这篇博客介绍了如何使用Bioperl库进行生物信息学操作,包括测序文件的读入和写出,序列截取,生成反向互补序列以及进行蛋白质翻译。在序列操作中,注意translate函数的参数设置,如停止符号、未知氨基酸代号和起始位置。
1421

被折叠的 条评论
为什么被折叠?



