使用Python计算fasta文件的序列长度
用法：python 脚本名.py 输入.fasta 输出.txt —— 输出每条序列的名称、起始位置（固定为 1）和序列长度，以制表符分隔。
使用Python计算fasta文件的序列长度
#!/usr/bin/python
# -*- coding: utf-8 -*-
import sys
# NOTE: chr_length() opens the input and output files itself. The two
# module-level open() calls that used to live here only created unused,
# never-closed file handles (and truncated the output file a second time),
# so they were removed.
def chr_length(infile, outfile):
    """Write a table of per-record sequence lengths for a FASTA file.

    The input is read line by line. A line starting with ``>`` begins a new
    record; its name is that line with the line ending and any leading or
    trailing ``>`` characters removed. The lengths of all following lines are
    added up until the next header line. If the same name appears twice, the
    later record replaces the earlier one.

    The output is a tab-separated table: a ``Chromosome / start / end``
    header row followed by one ``name, 1, length`` row per record.

    Args:
        infile: Path of the FASTA file to read.
        outfile: Path of the table to write; an existing file is overwritten.
    """
    lengths = {}
    # Sequence lines that appear before any header are accumulated under the
    # empty name, matching the historical behavior of this script.
    name = ''

    # Use the paths passed by the caller (not sys.argv) and let the context
    # manager close the file even if reading fails part-way through.
    with open(infile, 'r') as fasta:
        for line in fasta:
            # Drop the line ending, including a stray '\r' from Windows
            # files, so it is not counted as part of the sequence.
            line = line.rstrip('\r\n')
            if line.startswith('>'):
                name = line.strip('>')
                lengths[name] = 0
            else:
                # Keep only a running total instead of storing every line.
                lengths[name] = lengths.get(name, 0) + len(line)

    with open(outfile, 'w') as out:
        out.write("Chromosome\tstart\tend\n")
        for name, length in lengths.items():
            out.write("%s\t1\t%s\n" % (name, length))
if __name__ == '__main__':
    # Run only when executed as a script, so that importing this module does
    # not read sys.argv or touch any files.
    if len(sys.argv) < 3:
        # Give a readable usage message instead of a bare IndexError.
        sys.exit("usage: %s <input.fasta> <output.tsv>" % sys.argv[0])
    chr_length(sys.argv[1], sys.argv[2])