import re
# Rosalind SPLC ("RNA Splicing"): read a FASTA file whose first record is a
# DNA string and whose remaining records are its introns, cut the introns out,
# transcribe the remaining exons to RNA and print the protein they encode.

# Default input file, resolved relative to the current working directory.
INPUT_PATH = "../examples/ros_bio22_SPLC.txt"

# Standard RNA codon table: four "codon amino-acid" pairs per row.
CODON_TEXT = """
UUU F      CUU L      AUU I      GUU V
UUC F      CUC L      AUC I      GUC V
UUA L      CUA L      AUA I      GUA V
UUG L      CUG L      AUG M      GUG V
UCU S      CCU P      ACU T      GCU A
UCC S      CCC P      ACC T      GCC A
UCA S      CCA P      ACA T      GCA A
UCG S      CCG P      ACG T      GCG A
UAU Y      CAU H      AAU N      GAU D
UAC Y      CAC H      AAC N      GAC D
UAA Stop   CAA Q      AAA K      GAA E
UAG Stop   CAG Q      AAG K      GAG E
UGU C      CGU R      AGU S      GGU G
UGC C      CGC R      AGC S      GGC G
UGA Stop   CGA R      AGA R      GGA G
UGG W      CGG R      AGG R      GGG G
"""

# Codon -> one-letter amino acid (or the marker 'Stop').  A single findall
# pass pairs every codon with the token that follows it, so the layout of the
# table text (column spacing, blank lines) does not matter.
table = dict(re.findall(r'([ACGU]{3})\s+(\S+)', CODON_TEXT))


def parse_fasta(lines):
    """Parse FASTA text into a ``{header: sequence}`` dict.

    Args:
        lines: Iterable of text lines (an open file object works).  Line
            endings and surrounding whitespace are stripped, so both LF and
            CRLF files are accepted; blank lines are skipped.

    Returns:
        A dict keyed by the full header line (including the leading ``>``),
        in file order, whose values are the record's sequence lines joined
        into one string.  A repeated header restarts that record.

    Raises:
        ValueError: If sequence data appears before the first header line.
    """
    chunks = {}
    current = None
    for raw in lines:
        line = raw.strip()
        if not line:
            continue
        if line.startswith('>'):
            # Both names refer to the same list, so appending to `current`
            # fills in the record stored under this header.
            current = chunks[line] = []
        elif current is None:
            raise ValueError("FASTA sequence data found before any '>' header")
        else:
            current.append(line)
    return {header: ''.join(parts) for header, parts in chunks.items()}


def splice(dna, introns):
    """Return ``dna`` with the first occurrence of each intron removed.

    Introns are removed one after another, in the order given, each from the
    result of the previous removal.  An intron that does not occur in the
    sequence leaves it unchanged.

    Args:
        dna: The full DNA string.
        introns: Iterable of substrings to cut out (any number, may be empty).

    Returns:
        The concatenated exons as a single string.
    """
    for intron in introns:
        # count=1: cut only the first match, one copy per listed intron.
        dna = dna.replace(intron, '', 1)
    return dna


def transcribe(dna):
    """Transcribe a DNA coding strand to RNA by replacing every T with U."""
    return dna.replace('T', 'U')


def translate(rna):
    """Translate an RNA string into a protein string.

    Reading starts at index 0 and proceeds codon by codon until a stop codon
    or the end of the sequence.  A trailing fragment shorter than three bases
    is ignored.

    Args:
        rna: RNA string over the alphabet A, C, G, U.

    Returns:
        The one-letter amino-acid string (empty if there is no full codon).

    Raises:
        KeyError: If a codon is not present in the codon table.
    """
    residues = []
    # Stopping at len(rna) - 2 guarantees every slice is a full 3-base codon.
    for start in range(0, len(rna) - 2, 3):
        amino = table[rna[start:start + 3]]
        if amino == 'Stop':
            break
        residues.append(amino)
    return ''.join(residues)


def main(path=INPUT_PATH):
    """Read the FASTA file at ``path`` and print the spliced protein.

    The first record is taken as the DNA string and every following record
    as an intron to remove from it.

    Raises:
        ValueError: If the file contains no FASTA records.
    """
    with open(path, encoding="utf-8") as handle:
        sequences = list(parse_fasta(handle).values())
    if not sequences:
        raise ValueError("no FASTA records found in %r" % path)
    dna, *introns = sequences
    print(translate(transcribe(splice(dna, introns))))


if __name__ == "__main__":
    main()
# Rosalind problem 22 - ros_bio22_SPLC
# (Scraped blog footer: "latest recommended article published 2021-08-17 22:40:00")