import re
# Rosalind SSEQ: read two FASTA records and print 1-based positions in the
# first sequence (the parent string) at which the symbols of the second
# sequence (the sub-string) occur, in order, as a subsequence.

FASTA_PATH = '../examples/ros_bio30_SSEQ.txt'


def read_fasta(path):
    """Parse a FASTA file into a dict mapping header line -> sequence.

    Keys are the full header lines (including the leading '>'), kept in
    file order. Sequence lines following a header are concatenated. If the
    same header appears twice, the later record replaces the earlier one.

    Raises:
        ValueError: if sequence data appears before the first header line.
    """
    chunks = {}
    name = None
    with open(path) as handle:
        for raw in handle:
            line = raw.rstrip('\n')
            if line.startswith('>'):
                name = line
                chunks[name] = []
            elif not line:
                # Blank lines contribute nothing to any sequence.
                continue
            elif name is None:
                raise ValueError('sequence data found before the first FASTA header')
            else:
                chunks[name].append(line)
    # Join once per record instead of growing a string line by line.
    return {header: ''.join(parts) for header, parts in chunks.items()}


def subsequence_indices(seq, subseq):
    """Return 1-based positions in ``seq`` that spell out ``subseq`` in order.

    Each symbol of ``subseq`` is matched to its leftmost occurrence in
    ``seq`` that lies strictly after the previous match, so the returned
    positions are strictly increasing. Repeated symbols in ``subseq`` each
    get their own position.

    Raises:
        ValueError: if ``subseq`` is not a subsequence of ``seq``.
    """
    positions = []
    start = 0  # 0-based index from which the next symbol is searched
    for symbol in subseq:
        found = seq.find(symbol, start)
        if found == -1:
            raise ValueError(
                'symbol %r of the sub-string has no match after position %d'
                % (symbol, start)
            )
        # found + 1 is both the 1-based position to report and the 0-based
        # index at which the search for the next symbol must begin.
        start = found + 1
        positions.append(start)
    return positions


def main():
    """Load the parent string and sub-string from FASTA_PATH and print the positions."""
    sequences = list(read_fasta(FASTA_PATH).values())
    if len(sequences) < 2:
        raise ValueError('expected two FASTA records: parent string and sub-string')
    seq, subseq = sequences[0], sequences[1]
    print(' '.join(str(pos) for pos in subsequence_indices(seq, subseq)))


if __name__ == '__main__':
    main()
# Rosalind problem 30 — ros_bio30_SSEQ
# (Blog page footer: latest recommended article published 2022-11-03 16:09:22)