import re
# Overlap graph (Rosalind GRPH): read a FASTA file into a label -> sequence
# mapping, then report a directed edge "s t" whenever the last k bases of
# record s equal the first k bases of a different record t.
INPUT_PATH = "../examples/ros_bio12_GRPH.txt"
OVERLAP_LENGTH = 3


def parse_fasta(lines):
    """Parse FASTA-formatted text lines into an ordered label -> sequence dict.

    Args:
        lines: Iterable of text lines (trailing newlines are allowed).

    Returns:
        A dict mapping each record label (the header text without the
        leading ``>``) to its sequence, with multi-line sequences joined.
        Records keep the order in which their headers first appear; a
        repeated header restarts that record's sequence.

    Raises:
        ValueError: If sequence data appears before the first header.
    """
    chunks = {}
    current = None
    for raw in lines:
        text = raw.strip()
        if not text:
            # Blank lines carry no data; skipping them also tolerates a
            # trailing empty line at the end of the file.
            continue
        if text.startswith(">"):
            # Drop the '>' marker so labels print in the expected
            # "label label" adjacency format.
            current = chunks[text[1:].strip()] = []
        elif current is None:
            raise ValueError("sequence data found before the first FASTA header")
        else:
            current.append(text)
    return {label: "".join(parts) for label, parts in chunks.items()}


def read_fasta(path):
    """Read the FASTA file at *path* and return its label -> sequence dict."""
    with open(path) as handle:
        return parse_fasta(handle)


def overlap_edges(records, k=OVERLAP_LENGTH):
    """Return the directed edges of the overlap graph O_k.

    Args:
        records: Mapping of label -> sequence, e.g. from ``parse_fasta``.
        k: Length of the suffix/prefix that must match (default 3).

    Returns:
        A list of ``(source, target)`` label pairs such that the length-k
        suffix of ``source`` equals the length-k prefix of ``target`` and the
        two labels differ. Edges are ordered by source record, then by target
        record, both in the iteration order of *records*.

    Raises:
        ValueError: If *k* is smaller than 1.
    """
    if k < 1:
        raise ValueError("k must be a positive integer")

    # Index labels by their length-k prefix so each suffix is resolved with a
    # single lookup instead of a scan over every other record.
    by_prefix = {}
    for label, seq in records.items():
        # A sequence shorter than k has no length-k prefix or suffix, so it
        # cannot take part in any edge.
        if len(seq) >= k:
            by_prefix.setdefault(seq[:k], []).append(label)

    edges = []
    for label, seq in records.items():
        if len(seq) < k:
            continue
        for target in by_prefix.get(seq[-k:], ()):
            if target != label:  # no self-loops
                edges.append((label, target))
    return edges


def main():
    """Print the overlap-graph adjacency list for the default input file."""
    for source, target in overlap_edges(read_fasta(INPUT_PATH)):
        print(source + ' ' + target)


if __name__ == "__main__":
    main()