这似乎对您的示例文本有效。我不确定每个文件是否可能包含多个提取结果,而且我现在时间有限,所以如有需要,您需要自行扩展它:
#!python3
import re
# Module-level accumulator filled in by the match_* line handlers below;
# the handlers visible in this file store under the keys 'notes', 'pattern'
# and 'sp'.
Extract = dict()
def match_notes(line):
    """Handle one input line while the parser is in the "notes" state.

    A line that still begins with whitespace once its trailing whitespace
    has been stripped is treated as a note: the text following the leading
    whitespace is appended to ``Extract['notes']`` (the list is created on
    first use). Any other line -- including a blank one -- ends the notes
    section, and the module-level ``_State`` handler is switched to
    ``match_sp``.

    Args:
        line: One raw input line; trailing whitespace is ignored.

    Returns:
        True if the line was stored as a note, False if it was not
        (in which case ``_State`` has been reset to ``match_sp``).
    """
    global _State
    pattern = r"^\s+(.*)$"
    m = re.match(pattern, line.rstrip())
    if m is None:
        # The line was not consumed here; hand control back to match_sp.
        _State = match_sp
        return False
    # setdefault creates the list the first time a note is seen.
    Extract.setdefault('notes', []).append(m.group(1))
    return True
def match_pattern(line):
    """Handle one input line while the parser is looking for ``Pattern:``.

    The line matches when, after trailing whitespace is stripped, it starts
    with at least one whitespace character followed by the literal text
    ``Pattern: ``. On a match the remainder of the line is stored in
    ``Extract['pattern']`` (overwriting any earlier value) and the
    module-level ``_State`` handler is switched to ``match_notes``.

    Args:
        line: One raw input line; trailing whitespace is ignored.

    Returns:
        True if the line was a pattern line and was recorded, False
        otherwise (no state is changed in that case).
    """
    global _State
    pattern = r"^\s+Pattern: (.*)$"
    m = re.match(pattern, line.rstrip())
    if m is None:
        return False
    Extract['pattern'] = m.group(1)
    # Lines following the pattern line are handled by match_notes.
    _State = match_notes
    return True
def match_sp(line):
global _State
pattern = r">sp\|([^|]+)\|(.*)$"
m = re.match(pattern, line.rstrip())
if m:
if 'sp' not in Extract:
Extract['sp'] = []
spinfo = {
'accession co