"""
问题 :我想匹配一个句子中某一个token
1.是否出现过
2.出现的次数
3.出现的下标位置在哪里
案例 ner标注的时候要给出token
求解的方案是 kmp
这里面我怎么理解那个next 指针???
"""
#实现我们的算法结构
def KMP_algorithm(string, substring):
    """
    Find the first occurrence of ``substring`` inside ``string`` using KMP.

    string: the text that is searched
    substring: the pattern to look for
    Returns the index in ``string`` at which the first match starts, or -1
    when the pattern does not occur. An empty ``substring`` yields 0 because
    the scan loop is never entered.
    """
    fallback = gen_pnext(substring)
    text_len, pattern_len = len(string), len(substring)
    text_pos = matched = 0
    while text_pos < text_len and matched < pattern_len:
        if string[text_pos] != substring[matched]:
            if matched == 0:
                # Nothing matched yet: just move on to the next text character.
                text_pos += 1
            else:
                # Reuse the part of the pattern that is both a prefix and a
                # suffix of what was matched so far, e.g. for
                # string = 'abcxabcdabcdabcy', substring = 'abcdabcy'.
                matched = fallback[matched - 1]
            continue
        text_pos += 1
        matched += 1
    return text_pos - matched if matched == pattern_len else -1
def gen_pnext(substring):
    """
    Build the KMP fallback table for ``substring``.

    Entry ``k`` of the returned list is the length of the longest proper
    prefix of ``substring[:k + 1]`` that is also a suffix of it.
    Example: "abcdabcy" -> [0, 0, 0, 0, 1, 2, 3, 0].
    """
    size = len(substring)
    table = [0] * size
    border = 0  # length of the prefix/suffix overlap carried from the previous position
    for pos in range(1, size):
        # Shrink the overlap until it can be extended by substring[pos]
        # or there is nothing left to shrink.
        while border and substring[pos] != substring[border]:
            border = table[border - 1]
        if substring[pos] == substring[border]:
            border += 1
        table[pos] = border
    return table
def count(string: str, substring: str) -> tuple:
    """
    Locate every non-overlapping occurrence of ``substring`` in ``string``.

    :param string: the text that is searched
    :param substring: the pattern to look for
    :return: ``(spans, total)`` where ``spans`` is a list of ``[start, end]``
        pairs (``end`` is exclusive, indices refer to the original ``string``)
        and ``total`` is the number of matches. An empty ``substring`` gives
        ``([], 0)``.
    """
    pattern_len = len(substring)
    spans = []
    if pattern_len == 0:
        # An empty pattern matches at offset 0 forever and would never let
        # the scan advance, so report no matches instead of looping.
        return spans, 0

    offset = 0  # absolute index in `string` where the next search starts
    while offset + pattern_len <= len(string):
        relative = KMP_algorithm(string[offset:], substring)
        if relative == -1:
            break
        # KMP_algorithm reports a position relative to the slice it was
        # given; convert it to an absolute index exactly once.
        start = offset + relative
        end = start + pattern_len
        spans.append([start, end])
        # Resume right after this match so occurrences never overlap.
        offset = end
    return spans, len(spans)
# Demo run: prints the [start, end] spans of "asd" in the sample text and the
# number of matches, i.e. ([[7, 10], [10, 13]], 2).
print(count(string="qwesdadasdasdadasrt", substring="asd"))