03Rabin-Karp模式匹配
定义:给定一个长度为n的字符串s和一个长度为m的待匹配模式字符串t,我们希望找到t在s中第一次出现时的下标i。当t不是s的子串时,返回值应该是-1。
输入:s = "lalopalalali",t = "lala"
输出:6
复杂度: 一般为O(m+n),最差为O(mn)
算法:
#coding=utf-8
"""
算法:Rabin-Karp模式匹配
作者:lph-China
时间:2019/7/4
"""
# Modulus applied to every rolling-hash value; numerically equal to 2**56 - 5.
PRIME = 72057594037927931
# Radix of the polynomial hash: each character code is folded in as one
# base-128 digit.
DOMAIN = 128
def roll_hash(old_val, out_digit, in_digit, last_pos):
    """Slide a window hash one position to the right.

    Args:
        old_val: Hash of the current window.
        out_digit: Character code leaving the window on the left.
        in_digit: Character code entering the window on the right.
        last_pos: Weight of the leftmost digit, i.e.
            DOMAIN ** (window_length - 1) reduced modulo PRIME.

    Returns:
        The hash of the shifted window, an integer in range(PRIME).
    """
    # Remove the contribution of the outgoing digit. Python's % always yields
    # a non-negative result for a positive modulus, so no extra multiple of
    # PRIME has to be added before reducing.
    without_head = (old_val - out_digit * last_pos) % PRIME
    # Shift the remaining digits up by one place and append the new digit.
    return (without_head * DOMAIN + in_digit) % PRIME
def matches(s, t, i, j, k):
    """Check whether k items of s starting at i equal k items of t starting at j.

    Args:
        s: First indexable sequence.
        t: Second indexable sequence.
        i: Start index into s.
        j: Start index into t.
        k: Number of positions to compare.

    Returns:
        True if s[i + d] == t[j + d] for every d in range(k), otherwise False.
        With k == 0 there is nothing to compare and the result is True.
    """
    # all() stops at the first mismatch, like an explicit early-return loop.
    return all(s[i + d] == t[j + d] for d in range(k))
def rabin_karp_matching(s, t):
    """Find the first occurrence of pattern t in text s with Rabin-Karp.

    Args:
        s: Text to search (a str; characters are hashed via ord()).
        t: Pattern to look for (a str).

    Returns:
        The smallest index i at which t occurs in s, or -1 when t does not
        occur in s. An empty pattern is reported at index 0.
    """
    len_s = len(s)
    len_t = len(t)
    if len_s < len_t:
        return -1
    if len_t == 0:
        # The empty pattern occurs at the very start. Returning here also
        # guarantees the exponent used below is never negative.
        return 0

    # Weight of the window's leftmost digit. Three-argument pow reduces
    # modulo PRIME at every step instead of first building the full
    # DOMAIN ** (len_t - 1) integer.
    last_pos = pow(DOMAIN, len_t - 1, PRIME)

    # Hash the pattern and the first window of the text in one pass;
    # zip stops after len(t) characters because len(s) >= len(t) here.
    hash_s = 0
    hash_t = 0
    for text_ch, pattern_ch in zip(s, t):
        hash_s = (DOMAIN * hash_s + ord(text_ch)) % PRIME
        hash_t = (DOMAIN * hash_t + ord(pattern_ch)) % PRIME

    last_start = len_s - len_t
    for i in range(last_start + 1):
        # Equal hashes can still be a collision, so confirm the characters.
        if hash_s == hash_t and matches(s, t, i, 0, len_t):
            return i
        # Advance the window unless it already sits at the final position.
        if i < last_start:
            hash_s = roll_hash(hash_s, ord(s[i]), ord(s[i + len_t]), last_pos)
    return -1
if __name__ == '__main__':
    # Demo: 'lala' first occurs in 'lalopalalali' at index 6, so this prints 6.
    s = 'lalopalalali'
    t = 'lala'
    result = rabin_karp_matching(s, t)
    print(result)