# 编辑距离
def edit_distance(word1, word2):
    """Return the Levenshtein edit distance between ``word1`` and ``word2``.

    The distance is the minimum number of single-element insertions,
    deletions and substitutions needed to turn ``word1`` into ``word2``.

    Args:
        word1: First sequence (typically a ``str``).
        word2: Second sequence (typically a ``str``).

    Returns:
        The edit distance as a non-negative ``int``.
    """
    len2 = len(word2)
    # prev[j] is the distance between the prefix of word1 processed so far
    # and word2[:j]. For the empty prefix that is simply j insertions.
    prev = list(range(len2 + 1))
    for i, ch1 in enumerate(word1, start=1):
        # Column 0: turning word1[:i] into the empty string takes i deletions.
        curr = [i] + [0] * len2
        for j, ch2 in enumerate(word2, start=1):
            delta = 0 if ch1 == ch2 else 1
            curr[j] = min(
                prev[j - 1] + delta,  # substitute (or keep when equal)
                prev[j] + 1,          # delete from word1
                curr[j - 1] + 1,      # insert into word1
            )
        prev = curr
    return prev[len2]
# 全局序列对齐 Needleman-Wunsch
def globalAlignment(str1, str2, *, match=1, mismatch=-1, gap=-2):
    """Return the optimal Needleman-Wunsch global alignment score.

    Both sequences are aligned end to end; every position is either paired
    with a position of the other sequence or with a gap.

    Args:
        str1: First sequence.
        str2: Second sequence.
        match: Score added when two aligned elements are equal.
        mismatch: Score added when two aligned elements differ.
        gap: Score added for each element aligned against a gap
            (normally negative).

    Returns:
        The best achievable alignment score. With the default scoring this
        is an ``int``; aligning two empty sequences scores 0.
    """
    n = len(str2)
    # Row 0: the empty prefix of str1 against str2[:j] needs j gaps.
    prev = [j * gap for j in range(n + 1)]
    for i, ch1 in enumerate(str1, start=1):
        # Column 0: str1[:i] against the empty prefix needs i gaps.
        curr = [i * gap] + [0] * n
        for j, ch2 in enumerate(str2, start=1):
            pair_score = match if ch1 == ch2 else mismatch
            curr[j] = max(
                prev[j] + gap,             # ch1 aligned with a gap
                curr[j - 1] + gap,         # ch2 aligned with a gap
                prev[j - 1] + pair_score,  # ch1 aligned with ch2
            )
        prev = curr
    return prev[n]
# 局部序列对齐 Smith-Waterman
def localAlignment(str1, str2, *, match=1, mismatch=-1, gap=-2):
    """Return the optimal Smith-Waterman local alignment score.

    The score is that of the best-scoring pair of contiguous regions, one
    from each sequence. Cell scores are clamped at 0, so an alignment may
    start anywhere and the result is never negative.

    Args:
        str1: First sequence.
        str2: Second sequence.
        match: Score added when two aligned elements are equal.
        mismatch: Score added when two aligned elements differ.
        gap: Score added for each element aligned against a gap; expected
            to be zero or negative.

    Returns:
        The highest cell score in the alignment table (0 when nothing
        aligns or either sequence is empty).
    """
    n = len(str2)
    best = 0
    # Border cells are 0: a local alignment never benefits from leading gaps.
    prev = [0] * (n + 1)
    for ch1 in str1:
        curr = [0] * (n + 1)
        for j, ch2 in enumerate(str2, start=1):
            pair_score = match if ch1 == ch2 else mismatch
            score = max(
                prev[j] + gap,             # ch1 aligned with a gap
                curr[j - 1] + gap,         # ch2 aligned with a gap
                prev[j - 1] + pair_score,  # ch1 aligned with ch2
                0,                         # restart the alignment here
            )
            curr[j] = score
            if score > best:
                best = score
        prev = curr
    return best
# 最长公共子串
def longestCommonString(A, B):
    """Return the length of the longest common contiguous run of A and B.

    ``table[r][c]`` holds the length of the longest common run that ends
    exactly at ``A[r - 1]`` and ``B[c - 1]``; the answer is the largest
    entry in the table.

    Args:
        A: First sequence.
        B: Second sequence.

    Returns:
        Length of the longest common substring (0 if there is none).
    """
    rows, cols = len(A), len(B)
    table = [[0] * (cols + 1) for _ in range(rows + 1)]
    for r, a_item in enumerate(A, start=1):
        above = table[r - 1]
        current = table[r]
        for c, b_item in enumerate(B, start=1):
            # A match extends the run ending on the diagonal; a mismatch
            # breaks it.
            current[c] = above[c - 1] + 1 if a_item == b_item else 0
    return max(map(max, table))
# 最长的公共子序列 LCS
def find_lcseque(s1, s2):
    """Return the length of the longest common subsequence of s1 and s2.

    A subsequence keeps the relative order of elements but need not be
    contiguous.

    Args:
        s1: First sequence.
        s2: Second sequence.

    Returns:
        Length of the longest common subsequence (0 if there is none).
    """
    # prev[j] is the LCS length of the prefix of s1 processed so far and
    # s2[:j]. Only the length is returned, so no direction table or
    # backtracking pass is needed.
    prev = [0] * (len(s2) + 1)
    for item1 in s1:
        curr = [0]
        for j, item2 in enumerate(s2, start=1):
            if item1 == item2:
                # A match extends the LCS of both shorter prefixes.
                curr.append(prev[j - 1] + 1)
            else:
                # Otherwise carry over the better of dropping either element.
                curr.append(max(curr[j - 1], prev[j]))
        prev = curr
    return prev[-1]