# 一:相似度计算的两种方式 (Part 1: two ways to compute similarity)
import difflib
def string_similar(s1, s2):
    """Return difflib's quick similarity estimate for *s1* and *s2*.

    The value is ``SequenceMatcher.quick_ratio()``, a float in [0, 1].
    Per the difflib documentation this is an upper bound on ``ratio()``,
    so it can report a high score for sequences that share the same
    elements in a different order.
    """
    matcher = difflib.SequenceMatcher(a=s1, b=s2)
    return matcher.quick_ratio()
# for i in range(len(data4_message)):
# s1 = data4_message[i]
# s2 = data4_answer[i]
# print(string_similar(s1, s2))
# Compute H, D, S, I, N (hits, deletions, substitutions, insertions, reference length).
def count_d_i_s_n( json_str, xml_str):
    """Align *json_str* against *xml_str* and count the edit operations.

    ``xml_str`` acts as the reference (``N`` is its length) and ``json_str``
    as the sequence being scored. A minimum-edit-distance table is filled
    and then walked back from the bottom-right corner to classify each step.

    Args:
        json_str: Sequence being compared (e.g. a string).
        xml_str: Reference sequence (e.g. a string).

    Returns:
        A tuple ``(H, D, S, I, N)`` where
        ``D`` counts reference elements with no counterpart in ``json_str``,
        ``S`` counts mismatched aligned pairs,
        ``I`` counts ``json_str`` elements with no counterpart in the reference,
        ``N`` is ``len(xml_str)`` and ``H = N - D - S`` is the number of hits.
    """
    len1 = len(json_str)
    len2 = len(xml_str)
    N = len2

    # dis[i][j]: edit distance between json_str[:i] and xml_str[:j].
    # flag[i][j]: neighbour that produced dis[i][j]
    #             (1 = top, 2 = left, 3 = diagonal).
    dis = [[0] * (len2 + 1) for _ in range(len1 + 1)]
    flag = [[0] * (len2 + 1) for _ in range(len1 + 1)]

    # Border cells: turning a prefix into the empty sequence costs its length.
    # The ranges must include len1 / len2 themselves; leaving the last row or
    # column at 0 corrupts the table and the operation counts derived from it.
    for i in range(len1 + 1):
        dis[i][0] = i
    for j in range(len2 + 1):
        dis[0][j] = j

    for i in range(1, len1 + 1):
        for j in range(1, len2 + 1):
            top = dis[i - 1][j] + 1
            left = dis[i][j - 1] + 1
            mismatch = 0 if json_str[i - 1] == xml_str[j - 1] else 1
            lt = dis[i - 1][j - 1] + mismatch

            best = min(top, left, lt)
            dis[i][j] = best
            # On ties the diagonal wins, then top, then left.
            if best == lt:
                flag[i][j] = 3
            elif best == top:
                flag[i][j] = 1
            else:
                flag[i][j] = 2

    # Walk back from the bottom-right corner, classifying each step.
    I = S = D = 0
    m, n = len1, len2
    while m > 0 and n > 0:
        if json_str[m - 1] == xml_str[n - 1]:
            # Matching elements: a hit, nothing to count.
            m -= 1
            n -= 1
            continue
        step = flag[m][n]
        if step == 1:
            I += 1
            m -= 1
        elif step == 2:
            D += 1
            n -= 1
        else:
            S += 1
            m -= 1
            n -= 1

    # At most one of m / n is still positive here: leftover reference
    # elements are deletions, leftover json_str elements are insertions.
    D += n
    I += m

    H = N - D - S
    return H, D, S, I, N
# Demo: print the alignment counts, the hit-based similarity and difflib's
# estimate for the same pair of strings.
a = "5124"
b = "1234"

# NOTE(review): the counts are computed for '124' vs "1234", while the
# normalisation below uses len(a) with a == "5124" -- confirm this is intended.
H, D, S, I, N = count_d_i_s_n('124', "1234")

# Hit-based similarity: twice the hits over the combined length of a and b.
simil = 2 * H / (len(a) + len(b))

print(H, D, S, I, N)
print(simil)
print(string_similar(a, b))