文本相似度计算

 一:相似度计算的两种方式

import difflib
def string_similar(s1, s2):
    """Return a quick similarity estimate in [0, 1] for two sequences.

    Delegates to ``difflib.SequenceMatcher.quick_ratio()`` with no junk
    filter, which the standard library documents as a fast upper bound on
    ``SequenceMatcher.ratio()``.
    """
    matcher = difflib.SequenceMatcher(None, s1, s2)
    similarity = matcher.quick_ratio()
    return similarity
# for i in range(len(data4_message)):
#     s1 = data4_message[i]
#     s2 = data4_answer[i]
#     print(string_similar(s1, s2))


# Compute H (hits), D (deletions), S (substitutions), I (insertions) and N (reference length)

def count_d_i_s_n(json_str, xml_str):
    """Align two sequences by edit distance and count the edit operations.

    ``xml_str`` acts as the reference (its length is returned as ``N``) and
    ``json_str`` as the sequence being compared against it. Both must
    support ``len()`` and integer indexing.

    Args:
        json_str: The sequence under test.
        xml_str: The reference sequence.

    Returns:
        A tuple ``(H, D, S, I, N)`` where
        ``D`` counts reference items missing from ``json_str``,
        ``S`` counts items that differ at aligned positions,
        ``I`` counts extra items present only in ``json_str``,
        ``N`` is ``len(xml_str)`` and ``H = N - D - S`` is the number of
        reference items matched exactly.
    """
    hyp_len = len(json_str)
    ref_len = len(xml_str)

    # dis[i][j] is the edit distance between json_str[:i] and xml_str[:j].
    dis = [[0] * (ref_len + 1) for _ in range(hyp_len + 1)]
    # flag[i][j] records the move that produced dis[i][j]:
    # 1 = from above (insertion), 2 = from the left (deletion),
    # 3 = diagonal (match or substitution).
    flag = [[0] * (ref_len + 1) for _ in range(hyp_len + 1)]

    # Boundary row/column must include the last index; otherwise
    # dis[hyp_len][0] and dis[0][ref_len] stay 0 and corrupt the table.
    for i in range(hyp_len + 1):
        dis[i][0] = i
    for j in range(ref_len + 1):
        dis[0][j] = j

    for i in range(1, hyp_len + 1):
        for j in range(1, ref_len + 1):
            top = dis[i - 1][j] + 1
            left = dis[i][j - 1] + 1
            lt = dis[i - 1][j - 1]
            if json_str[i - 1] != xml_str[j - 1]:
                lt += 1
            best = min(top, left, lt)
            dis[i][j] = best
            # Tie-break order: diagonal first, then top, then left.
            if best == lt:
                flag[i][j] = 3
            elif best == top:
                flag[i][j] = 1
            else:
                flag[i][j] = 2

    # Walk back from the bottom-right corner, tallying each operation.
    D = S = I = 0
    m, n = hyp_len, ref_len
    while m > 0 and n > 0:
        if json_str[m - 1] == xml_str[n - 1]:
            m -= 1
            n -= 1
        elif flag[m][n] == 1:
            I += 1
            m -= 1
        elif flag[m][n] == 2:
            D += 1
            n -= 1
        else:
            S += 1
            m -= 1
            n -= 1

    # Whatever is left on one side once the other is exhausted is pure
    # deletion (reference leftovers) or insertion (json_str leftovers).
    if m == 0:
        D += n
    elif n == 0:
        I += m

    N = ref_len
    H = N - D - S
    return H, D, S, I, N
# Demo: compare the alignment-based similarity with difflib's estimate
# for the same pair of strings.
a = "5124"
b = "1234"
# Align the same strings whose lengths normalize the score below, so the
# hit count H and the denominator refer to one pair of inputs.
H, D, S, I, N = count_d_i_s_n(a, b)
# Twice the number of matched items over the combined length.
simil = H * 2 / (len(a) + len(b))
print(H, D, S, I, N)
print(simil)
print(string_similar(a, b))

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

空弹壳

你的鼓励是我创作的动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值