#!/usr/bin/python
# coding:utf-8


def levenshtein_ratio(first, second):
    """Return a similarity score between 0.0 and 1.0 for two strings.

    The score is ``1 - distance / (len(first) + len(second))`` where the
    distance is computed with insertion cost 1, deletion cost 1 and
    replacement cost 2.  With those costs the distance can never exceed the
    combined length, so the result stays within ``[0.0, 1.0]``.

    :param first: the first string
    :param second: the second string
    :return: 1.0 for identical strings (including two empty strings),
        0.0 when the strings have nothing in common
    """
    total_length = len(first) + len(second)
    if total_length == 0:
        # Two empty strings are identical; also avoids dividing by zero.
        return 1.0
    distance = levenshtein_distance(first, second, 1, 1, 2)
    # float() keeps true division on Python 2 as well as Python 3.
    return 1 - float(distance) / total_length


def levenshtein_distance(first, second, cost_ins=1, cost_del=1, cost_rep=1):
    """Compute the weighted edit distance needed to turn *first* into *second*.

    :param first: the first (source) string
    :param second: the second (target) string
    :param cost_ins: cost of inserting one character of *second*
    :param cost_del: cost of deleting one character of *first*
    :param cost_rep: cost of replacing one character with a different one
    :return: the minimal total cost of the edit operations
    """
    # With one side empty the only option is to insert all of `second`
    # or delete all of `first`.
    if not first:
        return len(second) * cost_ins
    if not second:
        return len(first) * cost_del

    # Only two rows of the dynamic-programming table are needed at a time.
    # previous_row[j] is the cost of turning the already processed prefix of
    # `first` into second[:j]; for the empty prefix that is j insertions.
    previous_row = [j * cost_ins for j in range(len(second) + 1)]

    for i, first_char in enumerate(first, 1):
        # Turning first[:i] into the empty string takes i deletions.
        current_row = [i * cost_del]
        for j, second_char in enumerate(second, 1):
            deletion = previous_row[j] + cost_del
            insertion = current_row[j - 1] + cost_ins
            substitution = previous_row[j - 1]
            if first_char != second_char:
                substitution += cost_rep
            current_row.append(min(insertion, deletion, substitution))
        previous_row = current_row

    return previous_row[-1]
Python levenshtein算法 字符串相似度
最新推荐文章于 2024-08-15 08:45:00 发布