编辑距离:
又称Levenshtein距离(也叫做Edit Distance),是指两个字串之间,由一个转成另一个所需的最少编辑操作次数。许可的编辑操作包括将一个字符替换成另一个字符,插入一个字符,删除一个字符。
例如将kitten一字转成sitting,最少需要以下3次编辑操作,因此编辑距离为3:
sitten(k→s,替换)
sittin(e→i,替换)
sitting(→g,在末尾插入)
#include <algorithm>
#include <cstring>
#include <iostream>
using namespace std;
// Computes the edit (Levenshtein) distance between the substrings
// lhs[lBegin..lEnd] and rhs[rBegin..rEnd] by plain recursion.
//
// Both ranges are inclusive; a range with begin > end is empty.
// Returns the minimum number of single-character substitutions,
// insertions and deletions needed to turn one substring into the other.
//
// No results are cached: every mismatch spawns three recursive calls, so
// the running time grows exponentially with the input length.
int cacDistance(const char *lhs, int lBegin, int lEnd,
                const char *rhs, int rBegin, int rEnd)
{
    // Left side exhausted: whatever remains on the right must be inserted.
    if (lBegin > lEnd)
        return (rBegin > rEnd) ? 0 : rEnd - rBegin + 1;

    // Right side exhausted (left is known to be non-empty here): delete the rest.
    if (rBegin > rEnd)
        return lEnd - lBegin + 1;

    // Matching leading characters cost nothing.
    if (lhs[lBegin] == rhs[rBegin])
        return cacDistance(lhs, lBegin + 1, lEnd, rhs, rBegin + 1, rEnd);

    // Mismatch: try each of the three edits and keep the cheapest.
    const int substituted = cacDistance(lhs, lBegin + 1, lEnd, rhs, rBegin + 1, rEnd);
    const int skippedLeft = cacDistance(lhs, lBegin + 1, lEnd, rhs, rBegin, rEnd);
    const int skippedRight = cacDistance(lhs, lBegin, lEnd, rhs, rBegin + 1, rEnd);
    return std::min(substituted, std::min(skippedLeft, skippedRight)) + 1;
}
// Demo driver: prints the edit distance between two sample words computed
// by the plain recursive cacDistance.
int main()
{
    const char *str1 = "kitten";
    const char *str2 = "sitting";

    // cacDistance takes inclusive end indices. Convert the lengths to int
    // before subtracting 1 so an empty string would give -1 instead of
    // wrapping around in unsigned arithmetic.
    const int lEnd = static_cast<int>(std::strlen(str1)) - 1;
    const int rEnd = static_cast<int>(std::strlen(str2)) - 1;

    const int result = cacDistance(str1, 0, lEnd, str2, 0, rEnd);
    std::cout << "result: " << result << std::endl;
    return 0;
}
用动态规划来解此题,代码如下:
#include <iostream>
using namespace std;
// Shared DP table: info[i][j] holds the edit distance between the first i
// characters of str1 and the first j characters of str2. Its fixed size
// limits both inputs to 9 characters.
int info[10][10];

// Prints rows 1..size1 and columns 1..size2 of info, one row per line,
// with each value followed by a single space.
void print(int size1, int size2)
{
    for (int i = 1; i <= size1; i++)
    {
        for (int j = 1; j <= size2; j++)
        {
            std::cout << info[i][j] << " ";
        }
        std::cout << std::endl;
    }
}

// Computes the edit (Levenshtein) distance between str1 and str2 with a
// bottom-up dynamic-programming table, prints the filled table via print(),
// and returns the distance.
//
// Returns -1 if either pointer is null or if either string is too long to
// fit in info (more than 9 characters).
int cacDistance(const char *str1, const char *str2)
{
    if (str1 == nullptr || str2 == nullptr)
        return -1;

    // Largest usable index in either dimension of info.
    const std::size_t maxLen = sizeof(info) / sizeof(info[0]) - 1;
    const std::size_t len1 = std::strlen(str1);
    const std::size_t len2 = std::strlen(str2);
    if (len1 > maxLen || len2 > maxLen)
        return -1;

    const int size1 = static_cast<int>(len1);
    const int size2 = static_cast<int>(len2);

    // Base cases: turning a prefix into the empty string (or the reverse)
    // costs one deletion/insertion per character.
    for (int i = 0; i <= size1; i++)
        info[i][0] = i;
    for (int j = 0; j <= size2; j++)
        info[0][j] = j;

    for (int i = 1; i <= size1; i++)
    {
        for (int j = 1; j <= size2; j++)
        {
            if (str1[i - 1] == str2[j - 1])
            {
                // Matching characters add no cost.
                info[i][j] = info[i - 1][j - 1];
            }
            else
            {
                // Cheapest of substitution, deletion and insertion, plus one edit.
                info[i][j] = std::min(info[i - 1][j - 1],
                                      std::min(info[i - 1][j], info[i][j - 1])) + 1;
            }
        }
    }

    print(size1, size2);
    return info[size1][size2];
}
// Demo driver: prints the edit distance between two sample words computed
// by the dynamic-programming cacDistance.
int main()
{
    const char *str1 = "kitten";
    const char *str2 = "sitting";
    const int result = cacDistance(str1, str2);
    std::cout << "result: " << result << std::endl;
    return 0;
}
递归的建表解法,也就是书上说的避免重复计算解法
#include <iostream>
using namespace std;
// Memo table for the recursive solver. The result for the subproblem that
// starts at (lBegin, rBegin) lives in info[lBegin + 1][rBegin + 1]; the
// caller must set those cells to INF before the top-level call.
int info[10][10];

// Sentinel stored in a memo cell whose subproblem has not been solved yet.
int INF = -1;

// Computes the edit (Levenshtein) distance between lhs[lBegin..lEnd) and
// rhs[rBegin..rEnd) by recursion, caching each subproblem in info so it is
// solved only once.
//
// The end indices are exclusive. Before the top-level call, every cell
// info[i][j] with 1 <= i <= lEnd and 1 <= j <= rEnd must hold INF. Because
// info is 10x10, lEnd and rEnd must not exceed 9.
int cacDistance(const char *lhs, int lBegin, int lEnd,
                const char *rhs, int rBegin, int rEnd)
{
    // One side exhausted: the remainder of the other side is pure
    // insertions/deletions. No table access is needed here.
    if (lBegin == lEnd)
        return rEnd - rBegin;
    if (rBegin == rEnd)
        return lEnd - lBegin;

    // Read and write the same cell so a cached value always belongs to
    // this exact subproblem.
    int &memo = info[lBegin + 1][rBegin + 1];
    if (memo != INF)
        return memo;

    if (lhs[lBegin] == rhs[rBegin])
    {
        // Matching leading characters cost nothing.
        memo = cacDistance(lhs, lBegin + 1, lEnd, rhs, rBegin + 1, rEnd);
    }
    else
    {
        // Mismatch: cheapest of substitution, deletion and insertion, plus one edit.
        const int substituted = cacDistance(lhs, lBegin + 1, lEnd, rhs, rBegin + 1, rEnd);
        const int skippedLeft = cacDistance(lhs, lBegin + 1, lEnd, rhs, rBegin, rEnd);
        const int skippedRight = cacDistance(lhs, lBegin, lEnd, rhs, rBegin + 1, rEnd);
        memo = std::min(substituted, std::min(skippedLeft, skippedRight)) + 1;
    }
    return memo;
}
// Demo driver: prepares the memo table, then prints the edit distance
// between two sample words computed by the table-backed recursive
// cacDistance.
int main()
{
    const char *str1 = "kitten";
    const char *str2 = "sitting";
    const int size1 = static_cast<int>(std::strlen(str1));
    const int size2 = static_cast<int>(std::strlen(str2));

    // Mark rows 1..size1 and columns 1..size2 of info as "not computed yet"
    // (-1). All other cells keep the zero they receive from static
    // initialization, so no separate zeroing pass is needed.
    for (int i = 1; i <= size1; i++)
    {
        for (int j = 1; j <= size2; j++)
        {
            info[i][j] = -1;
        }
    }

    // The end indices passed here are exclusive (the string lengths).
    const int result = cacDistance(str1, 0, size1, str2, 0, size2);
    std::cout << "result: " << result << std::endl;
    return 0;
}
本文深入探讨了编辑距离(Levenshtein距离)的概念及其应用场景,提供了递归、动态规划及带备忘录的递归三种实现方式,并附带完整的C++代码示例。
2556

被折叠的 条评论
为什么被折叠?



