- Edit Distance
Given two words word1 and word2, find the minimum number of steps required to convert word1 to word2. (each operation is counted as 1 step.)
You have the following 3 operations permitted on a word:
Insert a character
Delete a character
Replace a character
Example
Given word1 = “mart” and word2 = “karma”, return 3.
解法1:
思路: DP。这题跟LCS(Longest Common Subsequence)那题有点像。但要注意:
- dp[i][j] = min{dp[i][j-1]+1, dp[i-1][j]+1, dp[i-1][j-1]+1 (当A[i] != B[j]), dp[i-1][j-1] (当A[i] == B[j])}。注意+1这个操作不能省。
- 边界条件要注意:
dp[0][i]表示A[0]与B[0…i]的编辑距离:当B[0…i]中有一个字符与A[0]相等时,dp[0][i]=i(即i+1个元素,i个操作);否则dp[0][i]=i+1。dp[i][0]也类似。
代码如下:
class Solution {
public:
    /**
     * Computes the edit distance (insert / delete / replace, each costing 1)
     * between two words.
     *
     * The table is indexed by character position: cost[r][c] is the distance
     * between word1[0..r] and word2[0..c] (both ends inclusive), so the first
     * row and first column have to be seeded explicitly.
     *
     * @param word1: A string
     * @param word2: A string
     * @return: The minimum number of steps.
     */
    int minDistance(string &word1, string &word2) {
        const int rows = word1.size();
        const int cols = word2.size();
        if (rows == 0) return cols;
        if (cols == 0) return rows;

        vector<vector<int>> cost(rows, vector<int>(cols, 0));

        // First column: word1[0..r] against the single character word2[0].
        // Once word2[0] has appeared in the prefix, r operations are enough
        // (r + 1 chars, keep one); until then every char costs one: r + 1.
        bool matched = false;
        for (int r = 0; r < rows; ++r) {
            matched = matched || (word1[r] == word2[0]);
            cost[r][0] = matched ? r : r + 1;
        }

        // First row: the single character word1[0] against word2[0..c].
        matched = false;
        for (int c = 0; c < cols; ++c) {
            matched = matched || (word2[c] == word1[0]);
            cost[0][c] = matched ? c : c + 1;
        }

        // Interior cells: cheapest of delete, insert, or match/replace.
        for (int r = 1; r < rows; ++r) {
            for (int c = 1; c < cols; ++c) {
                const int viaEdge = min(cost[r - 1][c], cost[r][c - 1]) + 1;
                const int viaDiagonal =
                    cost[r - 1][c - 1] + (word1[r] == word2[c] ? 0 : 1);
                cost[r][c] = min(viaEdge, viaDiagonal);
            }
        }
        return cost[rows - 1][cols - 1];
    }
};
解法2:也是DP。参考的九章。思路更简洁。时间复杂度O(mn), 空间复杂度O(mn)。
注意:
1)这里dp[i][j]表示word1[0…i-1]和word2[0…j-1]的edit distance。
2) if() condition 必须用word1[i-1]==word2[j-1]。
3) 注意dp[i][0]和dp[0][i]的初始值设置。
代码如下:
class Solution {
public:
    /**
     * Edit distance with a full (len1 + 1) x (len2 + 1) table.
     *
     * table[a][b] is the distance between the first a characters of word1 and
     * the first b characters of word2, so row 0 and column 0 describe an
     * empty prefix and equal the length of the other prefix.
     *
     * @param word1: A string
     * @param word2: A string
     * @return: The minimum number of steps.
     */
    int minDistance(string &word1, string &word2) {
        const int len1 = word1.size();
        const int len2 = word2.size();
        if (len1 == 0) return len2;
        if (len2 == 0) return len1;

        vector<vector<int>> table(len1 + 1, vector<int>(len2 + 1, 0));

        // Empty word1 prefix: b insertions are needed.
        for (int b = 0; b <= len2; ++b) {
            table[0][b] = b;
        }

        for (int a = 1; a <= len1; ++a) {
            // Empty word2 prefix: a deletions are needed.
            table[a][0] = a;
            for (int b = 1; b <= len2; ++b) {
                // Prefix length a ends at character index a - 1.
                if (word1[a - 1] != word2[b - 1]) {
                    const int cheapest = min(table[a - 1][b - 1],
                                             min(table[a - 1][b], table[a][b - 1]));
                    table[a][b] = cheapest + 1;
                } else {
                    table[a][b] = table[a - 1][b - 1];
                }
            }
        }
        return table[len1][len2];
    }
};
解法3:
解法2的基础上加上滚动数组,空间优化到O(n)。
注意:
- prev[j]就相当于解法2的dp[i-1][j], 可以理解为前一轮比较的结果。next[j]就相当于dp[i][j],相当于本轮比较的结果。
- 第一个for循环, prev[i] = i, 就相当于初始化dp[0][i]=i。
- next[0]=i 相当于初始化dp[i][0]=i。
代码如下:
class Solution {
public:
    /**
     * Edit distance using two rolling rows instead of the full table.
     *
     * While processing the first i characters of word1, prev[j] holds the
     * distance for (i - 1, j) and next[j] is filled in for (i, j), where j is
     * the number of characters of word2 consumed.
     *
     * @param word1: A string
     * @param word2: A string
     * @return: The minimum number of steps.
     */
    int minDistance(string &word1, string &word2) {
        int m = word1.size();
        int n = word2.size();
        if (m == 0) return n;
        if (n == 0) return m;

        vector<int> prev(n + 1, 0);
        vector<int> next(n + 1, 0);

        // Row 0: turning an empty prefix of word1 into j characters of word2.
        for (int j = 0; j <= n; ++j) {
            prev[j] = j;
        }

        for (int i = 1; i <= m; ++i) {
            // Column 0: deleting all i characters of word1.
            next[0] = i;
            for (int j = 1; j <= n; ++j) {
                if (word1[i - 1] == word2[j - 1]) {
                    next[j] = prev[j - 1];
                } else {
                    next[j] = min(min(prev[j], next[j - 1]), prev[j - 1]) + 1;
                }
            }
            // Exchange the buffers in O(1) rather than copying the row; every
            // cell of `next` is overwritten on the following iteration.
            prev.swap(next);
        }

        // After the final swap the most recently computed row lives in `prev`.
        return prev[n];
    }
};
解法4:滚动数组另一个版本。
class Solution {
public:
    /**
     * Edit distance with a two-row table addressed by row parity.
     *
     * Row (i % 2) is written for the first i characters of word1 while row
     * ((i - 1) % 2) still holds the results for the first i - 1 characters.
     *
     * @param word1: A string
     * @param word2: A string
     * @return: The minimum number of steps.
     */
    int minDistance(string &word1, string &word2) {
        const int len1 = word1.size();
        const int len2 = word2.size();
        if (len1 == 0) return len2;
        if (len2 == 0) return len1;

        vector<vector<int>> rows(2, vector<int>(len2 + 1, 0));

        // Row for an empty word1 prefix: k insertions.
        for (int k = 0; k <= len2; ++k) {
            rows[0][k] = k;
        }

        for (int a = 1; a <= len1; ++a) {
            vector<int> &current = rows[a % 2];
            const vector<int> &previous = rows[(a - 1) % 2];

            // Empty word2 prefix: a deletions.
            current[0] = a;
            for (int b = 1; b <= len2; ++b) {
                if (word1[a - 1] != word2[b - 1]) {
                    const int cheapest =
                        min(previous[b - 1], min(previous[b], current[b - 1]));
                    current[b] = cheapest + 1;
                } else {
                    current[b] = previous[b - 1];
                }
            }
        }
        return rows[len1 % 2][len2];
    }
};
本文深入解析编辑距离算法,包括动态规划解决字符串转换问题的方法。通过四种不同实现方式,从基本DP到滚动数组优化,逐步降低空间复杂度,适用于字符串相似度计算。
4359

被折叠的 条评论
为什么被折叠?



