From : https://leetcode.com/problems/implement-strstr/
Implement strStr().
Returns the index of the first occurrence of needle in haystack, or -1 if needle is not part of haystack.
class Solution {
public:
    // Builds the classic KMP failure table for `needle`.
    //
    // `next` must already hold at least needle.length() + 1 entries.
    // On return next[0] == -1 (sentinel) and, for k >= 1, next[k] is the
    // length of the longest proper prefix of needle[0..k-1] that is also a
    // suffix of it, i.e. the pattern index to resume from after a mismatch
    // at pattern index k.
    void getNext(vector<int> &next, string &needle) {
        const int patternLen = static_cast<int>(needle.length());
        next[0] = -1;
        int border = -1;  // border length of the prefix handled so far; -1 = none
        for (int pos = 0; pos != patternLen;) {
            if (border == -1 || needle[pos] == needle[border]) {
                // The border can be extended by one character (or restarts at 0).
                ++pos;
                ++border;
                next[pos] = border;
            } else {
                // Fall back to the next shorter border and retry this position.
                border = next[border];
            }
        }
    }

    // Returns the index of the first occurrence of `needle` in `haystack`,
    // or -1 when there is none. An empty `needle` matches at index 0.
    int strStr(string haystack, string needle) {
        if (needle.empty()) return 0;
        if (haystack.empty()) return -1;

        vector<int> next(needle.length() + 1);
        getNext(next, needle);

        const int textLen = static_cast<int>(haystack.length());
        const int patternLen = static_cast<int>(needle.length());
        int matched = 0;  // number of pattern characters currently matched
        for (int pos = 0; pos != textLen;) {
            if (matched == -1 || haystack[pos] == needle[matched]) {
                ++pos;
                ++matched;
                // Whole pattern matched: it started `matched` characters back.
                if (matched == patternLen) return pos - matched;
            } else {
                // Mismatch: keep the text position, shift the pattern.
                matched = next[matched];
            }
        }
        return -1;
    }
};
KMP在上面方法中,对模式串 "ABCDMABDV" 会生成如下next数组(代码实际还会多生成一项next[9] = 0,查找首次出现位置时用不到):
0 1 2 3 4 5 6 7 8 index
A B C D M A B D V
-1 0 0 0 0 0 1 2 0 next
next表示此处匹配失败后,j的取值;即下次j应该返回的值。实际是上次匹配成功的值,这次匹配要先++。
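上面的是通常用的KMP算法,但是算法是有一定缺陷的。比如我们的模式串 pattern = “AAAAB”,按上面的代码很容易得到next数组为 -1 0 1 2 3 0。如果目标匹配串为 “AAAACAAAAB”,大家可以模拟一下:在C处失配时,j要沿着 3→2→1→0→-1 逐个回退,而每次回退后比较的仍然是A,必然再次失配,即A要回溯多次。就是说我们的next数组优化并不彻底。优化算法:next[i]表示匹配串在i处如果匹配失败下次移到的位置;若needle[i]与回退位置j上的字符needle[j]相同,则直接取next[j]。下面是优化后的求next数组的代码。虽然两种写法求得的next值不一样,但是kmp函数的写法是一样的。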
class Solution {
public:
    // Builds the compressed KMP failure table for `needle`.
    //
    // `next` must already hold at least needle.length() + 1 entries.
    // next[0] is the sentinel -1. For k >= 1, next[k] is the pattern index
    // to resume from after a mismatch at pattern index k. Unlike the plain
    // table, a fallback target holding the same character as position k is
    // skipped, because it would be guaranteed to mismatch again.
    void getNext(vector<int> &next, string &needle) {
        const int patternLen = static_cast<int>(needle.length());
        next[0] = -1;
        int border = -1;  // border length of the prefix handled so far; -1 = none
        int pos = 0;
        while (pos != patternLen) {
            if (border != -1 && needle[pos] != needle[border]) {
                // Cannot extend this border: try the next candidate.
                border = next[border];
                continue;
            }
            ++pos;
            ++border;
            // The optimisation: think of "AAAAB". Without it the four A's get
            // 0 1 2 3 and a mismatch walks back through every one of them.
            // When the fallback position holds the same character, reuse that
            // position's own fallback instead.
            // On the last pass pos == needle.length(), so needle[pos] reads the
            // string's terminating null character (a defined read since C++11).
            next[pos] = (needle[pos] == needle[border]) ? next[border] : border;
        }
    }

    // Returns the index of the first occurrence of `needle` in `haystack`,
    // or -1 when there is none. An empty `needle` matches at index 0.
    int strStr(string haystack, string needle) {
        if (needle.empty()) return 0;
        if (haystack.empty()) return -1;

        vector<int> next(needle.length() + 1);
        getNext(next, needle);

        const int textLen = static_cast<int>(haystack.length());
        const int patternLen = static_cast<int>(needle.length());
        int textPos = 0;
        int matched = 0;  // number of pattern characters currently matched
        while (textPos != textLen) {
            if (matched != -1 && haystack[textPos] != needle[matched]) {
                // Mismatch: keep the text position, shift the pattern.
                matched = next[matched];
                continue;
            }
            ++textPos;
            ++matched;
            // Whole pattern matched: it started `matched` characters back.
            if (matched == patternLen) return textPos - matched;
        }
        return -1;
    }
};
class Solution {
public:
    // Brute-force search: returns the index of the first occurrence of
    // `needle` in `haystack`, or -1 when there is none. An empty `needle`
    // matches at index 0.
    int strStr(string haystack, string needle) {
        int textPos = 0;   // current position in haystack
        int matched = 0;   // number of needle characters matched so far
        while (textPos < haystack.size() && matched < needle.size()) {
            if (haystack[textPos] != needle[matched]) {
                // Restart one character after where this attempt began.
                textPos = textPos - matched + 1;
                matched = 0;
            } else {
                ++textPos;
                ++matched;
            }
        }
        // A full match ends with `matched` equal to the needle length; the
        // match started that many characters before the current position.
        if (matched == needle.size()) {
            return textPos - matched;
        }
        return -1;
    }
};