好文推荐:The Knuth-Morris-Pratt Algorithm
注意:此处有一个坑:C++ 中 string.length() 返回的是无符号整数(size_t)。有符号数与无符号数一起比较或运算时,有符号数会先被隐式转换成无符号数;负数(例如 j = -1)的最高位符号位是 1,转换后会变成一个非常大的正数,于是出现"负数比正数大"的情况,像 j < pattern.length() 这样的判断为假,循环提前退出。因此,计算字符串长度时需要强制转换成 int,统一按有符号数进行处理。
// Improved KMP table: builds the optimized "nextval" array for `pattern`.
//
// Indexing is 0-based, matching std::string and the `j == -1` convention used
// by Kmp():
//   - nextval[0] is -1 (a mismatch on the first character advances the text).
//   - For i > 0, nextval[i] is the pattern index to resume from after a
//     mismatch at pattern[i]. When the plain fallback position holds the same
//     character as pattern[i] (so the retry would fail again), the fallback's
//     own nextval is stored instead, skipping the redundant comparison.
//
// Parameters:
//   pattern - the pattern to preprocess.
//   nextval - caller-provided buffer with at least pattern.length() elements;
//             exactly indices [0, pattern.length()) are written.
//
// An empty pattern writes nothing.
void GetNextval(string pattern,int nextval[]){
    // length() is unsigned; convert once so comparisons with j == -1 stay signed.
    const int length = static_cast<int>(pattern.length());
    if (length == 0) {
        return;
    }

    nextval[0] = -1;
    int i = 0;   // last index whose entry has been filled
    int j = -1;  // current fallback position (-1: no prefix left to try)
    while (i < length - 1) {
        if (j == -1 || pattern[i] == pattern[j]) {
            ++i;
            ++j;
            if (pattern[i] == pattern[j]) {
                // Falling back to j would compare the same character again.
                nextval[i] = nextval[j];
            } else {
                nextval[i] = j;
            }
        } else {
            j = nextval[j];
        }
    }
}
// Computes the KMP "next" (failure) array for `pattern`.
//
// Indexing is 0-based, matching std::string and the `j == -1` convention used
// by Kmp():
//   - next[0] is -1 (a mismatch on the first character advances the text).
//   - For i > 0, next[i] is the length of the longest proper prefix of
//     pattern[0..i) that is also a suffix of it, i.e. the pattern index to
//     resume from after a mismatch at pattern[i].
//
// Parameters:
//   pattern - the pattern to preprocess.
//   next    - caller-provided buffer with at least pattern.length() elements;
//             exactly indices [0, pattern.length()) are written.
//
// An empty pattern writes nothing.
void GetNext(string pattern,int next[]){
    // length() is unsigned; convert once so comparisons with j == -1 stay signed.
    const int length = static_cast<int>(pattern.length());
    if (length == 0) {
        return;
    }

    next[0] = -1;
    int i = 0;   // last index whose entry has been filled
    int j = -1;  // length of the prefix currently being extended (-1: none)
    while (i < length - 1) {
        if (j == -1 || pattern[i] == pattern[j]) {
            ++i;
            ++j;
            next[i] = j;
        } else {
            // Retry with the next shorter border of the matched prefix.
            j = next[j];
        }
    }
}
// Performs the pattern match: scans `data` for `pattern` using the fallback
// table `next`.
//
// On a character mismatch the pattern position is replaced by next[position];
// a position of -1 means "no fallback left", in which case both the text and
// pattern positions advance.
//
// Returns the 0-based index in `data` where the match starts, or -1 when the
// text is exhausted before the whole pattern has been matched.
int Kmp(string data,string pattern,int next[]){
    // Lengths are converted to int so comparisons with a -1 position stay signed.
    const int textLen = (int)data.length();
    const int patLen = (int)pattern.length();

    int textPos = 0;
    int patPos = 0;
    while (textPos < textLen && patPos < patLen) {
        if (patPos != -1 && data[textPos] != pattern[patPos]) {
            // Mismatch: consult the table and retry at the same text position.
            patPos = next[patPos];
            continue;
        }
        // Either the characters agree or there is no fallback left: move on.
        ++textPos;
        ++patPos;
    }

    // The whole pattern was consumed only if patPos reached its length.
    return patPos == patLen ? textPos - patPos : -1;
}
int main(int argc, const char * argv[]) {
string data = "xxyxxxyxxxxyxyx";
string pattern = "cddcdececdea";
int patLength = (int)pattern.length();
int next[patLength];
GetNext(pattern , next);
int ret = Kmp(data, pattern, next);
if (ret != -1) {
cout<<"Success:Index = "<<ret<<endl;
}else{
cout<<"fail"<<endl;
}
return 0;
}