kmp算法简单理解
next的获取
双指针,当两个指针的字符一致的时候,一起向前走,如果不一致,指向前面的k回退到next[k-1]的位置,直到遇到一致的字符或者到0的位置,然后放置当前位置的k值.
kmp用法
双指针,一个指向带匹配字符和 匹配字符.
但两个指针指向的字符一致的时候,指针同时往前走,但不一致的时候,匹配字符的指针回退到next[q-1]的位置,直到遇到一致的字符或者到0的位置.
到0的位置后待匹配的指针就会向前移动,因为该字符无匹配,只能放弃字符.
kmp算法实现
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
// 匹配出next数组
void make_next(const char *pattern, int *next) {
int q, k;
int m = strlen(pattern);
next[0] = 0;
for (q = 1,k = 0;q < m; q ++) {
while (k > 0 && pattern[q] != pattern[k])
k = next[k-1];
if (pattern[q] == pattern[k]) {
k ++;
}
next[q] = k;
}
// 假设为abcabcd
// next[0] = 0;
// q=1, k=0, pattern[q]:pattern[k] = b:a, next[1] = 0;
// q=2, k=0, pattern[q]:pattern[k] = c:a, next[2] = 0;
// q=3, k=0, pattern[q]:pattern[k] = a:a, k++, next[3] = 1;
// q=4, k=1, pattern[q]:pattern[k] = b:b, k++, next[4] = 2;
// q=5, k=2, pattern[q]:pattern[k] = c:c, k++, next[5] = 3;
// q=6, k=3, pattern[q]:pattern[k] = d:a, k=next[k-1] -> k=0; next[6] = 0;
}
// 匹配位置
int kmp(const char *text, const char *pattern, int *next) {
int n = strlen(text);
int m = strlen(pattern);
int i, q;
for (i = 0, q = 0;i < n;i ++) {
while (q > 0 && pattern[q] != text[i]) {
q = next[q-1];
}
if (pattern[q] == text[i]) {
q ++;
}
if (q == m) {
break;
}
}
if (q == m)
return i-q+1;
else
return -1;
}
应用实例
// 获取第一个匹配的位置
int oneKmp(const char *text, const char *pattern)
{
int len = strlen(pattern);
if(len==0) return -2;
int *next = (int *)malloc(sizeof(int)*len);
make_next(pattern, next);
int index = kmp(text, pattern, next);
free(next);
return index;
}
// 返回匹配到的个数,位置放在result数组里面
int allKmp(int *result, const char *text, const char *pattern)
{
int len = strlen(pattern);
if(len==0) return -2;
if(result==NULL) return -3;
int *next = (int *)malloc(sizeof(int)*len);
make_next(pattern, next);
int i = 0,index = 0;
int len = strlen(text);
int ret = 0;
for(i = 0; 1 ; i++){
if(index >= len) break;
if((ret= kmp(text+index,pattern,next))<0)
break;
index += ret;
result[i] = index++;
}
free(next);
return i;
}
int main() {
int i;
char *text = "ababxbababababcdababcabddcadfdsab";
char *pattern = "b";
int *next = (int *)malloc(sizeof(int)*strlen(pattern);
int *result = (int *)malloc(sizeof(int)*strlen(text));
int idx = kmp(text, pattern, next);
printf("match pattern : %d\n", idx);
int num = allKmp(result,text, pattern);
printf("num is :%d \n",num);
for(int i = 0; i < num; i++)
printf("%4d ", result[i]);
printf("\n");
for (i = 0;i < strlen(pattern);i ++) {
printf("%4d", next[i]);
}
printf("\n");
return 0;
}