【LeetCode】字符串匹配-优快云博客

本文介绍了字符串匹配中的几种经典算法：暴力算法、KMP算法及其改进版本。通过对比不同算法的时间复杂度，展示了如何提高字符串匹配的效率。文章还提供了详细的算法实现代码。

给定目标串 haystack 和模式串 needle ，返回 needle 在 haystack 中第一次出现的位置下标，若 needle 不是 haystack 的子串则返回 -1。

1. Brute-Force Algorithm（暴力算法 / 简单模式匹配）

我自己写了一种双层循环的

 1 int strStr(string haystack, string needle) {
 2     if (needle.empty()) return 0;
 3     int m = haystack.size(), n = needle.size();
 4     for (int i = 0; i <= m - n; i++) {
 5         for (int j = 0; j < n; j++) {
 6             if (needle[j] != haystack[i + j])
 7                 break;
 8             if (j == n - 1)
 9                 return i;
10         }
11     }
12     return -1;
13 }

看了答案发现了一种更高效的方法，虽然时间复杂度同样是 O(m*n)，但只要一层循环，非常Nice！

 1 int strStr(string haystack, string needle) {
 2     if (needle.empty()) return 0;
 3     int i = 0, j = 0;
 4     int m = haystack.size(), n = needle.size();
 5     while (i < m && j < n) {
 6         if (haystack[i] == needle[j]) {
 7             i++;
 8             j++;
 9         } else {
10             i = i - j + 1;
11             j = 0;
12         }
13         if (j == n)
14             return i - j;
15     }
16     return -1;
17 }

2. KMP算法

算法讲解可以参考 http://www.61mon.com/index.php/archives/183/ ，讲解的已经很好了。

KMP的时间复杂度仅为 O(m+n)，因为当出现子串与主串某处不匹配时，并不会将遍历主串的下标 i 回溯，而是利用得到的 next 数组将模式子串向右“滑动”尽可能远的一段距离，继续进行比较，提高了效率。

next 数组的求解是关键，它是基于模式子串的最长前后缀，next[i] = needle[0] 到 needle[i - 1] 的字符串的最长相同前后缀的长度。

 1 void getNext(string needle, vector<int> &next) {
 2     int i = 0, j = -1;
 3     // j 表示最长相同前后缀的长度
 4     next[0] = j;
 5     while (i < needle.size()) {
 6         // j == -1 为边界条件判断， j = next[j] 可能使 j 退回到 -1
 7         if (j == -1 || needle[i] == needle[j]) {
 8             i++;
 9             j++;
10             next[i] = j;
11         } else {
12             j = next[j];
13         }
14     }
15 }
16     
17 int strStr(string haystack, string needle) {
18     if (needle.empty()) return 0;
19     int i = 0, j = 0;
20     int m = haystack.size(), n = needle.size();
21     vector<int> next(n + 1);
22     getNext(needle, next);
23     while (i < m && j < n) {
24         if (j == -1 || haystack[i] == needle[j]) {
25             i++;
26             j++;
27         } else {
28             j = next[j];
29         }
30         if (j == n)
31             return i - j;
32     }
33     return -1;
34 }

改进的KMP算法

 1 void getNextval(string needle, vector<int> &nextval) {
 2     int i = 0, j = -1;
 3     nextval[0] = j;
 4     while (i < needle.size()) {
 5         if (j == -1 || needle[i] == needle[j]) {
 6             i++;
 7             j++;
 8             // 生成 nextval 数组
 9             if (needle[i] != needle[j])
10                 nextval[i] = j;
11             else
12                 nextval[i] = nextval[j];
13         } else {
14             j = nextval[j];
15         }
16     }
17 }
18     
19 int strStr(string haystack, string needle) {
20     if (needle.empty()) return 0;
21     int i = 0, j = 0;
22     int m = haystack.size(), n = needle.size();
23     vector<int> nextval(n + 1);
24     getNextval(needle, nextval);
25     while (i < m && j < n) {
26         if (j == -1 || haystack[i] == needle[j]) {
27             i++;
28             j++;
29         } else {
30             j = nextval[j];
31         }
32         if (j == n)
33             return i - j;
34     }
35     return -1;
36 }