字符串匹配?
简单地说: 给定两个字符串 S 和 T, 在主串 S 中找到模式串 T 第一次出现的位置。
BF
C++代码
/**
 * @brief Brute-force substring search.
 *
 * Tries every start offset at which the pattern can still fit inside the
 * text and compares character by character.
 *
 * @param str text to search in
 * @param pat pattern to look for
 * @return int index of the first character of the first match, or -1 when
 *         pat does not occur in str. An empty pat matches at index 0.
 */
int bf(const string& str, const string& pat) {
  const size_t text_len = str.length();
  const size_t pat_len = pat.length();

  // The empty pattern matches at the very start, even in an empty text.
  if (pat_len == 0)
    return 0;
  // A pattern longer than the text can never match.
  if (pat_len > text_len)
    return -1;

  // Only offsets where the whole pattern fits are tried, so every index
  // below stays strictly inside str. Comparing against str[str.size()]
  // (the implicit terminator) would let a pattern containing '\0' match
  // past the end of the text.
  for (size_t start = 0; start + pat_len <= text_len; ++start) {
    size_t matched = 0;
    while (matched < pat_len && str[start + matched] == pat[matched])
      ++matched;
    if (matched == pat_len)
      return static_cast<int>(start);
  }
  return -1;
}
Java 代码
/**
 * Finds the first occurrence of a byte pattern inside a byte buffer.
 *
 * @param value      buffer to search in
 * @param valueCount number of leading bytes of {@code value} that are searched
 * @param str        pattern to look for
 * @param strCount   number of leading bytes of {@code str} that form the pattern
 * @param fromIndex  offset in {@code value} at which the search starts;
 *                   negative values are treated as 0
 * @return index of the first byte of the first match at or after
 *         {@code fromIndex}, or -1 if there is none; an empty pattern
 *         matches at {@code min(fromIndex, valueCount)}
 */
public static int indexOf(byte[] value, int valueCount, byte[] str, int strCount, int fromIndex) {
    // Clamp a negative start so value[i] below never sees a negative index.
    if (fromIndex < 0) {
        fromIndex = 0;
    }
    // An empty pattern has no first byte to read; it matches at the start position.
    if (strCount <= 0) {
        return Math.min(fromIndex, valueCount);
    }
    byte first = str[0];
    // Last start offset at which the whole pattern still fits.
    int max = (valueCount - strCount);
    for (int i = fromIndex; i <= max; i++) {
        // Skip ahead to the next position holding the pattern's first byte.
        if (value[i] != first) {
            while (++i <= max && value[i] != first);
        }
        // Candidate found: compare the remaining strCount - 1 bytes.
        if (i <= max) {
            int j = i + 1;
            int end = j + strCount - 1;
            for (int k = 1; j < end && value[j] == str[k]; j++, k++);
            if (j == end) {
                // The whole pattern matched.
                return i;
            }
        }
    }
    return -1;
}
KMP
核心思想就是主串不回溯
用next数组记录当前位置失配后,模式串该再从哪里开始
难点就是怎么求next数组,下面有两种办法
C++代码
#include <vector>  // std::vector holds the failure table built inside kmp()

/**
 * @brief Knuth-Morris-Pratt substring search driven by a "next" table.
 *
 * The text index never moves backwards; on a mismatch only the pattern
 * index falls back, using the precomputed table.
 *
 * @param str text to search in
 * @param pat pattern to look for
 * @return int index of the first character of the first match, or -1 when
 *         pat does not occur in str. An empty pat matches at index 0.
 */
int kmp(const string& str, const string& pat) {
  const int text_len = static_cast<int>(str.length());
  const int pat_len = static_cast<int>(pat.length());

  // The empty pattern matches at the start; it also has no table entry 0.
  if (pat_len == 0)
    return 0;
  // A pattern longer than the text can never match.
  if (pat_len > text_len)
    return -1;

  // Build the table: fail[j] is the length of the longest proper prefix of
  // pat[0..j-1] that is also its suffix; fail[0] = -1 is a sentinel meaning
  // "advance the text index".
  std::vector<int> fail(pat_len);
  fail[0] = -1;
  for (int k = 0, j = 1; j < pat_len; ++j) {
    fail[j] = k;
    while (k > 0 && pat[j] != pat[k])
      k = fail[k];
    if (pat[j] == pat[k])
      ++k;
  }

  // Match: i walks the text, j walks the pattern.
  int i = 0;
  int j = 0;
  while (i < text_len) {
    if (j == -1 || str[i] == pat[j]) {
      ++i;  // characters agree (or sentinel hit): advance both
      ++j;
    } else {
      j = fail[j];  // mismatch: fall back in the pattern only
    }
    if (j == pat_len)
      return i - j;  // index of the first matched character
  }
  return -1;
}
/**
 * @brief Fill the KMP "next" table for a pattern.
 *
 * After the call, next[0] is -1 and, for j >= 1, next[j] is the length of
 * the longest proper prefix of t[0..j-1] that is also its suffix.
 *
 * @param next output array; must have room for at least t.length() entries.
 *             Nothing is written when t is empty.
 * @param t pattern
 */
void get_next(int next[], const string& t) {
  const int pat_len = static_cast<int>(t.length());
  // With an empty pattern there is no entry to fill, and the unsigned
  // expression t.length() - 1 would wrap around to a huge value.
  if (pat_len == 0)
    return;

  next[0] = -1;
  int j = 0;   // last position whose table entry is already known
  int k = -1;  // candidate border length; -1 means "no shorter border left"
  while (j < pat_len - 1) {
    if (k == -1 || t[j] == t[k]) {
      ++j;
      ++k;
      next[j] = k;
    } else {
      k = next[k];  // retry with the next shorter border
    }
  }
}
测试代码
/**
 * @brief Demo driver: prints the result of two brute-force searches, each
 *        followed by a newline, then the result of one KMP search.
 */
int main(int argc, char const* argv[]) {
  // Brute-force search on two sample texts.
  cout << bf("sdsdfaaregwr", "sdf") << '\n';
  cout << bf("gsfwesgfvsfghj", "sdf") << '\n';

  // KMP search; the result is printed without a trailing newline.
  const string text = "abcbababcabcdf";
  const string pattern = "bababcabc";
  cout << kmp(text, pattern);
  return 0;
}