/*
Item:
    Boyer-Moore string matching algorithm
Core idea:
    On a mismatch, shift the pattern by as many positions as is safe, so that
    the search finishes faster.
Auxiliary tables:
    Bad-Char array:
        case 1: the pattern contains the bad character: align the bad character
                with its rightmost occurrence in the pattern. (Caution: a
                negative shift is not allowed.)
        case 2: the pattern does not contain the bad character: shift the
                pattern past the bad character.
    Good-Suffix array:
        case 1: the pattern contains another substring equal to the good suffix:
                shift so that this earlier occurrence is aligned with the part
                of the target text the good suffix just matched.
        case 2: the pattern does not contain the substring of case 1: search for
                the longest substring characterized by:
                pattern[position-s...position] = pattern[0...s]
        case 3: if no substring matches the good suffix at all, shift by the
                full length of the pattern.
*/
#include <limits.h>
#include <string.h>

#include <iostream>
#include <vector>
#define SIZE 1000
// Build the Boyer-Moore bad-character table.
//
// pattern: the pattern to preprocess (at least `length` readable characters).
// bc:      output table with CHAR_MAX entries, indexed by character code
//          0 .. CHAR_MAX - 1.
// length:  number of characters in the pattern.
//
// After the call, bc[c] is the distance from the rightmost occurrence of c
// in pattern[0 .. length-2] to the last pattern position, or `length` when c
// does not occur there. The last pattern character is deliberately left out
// of the scan so that the recorded distance is never 0.
//
// Pattern characters whose code falls outside [0, CHAR_MAX) (negative values
// when `char` is signed, or the value CHAR_MAX itself) have no slot in the
// table and are skipped rather than written out of bounds.
void BM_BC(const char *pattern, int bc[], int length)
{
    if(!pattern || !bc)
        return;

    // Default shift for characters that never occur in the pattern.
    for(int code = 0; code < CHAR_MAX; code++)
    {
        bc[code] = length;
    }

    // Later (more to the right) occurrences overwrite earlier ones, so each
    // entry ends up holding the distance for the rightmost occurrence.
    for(int i = 0; i < length - 1; i++)
    {
        const int code = pattern[i];
        if(code < 0 || code >= CHAR_MAX)
            continue; // no table slot for this character
        bc[code] = length - i - 1;
    }
}
// Compute the suffix-length table used by the good-suffix preprocessing.
//
// pattern: the pattern (at least `length` readable characters).
// length:  number of characters in the pattern.
// suffix:  output array with at least `length` entries.
//
// After the call, suffix[i] is the length of the longest substring that ends
// at index i and is also a suffix of the whole pattern; suffix[length-1] is
// therefore `length`. Quadratic in the worst case (each position is compared
// character by character).
//
// Nothing is written when the pattern is empty or a pointer is null.
void SUFFIX_NAIVE(const char *pattern, int length, int suffix[])
{
    if(!pattern || !suffix || length <= 0)
        return;

    // The whole pattern is trivially a suffix of itself.
    suffix[length - 1] = length;

    // i is the end position of the substring being compared.
    for(int i = length - 2; i >= 0; i--)
    {
        int j = i;
        // Walk leftwards while the substring ending at i keeps matching the
        // pattern's tail. The j >= 0 bound stops the walk at the start of the
        // pattern instead of reading pattern[-1] when a whole prefix matches.
        while(j >= 0 && pattern[j] == pattern[length - 1 - i + j])
            j--;
        suffix[i] = i - j;
    }
}
// Build the Boyer-Moore good-suffix shift table.
//
// pattern: the pattern to preprocess (at least `length` readable characters).
// gs:      output array with at least `length` entries.
// length:  number of characters in the pattern.
//
// After the call, gs[i] is the shift to apply when a mismatch happens at
// pattern index i, i.e. after pattern[i+1 .. length-1] has already matched.
// Every entry is at least 1.
//
// Nothing is written when the pattern is empty or a pointer is null.
void BM_GS(const char *pattern, int gs[], int length)
{
    if(!pattern || !gs || length <= 0)
        return;

    // Sized from the pattern, so long patterns cannot overflow a fixed buffer.
    std::vector<int> suffix(length);
    SUFFIX_NAIVE(pattern, length, &suffix[0]);

    // A substring ending at index i spans at most i + 1 characters; enforce
    // that bound so the table indexes derived from it below are never negative.
    for(int i = 0; i < length; i++)
    {
        if(suffix[i] > i + 1)
            suffix[i] = i + 1;
    }

    // Case 3 (no reusable part of the good suffix): shift by the whole length.
    for(int i = 0; i < length; i++)
    {
        gs[i] = length;
    }

    // Case 2: a prefix pattern[0..i] is also a suffix of the pattern.
    // Such a border can only be reused at mismatch positions t whose good
    // suffix is at least as long as the border, i.e. t < length - 1 - i.
    // i runs from long borders to short ones and t only moves forward, so
    // each position keeps the smallest valid shift.
    int t = 0;
    for(int i = length - 1; i >= 0; i--)
    {
        if(suffix[i] == i + 1)
        {
            for( ; t < length - 1 - i; t++)
            {
                // Still holding the default: no longer border claimed it.
                if(gs[t] == length)
                {
                    gs[t] = length - 1 - i;
                }
            }
        }
    }

    // Case 1: the good suffix occurs again inside the pattern, ending at i.
    // length - 1 - suffix[i] is the mismatch position for that suffix and
    // length - 1 - i is the shift aligning the two occurrences. i ascends, so
    // the rightmost occurrence (smallest shift) is written last and wins.
    for(int i = 0; i <= length - 2; i++)
    {
        gs[length - 1 - suffix[i]] = length - 1 - i;
    }
}
// Search `text` for the first occurrence of `pattern` using Boyer-Moore.
//
// pattern:  the pattern to look for.
// p_length: number of characters in the pattern.
// text:     the text to search (not modified).
// t_length: number of characters in the text.
//
// Returns the 0-based index of the first match, or -1 when there is none.
// (-1 is used because 0 is itself a valid match position.) An empty pattern
// matches at index 0. Null pointers or negative lengths yield -1.
//
// Characters whose code falls outside [0, CHAR_MAX) have no entry in the
// bad-character table; for those only the good-suffix shift is used.
int Boyer_Moore(const char *pattern, int p_length, char *text, int t_length)
{
    if(!pattern || !text || p_length < 0 || t_length < 0)
        return -1;
    if(p_length == 0)
        return 0;
    if(p_length > t_length)
        return -1;

    int bc[CHAR_MAX];
    // Sized from the pattern, so long patterns cannot overflow a fixed buffer.
    std::vector<int> gs(p_length);
    BM_BC(pattern, bc, p_length);
    // Both tables describe the pattern, so both are built with p_length.
    BM_GS(pattern, &gs[0], p_length);

    // p is the text index aligned with pattern[0]; the last valid alignment
    // is t_length - p_length, inclusive.
    for(int p = 0; p <= t_length - p_length; )
    {
        // Compare right to left; i ends at the mismatch index or at -1.
        int i = p_length - 1;
        while(i >= 0 && pattern[i] == text[i + p])
            i--;

        if(i < 0)
            return p;

        int shift = gs[i];

        const int bad = text[i + p];
        if(bad >= 0 && bad < CHAR_MAX)
        {
            // Distance that lines the bad character up with its rightmost
            // occurrence in the pattern; may be <= 0, then gs[i] is used.
            const int bc_shift = bc[bad] - p_length + 1 + i;
            if(bc_shift > shift)
                shift = bc_shift;
        }

        // Always move forward, whatever the tables contain.
        if(shift < 1)
            shift = 1;
        p += shift;
    }
    return -1;
}
// Demo driver: searches a fixed sentence for a fixed word and prints the
// value returned by Boyer_Moore on its own line.
int main(int argc, char *argv[])
{
    char haystack[] = "The good condition of the future.";
    char needle[] = "cond";

    const int result = Boyer_Moore(needle, strlen(needle), haystack, strlen(haystack));
    std::cout << result << std::endl;

    return 0;
}
/*
An optimized version of the suffix computation (unfinished draft, not compiled).
NOTE: the draft below uses `m` where the parameter is named `length`.
void SUFFIX(const char *pattern, int length, int suffix[])
{
int f,g,i;
suffix[length - 1] = m;
g = m - 1;
for(i = m - 2; i >= 0; --i)
{
if(i > g && suffix[i + m - 1 -f] < i -g)
suffix[i] = suffix[i + m - 1 -f];
else
{
if(i < g)
{
g = i;
}
f = i;
while(g >= 0 && pattern[g] == pattern[g + m - 1 - f])
--g;
suffix[i] = f - g;
}
}
}
*/
// String matching: Boyer-Moore
// (Footer of the source web page: "latest recommended article published 2025-06-13 14:09:45")