字符串匹配-Boyer-Moore

最新推荐文章于 2025-06-13 14:09:45 发布

六斤的小猪猪呀

最新推荐文章于 2025-06-13 14:09:45 发布

阅读量159

点赞数

分类专栏：木小猪算法笔记文章标签：算法

本文链接：https://blog.youkuaiyun.com/baidu_38310109/article/details/105325666

版权

木小猪算法笔记专栏收录该内容

2 篇文章

订阅专栏

/*
Item: 
    Boyer-Moore string matching algorithm
Internal support:
    On a mismatch, try to shift by more than one position at once so the search finishes faster.
Assistant tools:
    Bad-Char array:
        case1: pattern contains the Bad-Char: align the Bad-Char with the 
                right-most occurrence of the same char in the pattern. (caution: a 
                    negative step length is not allowed.)
        case2: pattern does not contain the Bad-Char: just shift the whole pattern 
            past the Bad-Char (i + 1 positions when the mismatch is at pattern index i).
    Good-Suffix array:
        case1: pattern contains another sub string that matches the Good-Suffix:
                shift so that this earlier occurrence is aligned with the part of
                the target text that the Good-Suffix matched.
        case2: pattern does not contain the sub string of case1: search for the 
                longest sub string characterized by: 
                        pattern[position-s...position] = pattern[0...s] 
        case3: if no sub string matches any part of the Good-Suffix,
                shift by the length of the whole pattern string.
*/
#include<limits.h>
#include<string.h>
#include<iostream>
#include<vector>

// Element count used for fixed-size work tables; an array declared with SIZE
// holds at most this many entries.
#define SIZE 1000

/*
Build the bad-character shift table used by the Boyer-Moore search.

After the call, bc[c] is the distance from the right-most occurrence of c in
pattern[0 .. length-2] to the last position of the pattern. Characters that do
not occur there keep the default shift `length`.

Parameters:
    pattern: the pattern; `length` characters are read.
    bc:      output table with CHAR_MAX entries (indices 0 .. CHAR_MAX-1);
             every entry is overwritten.
    length:  number of characters in the pattern.

Characters are indexed by their unsigned value. A character whose value does
not fit the table (>= CHAR_MAX, e.g. bytes with the high bit set when plain
char is signed) gets no entry, so callers must not look such characters up.
*/
void BM_BC(const char *pattern, int bc[], int length)
{
    // Default shift for a character that is absent from the pattern: the
    // whole pattern length.
    for(int c = 0; c < CHAR_MAX; c++)
    {
        bc[c] = length;
    }
    // Record, for every character except the last one of the pattern, how far
    // its right-most occurrence is from the end. Later occurrences overwrite
    // earlier ones, so the right-most one wins.
    for(int i = 0; i < length - 1; i++)
    {
        // Index by unsigned value: a plain (possibly signed) char could
        // produce a negative subscript.
        const int c = static_cast<unsigned char>(pattern[i]);
        if(c < CHAR_MAX)
        {
            bc[c] = length - i - 1;
        }
    }
}

/*
Compute the suffix-length table of a pattern by direct comparison.

suffix[i] is the length of the longest sub string that ends at index i and is
also a suffix of the whole pattern; suffix[length - 1] is always `length`.

Parameters:
    pattern: the pattern; indices 0 .. length-1 are read.
    length:  number of characters in the pattern; nothing is written when it
             is not positive.
    suffix:  output table with at least `length` entries.
*/
void SUFFIX_NAIVE(const char *pattern, int length, int suffix[])
{
    // An empty pattern has no entries to fill (and length - 1 would be -1).
    if(length <= 0)
    {
        return;
    }
    // The whole pattern is trivially a suffix of itself.
    suffix[length - 1] = length;
    // i is the index at which the candidate sub string ends.
    for(int i = length - 2; i >= 0; i--)
    {
        int j = i;
        // Walk left while the sub string ending at i keeps agreeing with the
        // pattern's tail. Stop at the front of the pattern so that
        // pattern[-1] is never read.
        while(j >= 0 && pattern[j] == pattern[length - 1 - i + j])
            j--;
        suffix[i] = i - j;
    }
}

/*
Build the good-suffix shift table used by the Boyer-Moore search.

gs[i] is the shift to apply when the comparison (running right to left) fails
at pattern index i, i.e. after pattern[i+1 .. length-1] has matched the text.

Parameters:
    pattern: the pattern; indices 0 .. length-1 are read.
    gs:      output table with at least `length` entries.
    length:  number of characters in the pattern; nothing is written when it
             is not positive.
*/
void BM_GS(const char *pattern, int gs[], int length)
{
    if(length <= 0)
    {
        return;
    }
    // Sized to the pattern so long patterns cannot overflow a fixed buffer.
    std::vector<int> suffix(static_cast<std::vector<int>::size_type>(length));
    SUFFIX_NAIVE(pattern, length, suffix.data());

    // Case 3 (default): nothing can be reused, shift by the whole pattern.
    for(int i = 0; i < length; i++)
    {
        gs[i] = length;
    }

    // Case 2: a prefix of the pattern equals a suffix of it.
    int t = 0;
    for(int i = length - 1; i >= 0; i--)
    {
        // True when pattern[0 .. i] is also a suffix of the pattern.
        if(i + 1 == suffix[i])
        {
            // Only mismatch positions left of that suffix (t < length - 1 - i)
            // may use this shift. Running t up to length - 1 regardless of i
            // would store a shift of 0 on the first iteration, because
            // i == length - 1 always satisfies the test above.
            for( ; t < length - 1 - i; t++)
            {
                // Still holding the default: no longer prefix claimed it.
                if(length == gs[t])
                {
                    gs[t] = length - 1 - i;
                }
            }
        }
    }

    // Case 1: the matched suffix re-occurs inside the pattern, ending at i.
    for(int i = 0; i <= length - 2; i++)
    {
        // length - 1 - suffix[i]: mismatch position this occurrence serves.
        // length - 1 - i: shift that aligns the occurrence with the text.
        gs[length - 1 - suffix[i]] = length - 1 - i;
    }
}

/*
Find the first occurrence of `pattern` in `text` with the Boyer-Moore method.

Parameters:
    pattern:  the pattern; p_length characters are read.
    p_length: number of characters in the pattern.
    text:     the text to search; t_length characters are read.
    t_length: number of characters in the text.

Returns:
    The zero-based index in `text` where the first match starts, 0 for an
    empty pattern, or -1 when there is no match or a length is negative.
    (-1 rather than 0, so that "not found" cannot be confused with a match at
    index 0.)
*/
int Boyer_Moore(const char *pattern, int p_length, char *text, int t_length)
{
    if(p_length < 0 || t_length < 0)
    {
        return -1;
    }
    if(p_length == 0)
    {
        return 0;
    }
    if(p_length > t_length)
    {
        return -1;
    }

    int bc[CHAR_MAX];
    // Sized to the pattern so long patterns cannot overflow a fixed buffer.
    std::vector<int> gs(static_cast<std::vector<int>::size_type>(p_length));
    BM_BC(pattern, bc, p_length);
    // Both tables describe the pattern, so both take the pattern length.
    BM_GS(pattern, gs.data(), p_length);

    // p is the text index aligned with pattern[0]. The last valid alignment
    // is t_length - p_length, which must be included.
    for(int p = 0; p <= t_length - p_length; )
    {
        // Compare right to left; i ends at the mismatch index or at -1.
        int i = p_length - 1;
        while(i >= 0 && pattern[i] == text[i + p])
        {
            i--;
        }
        if(i < 0)
        {
            return p;
        }

        int shift = gs[i];
        // Index the bad-character table by unsigned value. Characters that do
        // not fit the CHAR_MAX-entry table fall back to the good-suffix shift.
        const int bad = static_cast<unsigned char>(text[i + p]);
        if(bad < CHAR_MAX)
        {
            const int bc_shift = bc[bad] - p_length + 1 + i;
            if(bc_shift > shift)
            {
                shift = bc_shift;
            }
        }
        // Always advance, so the loop terminates whatever the tables hold.
        if(shift < 1)
        {
            shift = 1;
        }
        p += shift;
    }

    return -1;
}

/*
Demo driver: searches a fixed sample sentence for a fixed word and prints the
value returned by Boyer_Moore on its own line. Command-line arguments are not
used.
*/
int main(int argc, char *argv[])
{
    char haystack[] = "The good condition of the future.";
    char needle[] = "cond";

    // Run the search first, then report whatever it returned.
    const int hit = Boyer_Moore(needle, strlen(needle), haystack, strlen(haystack));
    std::cout << hit << std::endl;

    return 0;
}

/*
A faster way to compute the suffix table (unfinished: the body still refers to an undeclared `m` where the parameter is named `length`):

void SUFFIX(const char *pattern, int length, int suffix[])
{
    int f,g,i;
    suffix[length - 1] = m;
    g = m - 1;
    for(i = m - 2; i >= 0; --i)
    {
        if(i > g && suffix[i + m - 1 -f] < i -g)
            suffix[i] = suffix[i + m - 1 -f];
        else
        {
            if(i < g)
            {
                g = i;
            }
            f = i;
            while(g >= 0 && pattern[g] == pattern[g + m - 1 - f])
                --g;
            suffix[i] = f - g;
        }
    }
}

*/