Trie + AC自动机

最新推荐文章于 2022-05-09 16:21:06 发布

Wtothey

最新推荐文章于 2022-05-09 16:21:06 发布

阅读量310

点赞数

分类专栏：字符串

本文链接：https://blog.youkuaiyun.com/weixin_42318552/article/details/99692198

版权

字符串专栏收录该内容

5 篇文章

订阅专栏

Trie 字典树

AC自动机

Trie 字典树

快速查找某个字符串是否存在
快速查询与字符串前缀相关的性质（例如有多少个单词以某个串为前缀）

建立

// Array-backed trie. Node 0 is used as the root by the traversal routines.
struct Trie{
    int ch[maxn][30];   // ch[p][c]: child of node p along letter index c, -1 when absent
    int tot;            // number of nodes allocated so far beyond the root
    int cnt[maxn];      // cnt[p]: how many inserted words end exactly at node p

    // Resets the trie to the empty state: no children anywhere, no words.
    void init(){
        memset(cnt, 0, sizeof(cnt));
        memset(ch, -1, sizeof(ch));   // all-ones bytes make every int equal to -1
        tot = 0;
    }
};

插入单词

// Inserts the NUL-terminated word `str` into the trie, allocating a new node
// for every missing edge on the path, and bumps the word counter of the node
// where the word ends. Characters are mapped to edge indices via `c - 'a'`.
void insert(char* str){
    int node = 0;                       // start at the root
    for(char* s = str; *s; ++s){
        int letter = *s - 'a';
        if(ch[node][letter] == -1){
            ++tot;                      // allocate the next free node index
            ch[node][letter] = tot;
        }
        node = ch[node][letter];
    }
    ++cnt[node];
}

查询

// Looks up the NUL-terminated word `str` in the trie.
// Returns the number of times the word was inserted (cnt of its end node),
// or 0 if the path for the word does not exist.
int find(char* str){
    int node = 0;                       // start at the root
    for(char* s = str; *s; ++s){
        int next = ch[node][*s - 'a'];
        if(next == -1)
            return 0;                   // path breaks: word is absent
        node = next;
    }
    return cnt[node];
}

删除某个单词

// Removes one occurrence of the NUL-terminated word `str` from the trie.
// Nodes are never freed; only the end node's word counter is decremented.
// Does nothing when the path is missing or the end node is merely a prefix.
void erase(char* str){
    int node = 0;                       // start at the root
    for(char* s = str; *s; ++s){
        node = ch[node][*s - 'a'];
        if(node == -1)
            return;                     // word was never inserted
    }
    if(cnt[node] != 0)                  // only a real word end, not a prefix-only node
        cnt[node]--;
}

字典树上的动态规划

void find(char* str, int st){
    int p = 0;
    for(int i=st; str[i]; ++i){
        if(ch[p][str[i] - 'a'] == -1)    return;
        p = ch[p][str[i] - 'a'];
        if(cnt[p])    dp[i] += dp[st - 1];
    }
}

AC自动机

KMP 算法可以用来处理单个串的匹配，而 AC 自动机则是处理多个串的匹配。
也就是，给定一个由若干字符串组成的集合 $S$ 和一个长字符串 $T$，问 $T$ 有几个子串属于 $S$。（可以具体到是哪几个，也可以求出每个子串出现了几次）

建完Trie字典树后，建失配边

last[ ]：沿着失配边到达的下一个单词结束节点

#define SIGMA_SIZE 26
#define root 0
// Turns the already-built trie into an Aho-Corasick automaton.
//
// After the call:
//   - fail[u] is the node reached from u on a mismatch;
//   - last[u] is the nearest node on u's fail chain where a word ends
//     (root, i.e. 0, when there is none);
//   - every missing edge ch[p][c] (previously -1) is replaced by the
//     transition the automaton would take, so a query can simply follow
//     ch[state][c] without ever testing for -1.
//
// Because missing edges are overwritten, the plain trie insert/find/erase
// routines that test for -1 must not be used on the same trie afterwards.
void getFail(){
    queue<int> q;
    fail[root] = 0;
    last[root] = 0;
    for(int c=0; c<SIGMA_SIZE; ++c){
        int u = ch[root][c];
        if(u != -1){
            fail[u] = 0;
            last[u] = 0;
            q.push(u);
        }
        else
            ch[root][c] = root;     // missing root edge: stay at the root
    }

    // Breadth-first order guarantees that fail[r], which is strictly
    // shallower than r, already has all of its edges completed.
    while(!q.empty()){
        int r = q.front();
        q.pop();
        for(int c=0; c<SIGMA_SIZE; ++c){
            int u = ch[r][c];
            if(u == -1){
                ch[r][c] = ch[fail[r]][c];      // complete the missing edge
                continue;
            }
            q.push(u);
            // With completed edges the fail target is a single lookup;
            // no walk up the fail chain is needed.
            fail[u] = ch[fail[r]][c];
            last[u] = cnt[fail[u]]? fail[u]: last[fail[u]];
        }
    }
}

查询

// Runs the text `str` through the Aho-Corasick automaton and returns the
// total number of pattern occurrences, i.e. the sum of cnt[] over every
// word-end node reached at every text position.
// Must be called after getFail(), because it follows ch[][] without
// checking for missing edges.
int find(char* str){
    int state = root, res = 0;
    for(int i=0; str[i]; ++i){
        state = ch[state][str[i] - 'a'];
        // Walk the chain of word-end nodes with a separate cursor so the
        // automaton state itself is kept for the next character.
        for(int hit = state; hit != root; hit = last[hit])
            res += cnt[hit];
    }
    return res;
}