hdu 2222 Keywords Search（AC自动机模板题）

最新推荐文章于 2020-03-09 18:52:11 发布

原创最新推荐文章于 2020-03-09 18:52:11 发布 · 300 阅读

0 ·

CC 4.0 BY-SA版权

文章标签：

#AC自动机 #HDU

------OJ题解------ 同时被 2 个专栏收录

104 篇文章

订阅专栏

AC自动机

3 篇文章

订阅专栏

本文详细解析了一道AC自动机模板题，介绍了如何使用AC自动机进行字符串匹配，包括构建AC自动机、插入关键字、建立失败指针以及查询文本中关键字出现次数的过程。文章通过具体代码展示了数组和指针两种实现方式。

题目链接：http://acm.hdu.edu.cn/showproblem.php?pid=2222

Problem Description

In the modern time, Search engine came into the life of everybody like Google, Baidu, etc.
Wiskey also wants to bring this feature to his image retrieval system.
Every image have a long description, when users type some keywords to find the image, the system will match the keywords with description of image and show the image which the most keywords be matched.
To simplify the problem, giving you a description of image, and some keywords, you should tell me how many keywords will be match.

Input

First line will contain one integer means how many cases will follow by.
Each case will contain two integers N means the number of keywords and N keywords follow. (N <= 10000)
Each keyword will only contains characters 'a'-'z', and the length will be not longer than 50.
The last line is the description, and the length will be not longer than 1000000.

Output

Print how many keywords are contained in the description.

Sample Input

1

5

she

he

say

shr

her

yasherhs

Sample Output

3

题目大意：t组数据，n个单词，一个文本串，问有多少个单词在文本串中出现过

题目思路：AC自动机模板题，板子就行

数组代码：

#include<cstdio>
#include<cmath>
#include<cstring>
#include<string>
#include<cstdlib>
#include<algorithm>
#include<iostream>
#include<queue>
#include<stack>
#include<map>

using namespace std;

// Inclusive counting loops: FOU counts i upwards from x to y, FOD counts i downwards from x to y.
// Every macro argument is parenthesized so that a bound such as `a|b` or `c ? p : q`
// is evaluated as one expression instead of being split by operator precedence.
#define FOU(i,x,y) for(int i=(x);i<=(y);i++)
#define FOD(i,x,y) for(int i=(x);i>=(y);i--)
// Fills the whole array `a` byte-wise with `val`; `a` must be a real array (not a pointer),
// because the size comes from sizeof(a).
#define MEM(a,val) memset((a),(val),sizeof(a))
// pi, computed as acos(-1.0) at each use.
#define PI acos(-1.0)

const double EXP = 1e-9;                 // 1e-9; presumably a floating-point tolerance (unused in the code shown)
typedef long long ll;
typedef unsigned long long ull;
const int INF = 0x3f3f3f3f;              // large int sentinel; doubling it still fits in a 32-bit int
const ll MINF = 0x3f3f3f3f3f3f3f3f;      // 64-bit counterpart of INF
const double DINF = 0xffffffffffff;      // 2^48 - 1 stored as a double
const int mod = 1e9+7;                   // 1000000007
const int N = 1e6+5;                     // 1000005

const int MAXN = 5e5+5;   // node capacity: at most 10000 keywords of length <= 50, plus the root
#define son_num 26    // alphabet size; change together with get_id() for other alphabets

/*
 * Array-based Aho-Corasick automaton.
 *
 * Usage per test case: init() -> Insert() for every keyword -> build() -> query().
 * build() overwrites the missing (-1) child links with goto transitions, so Insert()
 * must not be called again after build() without a fresh init().
 * Capacity is not checked: the total number of keyword characters plus one (the root)
 * must not exceed MAXN.
 */
struct Trie{
    int tree[MAXN][son_num];  // tree[u][c]: child of u on letter c; -1 if absent before build(), a goto transition afterwards
    int fail[MAXN];   // fail[u]: node to continue from when the match cannot be extended at u
    int cnt[MAXN];    // cnt[u]: number of inserted keywords that end exactly at node u
    int root,tot;     // root: index of the root node; tot: number of nodes allocated so far

    // Allocates a node with no children and a zero keyword count; returns its index.
    int newnode(){
        for(int i=0;i<son_num;i++)
            tree[tot][i] = -1;
        cnt[tot++] = 0;
        return tot-1;
    }

    // Resets the automaton to a single empty root node.
    void init(){
        tot = 0;
        root = newnode();
    }

    // Maps a character to its child slot (0 .. son_num-1).
    // Returns -1 for a character outside the alphabet so that callers never
    // index tree[][] out of bounds. Adjust for other alphabets.
    int get_id(char c){
        int id = c-'a';
        if(id<0 || id>=son_num)
            return -1;
        return id;
    }

    // Adds keyword s to the trie. Must be called before build().
    // A keyword containing a character outside the alphabet is ignored as a whole:
    // it could never be matched, because query() restarts at the root on such characters.
    void Insert(const char *s){
        int len = strlen(s);
        for(int i=0;i<len;i++)      // validate first so a rejected keyword leaves no partial path behind
            if(get_id(s[i])<0)
                return;
        int now = root;
        for(int i=0;i<len;i++){
            int id = get_id(s[i]);
            if(tree[now][id]==-1)   // no child on this letter yet: create it
                tree[now][id] = newnode();
            now = tree[now][id];
        }
        cnt[now]++;
    }

    // Computes the fail links breadth-first and turns every missing child link
    // into a direct transition, so query() never has to walk fail links to advance.
    void build(){
        queue<int>q;
        fail[root] = root;  // the root falls back to itself
        for(int i=0;i<son_num;i++){
            if(tree[root][i]==-1)
                tree[root][i] = root;   // unmatched letter at the root: stay at the root
            else{           // depth-1 nodes always fall back to the root
                fail[tree[root][i]]=root;
                q.push(tree[root][i]);
            }
        }
        while(!q.empty()){
            int now = q.front();
            q.pop();
            for(int i=0;i<son_num;i++){
                if(tree[now][i]==-1)
                    tree[now][i] = tree[fail[now]][i];   // missing child: reuse the transition of the fail node
                else{
                    fail[tree[now][i]] = tree[fail[now]][i];   // child's fail = transition of the parent's fail node on the same letter
                    q.push(tree[now][i]);
                }
            }
        }
    }

    // Returns how many inserted keywords occur in text s (a keyword inserted k times counts k times).
    // Must be called after build(). The call consumes the counts: every matched node's
    // cnt is set to 0, so each keyword is counted once and a repeated query finds nothing new.
    // A character outside the alphabet restarts matching at the root.
    int query(const char *s){
        int len = strlen(s);
        int now = root;
        int ans = 0;
        for(int i=0;i<len;i++){
            int id = get_id(s[i]);
            if(id<0){       // no keyword contains this character: no match can span it
                now = root;
                continue;
            }
            now = tree[now][id];
            int tmp = now;
            while(tmp != root){
                ans+=cnt[tmp];   // keywords ending at this suffix of the text read so far
                cnt[tmp] = 0;    // consume, so the same keyword is not counted again
                tmp = fail[tmp]; // move on to the next shorter matching suffix
            }
        }
        return ans;
    }

    // Prints every node's index, fail link, count and child table to stdout.
    void debug(){
        for(int i = 0;i < tot;i++){
            printf("id = %3d,fail = %3d,cnt = %3d,chi = [",i,fail[i],cnt[i]);
            for(int j = 0;j < son_num;j++)
                printf("%2d",tree[i][j]);
            printf("]\n");
        }
    }
}ac;

char s[1000005];   // shared input buffer: holds up to 1000004 characters plus the terminating NUL

// Reads t test cases. Each case: n keywords, then one description string;
// prints the number of keywords that occur in the description.
// Stops quietly on malformed or truncated input instead of using unread values.
int main()
{
    int  t,n;
    if(scanf("%d",&t)!=1)
        return 0;
    while(t--)
    {
        ac.init();
        if(scanf("%d",&n)!=1)
            break;
        bool ok = true;
        for(int i=0;i<n&&ok;i++)
        {
            // the field width keeps scanf from writing past the end of s
            if(scanf(" %1000004s",s)==1)
                ac.Insert(s);
            else
                ok = false;
        }
        if(!ok)
            break;
        ac.build();
        if(scanf(" %1000004s",s)!=1)
            break;
        printf("%d\n",ac.query(s));
    }
    return 0;
}

指针代码：

#include<cstdio>
#include<cmath>
#include<cstring>
#include<string>
#include<cstdlib>
#include<algorithm>
#include<iostream>
#include<queue>
#include<stack>
#include<map>

using namespace std;

// Inclusive counting loops: FOU counts i upwards from x to y, FOD counts i downwards from x to y.
// Every macro argument is parenthesized so that a bound such as `a|b` or `c ? p : q`
// is evaluated as one expression instead of being split by operator precedence.
#define FOU(i,x,y) for(int i=(x);i<=(y);i++)
#define FOD(i,x,y) for(int i=(x);i>=(y);i--)
// Fills the whole array `a` byte-wise with `val`; `a` must be a real array (not a pointer),
// because the size comes from sizeof(a).
#define MEM(a,val) memset((a),(val),sizeof(a))
// pi, computed as acos(-1.0) at each use.
#define PI acos(-1.0)

const double EXP = 1e-9;                 // 1e-9; presumably a floating-point tolerance (unused in the code shown)
typedef long long ll;
typedef unsigned long long ull;
const int INF = 0x3f3f3f3f;              // large int sentinel; doubling it still fits in a 32-bit int
const ll MINF = 0x3f3f3f3f3f3f3f3f;      // 64-bit counterpart of INF
const double DINF = 0xffffffffffff;      // 2^48 - 1 stored as a double
const int mod = 20071027;
const int N = 1e4+5;                     // 10005

// NOTE(review): neither array is referenced by the code shown here — confirm before removing.
int ans[10];
int vis[505];

#define son_num 26   // number of child slots per trie node
//#define maxn 10010
// One trie node of the pointer-based automaton.
struct node{
    int terminal;          // how many keywords end at this node
    node *fail;            // failure link; NULL until assigned
    node *Next[son_num];   // child per letter slot; NULL when there is no child
    // A fresh node ends no keyword, has no failure link and no children.
    node():terminal(0),fail(NULL){
        int slot=0;
        while(slot<son_num){
            Next[slot]=NULL;
            ++slot;
        }
    }
};

// Converts a letter to its child-slot index, measured as the distance from 'a'.
// Adapt this when the alphabet changes.
int get_id(char c){
    const int offset = c-'a';
    return offset;
}

void Insert(node *root,char *str){//x为该病毒的编号
    node *p=root;
    int len = strlen(str);
    for(int i=0;i<len;i++){
        int index=get_id(str[i]);
        if(p->Next[index]==NULL)
            p->Next[index]=new node();
        p=p->Next[index];
    }
    p->terminal++;
}

// Assigns the failure link of every node in the trie, visiting nodes breadth-first.
// The root's link is NULL; a child of the root links to the root; any other node links
// to the first node on its parent's failure chain that has a child on the same letter,
// or to the root when the chain runs out.
void build_fail(node *root){
    root->fail=NULL;
    std::queue<node *> pending;
    pending.push(root);
    while(!pending.empty()){
        node *parent=pending.front();
        pending.pop();
        for(int letter=0;letter<son_num;letter++){
            node *child=parent->Next[letter];
            if(child==NULL)
                continue;
            node *target=root;      // used for children of the root and when the chain is exhausted
            if(parent!=root){
                for(node *anc=parent->fail;anc!=NULL;anc=anc->fail){
                    if(anc->Next[letter]!=NULL){
                        target=anc->Next[letter];
                        break;
                    }
                }
            }
            child->fail=target;
            pending.push(child);
        }
    }
}

//询问主串中含有多少个关键字
int query(node *root,char *str){
    int cnt=0;
    int len=strlen(str);
    node *p=root;
    for(int i=0;i<len;i++){
        int index=get_id(str[i]);
        while(p->Next[index]==NULL&&p!=root)
            p=p->fail;
        p=p->Next[index];
        if(p==NULL) p=root;
        node *temp=p;
        while(temp!=root){
            cnt+=temp->terminal;
            temp->terminal = 0;     //防止重复加字符串
            temp=temp->fail;

        }
    }
    return cnt;
}

// Frees the subtree rooted at `now`: all children first, then the node itself.
// A NULL argument is a no-op.
void deal(node *now){
    if(now != NULL){
        int slot = 0;
        while(slot < son_num){
            deal(now->Next[slot]);   // the recursive call returns immediately for an empty slot
            ++slot;
        }
        delete now;
    }
}

char s[1000005];   // shared input buffer: holds up to 1000004 characters plus the terminating NUL

// Reads t test cases. Each case: n keywords, then one description string;
// prints the number of keywords that occur in the description.
// A fresh trie is allocated per case and always released, including when the
// input turns out to be malformed or truncated.
int main()
{
    //freopen("in.txt","r",stdin);
    //freopen("out.txt","w",stdout);
    int n,t;
    if(scanf("%d",&t)!=1)
        return 0;
    while(t--){
        if(scanf("%d",&n)!=1)
            break;
        node *root = new node();
        bool ok = true;
        for(int i=1;i<=n&&ok;i++){
            // the field width keeps scanf from writing past the end of s
            if(scanf(" %1000004s",s)==1)
                Insert(root,s);
            else
                ok = false;
        }
        if(ok&&scanf(" %1000004s",s)==1){
            build_fail(root);
            int tot = query(root,s);
            printf("%d\n",tot);
        }
        else
            ok = false;
        deal(root);
        if(!ok)
            break;
    }
    return 0;
}