HDU 2222
给定一个单词表,求其中有多少个单词在一个较长的字符串中出现过。
简单介绍下,AC自动机通过构建字典树和fail指针树,同样应用公共前后缀的思想,实现匹配时间的优化。
#include <cstdio>
#include <cstring>
#include <queue>
#include <algorithm>
using namespace std;
const int M = 1000010; // size of the text buffer (longest text plus terminator)
const int N = 500005;  // capacity of the trie node pool

/**
 * Aho-Corasick automaton over the lowercase alphabet 'a'..'z', backed by a
 * fixed pool of N nodes (node 0 is the root).
 *
 * Usage: init(), Insert() every pattern, build(), then match() a text.
 * match() reports how many inserted patterns (duplicates counted separately)
 * occur in the text; every pattern is counted at most once until the next
 * init().
 */
struct Aho{
    enum { ALPHA = 26 }; // alphabet size

    struct Node{
        int nex[ALPHA]; // child index per letter; 0 means "no child"
        int fail;       // failure link; -1 for the root once build() has run
        int endd;       // patterns ending here that are still uncounted;
                        // -1 once Get() has consumed this node
    }node[N];
    int Size;            // number of pool nodes currently in use
    std::queue<int> que; // BFS work queue used by build()

    /** Maps 'a'..'z' to 0..25; returns -1 for any other character. */
    static int letterIndex(char c){
        return (c >= 'a' && c <= 'z') ? c - 'a' : -1;
    }

    /**
     * Takes the next free pool node, clears it and returns its index.
     * The caller must make sure that Size < N.
     */
    int newnode(){
        std::memset(node[Size].nex, 0, sizeof(node[Size].nex));
        node[Size].fail = 0;
        node[Size].endd = 0;
        return Size++;
    }

    /** Resets the automaton to a lone root node. Call before the first Insert(). */
    void init(){
        while(!que.empty()) que.pop();
        Size = 0;
        newnode();
    }

    /**
     * Adds the pattern s to the trie.
     *
     * @param s NUL-terminated pattern made of the letters 'a'..'z'.
     * @return true if the pattern was added; false (trie left untouched) if
     *         it contains a character outside 'a'..'z' or if the node pool
     *         cannot hold it.
     *
     * An empty pattern is accepted but is never counted by match().
     */
    bool Insert(const char *s){
        const std::size_t len = std::strlen(s);
        // Validate first so that a rejected pattern leaves no partial path.
        for(std::size_t i = 0; i < len; ++i)
            if(letterIndex(s[i]) < 0) return false;

        // Follow the prefix that is already present in the trie.
        int now = 0;
        std::size_t pos = 0;
        while(pos < len && node[now].nex[s[pos] - 'a'] != 0){
            now = node[now].nex[s[pos] - 'a'];
            ++pos;
        }

        // The remaining letters each need a fresh node; refuse to overflow the pool.
        if(len - pos > static_cast<std::size_t>(N - Size)) return false;
        for(; pos < len; ++pos){
            const int child = newnode();
            node[now].nex[s[pos] - 'a'] = child;
            now = child;
        }

        if(node[now].endd < 0) node[now].endd = 0; // node had been consumed by Get()
        node[now].endd++;
        return true;
    }

    /**
     * Computes the failure link of every node by a breadth-first walk of the
     * trie. Also clears the "already counted" marks left by Get() (patterns
     * that were counted stay counted), because the marks are only meaningful
     * for the failure links they were created under.
     */
    void build(){
        node[0].fail = -1;
        que.push(0);
        while(!que.empty()){
            const int u = que.front();
            que.pop();
            if(node[u].endd < 0) node[u].endd = 0;
            for(int i = 0; i < ALPHA; i++){
                const int child = node[u].nex[i];
                if(child == 0) continue;
                // Longest proper suffix of u's string that can be extended by letter i.
                int v = node[u].fail;
                while(v != -1 && node[v].nex[i] == 0)
                    v = node[v].fail;
                node[child].fail = (v == -1) ? 0 : node[v].nex[i];
                que.push(child);
            }
        }
    }

    /**
     * Collects the uncounted patterns ending at node `now` and at every node
     * on its failure chain, marking each visited node as consumed.
     *
     * The walk stops at the root or at the first consumed node: a consumed
     * node implies the rest of its chain was consumed by the same earlier
     * walk, so every node is processed at most once overall.
     *
     * @return number of patterns newly counted.
     */
    int Get(int now){
        int res = 0;
        while(now > 0 && node[now].endd != -1){
            res += node[now].endd;
            node[now].endd = -1; // each pattern is counted only once
            now = node[now].fail;
        }
        return res;
    }

    /**
     * Scans the text s and returns how many inserted patterns occur in it.
     * Each pattern is counted once, even if it occurs several times or if
     * match() is called again.
     *
     * Characters outside 'a'..'z' cannot be part of any pattern, so they
     * simply restart the scan from the root.
     *
     * @param s NUL-terminated text.
     */
    int match(const char *s){
        int res = 0, now = 0;
        for(const char *p = s; *p != '\0'; ++p){
            const int k = letterIndex(*p);
            if(k < 0){
                now = 0;
                continue;
            }
            // Fall back along failure links until letter k can be consumed.
            while(now != 0 && node[now].nex[k] == 0)
                now = node[now].fail;
            now = node[now].nex[k]; // stays at the root (0) if the root has no such child
            // Always inspect the failure chain: a shorter pattern may end here
            // even when the current node itself ends none.
            res += Get(now);
        }
        return res;
    }
}aho;
char s[M]; // input buffer shared by the patterns and the text

/**
 * Reads T test cases from standard input. Each case consists of a count n,
 * n patterns, and one text; for every case the number of patterns found in
 * the text is printed on its own line.
 *
 * Processing stops quietly as soon as the input is exhausted or malformed,
 * so no variable is ever used without having been read successfully.
 * Tokens are assumed to fit into the buffer s.
 */
int main()
{
    int T;
    if(scanf("%d",&T) != 1)
        return 0;
    while(T-- > 0){ // "> 0" keeps a negative count from looping
        aho.init();
        int n;
        if(scanf("%d",&n) != 1)
            return 0;
        for(int i = 0; i < n; i++){
            if(scanf("%s",s) != 1)
                return 0;
            aho.Insert(s);
        }
        aho.build();
        if(scanf("%s",s) != 1)
            return 0;
        printf("%d\n",aho.match(s));
    }
    return 0;
}