题意:给你若干个keywords,再给你一段文章,问有多少个keyword在文章中出现过(同一个keyword在文章中出现多次只算一次,重复给出的相同keyword分别计数)。
思路:这里要用到多模式匹配算法——AC自动机。AC自动机需要KMP和字典树的知识:匹配在字典树上进行,失配时使用类似KMP的失配指针(fail指针)。如果失配,就沿着失配指针跳到某个节点继续匹配;因为是多模式匹配,每次失配跳转后都会从某个keyword的某个前缀处继续匹配,而不必从头开始,这样就节省了很多时间。
话说第一次听到AC自动机我竟天真的以为是会自动AC题目的算法...orz
参考:
代码:
#include<cstdio>
#include<vector>
#include<stack>
#include<queue>
#include<cstring>
#include<string>
#include<cmath>
#include<cstdlib>
#include<algorithm>
// Shorthand for long long; not used in the visible part of this file.
#define ll long long
// Capacity of the BFS queue array q and of the text buffer word.
const int maxn = 1000000+5;
// maxm, MOD and INF are not referenced in the visible part of this file.
const int maxm = 100000+5;
const int MOD = 1e7;
const int INF = 0x3f3f3f3f;
using namespace std;
// One node of the 26-way keyword trie that backs the Aho-Corasick automaton.
struct Trie{
    Trie *next[26]; // child for each lowercase letter (index = letter - 'a'); NULL when absent
    Trie *fail;     // failure link followed on a mismatch; NULL until buildFail() assigns it
    int sum;        // number of inserted keywords ending at this node;
                    // ac_automation() overwrites it with -1 once the node has been counted

    // Creates a node with no children, no failure link and no keywords.
    // The members are initialised through the initialiser list instead of
    // memset(next,NULL,sizeof(next)): NULL is a null pointer constant, not an
    // int fill byte, so that call draws a conversion warning and does not
    // compile on implementations that define NULL as nullptr.
    Trie() : next(), fail(NULL), sum(0) {}
};
// Root of the keyword trie; main() allocates a fresh one for every test case.
Trie *root;
Trie *q[maxn]; // plain array used as a FIFO queue by buildFail()
// Read position (head) and write position (tail) inside q.
int head,tail;
// Adds the NUL-terminated keyword `s` to the trie rooted at the global `root`,
// creating missing nodes on the way, and increments `sum` on the node where
// the keyword ends (so inserting the same keyword twice counts it twice).
//
// The trie only has 26 children per node, indexed by `letter - 'a'`. A keyword
// containing any character outside 'a'..'z' cannot be represented and would
// index `next` out of bounds, so such a keyword is skipped entirely.
// An empty keyword is recorded on the root itself, which ac_automation()
// never counts.
void insert(char *s){
    // Validate first so that a rejected keyword leaves no partial path behind.
    for(int i = 0;s[i];i++){
        if(s[i] < 'a' || s[i] > 'z') return;
    }
    Trie *p = root;
    for(int i = 0;s[i];i++){
        int x = s[i] - 'a';
        if(p ->next[x] == NULL){
            p ->next[x] = new Trie();
        }
        p = p ->next[x];
    }
    p ->sum++;
}
void buildFail(){ //计算失配值
head = 0,tail = 1;
q[head] = root;
Trie *p,*temp;
while(head < tail){
temp = q[head++];
for(int i = 0;i <= 25;i++){
if(temp ->next[i]){
if(temp == root){ //父节点为root,fail为root
temp ->next[i] ->fail = root;
}
else{
p = temp ->fail; //查看父节点的fail
while(p){
if(p ->next[i]){
temp ->next[i] ->fail = p ->next[i];
break;
}
p = p ->fail;
}
if(p == NULL) temp ->next[i] ->fail = root;
}
q[tail++] = temp ->next[i];
}
}
}
}
// Scans the NUL-terminated text `ch` through the automaton rooted at `root`
// and returns the accumulated `sum` of every trie node reached, directly or
// through failure links. Each node is counted at most once, so the result is
// the number of inserted keywords that occur somewhere in the text.
//
// buildFail() must have been called first. The scan is destructive: every
// visited node gets sum = -1, so a second call on the same trie does not
// count those keywords again.
//
// Characters outside 'a'..'z' have no edge in the 26-way trie. They reset the
// match state to the root instead of indexing `next` out of bounds.
int ac_automation(char *ch){
    int cnt = 0;
    Trie *p = root; // current automaton state
    for(int i = 0;ch[i];i++){
        int x = ch[i] - 'a';
        if(x < 0 || x >= 26){
            // No keyword can contain this character: restart matching.
            p = root;
            continue;
        }
        // Fall back along failure links until some state has an edge for x.
        while(!p ->next[x] && p != root)
            p = p ->fail;
        p = p ->next[x];
        if(!p) p = root; // not even the root has this edge
        // Collect keywords ending here and at every suffix state. A node with
        // sum < 0 was already visited, and so was the rest of its failure
        // chain, so the walk can stop there.
        for(Trie *temp = p;temp != root && temp ->sum >= 0;temp = temp ->fail){
            cnt += temp ->sum;
            temp ->sum = -1;
        }
    }
    return cnt;
}
// Buffer for the text to be searched; main() fills it with scanf and passes it to ac_automation().
char word[maxn];
int main(){
int T,n;
char s[55];
scanf("%d",&T);
while(T--){
root = new Trie();
scanf("%d",&n);
for(int i = 0;i < n;i++){
scanf("%s",s);
insert(s);
}
scanf("%s",word);
buildFail();
int cnt = ac_automation(word);
printf("%d\n",cnt);
}
return 0;
}