Hdu 2222 Keywords Search

最新推荐文章于 2021-05-18 10:56:21 发布

原创最新推荐文章于 2021-05-18 10:56:21 发布 · 745 阅读

0 ·

CC 4.0 BY-SA版权

AC自动机专栏收录该内容

2 篇文章

订阅专栏

Keywords Search

Time Limit: 2000/1000 MS (Java/Others) Memory Limit: 65536/32768 K (Java/Others)
Total Submission(s): 25192 Accepted Submission(s): 8240

Problem Description

In the modern time, Search engine came into the life of everybody like Google, Baidu, etc.
Wiskey also wants to bring this feature to his image retrieval system.
Every image has a long description. When users type some keywords to find an image, the system matches the keywords against the descriptions of the images and shows the image that matches the most keywords.
To simplify the problem, you are given the description of one image and some keywords, and you should tell me how many of the keywords are matched.

Input

The first line contains one integer, the number of test cases that follow.
Each case starts with one integer N, the number of keywords, followed by N keywords. (N <= 10000)
Each keyword contains only the characters 'a'-'z', and its length is at most 50.
The last line of each case is the description, whose length is at most 1000000.

Output

Print how many keywords are contained in the description.

Sample Input

1
5
she
he
say
shr
her
yasherhs

Sample Output

3

Author

Wiskey

Recommend

lcy

模版题：AC自动机

注意：输入的 keyword 可以重复，重复的关键词要分别计数（所以插入时对结点的 val 做累加，而不是直接赋值）。

#include<cstdio>
#include<cstring>
#include<queue>

using namespace std;

// Capacity of the trie node pool (node 0 is the root).
const int maxnode=600000;
// Alphabet size; idx() maps 'a'..'z' onto columns 0..25.
const int SIGMA_SIZE=26;

// ch[u][c]: child of node u along letter c, or 0 when that child does not exist.
int ch[maxnode][SIGMA_SIZE];
// val[u]:  accumulated payload of the keywords ending at node u (0 = not a keyword end;
//          print() overwrites it with -1 once the node has been counted).
// f[u]:    failure link of node u, filled in by getFail().
// last[u]: first node reachable through failure links from u that ends a keyword, or 0 if none.
int val[maxnode],f[maxnode],last[maxnode];
int sz,Count;													// sz: number of trie nodes in use; Count: running total of matched keywords
// Buffer that main() reads the description text into.
char S[1100000];

// Converts a character to its 0-based offset from 'a' (so 'a' -> 0, 'b' -> 1, ...).
int idx(char c)
{
	const int offset=c-'a';
	return offset;
}

// Credits every not-yet-counted keyword that ends at node j or further along
// its last[] chain: each node's val is added to Count and then replaced by -1
// so that it is never counted twice. The walk stops at the root (0) or at the
// first node that has already been counted.
// i is the text position of the match; it is not used in the computation.
void print(int i,int j)
{
	(void)i;
	while(j!=0 && val[j]!=-1)
	{
		Count+=val[j];
		val[j]=-1;
		j=last[j];
	}
}
//插入字符串s，附加信息为v。注意v必须非0，因为0代表“本结点不是单词结点”
void insert(char *s,int v)
{
	int u=0,n=strlen(s);
	for(int i=0;i<n;i++){
		int c=idx(s[i]);
		if(!ch[u][c]){                                           //结点不存在
			memset(ch[sz],0,sizeof(ch[sz]));
			val[sz]=0;											//中间结点的附加信息为0
			ch[u][c]=sz++;										//新建结点
		}
		u=ch[u][c];												//往下走
	}
	val[u]+=v;													//字符串的最后一个字符的附加信息为v
}

void getFail()
{
	queue<int> q;
	f[0]=0;

	//初始化队列
	for(int c=0;c<SIGMA_SIZE;c++)
	{
		int u=ch[0][c];
		if(u)
		{
			f[u]=0;	q.push(u);	last[u]=0;
		}
	}

	//按BFS顺序计算失配函数
	while(!q.empty())
	{
		int r=q.front();	q.pop();
		for(int c=0;c<SIGMA_SIZE;c++)
		{
			int u=ch[r][c];
			if(!u)
				continue;
			q.push(u);
			int v=f[r];
			while(v && !ch[v][c])   v=f[v];
			f[u]=ch[v][c];
			last[u]=val[f[u]]?f[u]:last[f[u]];
		}
	}
}

//在文本串T中找模版
void find(char *T)
{
	int n=strlen(T);
	int j=0;											//当前结点编号,初始为根结点
	for(int i=0;i<n;i++)								//文本串当前指针
	{
		int c=idx(T[i]);
		while(j && !ch[j][c])    j=f[j];				//顺着失配边走,直到可以匹配
		j=ch[j][c];
		if(val[j])
			print(i,j);
		else if(last[j])
			print(i,last[j]);
	}
}

int main()
{
	int T,N,i;
	char sh[100];
	scanf("%d",&T);
	while(T--)
	{
		scanf("%d",&N);
		Count=0;
		sz=1;	memset(ch[0],0,sizeof(ch[0]));			//Tri树的初始化
		for(i=1;i<=N;i++)
		{
			scanf("%s",sh);
			insert(sh,1);
		}
		getFail();
		scanf("%s",S);
		find(S);
		printf("%d\n",Count);
	}

	return 0;
}