后缀自动机学习

最新推荐文章于 2021-11-20 20:23:20 发布

原创最新推荐文章于 2021-11-20 20:23:20 发布 · 336 阅读

0 ·

CC 4.0 BY-SA版权

字符串专栏收录该内容

4 篇文章

订阅专栏

参考：https://blog.youkuaiyun.com/liyuanshuo_nuc/article/details/53561527

一直搞不明白后缀自动机，看了好久算是有点懂了吧。

后缀自动机是一个有向无环图，每一个点代表了一个状态，每个状态就是一个right集合，表示处在该状态的子串的结束位置可以是这个集合中的数，而处在这个状态的子串可以组成一个集合，他们是一个长度连续的互为后缀的子串集合，其中长度最短的是里面所有串的后缀，长度最长的不是别人的后缀。每个状态有son边，表示加入一个字符后转移到的状态，每个状态还有一个parent边，指向其父亲，表示处在状态中的最短串去掉最前面一个字符后所到的状态。deep[i]表示的是状态i的最大的串的长度。加上拓扑后可以求出每个状态的right集合大小。

Now you have a string consisting of uppercase letters and two integers $A$ and $B$. We call a substring a wonderful substring when the number of times it appears in that string is between $A$ and $B$ ($A \le \text{times} \le B$). Can you calculate the number of wonderful substrings in that string?

Input

Input has multiple test cases.

For each line, there is a string $S$ and two integers $A$ and $B$.

$\sum \text{length}(S) \le 2 \times 10^6$,

$1 \le A \le B \le \text{length}(S)$

Output

For each test case, print the number of the wonderful substrings in a line.

样例输入复制

AAA 2 3
ABAB 2 2

样例输出复制

2
3

题目来源

ACM-ICPC 2018 焦作赛区网络预赛

题意：给出一个串，求串中出现次数在[l,r]范围内（不少于l次且不多于r次）的不同子串的个数。

思路：SAM ，求出每个状态的right集合大小，如果满足要求，那么在答案加上这个状态包含的串数就可以（这个状态最大串长-父节点最大串长）。

#include <cstdio>
#include <cstring>
#include<iostream>
#include <algorithm>
#define N 201000
#define T 26
using namespace std;
typedef long long ll;
/* Global storage for the suffix automaton (states are numbered from 1;
   index 0 acts as the "no state" value).

   pa[i]     - parent (suffix-link) state of state i: if state i is the set of
               substrings whose end positions form a set A, pa[i] is the state
               reached by dropping leading characters from those strings.
   son[i][j] - state reached from state i by the transition on letter j
               (0 means there is no such transition).
*/
int pa[N<<1],son[N<<1][T];  
// deep[i]: length of the longest substring in state i; cnt: number of states
// allocated so far; root: the initial state; last: the state created for the
// most recently appended character.
int deep[N<<1],cnt,root,last;
// Input buffer for the current test case's string.
char str[N<<1];
// Bucket counters used by toposort() for the counting sort on deep[].
int sum[N<<1];
int tp[N<<1];  // states in topological order, sorted ascending by the length of each state's longest substring
int num[N<<1];  // size of each state's right (end-position) set
// Allocates the next automaton state, records `_deep` as the length of its
// longest substring, and returns the new state's index.
inline int Newnode(int _deep)
{
    const int id = cnt + 1;
    cnt = id;
    deep[id] = _deep;
    return id;
}
inline void SAM(int alp)
{
	int np=Newnode(deep[last]+1);
    num[np] = 1;
	int u=last;
	while(u&&!son[u][alp])son[u][alp]=np,u=pa[u];
	if(!u)pa[np]=root;
	else 
	{
		int v=son[u][alp];
		if(deep[v]==deep[u]+1)pa[np]=v;
		else 
		{
			int nv=Newnode(deep[u]+1);
			memcpy(son[nv],son[v],sizeof(son[v]));
			pa[nv]=pa[v],pa[v]=pa[np]=nv;
			while(u&&son[u][alp]==v)son[u][alp]=nv,u=pa[u];
		}
	}
	last=np;
}
// Creates the initial (empty-string) state and makes it both `root` and `last`.
inline void pre()
{
    const int first = Newnode(0);
    last = first;
    root = first;
}

// Fills tp[1..cnt] with the states ordered by ascending deep[] using a
// counting sort, so that every state appears after its parent.
void toposort()
{
    const int maxLen = deep[last];

    // sum[0] is not reset here: the placement loop below removes exactly what
    // the counting loop adds, so it leaves this function with the value it had
    // on entry.
    for (int len = maxLen; len >= 1; --len)
        sum[len] = 0;

    // Histogram of states by longest-substring length.
    for (int s = 1; s <= cnt; ++s)
        ++sum[deep[s]];

    // Prefix sums: sum[len] becomes the last slot used by length `len`.
    for (int len = 1; len <= maxLen; ++len)
        sum[len] += sum[len - 1];

    // Place each state in the current last free slot of its bucket.
    for (int s = 1; s <= cnt; ++s)
    {
        const int slot = sum[deep[s]];
        sum[deep[s]] = slot - 1;
        tp[slot] = s;
    }
}
// Resets the automaton so that a new string can be processed.
//
// Only entries 0..cnt of son/pa/num are cleared: states are numbered 1..cnt
// by Newnode(), and index 0 (the "no state" slot) is the only other index the
// program writes to, so nothing beyond cnt can be dirty.  Clearing the whole
// arrays on every test case would cost time proportional to the array
// capacity instead of to the size of the previous automaton.
void init()
{
    const int used = cnt + 1;  // must be read before cnt is reset below
    memset(son, 0, sizeof(son[0]) * used);
    memset(pa, 0, sizeof(pa[0]) * used);
    memset(num, 0, sizeof(num[0]) * used);
    last = cnt = 0;
}
// Reads test cases until EOF.  Each case is a string followed by two integers
// l and r; prints the number of distinct substrings whose occurrence count
// lies in [l, r].
int main()
{
    int l, r;
    // Read the whole record in one call and require all three fields, so that
    // a truncated record ends the loop instead of using uninitialised l/r.
    while (scanf("%s%d%d", str, &l, &r) == 3)
    {
        init();
        pre();
        const int len = static_cast<int>(strlen(str));
        for (int i = 0; i < len; i++) SAM(str[i] - 'A');
        toposort();

        ll ans = 0;
        // Visit states from longest to shortest so that every child has been
        // folded into num[p] before p itself is examined.
        for (int i = cnt; i > 0; i--)
        {
            const int p = tp[i];
            const int fa = pa[p];
            num[fa] += num[p];
            // State p represents deep[p] - deep[fa] distinct substrings, all
            // occurring num[p] times.
            if (num[p] >= l && num[p] <= r) ans += deep[p] - deep[fa];
        }
        printf("%lld\n", ans);
    }
    return 0;
}

hdu4622

Reincarnation

Time Limit: 6000/3000 MS (Java/Others) Memory Limit: 131072/65536 K (Java/Others)
Total Submission(s): 4646 Accepted Submission(s): 1897

Problem Description

Now you are back,and have a task to do:
Given you a string s consist of lower-case English letters only,denote f(s) as the number of distinct sub-string of s.
And you have some query,each time you should calculate f(s[l...r]), s[l...r] means the sub-string of s start from l end at r.

Input

The first line contains integer T(1<=T<=5), denote the number of the test cases.
For each test cases,the first line contains a string s(1 <= length of s <= 2000).
Denote the length of s by n.
The second line contains an integer Q(1 <= Q <= 10000),denote the number of queries.
Then Q lines follows,each lines contains two integer l, r(1 <= l <= r <= n), denote a query.

Output

For each test cases,for each query,print the answer in one line.

Sample Input

2
bbaba
5
3 4
2 2
2 5
2 4
1 4
baaba
5
3 3
3 4
1 4
3 5
5 5

Sample Output

3
1
7
5
8
1
3
8
5
1

Hint

I won't do anything against hash because I am nice.Of course this problem has a solution that don't rely on hash.

Author

WJMZBMR

Source

2013 Multi-University Training Contest 3

题意：求出一个串里面子串[l,r]的不同子串个数。

思路：对于每个开始位置i建立一个SAM，依次加入s[i..n]的字符，每加入一个字符就把当前的不同子串个数记录到Q[i][j]中，这样预处理出所有区间的答案，查询时直接输出Q[l][r]。

#include <cstdio>
#include <cstring>
#include<iostream>
#include <algorithm>
#define N 2010
#define T 26
using namespace std;
typedef long long ll;
/* Global storage for the suffix automaton (states are numbered from 1;
   index 0 acts as the "no state" value).

   pa[i]     - parent (suffix-link) state of state i: if state i is the set of
               substrings whose end positions form a set A, pa[i] is the state
               reached by dropping leading characters from those strings.
   son[i][j] - state reached from state i by the transition on letter j
               (0 means there is no such transition).
*/
int pa[N<<1],son[N<<1][T];  
// deep[i]: length of the longest substring in state i; cnt: number of states
// allocated so far; root: the initial state; last: the state created for the
// most recently appended character.
int deep[N<<1],cnt,root,last;
// Input buffer; the string is stored starting at str[1].
char str[N<<1];
// Bucket counters used by toposort() for the counting sort on deep[].
int sum[N<<1];
int tp[N<<1];  // states in topological order, sorted ascending by the length of each state's longest substring
int num[N<<1];  // size of each state's right (end-position) set
// Running number of distinct substrings of the string inserted so far,
// maintained by SAM().
int ans;
// Q[i][j]: number of distinct substrings of s[i..j] (1-based, inclusive).
int Q[N][N];
// Allocates the next automaton state, records `_deep` as the length of its
// longest substring, and returns the new state's index.
inline int Newnode(int _deep)
{
    const int id = cnt + 1;
    cnt = id;
    deep[id] = _deep;
    return id;
}
inline void SAM(int alp)
{
	int np=Newnode(deep[last]+1);
    num[np] = 1;
	int u=last;
	while(u&&!son[u][alp])son[u][alp]=np,u=pa[u];
	if(!u)pa[np]=root;
	else 
	{
		int v=son[u][alp];
		if(deep[v]==deep[u]+1)pa[np]=v;
		else 
		{
			int nv=Newnode(deep[u]+1);
			memcpy(son[nv],son[v],sizeof(son[v]));
            
            ans -= deep[v] - deep[pa[v]];
			pa[nv]=pa[v],pa[v]=pa[np]=nv;
            ans += deep[nv] - deep[pa[nv]];
            ans += deep[v] - deep[pa[v]];
			while(u&&son[u][alp]==v)son[u][alp]=nv,u=pa[u];
		}
	}
	last=np;
    ans += deep[np] - deep[pa[np]];
}
// Creates the initial (empty-string) state and makes it both `root` and `last`.
inline void pre()
{
    const int first = Newnode(0);
    last = first;
    root = first;
}

// Fills tp[1..cnt] with the states ordered by ascending deep[] using a
// counting sort, so that every state appears after its parent.
void toposort()
{
    const int maxLen = deep[last];

    // sum[0] is not reset here: the placement loop below removes exactly what
    // the counting loop adds, so it leaves this function with the value it had
    // on entry.
    for (int len = maxLen; len >= 1; --len)
        sum[len] = 0;

    // Histogram of states by longest-substring length.
    for (int s = 1; s <= cnt; ++s)
        ++sum[deep[s]];

    // Prefix sums: sum[len] becomes the last slot used by length `len`.
    for (int len = 1; len <= maxLen; ++len)
        sum[len] += sum[len - 1];

    // Place each state in the current last free slot of its bucket.
    for (int s = 1; s <= cnt; ++s)
    {
        const int slot = sum[deep[s]];
        sum[deep[s]] = slot - 1;
        tp[slot] = s;
    }
}
// Resets the automaton so that a new string can be inserted.
//
// Only entries 0..cnt of son/pa/num are cleared: states are numbered 1..cnt
// by Newnode(), so nothing beyond cnt can have been written since the last
// reset.  This function runs once per start position, so clearing the whole
// arrays each time would dominate the running time.
void init()
{
    const int used = cnt + 1;  // must be read before cnt is reset below
    memset(son, 0, sizeof(son[0]) * used);
    memset(pa, 0, sizeof(pa[0]) * used);
    memset(num, 0, sizeof(num[0]) * used);
    last = cnt = 0;
}
// For each test case, precomputes Q[i][j] = number of distinct substrings of
// s[i..j] by building one automaton per start position i, then answers each
// query (l, r) by printing Q[l][r].
//
// Every scanf result is checked; on malformed or truncated input the program
// stops instead of using uninitialised values.
int main()
{
    int t;
    if (scanf("%d", &t) != 1) return 0;
    while (t--)
    {
        // The string is stored from index 1 so positions match the 1-based
        // query bounds.
        if (scanf("%s", str + 1) != 1) return 0;
        const int len = static_cast<int>(strlen(str + 1));

        for (int i = 1; i <= len; i++)
        {
            // Fresh automaton for the suffix starting at i.
            init();
            pre();
            ans = 0;
            for (int j = i; j <= len; j++)
            {
                SAM(str[j] - 'a');
                Q[i][j] = ans;  // distinct substrings of s[i..j]
            }
        }

        int q;
        if (scanf("%d", &q) != 1) return 0;
        while (q--)
        {
            int l, r;
            if (scanf("%d%d", &l, &r) != 2) return 0;
            printf("%d\n", Q[l][r]);
        }
    }
    return 0;
}