参考:https://blog.youkuaiyun.com/liyuanshuo_nuc/article/details/53561527
一直搞不明白后缀自动机,看了好久算是有点懂了吧。
后缀自动机是一个有向无环图,每一个点代表了一个状态,每个状态就是一个right集合,表示处在该状态的子串的结束位置可以是这个集合中的数,而处在这个状态的子串可以组成一个集合,他们是一个长度连续的互为后缀的子串集合,其中长度最短的是里面所有串的后缀,长度最长的不是别人的后缀。每个状态有son边,表示加入一个字符后转移到的状态,每个状态还有一个parent边,指向其父亲,表示处在状态中的最短串去掉最前面一个字符后所到的状态。deep[i]表示的是状态i的最大的串的长度。加上拓扑后可以求出每个状态的right集合大小。
Now you have a string consisting of uppercase letters and two integers $A$ and $B$. We call a substring a wonderful substring when the number of times it appears in that string is between $A$ and $B$ ($A \le \text{times} \le B$). Can you calculate the number of wonderful substrings in that string?
Input
Input has multiple test cases.
For each line, there is a string $S$ and two integers $A$ and $B$.
$\sum \text{length}(S) \le 2 \times 10^6$,
$1 \le A \le B \le \text{length}(S)$
Output
For each test case, print the number of the wonderful substrings in a line.
样例输入复制
AAA 2 3
ABAB 2 2
样例输出复制
2
3
题目来源
题意:给出一个串,求在串中出现次数在[l,r]范围内的不同子串的个数。
思路:SAM ,求出每个状态的right集合大小,如果满足要求,那么在答案加上这个状态包含的串数就可以(这个状态最大串长-父节点最大串长)。
#include <cstdio>
#include <cstring>
#include<iostream>
#include <algorithm>
// Capacity constant: every per-state array below is sized N<<1.
// NOTE(review): the problem statement above bounds sum(length(S)) by 2*10^6, which is
// larger than this capacity, and str is read with an unbounded %s — confirm the real
// per-case length limit.
#define N 201000
// Alphabet size: one transition slot per letter (main maps characters with str[i]-'A').
#define T 26
using namespace std;
typedef long long ll;
/* pa[i] is the parent state of state i (i.e. if state i is the set of substrings
whose end positions form a set A, then pa[i] corresponds to every string of that
set with its first character removed).
son[i][j] is the child state reached from state i via the edge labelled j.
*/
int pa[N<<1],son[N<<1][T];
// deep[i]: length of the longest substring in state i; cnt: number of states allocated so far;
// root: the initial state created by pre(); last: the state created by the most recent SAM() call.
int deep[N<<1],cnt,root,last;
// Input buffer for the current string.
char str[N<<1];
// Counting-sort buckets indexed by deep[], used by toposort().
int sum[N<<1];
int tp[N<<1]; // topological order of the states, sorted ascending by the length of each state's longest substring
int num[N<<1]; // size of the right (end-position) set of each state
/* Allocate the next state index, record _deep as its longest-substring length, and return the index. */
inline int Newnode(int _deep)
{
    const int id = cnt + 1;
    cnt = id;
    deep[id] = _deep;
    return id;
}
/*
 * Extend the suffix automaton with one character.
 * alp is the character's index into the transition table (0..T-1).
 * On return, last refers to the state created for the extended string.
 */
inline void SAM(int alp)
{
    const int cur = Newnode(deep[last] + 1);
    num[cur] = 1;

    int walker = last;
    last = cur;

    // Give every state on the parent chain that lacks an alp-transition an edge to cur.
    for (; walker && !son[walker][alp]; walker = pa[walker])
        son[walker][alp] = cur;

    // Walked off the chain: no shorter suffix has this transition yet.
    if (!walker)
    {
        pa[cur] = root;
        return;
    }

    const int target = son[walker][alp];
    if (deep[target] == deep[walker] + 1)
    {
        // The existing transition is already "tight"; target can serve as the parent.
        pa[cur] = target;
        return;
    }

    // Otherwise split target: the clone takes over the shorter strings and target's
    // outgoing edges and old parent.
    const int clone = Newnode(deep[walker] + 1);
    memcpy(son[clone], son[target], sizeof(son[target]));
    pa[clone] = pa[target];
    pa[target] = clone;
    pa[cur] = clone;

    // Redirect the alp-edges that still point at target along the rest of the chain.
    for (; walker && son[walker][alp] == target; walker = pa[walker])
        son[walker][alp] = clone;
}
/* Create the initial state (length 0) and make it both root and last. */
inline void pre()
{
    const int start = Newnode(0);
    last = start;
    root = start;
}
/*
 * Counting sort of states 1..cnt by deep[]; afterwards tp[1..cnt] lists the states
 * in ascending order of deep[].
 * Bucket 0 is not cleared here; the placement pass decrements every bucket once per
 * state it holds, so sum[0] ends at the value it had on entry.
 */
void toposort()
{
    const int maxLen = deep[last];

    for (int len = 1; len <= maxLen; ++len)
        sum[len] = 0;

    // Histogram of states per length.
    for (int state = 1; state <= cnt; ++state)
        ++sum[deep[state]];

    // Prefix sums turn counts into the last slot of each length class.
    for (int len = 1; len <= maxLen; ++len)
        sum[len] += sum[len - 1];

    // Place each state in the current last free slot of its class.
    for (int state = 1; state <= cnt; ++state)
    {
        int &slot = sum[deep[state]];
        tp[slot] = state;
        --slot;
    }
}
/*
 * Reset the automaton so a new string can be built from scratch.
 *
 * Only indices 0..cnt have been written since the previous reset: Newnode hands
 * out indices 1..cnt, and index 0 is the null parent whose num[] slot receives
 * the root's count in main. Clearing just that prefix is therefore equivalent to
 * wiping the whole arrays, but costs O(states used) instead of O(N * T) on every
 * test case.
 */
void init()
{
    for (int i = 0; i <= cnt; i++)
    {
        memset(son[i], 0, sizeof(son[i]));
        pa[i] = 0;
        num[i] = 0;
    }
    last = cnt = 0;
}
/*
 * For every input record "S A B", builds the suffix automaton of S and prints the
 * number of distinct substrings of S whose occurrence count lies in [A, B].
 */
int main()
{
    int l, r;
    // Read the whole record in one call and stop on EOF or a truncated record, so
    // l and r are never used uninitialized.
    while (scanf("%s%d%d", str, &l, &r) == 3)
    {
        init();
        pre();
        const int len = strlen(str);
        for (int i = 0; i < len; i++) SAM(str[i] - 'A');
        toposort();

        ll ans = 0;
        // Visit states from longest to shortest so that a state's count is final
        // before it is added to its parent's count.
        for (int i = cnt; i > 0; i--)
        {
            const int p = tp[i];
            const int fa = pa[p];
            num[fa] += num[p];
            // State p contributes deep[p] - deep[fa] substrings, each counted num[p] times.
            if (num[p] >= l && num[p] <= r) ans += deep[p] - deep[fa];
        }
        printf("%lld\n", ans);
    }
    return 0;
}
hdu4622
Reincarnation
Time Limit: 6000/3000 MS (Java/Others) Memory Limit: 131072/65536 K (Java/Others)
Total Submission(s): 4646 Accepted Submission(s): 1897
Problem Description
Now you are back,and have a task to do:
Given you a string s consist of lower-case English letters only,denote f(s) as the number of distinct sub-string of s.
And you have some query,each time you should calculate f(s[l...r]), s[l...r] means the sub-string of s start from l end at r.
Input
The first line contains integer T(1<=T<=5), denote the number of the test cases.
For each test cases,the first line contains a string s(1 <= length of s <= 2000).
Denote the length of s by n.
The second line contains an integer Q(1 <= Q <= 10000),denote the number of queries.
Then Q lines follows,each lines contains two integer l, r(1 <= l <= r <= n), denote a query.
Output
For each test cases,for each query,print the answer in one line.
Sample Input
2
bbaba
5
3 4
2 2
2 5
2 4
1 4
baaba
5
3 3
3 4
1 4
3 5
5 5
Sample Output
3
1
7
5
8
1
3
8
5
1
Hint
I won't do anything against hash because I am nice.Of course this problem has a solution that don't rely on hash.
Author
WJMZBMR
Source
2013 Multi-University Training Contest 3
题意:求出一个串里面子串[l,r]的不同子串个数。
思路:对于每个开始位置i建立一个SAM,逐个加入字符时维护当前不同子串的个数,预处理出所有区间[i,j]的答案Q[i][j],查询时直接输出。
#include <cstdio>
#include <cstring>
#include<iostream>
#include <algorithm>
// Capacity constant: per-state arrays are sized N<<1 and the answer table is N x N.
#define N 2010
// Alphabet size: one transition slot per letter (main maps characters with str[j]-'a').
#define T 26
using namespace std;
typedef long long ll;
/* pa[i] is the parent state of state i (i.e. if state i is the set of substrings
whose end positions form a set A, then pa[i] corresponds to every string of that
set with its first character removed).
son[i][j] is the child state reached from state i via the edge labelled j.
*/
int pa[N<<1],son[N<<1][T];
// deep[i]: length of the longest substring in state i; cnt: number of states allocated so far;
// root: the initial state created by pre(); last: the state created by the most recent SAM() call.
int deep[N<<1],cnt,root,last;
// Input buffer; main stores the string starting at index 1.
char str[N<<1];
// Counting-sort buckets indexed by deep[], used by toposort().
int sum[N<<1];
int tp[N<<1]; // topological order of the states, sorted ascending by the length of each state's longest substring
int num[N<<1]; // size of the right (end-position) set of each state
// Running total of deep[x]-deep[pa[x]] over the states, updated inside SAM().
int ans;
// Q[i][j]: value of ans after inserting str[i..j] into an automaton started at position i.
int Q[N][N];
/* Reserve a fresh state whose longest-substring length is _deep; returns the new state's index. */
inline int Newnode(int _deep)
{
    cnt += 1;
    deep[cnt] = _deep;
    return cnt;
}
/*
 * Extend the suffix automaton with one character and keep ans up to date.
 * alp is the character's index into the transition table (0..T-1).
 * ans is adjusted by deep[x] - deep[pa[x]] for each state whose parent link is
 * set or changed here.
 */
inline void SAM(int alp)
{
    const int cur = Newnode(deep[last] + 1);
    num[cur] = 1;

    // Add the missing alp-edges along the parent chain starting at last.
    int walker = last;
    while (walker && !son[walker][alp])
    {
        son[walker][alp] = cur;
        walker = pa[walker];
    }

    if (walker == 0)
    {
        pa[cur] = root;
    }
    else
    {
        const int target = son[walker][alp];
        if (deep[target] != deep[walker] + 1)
        {
            // Split target: the clone inherits its edges and its old parent.
            const int clone = Newnode(deep[walker] + 1);
            memcpy(son[clone], son[target], sizeof(son[target]));

            // Remove target's old contribution, relink, then add back the
            // contributions of the clone and of target under its new parent.
            // (Algebraically these three adjustments sum to zero.)
            ans -= deep[target] - deep[pa[target]];
            pa[clone] = pa[target];
            pa[target] = clone;
            pa[cur] = clone;
            ans += deep[clone] - deep[pa[clone]];
            ans += deep[target] - deep[pa[target]];

            // Redirect the remaining alp-edges that still lead to target.
            while (walker && son[walker][alp] == target)
            {
                son[walker][alp] = clone;
                walker = pa[walker];
            }
        }
        else
        {
            pa[cur] = target;
        }
    }

    last = cur;
    // The new state contributes the substrings longer than its parent's longest one.
    ans += deep[cur] - deep[pa[cur]];
}
/* Create the length-0 initial state; it becomes both last and root. */
inline void pre()
{
    last = Newnode(0);
    root = last;
}
/*
 * Counting sort of states 1..cnt keyed by deep[]; fills tp[1..cnt] in ascending
 * order of deep[]. Bucket 0 is not cleared here.
 */
void toposort()
{
    const int longest = deep[last];

    int len = 1;
    while (len <= longest)
    {
        sum[len] = 0;
        ++len;
    }

    // Count how many states have each length.
    int node = 1;
    while (node <= cnt)
    {
        sum[deep[node]] += 1;
        ++node;
    }

    // Accumulate so that sum[d] is the last slot for length d.
    for (len = 1; len <= longest; ++len)
        sum[len] = sum[len] + sum[len - 1];

    // Drop every state into the last free slot of its length class.
    for (node = 1; node <= cnt; ++node)
    {
        const int key = deep[node];
        const int pos = sum[key];
        sum[key] = pos - 1;
        tp[pos] = node;
    }
}
/*
 * Reset the automaton so a new string can be built from scratch.
 *
 * Only indices 0..cnt can have been written since the previous reset (Newnode
 * hands out indices 1..cnt; index 0 is the null state), so clearing just that
 * prefix is equivalent to wiping the whole arrays. main calls this once per
 * start position, so the reset costs O(states used) rather than O(N * T) each time.
 */
void init()
{
    for (int i = 0; i <= cnt; i++)
    {
        memset(son[i], 0, sizeof(son[i]));
        pa[i] = 0;
        num[i] = 0;
    }
    last = cnt = 0;
}
/*
 * For each test case, precomputes Q[i][j] for every 1 <= i <= j <= len by building
 * one automaton per start position i, then prints Q[l][r] for each query.
 */
int main()
{
    int cases;
    scanf("%d", &cases);
    while (cases-- != 0)
    {
        scanf("%s", str + 1);
        const int len = strlen(str + 1);

        // One fresh automaton per start position; ans after adding str[end] is the
        // value stored for the range [start, end].
        for (int start = 1; start <= len; ++start)
        {
            init();
            pre();
            ans = 0;
            for (int end = start; end <= len; ++end)
            {
                SAM(str[end] - 'a');
                Q[start][end] = ans;
            }
        }

        int queries;
        scanf("%d", &queries);
        while (queries-- != 0)
        {
            int l, r;
            scanf("%d%d", &l, &r);
            printf("%d\n", Q[l][r]);
        }
    }
    return 0;
}