题目1链接:SPOJ DISUBSTR - Distinct Substrings
题目1链接:SPOJ SUBST1 - New Distinct Substrings
DISUBSTR - Distinct Substrings
no tags
Given a string, we need to find the total number of its distinct substrings.
Input
T- number of test cases. T<=20;
Each test case consists of one string, whose length is <= 1000
Output
For each test case output one number saying the number of distinct substrings.
Example
Sample Input:
2
CCCCC
ABABA
Sample Output:
5
9
Explanation for the testcase with string ABABA:
len=1 : A,B
len=2 : AB,BA
len=3 : ABA,BAB
len=4 : ABAB,BABA
len=5 : ABABA
Thus, total number of distinct substrings is 9
题意:给定一个字符串,求不同子串的数目。
思路:每一个子串一定是某些后缀的前缀,我们只需要统计所有后缀里面的不同前缀即可。对于后缀sa[i]而言,它的前缀有n-sa[i]个。而对于两个相邻的后缀sa[i-1],sa[i],重复的前缀有H[i]个,我们从前到后扫一遍即可。
第二道题把数组开大点就OK了。
AC代码:
//#include <iostream>
#include <cstdio>
#include <cstring>
#include <cstdlib>
#include <cmath>
#include <algorithm>
#include <vector>
#include <queue>
#include <map>
#include <stack>
#define PI acos(-1.0)
#define CLR(a, b) memset(a, (b), sizeof(a))
#define fi first
#define se second
#define ll o<<1
#define rr o<<1|1
using namespace std;
typedef long long LL;
typedef pair<int, int> pii;
const int MAXN = 2*1e3 + 10;
const int pN = 1e6;// <= 10^7
const int INF = 0x3f3f3f3f;
const int MOD = 1e9 + 7;
void add(LL &x, LL y) { x += y; x %= MOD; }
int cmp(int *r, int a, int b, int l) {
return (r[a] == r[b]) && (r[a+l] == r[b+l]);
}
int wa[MAXN], wb[MAXN], ws[MAXN], wv[MAXN];
int R[MAXN];//下标0->n-1 存储的是1->n之间的数
int H[MAXN];//下标2->n H[i] >= H[i-1]-1
void DA(int *r, int *sa, int n, int m)
{
int i, j, p, *x = wa, *y = wb, *t;
for(i = 0; i < m; i++) ws[i] = 0;
for(i = 0; i < n; i++) ws[x[i]=r[i]]++;
for(i = 1; i < m; i++) ws[i] += ws[i-1];
for(i = n-1; i >= 0; i--) sa[--ws[x[i]]] = i;
for(j = 1, p = 1; p < n; j *= 2, m = p)
{
for(p = 0, i = n-j; i < n; i++) y[p++] = i;
for(i = 0; i < n; i++) if(sa[i] >= j) y[p++] = sa[i] - j;
for(i = 0; i < n; i++) wv[i] = x[y[i]];
for(i = 0; i < m; i++) ws[i] = 0;
for(i = 0; i < n; i++) ws[wv[i]]++;
for(i = 1; i < m; i++) ws[i] += ws[i-1];
for(i = n-1; i >= 0; i--) sa[--ws[wv[i]]] = y[i];
for(t = x, x = y, y = t, p = 1, x[sa[0]] = 0, i = 1; i < n; i++)
x[sa[i]] = cmp(y, sa[i-1], sa[i], j) ? p-1 : p++;
}
}
void calh(int *r, int *sa, int n)
{
int i, j, k = 0;
for(i = 1; i <= n; i++) R[sa[i]] = i;
for(i = 0; i < n; H[R[i++]] = k)
for(k ? k-- : 0, j = sa[R[i]-1]; r[i+k] == r[j+k]; k++);
}
int sa[MAXN];//下标从1->n,存储的是0->n-1之间的数
int a[MAXN];
char str[MAXN];
int main()
{
int t; scanf("%d", &t);
while(t--) {
scanf("%s", str);
int n = strlen(str); int Max = 0;
for(int i = 0; i < n; i++) {
a[i] = str[i];
Max = max(Max, a[i]);
}
a[n] = 0;
DA(a, sa, n+1, Max+1);//<n+1
calh(a, sa, n);//<=n
int ans = 0;
for(int i = 1; i <= n; i++) {
if(i == 1) ans += n - sa[i];
else {
ans += n - sa[i] - H[i];
}
}
printf("%d\n", ans);
}
return 0;
}