spoj - Distinct Substrings（后缀数组）

最新推荐文章于 2019-08-31 11:55:10 发布

weixin_34279184

最新推荐文章于 2019-08-31 11:55:10 发布

阅读量58

点赞数

CC 4.0 BY-SA版权

原文链接：http://www.cnblogs.com/ftae/p/7208459.html

本文介绍了一种利用后缀数组高效计算一个字符串中不同子串数量的方法。通过构建后缀数组并计算高度数组，可以遍历排好序的后缀字符串，有效地找出新增加的不同子串数量。

Distinct Substrings

题意

求一个字符串有多少个不同的子串。

分析

又一次体现了后缀数组的强大。

任意一个子串，一定是这个字符串某个后缀的前缀；因此统计不同子串的数量，等价于统计所有后缀的不同前缀的数量。

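我们按字典序遍历所有后缀（也就是按 \(sa\) 数组的顺序）。设原串长度为 \(len\)，排名为 \(i\) 的后缀从位置 \(sa[i]\) 开始，它一共有 \(len - sa[i]\) 个前缀；其中最短的 \(height[i]\) 个同时也是排名为 \(i-1\) 的后缀的前缀（\(height[i]\) 即这两个相邻后缀的最长公共前缀长度），前面已经统计过，需要减去。因此答案为 \(\sum_i \bigl(len - sa[i] - height[i]\bigr)\)。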

code

#include<bits/stdc++.h>
using namespace std;
typedef unsigned long long ull;

// Capacity of every working buffer below (2010 entries).
const int MAXN = 2000 + 10;

// Input string; main() reads each test case into it.
char s[MAXN];

// Suffix array, written by build_sa().
int sa[MAXN];
// Two rank buffers that build_sa() swaps between rounds.
int t[MAXN];
int t2[MAXN];
// Counting-sort buckets used by build_sa().
int c[MAXN];
// n is the string length + 1, with s[n - 1] = 0.
int n;

// rnk[] is the inverse of sa[]; both rnk[] and height[] are filled by getHeight().
int rnk[MAXN];
int height[MAXN];
// Builds the suffix array sa[0..n-1] of s[0..n-1] by prefix doubling, using a
// counting sort in every round.
//
// Inputs are the globals s and n:
//   - n is the string length + 1 and s[n - 1] == 0, so the terminator is sorted
//     as part of the string;
//   - every s[i] must lie in [0, m), where m is the initial alphabet size.
// The scratch arrays t, t2 and c are overwritten. After each round m is reused
// as the number of distinct ranks produced so far.
void build_sa(int m) {
    int i, *x = t, *y = t2;
    // Round 0: counting sort of the suffixes by their first character.
    for(i = 0; i < m; i++) c[i] = 0;
    for(i = 0; i < n; i++) c[x[i] = s[i]]++;
    for(i = 1; i < m; i++) c[i] += c[i - 1];
    for(i = n - 1; i >= 0; i--) sa[--c[x[i]]] = i;
    for(int k = 1; k <= n; k <<= 1) {
        int p = 0;
        // y lists the start positions ordered by the second key (the part
        // starting at offset k). Positions n-k..n-1 have no second part, so
        // they come first.
        for(i = n - k; i < n; i++) y[p++] = i;
        for(i = 0; i < n; i++) if(sa[i] >= k) y[p++] = sa[i] - k;
        // Counting sort of y by the first key x[].
        for(i = 0; i < m; i++) c[i] = 0;
        for(i = 0; i < n; i++) c[x[y[i]]]++;
        // The prefix sum starts at 1; starting at 0 would read c[-1], which is
        // outside the array.
        for(i = 1; i < m; i++) c[i] += c[i - 1];
        for(i = n - 1; i >= 0; i--) sa[--c[x[y[i]]]] = y[i];
        // x becomes the new rank array; y keeps the ranks of the previous round.
        std::swap(x, y);
        p = 1; x[sa[0]] = 0;
        for(i = 1; i < n; i++)
            // Neighbours share a rank only when both halves match. The second
            // comparison is evaluated only when the first halves tie.
            // NOTE(review): sa[i] + k stays below n only if the terminator at
            // s[n - 1] is smaller than every other character — confirm callers
            // guarantee this.
            x[sa[i]] = y[sa[i - 1]] == y[sa[i]] && y[sa[i - 1] + k] == y[sa[i] + k] ? p - 1 : p++;
        if(p >= n) break; // all ranks distinct: the order is final
        m = p;
    }
}
// Fills rnk[] (the inverse permutation of sa[]) and height[], where
// height[r] is the length of the longest common prefix of the suffixes ranked
// r and r - 1. Reads the globals s, sa and n; sa must already be built.
// height[0] is not written.
void getHeight() {
    for(int pos = 0; pos < n; ++pos) rnk[sa[pos]] = pos;

    int lcp = 0;
    // The suffix starting at n - 1 is skipped: the loop covers starts 0..n-2.
    for(int start = 0; start + 1 < n; ++start) {
        // Moving one position to the right shortens the previous match by at most one.
        if(lcp > 0) --lcp;
        const int prev = sa[rnk[start] - 1];
        while(s[start + lcp] == s[prev + lcp]) ++lcp;
        height[rnk[start]] = lcp;
    }
}
// Reads the number of test cases T, then one string per case, and prints the
// number of distinct substrings of each string.
//
// In sorted suffix order, the suffix at rank i contributes
// (length - sa[i]) prefixes, minus the height[i] prefixes it shares with the
// suffix at rank i - 1. Rank 0 is the terminator-only suffix and contributes
// nothing.
int main() {
    int T;
    if(scanf("%d", &T) != 1) return 0;

    // Build "%<width>s" from the buffer size so an over-long token cannot
    // overflow s. scanf then stops at the limit and leaves the rest of the
    // token in the input stream.
    char fmt[32];
    snprintf(fmt, sizeof(fmt), "%%%ds", static_cast<int>(sizeof(s)) - 1);

    while(T-- > 0) {
        // Stop on EOF or read failure rather than processing stale data.
        if(scanf(fmt, s) != 1) break;
        n = static_cast<int>(strlen(s)) + 1;
        build_sa(128);
        getHeight();
        int ans = 0;
        // Rank 1 is handled without height[]: nothing is subtracted for the
        // first real suffix.
        ans += (n - 1) - sa[1];
        for(int i = 2; i < n; i++) {
            ans += (n - 1) - sa[i] - height[i];
        }
        printf("%d\n", ans);
    }
    return 0;
}

转载于:https://www.cnblogs.com/ftae/p/7208459.html