倍增算法(DA)
倍增算法的时间复杂度为O(nlogn),利用倍增思想给字符串所有后缀进行排序,用于解决字符串处理问题。
变量解释:
sa[i] 排名为i的后缀的起始下标,即排名为i的后缀是从下标sa[i]开始到串尾的子串(以下把从下标j开始的后缀简称为后缀j)
rnk[i] 后缀i的排名为rnk[i]
hei[i] 排名相邻的两个后缀(后缀sa[i-1]和后缀sa[i])的最长公共前缀(连续)的长度
相关性质:
1.rnk[sa[i]] = i(同理,sa[rnk[i]] = i)
2.对于两个后缀j和k,设rnk[j] < rnk[k],则后缀j和k的最长公共前缀(LCP)长度等于min(hei[rnk[j]+1], hei[rnk[j]+2],…, hei[rnk[k]])
#include <cstdio>
#include <cstring>
#include <string>
#include <iostream>
#include <algorithm>
using namespace std;
// Capacity of every global buffer below (text length including its terminator).
const int MAXN = 1000005;
// Text buffer that main() reads each input token into.
char s[MAXN];
// sa[i]: start index of the suffix ranked i; rnk[i]: rank of the suffix starting
// at i; hei[i]: common-prefix length of the suffixes ranked i-1 and i.
int sa[MAXN], rnk[MAXN], hei[MAXN];
// Scratch storage for da(): two key/rank buffers and the counting-sort buckets.
int wa[MAXN], wb[MAXN], c[MAXN];

/**
 * Builds the suffix array of r[0..n-1] by prefix doubling.
 *
 * @param r   Text to sort. Every r[i], read as an unsigned char, must be
 *            smaller than m.
 * @param sa  Output, n entries: sa[i] receives the start index of the suffix
 *            ranked i (0 = smallest).
 * @param n   Number of characters taking part. main() passes length+1 so the
 *            terminating '\0' is sorted as well.
 * @param m   Exclusive upper bound on the character codes; used as the bucket
 *            count of the first counting sort.
 *
 * Works on the global scratch arrays wa, wb and c, so neither n nor m may
 * exceed MAXN. A position past the end of the text compares as smaller than
 * any real rank, so the text does not need a unique last character.
 */
void da(char *r, int *sa, int n, int m)
{
    if (n <= 0) return;

    int *x = wa; // x[i]: current rank (key) of the suffix starting at i
    int *y = wb; // y   : suffix indices ordered by second key, later the old ranks

    // Counting sort of all suffixes by their first character.
    for (int i = 0; i < m; i++) c[i] = 0;
    for (int i = 0; i < n; i++)
    {
        // Index buckets with the unsigned value so bytes >= 0x80 cannot
        // produce a negative subscript where plain char is signed.
        x[i] = static_cast<unsigned char>(r[i]);
        c[x[i]]++;
    }
    for (int i = 1; i < m; i++) c[i] += c[i-1];
    for (int i = n-1; i >= 0; i--) sa[--c[x[i]]] = i;

    for (int k = 1; k <= n; k <<= 1)
    {
        // Order the suffixes by their second key (rank of the suffix k
        // positions further on) by reusing the current sa. The last k suffixes
        // have no second key and therefore come first.
        int p = 0;
        for (int i = n-k; i < n; i++) y[p++] = i;
        for (int i = 0; i < n; i++)
            if (sa[i] >= k) y[p++] = sa[i]-k;

        // Stable counting sort by the first key, walking y backwards so the
        // second-key order is kept inside each bucket.
        for (int i = 0; i < m; i++) c[i] = 0;
        for (int i = 0; i < n; i++) c[x[y[i]]]++;
        // Prefix sums start at 1: c[0] has no predecessor to add.
        for (int i = 1; i < m; i++) c[i] += c[i-1];
        for (int i = n-1; i >= 0; i--) sa[--c[x[y[i]]]] = y[i];

        // y now holds the old ranks; write the new ranks into x. Two suffixes
        // share a rank only if both of their keys are equal.
        std::swap(x, y);
        p = 1;
        x[sa[0]] = 0;
        for (int i = 1; i < n; i++)
        {
            const int prev = sa[i-1];
            const int cur = sa[i];
            // Past the end of the text there is no rank to read; use -1.
            const int prevSecond = prev + k < n ? y[prev + k] : -1;
            const int curSecond = cur + k < n ? y[cur + k] : -1;
            x[cur] = (y[prev] == y[cur] && prevSecond == curSecond) ? p-1 : p++;
        }

        // All ranks distinct: the order is final.
        if (p >= n) break;
        // Ranks now lie in [0, p), which bounds the buckets of the next round.
        m = p;
    }
}
/**
 * Fills the global rnk[] and hei[] arrays from a finished suffix array.
 *
 * @param r   The text; r[n] is read as well.
 * @param sa  Suffix array with n+1 entries (indices 0..n are read).
 * @param n   Text length without the terminator.
 *
 * On return rnk[sa[i]] == i for i in 0..n, and for every start < n
 * hei[rnk[start]] holds the number of leading characters that the suffix at
 * start shares with the suffix ranked immediately before it.
 */
void calheight(char *r, int *sa, int n)
{
    // Invert the suffix array to obtain the rank of each start position.
    for (int pos = 0; pos <= n; ++pos)
        rnk[sa[pos]] = pos;

    // Visit the suffixes in text order. The match length found for one start
    // is reused, minus one, as the starting point for the next start.
    int match = 0;
    for (int start = 0; start < n; ++start)
    {
        if (match > 0)
            --match;

        const int neighbour = sa[rnk[start] - 1];
        while (r[start + match] == r[neighbour + match])
            ++match;

        hei[rnk[start]] = match;
    }
}
/**
 * Reads whitespace-delimited tokens from stdin until input ends and prints,
 * for each token, its suffix array, rank array and height array.
 */
int main()
{
    // Build a "%<width>s" format whose width is one less than the size of s,
    // so scanf always has room for the terminating '\0' and cannot write past
    // the buffer. A longer token is consumed in several pieces.
    char fmt[16];
    snprintf(fmt, sizeof(fmt), "%%%ds", static_cast<int>(sizeof(s)) - 1);

    while (scanf(fmt, s) == 1)
    {
        const int len = static_cast<int>(strlen(s));
        // len+1: the terminating '\0' takes part in the sort as well.
        // 130 is the alphabet bound handed to da (ASCII input is expected).
        da(s, sa, len+1, 130);
        // Here the length is passed without the terminator.
        calheight(s, sa, len);

        printf("sa: ");
        for (int i = 0; i <= len; i++)
            printf("%4d", sa[i]);
        printf("\nrnk: ");
        for (int i = 0; i <= len; i++)
            printf("%4d", rnk[i]);
        printf("\nhei");
        for (int i = 0; i <= len; i++)
            printf("%4d", hei[i]);
        printf("\n");
    }
    return 0;
}