两篇论文:许智磊后缀数组.pdf 后缀数组——处理字符串的有力工具.pdf
贴两模版:
DA:
/*
*后缀数组,倍增算法实现,复杂度O(nlogn)
*sa[i]: 第i小的后缀是在字符串位置,即后缀sa[i]
*rank[i]: 后追i在sa数组下标,即第rank[i]小
*height[i]: LCP (suffix (sa[i-1], sa[i]))
*/
int sa[N], rank[N], height[N];
int ws[N], wa[N], wb[N];
bool cmp(int *r, int a, int b, int l) {
return (r[a] == r[b] && r[a+l] == r[b+l]);
}
//r数组为读入的字符串,m = max (r[i]) + 1,一般字符128足够了
//n为strlen (s) + 1,加上最后一个'\0'
void DA(char *r, int n, int m = 128) {
int i, j, p, *x = wa, *y = wb;
for (i=0; i<m; ++i) ws[i] = 0;
for (i=0; i<n; ++i) ws[x[i]=r[i]]++;
for (i=1; i<m; ++i) ws[i] += ws[i-1];
for (i=n-1; i>=0; --i) sa[--ws[x[i]]] = i;
for (j=1, p=1; p<n; j<<=1, m=p) {
for (p=0, i=n-j; i<n; ++i) y[p++] = i;
for (i=0; i<n; ++i) if (sa[i] >= j) y[p++] = sa[i] - j;
for (i=0; i<m; ++i) ws[i] = 0;
for (i=0; i<n; ++i) ws[x[y[i]]]++;
for (i=1; i<m; ++i) ws[i] += ws[i-1];
for (i=n-1; i>=0; --i) sa[--ws[x[y[i]]]] = y[i];
std::swap (x, y);
for (p = 1, x[sa[0]] = 0, i=1; i<n; ++i) {
x[sa[i]] = cmp (y, sa[i-1], sa[i], j) ? p - 1 : p++;
}
}
}
void calc_height(char *r, int *sa, int n) {
int i, j, k = 0;
for (i=1; i<=n; ++i) rank[sa[i]] = i; //i: 第i小的后缀 sa[0] = n(s[n]='\0')
for (i=0; i<n; ++i) { //i: 后缀i
if (k) k--;
j = sa[rank[i]-1];
while (r[i+k] == r[j+k]) k++;
height[rank[i]] = k; //其实并没有计算height[n]
}
}
/*
*LCP (suffix (i), suffix (j)) = min (height[l] to height[r]); //RMQ
*l = rank[i], r = rank[j]; if (l > r) swap (l, r); l++;
*/
DC3:
/*
*后缀数组,DC3算法实现,复杂度O(n)
*/
int wa[N],wb[N],wv[N],ws[N];
int rank[N],height[N];
int sa[N],r[N];
int c0(int *y,int a,int b) {
return y[a]==y[b]&&y[a+1]==y[b+1]&&y[a+2]==y[b+2];
}
int c12(int k,int *y,int a,int b) {
if(k==2) return y[a]<y[b]||y[a]==y[b]&&c12(1,y,a+1,b+1);
else return y[a]<y[b]||y[a]==y[b]&&wv[a+1]<wv[b+1];
}
void sort(int *r,int *a,int *b,int n,int m) {
int i;
for(i=0;i<n;i++) wv[i]=r[a[i]];
for(i=0;i<m;i++) ws[i]=0;
for(i=0;i<n;i++) ws[wv[i]]++;
for(i=1;i<m;i++) ws[i]+=ws[i-1];
for(i=n-1;i>=0;i--) b[--ws[wv[i]]]=a[i];
}
void DC3(int *r,int *sa,int n,int m) {
int i,j,*rn=r+n,*san=sa+n,ta=0,tb=(n+1)/3,tbc=0,p;
r[n]=r[n+1]=0;
for(i=0;i<n;i++) if(i%3!=0) wa[tbc++]=i;
sort(r+2,wa,wb,tbc,m);
sort(r+1,wb,wa,tbc,m);
sort(r,wa,wb,tbc,m);
for(p=1,rn[F(wb[0])]=0,i=1;i<tbc;i++)
rn[F(wb[i])]=c0(r,wb[i-1],wb[i])?p-1:p++;
if(p<tbc) dc3(rn,san,tbc,p);
else for(i=0;i<tbc;i++) san[rn[i]]=i;
for(i=0;i<tbc;i++) if(san[i]<tb) wb[ta++]=san[i]*3;
if(n%3==1) wb[ta++]=n-1;
sort(r,wb,wa,ta,m);
for(i=0;i<tbc;i++) wv[wb[i]=G(san[i])]=i;
for(i=0,j=0,p=0;i<ta && j<tbc;p++)
sa[p]=c12(wb[j]%3,r,wa[i],wb[j])?wa[i++]:wb[j++];
for(;i<ta;p++) sa[p]=wa[i++];
for(;j<tbc;p++) sa[p]=wb[j++];
}
void calc_height(int *r,int *sa,int n) {
int i,j,k=0;
for(i=1;i<=n;i++) rank[sa[i]]=i;
for(i=0;i<n;height[rank[i++]]=k)
for(k?k--:0,j=sa[rank[i]-1];r[i+k]==r[j+k];k++);
}