POJ3693(SummerTrainingDay10-J 后缀数组)

本文介绍了一种寻找字符串中具有最大重复次数子串的算法。通过使用后缀数组和最长公共前缀技术来高效地解决问题,并提供了完整的C++实现代码。

Maximum repetition substring

Time Limit: 1000MS Memory Limit: 65536K
Total Submissions: 10241 Accepted: 3157

Description

The repetition number of a string is defined as the maximum number R such that the string can be partitioned into R identical consecutive substrings. For example, the repetition number of "ababab" is 3 and that of "ababa" is 1.

Given a string containing lowercase letters, you are to find a substring of it with maximum repetition number.

Input

The input consists of multiple test cases. Each test case contains exactly one line, which
gives a non-empty string consisting of lowercase letters. The length of the string will not be greater than 100,000.

The last test case is followed by a line containing a '#'.

Output

For each test case, print a line containing the test case number (beginning with 1) followed by the substring of maximum repetition number. If there are multiple substrings of maximum repetition number, print the lexicographically smallest one.

Sample Input

ccabababc
daabbccaa
#

Sample Output

Case 1: ababab
Case 2: aa

Source

 
  1 //2017-08-10
  2 #include <iostream>
  3 #include <cstdio>
  4 #include <cstring>
  5 #include <algorithm>
  6 
  7 using namespace std;
  8 
  9 const int N = 1000010;
 10 const int inf = 0x3f3f3f3f;
 11 char str[N];
 12 int n, r[N];
 13 int wa[N], wb[N], wv[N], wss[N];
 14 int Suffix[N];//Str下标为i ~ Len的连续子串(即后缀) 
 15 int SA[N];//满足Suffix[SA[1]] < Suffix[SA[2]] …… < Suffix[SA[Len]],即排名为i的后缀为Suffix[SA[i]](与Rank是互逆运算) 
 16 int Rank[N];//Suffix[i]在所有后缀中的排名 
 17 int Height[N];//height[i]表示Suffix[SA[i]]和Suffix[SA[i-1]]的最长公共前缀,也就是排名相邻的两个后缀的最长公共前缀
 18 int H[N];//等于Height[Rank[i]],也就是后缀Suffix[i]和它前一名的后缀的最长公共前缀 
 19 
 20 //比较母串r中起始位置为a和b,长度都为len的子串是否相等
 21 int cmp(int *r, int a, int b, int len)
 22 {
 23     return r[a]==r[b] && r[a+len]==r[b+len];
 24 }
 25 
 26 //倍增算法求SA数组。
 27 void da(int *r, int *SA, int n, int m)
 28 {
 29     int i, j, p, *x = wa, *y = wb, *t;
 30     for(i = 0; i < m; i++)wss[i] = 0;
 31     for(i = 0; i < n; i++)wss[x[i]=r[i]]++;
 32     for(i = 0; i < m; i++)wss[i]+=wss[i-1];
 33     for(i = n-1; i >= 0; i--)SA[--wss[x[i]]]=i;
 34     for(j = 1, p = 1; p < n; j *= 2, m = p){
 35         for(p = 0, i = n-j; i < n; i++)
 36               y[p++] = i;
 37         for(i = 0; i < n; i++)
 38               if(SA[i] >= j)
 39                   y[p++] = SA[i]-j;
 40         for(i = 0; i < n; i++)
 41               wv[i] = x[y[i]];
 42         for(i = 0; i < m; i++)
 43               wss[i] = 0;
 44         for(i = 0; i < n; i++)
 45               wss[wv[i]]++;
 46         for(i = 1; i < m; i++)
 47               wss[i] += wss[i-1];
 48         for(i = n-1; i >= 0; i--)
 49               SA[--wss[wv[i]]] = y[i];
 50         for(t = x, x = y, y = t, p = 1, x[SA[0]]=0, i = 1; i < n; i++)
 51               x[SA[i]] = cmp(y, SA[i-1], SA[i], j)?p-1:p++;
 52     }
 53 }
 54 
 55 //计算height数组
 56 void cal_Height(int *r, int *SA, int n)
 57 {
 58     int i, j, k = 0;
 59     for(i = 1; i <= n; i++)Rank[SA[i]] = i;
 60     for(i = 0; i < n; Height[Rank[i++]] = k)
 61           for(k?k--:0, j=SA[Rank[i]-1]; r[i+k]==r[j+k]; k++)
 62           ;
 63 }
 64 
 65 int st[N][30];
 66 
 67 void init_rmq(int n)
 68 {
 69     for(int i=1;i<=n;i++) st[i][0]=Height[i];
 70     for(int j=1;(1<<j)<=n;j++)
 71         for(int i=1;i+(1<<j)-1<=n;i++)
 72         {
 73             st[i][j]=min(st[i][j-1],st[i+(1<<(j-1))][j-1]);
 74         }
 75 }
 76 
 77 //询问后缀i和后缀j的最长公共前缀
 78 int lcp(int i,int j)
 79 {
 80     i = Rank[i];
 81     j = Rank[j];
 82     if(i>j) swap(i,j);
 83     i++;
 84     int k=0;
 85     while(i+(1<<(k+1)) <= j) k++;
 86     return min(st[i][k],st[j-(1<<k)+1][k]);
 87 }
 88 
 89 int main()
 90 {
 91     int kase = 0;
 92     while(scanf("%s", str)!=EOF)
 93     {
 94         if(str[0] == '#')break;
 95         n = strlen(str);
 96         for(int i = 0; i < n; i++)
 97               r[i] = str[i]-'a'+1;
 98         da(r, SA, n+1, 200);
 99         cal_Height(r, SA, n);
100         init_rmq(n);
101         int ans = 0, bg = 0, ed = 0, a, b, c;
102         for(int L = 1; 2*L <= n; L++)
103         {
104             for(int i = 0; (i+1)*L+1 < n; i++)
105             {
106                 a = i*L;
107                 b = (i+1)*L;
108                 if(str[a] != str[b])continue;
109                 c = lcp(a, b);
110                 int ll = 0;
111                 int rr = b+c-1;
112                 for(int j = 0; j < L; j++)
113                 {
114                     if(a - j < 0 || str[a-j] != str[b-j])break;
115                     ll = a - j;
116                     int cnt = (rr-ll+1)/L;
117                     if(cnt > ans || (cnt == ans && Rank[ll] < Rank[bg]))
118                     {
119                         ans = cnt;
120                         bg = ll;
121                         ed = ll+cnt*L-1;
122                     }
123                 }
124             }
125         }
126         printf("Case %d: ", ++kase);
127         if(ans == 0)printf("%c\n", str[SA[1]]);
128         else{
129               for(int i = bg; i <= ed; i++)
130                   printf("%c", str[i]);
131             printf("\n");
132         }
133     }
134 
135     return 0;
136 }

 

转载于:https://www.cnblogs.com/Penn000/p/7341683.html

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值