Maximum repetition substring 后缀数组

本文介绍了一种求字符串中重复次数最多的连续重复子串(Maximum repetition substring)的算法,并通过一个具体的C++代码示例进行了解释。该算法利用后缀数组、height(LCP)数组以及RMQ,能够高效地找到重复次数最多的子串;若有多个,则取字典序最小的一个。
Maximum repetition substring
Time Limit: 1000MS Memory Limit: 65536K
Total Submissions: 7578 Accepted: 2281

Description

The repetition number of a string is defined as the maximum number R such that the string can be partitioned into R same consecutive substrings. For example, the repetition number of "ababab" is 3 and "ababa" is 1.

Given a string containing lowercase letters, you are to find a substring of it with maximum repetition number.

Input

The input consists of multiple test cases. Each test case contains exactly one line, which gives a non-empty string consisting of lowercase letters. The length of the string will not be greater than 100,000.

The last test case is followed by a line containing a '#'.

Output

For each test case, print a line containing the test case number (beginning with 1) followed by the substring of maximum repetition number. If there are multiple substrings of maximum repetition number, print the lexicographically smallest one.

Sample Input

ccabababc
daabbccaa
#

Sample Output

Case 1: ababab
Case 2: aa

Source

 
题意:给出一个由小写字母组成的字符串,求其中重复次数最多的连续重复子串(即由同一个子串首尾相接、连续出现次数最多的子串);如果有多个,输出字典序最小的那个。
 
 
 
 
  1 #include <iostream>
  2 #include <cstdio>
  3 #include <cstring>
  4 #include <cmath>
  5 #include <algorithm>
  6 #include <string>
  7 #include <vector>
  8 #include <stack>
  9 #include <queue>
 10 #include <set>
 11 #include <map>
 12 #include <list>
 13 #include <iomanip>
 14 #include <cstdlib>
 15 using namespace std;
 /* Shared constants and working storage used by the suffix-array routines. */
 const int INF=0x5fffffff;      // large sentinel value; not referenced by the code visible here
 const double EXP=1e-8;         // floating-point tolerance; not referenced by the code visible here
 const int MS=200005;           // capacity of every working array below

 //   KMP  TRIE  DFA  SUFFIX
 int dp[MS][30];                // sparse table filled by rmq_init: dp[i][j] = min(height[i .. i+2^j-1])
 int t1[MS],t2[MS];             // the two key buffers get_sa alternates between
 int c[MS],v[MS];               // counting-sort buckets and per-round sort keys for get_sa
 // NOTE: under `using namespace std` in C++11 and later, an unqualified use of
 // `rank` is ambiguous with std::rank from <type_traits>; refer to it as ::rank.
 int rank[MS];                  // rank[p] = index in sa of the suffix starting at p
 int sa[MS];                    // suffix array: sa[i] = start of the i-th smallest suffix
 int height[MS];                // height[i] = LCP of suffixes sa[i-1] and sa[i]
 char str[MS],str1[MS];         // input line (str); str1 is not referenced by the code visible here
 int s[MS];                     // input mapped to integers 1..26 plus a trailing 0 sentinel
 /*
  * Equality test used when renumbering ranks in get_sa.
  *
  * r : key array (ranks from the previous round)
  * a : first position
  * b : second position
  * k : offset of the second key
  *
  * Returns 1 when r[a]==r[b] and r[a+k]==r[b+k], otherwise 0.
  * r[a+k] / r[b+k] are only read when the first keys already match.
  */
 int cmp(int *r,int a,int b,int k)
 {
     if(r[a]!=r[b])
         return 0;
     return r[a+k]==r[b+k];
 }
 29 
 30 void get_sa(int *r,int *sa,int n,int m)
 31 {
 32     int i,j,p,*x=t1,*y=t2;
 33     for(i=0;i<m;i++)
 34         c[i]=0;
 35     for(i=0;i<n;i++)
 36         c[x[i]=r[i]]++;
 37     for(i=1;i<m;i++)
 38         c[i]+=c[i-1];
 39     for(i=n-1;i>=0;i--)
 40         sa[--c[x[i]]]=i;
 41     p=1;j=1;
 42     for(;p<n;j*=2,m=p)
 43     {
 44         for(p=0,i=n-j;i<n;i++)
 45             y[p++]=i;
 46         for(i=0;i<n;i++)
 47             if(sa[i]>=j)
 48                 y[p++]=sa[i]-j;
 49         for(i=0;i<n;i++)
 50             v[i]=x[y[i]];
 51         for(i=0;i<m;i++)
 52             c[i]=0;
 53         for(i=0;i<n;i++)
 54             c[v[i]]++;
 55         for(i=1;i<m;i++)
 56             c[i]+=c[i-1];
 57         for(i=n-1;i>=0;i--)
 58             sa[--c[v[i]]]=y[i];
 59         swap(x,y);
 60         x[sa[0]]=0;
 61         for(p=1,i=1;i<n;i++)
 62             x[sa[i]]=cmp(y,sa[i-1],sa[i],j)?p-1:p++;
 63     }
 64 }
 65 
 66 void get_height(int *r,int n)
 67 {
 68     int i,j,k=0;
 69     for(i=1;i<=n;i++)
 70         rank[sa[i]]=i;
 71     //height[i]>=height[i-1]-1;
 72     for(i=0;i<n;i++)
 73     {
 74         if(k)
 75             k--;
 76         else
 77             k=0;
 78         j=sa[rank[i]-1];
 79         while(r[i+k]==r[j+k])
 80             k++;
 81         height[rank[i]]=k;
 82     }
 83 }
 84 
 85 void rmq_init(int n)
 86 {
 87     for(int i=1;i<=n;i++)  dp[i][0]=height[i];
 88     for(int j=1;(1<<j)<=n;j++)
 89     for(int i=1;i+(1<<j)-1<=n;i++)
 90     dp[i][j]=min(dp[i][j-1],dp[i+(1<<(j-1))][j-1]);
 91 }
 92 
 93 int rmq(int ll,int rr)
 94 {
 95     int k=0;
 96     ll=rank[ll];
 97     rr=rank[rr];
 98     if(ll>rr)
 99     {
100         int tmp=ll;
101         ll=rr;
102         rr=tmp;
103     }
104     ll++;
105     while((1<<(k+1))<=rr-ll+1) k++;
106     return min(dp[ll][k],dp[rr-(1<<k)+1][k]);
107 }
108 
109 int main()
110 {
111     int text=0;
112     while(scanf("%s",str)>0)
113     {
114         if(str[0]=='#')
115         break;
116         int len=strlen(str);
117         for(int i=0;i<len;i++)
118         s[i]=str[i]-'a'+1;
119         s[len]=0;
120         get_sa(s,sa,len+1,30);
121         get_height(s,len);
122         rmq_init(len);
123         int ans=0,pos=0,lenn;
124         for(int i=1;i<=len/2;i++)
125         {
126             for(int j=0;j<len-i;j+=i)
127             {
128                 if(str[j]!=str[j+i])
129                 continue;
130                 int k=rmq(j,j+i);
131                 int tol=k/i+1;
132                 //printf("%d\n",tol);
133                 int r=i-k%i;
134                 int p=j;
135                 int cnt=0;
136                 for(int m=j-1;m>j-i&&str[m]==str[m+i]&&m>=0;m--)
137                 {
138                     cnt++;
139                     if(cnt==r)
140                     {
141                         tol++;
142                         p=m;
143                     }
144                     else if(rank[p]>rank[m])
145                     {
146                         p=m;
147                     }
148                 }
149                 if(ans<tol)
150                 {
151                     ans=tol;
152                     pos=p;
153                     lenn=tol*i;
154                 }
155                 else if(ans==tol&&rank[pos]>rank[p])
156                 {
157                     pos=p;
158                     lenn=tol*i;
159                 }
160             }
161         }
162         printf("Case %d: ",++text);
163     //  printf("%d %d %d\n",ans,pos,lenn);
164         if(ans<2)                         //这里,如果字符总长度小于2,那么就在原串中找出一个最小的字符就好
165         {
166             char ch='z';
167             for(int i=0;i<len;i++)
168             if(str[i]<ch)
169             ch=str[i];
170             printf("%c\n",ch);
171             continue;
172         }
173         for(int i=pos;i<pos+lenn;i++)
174         printf("%c",str[i]);
175         printf("\n");
176     }
177     return 0;
178 }

 

转载于:https://www.cnblogs.com/767355675hutaishi/p/4310071.html

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值