1713. Key Substrings
Time limit: 2.0 seconds
Memory limit: 64 MB
Although the program committee works as one team, heated debates arise frequently enough. For example, there is no agreement upon which client of the version control system is more convenient to use:
a graphic interface program or a console client.
Let us consider some command of a console client. A substring of this command that is not a substring of any other command of this client can be called a key substring because it uniquely identifies
the command. In the latest versions of the client, it is not necessary to type the whole command; it is sufficient to type any of its key substrings.
A supporter of the console client wants to convince the program committee to use it. In order to show how fast and convenient the work with this client is, he wants to find a key substring of minimal
length for each command. Help him do it.
Input
The first line contains the number n of commands in the console client (2 ≤ n ≤ 1000). Each of the following n lines contains one command of the client. Each command is a
nonempty string consisting of lowercase Latin letters and its length is at most 100. No command is a substring of another command.
Output
Output n lines. The i-th line should contain any of the shortest key substrings of the i-th command (the commands are numbered in the order they are given in the input).
Sample
input | output |
---|---|
3<br>abcm<br>acm<br>bcd | ab<br>ac<br>d |
Problem Author: Dmitry Ivankov (idea by Alexander Mironenko)
Problem Source: NEERC 2009, Eastern subregional contest
题意:要为每一个字符串寻找一个最短的子串,使这个子串不会出现在其他任何字符串中。首先将所有字符串连成一个字符串,中间用特殊值(0)分隔,并记录每一个字符串开始和结束的位置。对于每一个字符串,我们枚举它的子串开始的位置,通过rank数组找到相应后缀在sa数组中的位置,然后分别向前和向后找第一个不是在当前字符串范围内起始的后缀,取这两个lcp中的较大值再加1,就是从该位置开始的最短关键子串的长度(如果这个长度超出了字符串末尾,则该起始位置无效)。枚举完每一个位置后,所有这些最大lcp中的最小值所对应的子串,就是我们要找的替代子串。每一个字符串都用这种方式处理。
罗穗骞 2009 论文
#include <stdio.h>
#include <string.h>
#include <algorithm>
#include <iostream>
using namespace std;
// Size of every global work buffer declared with maxn in this file.
#define maxn 200550
// Scratch space for da(): wa and wb are the two rank arrays it swaps between
// rounds, wv holds the keys being sorted, wd holds the counting-sort buckets.
int wa[maxn],wb[maxn],wv[maxn],wd[maxn];
// Tells whether positions a and b carry the same pair of keys: the key at the
// position itself and the key l places further on. Returns 1 if both match,
// 0 otherwise. The second key is only read when the first one matches.
int cmp(int *r,int a,int b,int l)
{
    if(r[a]!=r[b])
        return 0;
    return (r[a+l]==r[b+l]) ? 1 : 0;
}
void da(int *r,int *sa,int n,int m)//倍增算法
{
int i,j,p,*x=wa,*y=wb,*t;
for(i=0; i<m; i++) wd[i]=0;
for(i=0; i<n; i++) wd[x[i]=r[i]]++;
for(i=1; i<m; i++) wd[i]+=wd[i-1];
for(i=n-1; i>=0; i--) sa[--wd[x[i]]]=i;
for(j=1,p=1; p<n; j*=2,m=p)
{
for(p=0,i=n-j; i<n; i++) y[p++]=i;
for(i=0; i<n; i++) if(sa[i]>=j) y[p++]=sa[i]-j;
for(i=0; i<n; i++) wv[i]=x[y[i]];
for(i=0; i<m; i++) wd[i]=0;
for(i=0; i<n; i++) wd[wv[i]]++;
for(i=1; i<m; i++) wd[i]+=wd[i-1];
for(i=n-1; i>=0; i--) sa[--wd[wv[i]]]=y[i];
for(t=x,x=y,y=t,p=1,x[sa[0]]=0,i=1; i<n; i++)
x[sa[i]]=cmp(y,sa[i-1],sa[i],j)?p-1:p++;
}
return;
}
// Filled by calheight(): suf_rank[p] is the index in sa[] of the suffix that
// starts at text position p; height[k] is the number of leading symbols shared
// by the suffixes sa[k-1] and sa[k].
int suf_rank[maxn],height[maxn];
// Computes the height array.
// Fills suf_rank[] from sa[1..n], then for every text position 0..n-1 stores in
// height[suf_rank[p]] how many leading symbols the suffix at p shares with its
// predecessor in sa[]. Matching stops at a 0 symbol, so a common prefix never
// extends across a 0 in the text.
void calheight(int *r,int *sa,int n)
{
    for(int slot=1; slot<=n; ++slot)
        suf_rank[sa[slot]]=slot;

    int common=0;
    for(int start=0; start<n; ++start)
    {
        // The value for this suffix is at least the previous one minus 1,
        // so matching resumes from there instead of from zero.
        if(common!=0) --common;
        const int before=sa[suf_rank[start]-1];
        while(r[start+common]==r[before+common] && r[start+common]!=0)
            ++common;
        height[suf_rank[start]]=common;
    }
}
// Sparse-table storage for range-minimum queries, populated by initRMQ():
// RMQ[] is a copy of height[]; mm[len] is floor(log2(len)) with mm[0] == -1;
// best[k][i] is the index of the minimum of RMQ[i .. i+2^k-1].
int RMQ[maxn];
int mm[maxn];
int best[20][maxn];
// Builds the sparse table over height[1..n] used by askRMQ().
// Copies height[] into RMQ[], fills the floor(log2) table mm[], and for each
// power-of-two window records in best[][] the index of its minimum
// (the right-hand index wins when the two halves tie).
void initRMQ(int n)
{
    for(int idx=1; idx<=n; ++idx)
    {
        RMQ[idx]=height[idx];
        best[0][idx]=idx;
    }

    mm[0]=-1;
    for(int len=1; len<=n; ++len)
    {
        // The logarithm grows by one exactly at powers of two.
        const bool power_of_two=((len&(len-1))==0);
        mm[len]=power_of_two ? mm[len-1]+1 : mm[len-1];
    }

    for(int level=1; level<=mm[n]; ++level)
    {
        const int half=1<<(level-1);
        const int last_start=n+1-(1<<level);
        for(int left=1; left<=last_start; ++left)
        {
            const int first_half=best[level-1][left];
            const int second_half=best[level-1][left+half];
            if(RMQ[first_half]<RMQ[second_half])
                best[level][left]=first_half;
            else
                best[level][left]=second_half;
        }
    }
}
// Returns the index of the minimum of RMQ[a..b], using the tables filled by
// initRMQ(). The range is covered by two overlapping power-of-two windows,
// one anchored at a and one ending at b.
int askRMQ(int a,int b)
{
    const int level=mm[b-a+1];
    const int right_start=b-((1<<level)-1);
    const int left_min=best[level][a];
    const int right_min=best[level][right_start];
    if(RMQ[left_min]<RMQ[right_min])
        return left_min;
    return right_min;
}
int lcp(int a,int b)//最长公共前缀
{
int t;
a=suf_rank[a];
b=suf_rank[b];
if(a>b)
{
t=a;
a=b;
b=t;
}
return(height[askRMQ(a+1,b)]);
}
// m: number of commands read by main. n: running length of the concatenated
// text while main builds it, then (after main's n--) the index of its last symbol.
int n,m;
// st[i]: the i-th command as read from the input.
char st[1005][105];
// rpos[i][j]: position in the concatenated text of character j of command i
// (j == len[i] is the 0 separator that follows the command).
int rpos[1005][105];
// maxL[i][j]: main accumulates here the longest common prefix between the
// suffix of command i starting at j and the suffixes of other commands.
int maxL[1005][105];
// len[i]: length of command i.
int len[1005];
// pid[p] / pos[p]: command index and offset inside that command for text position p.
int pid[maxn];
int pos[maxn];
// r: the concatenated text as integers (input of da); sa: its suffix array.
int r[maxn],sa[maxn];
// ss: character copy of the concatenated text; main writes it but does not read it.
char ss[maxn];
// Entry point.
// Reads the number of commands and the commands themselves, concatenates them
// into one 0-separated text, builds its suffix array and height array, and for
// every command prints a shortest substring that occurs in no other command.
// Returns 0 in all cases; malformed or truncated input produces no output.
int main()
{
    // Never accept more commands than the fixed-size tables can hold.
    const int max_cmds=static_cast<int>(sizeof(st)/sizeof(st[0]));
    if (scanf("%d",&m)!=1 || m<1 || m>max_cmds) return 0;

    for (int i=0; i<m; i++)
    {
        // Width-limited read: a token can never overrun the 105-byte row.
        if (scanf("%104s",st[i])!=1) return 0;
        len[i]=static_cast<int>(strlen(st[i]));
    }

    memset(maxL,0,sizeof(maxL));

    // Concatenate all commands, each followed by a 0 separator, remembering
    // for every text position which command and offset it came from.
    n=0;
    for (int i=0; i<m; i++)
    {
        int j;
        for (j=0; j<len[i]; j++)
        {
            pos[n]=j;
            pid[n]=i;
            rpos[i][j]=n;
            ss[n]=st[i][j];
            // Go through unsigned char so the value is a valid bucket index
            // even where plain char is signed.
            r[n++]=static_cast<unsigned char>(st[i][j]);
        }
        ss[n]=0;
        pos[n]=j;
        pid[n]=i;
        rpos[i][j]=n;
        r[n++]=0;
    }
    n--;  // n is now the index of the final separator

    da(r,sa,n+1,500);
    calheight(r,sa,n);

    // Pass 1: neighbours in suffix order that belong to different commands
    // share height[i] symbols; record that for both of them.
    for (int i=1; i<=n; i++)
    {
        const int pre_id=pid[sa[i-1]];
        const int pre_pos=pos[sa[i-1]];
        const int now_id=pid[sa[i]];
        const int now_pos=pos[sa[i]];
        if (pre_id!=now_id)
        {
            maxL[pre_id][pre_pos]=std::max(maxL[pre_id][pre_pos],height[i]);
            maxL[now_id][now_pos]=std::max(maxL[now_id][now_pos],height[i]);
        }
    }

    // Pass 2 (forward): inside a run of suffixes of the same command, carry the
    // overlap with the closest preceding foreign suffix along the run.
    for (int i=1; i<=n; i++)
    {
        const int pre_id=pid[sa[i-1]];
        const int pre_pos=pos[sa[i-1]];
        const int now_id=pid[sa[i]];
        const int now_pos=pos[sa[i]];
        if (pre_id==now_id)
            maxL[now_id][now_pos]=std::max(maxL[now_id][now_pos],
                                           std::min(height[i],maxL[pre_id][pre_pos]));
    }

    // Pass 3 (backward): the same for the closest following foreign suffix.
    for (int i=n-1; i>0; i--)
    {
        const int pre_id=pid[sa[i]];
        const int pre_pos=pos[sa[i]];
        const int now_id=pid[sa[i+1]];
        const int now_pos=pos[sa[i+1]];
        if (pre_id==now_id)
            maxL[pre_id][pre_pos]=std::max(maxL[pre_id][pre_pos],
                                           std::min(height[i+1],maxL[now_id][now_pos]));
    }

    for (int i=0; i<m; i++)
    {
        int bestv=105,bestp=-1;  // 105 exceeds every possible overlap
        for (int j=0; j<len[i]; j++)
        {
            // A key starting at j needs maxL[i][j]+1 characters; skip starts
            // where that would run past the end of the command.
            if (j+maxL[i][j]+1>len[i]) continue;
            if (maxL[i][j]<bestv)
            {
                bestv=maxL[i][j];
                bestp=j;
            }
            else if (maxL[i][j]==bestv && suf_rank[rpos[i][j]]<suf_rank[rpos[i][bestp]])
            {
                // Same length: prefer the start whose suffix sorts first.
                bestp=j;
            }
        }
        if (bestp<0)
        {
            // No key substring exists (the input guarantee was violated);
            // print the whole command instead of reading before the buffer.
            puts(st[i]);
            continue;
        }
        for (int j=bestp; j<bestp+bestv+1; j++)
            putchar(st[i][j]);
        putchar('\n');
    }
    return 0;
}