uva11107 - Life Forms 后缀数组

本文给出 UVa 11107 Life Forms 的题意,以及基于后缀数组、二分答案长度并按 height 数组分段统计的解法和 C++ 实现。

摘要生成于 C知道 ,由 DeepSeek-R1 满血版支持, 前往体验 >

Problem C: Life Forms

You may have wondered why most extraterrestrial life forms resemble humans, differing by superficial traits such as height, colour, wrinkles, ears, eyebrows and the like. A few bear no human resemblance; these typically have geometric or amorphous shapes like cubes, oil slicks or clouds of dust.

The answer is given in the 146th episode of Star Trek - The Next Generation, titled The Chase. It turns out that the vast majority of the quadrant's life forms ended up with a large fragment of common DNA.

Given the DNA sequences of several life forms represented as strings of letters, you are to find the longest substring that is shared by more than half of them.

Standard input contains several test cases. Each test case begins with 1 ≤ n ≤ 100, the number of life forms. n lines follow; each contains a string of lower case letters representing the DNA sequence of a life form. Each DNA sequence contains at least one and not more than 1000 letters. A line containing 0 follows the last test case.

For each test case, output the longest string or strings shared by more than half of the life forms. If there are many, output all of them in alphabetical order. If there is no solution with at least one letter, output "?". Leave an empty line between test cases.

Sample Input

3
abcdefg
bcdefgh
cdefghi
3
xxx
yyy
zzz
0

Output for Sample Input

bcdefg
cdefgh

?

  有N个DNA序列,求一个长度最大的字符串,使它在超过一半的DNA序列中出现。如果有多解,按字典序输出。

  首先用互不相同的分隔符把输入字符串拼接起来,并记录位置i的字符属于第j个DNA序列,即idx[i]=j。然后二分答案长度p:扫一遍height数组,每当height[i]<p时就新开一段,这样同一段内所有后缀的前p个字符都相同;再根据idx统计这一段中的后缀来自多少个不同的DNA序列,若超过N/2个则长度p可行(同一段后缀的前p个字符一定落在某一个DNA序列内部,不会跨越两个DNA,因为各DNA之间的分隔符互不相同)。求出最大长度后,再按同样的方法分段扫一遍height,遇到满足条件的段就输出其公共前缀;由于后缀数组本身按字典序排列,输出自然也是字典序。

#include<iostream>
#include<queue>
#include<cstring>
#include<cstdio>
#include<cmath>
#include<set>
#include<map>
#include<vector>
#include<stack>
#include<algorithm>
// Constants and aliases carried over from a contest template. None of the
// names in this group (INF, eps, MAXNODE, MOD, SIGMA_SIZE, LL) is referenced
// by the code visible in this file.
#define INF 0x3f3f3f3f
#define eps 1e-9
#define MAXNODE 105
#define MOD 10000007
#define SIGMA_SIZE 4
typedef long long LL;
// The code below calls max and swap unqualified.
using namespace std;

// Capacity of every per-position array (the SuffixArray members and idx).
const int MAXN=110000;
// Capacity of flag, which good() indexes by sequence number.
const int MAXM=110;

// N    - number of sequences in the current test case (read in main).
// flag - per-sequence "already counted" marks, reset and used by good().
// idx  - idx[p] is the number of the sequence that owns text position p, or N
//        for separator/terminator positions (filled in main).
int N,flag[MAXM],idx[MAXN];
// Input buffer for one sequence; main reads into it with scanf.
char str[1010];

// Suffix array over a text of integer symbols, built by prefix doubling with
// counting sorts, together with the rank array and the height array (longest
// common prefix of adjacent suffixes in sorted order).
// Usage as seen in main(): clear(), fill s[0..n-1] and n with a final symbol
// of 0, then call build_sa(m) followed by build_height().
struct SuffixArray{
    int s[MAXN];            // input symbols
    int sa[MAXN];           // suffix array: sa[i] is the start index in s of the i-th smallest suffix; the last symbol must be 0 and all earlier ones non-zero
    int rank[MAXN];         // rank array: rank[i] is the sorted position of the suffix starting at s[i]; rank[n-1]=0
    int height[MAXN];       // height[i] is the longest common prefix length of suffixes sa[i-1] and sa[i]
    int c[MAXN];            // bucket counters for the radix sort
    int t[MAXN],t2[MAXN];   // storage behind the x / y work pointers
    int n;                  // number of symbols

    // Resets the length to zero and zero-fills sa; the other arrays are left untouched.
    void clear(){
        n=0;
        memset(sa,0,sizeof(sa));
    }
    // m is the largest symbol value + 1; s and n must be set before calling.
    void build_sa(int m){
        int i,*x=t,*y=t2;
        // radix sort on single symbols; x[i] holds the current key of position i
        for(i=0;i<m;i++) c[i]=0;
        for(i=0;i<n;i++) c[x[i]=s[i]]++;
        for(i=1;i<m;i++) c[i]+=c[i-1];
        for(i=n-1;i>=0;i--) sa[--c[x[i]]]=i;
        // each round doubles the compared prefix length from k to 2k
        for(int k=1;k<=n;k<<=1){
            int p=0;
            // order positions by second key using the current sa;
            // positions with fewer than k symbols after them come first
            for(i=n-k;i<n;i++) y[p++]=i;
            for(i=0;i<n;i++) if(sa[i]>=k) y[p++]=sa[i]-k;
            // radix sort by first key (stable, so the second-key order is kept within a bucket)
            for(int i=0;i<m;i++) c[i]=0;
            for(int i=0;i<n;i++) c[x[y[i]]]++;
            for(int i=1;i<m;i++) c[i]+=c[i-1];
            for(int i=n-1;i>=0;i--) sa[--c[x[y[i]]]]=y[i];
            // after the swap y holds the old keys and x receives the new ones
            swap(x,y);
            p=1;
            x[sa[0]]=0;
            // adjacent suffixes share a new key only if both old key halves match
            for(int i=1;i<n;i++) x[sa[i]]=y[sa[i-1]]==y[sa[i]]&&y[sa[i-1]+k]==y[sa[i]+k]?p-1:p++;
            // all keys distinct: the order is final
            if(p>=n) break;
            // the next round only needs p buckets
            m=p;
        }
    }
    // Fills rank and height from s and sa; call after build_sa.
    void build_height(){
        int i,j,k=0;
        for(int i=0;i<n;i++) rank[sa[i]]=i;
        height[0]=0;
        // position n-1 is skipped; per the rank comment above its rank is 0,
        // so it has no predecessor in sorted order
        for(int i=0;i<n-1;i++){
            // carry the previous match length over, minus one symbol
            if(k) k--;
            j=sa[rank[i]-1];
            // NOTE(review): termination relies on the final symbol 0 occurring only once - confirm for other callers
            while(s[i+k]==s[j+k]) k++;
            height[rank[i]]=k;
        }
    }
}sa;

// Decides whether the suffixes at sorted positions [L,R) start inside more
// than N/2 different input sequences. Positions whose idx entry equals N
// (separators and the terminator) are not counted. Overwrites flag.
bool good(int L,int R){
    const int half=N/2;
    // Fewer than half+1 suffixes cannot cover more than half of the sequences.
    if(R-L<=half) return false;
    memset(flag,0,sizeof(flag));
    int distinct=0;
    for(int pos=L;pos<R;++pos){
        const int owner=idx[sa.sa[pos]];
        if(owner==N) continue;      // separator or terminator position
        if(flag[owner]) continue;   // this sequence was already counted
        flag[owner]=1;
        ++distinct;
    }
    return distinct>half;
}

// Returns true when some maximal run of sorted suffixes whose adjacent height
// values are all at least len satisfies good(). Runs are half-open ranges
// [first,last) of positions in the suffix array.
bool check(int len){
    int first=0;
    int last=1;
    while(last<=sa.n){
        // A run ends at the end of the array or where the shared prefix drops below len.
        const bool runEnds=(last==sa.n)||(sa.height[last]<len);
        if(runEnds){
            if(good(first,last)) return true;
            first=last;
        }
        ++last;
    }
    return false;
}

// Walks the same runs as check() and, for every run accepted by good(), prints
// the first len symbols of the run's first suffix as lower-case letters,
// followed by a newline.
void print(int len){
    int first=0;
    for(int last=1;last<=sa.n;++last){
        // Still inside the current run: keep extending it.
        if(last!=sa.n&&sa.height[last]>=len) continue;
        if(good(first,last)){
            const int start=sa.sa[first];
            // Symbols were stored as letter-'a'+1, so map them back before printing.
            for(int off=0;off<len;++off) printf("%c",sa.s[start+off]+'a'-1);
            puts("");
        }
        first=last;
    }
}

// Prints "?" when not even length 1 passes check(); otherwise binary-searches
// the largest length in [1,maxlen] accepted by check() and prints every
// substring of that length via print().
void solve(int maxlen){
    const bool anyCommon=check(1);
    if(anyCommon){
        int lo=1;
        int hi=maxlen;
        while(lo<hi){
            // Upper midpoint, so that lo=mid always makes progress.
            const int mid=lo+(hi-lo+1)/2;
            if(check(mid)){
                lo=mid;
            }else{
                hi=mid-1;
            }
        }
        print(lo);
    }else{
        printf("?\n");
    }
}

// Reads test cases from standard input until a case count of 0 (or the end of
// input) and writes the answers to standard output, with one empty line
// between consecutive test cases.
//
// For each case the sequences are concatenated into sa.s: letters become
// 1..26, sequence i is followed by its own separator symbol i+100, and a final
// 0 terminates the text. idx records the owning sequence of every position
// (N for separators and the terminator).
//
// The array capacities assume the limits of the problem statement
// (at most 100 sequences of at most 1000 letters).
//
// Input is taken from standard input as the problem statement specifies. The
// previous unconditional freopen of "in.txt" left stdin unusable wherever that
// file did not exist; use shell redirection for local testing instead.
int main(){
    int cas=0;
    // Compare against 1 rather than EOF: on a non-numeric token scanf returns 0
    // and leaves N unchanged, which would otherwise repeat the loop forever.
    while(scanf("%d",&N)==1&&N>0){
        if(++cas>1) puts("");
        sa.clear();
        int maxlen=0;
        for(int i=0;i<N;i++){
            // Width-limited so an over-long token cannot overrun str[1010];
            // a failed read means the input is truncated, so stop.
            if(scanf("%1009s",str)!=1) return 0;
            int len=strlen(str);
            maxlen=max(maxlen,len);
            for(int j=0;j<len;j++){
                idx[sa.n]=i;
                sa.s[sa.n++]=str[j]-'a'+1;
            }
            // A distinct separator per sequence keeps common prefixes from
            // extending across sequence boundaries.
            idx[sa.n]=N;
            sa.s[sa.n++]=i+100;
        }
        // Unique smallest terminator required by the suffix array construction.
        idx[sa.n]=N;
        sa.s[sa.n++]=0;
        // With a single sequence the answer is the sequence itself.
        if(N==1){
            printf("%s\n",str);
            continue;
        }
        // Largest symbol is the last separator, N-1+100, so 110+N buckets suffice.
        sa.build_sa(110+N);
        sa.build_height();
        solve(maxlen);
    }
    return 0;
}



评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值