UVA 题目760 DNA Sequencing (后缀数组求两个串最长公共子串,字典序输出)

本文介绍DNA分子的基本结构及其组成,并探讨如何寻找两个DNA序列的最长公共子串(连续的公共碱基序列)。通过具体的样例输入输出,展示了实现这一功能的算法流程。

摘要生成于 C知道 ,由 DeepSeek-R1 满血版支持, 前往体验 >



  DNA Sequencing 

A DNA molecule consists of two strands that wrap around each other to resemble a twisted ladder whose sides, made of sugar and phosphate molecules, are connected by rungs of nitrogen-containing chemicals called bases. Each strand is a linear arrangement of repeating similar units called nucleotides, which are each composed of one sugar, one phosphate, and a nitrogenous base. Four different bases are present in DNA: adenine (A), thymine (T), cytosine (C), and guanine (G). The particular order of the bases arranged along the sugar-phosphate backbone is called the DNA sequence; the sequence specifies the exact genetic instructions required to create a particular organism with its own unique traits.


Geneticists often compare DNA strands and are interested in finding the longest common base sequence in the two strands. Note that these strands can be represented as strings consisting of the letters a, t, c and g. So, the longest common sequence in the two strands atgc and tga is tg. It is entirely possible that two different common sequences exist that are the same length and are the longest possible common sequences. For example, in the strands atgc and gctg, the longest common sequences are gc and tg.

Input and Output 

Write a program that accepts as input two strings representing DNA strands, and prints as output the longest common sequence(s) in lexicographical order.

If there isn't any common sequence between the two strings, just print: "No common sequence."

If there is more than one test case, there must be a blank line between two consecutive test cases, in both the input and the output files.

The strings are at most 300 characters long.

Sample Input 

atgc
tga

atgc
gctg

Sample Output 

tg

gc
tg

0ms

ac代码

#include<stdio.h>   
#include<string.h>   
#include<algorithm>   
#include<iostream>  
// Smaller of two values. Every argument and the whole expansion are
// parenthesised so that operator expressions passed as arguments
// (e.g. min(x, y|z)) are evaluated as written. Arguments are still evaluated
// more than once, so do not pass expressions with side effects.
#define min(a,b) ((a)>(b)?(b):(a))
using namespace std;
// Raw input strands as read by main().
char str1[660],str2[660];
// sa: suffix array filled by build_sa(); c: counting-sort buckets used by
// build_sa(); t1/t2: the two scratch arrays build_sa() alternates between.
int sa[660],c[660],t2[660];
// s: integer-encoded text assembled by main() (strand 1, separator, strand 2,
// terminating 0).
int t1[660],s[660];
// rank: inverse of sa; height: common-prefix length of neighbouring suffixes
// in sa order. Both are filled by getHeight().
// NOTE: with `using namespace std` in effect, the unqualified name `rank` can
// be ambiguous with std::rank from <type_traits>; write ::rank when using it.
int rank[660],height[660];
// len1/len2: lengths of str1 and str2, set by main() for each test case.
int len1,len2;
// Builds the suffix array of s[0..n-1] into the global sa[] by prefix doubling,
// sorting with a counting sort on the global bucket array c[].
//   s : input text as integers; every value is used as an index into c[], so it
//       must lie in [0, m).
//   n : number of elements of s to sort.
//   m : number of distinct bucket values for the first pass (exclusive bound).
// Uses the global scratch arrays t1/t2 through the pointers x and y.
void build_sa(int s[],int n,int m)  
{  
    int i,j,p,*x=t1,*y=t2;  
    // First pass: counting sort of the suffixes by their first element only.
    // x[i] holds the current sort key of suffix i.
    for(i=0;i<m;i++)  
        c[i]=0;  
    for(i=0;i<n;i++)  
        c[x[i]=s[i]]++;  
    for(i=1;i<m;i++)  
        c[i]+=c[i-1];  
    for(i=n-1;i>=0;i--)  
        sa[--c[x[i]]]=i;  
    // Each round doubles j, the prefix length the keys in x already cover.
    for(j=1;j<=n;j<<=1)  
    {  
        // y lists suffix start positions ordered by the second half of the
        // 2j-long prefix: suffixes in the last j positions have no second half
        // and come first, the rest follow in the order of the current sa.
        p=0;  
        for(i=n-j;i<n;i++)  
            y[p++]=i;  
        for(i=0;i<n;i++)  
            if(sa[i]>=j)  
                y[p++]=sa[i]-j;  
        // Counting sort of y by the first-half key x[]; scanning from the back
        // keeps the second-half order among equal keys.
        for(i=0;i<m;i++)  
            c[i]=0;  
        for(i=0;i<n;i++)  
            c[x[y[i]]]++;  
        for(i=1;i<m;i++)  
            c[i]+=c[i-1];  
        for(i=n-1;i>=0;i--)  
            sa[--c[x[y[i]]]]=y[i];  
        // After the swap y holds the previous keys and x receives the new ones.
        swap(x,y);  
        p=1;  
        x[sa[0]]=0;  
        // Neighbours in sa share a key only when both halves of their old keys
        // match; otherwise the next key value is handed out.
        // NOTE(review): the reads at index +j stay inside the text only if equal
        // first-half keys imply both suffixes are longer than j, which appears
        // to rely on the caller ending s with a unique element - confirm.
        for(i=1;i<n;i++)  
            x[sa[i]]=y[sa[i-1]]==y[sa[i]]&&y[sa[i-1]+j]==y[sa[i]+j]?p-1:p++;  
        // p distinct keys for n suffixes means the order is final.
        if(p>=n)  
            break;  
        // The next round needs only p buckets.
        m=p;  
    }  
}  
void getHeight(int s[],int n)  
{  
    int i,j,k=0;  
    for(i=0;i<=n;i++)  
        rank[sa[i]]=i;  
    for(i=0;i<n;i++)  
    {  
        if(k)  
            k--;  
        j=sa[rank[i]-1];  
        while(s[i+k]==s[j+k])  
            k++;  
        height[rank[i]]=k;  
    }  
}
// Returns 1 when some pair of neighbouring suffixes sa[pos-1], sa[pos]
// (1 <= pos <= len) has height[pos] >= k and the two start positions lie on
// opposite sides of len1 (one > len1, the other <= len1); returns 0 otherwise.
int judge(int len,int k)
{
	for(int pos=1;pos<=len;++pos)
	{
		if(height[pos]<k)
			continue;
		const bool curAfter=sa[pos]>len1;
		const bool prevAfter=sa[pos-1]>len1;
		// Exactly one of the two neighbours starts after index len1.
		if(curAfter!=prevAfter)
			return 1;
	}
	return 0;
}
// Reads pairs of strands from stdin until no complete pair is left. For each
// pair it prints every longest common substring, one per line in suffix-array
// (lexicographical) order, or "No common sequence." when there is none.
// Consecutive test cases are separated by a blank line in the output.
int main()
{
	int flag=0;	// becomes 1 once the first test case has been printed
	// Both tokens are required: a lone trailing token must not be paired with
	// the previous case's str2. The width 329 bounds len1+len2+2 by 660, the
	// size of every work array, so over-long tokens cannot overflow them.
	while(scanf("%329s%329s",str1,str2)==2)
	{
		int i,j,k;
		if(flag)
			printf("\n");
		flag=1;
		len1=strlen(str1);
		len2=strlen(str2);
		// Encode "str1 <sep> str2 <end>": letters map to 1..26, the separator
		// is 27 (differs from every letter) and the terminator is 0 (smaller
		// than everything else).
		for(i=0;i<len1;i++)
			s[i]=str1[i]-'a'+1;
		s[len1]=27;
		int n=len1+1;
		for(i=0;i<len2;i++)
			s[n++]=str2[i]-'a'+1;
		s[n]=0;
		build_sa(s,n+1,28);
		getHeight(s,n);
		// Binary search for the largest length that judge() accepts.
		int l=0,r=min(len1,len2),ans=0;
		while(l<=r)
		{
			int mid=(l+r)>>1;
			if(judge(n,mid))
			{
				ans=mid;
				l=mid+1;
			}
			else
				r=mid-1;
		}
		if(!ans)
		{
			printf("No common sequence.\n");
			continue;
		}
		// Walk maximal runs of height[] >= ans. All suffixes in one run share
		// the same first ans characters, so each run is printed at most once.
		for(i=1;i<=n;i++)
		{
			if(height[i]>=ans)
			{
				// j becomes the first index past the run starting at i.
				for(j=i;j<=n&&height[j]>=ans;j++)
					;
				// Look for neighbours that come from different strands.
				for(k=i;k<j;k++)
				{
					if(sa[k]>len1&&sa[k-1]<len1)
						break;
					if(sa[k-1]>len1&&sa[k]<len1)
						break;
				}
				if(j!=k)
				{
					int st;
					for(st=0;st<ans;st++)
						printf("%c",s[sa[k]+st]+'a'-1);
					printf("\n");
				}
				// Continue after the run (the loop increment adds one).
				i=j-1;
			}
		}
	}
	return 0;
}


评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值