DNA sequence

最新推荐文章于 2021-02-28 15:32:40 发布

古古天

最新推荐文章于 2021-02-28 15:32:40 发布

阅读量1.5k

点赞数

CC 4.0 BY-SA版权

分类专栏： ACM 文章标签： ACM题目

本文链接：https://blog.youkuaiyun.com/xing634325131/article/details/8806730

ACM 专栏收录该内容

26 篇文章

订阅专栏

杭电1019

Problem Description

The twenty-first century is a century of developing biotechnology. We know that a gene is made of DNA. The nucleotide bases from which DNA is built are A (adenine), C (cytosine), G (guanine), and T (thymine). Finding the longest common subsequence between DNA/protein sequences is one of the basic problems in modern computational molecular biology. But this problem is a little different. Given several DNA sequences, you are asked to make the shortest sequence from them such that each of the given sequences is a subsequence of it.

For example, given "ACGT", "ATGC", "CGTT" and "CAGT", you can make a sequence in the following way. It is the shortest, but it may not be the only one.

Input
The first line is the number of test cases t. Then t test cases follow. In each case, the first line is an integer n ( 1<=n<=8 ) representing the number of DNA sequences. The following n lines contain the n sequences, one per line. The length of each sequence is between 1 and 5.

Output
For each test case, print a line containing the length of the shortest sequence that can be made from these sequences.

Sample Input
1
4
ACGT
ATGC
CGTT
CAGT

Sample Output
8

Code

之前的代码：

#include<iostream>
#include<string>
using namespace std;
/**
 * Sorts the 1-indexed range a[1..m] in ascending order of string length.
 *
 * Plain selection sort: for every position i the shortest string in
 * a[i..m] is located and swapped into a[i]. Element a[0] is never touched.
 * The relative order of strings with equal length is not preserved.
 *
 * @param a  array whose elements 1..m are reordered in place
 * @param m  index of the last element to sort; nothing happens when m <= 1
 */
void sort(std::string a[],int m)
{
	for(int i=1;i<m;i++)
	{
		// Track the INDEX of the shortest string seen so far (not its
		// length), and compare every candidate against that current minimum.
		int minIdx=i;
		for(int j=i+1;j<=m;j++)
			if(a[j].length()<a[minIdx].length())
				minIdx=j;
		if(minIdx!=i)
			a[i].swap(a[minIdx]);
	}
}
/**
 * Reads the number of test cases, then for each case reads m strings into
 * a[1..m], sorts them by length and greedily merges them into a[1]:
 * on every round the remaining string whose prefix overlaps the longest
 * suffix of a[1] is appended (minus the overlapping part) and removed
 * from the list. The length of the merged string is printed per case.
 *
 * NOTE(review): greedy overlap merging is a heuristic; it builds a string
 * that contains every input as a contiguous substring and is not guaranteed
 * to be the shortest possible result.
 *
 * @return 0 always; processing stops early on malformed input.
 */
int main()
{
	int n=0;
	std::string a[22];                       // 1-indexed; usable slots are a[1..21]
	std::cin>>n;
	while(n-->0)
	{
		int m=0;
		// Refuse counts that do not fit into a[1..21] instead of writing
		// past the end of the array.
		if(!(std::cin>>m)||m<1||m>21)
			break;
		int h=m;                             // number of strings still to be merged
		for(int i=1;i<=m;i++)
			std::cin>>a[i];
		sort(a,m);
		while(h>1)
		{
			// Pick the candidate with the largest suffix/prefix overlap with
			// a[1]; ties keep the lowest index.
			int best=-1,flag=2;
			const std::size_t headLen=a[1].length();
			for(int t=2;t<=h;t++)
			{
				const std::size_t candLen=a[t].length();
				const std::size_t limit=headLen<candLen?headLen:candLen;
				int overlap=0;
				// Longest k such that the last k chars of a[1] equal the
				// first k chars of a[t]; every comparison stays in bounds.
				for(std::size_t k=limit;k>=1;k--)
					if(a[1].compare(headLen-k,k,a[t],0,k)==0)
					{
						overlap=static_cast<int>(k);
						break;
					}
				if(overlap>best)
				{
					best=overlap;
					flag=t;
				}
			}
			// Append only the part of the chosen string that is not already
			// covered by the overlap.
			a[1]+=a[flag].substr(static_cast<std::size_t>(best));
			// Close the gap left by the consumed string.
			for(int we=flag;we<h;we++)
				a[we]=a[we+1];
			h--;
		}
		std::cout<<a[1].length()<<'\n';
	}
	return 0;
}

这个代码还有点问题，以下是之后的代码：

#include<iostream>
#include<string.h>
using namespace std;
 
// Input strings: main() reads the i-th string of a test case into seg[i].
char seg[11][101];
// n:   number of strings in the current test case (read in main()).
// ans: smallest total length found so far by dfs(); reset in main() per case.
int n,ans;
// len[i] is strlen(seg[i]), filled in main() right after reading seg[i].
int len[11];
// addlen[m][n] is set by add(m,n) to len[n] minus the longest overlap between
// a suffix of seg[m] and a prefix of seg[n], i.e. the number of characters
// that appending seg[n] after seg[m] contributes.
int addlen[11][11];
// used[i] marks strings already placed on the current dfs() path.
bool used[11];
 
// Computes how many characters seg[n] adds when it is placed directly after
// seg[m], and stores the result in addlen[m][n].
//
// The overlap is the longest length l (bounded by both string lengths) for
// which the last l characters of seg[m] equal the first l characters of
// seg[n]; when no such l exists the overlap is 0 and the whole of seg[n]
// counts. Note that the parameter n shadows the global n inside this function.
void add(int m,int n){
    const int limit = len[m]<len[n] ? len[m] : len[n];
    int overlap=0;
    // Search from the longest candidate downwards; the first hit is the
    // largest matching length.
    for(int l=limit; l>=1; --l){
        const char* tail=seg[m]+(len[m]-l);
        if(strncmp(tail,seg[n],l)==0){
            overlap=l;
            break;
        }
    }
    addlen[m][n]=len[n]-overlap;
}
 
// Depth-first search over orderings of the strings.
//
// pre    - index of the string placed last on the current path
// length - how many strings have been placed so far
// sum    - total length accumulated by the current path
//
// Updates the global ans whenever a complete ordering (length == n) with a
// smaller total is found. The used[] flags are restored before returning.
void dfs(int pre,int length,int sum){
    // Prune: this path can no longer improve on the best known total.
    if(ans<=sum) return;

    if(length==n){
        // The prune above guarantees sum < ans here.
        ans=sum;
        return;
    }

    for(int next=0; next<n; ++next){
        if(used[next]) continue;
        used[next]=true;
        dfs(next,length+1,sum+addlen[pre][next]);
        used[next]=false;
    }
}
 
// Reads t test cases. Each case consists of a count n followed by n strings;
// for every case the pairwise append costs are precomputed with add(), all
// orderings are explored with dfs() starting from each string in turn, and
// the smallest total length found is printed on its own line.
//
// Processing stops early if a count cannot be read or does not fit into the
// fixed-size global arrays.
int main(){
    int i,j,t=0;
    const int capacity=static_cast<int>(sizeof(len)/sizeof(len[0]));
    std::cin>>t;
    while(t-->0){
        // Guard the fixed-size arrays against out-of-range counts.
        if(!(std::cin>>n) || n<1 || n>capacity) break;

        int total=0;
        for(i=0; i<n; i++){
            // Bound the extraction to the row size so that an over-long
            // token cannot overflow seg[i]; the width resets after each read.
            std::cin.width(static_cast<std::streamsize>(sizeof(seg[i])));
            std::cin>>seg[i];
            len[i]=strlen(seg[i]);
            total+=len[i];
        }
        for(i=0; i<n; i++)
            for(j=0; j<n; j++)
                add(i,j);

        // Any complete ordering costs at most the sum of all lengths, so
        // total+1 is a strict upper bound regardless of the input size.
        ans=total+1;
        memset(used,false,sizeof(used));
        for(i=0; i<n; i++){
            used[i]=true;
            dfs(i,1,len[i]);
            used[i]=false;
        }
        std::cout<<ans<<std::endl;
    }
    return 0;
}