UVa760 - DNA Sequencing(后缀数组)

本文探讨了DNA序列比对中寻找最长公共子串(连续的公共碱基序列)的问题,并提供了一个基于后缀数组的求解算法:输入两个DNA字符串,按字典序输出它们之间所有最长的公共子串。文章详细解释了DNA分子结构、遗传指令与基因表达,以及如何利用算法解决实际生物学问题。

摘要生成于 C知道 ,由 DeepSeek-R1 满血版支持, 前往体验 >

 A DNA molecule consists of two strands that wrap around each other to resemble a twisted ladder whose sides, made of sugar and phosphate molecules, are connected by rungs of nitrogen-containing chemicals called bases. Each strand is a linear arrangement of repeating similar units called nucleotides, which are each composed of one sugar, one phosphate, and a nitrogenous base. Four different bases are present in DNA: adenine (A), thymine (T), cytosine (C), and guanine (G). The particular order of the bases arranged along the sugar-phosphate backbone is called the DNA sequence; the sequence specifies the exact genetic instructions required to create a particular organism with its own unique traits.


Geneticists often compare DNA strands and are interested in finding the longest common base sequence in the two strands. Note that these strands can be represented as strings consisting of the letters a, t, c and g. So, the longest common sequence in the two strands atgc and tga is tg. It is entirely possible that two different common sequences exist that are the same length and are the longest possible common sequences. For example, in the strands atgc and gctg, the longest common sequences are gc and tg.

Input and Output 

Write a program that accepts as input two strings representing DNA strands, and prints as output the longest common sequence(s) in lexicographical order.

If there isn't any common sequence between the two strings, just print: "No common sequence."

If there is more than one test case, there must be a blank line between two consecutive test cases, both in the input and in the output files.

The strings are at most 300 characters long.

Sample Input 

atgc
tga

atgc
gctg

Sample Output 

tg

gc
tg


import java.io.BufferedInputStream;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.util.Arrays;
import java.util.Scanner;

/**
 * Solution to UVa 760 "DNA Sequencing": for each pair of strands read from the
 * input, prints every longest common substring in lexicographical order, or
 * {@code No common sequence.} when the strands share no character. Consecutive
 * test cases are separated by a blank line in the output.
 *
 * <p>The two strands are joined as {@code first + '#' + second + '\0'}; a suffix
 * array and a longest-common-prefix ("height") array are built over that text,
 * and the answer is read off adjacent suffix pairs that start in different
 * strands.
 *
 * <p>The algorithm relies on the strands containing only characters greater
 * than {@code '#'} and not greater than {@code 'z'} (the problem guarantees
 * the letters a, t, c, g), so that the separator and the terminator are unique
 * and sort before every strand character.
 */
public class Main implements Runnable
{
	private static final boolean DEBUG = false;
	/** Initial capacity of the work buffers; they grow when a test case needs more. */
	private static final int MAXN = 660;
	private PrintWriter cout;
	private Scanner cin;
	/** Concatenated text: first strand, '#', second strand, terminating 0. */
	private char[] s = new char[MAXN];
	/** Suffix array: sa[r] is the start index of the suffix with rank r. */
	private int[] sa = new int[MAXN];
	/** Rank keys of the current doubling round. */
	private int[] x = new int[MAXN];
	/** Second-key ordering, then (after the swap) the previous round's ranks. */
	private int[] y = new int[MAXN];
	/** Counting-sort buckets. */
	private int[] c = new int[MAXN];
	/** Inverse of sa: rank[i] is the position of suffix i in sa. */
	private int[] rank = new int[MAXN];
	/** height[r] is the common-prefix length of suffixes sa[r - 1] and sa[r]. */
	private int[] height = new int[MAXN];
	/** n is the index of the terminating 0 in s; len is the first strand's length. */
	private int n, len;
	private boolean first = true;

	/**
	 * Opens the input scanner and the output writer.
	 *
	 * @throws IllegalStateException if DEBUG is enabled and the debug input file
	 *         cannot be opened
	 */
	private void init()
	{
		if (DEBUG) {
			try {
				cin = new Scanner(new BufferedInputStream(new FileInputStream(
						"d:\\OJ\\uva_in.txt")));
			} catch (FileNotFoundException e) {
				// Fail here with the cause instead of a later NullPointerException on cin.
				throw new IllegalStateException("Cannot open debug input file", e);
			}
		} else {
			cin = new Scanner(new BufferedInputStream(System.in));
		}

		cout = new PrintWriter(new OutputStreamWriter(System.out));
	}

	/**
	 * Grows every work buffer so that indices {@code 0 .. required - 1} are valid.
	 * Buffers never shrink, so the bucket array always keeps at least MAXN entries.
	 */
	private void ensureCapacity(int required)
	{
		if (required <= s.length) return;

		int capacity = Math.max(required, s.length * 2);
		s = new char[capacity];
		sa = new int[capacity];
		x = new int[capacity];
		y = new int[capacity];
		c = new int[capacity];
		rank = new int[capacity];
		height = new int[capacity];
	}

	/**
	 * Reads the next pair of strands into {@code s} and sets {@code len} and {@code n}.
	 *
	 * @return {@code false} when the input holds no further complete pair
	 *         (including a dangling single strand at the end), {@code true} otherwise
	 */
	private boolean input()
	{
		if (!cin.hasNext()) return false;
		String firstStrand = cin.next();

		// A lone trailing token is not a test case; stop instead of throwing.
		if (!cin.hasNext()) return false;
		String secondStrand = cin.next();

		len = firstStrand.length();
		n = len + 1 + secondStrand.length();
		ensureCapacity(n + 1);

		firstStrand.getChars(0, len, s, 0);
		s[len] = '#';
		secondStrand.getChars(0, secondStrand.length(), s, len + 1);
		s[n] = 0;

		return true;
	}

	/**
	 * Builds the suffix array of {@code s[0 .. n - 1]} into {@code sa} by prefix
	 * doubling with counting sort.
	 *
	 * @param n number of characters to sort, including the unique smallest
	 *          terminator at index {@code n - 1}
	 * @param m exclusive upper bound of the character codes in {@code s}
	 */
	void build_sa(int n, int m)
	{
		// Round 0: counting sort of the suffixes by their first character.
		Arrays.fill(c, 0, m, 0);
		for (int i = 0; i < n; i++) c[x[i] = s[i]]++;
		for (int i = 1; i < m; i++) c[i] += c[i - 1];
		for (int i = n - 1; i >= 0; i--) sa[--c[x[i]]] = i;

		for (int k = 1; k <= n; k <<= 1) {
			// y lists suffixes ordered by their second half (characters k .. 2k - 1):
			// suffixes with no second half come first, then the rest in sa order.
			int p = 0;
			for (int i = n - k; i < n; i++) y[p++] = i;
			for (int i = 0; i < n; i++) if (sa[i] >= k) y[p++] = sa[i] - k;

			// Stable counting sort of that list by the first-half rank.
			Arrays.fill(c, 0, m, 0);
			for (int i = 0; i < n; i++) c[x[y[i]]]++;
			for (int i = 1; i < m; i++) c[i] += c[i - 1];
			for (int i = n - 1; i >= 0; i--) sa[--c[x[y[i]]]] = y[i];

			// Exchange the roles of x and y: y now holds the previous ranks.
			int[] previous = x;
			x = y;
			y = previous;

			p = 1;
			x[sa[0]] = 0;
			for (int i = 1; i < n; i++) {
				// The second lookup only runs when the first halves tie, which the
				// unique terminator guarantees leaves both indices inside the text.
				boolean sameKey = y[sa[i - 1]] == y[sa[i]]
						&& y[sa[i - 1] + k] == y[sa[i] + k];
				x[sa[i]] = sameKey ? p - 1 : p++;
			}

			if (p >= n) break; // every suffix already has a distinct rank
			m = p;
		}
	}

	/**
	 * Fills {@code rank} and {@code height} from {@code sa}.
	 *
	 * @param n index of the terminator in {@code s}; suffixes {@code 0 .. n - 1}
	 *          receive a height, the terminator suffix (rank 0) does not
	 */
	private void getHeight(int n)
	{
		for (int i = 0; i < n + 1; i++) rank[sa[i]] = i;

		int k = 0;
		for (int i = 0; i < n; i++) {
			// The match length drops by at most one when moving to the next suffix.
			if (k > 0) k--;

			int j = sa[rank[i] - 1];
			while (s[i + k] == s[j + k]) k++; // stops at the unique terminator
			height[rank[i]] = k;
		}
	}

	/**
	 * Tells whether the suffixes ranked {@code i - 1} and {@code i} start in
	 * different strands (one before the separator at {@code len}, one after it).
	 */
	private boolean crosses(int i, int len)
	{
		return (sa[i - 1] < len && sa[i] > len)
				|| (sa[i - 1] > len && sa[i] < len);
	}

	/**
	 * Tests whether the strands share a substring of length at least {@code mid}.
	 *
	 * @param n   index of the terminator in {@code s}
	 * @param len length of the first strand (index of the separator)
	 * @param mid candidate common-substring length
	 * @return {@code true} if some adjacent suffix pair from different strands
	 *         has a common prefix of at least {@code mid} characters
	 */
	boolean check(int n, int len, int mid)
	{
		for (int i = 1; i <= n; i++) {
			if (height[i] >= mid && crosses(i, len)) return true;
		}

		return false;
	}

	/**
	 * Solves the test case currently held in {@code s} and prints its answer,
	 * preceded by a blank line for every case after the first.
	 */
	private void solve()
	{
		if (!first) cout.println();

		build_sa(n + 1, 'z' + 1);

		getHeight(n);

		// Binary search for the smallest length that is NOT shared; check() is monotone.
		int low = 0, high = n, mid;

		while (low < high) {
			mid = (low + high) >>> 1;
			if (check(n, len, mid)) low = mid + 1;
			else high = mid;
		}

		int best = low - 1; // longest shared length
		if (best != 0) {
			for (int i = 1; i <= n; i++) {
				if (height[i] < best) continue;

				// Ranks i - 1 .. end - 1 form one run of suffixes sharing the same
				// prefix of length best; report it if both strands appear in the run.
				int end = i;
				boolean inBothStrands = false;
				while (end <= n && height[end] >= best) {
					inBothStrands |= crosses(end, len);
					end++;
				}

				if (inBothStrands) {
					cout.println(new String(s, sa[i], best));
				}
				i = end - 1;
			}
		} else {
			cout.println("No common sequence.");
		}

		if (first) first = false;

		cout.flush();
	}

	/** Reads test cases until the input is exhausted and prints each answer. */
	@Override
	public void run()
	{
		init();

		while (input()) {
			solve();
		}
	}

	/** Program entry point: runs the solver on a new thread. */
	public static void main(String[] args)
	{
		new Thread(new Main()).start();
	}
}

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

kgduu

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值