本题与最长公共子序列(LCS)的思路类似:设 dp[i][j] 表示 g1[1...i] 与 g2[1...j] 对齐后的最大相似度,
那么 dp[i+1][j+1] 就是 dp[i][j] + s[g1[i+1]][g2[j+1]]、dp[i+1][j] + s['-'][g2[j+1]]、dp[i][j+1] + s[g1[i+1]]['-'] 三者的最大值(s 为题目给出的得分表,'-' 表示空位)。
所以状态转移方程为:dp[i][j] = max (dp[i-1][j-1] + s[g1[i]][g2[j]], max(dp[i][j-1] + s['-'][g2[j]], dp[i-1][j] + s[g1[i]]['-']))
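注意初始化:字符串 g1 和 g2 的下标都从 1 开始,显然 dp[0][0] = 0;但 dp[0][1...len2] 和 dp[1...len1][0] 不为 0,也必须初始化,
即 dp[0][j] = dp[0][j-1] + s['-'][g2[j]],dp[i][0] = dp[i-1][0] + s[g1[i]]['-'](一个串的前缀全部与空位对齐)。具体见代码。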
#include <cstdio>
#include <iostream>
using namespace std;
// Capacity of the sequence buffers and of each dimension of the DP table.
#define M 105

// Sequence lengths consumed by DP(); main() reads them from the input.
int len1;
int len2;

// dp[i][j]: best alignment score of g1[1..i] against g2[1..j].
int dp[M][M];

// Pair score table subscripted by character code; init() fills the entries
// for 'A', 'C', 'G', 'T' and the gap symbol '-'. Everything else stays 0.
int s[200][200];

// The two sequences. main() stores them starting at index 1, so index 0
// is unused.
char g1[M];
char g2[M];
// Returns the larger of the two arguments; when they are equal, b is returned.
inline int max(int a, int b)
{
    if (a > b) {
        return a;
    }
    return b;
}
// Fills the global score table s for every pair drawn from the symbols
// 'A', 'C', 'G', 'T' and the gap '-'. The table is symmetric. The gap/gap
// cell is deliberately left untouched.
void init()
{
    static const char kSymbols[5] = {'A', 'C', 'G', 'T', '-'};
    // Rows and columns follow the order of kSymbols.
    static const int kScores[5][5] = {
        /*        A   C   G   T   -  */
        /* A */ { 5, -1, -2, -1, -3},
        /* C */ {-1,  5, -3, -2, -4},
        /* G */ {-2, -3,  5, -2, -2},
        /* T */ {-1, -2, -2,  5, -1},
        /* - */ {-3, -4, -2, -1,  0},
    };

    for (int row = 0; row < 5; ++row) {
        for (int col = 0; col < 5; ++col) {
            // A gap is never aligned with a gap, so that cell is not written.
            if (kSymbols[row] == '-' && kSymbols[col] == '-')
                continue;
            const int r = static_cast<int>(kSymbols[row]);
            const int c = static_cast<int>(kSymbols[col]);
            s[r][c] = kScores[row][col];
        }
    }
}
// Bounds-checked lookup into the global score table s.
// The symbols are converted through unsigned char so that a byte with the
// high bit set can never become a negative subscript, and any code that
// falls outside the table scores 0 (the same value every unfilled entry of
// the zero-initialised table holds).
static int pair_score(char a, char b)
{
    const unsigned int rows = sizeof(s) / sizeof(s[0]);
    const unsigned int cols = sizeof(s[0]) / sizeof(s[0][0]);
    const unsigned int ua = static_cast<unsigned char>(a);
    const unsigned int ub = static_cast<unsigned char>(b);
    if (ua >= rows || ub >= cols)
        return 0;
    return s[ua][ub];
}

// Computes the maximum alignment score of g1[1..len1] against g2[1..len2].
//
// dp[i][j] holds the best score for the prefixes g1[1..i] and g2[1..j].
// Each cell is the best of three moves:
//   - align g1[i] with g2[j]      (diagonal),
//   - align a gap with g2[j]      (left),
//   - align g1[i] with a gap      (up).
//
// Reads the globals len1, len2, g1, g2 and s; overwrites
// dp[0..len1][0..len2]. The caller must keep len1 and len2 inside the
// bounds of dp and of the sequence buffers.
// Returns dp[len1][len2].
int DP()
{
    dp[0][0] = 0;

    // Border row: an empty prefix of g1 against g2[1..j] is all gaps.
    for (int j = 1; j <= len2; j++)
        dp[0][j] = dp[0][j-1] + pair_score('-', g2[j]);

    // Border column: g1[1..i] against an empty prefix of g2 is all gaps.
    for (int i = 1; i <= len1; i++)
        dp[i][0] = dp[i-1][0] + pair_score(g1[i], '-');

    for (int i = 1; i <= len1; i++) {
        for (int j = 1; j <= len2; j++) {
            const int paired  = dp[i-1][j-1] + pair_score(g1[i], g2[j]);
            const int gap_in_g1 = dp[i][j-1] + pair_score('-', g2[j]);
            const int gap_in_g2 = dp[i-1][j] + pair_score(g1[i], '-');
            dp[i][j] = max(paired, max(gap_in_g1, gap_in_g2));
        }
    }
    return dp[len1][len2];
}
// Reads the number of test cases, then for each case a line of the form
// "<len1> <seq1> <len2> <seq2>", and prints the score returned by DP().
// Processing stops quietly at the first malformed or out-of-range record.
int main()
{
    // Sequences are stored from index 1, so a buffer of sizeof(g1) bytes
    // holds at most sizeof(g1) - 2 symbols plus the terminating NUL.
    const int max_len1 = static_cast<int>(sizeof(g1)) - 2;
    const int max_len2 = static_cast<int>(sizeof(g2)) - 2;

    init();

    int n = 0;
    if (scanf("%d", &n) != 1)
        return 0;

    while (n-- > 0) {
        // The field width 103 matches the 105-byte buffers (M == 105):
        // it keeps an over-long token from overflowing g1 / g2. Update it
        // together with M.
        if (scanf("%d %103s %d %103s", &len1, g1 + 1, &len2, g2 + 1) != 4)
            break;

        // The lengths come from the input and are used as dp subscripts,
        // so refuse anything outside the buffers.
        if (len1 < 0 || len1 > max_len1 || len2 < 0 || len2 > max_len2)
            break;

        printf("%d\n", DP());
    }
    return 0;
}
本文介绍了一种求解两个DNA序列(允许插入空位'-')最大相似度的动态规划算法,并给出了详细的实现代码。通过定义状态转移方程,该算法能够在 O(len1 × len2) 的时间内计算出两个序列对齐后的最大相似度。
3865

被折叠的 条评论
为什么被折叠?



