目标:
找出两个 字符串 X = 'CGATAATTGAGA' 和 Y = 'GTTCCTAATA' 的最长公共子序列。用动态规划求解(解不唯一?)。
首先建立一个表 table = table[row,col] = table[12+1,10+1];为什么多一格呢?因为第0行和第0列表示空前缀(空串与任何字符串的LCS长度都是0),它们作为递推的边界条件,使 table[row-1,col-1] 在 row = 1 或 col = 1 时也有定义。
先初始化每个单元格的值为0;
然后,遍历行和列:
当X[row-1] = Y[col-1]时,table[row,col] = table[row-1,col-1]+1;(表比字符串多出第0行和第0列,所以表的下标比字符串的下标大1,row、col 从1开始遍历)
当X[row-1] <> Y[col-1]时,table[row,col] = Math.max(table[row,col-1],table[row-1,col]);
这样就构建出一个下面的表:
G T T C C T A A T A
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
C [0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1]
G [0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
A [0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2]
T [0, 1, 2, 2, 2, 2, 2, 2, 2, 3, 3]
A [0, 1, 2, 2, 2, 2, 2, 3, 3, 3, 4]
A [0, 1, 2, 2, 2, 2, 2, 3, 4, 4, 4]
T [0, 1, 2, 3, 3, 3, 3, 3, 4, 5, 5]
T [0, 1, 2, 3, 3, 3, 4, 4, 4, 5, 5]
G [0, 1, 2, 3, 3, 3, 4, 4, 4, 5, 5]
A [0, 1, 2, 3, 3, 3, 4, 5, 5, 5, 6]
G [0, 1, 2, 3, 3, 3, 4, 5, 5, 5, 6]
A [0, 1, 2, 3, 3, 3, 4, 5, 6, 6, 6]
通过这个表来找出LCS,LCS由表的下标给出,将这些下标放入一个数组L
从表的最后开始回溯:
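row = 12, col = 10;(表的下标从0开始,13行11列的表最后一格是table[12,10])
1、若table[row,col]对应的行和列的字符相等(如table[12,10]:A = A),将(row,col)放入L,row-=1,col-=1;
2、若table[row,col]对应的行和列的字符不相等(如table[11,9]:G <> T),比较table[row-1,col]和table[row,col-1]:
2.1 若table[row-1,col] > table[row,col-1],row -= 1;
2.2 否则 col -= 1;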
重复1、2
实现:
/**
 * Build a zero-filled two-dimensional table.
 *
 * The table has one more row and one more column than requested, so a
 * call with (row, col) yields (row + 1) arrays of (col + 1) zeros each.
 *
 * @param {number} row - number of rows, not counting the extra leading row.
 * @param {number} col - number of columns, not counting the extra leading column.
 * @returns {number[][]} a fresh table in which every cell is 0.
 */
function initTable(row,col){
    var height = row + 1;
    var width = col + 1;
    var grid = [];
    for (var r = 0; r < height; r++) {
        // Each row is its own array so rows never share storage.
        var line = [];
        for (var c = 0; c < width; c++) {
            line.push(0);
        }
        grid.push(line);
    }
    return grid;
}
/**
 * Backtrack through a filled LCS length table and collect the positions
 * of the matched elements.
 *
 * The walk starts at cell (s1.length, s2.length) and stops as soon as
 * either index reaches 0. On a match the pair is recorded and the walk
 * moves diagonally; otherwise it moves towards the larger neighbouring
 * cell (up when the cell above is strictly larger, left otherwise).
 *
 * @param {string|Array} s1 - first sequence; its elements correspond to table rows.
 * @param {string|Array} s2 - second sequence; its elements correspond to table columns.
 * @param {number[][]} table - LCS length table with (s1.length + 1) rows
 *   and (s2.length + 1) columns.
 * @returns {Array<number[]>} matched [row, col] pairs (1-based positions in
 *   s1 and s2), ordered from the end of the sequences towards the start.
 */
function computeLCSIndex(s1,s2,table){
    var i = s1.length,
        j = s2.length,
        L1 = [];
    while(i>0 && j>0){
        // Strict equality, the same comparison diff() uses to fill the table:
        // with `==`, values such as 1 and '1' would be reported as a match
        // here although the table never counted them.
        if(s1[i-1] === s2[j-1]){
            L1.push([i,j]);
            i -= 1;
            j -= 1;
        }else{
            // Defensive stop: a table with fewer rows than s1 requires cannot
            // be walked any further, so return the matches found so far.
            if(!table[i-1]){
                break;
            }
            if(table[i-1][j] > table[i][j-1]){
                i -= 1;
            }else{
                j -= 1;
            }
        }
    }
    return L1;
}
/**
 * Rebuild the common subsequence from the index pairs produced by the
 * backtracking step.
 *
 * The pairs arrive ordered from the end of the sequences to the start,
 * so they are read back to front. The list itself is left untouched:
 * reversing it in place would make a second call with the same list
 * return the subsequence backwards.
 *
 * @param {string|Array} s1 - first sequence (not read; kept for a symmetric signature).
 * @param {string|Array} s2 - second sequence, from which the elements are taken.
 * @param {Array<number[]>} L1 - [row, col] pairs with 1-based positions; only
 *   the column (position in s2) is used.
 * @returns {string} the matched elements of s2 concatenated in sequence order.
 */
function LCS(s1,s2,L1){
    var result = "";
    for(var k = L1.length - 1; k >= 0; k--){
        // Positions are 1-based, hence the -1 when indexing s2.
        result += s2[L1[k][1]-1];
    }
    return result;
}
/**
 * Compute the positions of a longest common subsequence of two sequences.
 *
 * Fills a (s1.length + 1) x (s2.length + 1) table where cell [k][l] holds
 * the LCS length of the first k elements of s1 and the first l elements
 * of s2, then hands the table to computeLCSIndex for backtracking.
 *
 * @param {string|Array} s1 - first sequence (string of characters or array of tokens).
 * @param {string|Array} s2 - second sequence.
 * @returns {*} whatever computeLCSIndex returns for the filled table.
 */
function diff(s1,s2){
    var len1 = s1.length,
        len2 = s2.length,
        table = initTable(len1,len2),
        k,   // row counter, declared locally so it does not leak as a global
        l;   // column counter, declared locally for the same reason
    for(k=1;k<len1+1;k++){
        for(l=1;l<len2+1;l++){
            if(s1[k-1] === s2[l-1]){
                // Match: extend the LCS of both prefixes shortened by one.
                table[k][l] = table[k-1][l-1] + 1;
            }else{
                // No match: carry over the better of the left and upper cells.
                table[k][l] = Math.max(table[k][l-1],table[k-1][l]);
            }
        }
    }
    return computeLCSIndex(s1,s2,table);
}
// Demo driver: when `debug` is on, run the LCS pipeline on two sample
// inputs and print each resulting subsequence.
var debug = true;
if(debug){
    // Character-level comparison: every character is one unit.
    var A = 'CGATAATTGAGA';
    var B = 'GTTCCTAATA';
    var L = diff(A,B);
    var s = LCS(A,B,L);
    console.log(s);

    // Token-level comparison: every array element is one unit, so
    // multi-character tokens are compared as a whole.
    var X = ["Cqdf", "G23", "A", "T", "A", "A", "T", "T", "G", "A", "G", "A"];
    var Y = ["G2", "Tsfj", "T", "C", "C", "T", "A", "A", "T", "A"];
    var L2 = diff(X,Y);
    var s2 = LCS(X,Y,L2);
    console.log(s2);
}
通常是以单个字符作为比较的“元”,若以多个字符作为比较的元,那么输入的参数为数组即可: