///要求:求串s到串t编辑距离并输出一种编辑方法。
///编辑距离就是用来计算从原串(s)转换到目标串(t)所需要的最少的插入,删除和替换的数目,在NLP中应用比较广泛。
#include <iostream>
#include <string.h>
using namespace std;
//存储操作信息;
struct OperInfo
{
int oper; //操作类型,0:无操作、1:delete,2:insert,3:replace;
int i; //当前操作处于源串第i位;
int j; //当前操作处于目的串第j位;
OperInfo *next;//下一个操作;
};
//第五位表示操作0:无操作、1:delete,2:insert,3:replace;
int doInfo[1000][5]={{0,0,0,0,0}};
int cnt=0;
int count=0;
//求三个数中的最小数
int Minimum(int a, int b, int c)
{
int mi;
mi = a;
if (b < mi)
{
mi = b;
}
if (c < mi)
{
mi = c;
}
return mi;
}
//计算两个字符串间的编辑距离
//原理请参见:http://www.gdcp.cn/jpkc/sjjg/app/jm/edit_distance/problem.htm
int getEditDistance(char *s, char *t) {
int **d; // matrix
int n; // length of s
int m; // length of t
int i; // iterates through s
int j; // iterates through t
char s_i; // ith character of s
char t_j; // jth character of t
int cost; // cost
// Step 1
n = strlen(s);
m = strlen(t);
d=new int*[n+1];
for(i=0;i<=n;i++)
{
d[i]=new int[m+1];
}
if (n == 0) {
return m;
}
if (m == 0) {
return n;
}
// Step 2
for (i = 0; i <= n; i++) {
d[i][0] = i;
}
for (j = 0; j <= m; j++) {
d[0][j] = j;
}
// Step 3
for (i = 1; i <= n; i++) {
s_i = s[i-1];
// Step 4
for (j = 1; j <= m; j++) {
t_j = t[j - 1];
// Step 5
if (s_i == t_j) {
cost = 0;
} else {
cost = 1;
}
// Step 6
//若最后一步为delete操作:则d[i][j]=d[i-1][j]+1;
//若最后一步为insert操作:则d[i][j]=d[i][j-1]+1;
//若cost=0,则不操作。
//若cost=1,则作替换。
d[i][j] = Minimum(d[i - 1][j] + 1, d[i][j - 1] + 1,
d[i - 1][j - 1] + cost);
if (d[i][j]==(d[i - 1][j] + 1))
{
doInfo[cnt][0]=i;
doInfo[cnt][1]=j;
doInfo[cnt][2]=i-1;
doInfo[cnt][3]=j;
doInfo[cnt][4]=1;//delete
cnt++;
}
if (d[i][j]==(d[i][j-1] + 1))
{
doInfo[cnt][0]=i;
doInfo[cnt][1]=j;
doInfo[cnt][2]=i;
doInfo[cnt][3]=j-1;
doInfo[cnt][4]=2;//insert
cnt++;
}
if (d[i][j]==(d[i - 1][j-1] +cost)&&cost==1)
{
doInfo[cnt][0]=i;
doInfo[cnt][1]=j;
doInfo[cnt][2]=i-1;
doInfo[cnt][3]=j-1;
doInfo[cnt][4]=3;//replace
cnt++;
}
if (d[i][j]==(d[i - 1][j-1] +cost)&&cost==0)
{
doInfo[cnt][0]=i;
doInfo[cnt][1]=j;
doInfo[cnt][2]=i-1;
doInfo[cnt][3]=j-1;
doInfo[cnt][4]=0;//no
cnt++;
}
}
}
// Step 7
int result=d[n][m];
//释放二维数组占用的空间
for (i=0;i<=n;i++)
{
delete []d[i];
}
delete []d;
return result;
}
//将操作步聚存储到链表中;
void getOperater(int n,int m,OperInfo *head)
{
int i;
for (i=0;i<100;i++)
{
if(doInfo[i][0]==0&&doInfo[i][1]==0&&doInfo[i][2]==0&&doInfo[i][3]==0&&doInfo[i][4]==0)
break;
if (doInfo[i][0]==n&&doInfo[i][1]==m)
{
OperInfo *node=new OperInfo;
node->oper=doInfo[i][4];
node->i=doInfo[i][0];
node->j=doInfo[i][1];
node->next=head->next;
head->next=node;
if (m==0&&n==0)
break;
getOperater(doInfo[i][2],doInfo[i][3],head);
break;//只取一种实现方式。
}
}
}
void displayDoInfo(OperInfo *head,char *a,char *b)
{
cout<<"其中的一种操作步骤如下:"<<endl;
OperInfo *node=head;
node=node->next;
while(node!=NULL)
{
if (node->oper==0)
{
cout<<"没有操作,直接看下一位"<<endl;
}
if (node->oper==1)
{
count++;
cout<<"第"<<count<<"步--"<<"delete:"<<a[node->i-1]<<endl;
}
if (node->oper==2)
{
count++;
cout<<"第"<<count<<"步--"<<"insert:"<<b[node->j-1]<<endl;
}
if (node->oper==3)
{
count++;
cout<<"第"<<count<<"步--"<<"replace:"<<a[node->i-1]<<"->"<<b[node->j-1]<<endl;
}
node=node->next;
}
/*
int i,j;
for (i=0;i<100;i++)
{
for (j=0;j<5;j++)
{
cout<<doInfo[i][j]<<" ";
}
cout<<endl;
} */
}
void main()
{
char *a="aded";
char *b="acfbe";
int n=strlen(a);
int m=strlen(b);
OperInfo *head=new OperInfo;
head->next=NULL;
cout<<a<<"-->"<<b<<"的编辑距离为:"<<getEditDistance(a,b)<<endl;
getOperater(n,m,head);
displayDoInfo(head,a,b);
}