//也可用作求相似度
package com.xxx.xbrl.zip;

import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;


/**
 * Finds every substring of at least a given length that two strings have in
 * common, and reports the longest of them.
 *
 * <p>Entry point: {@link #match(String, String, int)}; the returned
 * {@link Stat} can be queried with {@link Stat#getMaxLengthList()} or printed
 * with {@link Stat#dump()}.
 */
public class MaxMatch {

    /**
     * Collects the common substrings ("keywords") that were found, together
     * with the distinct positions at which each one was seen.
     */
    public class Stat {

        /** Keyword to its occurrences; a TreeMap, so keys are visited in sorted order. */
        Map<String, Item> map = new TreeMap<String, Item>();

        /**
         * One occurrence of a keyword: the two strings that were compared and
         * the start offset of the keyword in each. The strings are expected to
         * be non-null.
         */
        class Pos {
            String s1;
            String s2;
            int from1;
            int from2;

            Pos(String s1, int from1, String s2, int from2) {
                this.s1 = s1;
                this.s2 = s2;
                this.from1 = from1;
                this.from2 = from2;
            }

            /** Two positions are equal when both strings and both offsets are equal. */
            @Override
            public boolean equals(Object obj) {
                if (this == obj) {
                    return true;
                }
                // Guard the cast: null or a foreign type is simply "not equal".
                if (!(obj instanceof Pos)) {
                    return false;
                }
                Pos p = (Pos) obj;
                return p.from1 == from1 && p.from2 == from2 && p.s1.equals(s1) && p.s2.equals(s2);
            }

            /** Hash over the same four fields that {@link #equals(Object)} compares. */
            @Override
            public int hashCode() {
                int h = s1.hashCode();
                h = 31 * h + s2.hashCode();
                h = 31 * h + from1;
                h = 31 * h + from2;
                return h;
            }
        }

        /** All distinct occurrences recorded for one keyword. */
        class Item {
            /** Number of distinct positions stored in {@link #pos}. */
            int matchCount = 0;
            List<Pos> pos = new ArrayList<Pos>();

            /** Records the position unless an equal one is already stored. */
            void addPos(String s1, String s2, int from1, int from2) {
                Pos p = new Pos(s1, from1, s2, from2);

                if (!pos.contains(p)) {
                    matchCount++;
                    pos.add(p);
                }
            }
        }

        /**
         * Returns the keywords that have the greatest length.
         *
         * @return the longest keywords in sorted order; empty when nothing was recorded
         */
        public List<String> getMaxLengthList() {
            int maxLen = -1;
            for (String key : map.keySet()) {
                maxLen = Math.max(maxLen, key.length());
            }

            List<String> list = new ArrayList<String>();
            for (String key : map.keySet()) {
                if (key.length() == maxLen) {
                    list.add(key);
                }
            }
            return list;
        }

        /**
         * Prints every keyword with its length and match count to standard
         * output, followed by a separator and the longest keywords, or a
         * "no match" hint when nothing was recorded.
         */
        public void dump() {
            for (Map.Entry<String, Item> entry : map.entrySet()) {
                String key = entry.getKey();
                System.out.println("关键字[" + key.length() + "]:" + key + " 匹配次数=" + entry.getValue().matchCount);
            }

            if (map.isEmpty()) {
                System.out.println("无匹配,请配置匹配长度.");
                return;
            }

            System.out.println("----------------------------------");
            for (String key : getMaxLengthList()) {
                Item item = map.get(key);
                System.out.println("最大匹配[" + key.length() + "]=" + key + " 匹配次数=" + item.matchCount);
            }
        }

        /**
         * Records one occurrence of a keyword.
         *
         * @param s1      first string of the comparison
         * @param from1   start offset of the keyword in {@code s1}
         * @param s2      second string of the comparison
         * @param from2   start offset of the keyword in {@code s2}
         * @param matched the keyword's characters
         */
        public void gatherToken(String s1, int from1, String s2, int from2, char[] matched) {
            String key = new String(matched);
            Item item = map.get(key);

            if (item == null) {
                item = new Item();
                map.put(key, item);
            }
            item.addPos(s1, s2, from1, from2);
        }
    }

    /**
     * Returns the longest run of characters that {@code chs1} starting at
     * {@code from1} and {@code chs2} starting at {@code from2} have in common.
     */
    private char[] matchMaxChars(char[] chs1, int from1, char[] chs2, int from2) {
        int len = 0;
        while (from1 + len < chs1.length && from2 + len < chs2.length
                && chs1[from1 + len] == chs2[from2 + len]) {
            len++;
        }
        char[] common = new char[len];
        System.arraycopy(chs1, from1, common, 0, len);
        return common;
    }

    /**
     * Records in {@code s} every common substring of {@code s1} and {@code s2}
     * that is at least {@code minMatchLen} characters long, for every pair of
     * start positions.
     */
    private void doMatch(String s1, String s2, Stat s, int minMatchLen) {
        char[] chs1 = s1.toCharArray();
        char[] chs2 = s2.toCharArray();
        // Last start offsets (inclusive) that still leave room for minMatchLen chars.
        int lastStart1 = chs1.length - minMatchLen;
        int lastStart2 = chs2.length - minMatchLen;

        for (int i = 0; i <= lastStart1; i++) {
            for (int k = 0; k <= lastStart2; k++) { // search start in s2
                if (chs1[i] != chs2[k]) {
                    continue;
                }
                char[] matched = matchMaxChars(chs1, i, chs2, k);

                // Also record each shorter prefix of the match (not needed if only
                // the maximum match is wanted) so that keywords contained in one
                // another are all counted.
                for (int mi = minMatchLen; mi <= matched.length; mi++) {
                    char[] matchedBuf = new char[mi];
                    System.arraycopy(matched, 0, matchedBuf, 0, mi);
                    s.gatherToken(s1, i, s2, k, matchedBuf);
                }
            }
        }
    }

    /**
     * Finds all substrings of length {@code minMatchLen} or more that occur in
     * both strings. The comparison is run in both directions (s1 against s2 and
     * s2 against s1), and each direction records its own positions.
     *
     * @param s1          first string
     * @param s2          second string
     * @param minMatchLen minimum keyword length, at least 1
     * @return the collected keywords and their occurrences
     * @throws IllegalArgumentException if {@code minMatchLen} is less than 1
     */
    public Stat match(String s1, String s2, int minMatchLen) {
        if (minMatchLen < 1) {
            throw new IllegalArgumentException("minMatchLen must be >= 1: " + minMatchLen);
        }
        Stat s = new Stat();
        doMatch(s1, s2, s, minMatchLen);
        doMatch(s2, s1, s, minMatchLen);
        return s;
    }

    /** Demo: prints the common substrings of two sample sentences. */
    public static void main(String[] args) throws Exception {
        String s1 = "A trace of length L is a sequence of L match points.";
        String s2 = "The sequence of match points visited in traversing.";
        MaxMatch mm = new MaxMatch();
        Stat s = mm.match(s1, s2, 10);
        s.dump();
    }
}



/*
 * Sample run of main() - expected output. Some keywords begin or end with a
 * space; the number in brackets is the keyword length.
 *
 * 关键字[10]: match poi 匹配次数=2
 * 关键字[11]: match poin 匹配次数=2
 * 关键字[12]: match point 匹配次数=2
 * 关键字[13]: match points 匹配次数=2
 * 关键字[10]: sequence 匹配次数=2
 * 关键字[11]: sequence o 匹配次数=2
 * 关键字[12]: sequence of 匹配次数=2
 * 关键字[13]: sequence of 匹配次数=2
 * 关键字[10]:atch point 匹配次数=2
 * 关键字[11]:atch points 匹配次数=2
 * 关键字[10]:equence of 匹配次数=2
 * 关键字[11]:equence of 匹配次数=2
 * 关键字[10]:match poin 匹配次数=2
 * 关键字[11]:match point 匹配次数=2
 * 关键字[12]:match points 匹配次数=2
 * 关键字[10]:quence of 匹配次数=2
 * 关键字[10]:sequence o 匹配次数=2
 * 关键字[11]:sequence of 匹配次数=2
 * 关键字[12]:sequence of 匹配次数=2
 * 关键字[10]:tch points 匹配次数=2
 * ----------------------------------
 * 最大匹配[13]= match points 匹配次数=2
 * 最大匹配[13]= sequence of 匹配次数=2
 */