Lucene中的自定义排序功能和Java集合中自定义排序的实现方法差不多,都需要实现比较接口. 在Java集合中只要实现Comparable(或Comparator)接口就可以了,但是在Lucene中要同时实现SortComparatorSource接口和ScoreDocComparator接口. 在了解具体实现方法之前,先来看看这两个接口的定义吧.
SortComparatorSource接口的功能是返回一个用来排序ScoreDocs的comparator(Expert: returns a comparator for sorting ScoreDocs).该接口只定义了一个方法.如下:
- /**
- *Createsacomparatorforthefieldinthegivenindex.
- *@paramreader-Indextocreatecomparatorfor.
- *@paramfieldname-Fieldtocreatecomparatorfor.
- *@returnComparatorofScoreDocobjects.
- *@throwsIOException-Ifanerroroccursreadingtheindex.
- */
- publicScoreDocComparatornewComparator(IndexReaderreader,Stringfieldname)throwsIOException
该方法只是创建一个ScoreDocComparator实例用来实现排序,所以我们还要实现ScoreDocComparator接口. 来看看ScoreDocComparator接口: 它的功能是比较两个ScoreDoc对象以确定排序顺序(Compares two ScoreDoc objects for sorting),里面定义了两个由Lucene实现的静态实例,如下:
- //Specialcomparatorforsortinghitsaccordingtocomputedrelevance(documentscore).
- publicstaticfinalScoreDocComparatorRELEVANCE;
- //Specialcomparatorforsortinghitsaccordingtoindexorder(documentnumber).
- publicstaticfinalScoreDocComparatorINDEXORDER;
该接口有3个与排序相关的方法需要我们实现,分别如下:
- /**
- *ComparestwoScoreDocobjectsandreturnsaresultindicatingtheirsortorder.
- *@paramiFirstScoreDoc
- *@paramjSecondScoreDoc
- *@return-1ifishouldcomebeforej;
- *1ifishouldcomeafterj;
- *0iftheyareequal
- */
- publicintcompare(ScoreDoci,ScoreDocj);
- /**
- *Returnsthevalueusedtosortthegivendocument.Theobjectreturnedmustimplementthejava.io.Serializableinterface.Thisisusedbymultisearcherstodeterminehowtocollateresultsfromtheirsearchers.
- *@paramiDocument
- *@returnSerializableobject
- */
- publicComparablesortValue(ScoreDoci);
- /**
- *Returnsthetypeofsort.ShouldreturnSortField.SCORE,SortField.DOC,SortField.STRING,SortField.INTEGER,SortField.FLOATorSortField.CUSTOM.ItisnotvalidtoreturnSortField.AUTO.Thisisusedbymultisearcherstodeterminehowtocollateresultsfromtheirsearchers.
- *@returnOneoftheconstantsinSortField.
- */
- publicintsortType();
看个例子吧!
该例子为Lucene in Action中的一个实现,用来搜索距你最近的餐馆的名字. 餐馆坐标用字符串"x,y"来存储.
- packagecom.nikee.lucene;
- importjava.io.IOException;
- importorg.apache.lucene.index.IndexReader;
- importorg.apache.lucene.index.Term;
- importorg.apache.lucene.index.TermDocs;
- importorg.apache.lucene.index.TermEnum;
- importorg.apache.lucene.search.ScoreDoc;
- importorg.apache.lucene.search.ScoreDocComparator;
- importorg.apache.lucene.search.SortComparatorSource;
- importorg.apache.lucene.search.SortField;
- //实现了搜索距你最近的餐馆的名字.餐馆坐标用字符串"x,y"来存储
- //DistanceComparatorSource实现了SortComparatorSource接口
- publicclassDistanceComparatorSourceimplementsSortComparatorSource{
- privatestaticfinallongserialVersionUID=1L;
- //xy用来保存坐标位置
- privateintx;
- privateinty;
- publicDistanceComparatorSource(intx,inty){
- this.x=x;
- this.y=y;
- }
- //返回ScoreDocComparator用来实现排序功能
- publicScoreDocComparatornewComparator(IndexReaderreader,Stringfieldname)throwsIOException{
- returnnewDistanceScoreDocLookupComparator(reader,fieldname,x,y);
- }
- //DistanceScoreDocLookupComparator实现了ScoreDocComparator用来排序
- privatestaticclassDistanceScoreDocLookupComparatorimplementsScoreDocComparator{
- privatefloat[]distances;//保存每个餐馆到指定点的距离
- //构造函数,构造函数在这里几乎完成所有的准备工作.
- publicDistanceScoreDocLookupComparator(IndexReaderreader,Stringfieldname,intx,inty)throwsIOException{
- System.out.println("fieldName2="+fieldname);
- finalTermEnumenumerator=reader.terms(newTerm(fieldname,""));
- System.out.println("maxDoc="+reader.maxDoc());
- distances=newfloat[reader.maxDoc()];//初始化distances
- if(distances.length>0){
- TermDocstermDocs=reader.termDocs();
- try{
- if(enumerator.term()==null){
- thrownewRuntimeException("notermsinfield"+fieldname);
- }
- inti=0,j=0;
- do{
- System.out.println("indo-while:"+i++);
- Termterm=enumerator.term();//取出每一个Term
- if(term.field()!=fieldname)//与给定的域不符合则比较下一个
- break;
- //SetsthistothedataforthecurrentterminaTermEnum.
- //Thismaybeoptimizedinsomeimplementations.
- termDocs.seek(enumerator);//参考TermDocsDoc
- while(termDocs.next()){
- System.out.println("inwhile:"+j++);
- System.out.println("inwhile,Term:"+term.toString());
- String[]xy=term.text().split(",");//去处xy
- intdeltax=Integer.parseInt(xy[0])-x;
- intdeltay=Integer.parseInt(xy[1])-y;
- //计算距离
- distances[termDocs.doc()]=(float)Math.sqrt(deltax*deltax+deltay*deltay);
- }
- }
- while(enumerator.next());
- }finally{
- termDocs.close();
- }
- }
- }
- //有上面的构造函数的准备这里就比较简单了
- publicintcompare(ScoreDoci,ScoreDocj){
- if(distances[i.doc]<distances[j.doc])
- return-1;
- if(distances[i.doc]>distances[j.doc])
- return1;
- return0;
- }
- //返回距离
- publicComparablesortValue(ScoreDoci){
- returnnewFloat(distances[i.doc]);
- }
- //指定SortType
- publicintsortType(){
- returnSortField.FLOAT;
- }
- }
- publicStringtoString(){
- return"Distancefrom("+x+","+y+")";
- }
- }
上面是分别实现了这两个接口的两个类,里面带有详细注释,可以看出自定义排序并不是很难. 该实现能否正确工作,我们来看看测试代码能否通过吧.
- packagecom.nikee.lucene.test;
- importjava.io.IOException;
- importjunit.framework.TestCase;
- importorg.apache.lucene.analysis.WhitespaceAnalyzer;
- importorg.apache.lucene.document.Document;
- importorg.apache.lucene.document.Field;
- importorg.apache.lucene.index.IndexWriter;
- importorg.apache.lucene.index.Term;
- importorg.apache.lucene.search.FieldDoc;
- importorg.apache.lucene.search.Hits;
- importorg.apache.lucene.search.IndexSearcher;
- importorg.apache.lucene.search.Query;
- importorg.apache.lucene.search.ScoreDoc;
- importorg.apache.lucene.search.Sort;
- importorg.apache.lucene.search.SortField;
- importorg.apache.lucene.search.TermQuery;
- importorg.apache.lucene.search.TopFieldDocs;
- importorg.apache.lucene.store.RAMDirectory;
- importcom.nikee.lucene.DistanceComparatorSource;
- publicclassDistanceComparatorSourceTestextendsTestCase{
- privateRAMDirectorydirectory;
- privateIndexSearchersearcher;
- privateQueryquery;
- //建立测试环境
- protectedvoidsetUp()throwsException{
- directory=newRAMDirectory();
- IndexWriterwriter=newIndexWriter(directory,newWhitespaceAnalyzer(),true);
- addPoint(writer,"ElCharro","restaurant",1,2);
- addPoint(writer,"CafePocaCosa","restaurant",5,9);
- addPoint(writer,"LosBetos","restaurant",9,6);
- addPoint(writer,"Nico'sTacoShop","restaurant",3,8);
- writer.close();
- searcher=newIndexSearcher(directory);
- query=newTermQuery(newTerm("type","restaurant"));
- }
- privatevoidaddPoint(IndexWriterwriter,Stringname,Stringtype,intx,inty)throwsIOException{
- Documentdoc=newDocument();
- doc.add(newField("name",name,Field.Store.YES,Field.Index.TOKENIZED));
- doc.add(newField("type",type,Field.Store.YES,Field.Index.TOKENIZED));
- doc.add(newField("location",x+","+y,Field.Store.YES,Field.Index.UN_TOKENIZED));
- writer.addDocument(doc);
- }
- publicvoidtestNearestRestaurantToHome()throwsException{
- //使用DistanceComparatorSource来构造一个SortField
- Sortsort=newSort(newSortField("location",newDistanceComparatorSource(0,0)));
- Hitshits=searcher.search(query,sort);//搜索
- //测试
- assertEquals("closest","ElCharro",hits.doc(0).get("name"));
- assertEquals("furthest","LosBetos",hits.doc(3).get("name"));
- }
- publicvoidtestNeareastRestaurantToWork()throwsException{
- Sortsort=newSort(newSortField("location",newDistanceComparatorSource(10,10)));//工作的坐标10,10
- //上面的测试实现了自定义排序,但是并不能访问自定义排序的更详细信息,利用
- //TopFieldDocs可以进一步访问相关信息
- TopFieldDocsdocs=searcher.search(query,null,3,sort);
- assertEquals(4,docs.totalHits);
- assertEquals(3,docs.scoreDocs.length);
- //取得FieldDoc利用FieldDoc可以取得关于排序的更详细信息请查看FieldDocDoc
- FieldDocfieldDoc=(FieldDoc)docs.scoreDocs[0];
- assertEquals("(10,10)->(9,6)=sqrt(17)",newFloat(Math.sqrt(17)),fieldDoc.fields[0]);
- Documentdocument=searcher.doc(fieldDoc.doc);
- assertEquals("LosBetos",document.get("name"));
- dumpDocs(sort,docs);//显示相关信息
- }
- //显示有关排序的信息
- privatevoiddumpDocs(Sortsort,TopFieldDocsdocs)throwsIOException{
- System.out.println("Sortedby:"+sort);
- ScoreDoc[]scoreDocs=docs.scoreDocs;
- for(inti=0;i<scoreDocs.length;i++){
- FieldDocfieldDoc=(FieldDoc)scoreDocs[i];
- Floatdistance=(Float)fieldDoc.fields[0];
- Documentdoc=searcher.doc(fieldDoc.doc);
- System.out.println(""+doc.get("name")+"@("+doc.get("location")+")->"+distance);
- }
- }
- }
本文介绍如何在Lucene中实现自定义排序功能,通过实现SortComparatorSource和ScoreDocComparator接口来满足特定应用场景的需求,并提供了一个搜索附近餐馆的例子。
1197

被折叠的 条评论
为什么被折叠?



