lucene自定义排序例子

本文详细介绍了如何使用 Lucene 实现自定义排序功能:根据用户所在位置,把匹配到的餐厅按距离由近到远排序。做法是继承 FieldComparatorSource 和 FieldComparator 这两个抽象类,在子类中自定义比较逻辑,从而得到按距离排序的搜索结果。文中通过具体代码示例展示了一个简单的应用:查找用户所在位置附近的餐厅。

本人主要是参考lucene实战一书,不过中文版上总是发现一些错误,导致程序并没有给出想要的结果,还是要看api文档。

在 Lucene 3.x 中实现自定义排序,主要是编写两个子类:一个继承 FieldComparatorSource 抽象类,另一个继承 FieldComparator 抽象类。

1.继承FieldComparatorSource,必须实现抽象方法newComparator。

2.继承FieldComparator,必须实现下面6个抽象方法:

  • compare(int, int) Compare a hit at 'slot a' with hit 'slot b'.
  • setBottom(int) This method is called by FieldValueHitQueue to notify the FieldComparator of the current weakest ("bottom") slot. Note that this slot may not hold the weakest value according to your comparator, in cases where your comparator is not the primary one (ie, is only used to break ties from the comparators before it).
  • compareBottom(int) Compare a new hit (docID) against the "weakest" (bottom) entry in the queue.
  • copy(int, int) Installs a new hit into the priority queue. The FieldValueHitQueue calls this method when a new hit is competitive.
  • setNextReader(org.apache.lucene.index.IndexReader, int) Invoked when the search is switching to the next segment. You may need to update internal state of the comparator, for example retrieving new values from the FieldCache.
  • value(int) Return the sort value stored in the specified slot. This is only called at the end of the search, in order to populate FieldDoc.fields when returning the top results.

上面方法描述摘自api文档,详细请查阅api。

例子是书上的一个简单例子:把匹配到的餐厅按照与用户所在位置(二维坐标)的距离由近到远排序。每个地点有三个域,即地名(name)、地点类型(type),以及以"x,y"形式保存二维坐标的位置(location)。下面是具体实现代码:

package org.apache.lucene.demo;

import java.io.IOException;

import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.FieldCache;
import org.apache.lucene.search.FieldComparator;
import org.apache.lucene.search.FieldComparatorSource;
import org.apache.lucene.search.SortField;

/**
 * Comparator source that sorts hits by their Euclidean distance from a fixed
 * reference point {@code (x, y)}, nearest first.
 *
 * <p>The sort field is loaded through {@link FieldCache} as strings, and every
 * value is parsed as {@code "x,y"} with integer coordinates. Documents that
 * have no value for the sort field are given an infinite distance so that they
 * sort after every document that has a location.
 */
public class DistanceComparatorSource extends FieldComparatorSource {
	/** X coordinate of the reference point distances are measured from. */
	private final int x;
	/** Y coordinate of the reference point distances are measured from. */
	private final int y;

	/**
	 * @param x x coordinate of the reference point
	 * @param y y coordinate of the reference point
	 */
	public DistanceComparatorSource(int x, int y) {
		this.x = x;
		this.y = y;
	}

	/**
	 * Creates the per-search comparator.
	 *
	 * @param fieldname name of the field holding the {@code "x,y"} location
	 * @param numHits   number of slots the comparator must be able to hold
	 * @param sortPos   unused here
	 * @param reversed  unused here
	 */
	@Override
	public FieldComparator<?> newComparator(String fieldname, int numHits, int sortPos,
			boolean reversed) throws IOException {
		return new DistanceSourceLookupComparator(fieldname, numHits);
	}

	/**
	 * Comparator that keeps one distance per queue slot and compares hits by
	 * that distance.
	 */
	private class DistanceSourceLookupComparator extends FieldComparator<Float> {
		/** Per-document coordinates of the segment currently being searched. */
		private int[] xDoc, yDoc;
		/** {@code true} for documents of the current segment that have a location value. */
		private boolean[] hasLocation;
		/** Distance stored in each queue slot. */
		private final float[] values;
		/** Distance of the slot most recently passed to {@link #setBottom(int)}. */
		private float bottom;
		/** Name of the field the locations are read from. */
		final String fieldName;

		public DistanceSourceLookupComparator(String fieldName, int numHits) {
			this.values = new float[numHits];
			this.fieldName = fieldName;
		}

		/** Compares the distances stored in two slots. */
		@Override
		public int compare(int slot1, int slot2) {
			return Float.compare(values[slot1], values[slot2]);
		}

		/**
		 * Computes the distance between the reference point and the given
		 * document of the current segment.
		 *
		 * @return the Euclidean distance, or positive infinity when the
		 *         document has no location value
		 */
		private float getDistance(int doc) {
			if (!hasLocation[doc]) {
				return Float.POSITIVE_INFINITY;
			}
			// Widen before subtracting/multiplying so large coordinates cannot overflow int.
			double deltaX = (double) xDoc[doc] - x;
			double deltaY = (double) yDoc[doc] - y;
			return (float) Math.sqrt(deltaX * deltaX + deltaY * deltaY);
		}

		/** Compares the bottom slot's distance with the distance of document {@code doc}. */
		@Override
		public int compareBottom(int doc) throws IOException {
			return Float.compare(bottom, getDistance(doc));
		}

		/** Stores the distance of document {@code doc} into {@code slot}. */
		@Override
		public void copy(int slot, int doc) throws IOException {
			values[slot] = getDistance(doc);
		}

		/** Remembers the distance held by the slot designated as bottom. */
		@Override
		public void setBottom(int slot) {
			bottom = values[slot];
		}

		/**
		 * Loads and parses the locations of the next segment.
		 *
		 * <p>Author's note: the book's version of this method is wrong; this
		 * implementation follows the API documentation and gives the correct
		 * result.
		 */
		@Override
		public void setNextReader(IndexReader reader, int docBase) throws IOException {
			// Read the field this comparator was created for rather than a fixed name.
			String[] locations = FieldCache.DEFAULT.getStrings(reader, fieldName);
			int count = locations.length;
			xDoc = new int[count];
			yDoc = new int[count];
			hasLocation = new boolean[count];
			for (int i = 0; i < count; i++) {
				String location = locations[i];
				if (location == null) {
					// No value for this document: leave it flagged as missing.
					continue;
				}
				String[] parts = location.split(",");
				xDoc[i] = Integer.parseInt(parts[0]);
				yDoc[i] = Integer.parseInt(parts[1]);
				hasLocation[i] = true;
			}
		}

		/** Returns the distance stored in {@code slot}. */
		@Override
		public Float value(int slot) {
			return Float.valueOf(values[slot]);
		}

		public int sortType() {
			return SortField.CUSTOM;
		}

		@Override
		public String toString() {
			return "Distance from (" + x + "," + y + ")";
		}
	}

}


下面是具体的测试运行排序结果的程序:

package org.apache.lucene.demo;

import java.io.IOException;

import javax.crypto.SealedObject;


import org.apache.lucene.analysis.WhitespaceAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldSelectorResult;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.FieldDoc;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.Searcher;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.TopFieldDocs;
import org.apache.lucene.store.LockObtainFailedException;
import org.apache.lucene.store.RAMDirectory;

/**
 * Small demo that indexes a few restaurants in memory and prints them ordered
 * by distance from the point (10, 10), using {@link DistanceComparatorSource}.
 */
public class DistanceSortingTest {

	/**
	 * Builds the in-memory index, runs the distance-sorted search and prints,
	 * for every hit, its distance followed by its name.
	 *
	 * @param args unused
	 * @throws IOException
	 * @throws LockObtainFailedException
	 * @throws CorruptIndexException
	 */
	public static void main(String[] args) throws CorruptIndexException, LockObtainFailedException, IOException {
		RAMDirectory directory = new RAMDirectory();
		IndexWriter indexWriter = new IndexWriter(directory, new WhitespaceAnalyzer(),
				IndexWriter.MaxFieldLength.UNLIMITED);
		try {
			addPoint(indexWriter, "El charro", "restaurant", 1, 2);
			addPoint(indexWriter, "Cafe Poca Cosa", "restaurant", 5, 9);
			addPoint(indexWriter, "Los Betos", "restaurant", 9, 6);
			addPoint(indexWriter, "Nico's Toco Shop", "restaurant", 3, 8);
		} finally {
			// Release the writer even when adding a document fails.
			indexWriter.close();
		}

		Searcher searcher = new IndexSearcher(directory);
		try {
			Query query = new TermQuery(new Term("type", "restaurant"));
			Sort sort = new Sort(new SortField("location", new DistanceComparatorSource(10, 10)));
			TopFieldDocs topDocs = searcher.search(query, null, 5, sort);
			for (ScoreDoc doc : topDocs.scoreDocs) {
				// Hits of a sorted search are cast to FieldDoc; fields[0] is the
				// value of the first (here: only) sort criterion.
				FieldDoc fieldDoc = (FieldDoc) doc;
				Document document = searcher.doc(doc.doc);
				System.out.println(fieldDoc.fields[0]);
				System.out.println(document.get("name"));
			}
		} finally {
			searcher.close();
		}
	}

	/**
	 * Adds one place to the index.
	 *
	 * @param writer writer to add the document to
	 * @param name   place name, stored in field {@code name}
	 * @param type   place type, stored in field {@code type}
	 * @param x      x coordinate
	 * @param y      y coordinate; stored together with {@code x} as {@code "x,y"} in field {@code location}
	 */
	private static void addPoint(IndexWriter writer, String name, String type, int x, int y) throws CorruptIndexException, IOException {
		Document document = new Document();
		document.add(new Field("name", name, Field.Store.YES, Field.Index.NOT_ANALYZED));
		document.add(new Field("type", type, Field.Store.YES, Field.Index.NOT_ANALYZED));
		document.add(new Field("location", x + "," + y, Field.Store.YES, Field.Index.NOT_ANALYZED));
		writer.addDocument(document);
	}

}


运行结果:

4.1231055
Los Betos
5.0990195
Cafe Poca Cosa
7.28011
Nico's Toco Shop
12.0415945
El charro

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值