lucene HitCollector 的作用

最新推荐文章于 2018-04-25 20:13:18 发布

最新推荐文章于 2018-04-25 20:13:18 发布 · 126 阅读

文章标签：

本文介绍了Lucene中TopDocCollector类的实现原理及其使用方法。TopDocCollector用于收集并返回查询结果中评分最高的前N个文档。文章还提供了一个使用优先队列实现TopDocCollector的具体示例。

导读：
　　HitCollector 的作用很简单：通过 collect() 方法控制检索返回的结果。下面是 Lucene 自带的一个例子——使用一个优先队列，返回指定数目的 Top N 文档。
　　package org.apache.lucene.search;
　　/**
　　* Copyright 2004 The Apache Software Foundation
　　*
　　* Licensed under the Apache License, Version 2.0 (the "License");
　　* you may not use this file except in compliance with the License.
　　* You may obtain a copy of the License at
　　*
　　* http://www.apache.org/licenses/LICENSE-2.0
　　*
　　* Unless required by applicable law or agreed to in writing, software
　　* distributed under the License is distributed on an "AS IS" BASIS,
　　* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
　　* See the License for the specific language governing permissions and
　　* limitations under the License.
　　*/
　　import java.io.IOException;
　　import java.util.BitSet;
　　import org.apache.lucene.store.Directory;
　　import org.apache.lucene.document.Document;
　　import org.apache.lucene.index.IndexReader;
　　import org.apache.lucene.index.Term;
　　import org.apache.lucene.util.PriorityQueue;
　　/** A {@link HitCollector} implementation that collects the top-scoring
　　* documents, returning them as a {@link TopDocs}. This is used by
　　* {@link IndexSearcher} to implement {@link TopDocs}-based search.
　　*
　　*
　　* This may be extended, overriding the collect method to, e.g.,
　　* conditionally invoke super() in order to filter which
　　* documents are collected.
　　**/
　　public class TopDocCollector extends HitCollector {
　　  /** Maximum number of hits to keep, as passed to the constructor. */
　　  private int numHits;
　　  /**
　　   * Score of the entry currently at the top of {@link #hq}; refreshed after
　　   * every insert. Starts at 0.0f.
　　   */
　　  private float minScore = 0.0f;
　　  /** Number of documents seen by {@link #collect} with a score above zero. */
　　  int totalHits;
　　  /** Queue holding the collected {@link ScoreDoc} entries. */
　　  PriorityQueue hq;
　　  /** Construct to collect a given number of hits.
　　   * @param numHits the maximum number of hits to collect
　　   */
　　  public TopDocCollector(int numHits) {
　　    this(numHits, new HitQueue(numHits));
　　  }
　　  /**
　　   * Construct with a caller-supplied queue.
　　   * @param numHits the maximum number of hits to collect
　　   * @param hq the queue used to hold collected hits
　　   */
　　  TopDocCollector(int numHits, PriorityQueue hq) {
　　    this.numHits = numHits;
　　    this.hq = hq;
　　  }
　　  /**
　　   * Counts every document whose score is above zero and offers it to the
　　   * queue when the queue holds fewer than {@code numHits} entries or the
　　   * score is at least the current {@code minScore}.
　　   * @param doc the document number
　　   * @param score the document's score
　　   */
　　  public void collect(int doc, float score) {
　　    if (score > 0.0f) {
　　      totalHits++;
　　      // NOTE(review): relies on PriorityQueue.insert() discarding the lowest
　　      // entry once the queue is full - confirm against the Lucene version used.
　　      if (hq.size() < numHits || score >= minScore) {
　　        hq.insert(new ScoreDoc(doc, score));
　　        minScore = ((ScoreDoc) hq.top()).score; // maintain minScore
　　      }
　　    }
　　  }
　　  /** The total number of documents that matched this query. */
　　  public int getTotalHits() {
　　    return totalHits;
　　  }
　　  /**
　　   * The top-scoring hits. Drains the queue into an array, filling it from
　　   * the last index down to the first.
　　   * @return a {@link TopDocs} built from the total hit count, the collected
　　   *         hits, and the score of the first array element (or
　　   *         {@code Float.NEGATIVE_INFINITY} when nothing was collected)
　　   */
　　  public TopDocs topDocs() {
　　    ScoreDoc[] scoreDocs = new ScoreDoc[hq.size()];
　　    for (int i = hq.size() - 1; i >= 0; i--) { // put docs in array
　　      scoreDocs[i] = (ScoreDoc) hq.pop();
　　    }
　　    // Guard the empty case: scoreDocs[0] does not exist when nothing matched.
　　    float maxScore = (totalHits == 0)
　　        ? Float.NEGATIVE_INFINITY
　　        : scoreDocs[0].score;
　　    return new TopDocs(totalHits, scoreDocs, maxScore);
　　  }
　　}
　　// Article reposted from http://blog.lough.com.cn/post/234/