导读:
HitCollector 的作用很简单:通过 collect() 方法控制检索返回的结果。下面是 Lucene 自带的一个例子——使用一个优先队列,返回指定数目的 Top N 文档。
package org.apache.lucene.search;
/**
* Copyright 2004 The Apache Software Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import java.util.BitSet;
import org.apache.lucene.store.Directory;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.util.PriorityQueue;
/** A {@link HitCollector} implementation that collects the top-
scoring
* documents, returning them as a {@link TopDocs}. This is used by
{@link
* IndexSearcher} to implement {@link TopDocs}-based search.
*
*
This may be extended, overriding the collect method to, e.g.,
* conditionally invoke super()in order to filter which
* documents are collected.
**/
public class TopDocCollector extends HitCollector {
private int numHits;
private float minScore = 0.0f;
int totalHits;
PriorityQueue hq;
/** Construct to collect a given number of hits.
* @param numHits the maximum number of hits to collect
*/
public TopDocCollector(int numHits) {
this(numHits, new HitQueue(numHits));
}
TopDocCollector(int numHits, PriorityQueue hq) {
this.numHits = numHits;
this.hq = hq;
}
// javadoc inherited
public void collect(int doc, float score) {
if (score > 0.0f) {
totalHits++;
if (hq.size() <numhits>= minScore) { <br> hq.insert(new ScoreDoc(doc, score)); <br> minScore = ((ScoreDoc)hq.top()).score; // maintain minScore <br> } <br> } <br> } <br> /** The total number of documents that matched this query. */ <br> public int getTotalHits() {return totalHits; } <br> /** The top-scoring hits. */ <br> public TopDocs topDocs() { <br> ScoreDoc[] scoreDocs = new ScoreDoc[hq.size()]; <br> for (int i = hq.size()-1; i >= 0; i--) // put docs in array <br> scoreDocs[i] = (ScoreDoc)hq.pop(); <br> float maxScore = (totalHits==0) <br> Float.NEGATIVE_INFINITY <br> : scoreDocs[0].score; <br> return new TopDocs(totalHits, scoreDocs, maxScore); <br> } <br> } <br><br>本文转自 <br><a href="http://blog.lough.com.cn/post/234/">http://blog.lough.com.cn/post/234/</a></numhits>
HitCollector 的作用很简单:通过 collect() 方法控制检索返回的结果。下面是 Lucene 自带的一个例子——使用一个优先队列,返回指定数目的 Top N 文档。
package org.apache.lucene.search;
/**
* Copyright 2004 The Apache Software Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import java.util.BitSet;
import org.apache.lucene.store.Directory;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.util.PriorityQueue;
/** A {@link HitCollector} implementation that collects the top-
scoring
* documents, returning them as a {@link TopDocs}. This is used by
{@link
* IndexSearcher} to implement {@link TopDocs}-based search.
*
*
This may be extended, overriding the collect method to, e.g.,
* conditionally invoke super()in order to filter which
* documents are collected.
**/
public class TopDocCollector extends HitCollector {
private int numHits;
private float minScore = 0.0f;
int totalHits;
PriorityQueue hq;
/** Construct to collect a given number of hits.
* @param numHits the maximum number of hits to collect
*/
public TopDocCollector(int numHits) {
this(numHits, new HitQueue(numHits));
}
TopDocCollector(int numHits, PriorityQueue hq) {
this.numHits = numHits;
this.hq = hq;
}
// javadoc inherited
public void collect(int doc, float score) {
if (score > 0.0f) {
totalHits++;
if (hq.size() <numhits>= minScore) { <br> hq.insert(new ScoreDoc(doc, score)); <br> minScore = ((ScoreDoc)hq.top()).score; // maintain minScore <br> } <br> } <br> } <br> /** The total number of documents that matched this query. */ <br> public int getTotalHits() {return totalHits; } <br> /** The top-scoring hits. */ <br> public TopDocs topDocs() { <br> ScoreDoc[] scoreDocs = new ScoreDoc[hq.size()]; <br> for (int i = hq.size()-1; i >= 0; i--) // put docs in array <br> scoreDocs[i] = (ScoreDoc)hq.pop(); <br> float maxScore = (totalHits==0) <br> Float.NEGATIVE_INFINITY <br> : scoreDocs[0].score; <br> return new TopDocs(totalHits, scoreDocs, maxScore); <br> } <br> } <br><br>本文转自 <br><a href="http://blog.lough.com.cn/post/234/">http://blog.lough.com.cn/post/234/</a></numhits>