Lucene自定义评分查询

该博客介绍了如何在Lucene中实现自定义评分查询,并通过三个测试用例分别演示了根据score域、文件名和日期自定义评分的搜索。

摘要生成于 C知道 ,由 DeepSeek-R1 满血版支持, 前往体验 >

package org.adv.lucene.util;


import java.io.IOException;
import java.text.SimpleDateFormat;
import java.util.Date;


import org.apache.lucene.document.Document;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.queries.CustomScoreProvider;
import org.apache.lucene.queries.CustomScoreQuery;
import org.apache.lucene.queries.function.FunctionQuery;
import org.apache.lucene.queries.function.valuesource.IntFieldSource;
import org.apache.lucene.search.FieldCache;
import org.apache.lucene.search.FieldCache.Longs;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;


/**
 * Custom scoring demo.
 * <p>
 * Steps for custom scoring:
 * 1. Create a class that extends CustomScoreQuery and override getCustomScoreProvider.
 * 2. Create a class that extends CustomScoreProvider and override customScore.
 */
public class MyScoreQuery {

/**
 * Runs a custom-scored search whose score is adjusted by the {@code score} field.
 * <p>
 * Searches the {@code content} field for the term {@code java}, wraps that query
 * together with a function query over the integer {@code score} field in a
 * {@code MyCustomScoreQuery}, and prints up to 1000 hits to standard output.
 * I/O failures are reported via {@code printStackTrace()} and not rethrown.
 */
public void searchByScoreQuery() {
    // try-with-resources guarantees the reader is closed even if searching or printing fails
    try (DirectoryReader reader = DirectoryReader.open(FileIndexUtils.getDirectory())) {
        IndexSearcher searcher = new IndexSearcher(reader);
        Query q = new TermQuery(new Term("content", "java"));
        FunctionQuery fq = new FunctionQuery(new IntFieldSource("score"));
        // Custom-score query built from the original query and the score-field function query
        MyCustomScoreQuery query = new MyCustomScoreQuery(q, fq);
        TopDocs tds = searcher.search(query, 1000);
        // HH is the 24-hour clock; hh would print afternoon hours as 01-11 with no am/pm marker
        SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
        for (ScoreDoc sd : tds.scoreDocs) {
            Document d = searcher.doc(sd.doc);
            System.out.println(sd.doc + ":(" + sd.score + ")"
                    + "[" + d.get("filename") + "[" + d.get("path") + "]--->"
                    + d.get("size") + "-----"
                    + sdf.format(new Date(Long.parseLong(d.get("date")))) + "]");
        }
    } catch (IOException e) {
        // CorruptIndexException is an IOException, so this single handler covers both
        e.printStackTrace();
    }
}

/**
 * Runs a custom-scored search whose score is adjusted by the {@code filename} field.
 * <p>
 * Searches the {@code content} field for the term {@code java}, wraps that query
 * in a {@code FilenameScoreQuery}, and prints up to 1000 hits to standard output.
 * I/O failures are reported via {@code printStackTrace()} and not rethrown.
 */
public void searchByFileScoreQuery() {
    // try-with-resources guarantees the reader is closed even if searching or printing fails
    try (DirectoryReader reader = DirectoryReader.open(FileIndexUtils.getDirectory())) {
        IndexSearcher searcher = new IndexSearcher(reader);
        Query q = new TermQuery(new Term("content", "java"));
        // Custom-score query that re-scores hits based on their filename
        FilenameScoreQuery query = new FilenameScoreQuery(q);
        TopDocs tds = searcher.search(query, 1000);
        // HH is the 24-hour clock; hh would print afternoon hours as 01-11 with no am/pm marker
        SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
        for (ScoreDoc sd : tds.scoreDocs) {
            Document d = searcher.doc(sd.doc);
            System.out.println(sd.doc + ":(" + sd.score + ")"
                    + "[" + d.get("filename") + "[" + d.get("path") + "]--->"
                    + d.get("size") + "-----"
                    + sdf.format(new Date(Long.parseLong(d.get("date")))) + "]");
        }
    } catch (IOException e) {
        // CorruptIndexException is an IOException, so this single handler covers both
        e.printStackTrace();
    }
}



/**
 * Runs a custom-scored search whose score is adjusted by the {@code date} field.
 * <p>
 * Searches the {@code content} field for the term {@code java}, wraps that query
 * in a {@code DateScoreQuery}, and prints up to 1000 hits to standard output.
 * I/O failures are reported via {@code printStackTrace()} and not rethrown.
 */
public void searchByDateScoreQuery() {
    // try-with-resources guarantees the reader is closed even if searching or printing fails
    try (DirectoryReader reader = DirectoryReader.open(FileIndexUtils.getDirectory())) {
        IndexSearcher searcher = new IndexSearcher(reader);
        Query q = new TermQuery(new Term("content", "java"));
        // Custom-score query that re-scores hits based on their date
        DateScoreQuery query = new DateScoreQuery(q);
        TopDocs tds = searcher.search(query, 1000);
        // HH is the 24-hour clock; hh would print afternoon hours as 01-11 with no am/pm marker
        SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
        for (ScoreDoc sd : tds.scoreDocs) {
            Document d = searcher.doc(sd.doc);
            System.out.println(sd.doc + ":(" + sd.score + ")"
                    + "[" + d.get("filename") + "[" + d.get("path") + "]--->"
                    + d.get("size") + "-----"
                    + sdf.format(new Date(Long.parseLong(d.get("date")))) + "]");
        }
    } catch (IOException e) {
        // CorruptIndexException is an IOException, so this single handler covers both
        e.printStackTrace();
    }
}

/**
 * Custom-score query that delegates scoring to {@code FilenameScoreProvider}.
 */
@SuppressWarnings("serial")
private class FilenameScoreQuery extends CustomScoreQuery {

    /**
     * @param subQuery the query whose hits are re-scored
     */
    public FilenameScoreQuery(Query subQuery) {
        super(subQuery);
    }

    /**
     * Supplies the filename-based score provider for the given reader context.
     */
    @Override
    protected CustomScoreProvider getCustomScoreProvider(AtomicReaderContext context)
            throws IOException {
        CustomScoreProvider provider = new FilenameScoreProvider(context);
        return provider;
    }
}

private class FilenameScoreProvider extends CustomScoreProvider {
BinaryDocValues filenames = null;
public FilenameScoreProvider(AtomicReaderContext context) {
super(context);
try {
//从域缓存中获取各个域的值,只要reader没有关闭,域缓存就一直存在
filenames = FieldCache.DEFAULT.getTerms(context.reader(), "filename",false);
} catch (IOException e) {
e.printStackTrace();
}
}

@Override
public float customScore(int doc, float subQueryScore, float valSrcScore)
throws IOException {
System.out.println("subQueryScore="+subQueryScore);
System.out.println("valSrcScore="+valSrcScore);


//如何根据doc获取相应的field的值
/*
* 在reader没有关闭之前,所有的数据会存储要一个域缓存中,可以通过域缓存获取很多有用的信息
* filenames = FieldCache.DEFAULT.getTerms(reader, "filename",false);可以获取所有的filename域的信息
*/
String filename = filenames.get(doc).utf8ToString();
System.out.println("filename="+filename);
if(filename.endsWith(".xml") || filename.endsWith(".she")) {
return subQueryScore*100000.0f;
}else if(filename.endsWith(".aa") || filename.endsWith(".ba")) {
return subQueryScore/1.5f;
}else{
   return subQueryScore*0.8f;
}
}
}


/**
 * Custom-score query that combines a sub-query with a function query and
 * delegates the final score to {@code MyCustomScoreProvider}.
 */
@SuppressWarnings("serial")
private class MyCustomScoreQuery extends CustomScoreQuery {

    /**
     * @param subQuery     the query whose hits are re-scored
     * @param scoringQuery the function query supplying the value-source score
     */
    public MyCustomScoreQuery(Query subQuery, FunctionQuery scoringQuery) {
        super(subQuery, scoringQuery);
    }

    /**
     * Supplies the provider that computes the final score.
     * <p>
     * Per the original author's note, the default provider multiplies the original
     * score by the score from the scoring field. To score differently:
     * 1. create a class that extends CustomScoreProvider;
     * 2. override its customScore method.
     */
    @Override
    protected CustomScoreProvider getCustomScoreProvider(AtomicReaderContext context)
            throws IOException {
        CustomScoreProvider provider = new MyCustomScoreProvider(context);
        return provider;
    }
}

/**
 * Score provider that divides the sub-query score by the value-source score.
 */
private class MyCustomScoreProvider extends CustomScoreProvider {

    /**
     * @param context the reader context whose documents will be scored
     */
    public MyCustomScoreProvider(AtomicReaderContext context) {
        super(context);
    }

    /**
     * Computes the final score for a document.
     *
     * @param doc           document id (unused)
     * @param subQueryScore the default score of the document from the wrapped query
     * @param valSrcScore   the score obtained from the scoring field
     * @return {@code subQueryScore / valSrcScore}, or {@code subQueryScore} unchanged
     *         when {@code valSrcScore} is zero
     */
    @Override
    public float customScore(int doc, float subQueryScore, float valSrcScore)
            throws IOException {
        System.out.println("subQueryScore=" + subQueryScore);
        System.out.println("valSrcScore=" + valSrcScore);
        // Dividing by zero would yield Infinity or NaN and distort the ranking.
        // NOTE(review): a zero presumably also covers documents with no score value — confirm.
        if (valSrcScore == 0f) {
            return subQueryScore;
        }
        return subQueryScore / valSrcScore;
    }
}

private  class DateScoreQuery  extends  CustomScoreQuery{


public DateScoreQuery(Query subQuery) {
super(subQuery);
}


@Override
protected CustomScoreProvider getCustomScoreProvider(
AtomicReaderContext context) throws IOException {
return new DateScoreProvider(context);
}



}


/**
 * Score provider that boosts recent documents and penalises older ones,
 * based on the {@code date} field.
 */
private class DateScoreProvider extends CustomScoreProvider {
    /** Per-document values of the {@code date} field, read through the field cache. */
    private final Longs dates;

    /**
     * Loads the {@code date} values for the given reader context.
     *
     * @param context the reader context whose documents will be scored
     * @throws IOException if the field values cannot be loaded; propagated rather than
     *         swallowed, because a null {@code dates} would make every later
     *         {@code customScore} call fail with a NullPointerException
     */
    public DateScoreProvider(AtomicReaderContext context) throws IOException {
        super(context);
        // Field values come from the field cache, which (per the original author's note)
        // stays available for as long as the reader is open.
        dates = FieldCache.DEFAULT.getLongs(context.reader(), "date", false);
    }

    /**
     * Re-scores a document from the gap between now and its {@code date} value.
     *
     * @param doc           document id passed to the field-cache lookup
     * @param subQueryScore score produced by the wrapped query
     * @param valSrcScore   score from the value source; only printed here
     * @return {@code subQueryScore * 1.5} when the date is at most three days before now,
     *         otherwise {@code subQueryScore * 0.5}
     */
    @Override
    public float customScore(int doc, float subQueryScore, float valSrcScore)
            throws IOException {
        System.out.println("subQueryScore=" + subQueryScore);
        System.out.println("valSrcScore=" + valSrcScore);
        long date = dates.get(doc);
        long now = System.currentTimeMillis();
        // Three days in milliseconds (the original named this "year"). Computed in long
        // arithmetic so that widening the window cannot silently overflow int.
        // NOTE(review): assumes the date field holds epoch milliseconds — confirm.
        final long recentWindowMillis = 3L * 24 * 60 * 60 * 1000;
        if (now - date <= recentWindowMillis) {
            // Recent document: boost
            return subQueryScore * 1.5f;
        } else {
            return subQueryScore * 0.5f;
        }
    }
}

}



package org.adv.lucene.test;


import org.adv.lucene.util.MyScoreQuery;
import org.junit.Test;


/**
 * Smoke tests that run each custom-scoring search of {@code MyScoreQuery}.
 * The tests make no assertions; they only exercise the searches.
 */
public class TestCustomScore {

    /** Runs the search scored by the score field. */
    @Test
    public void test01() {
        new MyScoreQuery().searchByScoreQuery();
    }

    /** Runs the search scored by the filename field. */
    @Test
    public void test02() {
        new MyScoreQuery().searchByFileScoreQuery();
    }

    /** Runs the search scored by the date field. */
    @Test
    public void test03() {
        new MyScoreQuery().searchByDateScoreQuery();
    }
}

评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值