Lucene学习之二:搜索(计分 Score)

本文通过具体测试案例介绍如何使用Lucene进行简单查询、通配符查询及模糊查询,并自定义相似度评分策略。

摘要生成于 C知道 ,由 DeepSeek-R1 满血版支持, 前往体验 >

 
// From chapter 3
/**
 * Scoring tests that index a handful of single-field documents into a
 * {@link RAMDirectory} and check the hits and scores returned for a term
 * query, a wildcard query and a fuzzy query.
 *
 * <p>Every searcher and writer opened by a test is closed in a
 * {@code finally} block so that a failing assertion does not leave it open.
 */
public class ScoreTest extends TestCase {
  private Directory directory;

  /** Creates a new, empty {@link RAMDirectory} for the test about to run. */
  public void setUp() throws Exception {
    directory = new RAMDirectory();
  }

  /** Closes the directory created in {@link #setUp()}. */
  public void tearDown() throws Exception {
    directory.close();
  }

  /**
   * Indexes one document containing the term "x", searches for it using
   * {@link SimpleSimilarity}, prints the score explanation and expects a
   * single hit whose score is exactly 1.0.
   */
  public void testSimple() throws Exception {
    indexSingleFieldDocs(new Field[] {new Field("contents", "x", Field.Store.YES, Field.Index.ANALYZED)});
    IndexSearcher searcher = new IndexSearcher(directory);
    try {
      searcher.setSimilarity(new SimpleSimilarity());

      Query query = new TermQuery(new Term("contents", "x"));
      Explanation explanation = searcher.explain(query, 0);
      System.out.println(explanation);

      TopDocs matches = searcher.search(query, 10);
      assertEquals(1, matches.totalHits);

      assertEquals(1F, matches.scoreDocs[0].score, 0.0);
    } finally {
      // Close even when an assertion above fails.
      searcher.close();
    }
  }

  /**
   * Writes each given field into its own single-field document, then
   * optimizes and closes the index.
   *
   * @param fields fields to index, one document per field
   */
  private void indexSingleFieldDocs(Field[] fields) throws Exception {
    IndexWriter writer = new IndexWriter(directory,
        new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.UNLIMITED);
    try {
      for (Field f : fields) {
        Document doc = new Document();
        doc.add(f);
        writer.addDocument(doc);
      }
      writer.optimize();
    } finally {
      // Close the writer even if adding or optimizing throws.
      writer.close();
    }
  }

  /**
   * Indexes "wild", "child", "mild" and "mildew" and searches with the
   * wildcard pattern "?ild*". Expects three hits (the assertion message
   * notes that "child" must not match) and identical scores for all of them.
   */
  public void testWildcard() throws Exception {
    indexSingleFieldDocs(new Field[]
      { new Field("contents", "wild", Field.Store.YES, Field.Index.ANALYZED),
        new Field("contents", "child", Field.Store.YES, Field.Index.ANALYZED),
        new Field("contents", "mild", Field.Store.YES, Field.Index.ANALYZED),
        new Field("contents", "mildew", Field.Store.YES, Field.Index.ANALYZED) });

    IndexSearcher searcher = new IndexSearcher(directory);
    try {
      // The WildcardQuery is constructed from a Term holding the pattern.
      Query query = new WildcardQuery(new Term("contents", "?ild*"));
      TopDocs matches = searcher.search(query, 10);
      assertEquals("child no match", 3, matches.totalHits);

      assertEquals("score the same", matches.scoreDocs[0].score,
                                     matches.scoreDocs[1].score, 0.0);
      assertEquals("score the same", matches.scoreDocs[1].score,
                                     matches.scoreDocs[2].score, 0.0);
    } finally {
      // Close even when an assertion above fails.
      searcher.close();
    }
  }

  /**
   * Indexes "fuzzy" and "wuzzy" and runs a fuzzy query for "wuzza".
   * Expects both documents to match with different scores, and the top
   * hit's stored "contents" value to be "wuzzy".
   */
  public void testFuzzy() throws Exception {
    indexSingleFieldDocs(new Field[] { new Field("contents",
                                                 "fuzzy",
                                                 Field.Store.YES,
                                                 Field.Index.ANALYZED),
                                       new Field("contents",
                                                 "wuzzy",
                                                 Field.Store.YES,
                                                 Field.Index.ANALYZED)
                                     });

    IndexSearcher searcher = new IndexSearcher(directory);
    try {
      Query query = new FuzzyQuery(new Term("contents", "wuzza"));
      TopDocs matches = searcher.search(query, 10);
      assertEquals("both close enough", 2, matches.totalHits);

      assertTrue("wuzzy closer than fuzzy",
                 matches.scoreDocs[0].score != matches.scoreDocs[1].score);

      Document doc = searcher.doc(matches.scoreDocs[0].doc);
      assertEquals("wuzza bear", "wuzzy", doc.get("contents"));
    } finally {
      // Close even when an assertion above fails.
      searcher.close();
    }
  }

  /**
   * A {@link Similarity} whose factors are constants: every method returns
   * 1.0 except {@link #tf(float)}, which returns the raw frequency, and
   * {@link #sloppyFreq(int)}, which returns 2.0. {@link #testSimple()}
   * relies on it to get a score of exactly 1.0 for a single-term match.
   */
  public static class SimpleSimilarity extends Similarity {
    /** Returns 1.0 regardless of the field name or its term count. */
    public float lengthNorm(String field, int numTerms) {
      return 1.0f;
    }

    /** Returns 1.0 regardless of the query's sum of squared weights. */
    public float queryNorm(float sumOfSquaredWeights) {
      return 1.0f;
    }

    /** Returns the frequency unchanged. */
    public float tf(float freq) {
      return freq;
    }

    /** Returns 2.0 regardless of the distance. */
    public float sloppyFreq(int distance) {
      return 2.0f;
    }

    /**
     * Returns 1.0 regardless of the terms or searcher.
     *
     * <p>NOTE(review): the {@code Vector} parameter suggests this may be an
     * overload rather than an override of a {@code Similarity} method in the
     * Lucene version in use; confirm against the library before relying on it.
     */
    public float idf(Vector terms, Searcher searcher) {
      return 1.0f;
    }

    /** Returns 1.0 regardless of document frequency or document count. */
    public float idf(int docFreq, int numDocs) {
      return 1.0f;
    }

    /** Returns 1.0 regardless of how many query terms overlap. */
    public float coord(int overlap, int maxOverlap) {
      return 1.0f;
    }
  }

}

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值