2.3.1 向索引添加文档例程

最新推荐文章于 2025-05-30 15:34:14 发布

LinZiHAO1216

最新推荐文章于 2025-05-30 15:34:14 发布

阅读量415

点赞数

CC 4.0 BY-SA版权

分类专栏： lucene

本文链接：https://blog.youkuaiyun.com/LinZiHAO1216/article/details/44035105

lucene 专栏收录该内容

4 篇文章

订阅专栏

本文介绍如何使用Java Lucene库进行全文检索，包括建立索引、查询及结果解析，通过示例代码展示具体操作流程。

摘要生成于 C知道，由 DeepSeek-R1 满血版支持，前往体验 >

<pre name="code" class="java">package lia.indexing;

/**
 * Copyright Manning Publications Co.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
*/

import junit.framework.TestCase;
//import lia.common.TestUtil;

import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.analysis.WhitespaceAnalyzer;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.index.Term;

import java.io.IOException;

// From chapter 2
public class IndexingTest_bai extends TestCase {
	//[ [1, Netherland, Amsterdam has lots of bridges, Amsterdam],
	//	[2, Italy, Venice has lots of canals, Venice] ]
  protected String[] ids = {"1", "2"};
  protected String[] unindexed = {"Netherlands", "Italy"};
  protected String[] unstored = {"Amsterdam has lots of bridges",
                                 "Venice has lots of canals"};
  protected String[] text = {"Amsterdam", "Venice"};

  private Directory directory;						//直接声明，不初始化

  protected void setUp() throws Exception {     //1
    directory = new RAMDirectory();		//内存Directory

    IndexWriter writer = getWriter();           //2

    for (int i = 0; i < ids.length; i++) {      //3
      Document doc = new Document();
      doc.add(new Field("id", ids[i],
                        Field.Store.YES,
                        Field.Index.NOT_ANALYZED));
      doc.add(new Field("country", unindexed[i],
                        Field.Store.YES,
                        Field.Index.NO));
      doc.add(new Field("contents", unstored[i],
                        Field.Store.NO,
                        Field.Index.ANALYZED));
      doc.add(new Field("city", text[i],
                        Field.Store.YES,
                        Field.Index.ANALYZED));
      writer.addDocument(doc);
    }
    writer.close();
  }

  private IndexWriter getWriter() throws IOException {            // 2
    return new IndexWriter(directory, new WhitespaceAnalyzer(),   // 2
                           IndexWriter.MaxFieldLength.UNLIMITED); // 2
  }

  protected int getHitCount(String fieldName, String searchString)
    throws IOException {
    IndexSearcher searcher = new IndexSearcher(directory); //4
    Term t = new Term(fieldName, searchString);
    Query query = new TermQuery(t);                        //5
    
    //query的样子
    System.out.println("Query is: " + query.toString());
    
    //int hitCount = TestUtil.hitCount(searcher, query);     //6	这句依赖到common.TestUtil.java，删掉
    TopDocs td = searcher.search(query, 1);
    int hitCount = td.totalHits;		//返回匹配数量
    
    //TopDocs的样子
    System.out.println("TopDocs.toString is: " + td.toString());
    System.out.println("TopDocs.scoreDocs is: " + td.scoreDocs);
    System.out.println("TopDocs.scoreDocs is: " + td.totalHits);
    
    searcher.close();
    return hitCount;
  }

  public void testIndexWriter() throws IOException {
    IndexWriter writer = getWriter();
    assertEquals(ids.length, writer.numDocs());            //7
    writer.close();
  }

  public void testIndexReader() throws IOException {
    IndexReader reader = IndexReader.open(directory);
    assertEquals(ids.length, reader.maxDoc());             //8
    assertEquals(ids.length, reader.numDocs());            //8
    reader.close();
  }
  
  public static void main(String args[]) throws Exception{
	  IndexingTest_bai it = new IndexingTest_bai();
	  it.setUp();
	  
	  System.out.println(it.getHitCount("city", "Amsterdam"));
	  
  }

}

</pre><pre name="code" class="java">