2.3.2删除索引中的文档例程

本文介绍了一个使用Apache Lucene进行文档索引、搜索及更新的实战案例,演示了如何创建索引、添加文档、执行搜索、删除及优化索引等关键步骤。

摘要生成于 C知道 ,由 DeepSeek-R1 满血版支持, 前往体验 >

package lia.indexing;

/**
 * Copyright Manning Publications Co.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
*/

import junit.framework.TestCase;
//import lia.common.TestUtil;


import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.analysis.WhitespaceAnalyzer;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.index.Term;

import java.io.IOException;

// From chapter 2
public class IndexingTest_bai extends TestCase {
	//[ [1, Netherland, Amsterdam has lots of bridges, Amsterdam],
	//	[2, Italy, Venice has lots of canals, Venice] ]
  protected String[] ids = {"1", "2", "3"};
  protected String[] unindexed = {"Netherlands", "Italy", "Deutschland"};
  protected String[] unstored = {"Amsterdam has lots of bridges",
                                 "Venice has lots of canals",
                                 "Mia san Mia"};
  protected String[] text = {"Amsterdam", "Venice", "Munchen"};

  private Directory directory;						//直接声明,不初始化

  protected void setUp() throws Exception {     //1
    directory = new RAMDirectory();		//内存Directory

    IndexWriter writer = getWriter();           //2

    for (int i = 0; i < ids.length; i++) {      //3
      Document doc = new Document();
      doc.add(new Field("id", ids[i],
                        Field.Store.YES,
                        Field.Index.NOT_ANALYZED));
      doc.add(new Field("country", unindexed[i],
                        Field.Store.YES,
                        Field.Index.NO));
      doc.add(new Field("contents", unstored[i],
                        Field.Store.NO,
                        Field.Index.ANALYZED));
      doc.add(new Field("city", text[i],
                        Field.Store.YES,
                        Field.Index.ANALYZED));
      writer.addDocument(doc);
    }
    writer.close();
  }

  private IndexWriter getWriter() throws IOException {            // 2
    return new IndexWriter(directory, new WhitespaceAnalyzer(),   // 2
                           IndexWriter.MaxFieldLength.UNLIMITED); // 2
  }

  protected int getHitCount(String fieldName, String searchString)
    throws IOException {
    IndexSearcher searcher = new IndexSearcher(directory); //4
    
    //IndexSearcher的样子
    System.out.println("IndexSearcher is:" + searcher.toString());
    
    Term t = new Term(fieldName, searchString);
    Query query = new TermQuery(t);                        //5
    
    //query的样子
    System.out.println("Query is: " + query.toString());
    
    //int hitCount = TestUtil.hitCount(searcher, query);     //6	这句依赖到common.TestUtil.java,删掉
    TopDocs td = searcher.search(query, 1);
    int hitCount = td.totalHits;		//返回匹配数量
    
    //TopDocs的样子
    System.out.println("TopDocs.toString is: " + td.toString());
    System.out.println("TopDocs.scoreDocs is: " + td.scoreDocs);
    System.out.println("TopDocs.scoreDocs is: " + td.totalHits);
    
    searcher.close();
    return hitCount;
  }

  public void testIndexWriter() throws IOException {
    IndexWriter writer = getWriter();
    assertEquals(ids.length, writer.numDocs());            //7
    writer.close();
  }

  public void testIndexReader() throws IOException {
    IndexReader reader = IndexReader.open(directory);
    assertEquals(ids.length, reader.maxDoc());             //8
    assertEquals(ids.length, reader.numDocs());            //8
    reader.close();
  }
  
  public void showIndexInfo() throws CorruptIndexException, IOException{
	  IndexReader reader = IndexReader.open(directory);
	  System.out.println( "reader.numDocs() = " + reader.numDocs() );
	  System.out.println( "reader.maxDoc() = " + reader.maxDoc() );
	  System.out.println( "reader.getVersion() = " + reader.getVersion() );
	  for(int i = 0; i < reader.numDocs(); i++){
		  System.out.println( String.format("reader.numDocs(%d) = ",i) + reader.document(i).toString() );
	  }
  }

  /*
    #1 Run before every test
    #2 Create IndexWriter
    #3 Add documents
    #4 Create new searcher
    #5 Build simple single-term query
    #6 Get number of hits
    #7 Verify writer document count
    #8 Verify reader document count
  */

  
  public void testDeleteBeforeOptimize() throws IOException {
    IndexWriter writer = getWriter();
    
    //一个关于数量的断言
    //assertEquals(2, writer.numDocs()); //A
    
    writer.deleteDocuments(new Term("id", "1"));  //B
    writer.commit();
    
    //数量相关的断言
    //assertTrue(writer.hasDeletions());    //1
    //assertEquals(2, writer.maxDoc());    //2
    //assertEquals(1, writer.numDocs());   //2   
    
    writer.close();
  }

  public void testDeleteAfterOptimize() throws IOException {
    IndexWriter writer = getWriter();
    
    //一句断言
    //assertEquals(2, writer.numDocs());
    writer.deleteDocuments(new Term("id", "1"));
    writer.optimize();                //3
    writer.commit();
    
    //全是断言
    //assertFalse(writer.hasDeletions());
    //assertEquals(1, writer.maxDoc());  //C
    //assertEquals(1, writer.numDocs()); //C    
    
    writer.close();
  }
  
  public static void main(String args[]) throws Exception{
	  IndexingTest_bai it = new IndexingTest_bai();
	  it.setUp();
	  
	  System.out.println(it.getHitCount("city", "Amsterdam"));
	  
	  it.showIndexInfo();
	  
	  it.testDeleteBeforeOptimize();
	  //it.testDeleteAfterOptimize();
	  
	  it.showIndexInfo();
  }
  
  

  /*
    #A 2 docs in the index
    #B Delete first document
    #C 1 indexed document, 0 deleted documents
    #1 Index contains deletions
    #2 1 indexed document, 1 deleted document
    #3 Optimize compacts deletes
  */  
  

  public void testUpdate() throws IOException {

    assertEquals(1, getHitCount("city", "Amsterdam"));

    IndexWriter writer = getWriter();

    Document doc = new Document();                   //A            
    doc.add(new Field("id", "1",
                      Field.Store.YES,
                      Field.Index.NOT_ANALYZED));    //A
    doc.add(new Field("country", "Netherlands",
                      Field.Store.YES,
                      Field.Index.NO));              //A  
    doc.add(new Field("contents",                    
                      "Den Haag has a lot of museums",
                      Field.Store.NO,
                      Field.Index.ANALYZED));       //A
    doc.add(new Field("city", "Den Haag",
                      Field.Store.YES,
                      Field.Index.ANALYZED));       //A

    writer.updateDocument(new Term("id", "1"),       //B
                          doc);                      //B
    writer.close();

    assertEquals(0, getHitCount("city", "Amsterdam"));//C   
    assertEquals(1, getHitCount("city", "Haag"));     //D  
  }
  
  

  
}
</pre><pre name="code" class="java">

对于it.testDeleteBeforeOptimize();的运行结果

IndexSearcher is:org.apache.lucene.search.IndexSearcher@1b07961
Query is: city:Amsterdam
TopDocs.toString is: org.apache.lucene.search.TopDocs@fed938
TopDocs.scoreDocs is: [Lorg.apache.lucene.search.ScoreDoc;@1672476
TopDocs.scoreDocs is: 1
1
reader.numDocs() = 3
reader.maxDoc() = 3
reader.getVersion() = 1425356634346
reader.numDocs(0) = Document<stored,indexed<id:1> stored,omitNorms<country:Netherlands> stored,indexed,tokenized<city:Amsterdam>>
reader.numDocs(1) = Document<stored,indexed<id:2> stored,omitNorms<country:Italy> stored,indexed,tokenized<city:Venice>>
reader.numDocs(2) = Document<stored,indexed<id:3> stored,omitNorms<country:Deutschland> stored,indexed,tokenized<city:Munchen>>
reader.numDocs() = 2
reader.maxDoc() = 3
reader.getVersion() = 1425356634347
reader.numDocs(0) = Document<stored,indexed<id:1> stored,omitNorms<country:Netherlands> stored,indexed,tokenized<city:Amsterdam>>
reader.numDocs(1) = Document<stored,indexed<id:2> stored,omitNorms<country:Italy> stored,indexed,tokenized<city:Venice>>


对于it.testDeleteAfterOptimize();的运行结果

IndexSearcher is:org.apache.lucene.search.IndexSearcher@1b07961
Query is: city:Amsterdam
TopDocs.toString is: org.apache.lucene.search.TopDocs@fed938
TopDocs.scoreDocs is: [Lorg.apache.lucene.search.ScoreDoc;@1672476
TopDocs.scoreDocs is: 1
1
reader.numDocs() = 3
reader.maxDoc() = 3
reader.getVersion() = 1425357077599
reader.numDocs(0) = Document<stored,indexed<id:1> stored,omitNorms<country:Netherlands> stored,indexed,tokenized<city:Amsterdam>>
reader.numDocs(1) = Document<stored,indexed<id:2> stored,omitNorms<country:Italy> stored,indexed,tokenized<city:Venice>>
reader.numDocs(2) = Document<stored,indexed<id:3> stored,omitNorms<country:Deutschland> stored,indexed,tokenized<city:Munchen>>
reader.numDocs() = 2
reader.maxDoc() = 2
reader.getVersion() = 1425357077600
reader.numDocs(0) = Document<stored,indexed<id:2> stored,omitNorms<country:Italy> stored,indexed,tokenized<city:Venice>>
reader.numDocs(1) = Document<stored,indexed<id:3> stored,omitNorms<country:Deutschland> stored,indexed,tokenized<city:Munchen>>

标题基于SpringBoot+Vue的社区便民服务平台研究AI更换标题第1章引言介绍社区便民服务平台的研究背景、意义,以及基于SpringBoot+Vue技术的研究现状和创新点。1.1研究背景与意义分析社区便民服务的重要性,以及SpringBoot+Vue技术在平台建设中的优势。1.2国内外研究现状概述国内外在社区便民服务平台方面的发展现状。1.3研究方法与创新点阐述本文采用的研究方法和在SpringBoot+Vue技术应用上的创新之处。第2章相关理论介绍SpringBoot和Vue的相关理论基础,以及它们在社区便民服务平台中的应用。2.1SpringBoot技术概述解释SpringBoot的基本概念、特点及其在便民服务平台中的应用价值。2.2Vue技术概述阐述Vue的核心思想、技术特性及其在前端界面开发中的优势。2.3SpringBoot与Vue的整合应用探讨SpringBoot与Vue如何有效整合,以提升社区便民服务平台的性能。第3章平台需求分析与设计分析社区便民服务平台的需求,并基于SpringBoot+Vue技术进行平台设计。3.1需求分析明确平台需满足的功能需求和性能需求。3.2架构设计设计平台的整体架构,包括前后端分离、模块化设计等思想。3.3数据库设计根据平台需求设计合理的数据库结构,包括数据表、字段等。第4章平台实现与关键技术详细阐述基于SpringBoot+Vue的社区便民服务平台的实现过程及关键技术。4.1后端服务实现使用SpringBoot实现后端服务,包括用户管理、服务管理等核心功能。4.2前端界面实现采用Vue技术实现前端界面,提供友好的用户交互体验。4.3前后端交互技术探讨前后端数据交互的方式,如RESTful API、WebSocket等。第5章平台测试与优化对实现的社区便民服务平台进行全面测试,并针对问题进行优化。5.1测试环境与工具介绍测试
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值