package lia.indexing;
/**
* Copyright Manning Publications Co.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import junit.framework.TestCase;
//import lia.common.TestUtil;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.analysis.WhitespaceAnalyzer;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.index.Term;
import java.io.IOException;
// From chapter 2
public class IndexingTest_bai extends TestCase {
//[ [1, Netherland, Amsterdam has lots of bridges, Amsterdam],
// [2, Italy, Venice has lots of canals, Venice] ]
protected String[] ids = {"1", "2", "3"};
protected String[] unindexed = {"Netherlands", "Italy", "Deutschland"};
protected String[] unstored = {"Amsterdam has lots of bridges",
"Venice has lots of canals",
"Mia san Mia"};
protected String[] text = {"Amsterdam", "Venice", "Munchen"};
private Directory directory; //直接声明,不初始化
protected void setUp() throws Exception { //1
directory = new RAMDirectory(); //内存Directory
IndexWriter writer = getWriter(); //2
for (int i = 0; i < ids.length; i++) { //3
Document doc = new Document();
doc.add(new Field("id", ids[i],
Field.Store.YES,
Field.Index.NOT_ANALYZED));
doc.add(new Field("country", unindexed[i],
Field.Store.YES,
Field.Index.NO));
doc.add(new Field("contents", unstored[i],
Field.Store.NO,
Field.Index.ANALYZED));
doc.add(new Field("city", text[i],
Field.Store.YES,
Field.Index.ANALYZED));
writer.addDocument(doc);
}
writer.close();
}
private IndexWriter getWriter() throws IOException { // 2
return new IndexWriter(directory, new WhitespaceAnalyzer(), // 2
IndexWriter.MaxFieldLength.UNLIMITED); // 2
}
protected int getHitCount(String fieldName, String searchString)
throws IOException {
IndexSearcher searcher = new IndexSearcher(directory); //4
//IndexSearcher的样子
System.out.println("IndexSearcher is:" + searcher.toString());
Term t = new Term(fieldName, searchString);
Query query = new TermQuery(t); //5
//query的样子
System.out.println("Query is: " + query.toString());
//int hitCount = TestUtil.hitCount(searcher, query); //6 这句依赖到common.TestUtil.java,删掉
TopDocs td = searcher.search(query, 1);
int hitCount = td.totalHits; //返回匹配数量
//TopDocs的样子
System.out.println("TopDocs.toString is: " + td.toString());
System.out.println("TopDocs.scoreDocs is: " + td.scoreDocs);
System.out.println("TopDocs.scoreDocs is: " + td.totalHits);
searcher.close();
return hitCount;
}
public void testIndexWriter() throws IOException {
IndexWriter writer = getWriter();
assertEquals(ids.length, writer.numDocs()); //7
writer.close();
}
public void testIndexReader() throws IOException {
IndexReader reader = IndexReader.open(directory);
assertEquals(ids.length, reader.maxDoc()); //8
assertEquals(ids.length, reader.numDocs()); //8
reader.close();
}
public void showIndexInfo() throws CorruptIndexException, IOException{
IndexReader reader = IndexReader.open(directory);
System.out.println( "reader.numDocs() = " + reader.numDocs() );
System.out.println( "reader.maxDoc() = " + reader.maxDoc() );
System.out.println( "reader.getVersion() = " + reader.getVersion() );
for(int i = 0; i < reader.numDocs(); i++){
System.out.println( String.format("reader.numDocs(%d) = ",i) + reader.document(i).toString() );
}
}
/*
#1 Run before every test
#2 Create IndexWriter
#3 Add documents
#4 Create new searcher
#5 Build simple single-term query
#6 Get number of hits
#7 Verify writer document count
#8 Verify reader document count
*/
public void testDeleteBeforeOptimize() throws IOException {
IndexWriter writer = getWriter();
//一个关于数量的断言
//assertEquals(2, writer.numDocs()); //A
writer.deleteDocuments(new Term("id", "1")); //B
writer.commit();
//数量相关的断言
//assertTrue(writer.hasDeletions()); //1
//assertEquals(2, writer.maxDoc()); //2
//assertEquals(1, writer.numDocs()); //2
writer.close();
}
public void testDeleteAfterOptimize() throws IOException {
IndexWriter writer = getWriter();
//一句断言
//assertEquals(2, writer.numDocs());
writer.deleteDocuments(new Term("id", "1"));
writer.optimize(); //3
writer.commit();
//全是断言
//assertFalse(writer.hasDeletions());
//assertEquals(1, writer.maxDoc()); //C
//assertEquals(1, writer.numDocs()); //C
writer.close();
}
public static void main(String args[]) throws Exception{
IndexingTest_bai it = new IndexingTest_bai();
it.setUp();
System.out.println(it.getHitCount("city", "Amsterdam"));
it.showIndexInfo();
it.testDeleteBeforeOptimize();
//it.testDeleteAfterOptimize();
it.showIndexInfo();
}
/*
#A 2 docs in the index
#B Delete first document
#C 1 indexed document, 0 deleted documents
#1 Index contains deletions
#2 1 indexed document, 1 deleted document
#3 Optimize compacts deletes
*/
public void testUpdate() throws IOException {
assertEquals(1, getHitCount("city", "Amsterdam"));
IndexWriter writer = getWriter();
Document doc = new Document(); //A
doc.add(new Field("id", "1",
Field.Store.YES,
Field.Index.NOT_ANALYZED)); //A
doc.add(new Field("country", "Netherlands",
Field.Store.YES,
Field.Index.NO)); //A
doc.add(new Field("contents",
"Den Haag has a lot of museums",
Field.Store.NO,
Field.Index.ANALYZED)); //A
doc.add(new Field("city", "Den Haag",
Field.Store.YES,
Field.Index.ANALYZED)); //A
writer.updateDocument(new Term("id", "1"), //B
doc); //B
writer.close();
assertEquals(0, getHitCount("city", "Amsterdam"));//C
assertEquals(1, getHitCount("city", "Haag")); //D
}
}
/*
Recorded console output of main() (not Java source).

Run result with it.testDeleteBeforeOptimize():
IndexSearcher is:org.apache.lucene.search.IndexSearcher@1b07961
Query is: city:Amsterdam
TopDocs.toString is: org.apache.lucene.search.TopDocs@fed938
TopDocs.scoreDocs is: [Lorg.apache.lucene.search.ScoreDoc;@1672476
TopDocs.scoreDocs is: 1
1
reader.numDocs() = 3
reader.maxDoc() = 3
reader.getVersion() = 1425356634346
reader.numDocs(0) = Document<stored,indexed<id:1> stored,omitNorms<country:Netherlands> stored,indexed,tokenized<city:Amsterdam>>
reader.numDocs(1) = Document<stored,indexed<id:2> stored,omitNorms<country:Italy> stored,indexed,tokenized<city:Venice>>
reader.numDocs(2) = Document<stored,indexed<id:3> stored,omitNorms<country:Deutschland> stored,indexed,tokenized<city:Munchen>>
reader.numDocs() = 2
reader.maxDoc() = 3
reader.getVersion() = 1425356634347
reader.numDocs(0) = Document<stored,indexed<id:1> stored,omitNorms<country:Netherlands> stored,indexed,tokenized<city:Amsterdam>>
reader.numDocs(1) = Document<stored,indexed<id:2> stored,omitNorms<country:Italy> stored,indexed,tokenized<city:Venice>>

Run result with it.testDeleteAfterOptimize() (called in place of it.testDeleteBeforeOptimize()):
IndexSearcher is:org.apache.lucene.search.IndexSearcher@1b07961
Query is: city:Amsterdam
TopDocs.toString is: org.apache.lucene.search.TopDocs@fed938
TopDocs.scoreDocs is: [Lorg.apache.lucene.search.ScoreDoc;@1672476
TopDocs.scoreDocs is: 1
1
reader.numDocs() = 3
reader.maxDoc() = 3
reader.getVersion() = 1425357077599
reader.numDocs(0) = Document<stored,indexed<id:1> stored,omitNorms<country:Netherlands> stored,indexed,tokenized<city:Amsterdam>>
reader.numDocs(1) = Document<stored,indexed<id:2> stored,omitNorms<country:Italy> stored,indexed,tokenized<city:Venice>>
reader.numDocs(2) = Document<stored,indexed<id:3> stored,omitNorms<country:Deutschland> stored,indexed,tokenized<city:Munchen>>
reader.numDocs() = 2
reader.maxDoc() = 2
reader.getVersion() = 1425357077600
reader.numDocs(0) = Document<stored,indexed<id:2> stored,omitNorms<country:Italy> stored,indexed,tokenized<city:Venice>>
reader.numDocs(1) = Document<stored,indexed<id:3> stored,omitNorms<country:Deutschland> stored,indexed,tokenized<city:Munchen>>
*/