Lucene学习笔记(二)

本文通过示例展示了如何使用Apache Lucene进行文档的索引、查询、更新及删除等基本操作,并对比了文件系统目录与内存目录在索引速度上的差异。

摘要生成于 C知道 ,由 DeepSeek-R1 满血版支持, 前往体验 >

import java.io.IOException;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.SimpleAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

import junit.framework.TestCase;

publicclassBaseIndexTestCaseextendsTestCase
ExpandedBlockStart.gifContractedBlock.gif
dot.gif{
ExpandedSubBlockStart.gifContractedSubBlock.gif
protectedString[]keywords=dot.gif{"1","2"};
ExpandedSubBlockStart.gifContractedSubBlock.gif
protectedString[]unindexed=dot.gif{"Netherlands","Italy"};
ExpandedSubBlockStart.gifContractedSubBlock.gif
protectedString[]unstored=dot.gif{"Amsterdamhaslotsofbridges","Venicehaslotsofcanals"};
ExpandedSubBlockStart.gifContractedSubBlock.gif
protectedString[]text=dot.gif{"Amsterdam","Venice"};
InBlock.gif
protectedDirectorydir;
InBlock.gif
ExpandedSubBlockStart.gifContractedSubBlock.gif
protectedvoidsetUp()throwsIOExceptiondot.gif{
InBlock.gifStringindexDir
=
InBlock.gifSystem.getProperty(
"java.io.tmpdir","tmp")+
InBlock.gifSystem.getProperty(
"file.separator")+"index-dir";
InBlock.gifdir
=FSDirectory.getDirectory(indexDir,true);
InBlock.gifaddDocuments(dir);
ExpandedSubBlockEnd.gif}

InBlock.gif
InBlock.gif
protectedvoidaddDocuments(Directorydir)
ExpandedSubBlockStart.gifContractedSubBlock.gifthrowsIOException
dot.gif{
InBlock.gifIndexWriterwriter
=newIndexWriter(dir,getAnalyzer(),true);
InBlock.gifwriter.setUseCompoundFile(isCompound());
InBlock.gif
for(inti=0;i<keywords.length;i++)
ExpandedSubBlockStart.gifContractedSubBlock.gif
dot.gif{
InBlock.gifDocumentdoc
=newDocument();
InBlock.gifdoc.add(
newField("id",keywords[i],Field.Store.YES,Field.Index.UN_TOKENIZED));
InBlock.gifdoc.add(
newField("country",unindexed[i],Field.Store.YES,Field.Index.NO));
InBlock.gifdoc.add(
newField("contents",unstored[i],Field.Store.NO,Field.Index.TOKENIZED));
InBlock.gifdoc.add(
newField("city",text[i],Field.Store.YES,Field.Index.TOKENIZED));
InBlock.gifwriter.addDocument(doc);
ExpandedSubBlockEnd.gif}

InBlock.gifwriter.optimize();
InBlock.gifwriter.close();
ExpandedSubBlockEnd.gif}

InBlock.gif
InBlock.gif
protectedAnalyzergetAnalyzer()
ExpandedSubBlockStart.gifContractedSubBlock.gif
dot.gif{
InBlock.gif
returnnewSimpleAnalyzer();
ExpandedSubBlockEnd.gif}

InBlock.gif
protectedbooleanisCompound()
ExpandedSubBlockStart.gifContractedSubBlock.gif
dot.gif{
InBlock.gif
returntrue;
ExpandedSubBlockEnd.gif}

InBlock.gif
InBlock.gif
publicvoidtestIndexWriter()throwsIOException
ExpandedSubBlockStart.gifContractedSubBlock.gif
dot.gif{
InBlock.gifIndexWriterwriter
=newIndexWriter(dir,this.getAnalyzer(),false);
InBlock.gifassertEquals(keywords.length,writer.docCount());
InBlock.gifwriter.close();
ExpandedSubBlockEnd.gif}

InBlock.gif
InBlock.gif
publicvoidtestIndexReader()throwsIOException
ExpandedSubBlockStart.gifContractedSubBlock.gif
dot.gif{
InBlock.gifIndexReaderreader
=IndexReader.open(dir);
InBlock.gifassertEquals(keywords.length,reader.maxDoc());
InBlock.gifassertEquals(keywords.length,reader.numDocs());
InBlock.gifreader.close();
ExpandedSubBlockEnd.gif}

ExpandedBlockEnd.gif}

import java.io.IOException;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.WhitespaceAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;

publicclassDocumentDeleteTestextendsBaseIndexTestCase
ExpandedBlockStart.gifContractedBlock.gif
dot.gif{
InBlock.gif
publicvoidtestDeleteBeforeIndexMerge()throwsIOException
ExpandedSubBlockStart.gifContractedSubBlock.gif
dot.gif{
InBlock.gifassertEquals(
1,getHitCount("city","Amsterdam"));
InBlock.gif
InBlock.gifIndexReaderreader
=IndexReader.open(dir);
InBlock.gifassertEquals(
2,reader.maxDoc());
InBlock.gifassertEquals(
2,reader.numDocs());
InBlock.gif
InBlock.gif
InBlock.gifreader.deleteDocument(
1);
InBlock.gif
InBlock.gifassertTrue(reader.isDeleted(
1));
InBlock.gifassertTrue(reader.hasDeletions());
InBlock.gifassertEquals(
2,reader.maxDoc());
InBlock.gifassertEquals(
1,reader.numDocs());
InBlock.gif
InBlock.gifreader.close();
InBlock.gif
InBlock.gifreader
=IndexReader.open(dir);
InBlock.gif
InBlock.gifassertEquals(
2,reader.maxDoc());
InBlock.gifassertEquals(
1,reader.numDocs());
InBlock.gif
InBlock.gifreader.close();
ExpandedSubBlockEnd.gif}

InBlock.gif
InBlock.gif
publicvoidtestDeleteAfterIndexMerge()throwsIOException
ExpandedSubBlockStart.gifContractedSubBlock.gif
dot.gif{
InBlock.gifIndexReaderreader
=IndexReader.open(dir);
InBlock.gifassertEquals(
2,reader.maxDoc());
InBlock.gifassertEquals(
2,reader.numDocs());
InBlock.gifreader.deleteDocument(
1);
InBlock.gifreader.close();
InBlock.gif
InBlock.gifIndexWriterwriter
=newIndexWriter(dir,getAnalyzer(),false);
InBlock.gifwriter.optimize();
InBlock.gifwriter.close();
InBlock.gif
InBlock.gifreader
=IndexReader.open(dir);
InBlock.gif
InBlock.gifassertFalse(reader.isDeleted(
1));
InBlock.gifassertFalse(reader.hasDeletions());
InBlock.gifassertEquals(
1,reader.maxDoc());
InBlock.gifassertEquals(
1,reader.numDocs());
InBlock.gif
InBlock.gifreader.close();
ExpandedSubBlockEnd.gif}

InBlock.gif
InBlock.gif
InBlock.gif
privateintgetHitCount(StringfieldName,StringsearchString)
ExpandedSubBlockStart.gifContractedSubBlock.gifthrowsIOException
dot.gif{
InBlock.gifIndexSearchersearcher
=newIndexSearcher(dir);
InBlock.gifTermt
=newTerm(fieldName,searchString);
InBlock.gifQueryquery
=newTermQuery(t);
InBlock.gifHitshits
=searcher.search(query);
InBlock.gif
inthitCount=hits.length();
InBlock.gifsearcher.close();
InBlock.gif
returnhitCount;
ExpandedSubBlockEnd.gif}

InBlock.gif
InBlock.gif
ExpandedSubBlockStart.gifContractedSubBlock.gif
protectedAnalyzergetAnalyzer()dot.gif{
InBlock.gif
returnnewWhitespaceAnalyzer();
ExpandedSubBlockEnd.gif}

InBlock.gif
InBlock.gif
InBlock.gif
ExpandedBlockEnd.gif}

import java.io.IOException;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.WhitespaceAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;

publicclassDocumentUpdateTestextendsBaseIndexTestCase
ExpandedBlockStart.gifContractedBlock.gif
dot.gif{
InBlock.gif
InBlock.gif
publicvoidtestUpdate()throwsIOException
ExpandedSubBlockStart.gifContractedSubBlock.gif
dot.gif{
InBlock.gifassertEquals(
1,getHitCount("city","Amsterdam"));
InBlock.gifIndexReaderreader
=IndexReader.open(dir);
InBlock.gifreader.deleteDocuments(
newTerm("city","Amsterdam"));
InBlock.gifreader.close();
InBlock.gif
InBlock.gifIndexWriterwriter
=newIndexWriter(dir,getAnalyzer(),
InBlock.gif
false);
InBlock.gifDocumentdoc
=newDocument();
InBlock.gifdoc.add(
newField("id","1",Field.Store.YES,Field.Index.UN_TOKENIZED));
InBlock.gifdoc.add(
newField("country","Russia",Field.Store.YES,Field.Index.NO));
InBlock.gifdoc.add(
newField("contents","St.Petersburghaslotsofbridges",Field.Store.NO,Field.Index.TOKENIZED));
InBlock.gifdoc.add(
newField("city","St.Petersburg",Field.Store.YES,Field.Index.TOKENIZED));
InBlock.gif
InBlock.gifwriter.addDocument(doc);
InBlock.gifwriter.optimize();
InBlock.gifwriter.close();
InBlock.gif
InBlock.gifassertEquals(
0,getHitCount("city","Amsterdam"));
InBlock.gifassertEquals(
1,getHitCount("city","Petersburg"));
ExpandedSubBlockEnd.gif}

InBlock.gif
ExpandedSubBlockStart.gifContractedSubBlock.gif
protectedAnalyzergetAnalyzer()dot.gif{
InBlock.gif
returnnewWhitespaceAnalyzer();
ExpandedSubBlockEnd.gif}

InBlock.gif
InBlock.gif
privateintgetHitCount(StringfieldName,StringsearchString)
ExpandedSubBlockStart.gifContractedSubBlock.gifthrowsIOException
dot.gif{
InBlock.gifIndexSearchersearcher
=newIndexSearcher(dir);
InBlock.gifTermt
=newTerm(fieldName,searchString);
InBlock.gifQueryquery
=newTermQuery(t);
InBlock.gifHitshits
=searcher.search(query);
InBlock.gif
inthitCount=hits.length();
InBlock.gifsearcher.close();
InBlock.gif
returnhitCount;
ExpandedSubBlockEnd.gif}

InBlock.gif
ExpandedBlockEnd.gif}

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.SimpleAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

publicclassIndexTuningDemo
ExpandedBlockStart.gifContractedBlock.gif
dot.gif{
ExpandedSubBlockStart.gifContractedSubBlock.gif
publicstaticvoidmain(String[]args)throwsExceptiondot.gif{
InBlock.gif
intdocsInIndex=Integer.parseInt(args[0]);
InBlock.gif
InBlock.gif
//createanindexcalled'index-dir'inatempdirectory
InBlock.gif
Directorydir=FSDirectory.getDirectory(
InBlock.gifSystem.getProperty(
"java.io.tmpdir","tmp")+
InBlock.gifSystem.getProperty(
"file.separator")+"index-dir",true);
InBlock.gifAnalyzeranalyzer
=newSimpleAnalyzer();
InBlock.gifIndexWriterwriter
=newIndexWriter(dir,analyzer,true);
InBlock.gif
InBlock.gif
//setvariablesthataffectspeedofindexing
InBlock.gif
writer.setMergeFactor(Integer.parseInt(args[1]));
InBlock.gifwriter.setMaxMergeDocs(Integer.parseInt(args[
2]));
InBlock.gifwriter.setInfoStream(System.
out);
InBlock.gifwriter.setMaxBufferedDocs(Integer.parseInt(args[
3]));
InBlock.gif
InBlock.gifSystem.
out.println("Mergefactor:"+writer.getMergeFactor());
InBlock.gifSystem.
out.println("Maxmergedocs:"+writer.getMaxMergeDocs());
InBlock.gifSystem.
out.println("Minmergedocs:"+writer.getMaxBufferedDocs());
InBlock.gif
InBlock.gif
longstart=System.currentTimeMillis();
ExpandedSubBlockStart.gifContractedSubBlock.gif
for(inti=0;i<docsInIndex;i++)dot.gif{
InBlock.gifDocumentdoc
=newDocument();
InBlock.gifdoc.add(
newField("fieldname","Bibamus",Field.Store.YES,Field.Index.TOKENIZED));
InBlock.gifwriter.addDocument(doc);
ExpandedSubBlockEnd.gif}

InBlock.gifwriter.close();
InBlock.gif
longstop=System.currentTimeMillis();
InBlock.gifSystem.
out.println("Time:"+(stop-start)+"ms");
ExpandedSubBlockEnd.gif}

InBlock.gif
ExpandedBlockEnd.gif}

<!-- Code highlighting produced by Actipro CodeHighlighter (freeware) http://www.CodeHighlighter.com/ -->
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.analysis.SimpleAnalyzer;

import junit.framework.TestCase;
import java.io.IOException;
import java.util.Collection;
import java.util.ArrayList;
import java.util.Iterator;

publicclassFSversusRAMDirectoryTestextendsTestCase
{
privateDirectoryfsDir;
privateDirectoryramDir;
privateCollectiondocs=loadDocuments(3000,5);//加载数据

protectedvoidsetUp()throwsException
{
StringfsIndexDir
=System.getProperty("java.io.tmpdir","tmp")+System.getProperty("file.separator")+"fs-index";
ramDir
=newRAMDirectory();//内存中目录
fsDir=FSDirectory.getDirectory(fsIndexDir,true);
}

publicvoidtestTiming()throwsIOException
{
longramTiming=timeIndexWriter(ramDir);
longfsTiming=timeIndexWriter(fsDir);

assertTrue(fsTiming
>ramTiming);


System.
out.println("RAMDirectoryTime:"+(ramTiming)+"ms");
System.
out.println("FSDirectoryTime:"+(fsTiming)+"ms");
}

privatelongtimeIndexWriter(Directorydir)throwsIOException
{
longstart=System.currentTimeMillis();
addDocuments(dir);
longstop=System.currentTimeMillis();
return(stop-start);
}

privatevoidaddDocuments(Directorydir)throwsIOException
{
IndexWriterwriter
=newIndexWriter(dir,newSimpleAnalyzer(),true);

/**
//changetoadjustperformanceofindexingwithFSDirectory
writer.mergeFactor=writer.mergeFactor;
writer.maxMergeDocs=writer.maxMergeDocs;
writer.minMergeDocs=writer.minMergeDocs;
*/

for(Iteratoriter=docs.iterator();iter.hasNext();)
{
Documentdoc
=newDocument();
Stringword
=(String)iter.next();
doc.add(
newField("keyword",word,Field.Store.YES,Field.Index.UN_TOKENIZED));
doc.add(
newField("unindexed",word,Field.Store.YES,Field.Index.NO));
doc.add(
newField("unstored",word,Field.Store.NO,Field.Index.TOKENIZED));
doc.add(
newField("text",word,Field.Store.YES,Field.Index.TOKENIZED));
writer.addDocument(doc);
}
writer.optimize();
writer.close();
}

privateCollectionloadDocuments(intnumDocs,intwordsPerDoc)
{
Collectiondocs
=newArrayList(numDocs);
for(inti=0;i<numDocs;i++)
{
StringBufferdoc
=newStringBuffer(wordsPerDoc);
for(intj=0;j<wordsPerDoc;j++)
{
doc.append(
"Bibamus");
}
docs.add(doc.toString());
}
returndocs;
}
}


评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值