本文章转自http://blog.youkuaiyun.com/karldoenitz/article/details/7913505。这篇文章是根据网上 http://www.tudou.com/programs/view/dutIgfATXf4/ 的视频来做的,视频有些模糊,也可以在优酷网上搜到,但是会不全。我是照着视频做的,自己第一次做的时候老是出错,原来是因为把writer.addDocument(doc)这句放在了for循环的外边,导致只为最后一个文件建立了索引,不能正确检索。
下面是我的源代码,用了lucene-core-3.5.0.jar和junit-4.7.jar两个包(代码里还导入了org.apache.commons.io.FileUtils,所以编译时还需要commons-io的jar包)。
这是索引和搜索的:
package cn.edu.hit.lx;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import javax.swing.text.AbstractDocument.Content;
import org.apache.commons.io.FileUtils;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.LockObtainFailedException;
import org.apache.lucene.util.Version;
/**
 * Minimal Lucene 3.5 example: {@link #index()} builds an on-disk index from the
 * files in a folder, and {@link #searcher()} queries that index and prints the
 * matching file names and paths.
 */
public class hellolucene {
    /** On-disk index location shared by {@link #index()} and {@link #searcher()}. */
    private static final String INDEX_DIR = "d:/lucene/index01";

    /** Folder whose regular files are indexed. */
    private static final String SOURCE_DIR = "d:/lucene/examples";

    /**
     * Indexes every regular file directly inside {@code SOURCE_DIR} into the
     * index at {@code INDEX_DIR}.
     *
     * <p>Each file becomes one document with three fields: {@code content}
     * (fed from a reader over the file), {@code filename} and {@code path}
     * (both stored, not analyzed). I/O failures are printed to stderr and end
     * the run; nothing is thrown to the caller.
     */
    public void index() {
        // 2. Configure the IndexWriter.
        IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_35, new StandardAnalyzer(Version.LUCENE_35));
        Directory directory = null;
        IndexWriter writer = null;
        try {
            // 1. Create the Directory on disk (an in-memory RAMDirectory is the alternative).
            directory = FSDirectory.open(new File(INDEX_DIR));
            // NOTE(review): with the default open mode, re-running this method
            // presumably appends duplicates to an existing index -- confirm.
            writer = new IndexWriter(directory, iwc);

            // listFiles() returns null when the folder is missing or unreadable.
            File[] files = new File(SOURCE_DIR).listFiles();
            if (files == null) {
                System.err.println(SOURCE_DIR + " is not a readable directory; nothing indexed.");
                return;
            }

            for (File file : files) {
                // Sub-directories cannot be opened with FileReader; skip them
                // instead of aborting the whole run.
                if (!file.isFile()) {
                    continue;
                }
                // NOTE(review): FileReader decodes with the platform default
                // charset -- confirm that matches the example files.
                FileReader contentReader = new FileReader(file);
                try {
                    // 3. Create a fresh Document per file.
                    Document doc = new Document();
                    // 4. Add the fields. The content is streamed from the reader;
                    //    reading the whole file into a String and indexing that
                    //    is the alternative.
                    doc.add(new Field("content", contentReader));
                    doc.add(new Field("filename", file.getName(), Field.Store.YES, Field.Index.NOT_ANALYZED));
                    doc.add(new Field("path", file.getAbsolutePath(), Field.Store.YES, Field.Index.NOT_ANALYZED));
                    // 5. Add the document inside the loop so every file is indexed,
                    //    not just the last one.
                    writer.addDocument(doc);
                } finally {
                    // Release the file handle even if addDocument fails.
                    contentReader.close();
                }
                System.out.println(file.getName() + " has Indexed!");
            }
        } catch (IOException e) {
            // Also covers CorruptIndexException and LockObtainFailedException,
            // which are IOException subclasses.
            e.printStackTrace();
        } finally {
            if (writer != null) {
                try {
                    writer.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
            if (directory != null) {
                try {
                    directory.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
    }

    /**
     * Searches the {@code content} field of the index at {@code INDEX_DIR} for
     * the term {@code software} and prints up to ten hits as
     * {@code filename[path]}.
     *
     * <p>I/O and query-parse failures are printed to stderr; nothing is thrown
     * to the caller.
     */
    public void searcher() {
        Directory directory = null;
        IndexReader reader = null;
        try {
            // 1. Open the Directory on disk.
            directory = FSDirectory.open(new File(INDEX_DIR));
            // 2. Create the IndexReader.
            reader = IndexReader.open(directory);
            // 3. Create the IndexSearcher on top of the reader.
            IndexSearcher searcher = new IndexSearcher(reader);
            // 4. Build the Query; the parser's second argument is the default field to search.
            QueryParser parser = new QueryParser(Version.LUCENE_35, "content", new StandardAnalyzer(Version.LUCENE_35));
            // Matches documents whose content field contains "software".
            Query query = parser.parse("software");
            // 5. Run the search, asking for the top ten hits.
            TopDocs tds = searcher.search(query, 10);
            // 6. Walk the ScoreDoc entries of the result.
            for (ScoreDoc sd : tds.scoreDocs) {
                // 7. Load the Document for this hit.
                Document d = searcher.doc(sd.doc);
                // 8. Print the stored values.
                System.out.println(d.get("filename") + "[" + d.get("path") + "]");
            }
        } catch (IOException e) {
            e.printStackTrace();
        } catch (ParseException e) {
            e.printStackTrace();
        } finally {
            // Close the reader first, then the directory it was opened on.
            if (reader != null) {
                try {
                    reader.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
            if (directory != null) {
                try {
                    directory.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
    }
}
测试代码:
package cn.edu.hit.lx;
import org.junit.Test;
/**
 * JUnit 4 drivers for {@code hellolucene}: one test builds the index, the
 * other queries it.
 */
public class testlucene {
    /** Runs the indexing step. */
    @Test
    public void testindex() {
        new hellolucene().index();
    }

    /** Runs the search step; it reads the index that the indexing step writes. */
    @Test
    public void testsearch() {
        new hellolucene().searcher();
    }
}