在全文索引工具中,都是由这样三部分组成
1、 索引部分(I am a boy)
2、 分词部分
3、 搜索部分
4、Field.Store.YES/NO:存储域选项
设置为YES,表示会把这个域中的内容完全存储到文件中,方便进行文本的还原
设置为NO,表示把这个域的内容不存储在文件中,但是可以被索引,此时内容无法完全还原(doc.get())
5、Field.Index(索引域选项)
Field.Index.ANALYZED:进行分词和索引,适用于标题、内容等
Field.Index.NOT_ANALYZED:进行索引,但是不进行分词,例如身份证号、姓名、ID等,适用于精确搜索
Field.Index.ANALYZED_NO_NORMS:进行分词但是不存储norms信息,norms中存储了域的加权(boost)和长度归一化等信息
Field.Index.NOT_ANALYZED_NO_NORMS:既不进行分词也不存储norms信息。
Index.NO:不进行索引
其最基本的使用方法如下面的步骤介绍
/**
 * Minimal Lucene 3.5 walk-through: builds a file-system index from the files
 * under d:/lucene/example, then searches the "content" field of that index.
 */
public class HelloLucene {
    public static void main(String[] args) {
    }

    /**
     * Builds the index: one Document per file in d:/lucene/example, with the
     * file contents indexed (not stored), and the file name / absolute path
     * stored un-tokenized for exact retrieval.
     */
    public void index() {
        IndexWriter writer = null;
        try {
            // 1. Create the Directory (in-memory, disk, etc.)
            // Directory directory = new RAMDirectory(); // in-memory variant
            Directory directory = FSDirectory.open(new File("d:/lucene/index01"));
            // 2. Create the IndexWriter
            IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_35, new StandardAnalyzer(Version.LUCENE_35));
            writer = new IndexWriter(directory, iwc);
            // 3. Create the Document object
            Document document = null;
            // 4. Add Fields to each Document
            File file = new File("d:/lucene/example");
            for (File f : file.listFiles()) {
                document = new Document();
                // Reader-based field: tokenized and indexed, but never stored.
                document.add(new Field("content", new FileReader(f)));
                document.add(new Field("filename", f.getName(), Field.Store.YES, Field.Index.NOT_ANALYZED));
                document.add(new Field("path", f.getAbsolutePath(), Field.Store.YES, Field.Index.NOT_ANALYZED));
                // 5. Add the document to the index via the IndexWriter
                writer.addDocument(document);
            }
        } catch (Exception e) {
            // Was silently swallowed before; at minimum surface the failure.
            e.printStackTrace();
        } finally {
            if (writer != null) {
                try {
                    writer.close();
                } catch (CorruptIndexException e) {
                    e.printStackTrace();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
    }

    /**
     * Searches the "content" field for the term "address" and prints the
     * stored file name and path of each of the top 10 hits.
     */
    public void searcher() {
        try {
            // 1. Create the Directory
            Directory directory = FSDirectory.open(new File("d:/lucene/index01"));
            // 2. Create the IndexReader
            IndexReader reader = IndexReader.open(directory);
            // 3. Create an IndexSearcher from the IndexReader
            IndexSearcher searcher = new IndexSearcher(reader);
            // 4. Build the Query; the second argument is the default field to search
            QueryParser parser = new QueryParser(Version.LUCENE_35, "content", new StandardAnalyzer(Version.LUCENE_35));
            // Query matching documents whose "content" field contains "address"
            Query query = parser.parse("address");
            // 5. Search and get the TopDocs
            TopDocs tds = searcher.search(query, 10);
            // 6. Get the ScoreDoc array from the TopDocs
            ScoreDoc[] sds = tds.scoreDocs;
            for (ScoreDoc sd : sds) {
                // 7. Resolve the concrete Document via the searcher and ScoreDoc
                Document document = searcher.doc(sd.doc);
                // 8. Read the stored values off the Document
                System.out.println(document.get("filename") + "[" + document.get("path") + "]");
            }
            // 9. Close the reader (was reader.clone(), which leaked the reader)
            reader.close();
        } catch (CorruptIndexException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        } catch (ParseException e) {
            e.printStackTrace();
        }
    }
}
2、
package com.lxp.index;
import java.io.File;
import java.io.IOException;
import java.util.Date;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.HashMap;
import java.util.Map;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.NumericField;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.LockObtainFailedException;
import org.apache.lucene.util.Version;
/**
 * Lucene 3.5 index CRUD demo: builds an index over parallel sample arrays
 * (each position i across ids/emails/contents/... forms one document) and
 * demonstrates delete/undelete/update/merge/search against it.
 *
 * NOTE(review): the shared static reader is opened writable (readOnly=false)
 * so that reader-based deletion/undeletion works.
 */
public class IndexUtil {
    // Parallel sample data; index i across all arrays describes one document.
    private String[] ids = {"1","2","3","4","5","6"};
    private String[] emails = {"aa@sina.com","bb@123.com","cc@qq.com","dd@uestc.com","ee@qq.com","ff@uestc.com"};
    private String[] contents = {
        "welcome to visited the space,I like book",
        "hello boy, I like pingpeng ball",
        "my name is cc I like game",
        "I like football",
        "I like football and I like basketball too",
        "I like movie and swim"
    };
    private Date[] dates = null;
    private int[] attachs = {2,3,1,4,5,5};
    private String[] names = {"zhangsan","lisi","john","jetty","mike","jake"};
    private Directory directory = null;
    // Per-email-domain document boosts; domains not listed get 0.5.
    private Map<String,Float> scores = new HashMap<String,Float>();
    // Shared reader, reused across searches and refreshed via openIfChanged.
    private static IndexReader reader = null;

    public IndexUtil() {
        try {
            setDates();
            scores.put("uestc.com", 2.0f);
            scores.put("sina.com", 1.0f);
            directory = FSDirectory.open(new File("d:/lucene/index02"));
            reader = IndexReader.open(directory, false);
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Returns a searcher over the shared reader, reopening the reader first
     * if the index has changed since it was opened.
     *
     * @return a fresh IndexSearcher, or null if the reader could not be opened
     */
    public IndexSearcher getSearcher() {
        try {
            if (reader == null) {
                reader = IndexReader.open(directory, false);
            } else {
                // openIfChanged returns null when the index is unchanged.
                IndexReader tr = IndexReader.openIfChanged(reader);
                if (tr != null) {
                    reader.close();
                    reader = tr;
                }
            }
            return new IndexSearcher(reader);
        } catch (CorruptIndexException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        }
        return null;
    }

    /** Populates the per-document dates used by index(). */
    private void setDates() {
        SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd");
        try {
            dates = new Date[ids.length];
            dates[0] = sdf.parse("2010-02-18");
            dates[1] = sdf.parse("2012-03-24");
            dates[2] = sdf.parse("2011-02-18");
            dates[3] = sdf.parse("2012-02-18");
            dates[4] = sdf.parse("2014-05-18");
            dates[5] = sdf.parse("2013-06-30");
        } catch (ParseException e) {
            e.printStackTrace();
        }
    }

    /** Prints live/max/deleted document counts for the index. */
    public void query() {
        try {
            IndexReader reader = IndexReader.open(directory);
            System.out.println("numDocs:" + reader.numDocs());
            System.out.println("maxDocs:" + reader.maxDoc());
            System.out.println("deleteDocs:" + reader.numDeletedDocs());
            reader.close();
        } catch (CorruptIndexException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Deletes the document with id=1 via an IndexWriter.
     * The argument may be a Query (matches many) or a Term (exact match).
     * Deleted documents are only flagged — they sit in a "recycle bin" and
     * can be recovered with undelete() until the segments are merged.
     */
    public void delete() {
        IndexWriter writer = null;
        try {
            writer = new IndexWriter(directory, new IndexWriterConfig(Version.LUCENE_35, new StandardAnalyzer(Version.LUCENE_35)));
            writer.deleteDocuments(new Term("id","1"));
            writer.commit();
        } catch (CorruptIndexException e) {
            e.printStackTrace();
        } catch (LockObtainFailedException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            try {
                if (writer != null) {
                    writer.close();
                }
            } catch (CorruptIndexException e) {
                e.printStackTrace();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }

    /**
     * Deletes the document with id=1 via the shared IndexReader.
     * Reader-based deletions are persisted when the reader closes, so the
     * shared reader must be closed — and then reopened, otherwise every
     * later getSearcher() call would operate on a closed reader.
     */
    public void delete2() {
        try {
            reader.deleteDocuments(new Term("id","1"));
            reader.close();
            // Reopen the shared reader; leaving it closed breaks getSearcher().
            reader = IndexReader.open(directory, false);
        } catch (CorruptIndexException e) {
            e.printStackTrace();
        } catch (LockObtainFailedException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Recovers documents deleted (but not yet merged away) via undeleteAll().
     * The reader must be opened writable (readOnly=false) for this to work.
     */
    public void undelete() {
        try {
            IndexReader reader = IndexReader.open(directory, false);
            reader.undeleteAll();
            reader.close();
        } catch (CorruptIndexException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Permanently expunges documents that are flagged as deleted by forcing
     * a merge of the segments that contain them. Irreversible.
     */
    public void forceDelete() {
        IndexWriter writer = null;
        try {
            writer = new IndexWriter(directory, new IndexWriterConfig(Version.LUCENE_35, new StandardAnalyzer(Version.LUCENE_35)));
            writer.forceMergeDeletes();
        } catch (CorruptIndexException e) {
            e.printStackTrace();
        } catch (LockObtainFailedException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            try {
                if (writer != null) {
                    writer.close();
                }
            } catch (CorruptIndexException e) {
                e.printStackTrace();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }

    /**
     * Rebuilds the index from scratch: clears it, then adds one document per
     * sample row, storing numeric attachment counts and dates as
     * NumericFields and boosting documents by email domain.
     */
    public void index() {
        IndexWriter writer = null;
        try {
            writer = new IndexWriter(directory, new IndexWriterConfig(Version.LUCENE_35, new StandardAnalyzer(Version.LUCENE_35)));
            writer.deleteAll();
            Document document = null;
            for (int i = 0; i < ids.length; i++) {
                document = new Document();
                document.add(new Field("id", ids[i], Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS));
                document.add(new Field("email", emails[i], Field.Store.YES, Field.Index.NOT_ANALYZED));
                document.add(new Field("content", contents[i], Field.Store.NO, Field.Index.ANALYZED));
                document.add(new Field("name", names[i], Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS));
                // Store the attachment count as a trie-encoded numeric field.
                document.add(new NumericField("attachs", Field.Store.YES, true).setIntValue(attachs[i]));
                // Store the date as epoch millis for range queries.
                document.add(new NumericField("date", Field.Store.YES, true).setLongValue(dates[i].getTime()));
                // Boost by email domain; unknown domains are demoted to 0.5.
                String et = emails[i].substring(emails[i].lastIndexOf("@") + 1);
                System.out.println(et);
                if (scores.containsKey(et)) {
                    document.setBoost(scores.get(et));
                } else {
                    document.setBoost(0.5f);
                }
                writer.addDocument(document);
            }
        } catch (CorruptIndexException e) {
            e.printStackTrace();
        } catch (LockObtainFailedException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            try {
                if (writer != null)
                    writer.close();
            } catch (CorruptIndexException e) {
                e.printStackTrace();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }

    /**
     * Force-merges the index down to 2 segments; deleted documents in the
     * merged segments are expunged in the process.
     */
    public void mergeIndex() {
        IndexWriter writer = null;
        try {
            writer = new IndexWriter(directory, new IndexWriterConfig(Version.LUCENE_35, new StandardAnalyzer(Version.LUCENE_35)));
            writer.forceMerge(2);
        } catch (CorruptIndexException e) {
            e.printStackTrace();
        } catch (LockObtainFailedException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            try {
                if (writer != null) {
                    writer.close();
                }
            } catch (CorruptIndexException e) {
                e.printStackTrace();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }

    /**
     * Replaces the document with id=1 by a document with id=11.
     * Lucene has no in-place update: updateDocument() deletes the documents
     * matching the Term, then adds the new document — calling addDocument()
     * beforehand (as the original code did) inserts the document twice.
     */
    public void update() {
        IndexWriter writer = null;
        try {
            writer = new IndexWriter(directory, new IndexWriterConfig(Version.LUCENE_35, new StandardAnalyzer(Version.LUCENE_35)));
            Document document = new Document();
            document.add(new Field("id", "11", Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS));
            document.add(new Field("email", emails[0], Field.Store.YES, Field.Index.NOT_ANALYZED));
            document.add(new Field("content", contents[0], Field.Store.NO, Field.Index.ANALYZED));
            document.add(new Field("name", names[0], Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS));
            // Delete-then-add in one call; no separate addDocument needed.
            writer.updateDocument(new Term("id","1"), document);
        } catch (CorruptIndexException e) {
            e.printStackTrace();
        } catch (LockObtainFailedException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            try {
                if (writer != null) {
                    writer.close();
                }
            } catch (CorruptIndexException e) {
                e.printStackTrace();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }

    /**
     * Searches "content" for the term "like" with a one-shot reader and
     * prints the stored fields of the top 10 hits.
     */
    public void search() {
        try {
            IndexReader reader = IndexReader.open(directory);
            IndexSearcher searcher = new IndexSearcher(reader);
            TermQuery query = new TermQuery(new Term("content","like"));
            TopDocs tds = searcher.search(query, 10);
            for (ScoreDoc sd : tds.scoreDocs) {
                Document doc = searcher.doc(sd.doc);
                System.out.println("("+sd.doc+")"+doc.get("name")+"["+doc.get("email")+"]-->"+doc.get("id")+","+
                    doc.get("attachs")+","+doc.get("date"));
            }
            reader.close();
        } catch (CorruptIndexException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Same search as search(), but through the shared, auto-refreshing reader
     * via getSearcher(). Only the searcher is closed; the shared reader stays
     * open for reuse.
     */
    public void search2() {
        try {
            IndexSearcher searcher = getSearcher();
            TermQuery query = new TermQuery(new Term("content","like"));
            TopDocs tds = searcher.search(query, 10);
            for (ScoreDoc sd : tds.scoreDocs) {
                Document doc = searcher.doc(sd.doc);
                System.out.println("("+sd.doc+")"+doc.get("name")+"["+doc.get("email")+"]-->"+doc.get("id")+","+
                    doc.get("attachs")+","+doc.get("date"));
            }
            searcher.close();
        } catch (CorruptIndexException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}