Lucence创建索引实例

package org.test.index;


import java.io.File;
import java.io.IOException;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.HashMap;
import java.util.Map;


import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.IntField;
import org.apache.lucene.document.LongField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.LockObtainFailedException;
import org.apache.lucene.util.Version;


/**
 * 实现索引的增删改查
 * 注意:对索引进行了操作后,必须执行IndexWriter的commit,之后才能被IndexReader所读到,否则对索引的操作无效
 * @author 
 *
 */
public class IndexUtil {
// Sample data: index() builds one document per position across these parallel arrays.
private String[] ids = {"1","2","3","4","5","6"};
private String[] emails = {"aa@itat.org","bb@itat.org","cc@cc.org","dd@sina.org","ee@zttc.edu","ff@itat.org"};
private String[] contents = {
    "welcome to visited the space,I like book",
    "hello boy, I like pingpeng ball",
    "my name is cc I like game",
    "I like football",
    "I like football and I like basketball too",
    "I like movie and swim"
};
// Filled by setDates(); sized to ids.length.
private Date[] dates = null;
private int[] attachs = {2,3,1,4,5,5};
private String[] names = {"zhangsan","lisi","john","jetty","mike","jake"};
// Index storage; assigned in the constructor.
private Directory directory = null;
// Boost per e-mail domain, consulted by index(); populated in the constructor.
private Map<String,Float> scores = new HashMap<String,Float>();
// Opening an IndexReader is expensive, so one instance is shared across the project.
private static IndexReader reader = null;
// IndexSearcher is thread-safe, so a single shared instance is enough.
private static IndexSearcher  searcher = null;
// Location of the on-disk index. Built with File.separator instead of a hard-coded
// backslash so the path resolves to nested directories on every platform
// (on Windows the resulting value is unchanged: indexs\index02).
private static  String   Index_File_Path="indexs" + File.separator + "index02";
// IndexWriter is thread-safe and could be shared.
// NOTE(review): every method visible in this class opens its own local writer instead.
private  static  IndexWriter  writer=null;

// Field type used by index() for the boosted "email" field:
// indexed, stored, not tokenized, norms kept, postings limited to document ids.
public static final FieldType TYPE_STORED = new FieldType();


 static {
    TYPE_STORED.setIndexed(true);
    // Norms stay enabled because index() sets a boost on fields of this type.
    TYPE_STORED.setOmitNorms(false);
    TYPE_STORED.setIndexOptions(IndexOptions.DOCS_ONLY);
    TYPE_STORED.setStored(true);
    TYPE_STORED.setTokenized(false);
    // Freeze so the shared type cannot be modified after initialization.
    TYPE_STORED.freeze();
 }



/**
 * Prepares the sample data, opens the on-disk index directory and makes sure
 * the shared reader is available.
 *
 * <p>The reader is a static field shared by all instances, so it is opened
 * only when no reader exists yet. Re-opening it on every construction would
 * orphan the previous reader without closing it and leave the shared searcher
 * bound to that orphaned reader; {@link #getSearcher()} takes care of
 * refreshing an existing reader.
 */
public IndexUtil() {
    try {
        setDates();
        scores.put("itat.org",2.0f);
        scores.put("zttc.edu", 1.5f);
        // Keep the index files on the physical disk.
        directory = FSDirectory.open(new File(Index_File_Path));
        if (reader == null) {
            reader = DirectoryReader.open(directory);
        }
    } catch (IOException e) {
        // Best effort: construction must still succeed so that index() can be
        // called afterwards (presumably the usual cause is an index that has
        // not been created yet). getSearcher() retries opening the reader.
        e.printStackTrace();
    }
}

/**
* 获取IndexSearcher
* @return
*/
public IndexSearcher getSearcher() {
boolean  hasChanged=false; 
try {
if(reader==null) {
reader = DirectoryReader.open(directory);
} else {
IndexReader tr = DirectoryReader.openIfChanged((DirectoryReader)reader);
if(tr!=null) {
reader.close();
reader = tr;
hasChanged=true;
}
}
if(searcher==null || hasChanged){
searcher=new IndexSearcher(reader);
}
return searcher;
} catch (CorruptIndexException e) {
e.printStackTrace();
} catch (IOException e) {
e.printStackTrace();
}
return null;

}


/**
 * Fills {@code dates} with the sample dates, parsed as {@code yyyy-MM-dd}.
 * The array is sized to {@code ids.length}. If a value fails to parse, the
 * stack trace is printed and the remaining slots are left {@code null}.
 */
private void setDates() {
    final String[] rawDates = {
        "2010-02-19",
        "2012-01-11",
        "2011-09-19",
        "2010-12-22",
        "2012-01-01",
        "2011-05-19"
    };
    final SimpleDateFormat format = new SimpleDateFormat("yyyy-MM-dd");
    dates = new Date[ids.length];
    try {
        for (int slot = 0; slot < rawDates.length; slot++) {
            dates[slot] = format.parse(rawDates[slot]);
        }
    } catch (ParseException e) {
        e.printStackTrace();
    }
}


/**
* 撤销对索引的删除
* 恢复删除的文档索引
* TODO:有问题
*/
public void undelete() {
IndexWriter writer = null;
try {
//获取IndexWriter
writer = new IndexWriter(directory,
new IndexWriterConfig(Version.LUCENE_4_9,new StandardAnalyzer(Version.LUCENE_4_9)));
//从回收站中撤销对索引的删除
writer.rollback();
} catch (CorruptIndexException e) {
e.printStackTrace();
}  catch (LockObtainFailedException e) {
e.printStackTrace();
} catch (IOException e) {
e.printStackTrace();
}finally{
try {
if(writer!=null){
writer.close();
}
} catch (IOException e) {
e.printStackTrace();
}
}
}

/**
 * Merges the index down to a single segment.
 *
 * <p>Equivalent to {@code merge(1)}.
 */
public void merge() {
    merge(1);
}

/**
 * Merges the segment files of the index until at most
 * {@code maxNumSegments} segments remain.
 *
 * <p>According to the original author's notes, documents marked as deleted
 * in the merged segments are dropped, and forcing merges is discouraged since
 * Lucene 3.5 because it is expensive and Lucene merges on its own as needed.
 * I/O errors are printed, not propagated.
 *
 * @param maxNumSegments upper bound on the number of segments after the
 *        merge; must be at least 1 per the Lucene {@code forceMerge} contract
 */
public void merge(int maxNumSegments) {
    IndexWriter indexWriter = null;
    try {
        indexWriter = new IndexWriter(directory,
            new IndexWriterConfig(Version.LUCENE_4_9, new StandardAnalyzer(Version.LUCENE_4_9)));
        indexWriter.forceMerge(maxNumSegments);
    } catch (IOException e) {
        // Also covers CorruptIndexException and LockObtainFailedException.
        e.printStackTrace();
    } finally {
        if (indexWriter != null) {
            try {
                indexWriter.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }
}

/**
 * Forces the deletion of documents that are marked as deleted; afterwards
 * they cannot be recovered.
 *
 * <p>Opens a new IndexWriter, calls {@code forceMergeDeletes()} and closes
 * the writer. Per the original author's note, {@code optimize} was used for
 * this before Lucene 3.5 but was dropped because of its resource cost.
 * I/O errors are printed, not propagated.
 */
public void forceDelete() {
    IndexWriter indexWriter = null;
    try {
        IndexWriterConfig config =
            new IndexWriterConfig(Version.LUCENE_4_9, new StandardAnalyzer(Version.LUCENE_4_9));
        indexWriter = new IndexWriter(directory, config);
        indexWriter.forceMergeDeletes();
    } catch (IOException e) {
        // Also covers CorruptIndexException and LockObtainFailedException.
        e.printStackTrace();
    } finally {
        if (indexWriter != null) {
            try {
                indexWriter.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }
}

/**
 * Deletes the document whose "id" field is "1" and commits the change.
 *
 * <p>Equivalent to {@code delete("1")}.
 */
public void delete() {
    delete("1");
}

/**
 * Deletes every document whose "id" field exactly matches {@code id} and
 * commits the change.
 *
 * <p>Per the original author's notes: before the commit the deletion is only
 * a mark held in memory and the document can still be found; the commit moves
 * it to the "recycle bin" (*.del files); it is physically removed only when
 * segments are merged. I/O errors are printed, not propagated.
 *
 * @param id exact value of the "id" field to delete; must not be null
 * @throws IllegalArgumentException if {@code id} is null
 */
public void delete(String id) {
    if (id == null) {
        throw new IllegalArgumentException("id must not be null");
    }
    IndexWriter indexWriter = null;
    try {
        indexWriter = new IndexWriter(directory,
            new IndexWriterConfig(Version.LUCENE_4_9, new StandardAnalyzer(Version.LUCENE_4_9)));
        // deleteDocuments accepts a Query or a Term; a Term is an exact-match value.
        indexWriter.deleteDocuments(new Term("id", id));
        indexWriter.commit();
    } catch (IOException e) {
        // Also covers CorruptIndexException and LockObtainFailedException.
        e.printStackTrace();
    } finally {
        if (indexWriter != null) {
            try {
                indexWriter.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }
}

/**
 * Demonstrates a delete that is rolled back instead of committed.
 *
 * <p>Deletes the document with id "1", prints the index statistics via
 * {@link #query()}, rolls the writer back and prints the statistics again.
 * The writer is finally closed with {@code close(false)}.
 *
 * <p>Per the original author's notes: a delete without commit is only a mark
 * in memory and the document stays searchable; a commit moves it to the
 * "recycle bin" (*.del files), where it is no longer found by searches but
 * can still be restored; it is physically removed only on merge.
 * I/O errors are printed, not propagated.
 */
public void delete02() {
    IndexWriter indexWriter = null;
    try {
        IndexWriterConfig config =
            new IndexWriterConfig(Version.LUCENE_4_9, new StandardAnalyzer(Version.LUCENE_4_9));
        indexWriter = new IndexWriter(directory, config);
        // Not committed, so the document is not removed for good at this point.
        indexWriter.deleteDocuments(new Term("id","1"));
        query();
        indexWriter.rollback();
        query();
    } catch (IOException e) {
        // Also covers CorruptIndexException and LockObtainFailedException.
        e.printStackTrace();
    } finally {
        if (indexWriter != null) {
            try {
                indexWriter.close(false);
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }
}

/**
 * Replaces the document whose "id" is "1" with a new document whose "id" is "11".
 *
 * <p>Per the original author's notes, a Lucene update is a delete followed by
 * an add, which is why the replacement may carry a different id. No explicit
 * commit is issued; the writer is closed in the finally block.
 * I/O errors are printed, not propagated.
 */
public void update() {
    IndexWriter indexWriter = null;
    try {
        indexWriter = new IndexWriter(directory,
            new IndexWriterConfig(Version.LUCENE_4_9, new StandardAnalyzer(Version.LUCENE_4_9)));
        Document replacement = buildReplacementDocument();
        indexWriter.updateDocument(new Term("id","1"), replacement);
    } catch (IOException e) {
        // Also covers CorruptIndexException and LockObtainFailedException.
        e.printStackTrace();
    } finally {
        if (indexWriter != null) {
            try {
                indexWriter.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }
}

/** Builds the replacement document used by update() from the first sample entry. */
private Document buildReplacementDocument() {
    Document replacement = new Document();
    replacement.add(new StringField("id","11",Field.Store.YES));
    // Two values are stored under "email": the sample address and a generated one.
    replacement.add(new StringField("email",emails[0],Field.Store.YES));
    replacement.add(new StringField("email","test"+2+"@test.com",Field.Store.YES));
    replacement.add(new TextField("content",contents[0],Field.Store.NO));
    replacement.add(new StringField("name",names[0],Field.Store.YES));
    replacement.add(new IntField("attach",attachs[0],Field.Store.YES));
    // The date is stored as its epoch-millisecond value.
    replacement.add(new LongField("date",dates[0].getTime(),Field.Store.YES));
    return replacement;
}

/**
 * Prints document statistics of the index to standard output.
 *
 * <p>Opens a dedicated reader on the directory, prints {@code numDocs},
 * {@code numDeletedDocs} and {@code maxDoc}, then closes the reader.
 * Per the original author's notes: numDocs is the number of searchable
 * documents, numDeletedDocs the number sitting in the "recycle bin" (*.del),
 * and maxDoc = numDocs + numDeletedDocs. I/O errors are printed, not propagated.
 */
public void query() {
    try {
        IndexReader statsReader = DirectoryReader.open(directory);
        System.out.println("numDocs:" + statsReader.numDocs());
        System.out.println("deleteDocs:" + statsReader.numDeletedDocs());
        System.out.println("maxDocs:" + statsReader.maxDoc());
        statsReader.close();
    } catch (IOException e) {
        // Also covers CorruptIndexException.
        e.printStackTrace();
    }
}

/**
 * Builds the index from the sample data: one document per entry of {@code ids}.
 *
 * <p>Each document gets the fields id, content (not stored), name, attach,
 * date (epoch milliseconds) and two "email" values: the sample address, boosted
 * by its domain, and a generated {@code test<i>@test.com} address. The domain
 * of every sample address is printed to standard output.
 *
 * <p>Existing documents are not removed first. No explicit commit is issued;
 * the writer is closed in the finally block.
 * NOTE(review): in Lucene 4.x close() is expected to commit pending changes - confirm.
 * I/O errors are printed, not propagated.
 */
public void index() {
    IndexWriter indexWriter = null;
    try {
        indexWriter = new IndexWriter(directory,
            new IndexWriterConfig(Version.LUCENE_4_9, new StandardAnalyzer(Version.LUCENE_4_9)));
        for (int row = 0; row < ids.length; row++) {
            Document entry = new Document();
            // String field: indexed as a single token.
            entry.add(new StringField("id",ids[row],Field.Store.YES));
            // Text field: analyzed, not stored.
            entry.add(new TextField("content",contents[row],Field.Store.NO));
            entry.add(new StringField("name",names[row],Field.Store.YES));
            // Numeric field.
            entry.add(new IntField("attach",attachs[row],Field.Store.YES));
            // Date stored as its epoch-millisecond value.
            entry.add(new LongField("date",dates[row].getTime(),Field.Store.YES));

            String address = emails[row];
            String domain = address.substring(address.lastIndexOf("@")+1);
            System.out.println(domain);
            // Domains listed in scores get their configured boost, all others 0.5.
            float boost = scores.containsKey(domain) ? scores.get(domain) : 0.5f;
            Field boostedEmail = new Field("email",address,TYPE_STORED);
            boostedEmail.setBoost(boost);
            entry.add(boostedEmail);

            entry.add(new StringField("email","test"+row+"@test.com",Field.Store.YES));
            indexWriter.addDocument(entry);
        }
    } catch (IOException e) {
        // Also covers CorruptIndexException and LockObtainFailedException.
        e.printStackTrace();
    } finally {
        if (indexWriter != null) {
            try {
                indexWriter.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }
}

/**
 * Searches the "email" field for the exact term "test0@test.com" using a
 * reader and searcher created just for this call, and prints up to 10 hits.
 *
 * <p>For every hit the doc id, the boost reported by the retrieved "email"
 * field, the score, name, first email, id, attach, date and second email are
 * printed. The reader is closed in a finally block so it is released even
 * when searching or reading a document fails part-way through.
 * I/O errors are printed, not propagated.
 */
public void search01() {
    IndexReader localReader = null;
    try {
        // 1. Open the reader and wrap it in a searcher.
        localReader = DirectoryReader.open(directory);
        IndexSearcher localSearcher = new IndexSearcher(localReader);
        // 2. Build the query and fetch the top hits.
        TermQuery query = new TermQuery(new Term("email","test0@test.com"));
        TopDocs hits = localSearcher.search(query, 10);
        // 3. Load each hit's stored document and print its fields.
        for (ScoreDoc hit : hits.scoreDocs) {
            Document doc = localSearcher.doc(hit.doc);
            System.out.println("("+hit.doc+"-"+doc.getField("email").boost()+"-"+hit.score+")"+
                doc.get("name")+"["+doc.get("email")+"]-->"+doc.get("id")+","+
                doc.get("attach")+","+doc.get("date")+","+doc.getValues("email")[1]);
        }
    } catch (IOException e) {
        // Also covers CorruptIndexException.
        e.printStackTrace();
    } finally {
        // 4. Always release the reader, including on failure paths.
        if (localReader != null) {
            try {
                localReader.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }
}

/**
 * Searches the "content" field for the term "like" through the shared
 * searcher and prints up to 10 hits.
 *
 * <p>Uses {@link #getSearcher()} so that no new IndexReader is created per
 * search (creating one is expensive). If no searcher is available the method
 * returns without searching. I/O errors are printed, not propagated.
 */
public void search02() {
    try {
        IndexSearcher sharedSearcher = getSearcher();
        if (sharedSearcher == null) {
            // getSearcher() returns null after printing the I/O error it hit;
            // without this guard the search below would fail with a NullPointerException.
            return;
        }
        TermQuery query = new TermQuery(new Term("content","like"));
        TopDocs hits = sharedSearcher.search(query, 10);
        for (ScoreDoc hit : hits.scoreDocs) {
            Document doc = sharedSearcher.doc(hit.doc);
            System.out.println(doc.get("id")+"---->"+
                doc.get("name")+"["+doc.get("email")+"]-->"+doc.get("id")+","+
                doc.get("attach")+","+doc.get("date")+","+doc.getValues("email")[1]);
        }
    } catch (IOException e) {
        // Also covers CorruptIndexException.
        e.printStackTrace();
    }
}


}
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值