package lucene;
import java.io.Closeable;
import java.io.File;
import java.io.IOException;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.highlight.Formatter;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.Scorer;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import org.wltea.analyzer.lucene.IKAnalyzer;
/**
 * Example of building, updating, deleting from and searching a Lucene index
 * whose text fields are tokenized with the IK analyzer.
 *
 * <p>All Lucene components are created with {@code Version.LUCENE_35}. Every
 * public operation opens the on-disk index, performs its work and releases the
 * directory, writer and reader it opened before returning, whether or not the
 * operation succeeded.
 *
 * @author zz
 * @date 2012-11-29
 */
public class LuceneDemo {

    /** Field used for query clauses that carry no explicit {@code field:} prefix. */
    private static final String DEFAULT_FIELD = "content";

    /** Number of hits fetched (and printed) per call to {@link #search}. */
    private static final int PAGE_SIZE = 10;

    // Location of the index files on disk.
    File dataFile = new File("D://indexFile");

    // IK analyzer, shared by indexing, query parsing and highlighting.
    Analyzer analyzer = new IKAnalyzer();

    /**
     * Adds six sample documents to the index at {@link #dataFile}.
     *
     * <p>The misspelled method name is kept so existing callers keep working.
     * Failures are printed to standard error and not rethrown.
     * NOTE(review): documents are added with {@code addDocument}, so calling
     * this repeatedly presumably appends duplicates - confirm whether that is
     * intended.
     */
    public void bulidIndex() {
        Directory directory = null;
        IndexWriter writer = null;
        try {
            directory = FSDirectory.open(dataFile);
            writer = openWriter(directory);
            writer.addDocument(addDocument(1, "中国好平台", "中国好平台,1是以分享知识与经验的学习交流平台,加入中国好平台,学习新知识、结交新朋友、塑造个人形象。"));
            writer.addDocument(addDocument(2, "中国好平台", "中国好平台,2是以分享知识与经验的学习交流平台,加入中国好平台,学习新知识、结交新朋友、塑造个人形象。"));
            writer.addDocument(addDocument(3, "中国好平台", "中国好平台,3是以分享知识与经验的学习交流平台,加入中国好平台,学习新知识、结交新朋友、塑造个人形象。"));
            writer.addDocument(addDocument(4, "资料", "中国好平台,4是以分享知识与经验的学习交流平台,加入中国好平台,学习新知识、结交新朋友、塑造个人形象。"));
            writer.addDocument(addDocument(5, "微知识", "中国好平台,5是以分享知识与经验的学习交流平台,加入中国好平台,学习新知识、结交新朋友、塑造个人形象。"));
            writer.addDocument(addDocument(6, "我", "中国好平台,6是以分享知识与经验的交流平台,加入中国好平台,新知识、结交新朋友、塑造个人形象。"));
        } catch (Exception ex) {
            ex.printStackTrace();
        } finally {
            // Null-safe: either resource may never have been opened.
            closeQuietly(writer);
            closeQuietly(directory);
        }
    }

    /**
     * Creates a Lucene document with the three fields used by this demo.
     *
     * <p>{@code id} is stored and indexed as a single un-analyzed term so it can
     * be matched exactly by {@link #update} and {@link #delete}; {@code title}
     * and {@code content} are stored and indexed through the analyzer.
     *
     * @param id      document identifier, converted with {@code String.valueOf}
     * @param title   title text
     * @param content body text
     * @return the populated document (not yet added to any index)
     */
    @SuppressWarnings("deprecation")
    public Document addDocument(Integer id, String title, String content) {
        Document doc = new Document();
        // Field.Index.NO           - not indexed
        // Field.Index.ANALYZED     - tokenized, then indexed
        // Field.Index.NOT_ANALYZED - indexed as one term, not tokenized
        doc.add(new Field("id", String.valueOf(id), Field.Store.YES, Field.Index.NOT_ANALYZED));
        doc.add(new Field("title", title, Field.Store.YES, Field.Index.ANALYZED));
        doc.add(new Field("content", content, Field.Store.YES, Field.Index.ANALYZED));
        return doc;
    }

    /**
     * Replaces the document(s) whose {@code id} term equals the given id with a
     * freshly built document.
     *
     * <p>Failures are printed to standard error and not rethrown.
     *
     * @param id      identifier of the document to replace
     * @param title   new title text
     * @param content new body text
     */
    public void update(Integer id, String title, String content) {
        Directory directory = null;
        IndexWriter writer = null;
        try {
            directory = FSDirectory.open(dataFile);
            writer = openWriter(directory);
            Term idTerm = new Term("id", String.valueOf(id));
            writer.updateDocument(idTerm, addDocument(id, title, content));
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            // Close in finally so a failed update does not leave the writer
            // (and the directory) open.
            closeQuietly(writer);
            closeQuietly(directory);
        }
    }

    /**
     * Deletes the document(s) whose {@code id} term equals the given id.
     *
     * <p>Failures are printed to standard error and not rethrown.
     *
     * @param id identifier of the document to delete
     */
    public void delete(Integer id) {
        Directory directory = null;
        IndexWriter writer = null;
        try {
            directory = FSDirectory.open(dataFile);
            writer = openWriter(directory);
            writer.deleteDocuments(new Term("id", String.valueOf(id)));
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            closeQuietly(writer);
            closeQuietly(directory);
        }
    }

    /**
     * Parses {@code where} as a Lucene query, fetches up to ten hits and prints
     * each hit (doc number, score, id, highlighted title and content) to
     * standard output.
     *
     * <p>Matched terms are wrapped in {@code <font color='red'>} tags; when the
     * highlighter returns no fragment for a field the stored value is printed
     * unchanged. Query clauses without a {@code field:} prefix are parsed
     * against the {@code content} field.
     *
     * @param where query string in Lucene query-parser syntax
     * @param after last hit of the previous page when paging, or {@code null}
     *              to start from the first hit
     * @throws RuntimeException wrapping any failure while opening the index,
     *                          parsing the query, searching or highlighting
     */
    public void search(String where, ScoreDoc after) {
        Directory directory = null;
        IndexReader reader = null;
        try {
            directory = FSDirectory.open(dataFile);
            // Reuse the directory opened above instead of opening a second
            // one that nothing would ever close.
            reader = IndexReader.open(directory);
            IndexSearcher isearcher = new IndexSearcher(reader);

            QueryParser parser = new QueryParser(Version.LUCENE_35, DEFAULT_FIELD, analyzer);
            Query query = parser.parse(where);

            // Deep paging: continue after the given hit, one page at a time.
            TopDocs topDocs = isearcher.searchAfter(after, query, PAGE_SIZE);
            ScoreDoc[] hits = topDocs.scoreDocs;

            // Keyword highlighting.
            Formatter formatter = new SimpleHTMLFormatter("<font color='red'>", "</font>");
            Scorer scorer = new QueryScorer(query);
            Highlighter highlighter = new Highlighter(formatter, scorer);

            for (ScoreDoc scoreDoc : hits) {
                Document hitDoc = isearcher.doc(scoreDoc.doc);
                String id = hitDoc.get("id");
                String storedTitle = hitDoc.get("title");
                String storedContent = hitDoc.get("content");
                float score = scoreDoc.score;

                // getBestFragment yields null when nothing in the text matched;
                // fall back to the stored value in that case.
                String title = highlighter.getBestFragment(analyzer, "title", storedTitle);
                if (title == null) {
                    title = storedTitle;
                }
                String content = highlighter.getBestFragment(analyzer, "content", storedContent);
                if (content == null) {
                    content = storedContent;
                }
                System.out.println("doc:" + scoreDoc.doc + " score:" + score + " id:" + id + " title:" + title + " content:" + content);
            }
        } catch (Exception e) {
            throw new RuntimeException(e);
        } finally {
            // Null-safe and non-throwing, so a failure while closing cannot
            // replace the exception raised above.
            closeQuietly(reader);
            closeQuietly(directory);
        }
    }

    /** Opens an index writer on {@code directory} with a fresh configuration using the shared analyzer. */
    private IndexWriter openWriter(Directory directory) throws IOException {
        IndexWriterConfig writerConfig = new IndexWriterConfig(Version.LUCENE_35, analyzer);
        return new IndexWriter(directory, writerConfig);
    }

    /** Closes {@code resource} if it is non-null, printing (not propagating) any failure. */
    private static void closeQuietly(Closeable resource) {
        if (resource == null) {
            return;
        }
        try {
            resource.close();
        } catch (Exception e) {
            // Deliberately broad: the original code also swallowed every
            // exception raised while closing, and callers rely on that.
            e.printStackTrace();
        }
    }
}
package lucene;
import org.junit.AfterClass;
import org.junit.BeforeClass;
import org.junit.Test;
/**
 * JUnit driver that exercises each public operation of {@link LuceneDemo}.
 *
 * <p>The tests make no assertions; each one simply invokes the operation on a
 * shared {@code LuceneDemo} instance.
 */
public class LuceneDemoTest {

    /**
     * Query used by {@link #testSearch()}: two optional clauses, roughly the
     * SQL condition {@code title='中国好平台' OR content='学习'}.
     *
     * <p>Other queries that can be tried instead:
     * <ul>
     *   <li>{@code title:中国好平台 +content:学习 -id:1} - roughly
     *       {@code title='中国好平台' AND content='学习' AND id!=1}</li>
     *   <li>{@code title:我 -content:学习} - roughly
     *       {@code title='我' AND content!='学习'}</li>
     * </ul>
     */
    private static final String SEARCH_QUERY = "title:中国好平台 content:学习";

    /** Instance shared by all tests; created once in {@link #setUpBeforeClass()}. */
    static LuceneDemo luceneDemo;

    /** Creates the shared {@link LuceneDemo} before any test runs. */
    @BeforeClass
    public static void setUpBeforeClass() throws Exception {
        luceneDemo = new LuceneDemo();
    }

    /** Nothing to release after the tests have run. */
    @AfterClass
    public static void tearDownAfterClass() throws Exception {
    }

    /** Adds the sample documents to the index. */
    @Test
    public void testBulidIndex() {
        luceneDemo.bulidIndex();
    }

    /** Replaces the document with id 1. */
    @Test
    public void testUpdate() {
        luceneDemo.update(1, "测试更新", "更新内容。。。。");
    }

    /** Deletes the document with id 1. */
    @Test
    public void testDelete() {
        luceneDemo.delete(1);
    }

    /**
     * Runs {@link #SEARCH_QUERY} from the first hit.
     *
     * <p>To page, pass the last hit of the previous page instead of
     * {@code null}, e.g. {@code new ScoreDoc(0, (float) 0.032529574)}.
     */
    @Test
    public void testSearch() {
        luceneDemo.search(SEARCH_QUERY, null);
    }
}
此处纯属造轮子:闲来无事写了这个示例,留待将来或许用得上。代码实际使用的是 Lucene 4.0 和 IK Analyzer 2012(均为当时的最新版本),可从以下网站下载:
lucene:http://apache.etoak.com/lucene/java/4.0.0/
ik:http://code.google.com/p/ik-analyzer/downloads/list
参考:http://www.juziku.com/sunlightcs/wiki/4205.htm