1. 环境 lucene 2.4
a. 实体 Article.java
public class Article {
private Long id;
private String title;
private String content;
public Long getId() {
return id;
}
public void setId(Long id) {
this.id = id;
}
public String getTitle() {
return title;
}
public void setTitle(String title) {
this.title = title;
}
public String getContent() {
return content;
}
public void setContent(String content) {
this.content = content;
}
}
b. 实体与 Document 相互转换的工具类 ArticleDocumentUtils.java
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.NumberTools;
import org.apache.lucene.document.Field.Index;
import org.apache.lucene.document.Field.Store;
// logForj
/**
 * Static helpers that map an {@link Article} onto a Lucene {@link Document}
 * and back again, using the field names "id", "title" and "content".
 */
public class ArticleDocumentUtils {

    /**
     * Article --> Document.
     *
     * @param article the article whose properties become document fields
     * @return a new document holding the stored fields "id", "title" and "content"
     */
    public static Document article2Document(Article article) {
        Document result = new Document();

        // id: stored and indexed as a single un-analyzed term; the numeric
        // value is encoded with NumberTools.longToString before indexing.
        String encodedId = NumberTools.longToString(article.getId());
        result.add(new Field("id", encodedId, Store.YES, Index.NOT_ANALYZED));

        // title: stored and analyzed, with a boost of 2.0F (default boost is 1.0F).
        Field titleField = new Field("title", article.getTitle(), Store.YES, Index.ANALYZED);
        titleField.setBoost(2.0F);
        result.add(titleField);

        // content: stored and analyzed, default boost.
        Field contentField = new Field("content", article.getContent(), Store.YES, Index.ANALYZED);
        result.add(contentField);

        return result;
    }

    /**
     * Document --> Article.
     *
     * @param doc a document carrying the fields "id", "title" and "content"
     * @return a new article populated from the string values of those fields
     */
    public static Article document2Article(Document doc) {
        Article result = new Article();
        // The id is decoded with NumberTools.stringToLong, the inverse of the
        // encoding applied in article2Document.
        result.setId(NumberTools.stringToLong(doc.getField("id").stringValue()));
        result.setTitle(doc.getField("title").stringValue());
        result.setContent(doc.getField("content").stringValue());
        return result;
    }
}
// Converting a date-typed property ("postTime") between Article and Document.
// NOTE(review): the Article class shown above declares no postTime property, and
// these two statements sit outside any method -- presumably they are meant to be
// added to article2Document and document2Article respectively; confirm.
// Article --> Document: the date is rendered as a string at SECOND resolution;
// the field is stored (Store.YES) but not indexed (Index.NO).
doc.add(new Field("postTime", DateTools.dateToString(article.getPostTime(), Resolution.SECOND), Store.YES, Index.NO));
// Document --> Article: the stored string is parsed back with DateTools.stringToDate.
article.setPostTime(DateTools.stringToDate(doc.get("postTime")));
c. 测试方法 HelloWorld.java
import java.util.ArrayList;
import java.util.List;
import jeasy.analysis.MMAnalyzer;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriter.MaxFieldLength;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.junit.Test;
// Lucene 2.4
/**
 * Minimal Lucene 2.4 walkthrough: {@link #createIndex()} adds one article to
 * the index directory and {@link #search()} queries the "title" and "content"
 * fields and prints every hit.
 */
public class HelloWorld {

    /** Directory that holds the index. */
    private String indexPath = "./index/";

    /** Analyzer used for both indexing and query parsing (alternative: new StandardAnalyzer()). */
    private Analyzer analyzer = new MMAnalyzer();

    /**
     * Builds a sample article, converts it to a Document and adds it to the index.
     *
     * @throws Exception if the index cannot be opened, written or closed
     */
    @Test
    public void createIndex() throws Exception {
        // Simulate an article that already exists.
        Article article = new Article();
        article.setId(1L);
        article.setTitle("小笑话 -- 牛人发帖");
        article
                .setContent("有一牛人发一帖,然后马上就用发帖id疯狂回复自己的帖子:自己回帖1:楼主太有才了自己回帖2:楼主说的不错,挺有道理的自己回帖3:楼主真是太牛了,好崇拜你.最后终于有人回复他的帖子: 我靠,好歹你也换个id啊");

        // Article --> Document
        Document doc = ArticleDocumentUtils.article2Document(article);

        // Add the document to the index. The writer is closed in a finally
        // block so that a failing addDocument does not leave it open.
        IndexWriter indexWriter = new IndexWriter(indexPath, analyzer,
                MaxFieldLength.LIMITED);
        try {
            indexWriter.addDocument(doc);
        } finally {
            indexWriter.close();
        }
    }

    /**
     * Parses a query string, searches "title" and "content" and prints the matches.
     *
     * @throws Exception if the query cannot be parsed or the index cannot be read
     */
    @Test
    public void search() throws Exception {
        // Alternative query string: "笑话"
        String queryString = "幽默";

        // 1. queryString --> Query (comparable to HQL --> Hibernate Query)
        String[] fields = new String[] { "title", "content" };
        QueryParser queryParser = new MultiFieldQueryParser(fields, analyzer);
        Query query = queryParser.parse(queryString);

        // 2. Run the search against the index directory.
        List<Article> list = new ArrayList<Article>();
        IndexSearcher indexSearcher = new IndexSearcher(indexPath);
        try {
            // TopDocs wraps the result: totalHits is the number of matches,
            // scoreDocs holds the returned hits (each carries only the
            // document's internal number).
            TopDocs topDocs = indexSearcher.search(query, null, 100);

            // 3. Process the result.
            System.out.println("匹配的结果的数量:" + topDocs.totalHits);
            for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
                int docSn = scoreDoc.doc; // internal document number
                Document doc = indexSearcher.doc(docSn); // load the Document by that number
                list.add(ArticleDocumentUtils.document2Article(doc));
            }
        } finally {
            // Close the searcher even when searching or mapping fails.
            indexSearcher.close();
        }

        // Print the results.
        for (Article a : list) {
            System.out.println("---------------------------> " + a.getId());
            System.out.println("Id = " + a.getId());
            System.out.println("Title = " + a.getTitle());
            System.out.println("Content = " + a.getContent());
        }
    }
}
d. 高亮器测试 HighLighterTest.java
import java.util.ArrayList;
import java.util.List;
import jeasy.analysis.MMAnalyzer;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.highlight.Formatter;
import org.apache.lucene.search.highlight.Fragmenter;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.Scorer;
import org.apache.lucene.search.highlight.SimpleFragmenter;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.junit.Test;
import cn.itcast.lucene.helloworld.Article;
import cn.itcast.lucene.helloworld.ArticleDocumentUtils;
/**
 * Demonstrates the Lucene highlighter: searches "title" and "content", then
 * replaces each hit's content with its best highlighted fragment before
 * converting the hit to an {@link Article} and printing it.
 */
public class HighLighterTest {

    /** Directory that holds the index. */
    private String indexPath = "./index/";

    /** Analyzer used for query parsing and highlighting (alternative: new StandardAnalyzer()). */
    private Analyzer analyzer = new MMAnalyzer();

    /**
     * Runs one highlighted search and prints the resulting articles.
     *
     * @throws Exception if the query cannot be parsed or the index cannot be read
     */
    @Test
    public void test() throws Exception {
        // Alternative query string: "幽默"
        String queryString = "回帖";

        // 1. queryString --> Query
        QueryParser queryParser = new MultiFieldQueryParser(new String[] { "title", "content" }, analyzer);
        Query query = queryParser.parse(queryString);

        List<Article> list = new ArrayList<Article>();

        // 2. Search "title" and "content" in the index directory.
        IndexSearcher indexSearcher = new IndexSearcher(indexPath);
        try {
            // TopDocs wraps the result: totalHits is the number of matches,
            // scoreDocs holds the returned hits (internal document numbers only).
            TopDocs topDocs = indexSearcher.search(query, null, 100);

            // Set up the highlighter.
            Formatter formatter = new SimpleHTMLFormatter("<span class='keyword'>", "</span>"); // default is <b> and </b>
            Scorer scorer = new QueryScorer(query);
            Highlighter highlighter = new Highlighter(formatter, scorer);
            Fragmenter fragmenter = new SimpleFragmenter(50); // default is 100
            highlighter.setTextFragmenter(fragmenter);

            // 3. Process the result.
            System.out.println("匹配的结果的数量:" + topDocs.totalHits);
            for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
                int docSn = scoreDoc.doc; // internal document number
                Document doc = indexSearcher.doc(docSn); // load the Document by that number

                // doc.get("content") is shorthand for doc.getField("content").stringValue().
                // getBestFragment returns null when the keyword does not occur in the
                // text, in which case the original content is kept. Only the Document
                // loaded here is changed; nothing is written back to the index.
                String ht = highlighter.getBestFragment(analyzer, "content", doc.get("content"));
                if (ht != null) {
                    doc.getField("content").setValue(ht);
                }

                list.add(ArticleDocumentUtils.document2Article(doc));
            }
        } finally {
            // Close the searcher even when searching, highlighting or mapping fails.
            indexSearcher.close();
        }

        // Print the results.
        for (Article a : list) {
            System.out.println("---------------------------> " + a.getId());
            System.out.println("Id = " + a.getId());
            System.out.println("Title = " + a.getTitle());
            System.out.println("Content = " + a.getContent());
        }
    }
}
e. CRUD 操作 IndexDao.java
import java.util.ArrayList;
import java.util.List;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.IndexWriter.MaxFieldLength;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import cn.itcast.lucene.helloworld.Article;
import cn.itcast.lucene.helloworld.ArticleDocumentUtils;
public class IndexDao {
// 索引目录
private String indexPath = "./index/";
// 分词器
private Analyzer analyzer = new StandardAnalyzer();
/**
* 建立索引(保存到索引库)
*
* @param article
*/
public void save(Article article) {
// 1, article --> Document
Document doc = ArticleDocumentUtils.article2Document(article);
// 2, indexWriter.add( doc )
IndexWriter indexWriter = null;
try {
indexWriter = new IndexWriter(indexPath, analyzer, MaxFieldLength.LIMITED);
indexWriter.addDocument(doc);
} catch (Exception e) {
throw new RuntimeException(e);
} finally {
if (indexWriter != null) {
try {
indexWriter.close();
} catch (Exception e) {
throw new RuntimeException(e);
}
}
}
}
/**
* 删除索引
*
* @param id
*
* delete from table_article WHERE ?(term.name)=?(term.value)
*/
public void delete(Long id) {
IndexWriter indexWriter = null;
try {
indexWriter = new IndexWriter(indexPath, analyzer, MaxFieldLength.LIMITED);
Term term = new Term("id", id.toString());
// 含有term的所有Document都将被删掉
indexWriter.deleteDocuments(term);
} catch (Exception e) {
throw new RuntimeException(e);
} finally {
if (indexWriter != null) {
try {
indexWriter.close();
} catch (Exception e) {
throw new RuntimeException(e);
}
}
}
}
/**
* 更新索引
*
* @param article
*
* update table_article set xxx=xxx,yy=yyy... WHERE id=?( article.getId() )
*/
public void update(Article article) {
IndexWriter indexWriter = null;
try {
Term term = new Term("id", article.getId().toString());
Document doc = ArticleDocumentUtils.article2Document(article);
indexWriter = new IndexWriter(indexPath, analyzer, MaxFieldLength.LIMITED);
// 更新含有term的Document,更新后的状态在doc中
indexWriter.updateDocument(term, doc);
// 更新就是“先删除,再创建”
// indexWriter.deleteDocuments(term);
// indexWriter.addDocument(doc);
} catch (Exception e) {
throw new RuntimeException(e);
} finally {
if (indexWriter != null) {
try {
indexWriter.close();
} catch (Exception e) {
throw new RuntimeException(e);
}
}
}
}
/**
* 搜索,分页(符合某条件的某一页的数据)
*
* @param queryString
* @param firstResult
* @param maxResults
* @return
*
* select * from table_article limit ?:first,?:max
*
* select count(*) from table_article
*/
public SearchResult search(String queryString, int firstResult, int maxResults) {
// 1, queryString --> Query
IndexSearcher indexSearcher = null;
try {
QueryParser queryParser = new MultiFieldQueryParser(new String[] { "title", "content" }, analyzer);
Query query = queryParser.parse(queryString);
// 2, 进行搜索, 在title与content中搜索 --> TopDocs( totalHits, scoreDocs )
indexSearcher = new IndexSearcher(indexPath);
TopDocs topDocs = indexSearcher.search(query, null, 100);
// 3, 处理结果,返回
List<Article> list = new ArrayList<Article>();
int end = Math.min(firstResult + maxResults, topDocs.scoreDocs.length);
for (int i = firstResult; i < end; i++) {
ScoreDoc scoreDoc = topDocs.scoreDocs[i];
int docSn = scoreDoc.doc; // Document的内部编号
Document doc = indexSearcher.doc(docSn); // 根据Document的内部编号取出相应的Document
Article article = ArticleDocumentUtils.document2Article(doc);
list.add(article);
}
return new SearchResult(topDocs.totalHits, list);
} catch (Exception e) {
throw new RuntimeException(e);
} finally {
if (indexSearcher != null) {
try {
indexSearcher.close(); // 取完数据在关闭
} catch (Exception e) {
throw new RuntimeException(e);
}
}
}
}
}