This article covers only the practical use of Lucene; its internals are out of scope and may be the subject of a separate post.
We use an ordinary article class as the running example. The entity class looks like this:
import lombok.Data;

import java.io.Serializable;
import java.util.Date;

// @Data is Lombok's annotation: it generates the getters, setters, equals/hashCode and toString.
@Data
public class Article implements Serializable {
    private Long id;
    private String title;
    private String describe;
    private String content;
    private Integer status;
    private Date createtime;
}
Maven dependencies:
<!-- Lucene core -->
<dependency>
    <groupId>org.apache.lucene</groupId>
    <artifactId>lucene-core</artifactId>
    <version>5.3.1</version>
</dependency>
<!-- General-purpose analyzers, suitable for English tokenization -->
<dependency>
    <groupId>org.apache.lucene</groupId>
    <artifactId>lucene-analyzers-common</artifactId>
    <version>5.3.1</version>
</dependency>
<!-- Chinese analyzer -->
<dependency>
    <groupId>org.apache.lucene</groupId>
    <artifactId>lucene-analyzers-smartcn</artifactId>
    <version>5.3.1</version>
</dependency>
<!-- Query parsing against the tokenized index -->
<dependency>
    <groupId>org.apache.lucene</groupId>
    <artifactId>lucene-queryparser</artifactId>
    <version>5.3.1</version>
</dependency>
<!-- Highlighting of matched keywords in results -->
<dependency>
    <groupId>org.apache.lucene</groupId>
    <artifactId>lucene-highlighter</artifactId>
    <version>5.3.1</version>
</dependency>
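For reference, the Java snippets below omit their import statements; they are assumed to rely on roughly the following Lucene 5.x classes:

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.cn.smart.SmartChineseAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.LongField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.queryparser.classic.MultiFieldQueryParser;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.highlight.Fragmenter;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.search.highlight.SimpleSpanFragmenter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;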
Index creation has two steps: first build a Document from the entity, then write it to the index directory with an IndexWriter.
Building the Document:
/**
 * Build a Lucene Document from an Article.
 * @param article the source entity
 * @return the Document, or null if the article is null
 */
public static Document createDocument(Article article) {
    if (article == null) {
        return null;
    }
    Document doc = new Document();
    // LongField for the numeric id; TextField for text that should be tokenized.
    // Field.Store.YES keeps the original value so it can be read back from search results.
    Field id = new LongField("id", article.getId(), Field.Store.YES);
    Field title = new TextField("title", article.getTitle(), Field.Store.YES);
    Field describe = new TextField("describe", article.getDescribe(), Field.Store.YES);
    Field content = new TextField("content", article.getContent(), Field.Store.YES);
    doc.add(id);
    doc.add(title);
    doc.add(describe);
    doc.add(content);
    return doc;
}
Creating the index (resource handling is kept inline here for brevity):
/**
 * Add an Article to the index.
 * @param article the entity to index
 */
public static void createIndex(Article article) {
    if (article == null) {
        return;
    }
    Document doc = createDocument(article);
    try {
        // Chinese analyzer
        Analyzer analyzer = new SmartChineseAnalyzer();
        // Index directory on disk
        Directory directory = FSDirectory.open(new File(PATH).toPath());
        // OpenMode.APPEND: incremental indexing (requires an existing index)
        // OpenMode.CREATE: rebuild, overwriting any existing index
        // OpenMode.CREATE_OR_APPEND: create if absent, otherwise append
        IndexWriterConfig config = new IndexWriterConfig(analyzer)
                .setOpenMode(IndexWriterConfig.OpenMode.APPEND);
        IndexWriter writer = new IndexWriter(directory, config);
        writer.addDocument(doc);
        writer.close();
        directory.close();
    } catch (Exception e) {
        e.printStackTrace();
    }
}
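A minimal indexing sketch, assuming PATH is a constant pointing to a writable local directory (the value and the sample article below are made up for illustration):

// Hypothetical index location; any writable local folder will do.
private static final String PATH = "/tmp/lucene-index";

public static void main(String[] args) {
    Article article = new Article();
    article.setId(1L);
    article.setTitle("Lucene quick start");
    article.setDescribe("Indexing and searching with Lucene");
    article.setContent("This post shows how to build a Lucene index and run highlighted queries.");
    createIndex(article);
}

Note that with OpenMode.APPEND the very first run needs an existing index; use CREATE_OR_APPEND if the directory may still be empty.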
Searching:
/**
 * Search the index and print highlighted matches.
 * @param source the user's query string
 */
public static void search(String source) {
    try {
        Directory directory = FSDirectory.open(Paths.get(PATH));
        IndexReader reader = DirectoryReader.open(directory);
        IndexSearcher searcher = new IndexSearcher(reader);
        Analyzer analyzer = new SmartChineseAnalyzer();
        // Parse the query against several fields at once
        QueryParser parser = new MultiFieldQueryParser(FIELDS, analyzer);
        Query query = parser.parse(source);
        // Highlighting: wrap matched terms in red bold tags
        SimpleHTMLFormatter formatter = new SimpleHTMLFormatter("<b><font color=red>", "</font></b>");
        QueryScorer scorer = new QueryScorer(query);              // scores terms for highlighting
        Fragmenter fragmenter = new SimpleSpanFragmenter(scorer); // picks fragments based on those scores
        Highlighter highlighter = new Highlighter(formatter, scorer);
        highlighter.setTextFragmenter(fragmenter);                // fragment to display
        TopDocs docs = searcher.search(query, 10);
        for (ScoreDoc score : docs.scoreDocs) {
            Document document = searcher.doc(score.doc);
            String title = document.get("title");
            System.out.println(document.get("id"));
            // Print the highlighted fragment of the title
            if (title != null) {
                TokenStream tokenStream = analyzer.tokenStream("title", new StringReader(title));
                String htitle = highlighter.getBestFragment(tokenStream, title);
                System.out.println(htitle);
            }
        }
        reader.close();
        directory.close();
    } catch (Exception e) {
        e.printStackTrace();
    }
}
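The FIELDS array passed to MultiFieldQueryParser is not defined in the snippet above. A plausible definition, matching the field names used in createDocument, plus a sample call might look like this:

// Hypothetical: the fields MultiFieldQueryParser searches across.
private static final String[] FIELDS = {"title", "describe", "content"};

public static void main(String[] args) {
    // Prints each hit's id followed by its title with matched terms highlighted.
    search("索引");
}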