Java 代码
- package com.feedsky.lucene;
- import java.io.StringReader;
- import org.apache.lucene.analysis.Analyzer;
- import org.apache.lucene.analysis.TokenStream;
- import org.apache.lucene.document.Document;
- import org.apache.lucene.document.Field;
- import org.apache.lucene.index.IndexWriter;
- import org.apache.lucene.queryParser.QueryParser;
- import org.apache.lucene.search.Hits;
- import org.apache.lucene.search.IndexSearcher;
- import org.apache.lucene.search.Query;
- import org.apache.lucene.search.highlight.Highlighter;
- import org.apache.lucene.search.highlight.QueryScorer;
- import org.apache.lucene.search.highlight.SimpleFragmenter;
- import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
- import org.apache.lucene.store.Directory;
- import org.apache.lucene.store.RAMDirectory;
- public class HighLighterTest
- {
- public static void main(String[] args)
- {
- String fieldName = "text";
- String text = "《越狱3》播出时间再次延期 急煞中国“狱友” 巨型变形金刚惊现北京 《龙珠Z》绝招威力排行 《不能说的秘密》清纯版 迅雷宽频合作发行 《越狱动画版》"; //检索内容
- //采用猎兔分词
- Analyzer analyzer = new CnAnalyzer();
- Directory directory = new RAMDirectory();
- try
- {
- //索引
- IndexWriter iwriter = new IndexWriter(directory, analyzer, true);
- iwriter.setMaxFieldLength(25000);
- Document doc = new Document();
- doc.add(new Field(fieldName, text, Field.Store.YES,
- Field.Index.TOKENIZED,
- Field.TermVector.WITH_POSITIONS_OFFSETS));
- iwriter.addDocument(doc);
- iwriter.close();
- IndexSearcher isearcher = new IndexSearcher(directory);
- QueryParser queryParse = new QueryParser(fieldName, analyzer);
- Query query = queryParse.parse("越狱");
- Hits hits = isearcher.search(query);
- for (int i = 0; i < hits.length(); i++) {
- Document docTemp = hits.doc(i);
- String value = docTemp.get(fieldName);
- // 对要高亮显示的字段格式化,这里只是加红色显示和加粗
- SimpleHTMLFormatter sHtmlF = new SimpleHTMLFormatter("<b><font color='red'>", "</font></b>");
- Highlighter highlighter = new Highlighter(sHtmlF,new QueryScorer(query));
- highlighter.setTextFragmenter(new SimpleFragmenter(10));
- if (value != null) {
- TokenStream tokenStream = analyzer.tokenStream(fieldName,new StringReader(value));
- String str = highlighter.getBestFragment(tokenStream, value);
- System.out.println(str);
- }
- }
- isearcher.close();
- directory.close();
- }
- catch (Exception e)
- {
- e.printStackTrace();
- }
- }
- }
运行结果为:
《<b><font color='red'>越狱</font></b>3》播出时间