使用 Lucene 实现搜索结果中命中关键字的高亮显示,大致流程与《Lucene 建立文件索引和针对索引进行搜索(Lucene 2.2 版本)》一文介绍的一致,只需在代码里稍作修改。
1. 索引生成过程(原文中红色部分为修改处:对需要高亮显示的索引内容进行分词,并在索引中记录关键字的位置与偏移量信息):
package demo.example.searcher;
import java.io.*;
import java.util.*;
import org.apache.lucene.analysis.standard.*;
import org.apache.lucene.index.*;
import org.apache.lucene.document.*;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
/**
 * Builds a Lucene index over every file found under a directory tree.
 *
 * <p>Each file becomes one document with two fields:
 * <ul>
 *   <li>{@code contents} - the file text, tokenized, stored, and indexed with
 *       term vectors that carry positions and offsets (needed for highlighting);</li>
 *   <li>{@code filename} - the canonical path, stored and indexed untokenized.</li>
 * </ul>
 */
public class Indexer {
    private static final Log log = LogFactory.getLog(Indexer.class);

    /**
     * Indexes the hard-coded data directory into the hard-coded index directory
     * and prints the elapsed time in milliseconds.
     *
     * @param args ignored
     * @throws Exception declared for interface compatibility
     */
    public static void main(String[] args) throws Exception {
        File indexDir = new File("C:\\index");
        File dataDir = new File("C:\\lucene\\src");
        long start = new Date().getTime();
        index(indexDir, dataDir);
        long end = new Date().getTime();
        System.out.println("use:" + (end - start));
    }

    /**
     * Creates a fresh index in {@code indexDir} from all files under {@code dataDir}.
     *
     * @param indexDir directory that receives the index (any existing index is overwritten)
     * @param dataDir  root of the directory tree to index
     * @return the number of documents in the index, or 0 if indexing failed
     */
    public static int index(File indexDir, File dataDir) {
        int ret = 0;
        IndexWriter writer = null;
        try {
            writer = new IndexWriter(indexDir, new StandardAnalyzer(), true);
            writer.setUseCompoundFile(false);
            indexDirectory(writer, dataDir);
            ret = writer.docCount();
            writer.optimize();
        } catch (Exception e) {
            log.error("Failed to build index in " + indexDir, e);
        } finally {
            // Always close the writer, even after a failure, so the index
            // is not left open for writing.
            if (writer != null) {
                try {
                    writer.close();
                } catch (Exception e) {
                    log.error("Failed to close index writer for " + indexDir, e);
                }
            }
        }
        return ret;
    }

    /**
     * Recursively indexes every regular file below {@code dir}.
     *
     * @param writer open index writer that receives the documents
     * @param dir    directory to walk
     */
    public static void indexDirectory(IndexWriter writer, File dir) {
        try {
            File[] files = dir.listFiles();
            if (files == null) {
                // listFiles() returns null when dir is not a directory or cannot be read.
                log.warn("Cannot list directory, skipping: " + dir);
                return;
            }
            for (File f : files) {
                if (f.isDirectory()) {
                    indexDirectory(writer, f);
                } else {
                    indexFile(writer, f);
                }
            }
        } catch (Exception e) {
            log.error("Failed to index directory " + dir, e);
        }
    }

    /**
     * Adds a single file to the index.
     *
     * @param writer open index writer that receives the document
     * @param f      file to index
     */
    public static void indexFile(IndexWriter writer, File f) {
        try {
            System.out.println("Indexing:" + f.getCanonicalPath());
            Document doc = new Document();
            // The text must be STORED as well as indexed: the search side reads it back
            // with doc.get("contents") to build the highlighted fragments. A field built
            // from a Reader is indexed but never stored, so it would come back as null.
            String text = readContents(f);
            doc.add(new Field("contents", text, Field.Store.YES, Field.Index.TOKENIZED,
                    Field.TermVector.WITH_POSITIONS_OFFSETS));
            doc.add(new Field("filename", f.getCanonicalPath(), Field.Store.YES, Field.Index.UN_TOKENIZED));
            writer.addDocument(doc);
        } catch (Exception e) {
            log.error("Failed to index file " + f, e);
        }
    }

    /** Reads the whole file into a string using the platform default charset, closing the reader. */
    private static String readContents(File f) throws IOException {
        StringBuilder text = new StringBuilder();
        Reader in = new BufferedReader(new FileReader(f));
        try {
            char[] buf = new char[8192];
            int n;
            while ((n = in.read(buf)) != -1) {
                text.append(buf, 0, n);
            }
        } finally {
            in.close();
        }
        return text.toString();
    }
}
2. 搜索过程(原文中红色部分为修改处)。除 Lucene 核心包外,还需要引入 Lucene 的 highlight(高亮)和 analysis(分词)相关的 jar 文件:
package demo.example.searcher;
import java.util.*;
import org.apache.lucene.search.highlight.*;
import org.apache.lucene.analysis.*;
import org.apache.lucene.search.*;
import org.apache.lucene.queryParser.*;
import org.apache.lucene.analysis.standard.*;
import org.apache.lucene.document.*;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
public class Searcher {
private static Log log = LogFactory.getLog(Searcher.class);
public static void main(String[] args) {
String indexDir = "C:\\index";
String q = "查询关键字";
search(indexDir, q);
}
public static void search(String indexDir, String q) {
try {
IndexSearcher is = new IndexSearcher(indexDir);
QueryParser queryParser = new QueryParser("contents", new StandardAnalyzer());
Query query = queryParser.parse(q);
long start = new Date().getTime();
Hits hits = is.search(query);
long end = new Date().getTime();
System.out.println("use:" + (end - start));
SimpleHTMLFormatter formatter = new SimpleHTMLFormatter("<strong><font color='red'>", "</font></strong>");
SimpleFragmenter fragmenter = new SimpleFragmenter(60);
Highlighter highlighter = new Highlighter(formatter, new QueryScorer(q));
highlighter.setTextFragmenter(fragmenter);
int maxNumFragmentsRequired = 10;
String fragmentSeparator = "";
TermPositionVector tpv = null;
TokenStream tokenstream = null;
for (int i = 0; i < hits.length(); i++) {
Document doc = hits.doc(i);
int id = hits.id(i);
System.out.println("The right file:" + doc.get("filename"));
tpv = (TermPositionVector) is.getIndexReader().getTermFreqVector(id, "contents");
tokenstream = TokenSources.getTokenStream(tpv);
String result = highlighter.getBestFragments(tokenstream, doc.get("contents"),
maxNumFragmentsRequired, fragmentSeparator)
System.out.println("The right file context is :" + result);
}
} catch (Exception e) {
e.printStackTrace();
}
}
}
在输出结果中,文件内容里命中的关键字会被 "<strong><font color='red'>" 和 "</font></strong>" 这一对标签包裹起来;
将该内容放到网页中显示,即为粗体红字的显示效果。
本文介绍如何使用Lucene实现搜索结果中的关键字高亮显示。通过修改索引生成和搜索过程,利用Highlighter组件,可以有效地突出显示搜索命中词汇。
415

被折叠的 条评论
为什么被折叠?



