// 一, 建立索引, 根据索引进行搜索 — Part 1: build the index and search against it (class below)
// 二, 调用 — Part 2: invocation example (see the usage snippet at the end of this file)
package com.xbkj.business.demo.lucene;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.io.Reader;
import java.text.DateFormat;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.BitSet;
import java.util.Date;
import java.util.HashMap;
import java.util.List;

import net.paoding.analysis.analyzer.PaodingAnalyzer;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Filter;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.store.FSDirectory;
/**
 * Builds a Lucene index over a directory tree of files and performs keyword
 * searches against it, using the Paoding Chinese analyzer.
 *
 * NOTE(review): this targets the legacy Lucene 2.x API (Hits, Filter.bits,
 * FSDirectory.getDirectory) — confirm the Lucene version on the classpath.
 */
public class SearchIndex {
    protected static Log log = LogFactory.getLog(SearchIndex.class);

    public SearchIndex() {}

    /**
     * Creates (or recreates) the index under {@code indexFile} from the files
     * under {@code dataFile}.
     *
     * @param indexFile directory where the index is written
     * @param dataFile  file or directory tree to index
     * @throws Exception if the index cannot be created or written
     */
    public void indexwrite(File indexFile, File dataFile) throws Exception {
        // Analyzer luceneAnalyzer = new StandardAnalyzer();
        Analyzer luceneAnalyzer = new PaodingAnalyzer();
        // 'true' recreates the index, discarding any previous contents.
        IndexWriter writer = new IndexWriter(indexFile, luceneAnalyzer, true);
        long startTime = System.currentTimeMillis();
        try {
            indexDocs(writer, dataFile);
            writer.optimize();
        } finally {
            // Must close the writer — only then is the data flushed to the index directory.
            writer.close();
        }
        long endTime = System.currentTimeMillis();
        log.info("共花费:" + (endTime - startTime) + "毫秒,索引文件存放在:" + indexFile.getCanonicalPath());
    }

    /**
     * Recursively walks {@code dataFile}, indexing every regular file found.
     * Directory entries are visited in sorted order for deterministic indexing.
     */
    public void indexDocs(IndexWriter writer, File dataFile) {
        if (dataFile.isDirectory()) {
            File[] dataFiles = dataFile.listFiles();
            if (null != dataFiles) {
                Arrays.sort(dataFiles);
                for (int i = 0; i < dataFiles.length; i++) {
                    indexDocs(writer, dataFiles[i]);
                }
            }
        } else {
            // All regular files are indexed. (An earlier revision restricted this to
            // ".jsp" files — restore a name-suffix check here if that is desired.)
            indexFile(writer, dataFile);
        }
    }

    /**
     * Adds a single file to the index: its name, canonical path, last-modified
     * timestamp, and tokenized contents.
     */
    public void indexFile(IndexWriter writer, File dataFile) {
        Reader txtReader = null;
        try {
            log.info("正索引文件:" + dataFile.getCanonicalPath() + "\t:" + dataFile.getAbsolutePath());
            // A Document is one searchable record, like a row in a database table.
            Document doc = new Document();
            // NOTE(review): FileReader uses the platform default charset — consider an
            // explicit InputStreamReader with UTF-8 if the files' encodings vary.
            txtReader = new FileReader(dataFile);
            doc.add(new Field("name", dataFile.getName(), Field.Store.YES, Field.Index.TOKENIZED));
            doc.add(new Field("path", dataFile.getCanonicalPath(), Field.Store.YES, Field.Index.TOKENIZED));
            doc.add(new Field("lastmodify", "" + dataFile.lastModified(), Field.Store.YES, Field.Index.TOKENIZED));
            doc.add(new Field("contents", txtReader));
            writer.addDocument(doc);
        } catch (IOException ioex) {
            log.error("", ioex);
        } finally {
            // Lucene normally closes the contents Reader itself; closing again is a
            // harmless no-op, and this guarantees cleanup if addDocument was never reached.
            if (txtReader != null) {
                try {
                    txtReader.close();
                } catch (IOException ignored) {
                    // best effort — nothing useful to do if close fails
                }
            }
        }
    }

    /**
     * Searches the "contents" field for an exact term match.
     *
     * @param keywords single term to look up (lower-cased before matching)
     * @param indexDir directory holding the index
     * @return result rows (id/name/lastmodify/path), or null on error
     */
    public List<HashMap> search(String keywords, File indexDir) {
        if (!indexDir.exists()) {
            log.error("索引目录不存在!");
            return null;
        }
        IndexSearcher searcher = null;
        try {
            FSDirectory indexDirectory = FSDirectory.getDirectory(indexDir);
            searcher = new IndexSearcher(indexDirectory);
            Term term = new Term("contents", keywords.toLowerCase());
            TermQuery termQuery = new TermQuery(term);
            Hits hits = searcher.search(termQuery, createNameFilter());
            return collectHits(searcher, hits);
        } catch (Exception ex) {
            log.error("", ex);
            return null;
        } finally {
            closeQuietly(searcher);
        }
    }

    /**
     * Searches the "contents" field with a parsed query; all query terms are
     * required (AND semantics).
     *
     * @param keywords query string, parsed with the Paoding analyzer
     * @param indexDir directory holding the index
     * @return result rows (id/name/lastmodify/path), or null on error
     */
    public List<HashMap> searchFilter(String keywords, File indexDir) {
        if (!indexDir.exists()) {
            log.error("索引目录不存在!");
            return null;
        }
        IndexSearcher searcher = null;
        try {
            FSDirectory indexDirectory = FSDirectory.getDirectory(indexDir);
            searcher = new IndexSearcher(indexDirectory);
            // QueryParser queryParser = new QueryParser("contents", new StandardAnalyzer());
            QueryParser queryParser = new QueryParser("contents", new PaodingAnalyzer());
            queryParser.setDefaultOperator(QueryParser.AND_OPERATOR);
            Query query = queryParser.parse(keywords.toLowerCase());
            Hits hits = searcher.search(query, createNameFilter());
            return collectHits(searcher, hits);
        } catch (Exception ex) {
            log.error("", ex);
            return null;
        } finally {
            closeQuietly(searcher);
        }
    }

    /** Builds the filter shared by both search methods: hides ".jar" documents. */
    private Filter createNameFilter() {
        return new Filter() {
            @Override
            public BitSet bits(IndexReader reader) throws IOException {
                BitSet bit = new BitSet(reader.maxDoc());
                for (int i = 0; i < reader.maxDoc(); i++) {
                    if (reader.isDeleted(i)) {
                        continue; // document(i) on a deleted doc would throw
                    }
                    String name = reader.document(i).get("name");
                    if (name != null && name.endsWith(".jar")) {
                        continue; // exclude ".jar" entries from results
                    }
                    bit.set(i);
                }
                return bit;
            }
        };
    }

    /** Converts Lucene hits into the list-of-map result shape used by callers. */
    private List<HashMap> collectHits(IndexSearcher searcher, Hits hits) throws IOException {
        log.info("共有:" + searcher.maxDoc() + "条索引,命中:" + hits.length() + "条!");
        List<HashMap> list = new ArrayList<HashMap>();
        for (int i = 0; i < hits.length(); i++) {
            int docId = hits.id(i);
            Document doc = hits.doc(i);
            // Same locale-sensitive format Date.toLocaleString() produced, minus the deprecated call.
            String lastModified = DateFormat.getDateTimeInstance()
                    .format(new Date(Long.parseLong(doc.get("lastmodify"))));
            log.info(docId + "\tlastmodify:" + lastModified + "\tcontents:" + doc.get("path"));
            HashMap<String, Object> map = new HashMap<String, Object>();
            map.put("id", docId);
            map.put("name", doc.get("name"));
            map.put("lastmodify", lastModified);
            map.put("path", doc.get("path"));
            list.add(map);
        }
        return list;
    }

    /** Closes the searcher if it was opened, suppressing close-time errors. */
    private void closeQuietly(IndexSearcher searcher) {
        if (searcher != null) {
            try {
                searcher.close();
            } catch (IOException ignored) {
                // nothing useful to do if close fails
            }
        }
    }
}
// 二, 调用 — Part 2: usage example (kept outside the class for reference only)
//
//   SearchIndex si = new SearchIndex();
//   si.indexwrite(new File("F:\\index"), new File(datadir));
//   // datadir: the data directory to index, e.g. C:\\data
//   List list = si.searchFilter(keywords, new File("F:\\index"));
//   // keywords: the search term, e.g. "lucene"