Lucene 生成索引以及搜索的完整例子

最新推荐文章于 2025-08-13 14:12:29 发布

转载 · 最新推荐文章于 2025-08-13 14:12:29 发布 · 1.4k 阅读

文章标签：

#lucene #string #exception #null #query #path

lucene 专栏收录该内容

4 篇文章

订阅专栏

本文介绍了一个用于操作Lucene索引的实用工具类，包括添加、更新、删除文档及搜索等功能。通过该工具类可以方便地进行索引管理和文档检索。

摘要生成于 C知道，由 DeepSeek-R1 满血版支持，前往体验 >

[文件] LuceneIndexUtils.java ~ 7KB 下载(45)

 
package my.search;
 
 
 
import java.io.File;
 
import java.io.FileNotFoundException;
 
import java.io.IOException;
 
import java.text.NumberFormat;
 
import java.util.*;
 
 
 
import my.mvc.RequestContext;
 
 
 
import org.apache.commons.beanutils.*;
 
import org.apache.commons.lang.StringUtils;
 
import org.apache.commons.lang.math.NumberUtils;
 
import org.apache.commons.lang.time.DateFormatUtils;
 
import org.apache.lucene.analysis.Analyzer;
 
//import org.apache.lucene.analysis.standard.StandardAnalyzer;
 
import org.apache.lucene.document.Document;
 
import org.apache.lucene.document.Field;
 
import org.apache.lucene.index.IndexWriter;
 
import org.apache.lucene.index.Term;
 
import org.apache.lucene.search.*;
 
import org.apache.lucene.store.*;
 
import org.wltea.analyzer.lucene.IKAnalyzer;
 
import org.wltea.analyzer.lucene.IKSimilarity;
 
 
 
/**
 
 * 操作索引的工具
 
 * @author liudong
 
 */
 
public class LuceneIndexUtils {
 
     
 
    private final static String _g_lucene_path = RequestContext.root() +
 
        "WEB-INF" + File.separator + "lucene_idx" + File.separator;
 
     
 
    private final static String _KEYWORD_FIELD_NAME = "id";
 
    private final static String _FMT_DATE = "yyyyMMddHHmmssSSS";
 
    private final static Analyzer g_analyzer = new IKAnalyzer(false);
 
     
 
    public final static Analyzer getAnalyzer(){
 
        return g_analyzer;
 
    }
 
     
 
    /**
 
     * 添加文档
 
     * @param objClass
 
     * @param doc
 
     * @throws Exception
 
     */
 
    public static int add(Class<? extends SearchEnabled> objClass, List<? extendsSearchEnabled> docs) throws Exception {
 
        if (docs == null || docs.size() == 0)
 
            return 0;
 
        IndexWriter writer = _GetWriter(objClass);
 
        try {
 
            int ar = _Add(writer, docs);
 
            writer.optimize();
 
            return ar;
 
        } finally {
 
            writer.close();
 
            writer = null;
 
        }
 
    }
 
     
 
    /**
 
     * 添加文档
 
     *
 
     * @param doc
 
     * @throws Exception
 
     */
 
    private static int _Add(IndexWriter writer, List<? extends SearchEnabled> docs)throws Exception {
 
        if (docs == null || docs.size() == 0)
 
            return 0;
 
        int doc_count = 0;
 
        for (SearchEnabled doc : docs) {
 
            Document lucene_doc = _ObjectToDocument(doc);
 
            lucene_doc.setBoost(doc.GetBoost());
 
            writer.addDocument(lucene_doc);
 
            doc_count++;
 
        }
 
        return doc_count;
 
    }
 
 
 
    /**
 
     * 从索引库中搜索
 
     * @param beanClass
 
     * @param query
 
     * @param max_count
 
     * @return
 
     * @throws IOException
 
     */
 
    public static List<Long> find(Class<? extends SearchEnabled> beanClass, Query query,int max_count) throws IOException {
 
        IndexSearcher searcher = _GetSearcher(beanClass);
 
        try{
 
            TopDocs hits = searcher.search(query, null, max_count);
 
            if(hits==null) return null;
 
            List<Long> results = new ArrayList<Long>();
 
            int numResults = Math.min(hits.totalHits, max_count);
 
            for (int i = 0; i < numResults; i++){
 
                ScoreDoc s_doc = (ScoreDoc)hits.scoreDocs[i];
 
                Document doc = searcher.doc(s_doc.doc);
 
                long id = NumberUtils.toLong(doc.get(_KEYWORD_FIELD_NAME), 0);
 
                if(id > 0 && !results.contains(id))
 
                    results.add(id);   
 
            }
 
            return results;
 
        }catch(FileNotFoundException e){
 
            e.printStackTrace();
 
            return null;
 
        }finally{
 
            searcher.close();
 
        }
 
    }
 
 
 
    /**
 
     * 添加文档
 
     *
 
     * @param doc
 
     * @throws Exception
 
     */
 
    public static void add(SearchEnabled doc) throws Exception {
 
        if(doc == null)
 
            return ;
 
        IndexWriter writer = _GetWriter(doc.getClass());
 
        try{
 
            writer.addDocument(_ObjectToDocument(doc));
 
            writer.commit();
 
        }finally{
 
            writer.close();
 
        }
 
    }
 
 
 
    public static void update(SearchEnabled doc) throws Exception {
 
        if(doc == null)
 
            return ;
 
        IndexWriter writer = _GetWriter(doc.getClass());
 
        try{
 
            writer.deleteDocuments(new Term("id", String.valueOf(doc.getId())));
 
            writer.addDocument(_ObjectToDocument(doc));
 
            writer.commit();
 
        }finally{
 
            writer.close();
 
        }
 
    }
 
 
 
    public static void delete(SearchEnabled doc) throws IOException {
 
        if(doc == null)
 
            return ;
 
        IndexWriter writer = _GetWriter(doc.getClass());
 
        try{
 
            writer.deleteDocuments(new Term("id", String.valueOf(doc.getId())));
 
            writer.commit();
 
        }finally{
 
            writer.close();
 
        }
 
    }
 
 
 
    /**
 
     * 获取索引写
 
     *
 
     * @param path
 
     * @return
 
     * @throws IOException
 
     */
 
    protected static IndexWriter _GetWriter(Class<?> beanClass) throws IOException {
 
        Directory indexDir = FSDirectory.open(new File(_g_lucene_path + beanClass.getSimpleName()));
 
        return new IndexWriter(indexDir, g_analyzer, IndexWriter.MaxFieldLength.UNLIMITED);
 
    }
 
 
 
    /**
 
     * 获取索引读
 
     *
 
     * @param path
 
     * @return
 
     * @throws IOException
 
     */
 
    protected static IndexSearcher _GetSearcher(Class<?> beanClass) throws IOException {
 
        Directory indexDir = FSDirectory.open(new File(_g_lucene_path + beanClass.getSimpleName()));
 
        IndexSearcher is = new IndexSearcher(indexDir);
 
        is.setSimilarity(new IKSimilarity());
 
        return is;
 
    }
 
     
 
    private final static NumberFormat _FMT_ID = NumberFormat.getInstance();
 
    static{
 
        _FMT_ID.setGroupingUsed(false);
 
        _FMT_ID.setMaximumFractionDigits(0);
 
        _FMT_ID.setMaximumIntegerDigits(12);
 
        _FMT_ID.setMinimumIntegerDigits(12);       
 
    }
 
    private static Document _ObjectToDocument(SearchEnabled doc) throws Exception {
 
        Document lucene_doc = new Document();
 
 
 
        // Set keyword field
 
        lucene_doc.add(_Keyword(_KEYWORD_FIELD_NAME, _FMT_ID.format(doc.getId())));
 
 
 
        // Set storage field
 
        String[] storeFields = doc.GetStoreFields();
 
        if(storeFields != null)
 
        for (String s_field : storeFields) {
 
            String propertyValue = _GetField(doc, s_field);
 
            if (propertyValue != null)
 
                lucene_doc.add(_Keyword(s_field, propertyValue));
 
        }
 
        // Set extends values
 
        if(doc.GetExtendValues() != null) {
 
            for(String key : doc.GetExtendValues().keySet()){
 
                String value = doc.GetExtendValues().get(key);
 
                lucene_doc.add(_Keyword(key, value));
 
            }
 
        }
 
        // Set indexed field
 
        String[] indexFields = doc.GetIndexFields();
 
        for (String idx_field : indexFields) {
 
            String propertyValue = _GetField(doc, idx_field);
 
            if (StringUtils.isNotBlank(propertyValue))
 
                lucene_doc.add(_Index(idx_field, propertyValue));
 
        }
 
 
 
        // Set extends values
 
        if(doc.GetExtendIndexValues() != null) {
 
            for(String key : doc.GetExtendIndexValues().keySet()){
 
                String value = doc.GetExtendIndexValues().get(key);
 
                try{
 
                    lucene_doc.add(_Index(key, value));
 
                }catch(Exception e){
 
                    e.printStackTrace();
 
                    continue;
 
                }
 
            }
 
        }
 
        return lucene_doc;
 
    }
 
 
 
    /**
 
     * 访问对象某个属性的值
 
     *
 
     * @param obj
 
     * @param field
 
     * @return
 
     */
 
    private static String _GetField(Object obj, String field) throws Exception {
 
        Object fieldValue = PropertyUtils.getProperty(obj, field);
 
        if (fieldValue instanceof String)
 
            return (String) fieldValue;
 
        if (fieldValue instanceof Date)
 
            return DateFormatUtils.format((Date) fieldValue, _FMT_DATE);
 
        return String.valueOf(fieldValue);
 
    }
 
 
 
    private static final Field _Keyword(String name, String value) {
 
        return new Field(name, value, Field.Store.YES, Field.Index.NOT_ANALYZED);
 
    }
 
 
 
    private static final Field _Index(String name, String value) {
 
        return new Field(name, value, Field.Store.NO, Field.Index.ANALYZED);
 
    }
 
 
 
}

[文件] SearchEnabled.java ~ 1016B 下载(33)

 
package my.search;
 
 
 
import java.util.*;
 
 
 
/**
 * Contract that a bean class must implement to be searchable.
 *
 * @author liudong
 */
public interface SearchEnabled {

    /**
     * Returns the key that identifies this object in the search index.
     *
     * @return the object's id
     */
    long getId();

    /**
     * Returns the names of the properties that should be stored with the
     * document, for example {@code createTime} or {@code author}.
     *
     * @return property names to store
     */
    String[] GetStoreFields();

    /**
     * Returns the names of the properties that should be indexed for
     * searching, for example {@code title} or {@code content}.
     *
     * @return property names to index
     */
    String[] GetIndexFields();

    /**
     * Returns additional information about this object.
     *
     * @return extension values by name
     */
    HashMap<String, String> GetExtendValues();

    /**
     * Returns additional index information about this object.
     *
     * @return extension index values by name
     */
    HashMap<String, String> GetExtendIndexValues();

    /**
     * Lists the objects whose id is greater than the given value.
     *
     * @param id    lower bound; only objects with a greater id are listed
     * @param count presumably the maximum number of objects to return; confirm
     *              against implementations
     * @return the matching objects
     */
    List<? extends SearchEnabled> ListAfter(long id, int count);

    /**
     * Returns the weight (boost) of this document.
     *
     * @return the document boost
     */
    float GetBoost();

}