Lucene 生成索引以及搜索的完整例子

本文介绍了一个用于操作Lucene索引的实用工具类,包括添加、更新、删除文档及搜索等功能。通过该工具类可以方便地进行索引管理和文档检索。

摘要生成于 C知道 ,由 DeepSeek-R1 满血版支持, 前往体验 >

[文件] LuceneIndexUtils.java ~ 7KB    下载(45)

001 package my.search;
002  
003 import java.io.File;
004 import java.io.FileNotFoundException;
005 import java.io.IOException;
006 import java.text.NumberFormat;
007 import java.util.*;
008  
009 import my.mvc.RequestContext;
010  
011 import org.apache.commons.beanutils.*;
012 import org.apache.commons.lang.StringUtils;
013 import org.apache.commons.lang.math.NumberUtils;
014 import org.apache.commons.lang.time.DateFormatUtils;
015 import org.apache.lucene.analysis.Analyzer;
016 //import org.apache.lucene.analysis.standard.StandardAnalyzer;
017 import org.apache.lucene.document.Document;
018 import org.apache.lucene.document.Field;
019 import org.apache.lucene.index.IndexWriter;
020 import org.apache.lucene.index.Term;
021 import org.apache.lucene.search.*;
022 import org.apache.lucene.store.*;
023 import org.wltea.analyzer.lucene.IKAnalyzer;
024 import org.wltea.analyzer.lucene.IKSimilarity;
025  
026 /**
027  * 操作索引的工具
028  * @author liudong
029  */
030 public class LuceneIndexUtils {
031      
032     private final static String _g_lucene_path = RequestContext.root() +
033         "WEB-INF" + File.separator + "lucene_idx" + File.separator;
034      
035     private final static String _KEYWORD_FIELD_NAME = "id";
036     private final static String _FMT_DATE = "yyyyMMddHHmmssSSS";
037     private final static Analyzer g_analyzer = new IKAnalyzer(false);
038      
039     public final static Analyzer getAnalyzer(){
040         return g_analyzer;
041     }
042      
043     /**
044      * 添加文档
045      * @param objClass
046      * @param doc
047      * @throws Exception
048      */
049     public static int add(Class<? extends SearchEnabled> objClass, List<? extendsSearchEnabled> docs) throws Exception {
050         if (docs == null || docs.size() == 0)
051             return 0;
052         IndexWriter writer = _GetWriter(objClass);
053         try {
054             int ar = _Add(writer, docs);
055             writer.optimize();
056             return ar;
057         finally {
058             writer.close();
059             writer = null;
060         }
061     }
062      
063     /**
064      * 添加文档
065      *
066      * @param doc
067      * @throws Exception
068      */
069     private static int _Add(IndexWriter writer, List<? extends SearchEnabled> docs)throws Exception {
070         if (docs == null || docs.size() == 0)
071             return 0;
072         int doc_count = 0;
073         for (SearchEnabled doc : docs) {
074             Document lucene_doc = _ObjectToDocument(doc);
075             lucene_doc.setBoost(doc.GetBoost());
076             writer.addDocument(lucene_doc);
077             doc_count++;
078         }
079         return doc_count;
080     }
081  
082     /**
083      * 从索引库中搜索
084      * @param beanClass
085      * @param query
086      * @param max_count
087      * @return
088      * @throws IOException
089      */
090     public static List<Long> find(Class<? extends SearchEnabled> beanClass, Query query,int max_count) throws IOException {
091         IndexSearcher searcher = _GetSearcher(beanClass);
092         try{
093             TopDocs hits = searcher.search(query, null, max_count);
094             if(hits==nullreturn null;
095             List<Long> results = new ArrayList<Long>();
096             int numResults = Math.min(hits.totalHits, max_count);
097             for (int i = 0; i < numResults; i++){
098                 ScoreDoc s_doc = (ScoreDoc)hits.scoreDocs[i];
099                 Document doc = searcher.doc(s_doc.doc);
100                 long id = NumberUtils.toLong(doc.get(_KEYWORD_FIELD_NAME), 0);
101                 if(id > 0 && !results.contains(id))
102                     results.add(id);   
103             }
104             return results;
105         }catch(FileNotFoundException e){
106             e.printStackTrace();
107             return null;
108         }finally{
109             searcher.close();
110         }
111     }
112  
113     /**
114      * 添加文档
115      *
116      * @param doc
117      * @throws Exception
118      */
119     public static void add(SearchEnabled doc) throws Exception {
120         if(doc == null)
121             return ;
122         IndexWriter writer = _GetWriter(doc.getClass());
123         try{
124             writer.addDocument(_ObjectToDocument(doc));
125             writer.commit();
126         }finally{
127             writer.close();
128         }
129     }
130  
131     public static void update(SearchEnabled doc) throws Exception {
132         if(doc == null)
133             return ;
134         IndexWriter writer = _GetWriter(doc.getClass());
135         try{
136             writer.deleteDocuments(new Term("id", String.valueOf(doc.getId())));
137             writer.addDocument(_ObjectToDocument(doc));
138             writer.commit();
139         }finally{
140             writer.close();
141         }
142     }
143  
144     public static void delete(SearchEnabled doc) throws IOException {
145         if(doc == null)
146             return ;
147         IndexWriter writer = _GetWriter(doc.getClass());
148         try{
149             writer.deleteDocuments(new Term("id", String.valueOf(doc.getId())));
150             writer.commit();
151         }finally{
152             writer.close();
153         }
154     }
155  
156     /**
157      * 获取索引写
158      *
159      * @param path
160      * @return
161      * @throws IOException
162      */
163     protected static IndexWriter _GetWriter(Class<?> beanClass) throws IOException {
164         Directory indexDir = FSDirectory.open(new File(_g_lucene_path + beanClass.getSimpleName()));
165         return new IndexWriter(indexDir, g_analyzer, IndexWriter.MaxFieldLength.UNLIMITED);
166     }
167  
168     /**
169      * 获取索引读
170      *
171      * @param path
172      * @return
173      * @throws IOException
174      */
175     protected static IndexSearcher _GetSearcher(Class<?> beanClass) throws IOException {
176         Directory indexDir = FSDirectory.open(new File(_g_lucene_path + beanClass.getSimpleName()));
177         IndexSearcher is = new IndexSearcher(indexDir);
178         is.setSimilarity(new IKSimilarity());
179         return is;
180     }
181      
182     private final static NumberFormat _FMT_ID = NumberFormat.getInstance();
183     static{
184         _FMT_ID.setGroupingUsed(false);
185         _FMT_ID.setMaximumFractionDigits(0);
186         _FMT_ID.setMaximumIntegerDigits(12);
187         _FMT_ID.setMinimumIntegerDigits(12);       
188     }
189     private static Document _ObjectToDocument(SearchEnabled doc) throws Exception {
190         Document lucene_doc = new Document();
191  
192         // Set keyword field
193         lucene_doc.add(_Keyword(_KEYWORD_FIELD_NAME, _FMT_ID.format(doc.getId())));
194  
195         // Set storage field
196         String[] storeFields = doc.GetStoreFields();
197         if(storeFields != null)
198         for (String s_field : storeFields) {
199             String propertyValue = _GetField(doc, s_field);
200             if (propertyValue != null)
201                 lucene_doc.add(_Keyword(s_field, propertyValue));
202         }
203         // Set extends values
204         if(doc.GetExtendValues() != null) {
205             for(String key : doc.GetExtendValues().keySet()){
206                 String value = doc.GetExtendValues().get(key);
207                 lucene_doc.add(_Keyword(key, value));
208             }
209         }
210         // Set indexed field
211         String[] indexFields = doc.GetIndexFields();
212         for (String idx_field : indexFields) {
213             String propertyValue = _GetField(doc, idx_field);
214             if (StringUtils.isNotBlank(propertyValue))
215                 lucene_doc.add(_Index(idx_field, propertyValue));
216         }
217  
218         // Set extends values
219         if(doc.GetExtendIndexValues() != null) {
220             for(String key : doc.GetExtendIndexValues().keySet()){
221                 String value = doc.GetExtendIndexValues().get(key);
222                 try{
223                     lucene_doc.add(_Index(key, value));
224                 }catch(Exception e){
225                     e.printStackTrace();
226                     continue;
227                 }
228             }
229         }
230         return lucene_doc;
231     }
232  
233     /**
234      * 访问对象某个属性的值
235      *
236      * @param obj
237      * @param field
238      * @return
239      */
240     private static String _GetField(Object obj, String field) throws Exception {
241         Object fieldValue = PropertyUtils.getProperty(obj, field);
242         if (fieldValue instanceof String)
243             return (String) fieldValue;
244         if (fieldValue instanceof Date)
245             return DateFormatUtils.format((Date) fieldValue, _FMT_DATE);
246         return String.valueOf(fieldValue);
247     }
248  
249     private static final Field _Keyword(String name, String value) {
250         return new Field(name, value, Field.Store.YES, Field.Index.NOT_ANALYZED);
251     }
252  
253     private static final Field _Index(String name, String value) {
254         return new Field(name, value, Field.Store.NO, Field.Index.ANALYZED);
255     }
256  
257 }

[文件] SearchEnabled.java ~ 1016B    下载(33)

01 package my.search;
02  
03 import java.util.*;
04  
/**
 * Contract that a bean class has to implement to support searching.
 *
 * @author liudong
 */
public interface SearchEnabled {

    /**
     * Returns the key of the searchable object.
     *
     * @return the object's id
     */
    long getId();

    /**
     * Returns the names of the fields that have to be stored for the
     * searchable object, for example {@code createTime} or {@code author}.
     *
     * @return names of the stored fields
     */
    String[] GetStoreFields();

    /**
     * Returns the names of the fields of the searchable object that are
     * indexed, for example {@code title} or {@code content}.
     *
     * @return names of the indexed fields
     */
    String[] GetIndexFields();

    /**
     * Returns the extended information of the object.
     *
     * @return extended values
     */
    HashMap<String, String> GetExtendValues();

    /**
     * Returns the extended index information of the object.
     *
     * @return extended index values
     */
    HashMap<String, String> GetExtendIndexValues();

    /**
     * Lists the objects whose id is greater than the given value.
     *
     * @param id    id value the returned objects have to exceed
     * @param count presumably the maximum number of objects to return - confirm
     *              against the implementations
     * @return the matching objects
     */
    List<? extends SearchEnabled> ListAfter(long id, int count);

    /**
     * Returns the weight (boost) of the document.
     *
     * @return the document boost
     */
    float GetBoost();

}
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值