[文件] LuceneIndexUtils.java ~ 7KB 下载(45)
001 | package my.search; |
002 |
003 | import java.io.File; |
004 | import java.io.FileNotFoundException; |
005 | import java.io.IOException; |
006 | import java.text.NumberFormat; |
007 | import java.util.*; |
008 |
009 | import my.mvc.RequestContext; |
010 |
011 | import org.apache.commons.beanutils.*; |
012 | import org.apache.commons.lang.StringUtils; |
013 | import org.apache.commons.lang.math.NumberUtils; |
014 | import org.apache.commons.lang.time.DateFormatUtils; |
015 | import org.apache.lucene.analysis.Analyzer; |
016 | //import org.apache.lucene.analysis.standard.StandardAnalyzer; |
017 | import org.apache.lucene.document.Document; |
018 | import org.apache.lucene.document.Field; |
019 | import org.apache.lucene.index.IndexWriter; |
020 | import org.apache.lucene.index.Term; |
021 | import org.apache.lucene.search.*; |
022 | import org.apache.lucene.store.*; |
023 | import org.wltea.analyzer.lucene.IKAnalyzer; |
024 | import org.wltea.analyzer.lucene.IKSimilarity; |
025 |
026 | /** |
027 | * 操作索引的工具 |
028 | * @author liudong |
029 | */ |
030 | public class LuceneIndexUtils { |
031 | |
032 | private final static String _g_lucene_path = RequestContext.root() + |
033 | "WEB-INF" + File.separator + "lucene_idx" + File.separator; |
034 | |
035 | private final static String _KEYWORD_FIELD_NAME = "id" ; |
036 | private final static String _FMT_DATE = "yyyyMMddHHmmssSSS" ; |
037 | private final static Analyzer g_analyzer = new IKAnalyzer( false ); |
038 | |
039 | public final static Analyzer getAnalyzer(){ |
040 | return g_analyzer; |
041 | } |
042 | |
043 | /** |
044 | * 添加文档 |
045 | * @param objClass |
046 | * @param doc |
047 | * @throws Exception |
048 | */ |
049 | public static int add(Class<? extends SearchEnabled> objClass, List<? extends SearchEnabled> docs) throws Exception { |
050 | if (docs == null || docs.size() == 0 ) |
051 | return 0 ; |
052 | IndexWriter writer = _GetWriter(objClass); |
053 | try { |
054 | int ar = _Add(writer, docs); |
055 | writer.optimize(); |
056 | return ar; |
057 | } finally { |
058 | writer.close(); |
059 | writer = null ; |
060 | } |
061 | } |
062 | |
063 | /** |
064 | * 添加文档 |
065 | * |
066 | * @param doc |
067 | * @throws Exception |
068 | */ |
069 | private static int _Add(IndexWriter writer, List<? extends SearchEnabled> docs) throws Exception { |
070 | if (docs == null || docs.size() == 0 ) |
071 | return 0 ; |
072 | int doc_count = 0 ; |
073 | for (SearchEnabled doc : docs) { |
074 | Document lucene_doc = _ObjectToDocument(doc); |
075 | lucene_doc.setBoost(doc.GetBoost()); |
076 | writer.addDocument(lucene_doc); |
077 | doc_count++; |
078 | } |
079 | return doc_count; |
080 | } |
081 |
082 | /** |
083 | * 从索引库中搜索 |
084 | * @param beanClass |
085 | * @param query |
086 | * @param max_count |
087 | * @return |
088 | * @throws IOException |
089 | */ |
090 | public static List<Long> find(Class<? extends SearchEnabled> beanClass, Query query, int max_count) throws IOException { |
091 | IndexSearcher searcher = _GetSearcher(beanClass); |
092 | try { |
093 | TopDocs hits = searcher.search(query, null , max_count); |
094 | if (hits== null ) return null ; |
095 | List<Long> results = new ArrayList<Long>(); |
096 | int numResults = Math.min(hits.totalHits, max_count); |
097 | for ( int i = 0 ; i < numResults; i++){ |
098 | ScoreDoc s_doc = (ScoreDoc)hits.scoreDocs[i]; |
099 | Document doc = searcher.doc(s_doc.doc); |
100 | long id = NumberUtils.toLong(doc.get(_KEYWORD_FIELD_NAME), 0 ); |
101 | if (id > 0 && !results.contains(id)) |
102 | results.add(id); |
103 | } |
104 | return results; |
105 | } catch (FileNotFoundException e){ |
106 | e.printStackTrace(); |
107 | return null ; |
108 | } finally { |
109 | searcher.close(); |
110 | } |
111 | } |
112 |
113 | /** |
114 | * 添加文档 |
115 | * |
116 | * @param doc |
117 | * @throws Exception |
118 | */ |
119 | public static void add(SearchEnabled doc) throws Exception { |
120 | if (doc == null ) |
121 | return ; |
122 | IndexWriter writer = _GetWriter(doc.getClass()); |
123 | try { |
124 | writer.addDocument(_ObjectToDocument(doc)); |
125 | writer.commit(); |
126 | } finally { |
127 | writer.close(); |
128 | } |
129 | } |
130 |
131 | public static void update(SearchEnabled doc) throws Exception { |
132 | if (doc == null ) |
133 | return ; |
134 | IndexWriter writer = _GetWriter(doc.getClass()); |
135 | try { |
136 | writer.deleteDocuments( new Term( "id" , String.valueOf(doc.getId()))); |
137 | writer.addDocument(_ObjectToDocument(doc)); |
138 | writer.commit(); |
139 | } finally { |
140 | writer.close(); |
141 | } |
142 | } |
143 |
144 | public static void delete(SearchEnabled doc) throws IOException { |
145 | if (doc == null ) |
146 | return ; |
147 | IndexWriter writer = _GetWriter(doc.getClass()); |
148 | try { |
149 | writer.deleteDocuments( new Term( "id" , String.valueOf(doc.getId()))); |
150 | writer.commit(); |
151 | } finally { |
152 | writer.close(); |
153 | } |
154 | } |
155 |
156 | /** |
157 | * 获取索引写 |
158 | * |
159 | * @param path |
160 | * @return |
161 | * @throws IOException |
162 | */ |
163 | protected static IndexWriter _GetWriter(Class<?> beanClass) throws IOException { |
164 | Directory indexDir = FSDirectory.open( new File(_g_lucene_path + beanClass.getSimpleName())); |
165 | return new IndexWriter(indexDir, g_analyzer, IndexWriter.MaxFieldLength.UNLIMITED); |
166 | } |
167 |
168 | /** |
169 | * 获取索引读 |
170 | * |
171 | * @param path |
172 | * @return |
173 | * @throws IOException |
174 | */ |
175 | protected static IndexSearcher _GetSearcher(Class<?> beanClass) throws IOException { |
176 | Directory indexDir = FSDirectory.open( new File(_g_lucene_path + beanClass.getSimpleName())); |
177 | IndexSearcher is = new IndexSearcher(indexDir); |
178 | is.setSimilarity( new IKSimilarity()); |
179 | return is; |
180 | } |
181 | |
/**
 * Formatter that turns an object id into the fixed-width, zero-padded string
 * stored in the id field (at least 12 digits, no grouping, no fraction).
 *
 * <p>The locale is pinned to {@code Locale.US} because the result is a
 * machine-readable key that is later parsed back into a {@code long}; the
 * default locale could produce digits or symbols that do not parse.
 *
 * <p>NOTE(review): {@code NumberFormat} instances are not synchronized; this
 * shared instance is used without locking - confirm callers are not
 * concurrent or add external synchronization.
 */
private static final NumberFormat _FMT_ID = NumberFormat.getInstance(Locale.US);
static {
    _FMT_ID.setGroupingUsed(false);
    _FMT_ID.setMaximumFractionDigits(0);
    // 19 digits cover Long.MAX_VALUE. A cap of 12 silently dropped the
    // leading digits of larger ids, making distinct ids collide.
    _FMT_ID.setMaximumIntegerDigits(19);
    // Ids of up to 12 digits keep exactly the same padded form as before.
    _FMT_ID.setMinimumIntegerDigits(12);
}
189 | private static Document _ObjectToDocument(SearchEnabled doc) throws Exception { |
190 | Document lucene_doc = new Document(); |
191 |
192 | // Set keyword field |
193 | lucene_doc.add(_Keyword(_KEYWORD_FIELD_NAME, _FMT_ID.format(doc.getId()))); |
194 |
195 | // Set storage field |
196 | String[] storeFields = doc.GetStoreFields(); |
197 | if (storeFields != null ) |
198 | for (String s_field : storeFields) { |
199 | String propertyValue = _GetField(doc, s_field); |
200 | if (propertyValue != null ) |
201 | lucene_doc.add(_Keyword(s_field, propertyValue)); |
202 | } |
203 | // Set extends values |
204 | if (doc.GetExtendValues() != null ) { |
205 | for (String key : doc.GetExtendValues().keySet()){ |
206 | String value = doc.GetExtendValues().get(key); |
207 | lucene_doc.add(_Keyword(key, value)); |
208 | } |
209 | } |
210 | // Set indexed field |
211 | String[] indexFields = doc.GetIndexFields(); |
212 | for (String idx_field : indexFields) { |
213 | String propertyValue = _GetField(doc, idx_field); |
214 | if (StringUtils.isNotBlank(propertyValue)) |
215 | lucene_doc.add(_Index(idx_field, propertyValue)); |
216 | } |
217 |
218 | // Set extends values |
219 | if (doc.GetExtendIndexValues() != null ) { |
220 | for (String key : doc.GetExtendIndexValues().keySet()){ |
221 | String value = doc.GetExtendIndexValues().get(key); |
222 | try { |
223 | lucene_doc.add(_Index(key, value)); |
224 | } catch (Exception e){ |
225 | e.printStackTrace(); |
226 | continue ; |
227 | } |
228 | } |
229 | } |
230 | return lucene_doc; |
231 | } |
232 |
233 | /** |
234 | * 访问对象某个属性的值 |
235 | * |
236 | * @param obj |
237 | * @param field |
238 | * @return |
239 | */ |
240 | private static String _GetField(Object obj, String field) throws Exception { |
241 | Object fieldValue = PropertyUtils.getProperty(obj, field); |
242 | if (fieldValue instanceof String) |
243 | return (String) fieldValue; |
244 | if (fieldValue instanceof Date) |
245 | return DateFormatUtils.format((Date) fieldValue, _FMT_DATE); |
246 | return String.valueOf(fieldValue); |
247 | } |
248 |
249 | private static final Field _Keyword(String name, String value) { |
250 | return new Field(name, value, Field.Store.YES, Field.Index.NOT_ANALYZED); |
251 | } |
252 |
253 | private static final Field _Index(String name, String value) { |
254 | return new Field(name, value, Field.Store.NO, Field.Index.ANALYZED); |
255 | } |
256 |
257 | } |
[文件] SearchEnabled.java ~ 1016B 下载(33)
01 | package my.search; |
02 |
03 | import java.util.*; |
04 |
/**
 * Contract that a bean class must implement to support search.
 *
 * @author liudong
 */
public interface SearchEnabled {

    /**
     * Returns the key (id) of the searchable object.
     *
     * @return the object's id
     */
    long getId();

    /**
     * Returns the names of the properties that must be stored in the index,
     * for example {@code createTime} or {@code author}.
     *
     * @return property names to store
     */
    String[] GetStoreFields();

    /**
     * Returns the names of the properties that must be indexed for searching,
     * for example {@code title} or {@code content}.
     *
     * @return property names to index
     */
    String[] GetIndexFields();

    /**
     * Returns the object's extended information.
     *
     * @return extended values keyed by field name
     */
    HashMap<String, String> GetExtendValues();

    /**
     * Returns the object's extended index information.
     *
     * @return extended index values keyed by field name
     */
    HashMap<String, String> GetExtendIndexValues();

    /**
     * Lists the objects whose id is greater than the given value.
     *
     * @param id lower bound (exclusive) for the ids to list
     * @param count presumably the maximum number of objects to return -
     *        TODO confirm against implementations
     * @return the matching objects
     */
    List<? extends SearchEnabled> ListAfter(long id, int count);

    /**
     * Returns the weight (boost) of the document.
     *
     * @return the boost value
     */
    float GetBoost();

}