FACET 案例!
添加对应的字段.
&facet=true &facet.field=menu &facet.field=camera_type
返回的结果:
"facet_fields" : { "menu" : [ "Canon USA" , 25, "Olympus" , 21, "Sony" , 12, "Panasonic" , 9, "Nikon" , 4 ], "camera_type" : [ "Compact" , 17, "Ultracompact" , 11, "SLR" , 9, "Full body" , 8 ] }
对于facet对应的源码,可以从FacetComponent这个类分析.
其类结构的说明可以浏览--https://lucene.apache.org/solr/5_3_0/solr-core/org/apache/solr/handler/component/FacetComponent.html
FacetParams http://www.docjar.com/docs/api/org/apache/solr/common/params/FacetParams.html
源码笔记如下--
1 /**
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17
18 package org.apache.solr.handler.component;
19
20 import java.io.IOException;
21 import java.net.URL;
22 import java.util;
23
24 import org.apache.solr.common.params.CommonParams;
25 import org.apache.solr.common.params.FacetParams;
26 import org.apache.solr.common.params.SolrParams;
27 import org.apache.solr.common.params.ModifiableSolrParams;
28 import org.apache.solr.common.util.NamedList;
29 import org.apache.solr.common.util.SimpleOrderedMap;
30 import org.apache.solr.common.SolrException;
31 import org.apache.solr.request.SimpleFacets;
32 import org.apache.solr.util.OpenBitSet;
33 import org.apache.solr.schema.SchemaField;
34 import org.apache.solr.search.QueryParsing;
35 import org.apache.lucene.queryParser.ParseException;
36
37 /**
38 * TODO!
39 *
40 * @version $Id: FacetComponent.java 692551 2008-09-05 21:02:35Z yonik $
41 * @since solr 1.3
42 */
43 public class FacetComponent extends SearchComponent //继承自SearchComponent
44 {
45 public static final String COMPONENT_NAME = "facet";
46
47 @Override
48 public void prepare(ResponseBuilder rb) throws IOException
49 { // method. Called for every incoming request.
50 if (rb.req.getParams().getBool(FacetParams.FACET,false)) {
51 rb.setNeedDocSet( true );
52 rb.doFacets = true;
53 }
54 }
55
56 /**
57 * Actually run the query
58 * @param rb
59 */
60 @Override
61 public void process(ResponseBuilder rb) throws IOException
62 {
63 if (rb.doFacets) {
64 SolrParams params = rb.req.getParams();
65 SimpleFacets f = new SimpleFacets(rb.req,
66 rb.getResults().docSet,
67 params );
68
69 // TODO ???? add this directly to the response, or to the builder?
70 rb.rsp.add( "facet_counts", f.getFacetCounts() );
71 }
72 }
73
74
75 @Override 分布式处理请求
76 public int distributedProcess(ResponseBuilder rb) throws IOException {
77 if (!rb.doFacets) {
78 return ResponseBuilder.STAGE_DONE;
79 }
80
81 if (rb.stage == ResponseBuilder.STAGE_GET_FIELDS) {
82 // overlap facet refinement requests (those shards that we need a count for
83 // particular facet values from), where possible, with
84 // the requests to get fields (because we know that is the
85 // only other required phase).
86 // We do this in distributedProcess so we can look at all of the
87 // requests in the outgoing queue at once.
88
89 for (int shardNum=0; shardNum<rb.shards.length; shardNum++) {
90 List<String> fqueries = rb._facetInfo._toRefine[shardNum];
91 if (fqueries == null || fqueries.size()==0) continue;
92
93 String shard = rb.shards[shardNum];
94
95 ShardRequest refine = null;
96 boolean newRequest = false;
97
98 // try to find a request that is already going out to that shard.
99 // If nshards becomes to great, we way want to move to hashing for better
100 // scalability.
101 for (ShardRequest sreq : rb.outgoing) {
102 if ((sreq.purpose & ShardRequest.PURPOSE_GET_FIELDS)!=0
103 && sreq.shards != null & sreq.shards.length==1
104 && sreq.shards[0].equals(shard))
105 {
106 refine = sreq;
107 break;
108 }
109 }
110
111 if (refine == null) {
112 // we didn't find any other suitable requests going out to that shard, so
113 // create one ourselves.
114 newRequest = true;
115 refine = new ShardRequest();
116 refine.shards = new String[]{rb.shards[shardNum]};
117 refine.params = new ModifiableSolrParams(rb.req.getParams());
118 // don't request any documents
119 refine.params.remove(CommonParams.START);
120 refine.params.set(CommonParams.ROWS,"0");
121 }
122
123 refine.purpose |= ShardRequest.PURPOSE_REFINE_FACETS;
124 refine.params.set(FacetParams.FACET,"true");
125 refine.params.remove(FacetParams.FACET_FIELD);
126 // TODO: perhaps create a more compact facet.terms method?
127 refine.params.set(FacetParams.FACET_QUERY, fqueries.toArray(new String[fqueries.size()]));
128
129 if (newRequest) {
130 rb.addRequest(this, refine);
131 }
132 }
133 }
134
135 return ResponseBuilder.STAGE_DONE;
136 }
137
138 @Override
139 public void modifyRequest(ResponseBuilder rb, SearchComponent who, ShardRequest sreq) {
140 if (!rb.doFacets) return;
141
142 if ((sreq.purpose & ShardRequest.PURPOSE_GET_TOP_IDS) != 0) {
143 sreq.purpose |= ShardRequest.PURPOSE_GET_FACETS;
144
145 FacetInfo fi = rb._facetInfo;
146 if (fi == null) {
147 rb._facetInfo = fi = new FacetInfo();
148 fi.parse(rb.req.getParams(), rb);
149 // should already be true...
150 // sreq.params.set(FacetParams.FACET, "true");
151 }
152
153 sreq.params.remove(FacetParams.FACET_MINCOUNT);
154 sreq.params.remove(FacetParams.FACET_OFFSET);
155 sreq.params.remove(FacetParams.FACET_LIMIT);
156
157 for (DistribFieldFacet dff : fi.topFacets.values()) {
158 String paramStart = "f." + dff.field + '.';
159 sreq.params.remove(paramStart + FacetParams.FACET_MINCOUNT);
160 sreq.params.remove(paramStart + FacetParams.FACET_OFFSET);
161
162 if(dff.limit > 0) {
163 // set the initial limit higher in increase accuracy
164 dff.initialLimit = dff.offset + dff.limit;
165 dff.initialLimit = (int)(dff.initialLimit * 1.5) + 10;
166 } else {
167 dff.initialLimit = dff.limit;
168 }
169
170 // TEST: Uncomment the following line when testing to supress over-requesting facets and
171 // thus cause more facet refinement queries.
172 // if (dff.limit > 0) dff.initialLimit = dff.offset + dff.limit;
173
174 sreq.params.set(paramStart + FacetParams.FACET_LIMIT, dff.initialLimit);
175 }
176 } else {
177 // turn off faceting on other requests
178 sreq.params.set(FacetParams.FACET, "false");
179 // we could optionally remove faceting params
180 }
181 }
182
183 @Override
184 public void handleResponses(ResponseBuilder rb, ShardRequest sreq) {
185 if (!rb.doFacets) return;
186
187 if ((sreq.purpose & ShardRequest.PURPOSE_GET_FACETS)!=0) {
188 countFacets(rb, sreq);
189 } else if ((sreq.purpose & ShardRequest.PURPOSE_REFINE_FACETS)!=0) {
190 refineFacets(rb, sreq);
191 }
192 }
193
194
195
196 //<span style="font-family:Hiragino Sans GB, Microsoft Yahei, 微软雅黑, sans-serif;color:#666666;"><span style="font-size: 13.92px; line-height: 24px;">计数排序</span></span>
197 private void countFacets(ResponseBuilder rb, ShardRequest sreq) {
198 FacetInfo fi = rb._facetInfo;
199
200 for (ShardResponse srsp: sreq.responses) {
201 int shardNum = rb.getShardNum(srsp.getShard());
202 NamedList facet_counts = (NamedList)srsp.getSolrResponse().getResponse().get("facet_counts");
203
204 // handle facet queries
205 NamedList facet_queries = (NamedList)facet_counts.get("facet_queries");
206 if (facet_queries != null) {
207 for (int i=0; i<facet_queries.size(); i++) {
208 String facet_q = (String)facet_queries.getName(i);
209 long count = ((Number)facet_queries.getVal(i)).longValue();
210 Long prevCount = fi.queryFacets.get(facet_q);
211 if (prevCount != null) count += prevCount;
212 fi.queryFacets.put(facet_q, count);
213 }
214 }
215
216 // step through each facet.field, adding results from this shard
217 NamedList facet_fields = (NamedList)facet_counts.get("facet_fields");
218 for (DistribFieldFacet dff : fi.topFacets.values()) {
219 dff.add(shardNum, (NamedList)facet_fields.get(dff.field), dff.initialLimit);
220 }
221 }
222
223
224 //
225 // This code currently assumes that there will be only a single
226 // request ((with responses from all shards) sent out to get facets...
227 // otherwise we would need to wait until all facet responses were received.
228 //
229
230 // list of queries to send each shard
231 List<String>[] toRefine = new List[rb.shards.length];
232 fi._toRefine = toRefine;
233 for (int i=0; i<toRefine.length; i++) {
234 toRefine[i] = new ArrayList<String>();
235 }
236
237
238 for (DistribFieldFacet dff : fi.topFacets.values()) {
239 if (dff.limit <= 0) continue; // no need to check these facets for refinement
240 ShardFacetCount[] counts = dff.getSorted();
241 int ntop = Math.min(counts.length, dff.offset + dff.limit);
242 long smallestCount = counts.length == 0 ? 0 : counts[ntop-1].count;
243
244 for (int i=0; i<counts.length; i++) {
245 ShardFacetCount sfc = counts[i];
246 String query = null;
247 boolean needRefinement = false;
248
249 if (i<ntop) {
250 // automatically flag the top values for refinement
251 needRefinement = true;
252 } else {
253 // calculate the maximum value that this term may have
254 // and if it is >= smallestCount, then flag for refinement
255 long maxCount = sfc.count;
256 for (int shardNum=0; shardNum<rb.shards.length; shardNum++) {
257 OpenBitSet obs = dff.counted[shardNum];
258 if (!obs.get(sfc.termNum)) {
259 // if missing from this shard, add the max it could be
260 maxCount += dff.maxPossible(sfc,shardNum);
261 }
262 }
263 if (maxCount >= smallestCount) {
264 // TODO: on a tie, we could check the term values
265 needRefinement = true;
266 }
267 }
268
269 if (needRefinement) {
270 // add a query for each shard missing the term that needs refinement
271 for (int shardNum=0; shardNum<rb.shards.length; shardNum++) {
272 OpenBitSet obs = dff.counted[shardNum];
273 if (!obs.get(sfc.termNum) && dff.maxPossible(sfc,shardNum)>0) {
274 dff.needRefinements = true;
275 if (query==null) query = dff.makeQuery(sfc);
276 toRefine[shardNum].add(query);
277 }
278 }
279 }
280 }
281 }
282 }
283
284 //提炼结果
285 private void refineFacets(ResponseBuilder rb, ShardRequest sreq) {
286 FacetInfo fi = rb._facetInfo;
287
288 for (ShardResponse srsp: sreq.responses) {
289 // int shardNum = rb.getShardNum(srsp.shard);
290 NamedList facet_counts = (NamedList)srsp.getSolrResponse().getResponse().get("facet_counts");
291 NamedList facet_queries = (NamedList)facet_counts.get("facet_queries");
292
293 // These are single term queries used to fill in missing counts
294 // for facet.field queries
295 for (int i=0; i<facet_queries.size(); i++) {
296 try {
297
298 String facet_q = (String)facet_queries.getName(i);
299 long count = ((Number)facet_queries.getVal(i)).longValue();
300
301 // expect {!field f=field}value style params
302 SolrParams qparams = QueryParsing.getLocalParams(facet_q,null);
303 if (qparams == null) continue; // not a refinement
304 String field = qparams.get(QueryParsing.F);
305 String val = qparams.get(QueryParsing.V);
306
307 // Find the right field.facet for this field
308 DistribFieldFacet dff = fi.topFacets.get(field);
309 if (dff == null) continue; // maybe this wasn't for facet count refinement
310
311 // Find the right constraint count for this value
312 ShardFacetCount sfc = dff.counts.get(val);
313
314 if (sfc == null) {
315 continue;
316 // Just continue, since other components might have added
317 // this facet.query for other purposes. But if there are charset
318 // issues then the values coming back may not match the values sent.
319 }
320
321 // TODO REMOVE
322 // System.out.println("Got " + facet_q + " , refining count: " + sfc + " += " + count);
323
324 sfc.count += count;
325
326 } catch (ParseException e) {
327 // shouldn't happen, so fail for now rather than covering it up
328 throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, e);
329 }
330 }
331 }
332 }
333
334 @Override
335 public void finishStage(ResponseBuilder rb) {
336 if (!rb.doFacets || rb.stage != ResponseBuilder.STAGE_GET_FIELDS) return;
337 // wait until STAGE_GET_FIELDS
338 // so that "result" is already stored in the response (for aesthetics)
339
340
341 FacetInfo fi = rb._facetInfo;
342
343 NamedList facet_counts = new SimpleOrderedMap();
344 NamedList facet_queries = new SimpleOrderedMap();
345 facet_counts.add("facet_queries",facet_queries);
346 for (Map.Entry<String,Long> entry : fi.queryFacets.entrySet()) {
347 facet_queries.add(entry.getKey(), num(entry.getValue()));
348 }
349
350 NamedList facet_fields = new SimpleOrderedMap();
351 facet_counts.add("facet_fields", facet_fields);
352
353 for (DistribFieldFacet dff : fi.topFacets.values()) {
354 NamedList fieldCounts = new NamedList(); // order is more important for facets
355 facet_fields.add(dff.field, fieldCounts);
356
357 ShardFacetCount[] counts = dff.countSorted;
358 if (counts == null || dff.needRefinements) {
359 counts = dff.getSorted();
360 }
361
362 int end = dff.limit < 0 ? counts.length : Math.min(dff.offset + dff.limit, counts.length);
363 for (int i=dff.offset; i<end; i++) {
364 if (counts[i].count < dff.minCount) break;
365 fieldCounts.add(counts[i].name, num(counts[i].count));
366 }
367
368 if (dff.missing) {
369 fieldCounts.add(null, num(dff.missingCount));
370 }
371 }
372
373 // TODO: list facets (sorted by natural order)
374 // TODO: facet dates
375 facet_counts.add("facet_dates", new SimpleOrderedMap());
376
377 rb.rsp.add("facet_counts", facet_counts);
378
379 rb._facetInfo = null; // could be big, so release asap
380 }
381
382
383 // use <int> tags for smaller facet counts (better back compatibility)
384 private Number num(long val) {
385 if (val < Integer.MAX_VALUE) return (int)val;
386 else return val;
387 }
388 private Number num(Long val) {
389 if (val.longValue() < Integer.MAX_VALUE) return val.intValue();
390 else return val;
391 }
392
393
394 /////////////////////////////////////////////
395 /// SolrInfoMBean
396 ////////////////////////////////////////////
397
398 @Override
399 public String getDescription() {
400 return "Handle Faceting";
401 }
402
403 @Override
404 public String getVersion() {
405 return "$Revision: 692551 $";
406 }
407
408 @Override
409 public String getSourceId() {
410 return "$Id: FacetComponent.java 692551 2008-09-05 21:02:35Z yonik $";
411 }
412
413 @Override
414 public String getSource() {
415 return "$URL: https://svn.apache.org/repos/asf/lucene/solr/branches/branch-1.3/src/java/org/apache/solr/handler/component/FacetComponent.java $";
416 }
417
418 @Override
419 public URL[] getDocs() {
420 return null;
421 }
422 }
423
424
425
426 class FacetInfo {
427 List<String>[] _toRefine;
428
429 void parse(SolrParams params, ResponseBuilder rb) {
430 queryFacets = new LinkedHashMap<String,Long>();
431 topFacets = new LinkedHashMap<String,DistribFieldFacet>();
432 listFacets = new LinkedHashMap<String,DistribFieldFacet>();
433
434 String[] facetQs = params.getParams(FacetParams.FACET_QUERY);
435 if (facetQs != null) {
436 for (String query : facetQs) {
437 queryFacets.put(query,0L);
438 }
439 }
440
441 String[] facetFs = params.getParams(FacetParams.FACET_FIELD);
442 if (facetFs != null) {
443 for (String field : facetFs) {
444 DistribFieldFacet ff = new DistribFieldFacet(rb, field);
445 ff.fillParams(params, field);
446 if (ff.sort) {
447 topFacets.put(field, ff);
448 } else {
449 listFacets.put(field, ff);
450 }
451 }
452 }
453 }
454
455 LinkedHashMap<String,Long> queryFacets;
456 LinkedHashMap<String,DistribFieldFacet> topFacets; // field facets that order by constraint count (sort=true)
457 LinkedHashMap<String,DistribFieldFacet> listFacets; // field facets that list values in term order
458 }
459
460
461 class FieldFacet {
462 String field;
463 int offset;
464 int limit;
465 int minCount;
466 boolean sort;
467 boolean missing;
468 String prefix;
469 long missingCount;
470
471 void fillParams(SolrParams params, String field) {
472 this.field = field;
473 this.offset = params.getFieldInt(field, FacetParams.FACET_OFFSET, 0);
474 this.limit = params.getFieldInt(field, FacetParams.FACET_LIMIT, 100);
475 Integer mincount = params.getFieldInt(field, FacetParams.FACET_MINCOUNT);
476 if (mincount==null) {
477 Boolean zeros = params.getFieldBool(field, FacetParams.FACET_ZEROS);
478 // mincount = (zeros!=null && zeros) ? 0 : 1;
479 mincount = (zeros!=null && !zeros) ? 1 : 0;
480 // current default is to include zeros.
481 }
482 this.minCount = mincount;
483 this.missing = params.getFieldBool(field, FacetParams.FACET_MISSING, false);
484 // default to sorting if there is a limit.
485 this.sort = params.getFieldBool(field, FacetParams.FACET_SORT, limit>0);
486 this.prefix = params.getFieldParam(field,FacetParams.FACET_PREFIX);
487 }
488 }
489
490 class DistribFieldFacet extends FieldFacet {
491 SchemaField sf;
492
493 // the max possible count for a term appearing on no list
494 long missingMaxPossible;
495 // the max possible count for a missing term for each shard (indexed by shardNum)
496 long[] missingMax;
497 OpenBitSet[] counted; // a bitset for each shard, keeping track of which terms seen
498 HashMap<String,ShardFacetCount> counts = new HashMap<String,ShardFacetCount>(128);
499 int termNum;
500 String queryPrefix;
501
502 int initialLimit; // how many terms requested in first phase
503 boolean needRefinements;
504 ShardFacetCount[] countSorted;
505
506 DistribFieldFacet(ResponseBuilder rb, String field) {
507 sf = rb.req.getSchema().getField(field);
508 missingMax = new long[rb.shards.length];
509 counted = new OpenBitSet[rb.shards.length];
510 queryPrefix = "{!field f=" + field + '}';
511 }
512
513 void add(int shardNum, NamedList shardCounts, int numRequested) {
514 int sz = shardCounts.size();
515 int numReceived = sz;
516
517 OpenBitSet terms = new OpenBitSet(termNum+sz);
518
519 long last = 0;
520 for (int i=0; i<sz; i++) {
521 String name = shardCounts.getName(i);
522 long count = ((Number)shardCounts.getVal(i)).longValue();
523 if (name == null) {
524 missingCount += count;
525 numReceived--;
526 } else {
527 ShardFacetCount sfc = counts.get(name);
528 if (sfc == null) {
529 sfc = new ShardFacetCount();
530 sfc.name = name;
531 sfc.termNum = termNum++;
532 counts.put(name, sfc);
533 }
534 sfc.count += count;
535 terms.fastSet(sfc.termNum);
536 last = count;
537 }
538 }
539
540 // the largest possible missing term is 0 if we received less
541 // than the number requested (provided mincount==0 like it should be for
542 // a shard request)
543 if (numRequested<0 || numRequested != 0 && numReceived < numRequested) {
544 last = 0;
545 }
546
547 missingMaxPossible += last;
548 missingMax[shardNum] = last;
549 counted[shardNum] = terms;
550 }
551
552 //对应块的比较
553 ShardFacetCount[] getSorted() {
554 ShardFacetCount[] arr = counts.values().toArray(new ShardFacetCount[counts.size()]);
555 Arrays.sort(arr, new Comparator<ShardFacetCount>() {
556 public int compare(ShardFacetCount o1, ShardFacetCount o2) {
557 if (o2.count < o1.count) return -1;
558 else if (o1.count < o2.count) return 1;
559 // TODO: handle tiebreaks for types other than strings
560 return o1.name.compareTo(o2.name);
561 }
562 });
563 countSorted = arr;
564 return arr;
565 }
566
567 String makeQuery(ShardFacetCount sfc) {
568 return queryPrefix + sfc.name;
569 }
570
571 // returns the max possible value this ShardFacetCount could have for this shard
572 // (assumes the shard did not report a count for this value)
573 long maxPossible(ShardFacetCount sfc, int shardNum) {
574 return missingMax[shardNum];
575 // TODO: could store the last term in the shard to tell if this term
576 // comes before or after it. If it comes before, we could subtract 1
577 }
578
579 }
580
581
/** One facet term's accumulated count across shards. */
class ShardFacetCount {
  String name;
  long count;
  int termNum; // term number starting at 0 (used in bit arrays)

  @Override
  public String toString() {
    StringBuilder sb = new StringBuilder();
    sb.append("{term=").append(name)
      .append(",termNum=").append(termNum)
      .append(",count=").append(count)
      .append('}');
    return sb.toString();
  }
}
要了解一点就是其分布式处理相关的类:ResponseBuilder内部的几个状态,顾名思义。
1.STAGE_START
2.STAGE_PARSE_QUERY
3.STAGE_EXECUTE_QUERY
4.STAGE_GET_FIELDS
5.STAGE_DONE
对于DistribFieldFacet这个类,继承于FieldFacet,除了本身提取对应查询的参数能力外,还有一个特点就是存在一个HashMap对每个shard对应字段的计算有一个映射存储。
下次推出group by和solr parallel SQL的心得文章。