在Solr上可以方便地使用分布式搜索。
比如配置如下的request handler:
<requestHandler name="search" class="solr.SearchHandler">
<lst name="defaults">
<str name="echoParams">none</str>
<str name="timeAllowed">2000</str>
<str name="shards">localhost:28080/solr,localhost:28080/smallSolr</str>
<str name="shards.qt">singleSearch</str>
<str name="q">*:*</str>
<str name="wt">json</str>
<str name="sort">rtsDate desc</str>
</lst>
</requestHandler>
<requestHandler name="singleSearch" class="solr.SearchHandler">
<lst name="defaults">
<str name="echoParams">none</str>
<int name="rows">10</int>
<int name="start">0</int>
<str name="version">2.2</str>
<str name="indent">off</str>
</lst>
</requestHandler>
这样,调用qt=search的时候,就会使用分布式搜索。但如果两个索引中出现重复的文档(通过唯一键来判断),到底应该保留哪一个呢?
Solr是看哪一个shard先返回,就保留哪一个shard的文档。但我们可能希望保留最新更新的那一份,而最新更新的文档更有可能在小索引里。那么,如何给shard设置优先级呢?
修改源代码(只需修改合并id的一小段代码):
org.apache.solr.handler.component.QueryComponentExt.mergeIds(ResponseBuilder, ShardRequest)
queue = new ShardFieldSortedHitQueue(sortFields, ss.getOffset()
+ ss.getCount());
for (ShardResponse srsp : sreq.responses) {
SolrDocumentList docs = (SolrDocumentList) srsp.getSolrResponse()
.getResponse().get("response");
// calculate global maxScore and numDocsFound
if (docs.getMaxScore() != null) {
maxScore = maxScore == null ? docs.getMaxScore() : Math.max(
maxScore, docs.getMaxScore());
}
numFound += docs.getNumFound();
NamedList sortFieldValues = (NamedList) (srsp.getSolrResponse()
.getResponse().get("sort_values"));
// go through every doc in this response, construct a ShardDoc, and
// put it in the priority queue so it can be ordered.
for (int i = 0; i < docs.size(); i++) {
SolrDocument doc = docs.get(i);
Object id = doc.getFieldValue(uniqueKeyField.getName());
ShardDoc shardDoc = new ShardDoc();
shardDoc.id = id;
shardDoc.shard = srsp.getShard();
shardDoc.orderInShard = i;
Object scoreObj = doc.getFieldValue("score");
if (scoreObj != null) {
if (scoreObj instanceof String) {
shardDoc.score = Float.parseFloat((String) scoreObj);
} else {
shardDoc.score = (Float) scoreObj;
}
}
shardDoc.sortFieldValues = sortFieldValues;
ShardDoc prevShardDoc = uniqueShardDoc.put(id, shardDoc);
if (prevShardDoc != null) {// 有重复
numFound--;
if(prevShardDoc.score!=null&&shardDoc.score!=null&&prevShardDoc.score>shardDoc.score){
uniqueShardDoc.put(id, prevShardDoc);
}
if(!queue.lessThan(prevShardDoc, shardDoc)){//prevShardDoc<shardDoc
uniqueShardDoc.put(id, prevShardDoc);
}
// if (prevShardDoc.shard.toLowerCase().indexOf("small") != -1) {// 旧的
// uniqueShardDoc.put(id, prevShardDoc);
// }
}
}// end for-each-doc-in-response
}// end for-each-response
for (Entry<Object, ShardDoc> entry : uniqueShardDoc.entrySet()) {
queue.insertWithOverflow(entry.getValue());
}