Nutch1.2插件实现or查询

Nutch搜索前台的默认搜索方式是and,也就是所有关键词都必须出现。现在想实现or查询,却发现Nutch本身并不支持。在官方论坛搜索无果、修改源代码也无果的情况下,想到用插件来实现or查询,于是参照query-basic插件改写了一个query-or插件,源码如下:

public class OrQueryFilter implements QueryFilter { private Configuration conf; float myBoost = 0f; private String[] FIELDS = { "url", "anchor", "content", "title", "host" }; private static final int URL_BOOST = 0; private static final int ANCHOR_BOOST = 1; private static final int CONTENT_BOOST = 2; private static final int TITLE_BOOST = 3; private static final int HOST_BOOST = 4; private static int SLOP = Integer.MAX_VALUE; private float PHRASE_BOOST; private float[] FIELD_BOOSTS = new float[5]; /** * Set the boost factor for url matches, relative to content and anchor * matches */ public void setUrlBoost(float boost) { FIELD_BOOSTS[URL_BOOST] = boost; } /** * Set the boost factor for title/anchor matches, relative to url and * content matches. */ public void setAnchorBoost(float boost) { FIELD_BOOSTS[ANCHOR_BOOST] = boost; } /** * Set the boost factor for sloppy phrase matches relative to unordered term * matches. */ public void setPhraseBoost(float boost) { PHRASE_BOOST = boost; } public void setConf(Configuration conf) { this.conf = conf; } public Configuration getConf() { return this.conf; } @Override public BooleanQuery filter(Query input, BooleanQuery output) throws QueryException { for (Clause c : input.getClauses()) { if (!c.getField().equals("or")) continue; String value = c.getTerm().toString(); BooleanQuery bq = new BooleanQuery(); for (int f = 0; f < FIELDS.length; f++) { Clause o = c; if (o.isPhrase()) { String[] opt = new CommonGrams(getConf()) .optimizePhrase(o.getPhrase(), FIELDS[f]); if (opt.length == 1) { o = new Clause(new Term(opt[0]), o.isRequired(), o.isProhibited(), getConf()); } else { o = new Clause(new Phrase(opt), o.isRequired(), o.isProhibited(), getConf()); } } bq.add(o.isPhrase() ? 
exactPhrase(o.getPhrase(), FIELDS[f], FIELD_BOOSTS[f]) : termQuery(FIELDS[f], o.getTerm(), FIELD_BOOSTS[f]), BooleanClause.Occur.SHOULD); } bq.setBoost(myBoost); output.add(bq, BooleanClause.Occur.SHOULD); } return output; } private org.apache.lucene.search.Query exactPhrase(Phrase nutchPhrase, String field, float boost) { Term[] terms = nutchPhrase.getTerms(); PhraseQuery exactPhrase = new PhraseQuery(); for (int i = 0; i < terms.length; i++) { exactPhrase.add(luceneTerm(field, terms[i])); } exactPhrase.setBoost(boost); return exactPhrase; } private org.apache.lucene.search.Query termQuery(String field, Term term, float boost) { TermQuery result = new TermQuery(luceneTerm(field, term)); result.setBoost(boost); return result; } /** Utility to construct a Lucene Term given a Nutch query term and field. */ private static org.apache.lucene.index.Term luceneTerm(String field, Term term) { return new org.apache.lucene.index.Term(field, term.toString()); } }

使用方式:查询时输入 or:关键词1 or:关键词2

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值