lucene常用搜索总结


lucene常用搜索总结2008-04-17 09:02转载一篇文章,在此只为搜藏和大家查询方便。

public class Test{

Analyzer analyzer = new StandardAnalyzer();

RAMDirectory directory = new RAMDirectory();

/**
* 创建索引
*
* @throws IOException
*/
public void index() throws IOException{

IndexWriter indexWriter = new IndexWriter(directory,analyzer,true);

Document doc1 = new Document();

doc1.add(new Field("title","aaabbb",Store.YES,Index.TOKENIZED));

doc1.add(new Field("content","If you would like to help promote OpenOffice",Store.YES,Index.TOKENIZED));

doc1.add(new Field("time","2005",Store.YES,Index.TOKENIZED));

indexWriter.addDocument(doc1);

Document doc2 = new Document();

doc2.add(new Field("title","bbcc",Store.YES,Index.TOKENIZED));

doc2.add(new Field("content","sdfsdfsdfasdds",Store.YES,Index.TOKENIZED));

doc2.add(new Field("time","2007",Store.YES,Index.TOKENIZED));

indexWriter.addDocument(doc2);

indexWriter.optimize();

indexWriter.close();
}

// 按词条搜索
public void termSearcher() throws IOException{

IndexSearcher searcher = new IndexSearcher(directory);

// 查询title中包含aaa
Term term = new Term("title","aaa");

Query query = new TermQuery(term);

searcher.search(query);

searcher.close();
}

// 短语搜索
public void phraseSearcher() throws IOException{

IndexSearcher searcher = new IndexSearcher(directory);

PhraseQuery phraseQuery = new PhraseQuery();

// slop 两个项的位置之间允许的最大间隔,这里would和help中间只隔了like(to会被去掉),所以最大间隔设为1就能找到值
// 对于两个相连的关键词来说,无论坡度设为多少,都可以找到
// 对于不相连的词来说,当两个关键词相隔数小于坡度都可以找到,否则找不到
phraseQuery.setSlop(1);

phraseQuery.add(new Term("content","would"));

phraseQuery.add(new Term("content","help"));

// 如果将help放前面,would放后面,需要将would向后移动3个位置才能到help后面(to不算),所以要设为slop最少要设为3
// phraseQuery.add(new Term("content","help"));

// phraseQuery.add(new Term("content","would"));

// phraseQuery.setSlop(3);

// 短语的评分是根据项之间距离越小,评分越高,否则越小
Hits hits = searcher.search(phraseQuery);

printResult(hits);

searcher.close();
}

// 通配符搜索 WildcardQuery
// 通配符包括’?’匹配一个任意字符和’*’匹配零个或多个任意字符,例如你搜索’use*’,你可能找到’user’或者’uses’:
public void wildcardSearcher() throws IOException{

IndexSearcher searcher = new IndexSearcher(directory);

// 与正则一样,*代表0个或多个字母,?代表0个或一个字母
// WildcardQuery与QueryParser不同的是:WildcardQuery的前缀可以为*,而QueryParser不行
WildcardQuery query = new WildcardQuery(new Term("content","a?bbb*"));

Hits hits = searcher.search(query);

printResult(hits);

searcher.close();
}

// 模糊搜索 FuzzyQuery
public void fuzzySearcher() throws IOException{

IndexSearcher search = new IndexSearcher(directory);

// OpenOffica虽然没被索引,但能找到相近的OpenOffice
FuzzyQuery query = new FuzzyQuery(new Term("content","OpenOffica"));

Hits hits = search.search(query);

printResult(hits);

search.close();
}

// 使用前缀PrefixQuery
public void prefixSearcher() throws IOException{

IndexSearcher search = new IndexSearcher(directory);

// 全部title前缀为a
PrefixQuery query = new PrefixQuery(new Term("title","b"));

Hits hits = search.search(query);

printResult(hits);

search.close();

}

// 范围搜索 RangeQuery
public void rangeSearcher() throws IOException{

IndexSearcher search = new IndexSearcher(directory);

// RangeQuery query = new RangeQuery(beginTime, endTime, false);
// 开始时间,结束时间,最后一个参数表示是否包含边界条件本身,如果为false
RangeQuery query = new RangeQuery(new Term("time","2005"),new Term("time","2007"),true);

Hits hits = search.search(query);

printResult(hits);

search.close();
}


// 与或搜索BooleanQuery
//BooleanClause用于表示布尔查询子句关系的类,包括:BooleanClause.Occur.MUST,BooleanClause.Occur.MUST_NOT,BooleanClause.Occur.SHOULD。有以下6种组合:

//1.MUST和MUST:取得连个查询子句的交集。

//2.MUST和MUST_NOT:表示查询结果中不能包含MUST_NOT所对应得查询子句的检索结果。

//3.MUST_NOT和MUST_NOT:无意义,检索无结果。

//4.SHOULD与MUST、SHOULD与MUST_NOT:SHOULD与MUST连用时,无意义,结果为MUST子句的检索结果。与MUST_NOT连用时,功能同MUST。

//5.SHOULD与SHOULD:表示“或”关系,最终检索结果为所有检索子句的并集。

public void booleanSearcher() throws IOException, ParseException{

IndexSearcher search = new IndexSearcher(directory);

QueryParser qp1 = new QueryParser("title",new StandardAnalyzer());

Query query1 = qp1.parse("aa*");

QueryParser qp2 = new QueryParser("title",new StandardAnalyzer());

Query query2 = qp2.parse("bb*");

BooleanQuery query = new BooleanQuery();

// 搜索结果的title的前双缀可以是aa,或bb
query.add(query1, BooleanClause.Occur.SHOULD);

// BooleanClause.Occur.MUST 必须
// BooleanClause.Occur.MUST_NOT 必须不是
query.add(query2, BooleanClause.Occur.SHOULD);

Hits hits = search.search(query);

printResult(hits);

search.close();

}


// 多关键的搜索 PhrasePrefixQuery
public void phrasePrefixSearcher() throws IOException{

IndexSearcher search = new IndexSearcher(directory);

PhrasePrefixQuery query = new PhrasePrefixQuery();

// 这里两项都有可能首先被匹配
query.add(new Term[]{new Term("content","would"),new Term("content","can")});

// 只有一项必须匹配
query.add(new Term("content","help"));

// If you would like to help promote OpenOffice
// can I help you
// slop因子的作用域为查询中的所有短语
query.setSlop(1);

// 匹配第一项为 would 或 can 第二项为help
// solp设置为1
// If you would like to help promote OpenOffice 除去if to 外,would与help的距离=1
// can I help you 的距离也=1 所以可以搜索出两条数据

Hits hits = search.search(query);

printResult(hits);

search.close();
}

// 在多个域上查询 MultiFieldQueryParser
public void multiFieldSearcher() throws IOException, ParseException{

IndexSearcher search = new IndexSearcher(directory);

// 默认情况下的方式为Occur.SHOULD
// titile可以匹配bb,content可以匹配you
// MultiFieldQueryParser.parse(new String[]{"bb","you"},new String[]{"title","content"}, analyzer);

// titile必须匹配bb,content不能匹配
Query query = MultiFieldQueryParser.parse( new String[]{"bb","you"},new String[]{"title","content"},new BooleanClause.Occur[]{Occur.MUST,Occur.MUST_NOT}, analyzer);

// title中必须包含bb content不能有bb
// Query query = MultiFieldQueryParser.parse( "bb*",new String[]{"title","content"},new BooleanClause.Occur[]{BooleanClause.Occur.MUST,BooleanClause.Occur.MUST_NOT}, analyzer);

Hits hits = search.search(query);

printResult(hits);

search.close();
}

public void printResult(Hits hits) throws IOException{
for(int i = 0; i < hits.length(); i++){
Document d = hits.doc(i);
System.out.println(d.get("title"));
System.out.println(d.get("content"));
System.out.println(d.get("time"));
}
}


评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值