3.1 搜索的简单实现
1.创建IndexReader
package com.mzsx.index;
import java.io.File;
import java.io.IOException;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
import com.mzsx.write.DirectoryConext;
public class IndexReaderContext {
privatestatic IndexReader reader = null;
privatestatic Directory directory=null;
privateIndexReaderContext(){}
publicstatic IndexReader getIndexReader(String fileName){
if(reader==null) {
directory=DirectoryConext.getDirectory(fileName);
synchronized(IndexReaderContext.class){
if(reader==null) {
try{
reader=IndexReader.open(directory,false);
}catch (CorruptIndexException e) {
e.printStackTrace();
}catch (IOException e) {
e.printStackTrace();
}
}
}
}
returnreader;
}
publicstatic IndexReader getIndexReader(Directory dir){
if(reader==null) {
directory=dir;
synchronized(IndexReaderContext.class){
if(reader==null) {
try{
reader=IndexReader.open(directory,false);
}catch (CorruptIndexException e) {
e.printStackTrace();
}catch (IOException e) {
e.printStackTrace();
}
}
}
}
returnreader;
}
}
2.创建IndexSearcher
// Wrap the already opened reader in a searcher.
final IndexSearcher searcher = new IndexSearcher(indexReader);
3.创建Term和TermQuery
// The Term pairs the field name with the value to look up; TermQuery searches for that term.
TermQuery termQuery = new TermQuery(new Term(field, name));
4.根据TermQuery获取TopDocs
// Fetch at most num hits, then report the total hit count.
TopDocs topDocs = searcher.search(termQuery, num);
System.out.println("总共查询出来总数:" + topDocs.totalHits);
5.根据TopDocs获取ScoreDoc和ScoreDoc获取相应文档
ScoreDoc[] scoreDocs = topDocs.scoreDocs;
for (ScoreDoc scoreDoc : scoreDocs) {
    // scoreDoc.doc is the document number used to load the stored document.
    Document doc = searcher.doc(scoreDoc.doc);
    String contents = doc.get("contents");
    // Print at most 50 characters; a bare substring(0, 50) throws when the stored
    // contents are shorter than that (or missing).
    String preview = contents == null
            ? ""
            : contents.substring(0, Math.min(50, contents.length()));
    System.out.println(doc.get("id") + "---->"
            + doc.get("filename") + "[" + doc.get("fullpath")
            + "]-->\n" + preview);
}
6.整体
/**
 * Exact query: runs a {@link TermQuery} for one term and prints the matching documents.
 *
 * @param field name of the field to search
 * @param name  term value to look for in {@code field}
 * @param num   maximum number of hits to fetch
 */
public void searchByTerm(String field, String name, int num) {
    try {
        IndexSearcher searcher = new IndexSearcher(indexReader);
        TermQuery termQuery = new TermQuery(new Term(field, name));
        TopDocs topDocs = searcher.search(termQuery, num);
        System.out.println("总共查询出来总数:" + topDocs.totalHits);
        for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
            Document doc = searcher.doc(scoreDoc.doc);
            String contents = doc.get("contents");
            // Print at most 50 characters; a bare substring(0, 50) throws when the stored
            // contents are shorter than that (or missing).
            String preview = contents == null
                    ? ""
                    : contents.substring(0, Math.min(50, contents.length()));
            System.out.println(doc.get("id") + "---->"
                    + doc.get("filename") + "[" + doc.get("fullpath")
                    + "]-->\n" + preview);
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
}
3.2 其他搜索Query
1. TermQuery—单个精确查询
/**
 * Exact query: runs a {@link TermQuery} for one term and prints the matching documents.
 *
 * @param field name of the field to search
 * @param name  term value to look for in {@code field}
 * @param num   maximum number of hits to fetch
 */
public void searchByTerm(String field, String name, int num) {
    try {
        IndexSearcher searcher = new IndexSearcher(indexReader);
        TermQuery termQuery = new TermQuery(new Term(field, name));
        TopDocs topDocs = searcher.search(termQuery, num);
        System.out.println("总共查询出来总数:" + topDocs.totalHits);
        for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
            Document doc = searcher.doc(scoreDoc.doc);
            String contents = doc.get("contents");
            // Print at most 50 characters; a bare substring(0, 50) throws when the stored
            // contents are shorter than that (or missing).
            String preview = contents == null
                    ? ""
                    : contents.substring(0, Math.min(50, contents.length()));
            System.out.println(doc.get("id") + "---->"
                    + doc.get("filename") + "[" + doc.get("fullpath")
                    + "]-->\n" + preview);
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
}
2.TermRangeQuery—查找字符串某个范围
public void searchByTermRange(String field,StringlowerTerm,String upperTerm,int num) {
try{
IndexSearchersearcher=new IndexSearcher(indexReader);
TermRangeQuery range=new TermRangeQuery(field, lowerTerm,upperTerm, true, true);
TopDocs topDocs= searcher.search(range, num);
intlength= topDocs.totalHits;
System.out.println("总共查询出来总数:"+length);
ScoreDoc[]scoreDocs= topDocs.scoreDocs;
for(ScoreDoc scoreDoc : scoreDocs) {
Documentdoc=searcher.doc(scoreDoc.doc);
System.out.println(doc.get("id")+ "---->"
+doc.get("filename") + "[" + doc.get("fullpath")
+"]-->\n" + doc.get("contents").substring(0, 50) );
}
}catch (CorruptIndexException e) {
//TODO Auto-generated catch block
e.printStackTrace();
}catch (IOException e) {
//TODO Auto-generated catch block
e.printStackTrace();
}
}
3. NumericRangeQuery--查询某个数字的范围
public void searchByNumricRange(String field,longmin,long max,int num) {
try{
IndexSearchersearcher=new IndexSearcher(indexReader);
NumericRangeQuery range=NumericRangeQuery.newLongRange(field,min, max, true, true);
TopDocs topDocs= searcher.search(range, num);
intlength= topDocs.totalHits;
System.out.println("总共查询出来总数:"+length);
ScoreDoc[]scoreDocs= topDocs.scoreDocs;
for(ScoreDoc scoreDoc : scoreDocs) {
Documentdoc=searcher.doc(scoreDoc.doc);
System.out.println(doc.get("id")+ "---->"
+doc.get("filename") + "[" + doc.get("fullpath")
+"]-->\n" + doc.get("contents").substring(0, 50) );
}
}catch (CorruptIndexException e) {
e.printStackTrace();
}catch (IOException e) {
e.printStackTrace();
}
}
4.PrefixQuery—前缀查询
public void searchByPrefix(String field,Stringvalue,int num) {
try{
IndexSearchersearcher=new IndexSearcher(indexReader);
PrefixQuery range=new PrefixQuery(new Term(field,value));
TopDocs topDocs= searcher.search(range, num);
intlength= topDocs.totalHits;
System.out.println("总共查询出来总数:"+length);
ScoreDoc[]scoreDocs= topDocs.scoreDocs;
for(ScoreDoc scoreDoc : scoreDocs) {
Documentdoc=searcher.doc(scoreDoc.doc);
System.out.println(doc.get("id")+ "---->"
+doc.get("filename") + "[" + doc.get("fullpath")
+"]-->\n" + doc.get("contents") );
}
}catch (CorruptIndexException e) {
e.printStackTrace();
}catch (IOException e) {
e.printStackTrace();
}
}
5. WildcardQuery—通配符查询
/**
 * Wildcard search: runs a {@link WildcardQuery} and prints the matching documents.
 *
 * <p>{@code value} may contain the wildcards {@code ?} (matches one character) and
 * {@code *} (matches any number of characters).
 *
 * @param field name of the field to search
 * @param value wildcard pattern
 * @param num   maximum number of hits to fetch
 */
public void searchByWildcard(String field, String value, int num) {
    try {
        IndexSearcher searcher = new IndexSearcher(indexReader);
        WildcardQuery query = new WildcardQuery(new Term(field, value));
        TopDocs topDocs = searcher.search(query, num);
        System.out.println("总共查询出来总数:" + topDocs.totalHits);
        for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
            Document doc = searcher.doc(scoreDoc.doc);
            String contents = doc.get("contents");
            // Print at most 40 characters; a bare substring(0, 40) throws when the stored
            // contents are shorter than that (or missing).
            String preview = contents == null
                    ? ""
                    : contents.substring(0, Math.min(40, contents.length()));
            System.out.println(doc.get("id") + "---->"
                    + doc.get("filename") + "[" + doc.get("fullpath")
                    + "]-->\n" + preview);
        }
    } catch (IOException e) {
        // CorruptIndexException is an IOException, so one handler covers both cases.
        e.printStackTrace();
    }
}
6.BooleanQuery—联合查询
/**
 * Combined query: runs a {@link BooleanQuery} made of two term clauses, both added with
 * {@code Occur.SHOULD}, and prints the matching documents.
 *
 * @param num maximum number of hits to fetch
 */
public void searchByBoolean(int num) {
    try {
        IndexSearcher searcher = new IndexSearcher(indexReader);
        BooleanQuery query = new BooleanQuery();
        query.add(new TermQuery(new Term("filename", "凤凰台")), Occur.SHOULD);
        query.add(new TermQuery(new Term("contents", "用户")), Occur.SHOULD);
        TopDocs topDocs = searcher.search(query, num);
        System.out.println("总共查询出来总数:" + topDocs.totalHits);
        for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
            Document doc = searcher.doc(scoreDoc.doc);
            String contents = doc.get("contents");
            // Print at most 40 characters; a bare substring(0, 40) throws when the stored
            // contents are shorter than that (or missing).
            String preview = contents == null
                    ? ""
                    : contents.substring(0, Math.min(40, contents.length()));
            System.out.println(doc.get("id") + "---->"
                    + doc.get("filename") + "[" + doc.get("fullpath")
                    + "]-->\n" + preview);
        }
    } catch (IOException e) {
        // CorruptIndexException is an IOException, so one handler covers both cases.
        e.printStackTrace();
    }
}
7. PhraseQuery--短语检索
/**
 * Phrase search: runs a {@link PhraseQuery} for two terms in the {@code filename} field
 * with a slop of 3 and prints the matching documents.
 *
 * @param num maximum number of hits to fetch
 */
public void searchByPhrase(int num) {
    try {
        IndexSearcher searcher = new IndexSearcher(indexReader);
        PhraseQuery query = new PhraseQuery();
        query.add(new Term("filename", "项目"));
        query.setSlop(3);
        query.add(new Term("filename", "系统"));
        TopDocs topDocs = searcher.search(query, num);
        System.out.println("总共查询出来总数:" + topDocs.totalHits);
        for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
            Document doc = searcher.doc(scoreDoc.doc);
            String contents = doc.get("contents");
            // Print at most 40 characters; a bare substring(0, 40) throws when the stored
            // contents are shorter than that (or missing).
            String preview = contents == null
                    ? ""
                    : contents.substring(0, Math.min(40, contents.length()));
            System.out.println(doc.get("id") + "---->"
                    + doc.get("filename") + "[" + doc.get("fullpath")
                    + "]-->\n" + preview);
        }
    } catch (IOException e) {
        // CorruptIndexException is an IOException, so one handler covers both cases.
        e.printStackTrace();
    }
}
8.FuzzyQuery--模糊匹配
/**
 * Fuzzy match: runs a {@link FuzzyQuery} against the {@code contents} field and prints the
 * matching documents.
 *
 * @param num maximum number of hits to fetch
 */
public void searchByFuzzy(int num) {
    try {
        IndexSearcher searcher = new IndexSearcher(indexReader);
        // The field was misspelled "contens"; every other query and the output below
        // use "contents".
        FuzzyQuery query = new FuzzyQuery(new Term("contents", "安全"), 0.3f, 0);
        TopDocs topDocs = searcher.search(query, num);
        System.out.println("总共查询出来总数:" + topDocs.totalHits);
        for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
            Document doc = searcher.doc(scoreDoc.doc);
            String contents = doc.get("contents");
            // Print at most 40 characters; a bare substring(0, 40) throws when the stored
            // contents are shorter than that (or missing).
            String preview = contents == null
                    ? ""
                    : contents.substring(0, Math.min(40, contents.length()));
            System.out.println(doc.get("id") + "---->"
                    + doc.get("filename") + "[" + doc.get("fullpath")
                    + "]-->\n" + preview);
        }
    } catch (IOException e) {
        // CorruptIndexException is an IOException, so one handler covers both cases.
        e.printStackTrace();
    }
}
3.3 QueryParser
1.QueryParser简介
Mike | 默认域包含mike |
Mike john Mike OR john | 默认域包含mike或者john |
+mike +address:zhaotong Mike AND address:zhaotong | 默认域即是mike并且address是zhaotong |
id :2 | Id域为2 |
Address:Kunming -desc:she Address:Kunming AND NOT desc:she | Address是kunming并且desc不是she |
(mike OR john) AND address:zhaotong | 默认域是mike或者john 并且address是zhaotong |
Desc:"she like" | Desc域是she like |
desc:"happy girl"~5 | 查找happy和girl之间距离小于5的文档 |
J* | 默认域是j开头 |
Johe~ | 模糊搜索johe |
Id:["1" TO "3"] | Id从1到3 |
2.QueryParser实例
public void searchByQueryParse(StringqueryContion,int num) {
try{
IndexSearchersearcher=new IndexSearcher(indexReader);
QueryParserparser = new QueryParser(Version.LUCENE_35,"contents",analyzer);
org.apache.lucene.search.Queryquery=parser.parse(queryContion);
TopDocs topDocs= searcher.search(query, num);
intlength= topDocs.totalHits;
System.out.println("总共查询出来总数:"+length);
ScoreDoc[]scoreDocs= topDocs.scoreDocs;
for(ScoreDoc scoreDoc : scoreDocs) {
Documentdoc=searcher.doc(scoreDoc.doc);
System.out.println(doc.get("id")+ "---->"
+doc.get("filename") + "[" + doc.get("fullpath")
+"]-->\n" + doc.get("contents").substring(0,40) );
}
}catch (CorruptIndexException e) {
e.printStackTrace();
}catch (IOException e) {
e.printStackTrace();
}catch (ParseException e) {
e.printStackTrace();
}
}
// Test code
@Test
public void searchByQueryParse() {
    SearchOpera opera = new SearchOpera("D:/luceneIndex/index", analyzer, true);
    // Range query; the TO keyword must be separated from both bounds by spaces.
    opera.searchByQueryParse("filename:[a TO z]", 10);
    //opera.searchByQueryParse("filename:{a TO g}",10);
    // Numeric ranges cannot be matched this way (extend the parser yourself)
    //opera.searchByQueryParse("size:[200 TO 13000]",10);
    // Exact phrase match
    //opera.searchByQueryParse("contents:\"完全是宠溺\"",10);
    // Phrase match with a distance of 1
    //opera.searchByQueryParse("contents:\"完全宠溺\"~1",10);
    // Wildcard match
    //opera.searchByQueryParse("contents:*",10);
}
3.4 分页搜索
1.第一种方式:再查询
public void searchPage(String queryContion,intpageIndex,int pageSize){
try{
IndexSearchersearcher=new IndexSearcher(indexReader);
QueryParserparser = new QueryParser(Version.LUCENE_35,"contents",analyzer);
org.apache.lucene.search.Queryquery=parser.parse(queryContion);
TopDocs topDocs= searcher.search(query, 500);
intlength= topDocs.totalHits;
System.out.println("总共查询出来总数:"+length);
ScoreDoc[]scoreDocs= topDocs.scoreDocs;
intstart = (pageIndex-1)*pageSize;
intend = pageIndex*pageSize;
for(int i=start;i<end;i++) {
Documentdoc=searcher.doc(scoreDocs[i].doc);
System.out.println(doc.get("id")+ "---->"
+doc.get("filename") + "[" + doc.get("fullpath")
+"]-->\n" + doc.get("contents").substring(0,40) );
}
}catch (CorruptIndexException e) {
e.printStackTrace();
}catch (IOException e) {
e.printStackTrace();
}catch (ParseException e) {
e.printStackTrace();
}
}
// Test code
@Test
public void searchPage() {
    SearchOpera opera = new SearchOpera("D:/luceneIndex/index", analyzer, true);
    // Range query; the TO keyword must be separated from both bounds by spaces.
    opera.searchPage("filename:[a TO z]", 1, 3);
}
2.第二种方式:searchAfter
public void searchPageByAfter(String query,intpageIndex,int pageSize) {
try{
IndexSearchersearcher=new IndexSearcher(indexReader);
QueryParserparser = new QueryParser(Version.LUCENE_35,"content",analyzer);
Queryq = parser.parse(query);
//先获取上一页的最后一个元素
ScoreDoclastSd = getLastScoreDoc(pageIndex, pageSize, q, searcher);
//通过最后一个元素搜索下页的pageSize个元素
TopDocstds = searcher.searchAfter(lastSd,q, pageSize);
for(ScoreDocsd:tds.scoreDocs) {
Documentdoc = searcher.doc(sd.doc);
System.out.println(sd.doc+":"+doc.get("path")+"-->"+doc.get("filename"));
}
searcher.close();
}catch (org.apache.lucene.queryParser.ParseException e) {
e.printStackTrace();
}catch (IOException e) {
e.printStackTrace();
}
}
/**
* 根据页码和分页大小获取上一次的最后一个ScoreDoc
*/
privateScoreDoc getLastScoreDoc(int pageIndex,int pageSize,Query query,IndexSearchersearch) throws IOException {
if(pageIndex==1)returnnull;//如果是第一页就返回空
intnum = pageSize*(pageIndex-1);//获取上一页的数量
TopDocstds = search.search(query, num);
returntds.scoreDocs[num-1];
}
// Test code
@Test
public void searchPageByAfter() {
    SearchOpera opera = new SearchOpera("D:/luceneIndex/index", analyzer, true);
    // Range query; the TO keyword must be separated from both bounds by spaces.
    opera.searchPageByAfter("filename:[a TO z]", 1, 3);
}
转载于:https://blog.51cto.com/qiangmzsx/1440489