Lucene中的各种Query实例

本文介绍Lucene中的多种搜索技巧,包括精确匹配、模糊匹配、范围搜索等,通过具体实例演示了如何灵活运用这些技巧来提高搜索效率。

摘要生成于 C知道 ,由 DeepSeek-R1 满血版支持, 前往体验 >

(1):TermQuery

/*
 * Created on 2006-3-11
 
*/

package  ch11;

import  org.apache.lucene.analysis.standard.StandardAnalyzer;
import  org.apache.lucene.document.Document;
import  org.apache.lucene.document.Field;
import  org.apache.lucene.index.IndexWriter;
import  org.apache.lucene.index.Term;
import  org.apache.lucene.search.Hits;
import  org.apache.lucene.search.IndexSearcher;
import  org.apache.lucene.search.Query;
import  org.apache.lucene.search.TermQuery;

public   class  TermQueryTest
{
  
public static void main(String[] args) throws Exception
  
{
    Document doc1 
= new Document();
    doc1.add(Field.Text(
"name""word1 word2 word3"));
    doc1.add(Field.Keyword(
"title""doc1"));

    IndexWriter writer 
= new IndexWriter("c:/java/index"new StandardAnalyzer(), true);
    writer.addDocument(doc1);
    writer.close();

    Query query 
= null;
    Hits hits 
= null;
    
    IndexSearcher searcher 
= new IndexSearcher("c:/java/index");
    
    query 
= new TermQuery(new Term("name","word1"));
    hits 
= searcher.search(query);
    printResult(hits, 
"word1");
    
    query 
= new TermQuery(new Term("title","doc1"));
    hits 
= searcher.search(query);
    printResult(hits, 
"doc1");

  }


  
public static void printResult(Hits hits, String key) throws Exception
  
{
    System.out.println(
"查找 "" + key + "" :");
    
if (hits != null)
    
{
      
if (hits.length() == 0)
      
{
        System.out.println(
"没有找到任何结果");
      }

      
else
      
{
        System.out.println(
"找到" + hits.length() + "个结果");
        
for (int i = 0; i < hits.length(); i++)
        
{
          Document d 
= hits.doc(i);
          String dname 
= d.get("title");
          System.out.print(dname 
+ "   ");
        }

        System.out.println();
        System.out.println();
      }

    }

  }

}


运行结果:
C:/>java TermQueryTest
查找 "word1" :
找到1个结果
doc1

查找 "doc1" :
找到1个结果
doc1

(2):与或搜索

 

/*
 * Created on 2006-3-11
 
*/

import  org.apache.lucene.analysis.standard.StandardAnalyzer;
import  org.apache.lucene.document.Document;
import  org.apache.lucene.document.Field;
import  org.apache.lucene.index.IndexWriter;
import  org.apache.lucene.index.Term;
import  org.apache.lucene.search.BooleanQuery;
import  org.apache.lucene.search.Hits;
import  org.apache.lucene.search.IndexSearcher;
import  org.apache.lucene.search.Query;
import  org.apache.lucene.search.TermQuery;

public   class  BooleanQueryTest1
{
  
public static void main (String [] args) throws Exception {
    Document doc1 
= new Document();
    doc1.add(Field.Text(
"name""word1 word2 word3"));
    doc1.add(Field.Keyword(
"title""doc1"));
    
    Document doc2 
= new Document();
    doc2.add(Field.Text(
"name""word1 word4 word5"));
    doc2.add(Field.Keyword(
"title""doc2"));
    
    Document doc3 
= new Document();
    doc3.add(Field.Text(
"name""word1 word2 word6"));
    doc3.add(Field.Keyword(
"title""doc3"));
    
    IndexWriter writer 
= new IndexWriter("c:/java/index"new StandardAnalyzer(), true);
    writer.addDocument(doc1);
    writer.addDocument(doc2);
    writer.addDocument(doc3);
    writer.close();
    
    Query query1 
= null;
    Query query2 
= null;
    BooleanQuery query 
= null;
    Hits hits 
= null;
    
    IndexSearcher searcher 
= new IndexSearcher("c:/java/index");
    
    query1 
= new TermQuery(new Term("name","word1"));
    query2 
= new TermQuery(new Term("name","word2"));
    
    
// 构造一个布尔查询
    query = new BooleanQuery();

  
// 添加两个子查询,与关系
  
//  query.add(query1, true, false);
 
//  query.add(query2, true, false);
    
    
    
// 添加两个子查询,或关系
    query.add(query1, falsefalse);
    query.add(query2, 
falsefalse);
    
    hits 
= searcher.search(query);
    printResult(hits, 
"word1和word2");
    
  }

  
  
public static void printResult(Hits hits, String key) throws Exception
  
{
    System.out.println(
"查找 "" + key + "" :");
    
if (hits != null)
    
{
      
if (hits.length() == 0)
      
{
        System.out.println(
"没有找到任何结果");
      }

      
else
      
{
        System.out.println(
"找到" + hits.length() + "个结果");
        
for (int i = 0; i < hits.length(); i++)
        
{
          Document d 
= hits.doc(i);
          String dname 
= d.get("title");
          System.out.print(dname 
+ "   ");
        }

        System.out.println();
        System.out.println();
      }

    }

  }

}

运行结果:

C:/>java BooleanQueryTest1
查找 "word1和word2" :
找到3个结果
doc1 doc3 doc2
 

(3):在范围内搜索

import  org.apache.lucene.analysis.standard.StandardAnalyzer;
import  org.apache.lucene.document.Document;
import  org.apache.lucene.document.Field;
import  org.apache.lucene.index.IndexWriter;
import  org.apache.lucene.index.Term;
import  org.apache.lucene.search.Hits;
import  org.apache.lucene.search.IndexSearcher;
import  org.apache.lucene.search.RangeQuery;

public   class  RangeQueryTest  {
    
public static void main (String [] args) throws Exception {
        Document doc1 
= new Document();
        doc1.add(Field.Text(
"time""200001"));
        doc1.add(Field.Keyword(
"title""doc1"));
        
        Document doc2 
= new Document();
        doc2.add(Field.Text(
"time""200002"));
        doc2.add(Field.Keyword(
"title""doc2"));
        
        Document doc3 
= new Document();
        doc3.add(Field.Text(
"time""200003"));
        doc3.add(Field.Keyword(
"title""doc3"));
        
        Document doc4 
= new Document();
        doc4.add(Field.Text(
"time""200004"));
        doc4.add(Field.Keyword(
"title""doc4"));
        
        Document doc5 
= new Document();
        doc5.add(Field.Text(
"time""200005"));
        doc5.add(Field.Keyword(
"title""doc5"));
        
        IndexWriter writer 
= new IndexWriter("c:/java/index"new StandardAnalyzer(), true);
        writer.setUseCompoundFile(
true);
        writer.addDocument(doc1);
        writer.addDocument(doc2);
        writer.addDocument(doc3);
        writer.addDocument(doc4);
        writer.addDocument(doc5);
        writer.close();
        
        IndexSearcher searcher 
= new IndexSearcher("c:/java/index");
        Term beginTime 
= new Term("time","200001");
        Term endTime 
= new Term("time","200005");
        
        Hits hits 
= null;
        RangeQuery query 
= null;
        
        query 
= new RangeQuery(beginTime, endTime, false);
        hits 
= searcher.search(query);
        printResult(hits, 
"从200001到200005的文档,不包括200001和200005");
        
        query 
= new RangeQuery(beginTime, endTime, true);
        hits 
= searcher.search(query);
        printResult(hits, 
"从200001到200005的文档,包括200001和200005");
        
    }

    
    
public static void printResult(Hits hits, String key) throws Exception {
        System.out.println(
"查找 "" + key + "" :");
        
if (hits != null{
            
if (hits.length() == 0{
                System.out.println(
"没有找到任何结果");
            }
 else {
                System.out.print(
"找到");
                
for (int i = 0; i < hits.length(); i++{
                    Document d 
= hits.doc(i);
                    String dname 
= d.get("title");
                    System.out.print(dname 
+ "   " );
                }

                System.out.println();
                System.out.println();
            }

        }

    }

}


运行结果:
C:/java>java   RangeQueryTest
查找 "从200001到200005的文档,不包括200001和200005" :
找到doc2   doc3   doc4

查找 "从200001到200005的文档,包括200001和200005" :
找到doc1 doc2 doc3 doc4 doc5

(4):使用前缀搜索

import  org.apache.lucene.analysis.standard.StandardAnalyzer;
import  org.apache.lucene.document.Document;
import  org.apache.lucene.document.Field;
import  org.apache.lucene.index.IndexWriter;
import  org.apache.lucene.index.Term;
import  org.apache.lucene.search.Hits;
import  org.apache.lucene.search.IndexSearcher;
import  org.apache.lucene.search.PrefixQuery;
import  org.apache.lucene.search.RangeQuery;

public   class  PrefixQueryTest  {
    
public static void main(String[] args) throws Exception {
        Document doc1 
= new Document();
        doc1.add(Field.Text(
"name""David"));
        doc1.add(Field.Keyword(
"title""doc1"));

        Document doc2 
= new Document();
        doc2.add(Field.Text(
"name""Darwen"));
        doc2.add(Field.Keyword(
"title""doc2"));

        Document doc3 
= new Document();
        doc3.add(Field.Text(
"name""Smith"));
        doc3.add(Field.Keyword(
"title""doc3"));

        Document doc4 
= new Document();
        doc4.add(Field.Text(
"name""Smart"));
        doc4.add(Field.Keyword(
"title""doc4"));

        IndexWriter writer 
= new IndexWriter("c:/java/index",
                
new StandardAnalyzer(), true);
        writer.setUseCompoundFile(
true);
        writer.addDocument(doc1);
        writer.addDocument(doc2);
        writer.addDocument(doc3);
        writer.addDocument(doc4);
        writer.close();

        IndexSearcher searcher 
= new IndexSearcher("c:/java/index");
        Term pre1 
= new Term("name""Da");
        Term pre2 
= new Term("name""da");
        Term pre3 
= new Term("name""sm");

        Hits hits 
= null;
        PrefixQuery query 
= null;

        query 
= new PrefixQuery(pre1);
        hits 
= searcher.search(query);
        printResult(hits, 
"前缀为'Da'的文档");
        
        query 
= new PrefixQuery(pre2);
        hits 
= searcher.search(query);
        printResult(hits, 
"前缀为'da'的文档");
        
        query 
= new PrefixQuery(pre3);
        hits 
= searcher.search(query);
        printResult(hits, 
"前缀为'sm'的文档");

    }


    
public static void printResult(Hits hits, String key) throws Exception {
        System.out.println(
"查找 "" + key + "" :");
        
if (hits != null{
            
if (hits.length() == 0{
                System.out.println(
"没有找到任何结果");
                System.out.println();
            }
 else {
                System.out.print(
"找到");
                
for (int i = 0; i < hits.length(); i++{
                    Document d 
= hits.doc(i);
                    String dname 
= d.get("title");
                    System.out.print(dname 
+ "   ");
                }

                System.out.println();
                System.out.println();
            }

        }

    }

}

运行结果:
C:/>java  PrefixQueryTest
查找 "前缀为'Da'的文档" :
没有找到任何结果

查找 "前缀为'da'的文档" :
找到doc1 doc2

查找 "前缀为'sm'的文档" :
找到doc3 doc4

注:Lucene的标准分析器在进行分词过滤时将所有的关键字一律转成了小写,所以有上述结果。

(5):多关键字搜索

 

package  ch11;

import  org.apache.lucene.analysis.standard.StandardAnalyzer;
import  org.apache.lucene.document.Document;
import  org.apache.lucene.document.Field;
import  org.apache.lucene.index.IndexWriter;
import  org.apache.lucene.index.Term;
import  org.apache.lucene.search.Hits;
import  org.apache.lucene.search.IndexSearcher;
import  org.apache.lucene.search.PhraseQuery;
import  org.apache.lucene.search.PrefixQuery;

public   class  PhraseQueryTest  {
    
public static void main(String[] args) throws Exception {
        Document doc1 
= new Document();
        doc1.add(Field.Text(
"content""david mary smith robert"));
        doc1.add(Field.Keyword(
"title""doc1"));

        IndexWriter writer 
= new IndexWriter("c:/java/index",
                
new StandardAnalyzer(), true);
        writer.setUseCompoundFile(
true);
        writer.addDocument(doc1);
        writer.close();

        IndexSearcher searcher 
= new IndexSearcher("c:/java/index");
        Term word1 
= new Term("content""david");
        Term word2 
= new Term("content","mary");
        Term word3 
= new Term("content","smith");
        Term word4 
= new Term("content","robert");
        
        Hits hits 
= null;
        PhraseQuery query 
= null;

        
// 第一种情况,两个词本身紧密相连,先设置坡度为0,再设置坡度为2
        query = new PhraseQuery();
        query.add(word1);
        query.add(word2);
        query.setSlop(
0);
        hits 
= searcher.search(query);
        printResult(hits, 
"'david'与'mary'紧紧相隔的Document");
        
        query.setSlop(
2);
        hits 
= searcher.search(query);
        printResult(hits, 
"'david'与'mary'中相隔两个词的短语");
        
        
// 第二种情况,两个词本身相隔两个词,先设置坡度为0,再设置坡度为2
        query = new PhraseQuery();
        query.add(word1);
        query.add(word4);
        query.setSlop(
0);
        hits 
= searcher.search(query);
        printResult(hits, 
"'david'与'robert'紧紧相隔的Document");
        
        query.setSlop(
2);
        hits 
= searcher.search(query);
        printResult(hits, 
"'david'与'robert'中相隔两个词的短语");
        

    }


    
public static void printResult(Hits hits, String key) throws Exception {
        System.out.println(
"查找 "" + key + "" :");
        
if (hits != null{
            
if (hits.length() == 0{
                System.out.println(
"没有找到任何结果");
                System.out.println();
            }
 else {
                System.out.print(
"找到");
                
for (int i = 0; i < hits.length(); i++{
                    Document d 
= hits.doc(i);
                    String dname 
= d.get("title");
                    System.out.print(dname 
+ "   ");
                }

                System.out.println();
                System.out.println();
            }

        }

    }

}

运行结果:
C:/java>java  PhraseQueryTest
查找 "'david'与'mary'紧紧相隔的Document" :
找到doc1

查找 "'david'与'mary'中相隔两个词的短语" :
找到doc1

查找 "'david'与'robert'紧紧相隔的Document" :
没有找到任何结果

查找 "'david'与'robert'中相隔两个词的短语" :
找到doc1

注:对两个紧连的关键字,无论将坡度设置为多少,Lucene总能找到它所在的文档,而对不紧连的关键字,如果坡度值小于它们之间无关词的数量,那么则无法找到。

(6):短语搜索

import  org.apache.lucene.analysis.standard.StandardAnalyzer;
import  org.apache.lucene.document.Document;
import  org.apache.lucene.document.Field;
import  org.apache.lucene.index.IndexWriter;
import  org.apache.lucene.index.Term;
import  org.apache.lucene.search.Hits;
import  org.apache.lucene.search.IndexSearcher;
import  org.apache.lucene.search.PhrasePrefixQuery;
import  org.apache.lucene.search.PhraseQuery;
import  org.apache.lucene.search.RangeQuery;

public   class  PhrasePrefixQueryTest  {
    
public static void main(String[] args) throws Exception {
        Document doc1 
= new Document();
        doc1.add(Field.Text(
"content""david mary smith robert"));
        doc1.add(Field.Keyword(
"title""doc1"));

        IndexWriter writer 
= new IndexWriter("c:/java/index",
                
new StandardAnalyzer(), true);
        writer.addDocument(doc1);
        writer.close();

        IndexSearcher searcher 
= new IndexSearcher("c:/java/index");
        Term word1 
= new Term("content""david");
        Term word2 
= new Term("content""mary");
        Term word3 
= new Term("content""smith");
        Term word4 
= new Term("content""robert");

        Hits hits 
= null;
        PhrasePrefixQuery query 
= null;
        
        query 
= new PhrasePrefixQuery();
        
// 加入可能的所有不确定的词
        query.add(new Term[]{word1, word2});
        
// 加入确定的词
        query.add(word4);
        query.setSlop(
2);
        hits 
= searcher.search(query);
        printResult(hits, 
"存在短语'david robert'或'mary robert'的文档");
        
        
    }


    
public static void printResult(Hits hits, String key) throws Exception {
        System.out.println(
"查找 "" + key + "" :");
        
if (hits != null{
            
if (hits.length() == 0{
                System.out.println(
"没有找到任何结果");
                System.out.println();
            }
 else {
                System.out.print(
"找到");
                
for (int i = 0; i < hits.length(); i++{
                    Document d 
= hits.doc(i);
                    String dname 
= d.get("title");
                    System.out.print(dname 
+ "   ");
                }

                System.out.println();
                System.out.println();
            }

        }

    }

}


运行结果:
C:/>java  PhrasePrefixQueryTest
查找 "存在短语'david robert'或'mary robert'的文档" :
找到doc1

(7):相近词搜索

import  org.apache.lucene.analysis.standard.StandardAnalyzer;
import  org.apache.lucene.document.Document;
import  org.apache.lucene.document.Field;
import  org.apache.lucene.index.IndexWriter;
import  org.apache.lucene.index.Term;
import  org.apache.lucene.search.FuzzyQuery;
import  org.apache.lucene.search.Hits;
import  org.apache.lucene.search.IndexSearcher;

public   class  FuzzyQueryTest  {
    
public static void main(String[] args) throws Exception {
        Document doc1 
= new Document();
        doc1.add(Field.Text(
"content""david"));
        doc1.add(Field.Keyword(
"title""doc1"));

        Document doc2 
= new Document();
        doc2.add(Field.Text(
"content""sdavid"));
        doc2.add(Field.Keyword(
"title""doc2"));

        Document doc3 
= new Document();
        doc3.add(Field.Text(
"content""davie"));
        doc3.add(Field.Keyword(
"title""doc3"));

        IndexWriter writer 
= new IndexWriter("c:/java/index",
                
new StandardAnalyzer(), true);
        writer.addDocument(doc1);
        writer.addDocument(doc2);
        writer.addDocument(doc3);
        writer.close();

        IndexSearcher searcher 
= new IndexSearcher("c:/java/index");
        Term word1 
= new Term("content""david");

        Hits hits 
= null;
        FuzzyQuery query 
= null;

        query 
= new FuzzyQuery(word1);
        hits 
= searcher.search(query);
        printResult(hits,
"与'david'相似的词");
    }


    
public static void printResult(Hits hits, String key) throws Exception {
        System.out.println(
"查找 "" + key + "" :");
        
if (hits != null{
            
if (hits.length() == 0{
                System.out.println(
"没有找到任何结果");
                System.out.println();
            }
 else {
                System.out.print(
"找到");
                
for (int i = 0; i < hits.length(); i++{
                    Document d 
= hits.doc(i);
                    String dname 
= d.get("title");
                    System.out.print(dname 
+ "   ");
                }

                System.out.println();
                System.out.println();
            }

        }

    }

}

(8):通配符搜索

import  org.apache.lucene.analysis.standard.StandardAnalyzer;
import  org.apache.lucene.document.Document;
import  org.apache.lucene.document.Field;
import  org.apache.lucene.index.IndexWriter;
import  org.apache.lucene.index.Term;
import  org.apache.lucene.search.Hits;
import  org.apache.lucene.search.IndexSearcher;
import  org.apache.lucene.search.WildcardQuery;

public   class  WildcardQueryTest  {
    
public static void main(String[] args) throws Exception {
        Document doc1 
= new Document();
        doc1.add(Field.Text(
"content""whatever"));
        doc1.add(Field.Keyword(
"title""doc1"));
        
        Document doc2 
= new Document();
        doc2.add(Field.Text(
"content""whoever"));
        doc2.add(Field.Keyword(
"title""doc2"));
        
        Document doc3 
= new Document();
        doc3.add(Field.Text(
"content""however"));
        doc3.add(Field.Keyword(
"title""doc3"));
        
        Document doc4 
= new Document();
        doc4.add(Field.Text(
"content""everest"));
        doc4.add(Field.Keyword(
"title""doc4"));
        
        IndexWriter writer 
= new IndexWriter("c:/java/index",
                
new StandardAnalyzer(), true);
        writer.addDocument(doc1);
        writer.addDocument(doc2);
        writer.addDocument(doc3);
        writer.addDocument(doc4);
        writer.close();

        IndexSearcher searcher 
= new IndexSearcher("c:/java/index");
        Term word1 
= new Term("content""*ever");
        Term word2 
= new Term("content""wh?ever");
        Term word3 
= new Term("content""h??ever");
        Term word4 
= new Term("content""ever*");
        WildcardQuery query 
= null;
        Hits hits 
= null;
        
        query 
= new WildcardQuery(word1);
        hits 
= searcher.search(query);
        printResult(hits, 
"*ever");
        
        query 
= new WildcardQuery(word2);
        hits 
= searcher.search(query);
        printResult(hits, 
"wh?ever");
        
        query 
= new WildcardQuery(word3);
        hits 
= searcher.search(query);
        printResult(hits, 
"h??ever");
        
        query 
= new WildcardQuery(word4);
        hits 
= searcher.search(query);
        printResult(hits, 
"ever*");
    }

    
    
public static void printResult(Hits hits, String key) throws Exception {
        System.out.println(
"查找 "" + key + "" :");
        
if (hits != null{
            
if (hits.length() == 0{
                System.out.println(
"没有找到任何结果");
                System.out.println();
            }
 else {
                System.out.print(
"找到");
                
for (int i = 0; i < hits.length(); i++{
                    Document d 
= hits.doc(i);
                    String dname 
= d.get("title");
                    System.out.print(dname 
+ "   ");
                }

                System.out.println();
                System.out.println();
            }

        }

    }

}

运行结果:
C:/>java   WildcardQueryTest
查找 "*ever" :
找到doc1   doc2   doc3

查找 "wh?ever" :
找到doc2

查找 "h??ever" :
找到doc3

查找 "ever*" :
找到doc4

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值