(1):TermQuery
/**/
/*
* Created on 2006-3-11
*/
package
ch11;

import
org.apache.lucene.analysis.standard.StandardAnalyzer;
import
org.apache.lucene.document.Document;
import
org.apache.lucene.document.Field;
import
org.apache.lucene.index.IndexWriter;
import
org.apache.lucene.index.Term;
import
org.apache.lucene.search.Hits;
import
org.apache.lucene.search.IndexSearcher;
import
org.apache.lucene.search.Query;
import
org.apache.lucene.search.TermQuery;

public
class
TermQueryTest

...
{
public static void main(String[] args) throws Exception

...{
Document doc1 = new Document();
doc1.add(Field.Text("name", "word1 word2 word3"));
doc1.add(Field.Keyword("title", "doc1"));

IndexWriter writer = new IndexWriter("c:/java/index", new StandardAnalyzer(), true);
writer.addDocument(doc1);
writer.close();

Query query = null;
Hits hits = null;
IndexSearcher searcher = new IndexSearcher("c:/java/index");
query = new TermQuery(new Term("name","word1"));
hits = searcher.search(query);
printResult(hits, "word1");
query = new TermQuery(new Term("title","doc1"));
hits = searcher.search(query);
printResult(hits, "doc1");

}

public static void printResult(Hits hits, String key) throws Exception

...{
System.out.println("查找 "" + key + "" :");
if (hits != null)

...{
if (hits.length() == 0)

...{
System.out.println("没有找到任何结果");
}
else

...{
System.out.println("找到" + hits.length() + "个结果");
for (int i = 0; i < hits.length(); i++)

...{
Document d = hits.doc(i);
String dname = d.get("title");
System.out.print(dname + " ");
}
System.out.println();
System.out.println();
}
}
}
}

运行结果:
C:/>java TermQueryTest
查找 "word1" :
找到1个结果
doc1
查找 "doc1" :
找到1个结果
doc1
(2):与或搜索
/**/
/*
* Created on 2006-3-11
*/
import
org.apache.lucene.analysis.standard.StandardAnalyzer;
import
org.apache.lucene.document.Document;
import
org.apache.lucene.document.Field;
import
org.apache.lucene.index.IndexWriter;
import
org.apache.lucene.index.Term;
import
org.apache.lucene.search.BooleanQuery;
import
org.apache.lucene.search.Hits;
import
org.apache.lucene.search.IndexSearcher;
import
org.apache.lucene.search.Query;
import
org.apache.lucene.search.TermQuery;

public
class
BooleanQueryTest1

...
{

public static void main (String [] args) throws Exception ...{
Document doc1 = new Document();
doc1.add(Field.Text("name", "word1 word2 word3"));
doc1.add(Field.Keyword("title", "doc1"));
Document doc2 = new Document();
doc2.add(Field.Text("name", "word1 word4 word5"));
doc2.add(Field.Keyword("title", "doc2"));
Document doc3 = new Document();
doc3.add(Field.Text("name", "word1 word2 word6"));
doc3.add(Field.Keyword("title", "doc3"));
IndexWriter writer = new IndexWriter("c:/java/index", new StandardAnalyzer(), true);
writer.addDocument(doc1);
writer.addDocument(doc2);
writer.addDocument(doc3);
writer.close();
Query query1 = null;
Query query2 = null;
BooleanQuery query = null;
Hits hits = null;
IndexSearcher searcher = new IndexSearcher("c:/java/index");
query1 = new TermQuery(new Term("name","word1"));
query2 = new TermQuery(new Term("name","word2"));
// 构造一个布尔查询
query = new BooleanQuery();

// 添加两个子查询,与关系
// query.add(query1, true, false);
// query.add(query2, true, false);
// 添加两个子查询,或关系
query.add(query1, false, false);
query.add(query2, false, false);
hits = searcher.search(query);
printResult(hits, "word1和word2");
}
public static void printResult(Hits hits, String key) throws Exception

...{
System.out.println("查找 "" + key + "" :");
if (hits != null)

...{
if (hits.length() == 0)

...{
System.out.println("没有找到任何结果");
}
else

...{
System.out.println("找到" + hits.length() + "个结果");
for (int i = 0; i < hits.length(); i++)

...{
Document d = hits.doc(i);
String dname = d.get("title");
System.out.print(dname + " ");
}
System.out.println();
System.out.println();
}
}
}
}
运行结果:
C:/>java BooleanQueryTest1
查找 "word1和word2" :
找到3个结果
doc1 doc3 doc2
(3):在范围内搜索
import
org.apache.lucene.analysis.standard.StandardAnalyzer;
import
org.apache.lucene.document.Document;
import
org.apache.lucene.document.Field;
import
org.apache.lucene.index.IndexWriter;
import
org.apache.lucene.index.Term;
import
org.apache.lucene.search.Hits;
import
org.apache.lucene.search.IndexSearcher;
import
org.apache.lucene.search.RangeQuery;


public
class
RangeQueryTest
...
{

public static void main (String [] args) throws Exception ...{
Document doc1 = new Document();
doc1.add(Field.Text("time", "200001"));
doc1.add(Field.Keyword("title", "doc1"));
Document doc2 = new Document();
doc2.add(Field.Text("time", "200002"));
doc2.add(Field.Keyword("title", "doc2"));
Document doc3 = new Document();
doc3.add(Field.Text("time", "200003"));
doc3.add(Field.Keyword("title", "doc3"));
Document doc4 = new Document();
doc4.add(Field.Text("time", "200004"));
doc4.add(Field.Keyword("title", "doc4"));
Document doc5 = new Document();
doc5.add(Field.Text("time", "200005"));
doc5.add(Field.Keyword("title", "doc5"));
IndexWriter writer = new IndexWriter("c:/java/index", new StandardAnalyzer(), true);
writer.setUseCompoundFile(true);
writer.addDocument(doc1);
writer.addDocument(doc2);
writer.addDocument(doc3);
writer.addDocument(doc4);
writer.addDocument(doc5);
writer.close();
IndexSearcher searcher = new IndexSearcher("c:/java/index");
Term beginTime = new Term("time","200001");
Term endTime = new Term("time","200005");
Hits hits = null;
RangeQuery query = null;
query = new RangeQuery(beginTime, endTime, false);
hits = searcher.search(query);
printResult(hits, "从200001到200005的文档,不包括200001和200005");
query = new RangeQuery(beginTime, endTime, true);
hits = searcher.search(query);
printResult(hits, "从200001到200005的文档,包括200001和200005");
}

public static void printResult(Hits hits, String key) throws Exception ...{
System.out.println("查找 "" + key + "" :");

if (hits != null) ...{

if (hits.length() == 0) ...{
System.out.println("没有找到任何结果");

} else ...{
System.out.print("找到");

for (int i = 0; i < hits.length(); i++) ...{
Document d = hits.doc(i);
String dname = d.get("title");
System.out.print(dname + " " );
}
System.out.println();
System.out.println();
}
}
}
}
运行结果:
C:/java>java RangeQueryTest
查找 "从200001到200005的文档,不包括200001和200005" :
找到doc2 doc3 doc4
查找 "从200001到200005的文档,包括200001和200005" :
找到doc1 doc2 doc3 doc4 doc5
(4):使用前缀搜索
import
org.apache.lucene.analysis.standard.StandardAnalyzer;
import
org.apache.lucene.document.Document;
import
org.apache.lucene.document.Field;
import
org.apache.lucene.index.IndexWriter;
import
org.apache.lucene.index.Term;
import
org.apache.lucene.search.Hits;
import
org.apache.lucene.search.IndexSearcher;
import
org.apache.lucene.search.PrefixQuery;
import
org.apache.lucene.search.RangeQuery;


public
class
PrefixQueryTest
...
{

public static void main(String[] args) throws Exception ...{
Document doc1 = new Document();
doc1.add(Field.Text("name", "David"));
doc1.add(Field.Keyword("title", "doc1"));

Document doc2 = new Document();
doc2.add(Field.Text("name", "Darwen"));
doc2.add(Field.Keyword("title", "doc2"));

Document doc3 = new Document();
doc3.add(Field.Text("name", "Smith"));
doc3.add(Field.Keyword("title", "doc3"));

Document doc4 = new Document();
doc4.add(Field.Text("name", "Smart"));
doc4.add(Field.Keyword("title", "doc4"));

IndexWriter writer = new IndexWriter("c:/java/index",
new StandardAnalyzer(), true);
writer.setUseCompoundFile(true);
writer.addDocument(doc1);
writer.addDocument(doc2);
writer.addDocument(doc3);
writer.addDocument(doc4);
writer.close();

IndexSearcher searcher = new IndexSearcher("c:/java/index");
Term pre1 = new Term("name", "Da");
Term pre2 = new Term("name", "da");
Term pre3 = new Term("name", "sm");

Hits hits = null;
PrefixQuery query = null;

query = new PrefixQuery(pre1);
hits = searcher.search(query);
printResult(hits, "前缀为'Da'的文档");
query = new PrefixQuery(pre2);
hits = searcher.search(query);
printResult(hits, "前缀为'da'的文档");
query = new PrefixQuery(pre3);
hits = searcher.search(query);
printResult(hits, "前缀为'sm'的文档");

}


public static void printResult(Hits hits, String key) throws Exception ...{
System.out.println("查找 "" + key + "" :");

if (hits != null) ...{

if (hits.length() == 0) ...{
System.out.println("没有找到任何结果");
System.out.println();

} else ...{
System.out.print("找到");

for (int i = 0; i < hits.length(); i++) ...{
Document d = hits.doc(i);
String dname = d.get("title");
System.out.print(dname + " ");
}
System.out.println();
System.out.println();
}
}
}
}
运行结果:
C:/>java PrefixQueryTest
查找 "前缀为'Da'的文档" :
没有找到任何结果
查找 "前缀为'da'的文档" :
找到doc1 doc2
查找 "前缀为'sm'的文档" :
找到doc3 doc4
注:Lucene的标准分析器在进行分词过滤时将所有的关键字一律转成了小写,所以有上述结果。
(5):多关键字搜索
package
ch11;

import
org.apache.lucene.analysis.standard.StandardAnalyzer;
import
org.apache.lucene.document.Document;
import
org.apache.lucene.document.Field;
import
org.apache.lucene.index.IndexWriter;
import
org.apache.lucene.index.Term;
import
org.apache.lucene.search.Hits;
import
org.apache.lucene.search.IndexSearcher;
import
org.apache.lucene.search.PhraseQuery;
import
org.apache.lucene.search.PrefixQuery;


public
class
PhraseQueryTest
...
{

public static void main(String[] args) throws Exception ...{
Document doc1 = new Document();
doc1.add(Field.Text("content", "david mary smith robert"));
doc1.add(Field.Keyword("title", "doc1"));

IndexWriter writer = new IndexWriter("c:/java/index",
new StandardAnalyzer(), true);
writer.setUseCompoundFile(true);
writer.addDocument(doc1);
writer.close();

IndexSearcher searcher = new IndexSearcher("c:/java/index");
Term word1 = new Term("content", "david");
Term word2 = new Term("content","mary");
Term word3 = new Term("content","smith");
Term word4 = new Term("content","robert");
Hits hits = null;
PhraseQuery query = null;

// 第一种情况,两个词本身紧密相连,先设置坡度为0,再设置坡度为2
query = new PhraseQuery();
query.add(word1);
query.add(word2);
query.setSlop(0);
hits = searcher.search(query);
printResult(hits, "'david'与'mary'紧紧相隔的Document");
query.setSlop(2);
hits = searcher.search(query);
printResult(hits, "'david'与'mary'中相隔两个词的短语");
// 第二种情况,两个词本身相隔两个词,先设置坡度为0,再设置坡度为2
query = new PhraseQuery();
query.add(word1);
query.add(word4);
query.setSlop(0);
hits = searcher.search(query);
printResult(hits, "'david'与'robert'紧紧相隔的Document");
query.setSlop(2);
hits = searcher.search(query);
printResult(hits, "'david'与'robert'中相隔两个词的短语");

}


public static void printResult(Hits hits, String key) throws Exception ...{
System.out.println("查找 "" + key + "" :");

if (hits != null) ...{

if (hits.length() == 0) ...{
System.out.println("没有找到任何结果");
System.out.println();

} else ...{
System.out.print("找到");

for (int i = 0; i < hits.length(); i++) ...{
Document d = hits.doc(i);
String dname = d.get("title");
System.out.print(dname + " ");
}
System.out.println();
System.out.println();
}
}
}
}
运行结果:
C:/java>java PhraseQueryTest
查找 "'david'与'mary'紧紧相隔的Document" :
找到doc1
查找 "'david'与'mary'中相隔两个词的短语" :
找到doc1
查找 "'david'与'robert'紧紧相隔的Document" :
没有找到任何结果
查找 "'david'与'robert'中相隔两个词的短语" :
找到doc1
注:对两个紧连的关键字,无论将坡度设置为多少,Lucene总能找到它所在的文档,而对不紧连的关键字,如果坡度值小于它们之间无关词的数量,那么则无法找到。
(6):短语搜索
import
org.apache.lucene.analysis.standard.StandardAnalyzer;
import
org.apache.lucene.document.Document;
import
org.apache.lucene.document.Field;
import
org.apache.lucene.index.IndexWriter;
import
org.apache.lucene.index.Term;
import
org.apache.lucene.search.Hits;
import
org.apache.lucene.search.IndexSearcher;
import
org.apache.lucene.search.PhrasePrefixQuery;
import
org.apache.lucene.search.PhraseQuery;
import
org.apache.lucene.search.RangeQuery;


public
class
PhrasePrefixQueryTest
...
{

public static void main(String[] args) throws Exception ...{
Document doc1 = new Document();
doc1.add(Field.Text("content", "david mary smith robert"));
doc1.add(Field.Keyword("title", "doc1"));

IndexWriter writer = new IndexWriter("c:/java/index",
new StandardAnalyzer(), true);
writer.addDocument(doc1);
writer.close();

IndexSearcher searcher = new IndexSearcher("c:/java/index");
Term word1 = new Term("content", "david");
Term word2 = new Term("content", "mary");
Term word3 = new Term("content", "smith");
Term word4 = new Term("content", "robert");

Hits hits = null;
PhrasePrefixQuery query = null;
query = new PhrasePrefixQuery();
// 加入可能的所有不确定的词

query.add(new Term[]...{word1, word2});
// 加入确定的词
query.add(word4);
query.setSlop(2);
hits = searcher.search(query);
printResult(hits, "存在短语'david robert'或'mary robert'的文档");
}


public static void printResult(Hits hits, String key) throws Exception ...{
System.out.println("查找 "" + key + "" :");

if (hits != null) ...{

if (hits.length() == 0) ...{
System.out.println("没有找到任何结果");
System.out.println();

} else ...{
System.out.print("找到");

for (int i = 0; i < hits.length(); i++) ...{
Document d = hits.doc(i);
String dname = d.get("title");
System.out.print(dname + " ");
}
System.out.println();
System.out.println();
}
}
}
}
运行结果:
C:/>java PhrasePrefixQueryTest
查找 "存在短语'david robert'或'mary robert'的文档" :
找到doc1
(7):相近词搜索
import
org.apache.lucene.analysis.standard.StandardAnalyzer;
import
org.apache.lucene.document.Document;
import
org.apache.lucene.document.Field;
import
org.apache.lucene.index.IndexWriter;
import
org.apache.lucene.index.Term;
import
org.apache.lucene.search.FuzzyQuery;
import
org.apache.lucene.search.Hits;
import
org.apache.lucene.search.IndexSearcher;


public
class
FuzzyQueryTest
...
{

public static void main(String[] args) throws Exception ...{
Document doc1 = new Document();
doc1.add(Field.Text("content", "david"));
doc1.add(Field.Keyword("title", "doc1"));

Document doc2 = new Document();
doc2.add(Field.Text("content", "sdavid"));
doc2.add(Field.Keyword("title", "doc2"));

Document doc3 = new Document();
doc3.add(Field.Text("content", "davie"));
doc3.add(Field.Keyword("title", "doc3"));

IndexWriter writer = new IndexWriter("c:/java/index",
new StandardAnalyzer(), true);
writer.addDocument(doc1);
writer.addDocument(doc2);
writer.addDocument(doc3);
writer.close();

IndexSearcher searcher = new IndexSearcher("c:/java/index");
Term word1 = new Term("content", "david");

Hits hits = null;
FuzzyQuery query = null;

query = new FuzzyQuery(word1);
hits = searcher.search(query);
printResult(hits,"与'david'相似的词");
}


public static void printResult(Hits hits, String key) throws Exception ...{
System.out.println("查找 "" + key + "" :");

if (hits != null) ...{

if (hits.length() == 0) ...{
System.out.println("没有找到任何结果");
System.out.println();

} else ...{
System.out.print("找到");

for (int i = 0; i < hits.length(); i++) ...{
Document d = hits.doc(i);
String dname = d.get("title");
System.out.print(dname + " ");
}
System.out.println();
System.out.println();
}
}
}
}
(8):通配符搜索
import
org.apache.lucene.analysis.standard.StandardAnalyzer;
import
org.apache.lucene.document.Document;
import
org.apache.lucene.document.Field;
import
org.apache.lucene.index.IndexWriter;
import
org.apache.lucene.index.Term;
import
org.apache.lucene.search.Hits;
import
org.apache.lucene.search.IndexSearcher;
import
org.apache.lucene.search.WildcardQuery;


public
class
WildcardQueryTest
...
{

public static void main(String[] args) throws Exception ...{
Document doc1 = new Document();
doc1.add(Field.Text("content", "whatever"));
doc1.add(Field.Keyword("title", "doc1"));
Document doc2 = new Document();
doc2.add(Field.Text("content", "whoever"));
doc2.add(Field.Keyword("title", "doc2"));
Document doc3 = new Document();
doc3.add(Field.Text("content", "however"));
doc3.add(Field.Keyword("title", "doc3"));
Document doc4 = new Document();
doc4.add(Field.Text("content", "everest"));
doc4.add(Field.Keyword("title", "doc4"));
IndexWriter writer = new IndexWriter("c:/java/index",
new StandardAnalyzer(), true);
writer.addDocument(doc1);
writer.addDocument(doc2);
writer.addDocument(doc3);
writer.addDocument(doc4);
writer.close();

IndexSearcher searcher = new IndexSearcher("c:/java/index");
Term word1 = new Term("content", "*ever");
Term word2 = new Term("content", "wh?ever");
Term word3 = new Term("content", "h??ever");
Term word4 = new Term("content", "ever*");
WildcardQuery query = null;
Hits hits = null;
query = new WildcardQuery(word1);
hits = searcher.search(query);
printResult(hits, "*ever");
query = new WildcardQuery(word2);
hits = searcher.search(query);
printResult(hits, "wh?ever");
query = new WildcardQuery(word3);
hits = searcher.search(query);
printResult(hits, "h??ever");
query = new WildcardQuery(word4);
hits = searcher.search(query);
printResult(hits, "ever*");
}

public static void printResult(Hits hits, String key) throws Exception ...{
System.out.println("查找 "" + key + "" :");

if (hits != null) ...{

if (hits.length() == 0) ...{
System.out.println("没有找到任何结果");
System.out.println();

} else ...{
System.out.print("找到");

for (int i = 0; i < hits.length(); i++) ...{
Document d = hits.doc(i);
String dname = d.get("title");
System.out.print(dname + " ");
}
System.out.println();
System.out.println();
}
}
}
}
运行结果:
C:/>java WildcardQueryTest
查找 "*ever" :
找到doc1 doc2 doc3
查找 "wh?ever" :
找到doc2
查找 "h??ever" :
找到doc3
查找 "ever*" :
找到doc4