Writing a Search Engine from Scratch (常搜吧 Journey 3, Search 2) (Java, Lucene, Hadoop)

Continuing from the previous section (Search 1), let's take a deeper look at Lucene's other search features.


Implementing paginated search in Lucene:

package com.qianyan.lucene;

import java.io.IOException;

import org.apache.lucene.document.Document;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

public class TestSeacher3 {

	public static void main(String[] args) throws IOException {
		String indexDir = "E:/luceneindex";
		Directory dir = FSDirectory.getDirectory(indexDir);
		IndexSearcher searcher = new IndexSearcher(dir);
		ScoreDoc[] hits = null;
		
		Term term = new Term("contents", "ontology");
		TermQuery query = new TermQuery(term);
		TopDocs topDocs = searcher.search(query, 126);
		int eachPageNumber = 10;	// records per page
		int pageNumber = 3;			// current page (1-based)
		hits = topDocs.scoreDocs;
		
		for(int i = (pageNumber - 1) * eachPageNumber; i < pageNumber * eachPageNumber && i < hits.length; i++){	// guard against running past the last hit
			Document doc = searcher.doc(hits[i].doc);
			System.out.print(hits[i].score);
			System.out.println(doc.get("contents"));
		}
		
		searcher.close();
		dir.close();
	}
}
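As a small follow-up sketch (not part of the original example, same Lucene 2.x API as above): the total page count can be derived from TopDocs.totalHits, which a UI needs in order to render pagination controls. These lines could go after eachPageNumber is defined in the main method above:

		// total number of documents that matched (TopDocs.totalHits is an int in Lucene 2.x)
		int totalHits = topDocs.totalHits;
		// ceiling division: number of pages needed at eachPageNumber results per page
		int totalPages = (totalHits + eachPageNumber - 1) / eachPageNumber;
		System.out.println("totalHits=" + totalHits + ", totalPages=" + totalPages);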

Using RAMDirectory for searching:

package com.qianyan.lucene;

import java.io.IOException;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;

public class TestRamSearch {

	Directory directory = new RAMDirectory();	// the index lives entirely in memory
	
	public void createRamIndex() throws IOException{
		String[] ids = {"1", "2", "3", "4"};
		String[] names = {"zhangsan", "lisi", "wangwu", "zhaoliu"};
		String[] addresses = {"shanghai", "beijing", "guangzhou", "nanjing"};
		String[] birthdays = {"19820720", "19840203", "19770409", "19830130"};
		Analyzer analyzer = new StandardAnalyzer();
		IndexWriter writer = new IndexWriter(directory, analyzer, true, IndexWriter.MaxFieldLength.LIMITED);
		for(int i = 0; i < ids.length; i++){
			Document document = new Document();
			document.add(new Field("id", ids[i], Field.Store.YES, Field.Index.ANALYZED));
			document.add(new Field("name", names[i], Field.Store.YES, Field.Index.ANALYZED));
			document.add(new Field("address", addresses[i], Field.Store.YES, Field.Index.ANALYZED));
			document.add(new Field("birthday", birthdays[i], Field.Store.YES, Field.Index.ANALYZED));
			writer.addDocument(document);
		}
		writer.optimize();	// merge index segments for faster searching (Lucene 2.x)
		writer.close();
	}
	
	public void searchRam() throws IOException{
		IndexSearcher searcher = new IndexSearcher(directory);
		ScoreDoc[] hits = null;
		
		Term term = new Term("name", "zhangsan");
		TermQuery query = new TermQuery(term);
		TopDocs topDocs = searcher.search(query, 126);
	
		hits = topDocs.scoreDocs;
		
		for(int i = 0; i < hits.length; i++){
			Document doc = searcher.doc(hits[i].doc);
			//System.out.println(hits[i].score);
			System.out.print(doc.get("id") + " ");
			System.out.print(doc.get("name") + " ");
			System.out.print(doc.get("address") + " ");
			System.out.println(doc.get("birthday") + " ");
		}
		
		searcher.close();
		directory.close();
	}
	
	public static void main(String[] args) throws IOException {
		TestRamSearch trs = new TestRamSearch();
		trs.createRamIndex();
		trs.searchRam();
	}
}
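RAMDirectory keeps the entire index in memory, so it is fast but lost when the JVM exits; a common pattern is to copy an existing on-disk index into memory for read-heavy searching. Below is a minimal sketch of that idea, assuming the same E:/luceneindex index and Lucene 2.x API used earlier in this post (RAMDirectory has a constructor that copies another Directory):

package com.qianyan.lucene;

import java.io.IOException;

import org.apache.lucene.document.Document;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.RAMDirectory;

public class TestFsToRamSearch {

	public static void main(String[] args) throws IOException {
		// copy the on-disk index into memory; subsequent searches avoid disk I/O
		Directory fsDir = FSDirectory.getDirectory("E:/luceneindex");
		Directory ramDir = new RAMDirectory(fsDir);
		fsDir.close();

		IndexSearcher searcher = new IndexSearcher(ramDir);
		TopDocs topDocs = searcher.search(new TermQuery(new Term("contents", "ontology")), 10);
		for (ScoreDoc hit : topDocs.scoreDocs) {
			Document doc = searcher.doc(hit.doc);
			System.out.println(hit.score + " " + doc.get("contents"));
		}

		searcher.close();
		ramDir.close();
	}
}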

Using QueryParser:

1. Searching the default field

2. Searching a specified field

3. OR queries on the default field

4. AND queries on the default field

5. AND NOT queries

6. Prefix queries on the default field

7. Phrase queries

package com.qianyan.lucene;

import java.io.IOException;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocCollector;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

public class TestQueryParser {

	public static void main(String[] args) throws IOException, ParseException {
		Analyzer analyzer = new StandardAnalyzer();
		String indexDir = "E:/luceneindex";
		Directory dir = FSDirectory.getDirectory(indexDir);
		IndexSearcher searcher = new IndexSearcher(dir);
		ScoreDoc[] hits = null;
		
		QueryParser parser = new QueryParser("address", analyzer);		//name为默认字段检索
		//Query query = parser.parse("address:resides in");			//支持短语搜索
		//Query query = parser.parse("birthday:[19820720 TO 19840203]"); //中括号包含首尾,花括号不包含。TO指范围
		//Query query = parser.parse("zhangsan~");	//前缀检索
		//Query query = parser.parse("shanghai beijing");	//"或"形式有三种:1、空格:shanghai beijing 3、or关键字:shanghai or beijing
		//Query query = parser.parse("shanghai and beijing");	//"与"形式有三种:1、+号:shanghai beijing 2、and关键字:shanghai and beijing
		//Query query = parser.parse("address:shanghai beijing AND NOT name:wangwu");	//AND NOT 不满足 ; "-"号也等于 AND NOT
		Query query = parser.parse("name:li*"); //前缀检索
		TopDocCollector topdoc = new TopDocCollector(100);
		
		searcher.search(query, topdoc);
		hits = topdoc.topDocs().scoreDocs;
		
		for(int i = 0; i < hits.length; i++){
			Document doc = searcher.doc(hits[i].doc);
			//System.out.println(hits[i].score);
			System.out.print(doc.get("id") + " ");
			System.out.print(doc.get("name") + " ");
			System.out.print(doc.get("address") + " ");
			System.out.println(doc.get("birthday") + " ");
		}
		
		searcher.close();
		dir.close();
	}
}
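Two small follow-up notes, sketched against the same Lucene 2.x API (not part of the original example): the parser's default operator can be switched from OR to AND, and Query.toString() is a convenient way to see how a query string was actually interpreted. These lines could go after the parser is constructed in the class above:

		// make whitespace-separated terms behave as AND instead of the default OR
		parser.setDefaultOperator(QueryParser.AND_OPERATOR);
		// toString() shows the parsed form, e.g. "+address:shanghai +address:beijing"
		Query q = parser.parse("shanghai beijing");
		System.out.println(q.toString());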


