Lucene 的再查询分页搜索与基于 searchAfter 的分页搜索实现

最新推荐文章于 2019-09-25 16:12:00 发布

原创最新推荐文章于 2019-09-25 16:12:00 发布 · 904 阅读

1 ·

CC 4.0 BY-SA版权

lucene 专栏收录该内容

10 篇文章

订阅专栏

本文详细介绍了如何在Lucene中实现高效的分页搜索，特别是利用searchAfter方法进行深度分页。通过实例代码，展示了如何有效避免传统分页带来的性能问题，提升查询效率。

摘要生成于 C知道，由 DeepSeek-R1 满血版支持，前往体验 >

package com.dhb.search;

import java.io.File;
import java.io.FileReader;
import java.io.IOException;

import org.apache.commons.io.FileUtils;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.NumericField;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.LockObtainFailedException;
import org.apache.lucene.util.Version;

/**
 * Helper that owns the shared on-disk index directory and (re)builds the index
 * from the files found in a fixed example folder.
 *
 * Each indexed document carries the fields {@code content} (file text, read through
 * a {@link FileReader}), {@code filename}, {@code path}, {@code date} (last-modified
 * time) and {@code size} (length in KiB, truncated to int).
 */
public class FileIndexUtils {
	/** Shared index location; remains {@code null} if opening it failed in the static initializer. */
	private static Directory directory = null;
	static {
		try {
			directory = FSDirectory.open(new File("D:/luceneData/files/"));
		} catch (IOException e) {
			e.printStackTrace();
		}
	}

	/**
	 * @return the shared index directory, or {@code null} when it could not be opened
	 */
	public static Directory getDirectory() {
		return directory;
	}

	/**
	 * Adds one document per regular file in {@code D:/luceneData/example} to the index.
	 *
	 * @param hasNew when {@code true}, all existing documents are deleted first so the
	 *               index is rebuilt from scratch; otherwise documents are appended
	 */
	public static void index(boolean hasNew) {
		File f = new File("D:/luceneData/example");
		// listFiles() returns null when the path is missing or not a directory.
		// Check this before opening the writer so a missing source folder can
		// neither cause a NullPointerException nor wipe the existing index.
		File[] files = f.listFiles();
		if (files == null) {
			System.err.println("Cannot list files in " + f.getAbsolutePath());
			return;
		}
		IndexWriter writer = null;
		try {
			IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_35, 
					new StandardAnalyzer(Version.LUCENE_35));
			writer = new IndexWriter(directory, iwc);
			// Start from an empty index when a rebuild was requested.
			if(hasNew) {
				writer.deleteAll();
			}
			Document doc = null;
			for (File file : files) {
				// Sub-directories cannot be opened with FileReader; skip them instead
				// of letting one entry abort indexing of the remaining files.
				if (!file.isFile()) {
					continue;
				}
				doc = new Document();
				doc.add(new Field("content", new FileReader(file)));
				doc.add(new Field("filename", file.getName(), Field.Store.YES, Field.Index.NOT_ANALYZED));
				doc.add(new Field("path",file.getAbsolutePath(),Field.Store.YES,Field.Index.NOT_ANALYZED));
				doc.add(new NumericField("date", Field.Store.YES, true).setLongValue(file.lastModified()));
				doc.add(new NumericField("size", Field.Store.YES, true).setIntValue((int) (file.length()/1024)));
				
				writer.addDocument(doc);
			}
		} catch (CorruptIndexException e) {
			e.printStackTrace();
		} catch (LockObtainFailedException e) {
			e.printStackTrace();
		} catch (IOException e) {
			e.printStackTrace();
		} finally {
			// Always release the writer (and its write lock), even after a failure.
			if(writer!=null) {
				try {
					writer.close();
				} catch (CorruptIndexException e) {
					e.printStackTrace();
				} catch (IOException e) {
					e.printStackTrace();
				}
			}
		}
	}
}

/**
	 * Builds a searcher on top of the cached {@code reader} field (declared outside
	 * this excerpt), opening the reader on first use and swapping in a fresh one
	 * when {@code IndexReader.openIfChanged} hands back a non-null replacement.
	 *
	 * @param dir index directory used only when no reader has been opened yet
	 * @return a new searcher over the current reader, or {@code null} if an
	 *         I/O error occurred (the stack trace is printed)
	 */
	public IndexSearcher getSearcher(Directory dir) {
		try {
			if (reader != null) {
				IndexReader refreshed = IndexReader.openIfChanged(reader);
				if (refreshed != null) {
					// Release the stale reader before switching to the new one.
					reader.close();
					reader = refreshed;
				}
			} else {
				reader = IndexReader.open(dir);
			}
			return new IndexSearcher(reader);
		} catch (IOException e) {
			// CorruptIndexException is an IOException and is reported the same way.
			e.printStackTrace();
			return null;
		}
	}
	
	/**
	 * Prints one page of results by re-running the query and slicing the hit array.
	 *
	 * Every call fetches the first {@code pageIndex * pageSize} hits and prints only
	 * the last page's worth, so cost grows with the page number. A page beyond the
	 * end of the result set prints nothing.
	 *
	 * @param query     query string parsed against the {@code content} field
	 * @param pageIndex 1-based page number
	 * @param pageSize  number of hits per page
	 * @throws IllegalArgumentException if {@code pageIndex} or {@code pageSize} is
	 *         less than 1, or their product does not fit in an {@code int}
	 */
	public void searchPage(String query, int pageIndex, int pageSize) {
		if (pageIndex < 1 || pageSize < 1) {
			throw new IllegalArgumentException(
					"pageIndex and pageSize must be >= 1: pageIndex=" + pageIndex + ", pageSize=" + pageSize);
		}
		long wanted = (long) pageIndex * pageSize;
		if (wanted > Integer.MAX_VALUE) {
			throw new IllegalArgumentException("page window too large: " + wanted);
		}
		Directory directory = FileIndexUtils.getDirectory();
		IndexSearcher searcher = getSearcher(directory);
		if (searcher == null) {
			// getSearcher has already printed the cause.
			return;
		}
		QueryParser parser = new QueryParser(Version.LUCENE_35, "content",
				new StandardAnalyzer(Version.LUCENE_35));
		try {
			Query q = parser.parse(query);
			int start = (pageIndex-1)*pageSize;
			int end = (int) wanted;
			// Ask for exactly as many hits as the requested page needs instead of a
			// fixed 500, so pages past the 500th hit are reachable.
			TopDocs tds = searcher.search(q, end);
			ScoreDoc[] sds = tds.scoreDocs;
			
			// Fewer hits than 'end' may come back (last or out-of-range page); clamp
			// the loop so it never indexes past the array.
			int stop = Math.min(end, sds.length);
			for (int i = start; i < stop; i++) {
				Document doc = searcher.doc(sds[i].doc);
				System.out.println(i+": "+doc.get("path")+"-->"+doc.get("filename")+"---"+sds[i].doc);
			}
		} catch (org.apache.lucene.queryParser.ParseException e) {
			e.printStackTrace();
		} catch (IOException e) {
			e.printStackTrace();
		}
		
	}
	
	
	/**
	 * Runs the query against the {@code content} field and prints up to 500 hits,
	 * each prefixed with its position in the result list.
	 *
	 * @param query query string to parse
	 */
	public void searchNoPage(String query) {
		IndexSearcher searcher = getSearcher(FileIndexUtils.getDirectory());
		QueryParser parser = new QueryParser(Version.LUCENE_35, "content",
				new StandardAnalyzer(Version.LUCENE_35));
		try {
			ScoreDoc[] hits = searcher.search(parser.parse(query), 500).scoreDocs;
			int position = 0;
			for (ScoreDoc hit : hits) {
				Document stored = searcher.doc(hit.doc);
				System.out.println(position+": "+stored.get("path")+"-->"+stored.get("filename"));
				position++;
			}
		} catch (org.apache.lucene.queryParser.ParseException e) {
			e.printStackTrace();
		} catch (IOException e) {
			e.printStackTrace();
		}
	}
	
	/**
	 * Prints one page of results using {@code searchAfter}: the last hit of the
	 * previous page is located first and used as the anchor for the next
	 * {@code pageSize} hits.
	 *
	 * Page 1 has no previous hit, so a {@code null} anchor is passed. A page beyond
	 * the end of the result set prints nothing.
	 *
	 * @param query     query string parsed against the {@code content} field
	 * @param pageIndex 1-based page number
	 * @param pageSize  number of hits per page
	 * @throws IllegalArgumentException if {@code pageIndex} or {@code pageSize} is less than 1
	 */
	public void searchPageByAfter(String query, int pageIndex, int pageSize) {
		if (pageIndex < 1 || pageSize < 1) {
			throw new IllegalArgumentException(
					"pageIndex and pageSize must be >= 1: pageIndex=" + pageIndex + ", pageSize=" + pageSize);
		}
		Directory directory = FileIndexUtils.getDirectory();
		IndexSearcher searcher = getSearcher(directory);
		if (searcher == null) {
			// getSearcher has already printed the cause.
			return;
		}
		QueryParser parser = new QueryParser(Version.LUCENE_35, "content",
				new StandardAnalyzer(Version.LUCENE_35));
		try {
			Query q = parser.parse(query);
			
			ScoreDoc after = null;
			if (pageIndex > 1) {
				// Index of the previous page's last hit within the full hit list.
				int last = (pageIndex-1)*pageSize-1;
				ScoreDoc[] sds = searcher.search(q, last + 1).scoreDocs;
				if (last >= sds.length) {
					// The previous page is not full, so the requested page is empty.
					return;
				}
				after = sds[last];
			}
			// Fetch pageSize hits (the original hard-coded 10 ignored the parameter).
			TopDocs tds = searcher.searchAfter(after, q, pageSize);
			for (ScoreDoc sd : tds.scoreDocs) {
				Document doc = searcher.doc(sd.doc);
				System.out.println(doc.get("path")+"-->"+doc.get("filename")+"---"+sd.doc);
			}
		} catch (org.apache.lucene.queryParser.ParseException e) {
			e.printStackTrace();
		} catch (IOException e) {
			e.printStackTrace();
		}
		
	}
	/**
	 * Finds the last {@code ScoreDoc} of the page preceding {@code pageIndex}, for
	 * use as the anchor of a {@code searchAfter} call.
	 *
	 * @param pageIndex 1-based page number being requested
	 * @param pageSize  number of hits per page
	 * @param query     parsed query to run
	 * @param searcher  searcher to run it on
	 * @return {@code null} for the first page or when the query has no hits;
	 *         otherwise the last hit of the previous page, or the very last hit of
	 *         the result set when it ends before that position (so a subsequent
	 *         {@code searchAfter} yields an empty page)
	 * @throws IOException if the search fails
	 * @throws IllegalArgumentException if {@code pageIndex} or {@code pageSize} is less than 1
	 */
	private ScoreDoc getLastScoreDoc(int pageIndex, int pageSize, Query query, IndexSearcher searcher) throws IOException {
		if (pageIndex < 1 || pageSize < 1) {
			throw new IllegalArgumentException(
					"pageIndex and pageSize must be >= 1: pageIndex=" + pageIndex + ", pageSize=" + pageSize);
		}
		if(pageIndex == 1) return null;
		int num = (pageIndex-1)*pageSize;
		ScoreDoc[] hits = searcher.search(query, num).scoreDocs;
		if (hits.length == 0) {
			return null;
		}
		// The search may return fewer than 'num' hits; never index past the array.
		return hits[Math.min(num, hits.length) - 1];
	}
	
	/**
	 * Prints one page of results via {@code searchAfter}, delegating the lookup of
	 * the previous page's final hit to {@code getLastScoreDoc}.
	 *
	 * @param query     query string parsed against the {@code content} field
	 * @param pageIndex 1-based page number
	 * @param pageSize  number of hits per page
	 */
	public void searchPageByAfter_2(String query, int pageIndex, int pageSize) {
		IndexSearcher searcher = getSearcher(FileIndexUtils.getDirectory());
		QueryParser parser = new QueryParser(Version.LUCENE_35, "content",
				new StandardAnalyzer(Version.LUCENE_35));
		try {
			Query parsed = parser.parse(query);
			// Anchor: the final hit of the preceding page (null for page 1).
			ScoreDoc previousPageEnd = getLastScoreDoc(pageIndex, pageSize, parsed, searcher);
			// Continue from the anchor and take the next pageSize hits.
			ScoreDoc[] pageHits = searcher.searchAfter(previousPageEnd, parsed, pageSize).scoreDocs;
			for (int i = 0; i < pageHits.length; i++) {
				ScoreDoc hit = pageHits[i];
				Document stored = searcher.doc(hit.doc);
				System.out.println(stored.get("path")+"-->"+stored.get("filename")+"---"+hit.doc);
			}
		} catch (org.apache.lucene.queryParser.ParseException e) {
			e.printStackTrace();
		} catch (IOException e) {
			e.printStackTrace();
		}
	}