package com.dhb.search;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import org.apache.commons.io.FileUtils;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.NumericField;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.LockObtainFailedException;
import org.apache.lucene.util.Version;
/**
 * Static helpers for the file index stored under {@code D:/luceneData/files/}.
 *
 * <p>The index {@link Directory} is opened once in the static initializer and
 * shared through {@link #getDirectory()}. {@link #index(boolean)} (re)builds the
 * index from the regular files found directly in {@code D:/luceneData/example}.
 */
public class FileIndexUtils {
    // Shared index location. Remains null when FSDirectory.open failed in the
    // static initializer below (the failure is only printed, not rethrown).
    private static Directory directory = null;

    static {
        try {
            directory = FSDirectory.open(new File("D:/luceneData/files/"));
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Returns the shared index directory.
     *
     * @return the directory opened at class-load time, or {@code null} if
     *         opening it failed
     */
    public static Directory getDirectory() {
        return directory;
    }

    /**
     * Adds one document per regular file in {@code D:/luceneData/example}.
     *
     * <p>Each document carries the fields {@code content} (read from the file),
     * {@code filename}, {@code path}, {@code date} (last-modified time as a long)
     * and {@code size} (file length divided by 1024, as an int).
     *
     * <p>Failures are printed and otherwise ignored; the writer is always closed.
     *
     * @param hasNew when {@code true}, all existing documents are deleted before
     *               the files are indexed
     */
    public static void index(boolean hasNew) {
        IndexWriter writer = null;
        try {
            IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_35,
                    new StandardAnalyzer(Version.LUCENE_35));
            writer = new IndexWriter(directory, iwc);
            // Rebuild from scratch when requested.
            if (hasNew) {
                writer.deleteAll();
            }
            File f = new File("D:/luceneData/example");
            // listFiles() returns null when the path is not a directory or
            // cannot be read; iterating over null would throw.
            File[] files = f.listFiles();
            if (files == null) {
                System.err.println("Cannot list files in " + f.getAbsolutePath());
                return;
            }
            for (File file : files) {
                // A FileReader cannot be opened on a sub-directory; skipping it
                // keeps one such entry from aborting the remaining files.
                if (!file.isFile()) {
                    continue;
                }
                FileReader content = new FileReader(file);
                try {
                    Document doc = new Document();
                    doc.add(new Field("content", content));
                    doc.add(new Field("filename", file.getName(), Field.Store.YES, Field.Index.NOT_ANALYZED));
                    doc.add(new Field("path", file.getAbsolutePath(), Field.Store.YES, Field.Index.NOT_ANALYZED));
                    doc.add(new NumericField("date", Field.Store.YES, true).setLongValue(file.lastModified()));
                    doc.add(new NumericField("size", Field.Store.YES, true).setIntValue((int) (file.length() / 1024)));
                    writer.addDocument(doc);
                } finally {
                    // Release the file handle even if addDocument failed before
                    // the reader was consumed; closing twice is harmless.
                    content.close();
                }
            }
        } catch (CorruptIndexException e) {
            e.printStackTrace();
        } catch (LockObtainFailedException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            if (writer != null) {
                try {
                    writer.close();
                } catch (CorruptIndexException e) {
                    e.printStackTrace();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
    }
}
/**
 * Returns a searcher over the given directory, reusing the cached reader.
 *
 * <p>On the first call the {@code reader} field (declared outside this method)
 * is opened on {@code dir}. On later calls the cached reader is refreshed via
 * {@code IndexReader.openIfChanged}; when a newer reader is returned, the old
 * one is closed and replaced.
 *
 * @param dir the index directory, used only when no reader is cached yet
 * @return a new searcher over the current reader, or {@code null} if opening
 *         or refreshing the reader failed (the exception is printed)
 */
public IndexSearcher getSearcher(Directory dir) {
    IndexSearcher result = null;
    try {
        if (reader != null) {
            IndexReader refreshed = IndexReader.openIfChanged(reader);
            if (refreshed != null) {
                // The index changed: drop the stale reader and keep the new one.
                reader.close();
                reader = refreshed;
            }
        } else {
            reader = IndexReader.open(dir);
        }
        result = new IndexSearcher(reader);
    } catch (CorruptIndexException e) {
        e.printStackTrace();
    } catch (IOException e) {
        e.printStackTrace();
    }
    return result;
}
/**
 * Runs {@code query} against the {@code content} field and prints one page of
 * hits by slicing the top-500 result array.
 *
 * <p>Only the first 500 hits are fetched, so pages that start beyond that
 * window (or beyond the actual number of hits) print nothing.
 *
 * @param query     query string in QueryParser syntax
 * @param pageIndex 1-based page number
 * @param pageSize  number of hits per page
 */
public void searchPage(String query, int pageIndex, int pageSize) {
    Directory directory = FileIndexUtils.getDirectory();
    IndexSearcher searcher = getSearcher(directory);
    if (searcher == null) {
        // getSearcher has already printed the cause; nothing to search.
        return;
    }
    QueryParser parser = new QueryParser(Version.LUCENE_35, "content",
            new StandardAnalyzer(Version.LUCENE_35));
    try {
        Query q = parser.parse(query);
        TopDocs tds = searcher.search(q, 500);
        ScoreDoc[] sds = tds.scoreDocs;
        // Clamp the page window to the hits actually returned; the last page
        // is usually partial and pageIndex < 1 would give a negative start.
        int start = Math.max((pageIndex - 1) * pageSize, 0);
        int end = Math.min(pageIndex * pageSize, sds.length);
        for (int i = start; i < end; i++) {
            Document doc = searcher.doc(sds[i].doc);
            System.out.println(i+": "+doc.get("path")+"-->"+doc.get("filename")+"---"+sds[i].doc);
        }
    } catch (org.apache.lucene.queryParser.ParseException e) {
        e.printStackTrace();
    } catch (IOException e) {
        e.printStackTrace();
    }
}
/**
 * Runs {@code query} against the {@code content} field and prints every hit
 * among the top 500, numbered from 0, as {@code index: path-->filename}.
 *
 * <p>Parse and I/O failures are printed and otherwise ignored.
 *
 * @param query query string in QueryParser syntax
 */
public void searchNoPage(String query) {
    Directory indexDir = FileIndexUtils.getDirectory();
    IndexSearcher searcher = getSearcher(indexDir);
    StandardAnalyzer analyzer = new StandardAnalyzer(Version.LUCENE_35);
    QueryParser parser = new QueryParser(Version.LUCENE_35, "content", analyzer);
    try {
        Query parsed = parser.parse(query);
        ScoreDoc[] hits = searcher.search(parsed, 500).scoreDocs;
        int position = 0;
        for (ScoreDoc hit : hits) {
            Document doc = searcher.doc(hit.doc);
            System.out.println(position+": "+doc.get("path")+"-->"+doc.get("filename"));
            position++;
        }
    } catch (org.apache.lucene.queryParser.ParseException e) {
        e.printStackTrace();
    } catch (IOException e) {
        e.printStackTrace();
    }
}
/**
 * Prints one page of hits for {@code query} using {@code searchAfter}.
 *
 * <p>The last hit of the previous page is looked up in the top-500 results and
 * used as the {@code searchAfter} anchor. For the first page there is no
 * previous hit, so {@code null} is passed as the anchor. If the previous page
 * would end beyond the fetched hits, the requested page is empty and nothing
 * is printed.
 *
 * @param query     query string in QueryParser syntax
 * @param pageIndex 1-based page number
 * @param pageSize  number of hits per page
 */
public void searchPageByAfter(String query, int pageIndex, int pageSize) {
    Directory directory = FileIndexUtils.getDirectory();
    IndexSearcher searcher = getSearcher(directory);
    if (searcher == null) {
        // getSearcher has already printed the cause; nothing to search.
        return;
    }
    QueryParser parser = new QueryParser(Version.LUCENE_35, "content",
            new StandardAnalyzer(Version.LUCENE_35));
    try {
        Query q = parser.parse(query);
        // Index of the last hit on the previous page; negative on page 1.
        int last = (pageIndex-1)*pageSize-1;
        ScoreDoc after = null;
        if (last >= 0) {
            ScoreDoc[] sds = searcher.search(q, 500).scoreDocs;
            if (last >= sds.length) {
                // The requested page starts past the available hits.
                return;
            }
            after = sds[last];
        }
        // Fetch pageSize hits (not a fixed 10) so the page matches the anchor.
        TopDocs tds = searcher.searchAfter(after, q, pageSize);
        for (ScoreDoc sd : tds.scoreDocs) {
            Document doc = searcher.doc(sd.doc);
            System.out.println(doc.get("path")+"-->"+doc.get("filename")+"---"+sd.doc);
        }
    } catch (org.apache.lucene.queryParser.ParseException e) {
        e.printStackTrace();
    } catch (IOException e) {
        e.printStackTrace();
    }
}
/**
 * Finds the last ScoreDoc of the page preceding {@code pageIndex}, for use as
 * the anchor of a {@code searchAfter} call.
 *
 * @param pageIndex 1-based page number being requested
 * @param pageSize  number of hits per page
 * @param query     the parsed query
 * @param searcher  searcher to run the query on
 * @return {@code null} when there is no preceding hit (first page, a
 *         non-positive offset, or no hits at all); otherwise the hit at
 *         position {@code (pageIndex-1)*pageSize}, or the final available hit
 *         when the query has fewer hits than that, so that searching after it
 *         yields an empty page
 * @throws IOException if the search fails
 */
private ScoreDoc getLastScoreDoc(int pageIndex, int pageSize, Query query, IndexSearcher searcher) throws IOException {
    if (pageIndex <= 1) {
        return null;
    }
    int num = (pageIndex-1)*pageSize;
    if (num <= 0) {
        // Non-positive page size: there is no previous hit to anchor on.
        return null;
    }
    ScoreDoc[] hits = searcher.search(query, num).scoreDocs;
    if (hits.length == 0) {
        return null;
    }
    // The query may return fewer than num hits; never index past the array.
    return hits[Math.min(num, hits.length) - 1];
}
/**
 * Prints one page of hits for {@code query}, anchoring {@code searchAfter} on
 * the last hit of the previous page as returned by {@code getLastScoreDoc}.
 *
 * <p>Each hit is printed as {@code path-->filename---docId}. Parse and I/O
 * failures are printed and otherwise ignored.
 *
 * @param query     query string in QueryParser syntax
 * @param pageIndex 1-based page number
 * @param pageSize  number of hits per page
 */
public void searchPageByAfter_2(String query, int pageIndex, int pageSize) {
    Directory indexDir = FileIndexUtils.getDirectory();
    IndexSearcher searcher = getSearcher(indexDir);
    StandardAnalyzer analyzer = new StandardAnalyzer(Version.LUCENE_35);
    QueryParser parser = new QueryParser(Version.LUCENE_35, "content", analyzer);
    try {
        Query parsed = parser.parse(query);
        // Anchor: the final hit of the preceding page.
        ScoreDoc previousPageTail = getLastScoreDoc(pageIndex, pageSize, parsed, searcher);
        // Ask for the next pageSize hits that come after the anchor.
        ScoreDoc[] pageHits = searcher.searchAfter(previousPageTail, parsed, pageSize).scoreDocs;
        for (int i = 0; i < pageHits.length; i++) {
            int docId = pageHits[i].doc;
            Document doc = searcher.doc(docId);
            System.out.println(doc.get("path")+"-->"+doc.get("filename")+"---"+docId);
        }
    } catch (org.apache.lucene.queryParser.ParseException e) {
        e.printStackTrace();
    } catch (IOException e) {
        e.printStackTrace();
    }
}