Lucene
要对目录下的文件数据进行检索，需要先为这些文件建立索引。
1、建立索引
Demo的结构
Indexer.java
package com.matrix.lucene;
import java.io.File;
import java.io.FileReader;
import java.nio.file.Paths;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
/**
 * Builds a Lucene index from the files located directly inside a data directory.
 *
 * <p>Each regular, readable file becomes one document with three fields:
 * {@code contents} (tokenized from the file's text), {@code fileName} and
 * {@code fullPath} (both stored, so they can be displayed in search results).
 *
 * <p>The class implements {@link AutoCloseable} so callers can manage it with
 * try-with-resources; {@link #close()} may still be called explicitly.
 *
 * <p>Original author: Matrix (2016-03-17).
 */
public class Indexer implements AutoCloseable {

    /** Directory the index is written to; kept so it can be released in {@link #close()}. */
    private final Directory directory;

    /** Writer used to add documents to the index. */
    private final IndexWriter writer;

    /**
     * Opens (or creates) an index in the given directory using a {@link StandardAnalyzer}.
     *
     * @param indexDir file-system path of the directory the index is written to
     * @throws Exception if the directory cannot be opened or the writer cannot be created
     */
    public Indexer(String indexDir) throws Exception {
        Directory dir = FSDirectory.open(Paths.get(indexDir));
        IndexWriter created;
        try {
            Analyzer analyzer = new StandardAnalyzer();
            IndexWriterConfig iwc = new IndexWriterConfig(analyzer);
            created = new IndexWriter(dir, iwc);
        } catch (Exception e) {
            // Do not leak the directory handle when the writer cannot be created.
            try {
                dir.close();
            } catch (Exception closeFailure) {
                e.addSuppressed(closeFailure);
            }
            throw e;
        }
        this.directory = dir;
        this.writer = created;
    }

    /**
     * Closes the index writer and then the underlying directory.
     *
     * @throws Exception if closing either resource fails
     */
    @Override
    public void close() throws Exception {
        try {
            writer.close();
        } finally {
            directory.close();
        }
    }

    /**
     * Indexes every regular, readable file directly inside {@code dataDir}.
     * Sub-directories and unreadable entries are skipped.
     *
     * @param dataDir path of the directory whose files should be indexed
     * @return the value of {@code writer.numDocs()} after indexing; NOTE(review): this is
     *         the writer's document count, so it presumably also includes documents from
     *         earlier runs when the index already existed - confirm against the Lucene docs
     * @throws IllegalArgumentException if {@code dataDir} is not a listable directory
     * @throws Exception if reading a file or writing to the index fails
     */
    public int index(String dataDir) throws Exception {
        File[] files = new File(dataDir).listFiles();
        // listFiles() returns null (not an empty array) for a missing path or a non-directory.
        if (files == null) {
            throw new IllegalArgumentException("Not a readable directory: " + dataDir);
        }
        for (File f : files) {
            // Opening a FileReader on a directory throws, which would abort the whole run.
            if (f.isFile() && f.canRead()) {
                indexFile(f);
            }
        }
        int total = writer.numDocs();
        System.out.println("返回索引了" + total + "个文件");
        return total;
    }

    /**
     * Indexes a single file.
     *
     * @param f the file to add to the index
     * @throws Exception if the file cannot be read or the document cannot be added
     */
    private void indexFile(File f) throws Exception {
        System.out.println("索引文件:" + f.getCanonicalPath());
        // try-with-resources guarantees the reader is released even if addDocument fails
        // before the reader has been consumed; closing it a second time is harmless.
        try (FileReader contents = new FileReader(f)) {
            writer.addDocument(getDocument(f, contents));
        }
    }

    /**
     * Builds the Lucene document for a file.
     *
     * @param f        the file being indexed; supplies the name and canonical path fields
     * @param contents reader over the file's text, used for the {@code contents} field
     * @return a document with the {@code contents}, {@code fileName} and {@code fullPath} fields
     * @throws Exception if the canonical path cannot be resolved
     */
    private Document getDocument(File f, FileReader contents) throws Exception {
        Document doc = new Document();
        doc.add(new TextField("contents", contents));
        doc.add(new TextField("fileName", f.getName(), Field.Store.YES));
        doc.add(new TextField("fullPath", f.getCanonicalPath(), Field.Store.YES));
        System.out.println("获取文档中每个字段:" + doc);
        return doc;
    }

    /**
     * Demo entry point: indexes the sample data directory and prints how long it took.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        // Directory the index is written to.
        String indexDir = "F:\\testLucene";
        // Directory containing the files to index.
        String dataDir = "F:\\testLucene\\data";
        int numIndexed = 0;
        long start = System.currentTimeMillis();
        // try-with-resources only closes the indexer if construction succeeded, so a failed
        // constructor no longer triggers a second NullPointerException during cleanup.
        try (Indexer indexer = new Indexer(indexDir)) {
            numIndexed = indexer.index(dataDir);
        } catch (Exception e) {
            e.printStackTrace();
        }
        long end = System.currentTimeMillis();
        System.out.println("索引:" + numIndexed + "个文件花费了" + (end - start) + "毫秒");
    }
}
运行结果:
2、查询
Demo的结构
Searcher.java
package com.matrix.lucene;
import java.nio.file.Paths;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
/**
 * Runs a query against the {@code contents} field of an existing Lucene index and
 * prints the stored {@code fullPath} of each matching document.
 */
public class Searcher {

    /**
     * Searches the index for the given query string and prints up to ten hits.
     *
     * <p>The directory, index reader and analyzer are opened with try-with-resources so
     * they are released even when query parsing or searching throws.
     *
     * @param indexDir file-system path of the directory containing the index
     * @param q        query string, parsed with the classic {@link QueryParser} against
     *                 the {@code contents} field
     * @throws Exception if the index cannot be opened, the query cannot be parsed,
     *                   or the search fails
     */
    public static void search(String indexDir, String q) throws Exception {
        try (Directory dir = FSDirectory.open(Paths.get(indexDir));
                IndexReader reader = DirectoryReader.open(dir);
                Analyzer analyzer = new StandardAnalyzer()) {
            IndexSearcher is = new IndexSearcher(reader);
            QueryParser parser = new QueryParser("contents", analyzer);
            Query query = parser.parse(q);

            // Only the search call itself is timed, not index opening or query parsing.
            long start = System.currentTimeMillis();
            TopDocs hits = is.search(query, 10);
            long end = System.currentTimeMillis();
            System.out.println("匹配" + q + ",总共花费" + (end - start) + "毫秒" + "查询到" + hits.totalHits + "个记录");

            for (ScoreDoc scoreDoc : hits.scoreDocs) {
                // Load the stored fields of the hit and print its full path.
                Document doc = is.doc(scoreDoc.doc);
                System.out.println(doc.get("fullPath"));
            }
        }
    }

    /**
     * Demo entry point: searches the sample index for the term "Java".
     *
     * @param args unused
     */
    public static void main(String[] args) {
        String indexDir = "F:\\testLucene";
        String q = "Java";
        try {
            search(indexDir, q);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
运行结果: