/*
*michzel new java files
*
*Created on 2010-9-15
*
*Copyright 2010 Anchora info company. all rights reserved
*/
package LuceneTest;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.demo.FileDocument;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.Date;
/**
 * Builds a Lucene index over every readable file found under {@link #DATA_DIR}
 * and stores it in {@link #INDEX_DIR}.
 *
 * <p>The writer is opened with its {@code create} flag set to {@code true}, so
 * running {@link #main} starts a fresh index rather than appending to one.
 */
public class Indexer {
    /** Directory in which the generated index is stored. */
    public static final String INDEX_DIR = "d://test";
    /** Root directory whose files are indexed. */
    public static final String DATA_DIR = "d://data";
    /** Root of the file tree that {@link #main} walks. */
    static File file = new File(DATA_DIR);

    /**
     * Indexes the tree rooted at {@link #file}, optimizes the index and prints
     * the elapsed time in milliseconds.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        // Refuse to run without input: opening the writer below recreates the
        // index, so a missing data directory would otherwise leave an empty one.
        if (!file.exists() || !file.canRead()) {
            System.out.println("Document directory '" + file.getAbsolutePath()
                    + "' does not exist or is not readable, please check the path");
            return;
        }

        long start = System.currentTimeMillis();
        try {
            // Use the constant so the directory written to always matches the
            // directory reported in the log message.
            File indexDir = new File(INDEX_DIR);
            Analyzer luceneAnalyzer = new StandardAnalyzer(Version.LUCENE_30);
            IndexWriter writer = new IndexWriter(FSDirectory.open(indexDir),
                    luceneAnalyzer, true, IndexWriter.MaxFieldLength.LIMITED);
            try {
                writer.setInfoStream(System.out);
                System.out.println("Indexing to directory '" + INDEX_DIR + "'...");
                indexDocs(writer, file);
                System.out.println("Optimizing...");
                writer.optimize();
            } finally {
                // Always close the writer, even when indexing fails part-way.
                writer.close();
            }

            // Report how long building the index took.
            long end = System.currentTimeMillis();
            System.out.println(end - start + " total milliseconds");
        } catch (IOException e) {
            System.out.println(" caught a " + e.getClass() +
                    "\n with message: " + e.getMessage());
        }
    }

    /**
     * Recursively adds {@code file} to the index. A directory is descended into
     * depth-first; any other readable file is added as one document. Unreadable
     * entries are skipped.
     *
     * @param writer the index writer that receives the documents
     * @param file   the file or directory to index
     * @throws IOException if adding a document fails for a reason other than
     *                     the file not being found
     */
    static void indexDocs(IndexWriter writer, File file)
            throws IOException {
        if (!file.canRead()) {
            return;
        }

        if (file.isDirectory()) {
            // list() returns null when the directory cannot be listed.
            String[] files = file.list();
            if (files != null) {
                for (String child : files) {
                    indexDocs(writer, new File(file, child));
                }
            }
            return;
        }

        // Leaf: a regular file, so build a document from it and add it.
        System.out.println("adding " + file);
        try {
            writer.addDocument(FileDocument.Document(file));
        } catch (FileNotFoundException fnfe) {
            // Best effort: a file can pass canRead() and still fail to open.
            // Report it and keep indexing the rest of the tree.
            System.out.println("skipping " + file + ": " + fnfe.getMessage());
        }
    }
}