/*
 * Michzel's new Java file.
 *
 * Created on 2010-9-15.
 *
 * Copyright 2010 Anchora Info Company. All rights reserved.
 */
package LuceneTest;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.demo.FileDocument;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.Version;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.util.Date;
//为指定目录下的所有文件建立索引
/**
 * Builds a Lucene index for all {@code .txt} files under a given directory
 * (recursing into subdirectories). Typical usage: {@link #newWriter()} →
 * {@link #toIndex(String)} → {@link #close()}.
 *
 * <p>Not thread-safe: a single {@link IndexWriter} is held as mutable state.
 */
public class Indexer {

    /** Writer for the on-disk index; remains {@code null} if {@link #newWriter()} fails. */
    private IndexWriter writer = null;

    public static void main(String[] args) throws IOException {
        Indexer indexer = new Indexer();
        indexer.newWriter();
        try {
            indexer.toIndex("d:\\data");
        } finally {
            // Always release the writer (and its index lock), even if indexing throws.
            indexer.close();
        }
    }

    /**
     * Creates the {@link IndexWriter} over the index directory {@code d:\index}.
     * The {@code create=true} flag wipes any existing index there.
     * On failure the error is reported and {@code writer} stays {@code null};
     * {@link #close()} tolerates that state.
     */
    public void newWriter() {
        try {
            File file = new File("d:\\index");
            Analyzer luceneAnalyzer = new StandardAnalyzer(Version.LUCENE_30);
            writer = new IndexWriter(FSDirectory.open(file),
                    luceneAnalyzer, true, IndexWriter.MaxFieldLength.LIMITED);
        } catch (IOException e) {
            System.out.println(" caught a " + e.getClass() +
                    "\n with message: " + e.getMessage());
        }
    }

    /**
     * Indexes every supported file under the given path.
     *
     * @param path directory (or single file) to index
     * @throws IOException if reading a file or writing the index fails
     */
    public void toIndex(String path) throws IOException {
        toIndex(new File(path));
    }

    /**
     * Indexes the given file or directory tree, reporting elapsed time and
     * the number of files indexed.
     *
     * @param file file or directory to index
     * @throws IOException if reading a file or writing the index fails
     */
    public void toIndex(File file) throws IOException {
        long start = System.currentTimeMillis();
        int number = indexFiles(file);
        long elapsed = System.currentTimeMillis() - start;
        System.out.println("总共耗时" + elapsed + "毫秒");
        System.out.println("一共为" + number + "个文件建立索引");
    }

    /**
     * Adds one file to the index as a Lucene document.
     *
     * @param file the file to add
     * @throws IOException if the writer fails
     */
    private void addDocument(File file) throws IOException {
        writer.addDocument(getDocument(file));
        System.out.println("向索引中加入文档");
    }

    /**
     * Recursively walks {@code file}, indexing every {@code .txt} file found.
     *
     * @param file file or directory to walk
     * @return number of files actually indexed
     * @throws IOException if reading a file or writing the index fails
     */
    private int indexFiles(File file) throws IOException {
        if (file.isDirectory()) {
            File[] files = file.listFiles();
            if (files == null) {
                // listFiles() returns null on I/O error or permission denial;
                // treat as an empty directory rather than throwing NPE.
                return 0;
            }
            int num = 0;
            for (File child : files) {
                num += indexFiles(child);
            }
            return num;
        } else {
            if (file.getPath().endsWith(".txt")) {
                System.out.println("正在建索:" + file.getCanonicalPath());
                addDocument(file);
                return 1;
            } else {
                System.out.println("文件类型不支持" + file);
                return 0;
            }
        }
    }

    /**
     * Closes the index writer, committing pending documents.
     * Safe to call when {@link #newWriter()} failed and {@code writer} is null.
     *
     * @throws IOException if the writer fails to close
     */
    public void close() throws IOException {
        if (writer != null) {
            writer.close();
        }
    }

    /**
     * Builds the Lucene document for one file: stored path, analyzed title,
     * and the file's text content (tokenized with positions and offsets).
     *
     * @param file the file to convert
     * @return a document ready to be added to the index
     * @throws IOException if the canonical path cannot be resolved
     */
    public static Document getDocument(File file) throws IOException {
        Document doc = new Document();
        doc.add(new Field("path", file.getCanonicalPath(),
                Field.Store.YES, Field.Index.NOT_ANALYZED));
        doc.add(new Field("title", file.getName(), Field.Store.YES, Field.Index.ANALYZED));
        // BUG FIX: the original indexed the empty string "" as the contents,
        // so no file text was ever searchable. The Reader-based Field
        // constructor streams the file's text (always Store.NO + analyzed,
        // matching the original flags).
        doc.add(new Field("contents", new FileReader(file),
                Field.TermVector.WITH_POSITIONS_OFFSETS));
        return doc;
    }
}
/*
 * NOTE(review): everything that previously followed this point was an
 * accidental duplicate of the file above — a second copyright header with
 * its opening comment delimiter missing (so line 113 was a bare syntax
 * error), a second `package LuceneTest;` statement, and a second top-level
 * `Indexer` class. Java allows neither two package declarations nor two
 * top-level classes of the same name in one compilation unit, so the
 * duplicate made the whole file uncompilable. The duplicated content has
 * been removed; the `Indexer` class above is the sole definition.
 */