这是工程目录
package index;
import java.io.File;
import java.util.ArrayList;
import java.util.List;
import org.apache.commons.io.FileUtils;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.LongField;
import org.junit.Test;
/**
 * JUnit test that builds a Lucene index from the files found in a local folder.
 *
 * <p>Each regular file becomes one {@link Document} with three stored fields:
 * {@code fileName}, {@code fileContext} (the file's text) and {@code fileSize}.
 * The documents are then written to an on-disk index with a {@link StandardAnalyzer}.
 */
public class testIndexOperation {

    /** Folder whose files are read and indexed. */
    private static final String SOURCE_DIR = "C:\\Users\\sam\\Desktop\\lucene";

    /** Folder the index files are written to. */
    private static final String INDEX_DIR = "C:\\Users\\sam\\Desktop\\dir";

    /**
     * Reads every regular file in {@link #SOURCE_DIR}, turns it into a document and
     * writes all documents to the index stored in {@link #INDEX_DIR}.
     *
     * @throws IllegalStateException if the source folder cannot be listed
     * @throws Exception if reading a file or writing the index fails
     */
    @Test
    public void testCreateIndex() throws Exception {
        File sourceDir = new File(SOURCE_DIR);
        // listFiles() returns null (not an empty array) when the path is missing,
        // is not a directory, or cannot be read; fail with a clear message instead of an NPE.
        File[] files = sourceDir.listFiles();
        if (files == null) {
            throw new IllegalStateException("Cannot list source directory: " + sourceDir);
        }

        // Analyse all files first, so a read error surfaces before the index is opened.
        List<Document> documents = new ArrayList<>();
        for (File file : files) {
            // Sub-directories have no text content to read; skip them.
            if (!file.isFile()) {
                continue;
            }
            documents.add(toDocument(file));
        }

        // try-with-resources releases the analyzer, the directory and the writer
        // (and with it the index write lock) even when indexing throws.
        try (Analyzer analyzer = new StandardAnalyzer();
             Directory directory = FSDirectory.open(new File(INDEX_DIR))) {
            IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_4_10_3, analyzer);
            try (IndexWriter indexWriter = new IndexWriter(directory, config)) {
                for (Document doc : documents) {
                    indexWriter.addDocument(doc);
                }
                // Make the added documents durable before the writer is closed.
                indexWriter.commit();
            }
        }
    }

    /**
     * Builds the index document for a single file.
     *
     * @param file a regular file to read
     * @return a document holding the file's name, text content and size as stored fields
     * @throws Exception if the file cannot be read
     */
    private static Document toDocument(File file) throws Exception {
        String fileName = file.getName();
        long fileSize = FileUtils.sizeOf(file);
        // NOTE(review): decodes with the platform default charset, as before;
        // confirm the encoding of the input files and pass it explicitly if known.
        String fileContext = FileUtils.readFileToString(file);

        Document doc = new Document();
        doc.add(new TextField("fileName", fileName, Store.YES));
        doc.add(new TextField("fileContext", fileContext, Store.YES));
        doc.add(new LongField("fileSize", fileSize, Store.YES));
        return doc;
    }
}
这是等待分析的文本
这是得到的索引文件。
索引文件可以使用 Luke（lukeall）工具打开查看。