现在网上很多例子都是基于老版本的 Lucene 写的,我把代码改了一下,下面这个例子基于 Lucene 2.4,希望能有所帮助。
索引类及其调用代码如下:
- package utils;
- import java.io.File;
- import java.io.FileReader;
- import java.io.IOException;
- import java.io.Reader;
- import org.apache.lucene.analysis.standard.StandardAnalyzer;
- import org.apache.lucene.document.Document;
- import org.apache.lucene.document.Field;
- import org.apache.lucene.index.IndexWriter;
- public class Indexer {
- public int index(String indexDir, String dataDir) throws IOException
- {
- File indexDirFile = new File(indexDir);
- File dataDirFile = new File(dataDir);
- int numIndexed = index(indexDirFile, dataDirFile);
- return 0;
- }
- private int index(File indexDirFile, File dataDirFile) throws IOException {
- if(!dataDirFile.exists() || !dataDirFile.isDirectory())
- {
- throw new IOException(dataDirFile + " does not exist or is not a directory");
- }
- IndexWriter writer = new IndexWriter(indexDirFile, new StandardAnalyzer(), true);
- writer.setUseCompoundFile(false);
- indexDirectory(writer, dataDirFile);
- int numIndexed = writer.docCount();
- writer.optimize();
- writer.close();
- return numIndexed;
- }
- private void indexDirectory(IndexWriter writer, File dataDirFile) throws IOException {
- File[] files = dataDirFile.listFiles();
- for(int i = 0; i<files.length; i++)
- {
- File f = files[i];
- if(f.isDirectory())
- {
- indexDirectory(writer, f);
- }else if(f.getName().endsWith(".java") || f.getName().endsWith(".txt"))//需要索引的文件类型
- {
- indexFile(writer, f);
- }
- }
- }
- private void indexFile(IndexWriter writer, File f) throws IOException {
- if(f.isHidden() || !f.exists() || !f.canRead())
- {
- return;
- }
- System.out.println("Indexing" + f.getCanonicalPath());
- Document doc = new Document();
- Reader txtReader = new FileReader(f);
- doc.add(new Field("path",f.getCanonicalPath(),Field.Store.YES,Field.Index.UN_TOKENIZED));
- doc.add(new Field("contents",txtReader));
- doc.add(new Field("name",f.getName(),Field.Store.YES,Field.Index.UN_TOKENIZED));
- writer.addDocument(doc);
- }
- }
- String filesRepoDir = "C:/workspace-2.0";//需要被索引的目录
- String indexDir = "C:/apache-tomcat-6.0.18/webapps/index";//存放索引的目录
- Indexer indexer= new Indexer();
- indexer.index(indexDir, filesRepoDir);
本文介绍了一个基于Lucene 2.4版本的索引创建示例,通过Java代码实现对指定目录下的文本文件进行索引,并详细展示了如何配置和使用IndexWriter来完成整个索引过程。
269

被折叠的 条评论
为什么被折叠?



