Indexperform的MyDocument类

最新推荐文章于 2025-12-02 19:53:50 发布

原创最新推荐文章于 2025-12-02 19:53:50 发布 · 178 阅读

0 ·

CC 4.0 BY-SA版权

文章标签：

#lucene #Java #Apache

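本文介绍了一个用于 Lucene 的 Java 工具类 MyDocument，它提供静态方法 getDocument(File)，为指定文件创建 Lucene 文档（Document）。创建过程中，文件的规范路径存入 "path" 字段（存储、索引但不分词，NOT_ANALYZED），文件名存入 "title" 字段（存储并分词索引，ANALYZED），文件内容则通过 Reader 加入 "contents" 字段（仅分词索引，不存储原文）。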

/*
*michzel new java files
*
*Created on 2010-9-15
*
*Copyright 2010 Anchora info company. all rights reserved
*/

package LuceneTest;
import java.io.*;
import java.util.StringTokenizer;
import org.apache.lucene.document.*;
/**
 * Builds Lucene {@link Document} instances describing files on disk.
 *
 * <p>All members are static; the class holds no state.
 */
@SuppressWarnings("unused")
public class MyDocument {

    /**
     * Creates a Lucene document for the given file.
     *
     * <p>The document carries three fields:
     * <ul>
     *   <li>{@code path} - the canonical path, stored and indexed as a single
     *       un-analyzed term;</li>
     *   <li>{@code title} - the file name, stored and analyzed;</li>
     *   <li>{@code contents} - the file body, supplied through a {@link Reader}.</li>
     * </ul>
     *
     * @param file the file to describe
     * @return a new document populated with the three fields above
     * @throws IOException if the canonical path cannot be resolved or the file
     *                     cannot be opened for reading
     */
    public static Document getDocument(File file) throws IOException {
        Document doc = new Document();

        doc.add(new Field("path", file.getCanonicalPath(),
                Field.Store.YES, Field.Index.NOT_ANALYZED));

        doc.add(new Field("title", file.getName(),
                Field.Store.YES, Field.Index.ANALYZED));

        // The reader is deliberately left open: it is handed to the field and
        // cannot be closed here without emptying the field.
        // NOTE(review): presumably the index writer consumes it when the
        // document is added - confirm against the Lucene version in use.
        // FileReader decodes with the platform default charset.
        doc.add(new Field("contents", new FileReader(file)));

        return doc;
    }

    /**
     * Returns the file's base name with everything from the first
     * {@code ".txt"} onward removed.
     *
     * <p>The last path component is taken via {@link File#getName()}. The
     * earlier implementation split on {@link File#pathSeparator}, which is the
     * PATH-list delimiter rather than the directory separator, so the
     * directory part was never stripped. It also threw
     * {@link StringIndexOutOfBoundsException} for names without
     * {@code ".txt"}; such names are now returned unchanged.
     *
     * @param file the file whose name is wanted
     * @return the name without its {@code ".txt"} suffix, or the plain name
     *         when it contains no {@code ".txt"}
     */
    private static String getFileName(File file) {
        String name = file.getName();
        int extensionStart = name.indexOf(".txt");
        if (extensionStart < 0) {
            // indexOf returns -1 when there is no ".txt"; substring(0, -1) would throw.
            return name;
        }
        return name.substring(0, extensionStart);
    }

}