/*
*michzel new java files
*
*Created on 2010-9-15
*
*Copyright 2010 Anchora info company. all rights reserved
*/
package LuceneTest;
import java.io.*;
import java.util.StringTokenizer;
import org.apache.lucene.document.*;
@SuppressWarnings("unused")
public class MyDocument {

    /** Suffix that {@link #getFileName(File)} strips from a file name. */
    private static final String TXT_SUFFIX = ".txt";

    /**
     * Builds a Lucene {@link Document} describing the given file.
     *
     * <p>Three fields are added:
     * <ul>
     *   <li>{@code path} - the canonical path, stored and indexed without analysis;</li>
     *   <li>{@code title} - the file name, stored and analyzed;</li>
     *   <li>{@code contents} - a {@link FileReader} opened on the file.</li>
     * </ul>
     *
     * @param file the file to describe
     * @return a new document carrying the three fields above
     * @throws IOException if the canonical path cannot be resolved or the file
     *         cannot be opened for reading
     */
    public static Document getDocument(File file) throws IOException {
        Document doc = new Document();
        doc.add(new Field("path", file.getCanonicalPath(),
                Field.Store.YES, Field.Index.NOT_ANALYZED));
        doc.add(new Field("title", file.getName(), Field.Store.YES, Field.Index.ANALYZED));
        // The reader is intentionally not closed here: the field keeps a reference to it.
        // NOTE(review): presumably Lucene reads (and releases) it when the document is
        // added to the index - confirm against the Lucene version in use.
        // NOTE(review): FileReader decodes with the platform default charset - confirm
        // that matches the encoding of the indexed files.
        doc.add(new Field("contents", new FileReader(file)));
        return doc;
    }

    /**
     * Returns the last component of the file's path with everything from the
     * first {@code ".txt"} onward removed.
     *
     * <p>If the name does not contain {@code ".txt"} (including an empty name),
     * it is returned unchanged instead of failing.
     *
     * @param file the file whose name is wanted
     * @return the file name without its {@code ".txt"} suffix
     */
    private static String getFileName(File file) {
        // File.getName() splits on the directory separator; File.pathSeparator is the
        // ':' / ';' list separator and must not be used to split a single path.
        String name = file.getName();
        int suffixStart = name.indexOf(TXT_SUFFIX);
        // indexOf returns -1 when the suffix is absent; substring(0, -1) would throw.
        return suffixStart >= 0 ? name.substring(0, suffixStart) : name;
    }
}
/*
*michzel new java files
*
*Created on 2010-9-15
*
*Copyright 2010 Anchora info company. all rights reserved
*/
package LuceneTest;
import java.io.*;
import java.util.StringTokenizer;
import org.apache.lucene.document.*;
@SuppressWarnings("unused")
public class MyDocument {

    /** Suffix that {@link #getFileName(File)} strips from a file name. */
    private static final String TXT_SUFFIX = ".txt";

    /**
     * Builds a Lucene {@link Document} describing the given file.
     *
     * <p>Three fields are added:
     * <ul>
     *   <li>{@code path} - the canonical path, stored and indexed without analysis;</li>
     *   <li>{@code title} - the file name, stored and analyzed;</li>
     *   <li>{@code contents} - a {@link FileReader} opened on the file.</li>
     * </ul>
     *
     * @param file the file to describe
     * @return a new document carrying the three fields above
     * @throws IOException if the canonical path cannot be resolved or the file
     *         cannot be opened for reading
     */
    public static Document getDocument(File file) throws IOException {
        Document doc = new Document();
        doc.add(new Field("path", file.getCanonicalPath(),
                Field.Store.YES, Field.Index.NOT_ANALYZED));
        doc.add(new Field("title", file.getName(), Field.Store.YES, Field.Index.ANALYZED));
        // The reader is intentionally not closed here: the field keeps a reference to it.
        // NOTE(review): presumably Lucene reads (and releases) it when the document is
        // added to the index - confirm against the Lucene version in use.
        // NOTE(review): FileReader decodes with the platform default charset - confirm
        // that matches the encoding of the indexed files.
        doc.add(new Field("contents", new FileReader(file)));
        return doc;
    }

    /**
     * Returns the last component of the file's path with everything from the
     * first {@code ".txt"} onward removed.
     *
     * <p>If the name does not contain {@code ".txt"} (including an empty name),
     * it is returned unchanged instead of failing.
     *
     * @param file the file whose name is wanted
     * @return the file name without its {@code ".txt"} suffix
     */
    private static String getFileName(File file) {
        // File.getName() splits on the directory separator; File.pathSeparator is the
        // ':' / ';' list separator and must not be used to split a single path.
        String name = file.getName();
        int suffixStart = name.indexOf(TXT_SUFFIX);
        // indexOf returns -1 when the suffix is absent; substring(0, -1) would throw.
        return suffixStart >= 0 ? name.substring(0, suffixStart) : name;
    }
}
本文介绍了一个用于Lucene的Java类MyDocument,该类提供了一种方法来为指定文件创建Lucene文档。创建过程中,文件路径作为不可分析字段存储,文件名作为可分析字段存储,并将文件内容直接加入文档。
2077

被折叠的 条评论
为什么被折叠?



