/**
*
*/
package two;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.Date;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
/**
* author lighter date 2006-8-7
*/
public class TextFileIndexer {
/**
 * Indexes every {@code .txt} file located directly inside the source folder
 * into an on-disk Lucene index, prints how long that took, then runs one
 * sample query against the {@code body} field and prints the stored text of
 * up to three hits.
 *
 * @param args ignored
 * @throws Exception if the source folder cannot be listed, a file cannot be
 *         read, the index cannot be written or searched, or the sample
 *         query cannot be parsed
 */
public static void main(String[] args) throws Exception {
    File fileDir = new File("D:/MyEclipse/ws/test/src");
    // Validate the source folder before the writer is opened: the writer is
    // created with create=true, so opening it first would already replace
    // an existing index even though nothing can be indexed.
    File[] textFiles = fileDir.listFiles();
    if (textFiles == null) {
        // listFiles() returns null when the path is not a directory or an
        // I/O error occurs.
        throw new IOException("Cannot list directory: " + fileDir.getPath());
    }
    Directory directory = FSDirectory.open(new File("D:/MyEclipse/ws/test/dir"));
    try {
        Analyzer luceneAnalyzer = new StandardAnalyzer(Version.LUCENE_31);
        @SuppressWarnings("deprecation")
        IndexWriter indexWriter = new IndexWriter(directory, luceneAnalyzer,
                true, new IndexWriter.MaxFieldLength(25000));
        long startTime = System.currentTimeMillis();
        try {
            // Add one document per text file to the index.
            for (File textFile : textFiles) {
                if (textFile.isFile() && textFile.getName().endsWith(".txt")) {
                    indexWriter.addDocument(toDocument(textFile));
                }
            }
            // Optimize the index once all documents have been added.
            indexWriter.optimize();
        } finally {
            // Always close the writer, even when indexing fails part-way.
            indexWriter.close();
        }
        // Report how long indexing took.
        long endTime = System.currentTimeMillis();
        System.out.println(" 这花费了 " + (endTime - startTime)
                + " 毫秒来把文档增加到索引里面去! " + fileDir.getPath());
        printMatches(directory, luceneAnalyzer, " 随便 ");
    } finally {
        directory.close();
    }
}

/**
 * Reads one text file as UTF-8 and wraps it in a Lucene document with a
 * stored, non-indexed {@code path} field and a stored, analyzed
 * {@code body} field. The file path and its content are echoed to stdout.
 *
 * @param textFile the file to read
 * @return the document describing {@code textFile}
 * @throws IOException if the file cannot be resolved or read
 */
private static Document toDocument(File textFile) throws IOException {
    String canonicalPath = textFile.getCanonicalPath();
    System.out.println("File" + canonicalPath + " 正在被索引 . ");
    String body = FileReaderAll(canonicalPath, "utf-8");
    System.out.println(body);
    Document document = new Document();
    document.add(new Field("path", textFile.getPath(),
            Field.Store.YES, Field.Index.NO));
    document.add(new Field("body", body, Field.Store.YES,
            Field.Index.ANALYZED,
            Field.TermVector.WITH_POSITIONS_OFFSETS));
    return document;
}

/**
 * Parses {@code queryString} against the {@code body} field, searches the
 * index for the top three hits and prints the stored body of each one.
 * Nothing is printed when there are no hits.
 *
 * @param directory   the index to search
 * @param analyzer    the analyzer used to parse the query
 * @param queryString the query text
 * @throws IOException    if the index cannot be opened or read
 * @throws ParseException if the query text is not valid query syntax
 */
private static void printMatches(Directory directory, Analyzer analyzer,
        String queryString) throws IOException, ParseException {
    // Parse before opening the searcher so that a bad query surfaces as a
    // ParseException instead of a later NullPointerException.
    QueryParser parser = new QueryParser(Version.LUCENE_31, "body", analyzer);
    Query query = parser.parse(queryString);
    IndexSearcher searcher = new IndexSearcher(directory, true);
    try {
        ScoreDoc[] hits = searcher.search(query, 3).scoreDocs;
        if (hits.length > 0) {
            System.out.println(" 找到: " + hits.length + " 个结果! ");
            for (ScoreDoc hit : hits) {
                System.out.println(searcher.doc(hit.doc).get("body"));
            }
        }
    } finally {
        searcher.close();
    }
}
/**
 * Reads the whole content of a text file into a string.
 *
 * <p>The content is returned exactly as decoded, line terminators included,
 * so that words on adjacent lines stay separated when the text is later
 * tokenized.
 *
 * @param FileName path of the file to read
 * @param charset  name of the character set used to decode the file
 * @return the decoded file content; an empty string for an empty file
 * @throws IOException if the file cannot be opened or read, or if
 *         {@code charset} is not supported
 */
public static String FileReaderAll(String FileName, String charset)
        throws IOException {
    FileInputStream in = new FileInputStream(FileName);
    try {
        InputStreamReader reader = new InputStreamReader(in, charset);
        StringBuilder content = new StringBuilder();
        char[] buffer = new char[8192];
        int count;
        while ((count = reader.read(buffer, 0, buffer.length)) != -1) {
            content.append(buffer, 0, count);
        }
        return content.toString();
    } finally {
        // Closing the underlying stream releases the file handle on every
        // path, including an unsupported charset or a failed read.
        in.close();
    }
}
}
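// For reference, see: http://blog.youkuaiyun.com/neusoftware_20063500/article/details/3969365
// Note, however, that the Lucene version used in that reference differs from the one used in this example.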