1. 在eclipse中安装配置lucene
a) 下载lucene-core-2.3.2.jar
b) File->New->Java Project 新建工程;然后在工程上右键 Properties->Java Build Path->Libraries->Add External JARs,选择 lucene-core-2.3.2.jar
c) 为支持中文搜索,和导入lucene-core-2.3.2.jar相同的方式导入je-analysis-1.5.3.jar
在电脑中D盘建立lucene/docs的文件夹,其中包含要被检索的文档集合
在电脑中D盘的lucene文件夹下建立index文件夹(即D:/lucene/index,与程序中使用的索引路径一致),用于存储索引
2. 索引相关的程序
a) 建立索引
package index;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import jeasy.analysis.MMAnalyzer;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.store.LockObtainFailedException;
/**
 * Builds a Lucene index from the plain-text files of a document directory.
 *
 * <p>Every regular file whose name ends in ".txt" directly inside
 * {@link #docDirPath} becomes one document with two fields: "path" (stored,
 * not tokenized) and "contents" (tokenized from the file's text). The index
 * is written to {@link #indexDirPath}.
 */
public class Create {
    /** Directory containing the documents to index. */
    static String docDirPath = "D://lucene/docs";
    /** Directory the index is written to. */
    static String indexDirPath = "D://lucene/index";

    /**
     * Indexes all ".txt" files of the document directory.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        File docDir = new File(docDirPath);
        File indexDir = new File(indexDirPath);

        // listFiles() returns null when the path is not a directory or cannot
        // be read; fail with a message instead of a NullPointerException.
        File[] docFiles = docDir.listFiles();
        if (docFiles == null) {
            System.err.println("Cannot list document directory: " + docDirPath);
            return;
        }

        // MMAnalyzer comes from je-analysis and is used for Chinese text;
        // StandardAnalyzer would be the stock alternative.
        Analyzer analyzer = new MMAnalyzer();
        IndexWriter writer = null;
        try {
            // Third argument "true" asks for a new index to be created.
            writer = new IndexWriter(indexDir, analyzer, true);
            for (int i = 0; i < docFiles.length; i++) {
                File docFile = docFiles[i];
                if (!docFile.isFile() || !docFile.getName().endsWith(".txt")) {
                    continue;
                }
                Document document = new Document();
                document.add(new Field("path", docFile.getCanonicalPath(),
                        Field.Store.YES, Field.Index.UN_TOKENIZED));
                // NOTE(review): FileReader decodes with the platform default
                // charset; confirm the documents are stored in that encoding.
                FileReader contentReader = new FileReader(docFile);
                try {
                    document.add(new Field("contents", contentReader));
                    // A boost marks this document as more important than
                    // others; it applies to all of the document's fields.
                    // The same call on a single Field boosts only that field.
                    document.setBoost((float) 1.50);
                    writer.addDocument(document);
                } finally {
                    // Release the file handle even if indexing this file fails.
                    contentReader.close();
                }
            }
            writer.optimize();
        } catch (CorruptIndexException e) {
            e.printStackTrace();
        } catch (LockObtainFailedException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            // Always close the writer so it is not left open after a failure.
            if (writer != null) {
                try {
                    writer.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
    }
}
b) 删除索引中包含某个term的document
package index;
import java.io.IOException;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.StaleReaderException;
import org.apache.lucene.index.Term;
import org.apache.lucene.store.LockObtainFailedException;
/**
 * Deletes from the index every document that contains a given term.
 *
 * <p>To update a document in the index, delete it first and then add it
 * again.
 */
public class Delete_index_term {
    /** Directory of the index to modify. */
    static String indexDirPath = "D://lucene/index";

    /**
     * Opens the index and deletes the documents matching the term
     * "contents":"liu".
     *
     * @param args unused
     */
    public static void main(String[] args) {
        IndexReader reader = null;
        try {
            reader = IndexReader.open(indexDirPath);
            // Marks for deletion every document whose "contents" field
            // contains the term "liu".
            reader.deleteDocuments(new Term("contents", "liu"));
        } catch (CorruptIndexException e) {
            e.printStackTrace();
        } catch (StaleReaderException e) {
            e.printStackTrace();
        } catch (LockObtainFailedException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            // Close in finally so the reader is not left open when the
            // deletion itself throws.
            if (reader != null) {
                try {
                    reader.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
    }
}
3. 查询相关的程序
a) Term查询、phrase查询
package search;
import java.io.File;
import java.io.IOException;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.store.FSDirectory;
/**
 * Runs a single-term query against the index and prints the stored "path"
 * of every matching document. A phrase-query variant is kept as a commented
 * example.
 */
public class Term_Phrase_Search {
    /**
     * Searches the "contents" field for the term "liu".
     *
     * @param args unused
     */
    public static void main(String[] args) {
        File indexDir = new File("D://lucene/index");
        IndexSearcher searcher = null;
        try {
            if (!indexDir.exists()) {
                System.out.println("The Lucene index is not exist");
                return;
            }
            FSDirectory directory = FSDirectory.getDirectory(indexDir, false);
            searcher = new IndexSearcher(directory);

            /* Phrase query alternative: match the exact phrase made of the
               two terms below.
            PhraseQuery query = new PhraseQuery();
            query.add(new Term("contents", "北京"));
            query.add(new Term("contents", "邮电"));*/

            // Term query: match documents whose "contents" field contains
            // the term held in queryStr.
            String queryStr = "liu";
            Term term = new Term("contents", queryStr);
            TermQuery query = new TermQuery(term);
            Hits hits = searcher.search(query);
            if (hits.length() == 0) {
                System.out.println("no result matches");
                return;
            }
            for (int i = 0; i < hits.length(); i++) {
                Document document = hits.doc(i);
                System.out.println("File: " + document.get("path"));
            }
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            // Close the searcher on every exit path, including the early
            // returns above.
            if (searcher != null) {
                try {
                    searcher.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
    }
}
b) Boolean查询
package search;
import java.io.File;
import java.io.IOException;
import jeasy.analysis.MMAnalyzer;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
/**
 * Parses multi-term query strings with {@code QueryParser} and prints the
 * number of hits for each one, requiring all terms to match.
 */
public class Boolean_search {
    /**
     * Runs each query string in {@code searchWords} against the "contents"
     * field and prints its hit count.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        IndexSearcher indexSearcher = null;
        try {
            File indexDir = new File("D://lucene/index");
            if (!indexDir.exists()) {
                System.out.println("The Lucene index is not exist");
                return;
            }
            Directory dir = FSDirectory.getDirectory(indexDir, false);
            indexSearcher = new IndexSearcher(dir);

            // MMAnalyzer is the je-analysis analyzer, also used when the
            // index was built; StandardAnalyzer would be the stock alternative.
            Analyzer analyzer = new MMAnalyzer();
            QueryParser qp = new QueryParser("contents", analyzer);
            // QueryParser combines terms with OR by default; switch to AND
            // so that every term of the query must match.
            qp.setDefaultOperator(QueryParser.AND_OPERATOR);

            String[] searchWords = { "北京 liu" };
            for (int i = 0; i < searchWords.length; i++) {
                Query query = qp.parse(searchWords[i]);
                Hits results = indexSearcher.search(query);
                System.out.println(results.length()
                        + " search results for query " + searchWords[i]);
            }
        } catch (CorruptIndexException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        } catch (ParseException e) {
            e.printStackTrace();
        } finally {
            // Close the searcher on every exit path, including parse failures.
            if (indexSearcher != null) {
                try {
                    indexSearcher.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
    }
}