package demo.first;
import java.io.*;
import java.util.Date;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.RangeQuery;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.WildcardQuery;
/**
 * Small Lucene demo: builds an index on disk (from literal strings or from a
 * text file) and runs one of several sample queries against it.
 *
 * <p>Each query-building method returns a {@link Query} that {@link #search()}
 * could execute; {@code search()} currently uses {@link #queryParser()}.
 */
public class Search {
    /** Wall-clock timestamps taken around the most recent {@link #search()} call. */
    Date startTime, endTime;

    /**
     * Directory where the index files are stored.
     */
    String path = "D://workspace//fwk//lucenedemo//firstLuceneIndex";

    /**
     * Builds a fresh index containing two hard-coded documents.
     *
     * <p>Any failure is printed to stderr; the writer is closed either way.
     */
    public void createLuceneIndex() {
        // A Field is comparable to a database column: the first argument is the
        // column name, the second is its value. Field.Store controls whether the
        // original value is kept in the index so it can be read back from a hit;
        // Field.Index controls whether/how the value is analyzed and made searchable.
        Document docA = new Document();
        docA.add(new Field("content", "搜索引擎", Field.Store.YES, Field.Index.TOKENIZED));
        // English sample text
        docA.add(new Field("content", "hello llying ,I love you", Field.Store.YES, Field.Index.TOKENIZED));
        docA.add(new Field("lastModifyTime", "2010个人", Field.Store.YES, Field.Index.TOKENIZED));

        // A Document is comparable to a database row made up of fields.
        Document docB = new Document();
        docB.add(new Field("content", "创建索引", Field.Store.YES, Field.Index.TOKENIZED));
        docB.add(new Field("content", "i live in shanghai.i come from cn", Field.Store.YES, Field.Index.TOKENIZED));
        docB.add(new Field("lastModifyTime", "2020个人", Field.Store.YES, Field.Index.TOKENIZED));

        writeIndex(new Document[] { docA, docB });
    }

    /**
     * Builds a fresh index containing a single document whose content is read
     * from a text file (decoded as GBK) and whose {@code path} field records
     * the file location.
     *
     * <p>Any failure is printed to stderr; the writer is closed either way.
     */
    public void createIndexByFile() {
        try {
            String filePath = "D://workspace//fwk//lucenedemo//firstLuceneIndex//test.txt";
            String content = file2String(filePath, "GBK");

            Document doc = new Document();
            // The path is indexed as a single un-analyzed term.
            doc.add(new Field("path", filePath, Field.Store.YES, Field.Index.UN_TOKENIZED));
            doc.add(new Field("content", content, Field.Store.YES, Field.Index.TOKENIZED));

            writeIndex(new Document[] { doc });
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Creates (overwriting any existing index at {@link #path}) an index holding
     * the given documents, optimizes it, and closes the writer.
     *
     * <p>The writer is closed in a {@code finally} block so that a failure while
     * adding documents does not leave it open.
     *
     * @param docs documents to add, in order
     */
    private void writeIndex(Document[] docs) {
        IndexWriter writer = null;
        try {
            // Third argument 'true' = create a new index instead of appending.
            writer = new IndexWriter(path, new StandardAnalyzer(), true);
            for (int i = 0; i < docs.length; i++) {
                writer.addDocument(docs[i]);
            }
            // Optimizing the index is worthwhile when indexing large volumes of data.
            writer.optimize();
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            // Like a database connection: whatever was opened must be closed.
            if (writer != null) {
                try {
                    writer.close();
                } catch (Exception e) {
                    e.printStackTrace();
                }
            }
        }
    }

    /**
     * Reads a whole text file into a string.
     *
     * <p>Lines are joined with {@code '\n'} so that the last word of one line and
     * the first word of the next are not fused into a single token when the text
     * is analyzed. The reader is always closed.
     *
     * @param fileName path of the file to read
     * @param charset  name of the charset used to decode the file
     * @return the file content, each line followed by {@code '\n'}
     * @throws Exception if the file cannot be opened or read, or the charset is unsupported
     */
    private String file2String(String fileName, String charset) throws Exception {
        BufferedReader reader = new BufferedReader(
                new InputStreamReader(new FileInputStream(fileName), charset));
        try {
            StringBuilder builder = new StringBuilder();
            String line;
            while ((line = reader.readLine()) != null) {
                builder.append(line).append('\n');
            }
            return builder.toString();
        } finally {
            reader.close();
        }
    }

    /**
     * Wildcard search on the {@code content} field; comparable to the condition
     * after WHERE in SQL. WildcardQuery is not recommended for general use.
     *
     * <p>In Lucene's wildcard syntax {@code *} matches any character sequence and
     * {@code ?} matches a single character. The pattern is matched against
     * individual indexed terms, not against the original field text, which is
     * why multi-character Chinese patterns such as {@code *搜索*} find nothing
     * when the analyzer has indexed each Chinese character as its own term.
     *
     * @return a wildcard query for terms containing the letter {@code c}
     */
    private Query wildcardQuery() {
        Term term = new Term("content", "*c*");
        return new WildcardQuery(term);
    }

    /**
     * Exact-term search on the {@code content} field.
     *
     * <p>A TermQuery is not analyzed: it matches only a term exactly as it was
     * indexed (for example a single Chinese character or a single English word).
     *
     * @return a term query for the literal term {@code 19:58:25}
     */
    public Query termQuery() {
        Term term = new Term("content", "19:58:25");
        return new TermQuery(term);
    }

    /**
     * Builds a query by parsing a query string with {@link QueryParser} and the
     * standard analyzer, using {@code content} as the default field.
     *
     * @return the parsed query, or {@code null} if parsing failed (the failure
     *         is printed to stderr)
     */
    public Query queryParser() {
        QueryParser parser = new QueryParser("content", new StandardAnalyzer());
        try {
            return parser.parse("搜索 - 擎");
        } catch (Exception e) {
            e.printStackTrace();
        }
        return null;
    }

    /**
     * Boolean (AND/OR style) combination of two term queries on {@code content}.
     *
     * <p>The term {@code 索} is required (MUST); the term {@code 搜} is optional
     * (SHOULD).
     *
     * @return the combined boolean query
     */
    public Query booleanQuery() {
        TermQuery required = new TermQuery(new Term("content", "索"));
        TermQuery optional = new TermQuery(new Term("content", "搜"));

        // Author's note: "JoinQuery" might have been a more fitting name for this class.
        BooleanQuery combined = new BooleanQuery();
        combined.add(required, BooleanClause.Occur.MUST);
        combined.add(optional, BooleanClause.Occur.SHOULD);
        return combined;
    }

    /**
     * Multi-keyword phrase search on {@code content}: the terms {@code 搜} and
     * {@code 擎} with a slop of 1.
     *
     * @return the phrase query
     */
    public Query phraseQuery() {
        PhraseQuery phrase = new PhraseQuery();
        phrase.setSlop(1);
        phrase.add(new Term("content", "搜"));
        phrase.add(new Term("content", "擎"));
        return phrase;
    }

    /**
     * Inclusive range search on the {@code lastModifyTime} field between the
     * terms {@code 20000808} and {@code 20150808}.
     *
     * @return the range query
     */
    public Query rangeQuery() {
        Term lower = new Term("lastModifyTime", "20000808");
        Term upper = new Term("lastModifyTime", "20150808");
        return new RangeQuery(lower, upper, true);
    }

    /**
     * Runs the query produced by {@link #queryParser()} against the index at
     * {@link #path} and prints, for every hit, its document id, the document and
     * its score, followed by the elapsed time in milliseconds.
     *
     * <p>If the query could not be built, nothing is searched. The searcher is
     * always closed. Failures are printed to stderr.
     */
    public void search() {
        IndexSearcher searcher = null;
        try {
            // Comparable to "select * from tableName" in SQL.
            searcher = new IndexSearcher(path);
            startTime = new Date();

            // Abstract query object; swap in phraseQuery() etc. to try other kinds.
            Query query = queryParser();
            if (query == null) {
                // queryParser() already reported the parse failure; searching
                // with a null query would only fail later with a less useful error.
                return;
            }

            // Hits is the result set, similar in spirit to a JDBC ResultSet.
            Hits hits = searcher.search(query);
            for (int i = 0; i < hits.length(); i++) {
                System.out.println(hits.id(i));
                System.out.println(hits.doc(i));
                System.out.println(hits.score(i));
            }

            endTime = new Date();
            System.out.println("本次搜索用时:" + (endTime.getTime() - startTime.getTime()) + "毫秒");
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            if (searcher != null) {
                try {
                    searcher.close();
                } catch (Exception e) {
                    e.printStackTrace();
                }
            }
        }
    }

    /**
     * Demo entry point: indexes the sample text file, then runs the search.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        Search search = new Search();
        //search.createLuceneIndex();
        search.createIndexByFile();
        search.search();
    }
}