lucene的简单使用
public class Testlucene
{
    /**
     * Builds a small index under indexDir/: one document with id, content
     * and city fields, analyzed by IKAnalyzer.
     *
     * @throws IOException if the index directory cannot be written
     */
    @Test
    public void createIndex() throws IOException
    {
        Directory directory = FSDirectory.open(new File("indexDir/"));
        Analyzer analyzer = new IKAnalyzer();
        IndexWriterConfig indexConfig = new IndexWriterConfig(Version.LUCENE_44, analyzer);
        IndexWriter indexWriter = new IndexWriter(directory, indexConfig);
        try {
            Document doc = new Document();
            // StringField is indexed as one untokenized term; TextField is analyzed.
            doc.add(new StringField("id", "1", Store.YES));
            doc.add(new TextField("content", "我的测试数据", Store.YES));
            doc.add(new StringField("city", "这是我的测试数据", Store.YES));
            indexWriter.addDocument(doc);
            indexWriter.commit();
        } finally {
            // FIX: always release the writer (and its write lock) and the
            // directory, even when indexing throws.
            indexWriter.close();
            directory.close();
        }
    }

    /**
     * Searches the "content" field for "测试" and prints the stored fields
     * of every hit.
     *
     * @throws Exception if the index cannot be opened or the query cannot be parsed
     */
    @Test
    public void searcher() throws Exception
    {
        Directory directory = FSDirectory.open(new File("indexDir/"));
        DirectoryReader reader = DirectoryReader.open(directory);
        try {
            QueryParser parser = new QueryParser(Version.LUCENE_44, "content",
                    new IKAnalyzer());
            Query query = parser.parse("测试");
            IndexSearcher searcher = new IndexSearcher(reader);
            TopDocs docs = searcher.search(query, 100);
            for (ScoreDoc doc : docs.scoreDocs)
            {
                // Stored fields are fetched through the searcher.
                Document document = searcher.doc(doc.doc);
                System.out.println(document.get("id"));
                System.out.println(document.get("content"));
                System.out.println(document.get("city"));
            }
        } finally {
            // FIX: the reader and directory were never closed, leaking file handles.
            reader.close();
            directory.close();
        }
    }
}
lucene的优化
public class TestOptimise {
/**
 * Tunes segment merging through IndexWriterConfig. Since Lucene 4.0 the
 * index is optimized automatically; tuning is done by configuring the
 * merge policy rather than calling optimize() explicitly.
 *
 * @throws IOException if the index directory cannot be opened
 */
public void testOptimise1() throws IOException{
    Directory directory = FSDirectory.open(new File(Contants.INDEXURL));
    // Lucene is "zero configuration": behaviour is tuned by setting
    // parameters on config objects.
    IndexWriterConfig conf = new IndexWriterConfig(LuceneUtils.getMatchVersion(), LuceneUtils.getAnalyzer());
    // MergePolicy decides when segments are merged together.
    LogDocMergePolicy mergePolicy = new LogDocMergePolicy();
    /*
     * mergeFactor:
     * - smaller value: less memory used while indexing, faster searches,
     *   slower indexing;
     * - larger value: more memory used while indexing, slower searches,
     *   faster indexing.
     * Recommended range: 2 < value < 10.
     */
    mergePolicy.setMergeFactor(6);
    conf.setMergePolicy(mergePolicy);
    IndexWriter indexWriter = new IndexWriter(directory, conf);
    // FIX: the writer was created but never closed, leaking the index
    // write lock and the directory handle.
    indexWriter.close();
    directory.close();
}
/**
 * Optimisation: exclude stop words. Words filtered out by the analyzer are
 * never indexed, so the index file shrinks and searches get faster.
 *
 * @throws IOException if the index cannot be accessed
 */
public void testOptimise2() throws IOException{
}
/**
 * Optimisation: store the index data in separate partitions so each
 * search only has to touch a smaller index.
 *
 * @throws IOException if the index cannot be accessed
 */
public void testOptimise3() throws IOException{
}
/**
 * Optimisation: copy the on-disk index into a RAMDirectory and search it
 * from memory. (The original comment said "partition the index", but this
 * method actually demonstrates in-memory searching.)
 *
 * @throws IOException if the index cannot be read
 * @throws ParseException if the query string cannot be parsed
 */
@Test
public void testOptimise4() throws IOException, ParseException{
    // Index stored on disk.
    Directory directory1 = FSDirectory.open(new File(Contants.INDEXURL));
    IOContext ioContext = new IOContext();
    // Copy of the index held in memory for faster access.
    Directory directory = new RAMDirectory(directory1, ioContext);
    IndexReader indexReader = DirectoryReader.open(directory);
    try {
        IndexSearcher indexSearcher = new IndexSearcher(indexReader);
        String fields [] = {"title"};
        // MultiFieldQueryParser expands the keyword over every listed
        // field, e.g. title:keywords, content:keywords.
        QueryParser queryParser = new MultiFieldQueryParser(LuceneUtils.getMatchVersion(), fields, LuceneUtils.getAnalyzer());
        Query query = queryParser.parse("学习");
        TopDocs topDocs = indexSearcher.search(query, 100);
        System.out.println(topDocs.totalHits);
    } finally {
        // FIX: the reader and both directories were never closed,
        // leaking file handles and the RAM copy.
        indexReader.close();
        directory.close();
        directory1.close();
    }
}
排序
Directory directory1=FSDirectory.open(new File(Contants.INDEXURL));
// NOTE(review): orphan snippet — this is the body of a "sorting" example whose
// method header was lost during extraction; `directory1` is opened just above.
IOContext ioContext=new IOContext();
// Load the on-disk index into memory.
Directory directory=new RAMDirectory(directory1,ioContext);
IndexReader indexReader=DirectoryReader.open(directory);
IndexSearcher indexSearcher=new IndexSearcher(indexReader);
String fields []={"title"};
// Different parser rules produce different Query subclasses,
// e.g. title:keywords, content:keywords.
QueryParser queryParser=new MultiFieldQueryParser(LuceneUtils.getMatchVersion(),fields,LuceneUtils.getAnalyzer());
Query query=queryParser.parse("学习");
Sort sort=new Sort();
// Ascending order:
//SortField sortField=new SortField("id", Type.INT);
// Descending order (the third argument `true` reverses the sort):
SortField sortField=new SortField("id", Type.INT,true);
// Attach the sort field to the Sort object.
sort.setSort(sortField);
TopDocs topDocs=indexSearcher.search(query,100,sort);
for(ScoreDoc scoreDoc:topDocs.scoreDocs){
Document document=indexSearcher.doc(scoreDoc.doc);
System.out.println(document.get("id"));
}
查询
// NOTE(review): orphan snippet — the method header above was lost during
// extraction. Also `query` is declared twice (TermQuery here and FuzzyQuery
// below); only one of the two declarations can be active in a single method.
// Query style 1: exact term query (the term text is not analyzed).
Query query=new TermQuery(new Term("author","毕加索"));
// Query style 2: parsed string search.
// String fields []={"author"};
//
// QueryParser queryParser=new MultiFieldQueryParser(LuceneUtils.getMatchVersion(),fields,LuceneUtils.getAnalyzer());
// Query query=queryParser.parse("毕加索");
//
// // author:毕 author:加
// Query style 3: match all documents.
// Query query=new MatchAllDocsQuery();
// Query style 4: numeric range query — can replace a filter, and of the
// two approaches this one is recommended (better performance than Filter).
// Query query=NumericRangeQuery.newIntRange("id", 1, 10, true, true);
// Query style 5: wildcard — ? matches one character, * matches many.
// Query query=new WildcardQuery(new Term("title", "luce*"));
// Query style 6: fuzzy query on the (string-typed) author field.
/**
 * FuzzyQuery arguments:
 * 1: the term to match approximately.
 * 2: maximum edit distance (0, 1 or 2) — how many characters of the
 *    query value may differ and still match.
 */
Query query=new FuzzyQuery(new Term("author", "爱新觉罗杜小"),1);
testQuery(query);
}
/**
 * Executes the given query against the shared searcher and prints the
 * stored id, title, content, author and link fields of up to 100 hits.
 *
 * @param query the query to run
 * @throws Exception if the searcher cannot be obtained or the search fails
 */
public static void testQuery(Query query) throws Exception{
    IndexSearcher searcher = LuceneUtils.getIndexSearcher();
    TopDocs results = searcher.search(query, 100);
    ScoreDoc[] hits = results.scoreDocs;
    String[] storedFields = {"id", "title", "content", "author", "link"};
    for (int i = 0; i < hits.length; i++) {
        Document stored = searcher.doc(hits[i].doc);
        for (String field : storedFields) {
            System.out.println(stored.get(field));
        }
    }
}
过滤器
// NOTE(review): orphan snippet — a "filter" example whose method header was
// lost during extraction.
Directory directory1=FSDirectory.open(new File(Contants.INDEXURL));
IOContext ioContext=new IOContext();
// Load the on-disk index into memory.
Directory directory=new RAMDirectory(directory1,ioContext);
IndexReader indexReader=DirectoryReader.open(directory);
IndexSearcher indexSearcher=new IndexSearcher(indexReader);
String fields []={"title"};
// Different parser rules produce different Query subclasses,
// e.g. title:keywords, content:keywords.
QueryParser queryParser=new MultiFieldQueryParser(LuceneUtils.getMatchVersion(),fields,LuceneUtils.getAnalyzer());
Query query=queryParser.parse("抑郁症");
/**
 * NumericRangeFilter.newIntRange arguments:
 * 1: the field to filter on
 * 2: minimum value of the range
 * 3: maximum value of the range
 * 4: whether the minimum is inclusive
 * 5: whether the maximum is inclusive
 */
// Filter is abstract; each concrete subclass is a different filtering rule.
Filter filter=NumericRangeFilter.newIntRange("id", 1, 10, false, true);
TopDocs topDocs=indexSearcher.search(query,filter,100);
for(ScoreDoc scoreDoc:topDocs.scoreDocs){
Document document=indexSearcher.doc(scoreDoc.doc);
System.out.println(document.get("id"));
}
高亮
/**
 * 对查询出来的结果所包含的搜索关键字进行高亮...
 * @author Administrator
 */
public class TestHighLighter {
    /**
     * Demonstrates Lucene's built-in highlighter: searches the "title"
     * field for a keyword, then wraps every occurrence of the keyword in
     * the stored title/content with a red &lt;font&gt; tag, e.g.
     * "solr 是基于&lt;font color='red'&gt;lucene&lt;/font&gt; 的一个全文检索服务器."
     */
    public static void main(String[] args) throws Exception {
        String[] searchFields = {"title"};
        String keywords = "lucene";
        QueryParser parser = new MultiFieldQueryParser(LuceneUtils.getMatchVersion(), searchFields, LuceneUtils.getAnalyzer());
        Query query = parser.parse(keywords);
        IndexSearcher searcher = LuceneUtils.getIndexSearcher();
        TopDocs results = searcher.search(query, 100);

        // How a match is rendered in the output fragment.
        Formatter htmlFormatter = new SimpleHTMLFormatter("<font color='red'>", "</font>");
        // The scorer pulls the terms to highlight out of the query itself.
        Scorer queryScorer = new QueryScorer(query);
        // Highlighter = rendering format + terms to highlight.
        Highlighter highlighter = new Highlighter(htmlFormatter, queryScorer);

        Article article = null;
        System.out.println("总记录数==="+results.totalHits);
        for (ScoreDoc hit : results.scoreDocs) {
            article = new Article();
            Document stored = searcher.doc(hit.doc);
            String rawTitle = stored.get("title");
            String rawContent = stored.get("content");
            System.out.println("没有高亮之前的结果title=="+rawTitle);
            System.out.println("没有高亮之前的结果content=="+rawContent);
            // getBestFragment returns null when the field value contains
            // no search keyword to highlight.
            String highlightedTitle = highlighter.getBestFragment(LuceneUtils.getAnalyzer(), "title", rawTitle);
            String highlightedContent = highlighter.getBestFragment(LuceneUtils.getAnalyzer(), "content", rawContent);
            System.out.println("高亮之后的结果------------------------------------------------------------");
            System.out.println("高亮之后的结果hightitle=="+highlightedTitle);
            System.out.println("高亮之后的结果highcontent=="+highlightedContent);
            // Fall back to the raw value when no highlight was produced.
            article.setTitle(highlightedTitle == null ? rawTitle : highlightedTitle);
            article.setContent(highlightedContent == null ? rawContent : highlightedContent);
            // What the end user finally sees.
            System.out.println("---"+article.getTitle());
            System.out.println("---"+article.getContent());
        }
    }
}