lucene ik 小试一下,做技术储备

本文介绍了一个使用 Lucene 结合 IK 分词器实现的文本索引和搜索实例(代码中以 Version.LUCENE_35 兼容模式编写,实际运行环境为 Lucene 4.0,详见文末说明)。该实例涵盖了建立索引、更新索引、删除索引及搜索功能,并通过代码展示了如何对搜索结果中的关键字进行高亮。

摘要生成于 C知道 ,由 DeepSeek-R1 满血版支持, 前往体验 >

 

package lucene;
import java.io.File;
import java.io.IOException;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.highlight.Formatter;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.Scorer;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import org.wltea.analyzer.lucene.IKAnalyzer;
 
 
/**
 * Small Lucene + IK analyzer example: builds an index of sample documents,
 * updates and deletes documents by their {@code id} field, and runs
 * highlighted searches whose results are printed to standard output.
 *
 * <p>Every operation opens the index directory itself and releases everything
 * it opened before returning, whether or not the operation succeeded.
 *
 * @author zz
 * @date   2012-11-29
 */
public class LuceneDemo {
    // Location of the index files on disk.
    File dataFile = new File("D://indexFile");
    // IK analyzer, shared by indexing, query parsing and highlighting.
    Analyzer analyzer = new IKAnalyzer();


    /**
     * Adds six sample documents (ids 1-6) to the index.
     *
     * <p>Failures are reported with a stack trace and not rethrown.
     */
    public void bulidIndex() {
        Directory directory = null;
        IndexWriter writer = null;
        try {
            directory = FSDirectory.open(dataFile);
            writer = openWriter(directory);

            writer.addDocument(addDocument(1, "中国好平台", "中国好平台,1是以分享知识与经验的学习交流平台,加入中国好平台,学习新知识、结交新朋友、塑造个人形象。"));
            writer.addDocument(addDocument(2, "中国好平台", "中国好平台,2是以分享知识与经验的学习交流平台,加入中国好平台,学习新知识、结交新朋友、塑造个人形象。"));
            writer.addDocument(addDocument(3, "中国好平台", "中国好平台,3是以分享知识与经验的学习交流平台,加入中国好平台,学习新知识、结交新朋友、塑造个人形象。"));
            writer.addDocument(addDocument(4, "资料", "中国好平台,4是以分享知识与经验的学习交流平台,加入中国好平台,学习新知识、结交新朋友、塑造个人形象。"));
            writer.addDocument(addDocument(5, "微知识", "中国好平台,5是以分享知识与经验的学习交流平台,加入中国好平台,学习新知识、结交新朋友、塑造个人形象。"));
            writer.addDocument(addDocument(6, "我", "中国好平台,6是以分享知识与经验的交流平台,加入中国好平台,新知识、结交新朋友、塑造个人形象。"));
        } catch (Exception ex) {
            ex.printStackTrace();
        } finally {
            // Either reference may still be null if opening failed part-way.
            closeQuietly(writer);
            closeQuietly(directory);
        }
    }


    /**
     * Builds a document with the three fields used throughout this demo.
     *
     * @param id      document identifier; stored and indexed as a single un-analyzed term
     * @param title   title text; stored and analyzed
     * @param content body text; stored and analyzed
     * @return the new document (it is not added to any index here)
     */
    @SuppressWarnings("deprecation")
    public Document addDocument(Integer id, String title, String content) {
        Document doc = new Document();

        // Field.Index.NO           -> not indexed
        // Field.Index.ANALYZED     -> tokenized, then indexed
        // Field.Index.NOT_ANALYZED -> indexed as-is, without tokenizing
        doc.add(new Field("id", String.valueOf(id), Field.Store.YES, Field.Index.NOT_ANALYZED));
        doc.add(new Field("title", title, Field.Store.YES, Field.Index.ANALYZED));
        doc.add(new Field("content", content, Field.Store.YES, Field.Index.ANALYZED));
        return doc;
    }


    /**
     * Updates the index entry for {@code id} with a document built from the
     * given values.
     *
     * <p>Failures are reported with a stack trace and not rethrown.
     *
     * @param id      identifier of the document to update
     * @param title   new title
     * @param content new content
     */
    public void update(Integer id, String title, String content) {
        Directory directory = null;
        IndexWriter writer = null;
        try {
            directory = FSDirectory.open(dataFile);
            writer = openWriter(directory);

            Term idTerm = new Term("id", String.valueOf(id));
            writer.updateDocument(idTerm, addDocument(id, title, content));
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            // Close on the failure path too, so the writer is never left open.
            closeQuietly(writer);
            closeQuietly(directory);
        }
    }


    /**
     * Deletes the documents whose {@code id} field equals the given id.
     *
     * <p>Failures are reported with a stack trace and not rethrown.
     *
     * @param id identifier of the document(s) to delete
     */
    public void delete(Integer id) {
        Directory directory = null;
        IndexWriter writer = null;
        try {
            directory = FSDirectory.open(dataFile);
            writer = openWriter(directory);

            writer.deleteDocuments(new Term("id", String.valueOf(id)));
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            closeQuietly(writer);
            closeQuietly(directory);
        }
    }


    /**
     * Runs a query and prints up to ten hits, with matched terms in the title
     * and content wrapped in {@code <font color='red'>} tags.
     *
     * <p>The query parser is created without a default field.
     * NOTE(review): presumably every term in {@code where} must therefore carry
     * an explicit field prefix such as {@code title:} - confirm.
     *
     * @param where query string in Lucene query-parser syntax
     * @param after last hit of the previous page when paging, or {@code null}
     *              to start from the first hit
     * @throws RuntimeException wrapping any failure raised while searching
     */
    public void search(String where, ScoreDoc after) {
        Directory directory = null;
        IndexReader reader = null;
        try {
            directory = FSDirectory.open(dataFile);

            // Read through the directory opened above so that it, and the
            // reader, can both be released in the finally block.
            reader = IndexReader.open(directory);
            IndexSearcher isearcher = new IndexSearcher(reader);

            QueryParser parser = new QueryParser(Version.LUCENE_35, null, analyzer);
            Query query = parser.parse(where);

            // Deep paging: fetch the ten hits that follow "after".
            TopDocs topDocs = isearcher.searchAfter(after, query, 10);
            ScoreDoc[] hits = topDocs.scoreDocs;

            // Keyword highlighting.
            Formatter formatter = new SimpleHTMLFormatter("<font color='red'>", "</font>");
            Scorer scorer = new QueryScorer(query);
            Highlighter highlighter = new Highlighter(formatter, scorer);

            for (ScoreDoc scoreDoc : hits) {
                Document hitDoc = isearcher.doc(scoreDoc.doc);
                String id = hitDoc.get("id");
                String storedTitle = hitDoc.get("title");
                String storedContent = hitDoc.get("content");
                float score = scoreDoc.score;

                // getBestFragment may return null; fall back to the stored text then.
                String title = highlighter.getBestFragment(analyzer, "title", storedTitle);
                if (title == null) {
                    title = storedTitle;
                }

                String content = highlighter.getBestFragment(analyzer, "content", storedContent);
                if (content == null) {
                    content = storedContent;
                }

                System.out.println("doc:" + scoreDoc.doc + "    score:" + score + "   id:" + id + "   title:" + title + "    content:" + content);
            }
        } catch (Exception e) {
            throw new RuntimeException(e);
        } finally {
            // Null-safe cleanup: a failed open must not turn into an NPE here
            // and hide the exception that is already propagating.
            closeQuietly(reader);
            closeQuietly(directory);
        }
    }


    /** Creates an index writer on the given directory using this demo's analyzer. */
    private IndexWriter openWriter(Directory directory) throws IOException {
        IndexWriterConfig writerConfig = new IndexWriterConfig(Version.LUCENE_35, analyzer);
        return new IndexWriter(directory, writerConfig);
    }


    /** Closes the resource if it is non-null; a failure to close is printed, not thrown. */
    private static void closeQuietly(java.io.Closeable resource) {
        if (resource == null) {
            return;
        }
        try {
            resource.close();
        } catch (Exception e) {
            // Best-effort cleanup, same reporting style as the rest of the class.
            e.printStackTrace();
        }
    }
}

 

 

package lucene;
import org.junit.AfterClass;
import org.junit.BeforeClass;
import org.junit.Test;
 
/**
 * Drives each {@link LuceneDemo} operation once.
 *
 * <p>The tests contain no assertions; they only invoke the demo methods on one
 * shared {@code LuceneDemo} instance.
 */
public class LuceneDemoTest {
    /** Shared demo instance, created once before any test runs. */
    static LuceneDemo luceneDemo;

    @BeforeClass
    public static void setUpBeforeClass() throws Exception {
        luceneDemo = new LuceneDemo();
    }

    @AfterClass
    public static void tearDownAfterClass() throws Exception {
        // Nothing to release here.
    }

    /** Invokes the index-building step. */
    @Test
    public void testBulidIndex() {
        luceneDemo.bulidIndex();
    }

    /** Invokes an update for the document with id 1. */
    @Test
    public void testUpdate() {
        luceneDemo.update(1, "测试更新", "更新内容。。。。");
    }

    /** Invokes a delete for the document with id 1. */
    @Test
    public void testDelete() {
        luceneDemo.delete(1);
    }

    /** Runs a two-clause query from the first page (no paging cursor). */
    @Test
    public void testSearch() {
        // Roughly the SQL: title='中国好平台' or content='学习'
        final String queryText = "title:中国好平台 content:学习";

        // Other query strings to try:
        //   "title:中国好平台 +content:学习 -id:1"  -> roughly: title='中国好平台' and content='学习' and id!=1
        //   "title:我 -content:学习"                -> roughly: title='我' and content!='学习'
        //
        // Paging variant: pass the last hit of the previous page instead of null, e.g.
        //   ScoreDoc after = new ScoreDoc(0, (float)0.032529574);
        //   luceneDemo.search(queryText, after);

        luceneDemo.search(queryText, null);
    }

}

 

本文纯属造轮子:闲来无事写个示例,留作技术储备,看将来是否有用得上的地方。需要说明的是,虽然代码中写的是 Version.LUCENE_35,但这里实际使用的是 Lucene 4.0 和 IK Analyzer 2012,二者都是当时的最新版本,可以到以下网站下载:

lucene:http://apache.etoak.com/lucene/java/4.0.0/

ik:http://code.google.com/p/ik-analyzer/downloads/list

 

参考:http://www.juziku.com/sunlightcs/wiki/4205.htm

 

 

 

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值