lucene-Field.Store解析

本文深入探讨Lucene中Field.Store的三种形态:YES、NO、COMPRESS,通过具体案例展示了不同存储策略对文档检索和内容获取的影响。YES允许查询并保存原始值;NO允许查询但不保存值;COMPRESS允许查询并压缩保存值。实际应用中,COMPRESS因效率问题被建议避免使用。

本文主要内容装载这里

Store 三种形态

      COMPRESS:压缩保存。用于长文本或二进制数据 (后期高版本舍弃了)
      YES:保存 
      NO:不保存 

 

具体案例

package demo.first;

import java.io.IOException;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.Index;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.store.LockObtainFailedException;

/**
 * Demonstrates how the three {@code Field.Store} modes (YES, NO, COMPRESS)
 * affect searching a field and reading its stored value back from a hit.
 *
 * <p>Written against the Lucene 2.3.x API ({@code Hits}, {@code Index.TOKENIZED},
 * {@code Store.COMPRESS}).
 */
public class TestFieldStore {
    /**
     * Location of the index files.
     */
    String path = "D://workspace//fwk//lucenedemo//firstLuceneIndex";

    /**
     * Builds an index at {@link #path} containing a single document with one
     * tokenized field per {@code Field.Store} mode.
     *
     * <p>Failures are printed to stderr; the writer is always closed so the
     * index lock is not left behind when indexing fails.
     */
    public void createLuceneIndex(){
        IndexWriter iw = null;
        try {
            // Third argument 'true' asks Lucene to create the index at this path.
            iw = new IndexWriter(path,new StandardAnalyzer(),true);
            Document doc = new Document();
            // Store.YES: value is stored; the field is searchable and its content can be printed.
            Field storeYes = new Field("storeyes","storeyes",Store.YES,Index.TOKENIZED);
            // Store.NO: value is not stored; the field is searchable but its content
            // cannot be printed. Saves space because nothing is stored.
            Field storeNo = new Field("storeno","storeno",Store.NO,Index.TOKENIZED);
            // Store.COMPRESS: value is stored compressed; the field is searchable and its
            // content can be printed, and the index files take less space.
            Field storeCompress = new Field("storecompress","storecompress",Store.COMPRESS,Index.TOKENIZED);
            doc.add(storeYes);
            doc.add(storeNo);
            doc.add(storeCompress);
            iw.addDocument(doc);
            iw.optimize();
        } catch (CorruptIndexException e) {
            e.printStackTrace();
        } catch (LockObtainFailedException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            if (iw != null) {
                try {
                    iw.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
    }

    /**
     * Searches each of the three fields for its own name and prints every hit,
     * showing which store modes allow the original value to be read back.
     *
     * <p>Failures are printed to stderr; the searcher is always closed.
     */
    public void testSearch(){
        IndexSearcher iser = null;
        try {
            iser = new IndexSearcher(path);

            // Store.YES: the field can be found and its content can be printed.
            printHits(iser, "---storeYes", "storeyes");

            // Store.NO: the field can be found, but its content cannot be printed.
            printHits(iser, "---storeNo", "storeno");

            // Store.COMPRESS: the field can be found and its content can be printed.
            printHits(iser, "---storeCompress", "storecompress");
        } catch (CorruptIndexException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        } catch (ParseException e) {
            e.printStackTrace();
        } finally {
            if (iser != null) {
                try {
                    iser.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
    }

    /**
     * Prints a banner, then queries {@code field} for the term equal to the
     * field's own name and prints id, document, stored value and score of each hit.
     *
     * @param searcher open searcher over the demo index
     * @param banner   heading line printed before the hits
     * @param field    field to search; also used as the query text
     * @throws ParseException if the query text cannot be parsed
     * @throws IOException    if reading the index fails
     */
    private void printHits(IndexSearcher searcher, String banner, String field)
            throws ParseException, IOException {
        System.out.println(banner);
        QueryParser parser = new QueryParser(field, new StandardAnalyzer());
        Hits hits = searcher.search(parser.parse(field));
        for (int i = 0; i < hits.length(); i++) {
            System.out.println("id :" + hits.id(i));
            System.out.println("doc :" + hits.doc(i));
            System.out.println("context :" + hits.doc(i).get(field));
            System.out.println("score :" + hits.score(i));
        }
    }

    public static void main(String[] args) {
        TestFieldStore tfs = new TestFieldStore();
        tfs.createLuceneIndex();
        tfs.testSearch();
    }
}

 

由此可以看出:Field.Store 取何值(YES、NO 或 COMPRESS)与该字段能否被搜索到无关。 
这里整理一下 

Field.Store 
     :YES 可以搜索,保存原值 
      :NO  可以搜索,不保存原值 
     :COMPRESS 可以搜索,压缩保存原值 

 


这里需要注意的是:在实际使用中并不建议使用COMPRESS,因为存在压缩和解压过程,效率低下;对于大文本尽量使用NO。 
还有一点就是是否可被搜索与Store无关,只与Index有关。 
这里使用的是lucene 2.3.2 

 

转载于:https://www.cnblogs.com/hwaggLee/p/5228971.html

package com.boe.cim.teacher.luence; import java.nio.file.Paths; import java.util.List; import org.apache.lucene.analysis.cn.smart.SmartChineseAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.document.IntField; import org.apache.lucene.document.StringField; import org.apache.lucene.document.TextField; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.index.Term; import org.apache.lucene.store.Directory; import org.apache.lucene.store.FSDirectory; import com.boe.cim.teacher.pojo.TeacherInfo; /** * @author hyh * @description 创建文档索引 */ public class LuceneIndex { private Directory dir; /** *实例化indexerWriter * @return * @throws Exception */ private IndexWriter getWriter()throws Exception{ //中文分词器 SmartChineseAnalyzer analyzer=new SmartChineseAnalyzer(); IndexWriterConfig iwc=new IndexWriterConfig(analyzer); IndexWriter writer=new IndexWriter(dir, iwc); return writer; } /** * 获取indexDir * @param indexDir * @throws Exception */ public void index(String indexDir,List<TeacherInfo> listTeacher)throws Exception{ dir=FSDirectory.open(Paths.get(indexDir)); IndexWriter writer=getWriter(); for(int i=0;i<listTeacher.size();i++){ Document doc=new Document(); TeacherInfo teacher = listTeacher.get(i); //StringField 只索引不分词 doc.add(new StringField("id",String.valueOf(teacher.getId()), Field.Store.YES)); doc.add(new StringField("teacher", teacher.getTeacher(), Field.Store.YES)); doc.add(new StringField("school",teacher.getSchool(),Field.Store.YES)); doc.add(new StringField("department", teacher.getDepartment(), Field.Store.YES)); doc.add(new TextField("researchdirector", teacher.getResearchdirector(), Field.Store.YES)); doc.add(new TextField("instruments", teacher.getInstruments(), Field.Store.YES)); doc.add(new TextField("achievements", teacher.getAchievements(), Field.Store.YES)); // writer.deleteDocuments(new 
Term("id",String.valueOf(teacher.getId()))); writer.updateDocument(new Term("id",String.valueOf(teacher.getId())), doc); } writer.close(); } } 上述代码的作用
09-02
package com.boe.cim.teacher.luence; import java.nio.file.Paths; import java.util.List; import org.apache.lucene.analysis.cn.smart.SmartChineseAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.document.IntField; import org.apache.lucene.document.StringField; import org.apache.lucene.document.TextField; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.index.Term; import org.apache.lucene.store.Directory; import org.apache.lucene.store.FSDirectory; import com.boe.cim.teacher.pojo.RequirementInfo; /** * @author hyh * @description 创建文档索引 */ public class LuceneIndexRequirement { private Directory dir; /** *实例化indexerWriter * @return * @throws Exception */ private IndexWriter getWriter()throws Exception{ //中文分词器 SmartChineseAnalyzer analyzer=new SmartChineseAnalyzer(); IndexWriterConfig iwc=new IndexWriterConfig(analyzer); IndexWriter writer=new IndexWriter(dir, iwc); return writer; } /** * 获取indexreDir * @param indexreDir * @throws Exception */ // public void index(String indexreDir, List<RequirementInfo> listRequirement) throws Exception { // dir = FSDirectory.open(Paths.get(indexreDir)); // IndexWriterConfig iwc = new IndexWriterConfig(new SmartChineseAnalyzer()); // iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND); // 明确指定打开模式 // try (IndexWriter writer = new IndexWriter(dir, iwc)) { // for (RequirementInfo requirement : listRequirement) { // Document doc = new Document(); // doc.add(new StringField("id", String.valueOf(requirement.getId()), Field.Store.YES)); // doc.add(new StringField("requirement", requirement.getRequirement(), Field.Store.YES)); // doc.add(new StringField("department", requirement.getDepartment(), Field.Store.YES)); // doc.add(new StringField("liaisonman", requirement.getLiaisonman(), Field.Store.YES)); // doc.add(new StringField("requirementtype", 
Integer.toString(requirement.getRequirementtype()), Field.Store.YES)); // doc.add(new TextField("requirementbackground", requirement.getRequirementbackground(), Field.Store.YES)); // doc.add(new TextField("requirementcontents", requirement.getRequirementcontents(), Field.Store.YES)); // writer.updateDocument(new Term("id", String.valueOf(requirement.getId())), doc); // } // } // } public void index(String indexreDir,List<RequirementInfo> listRequirement)throws Exception{ dir=FSDirectory.open(Paths.get(indexreDir)); IndexWriter writer=getWriter(); writer.commit(); for(int i=0;i<listRequirement.size();i++){ Document doc=new Document(); RequirementInfo requirement = listRequirement.get(i); //StringField 只索引不分词 doc.add(new StringField("id",String.valueOf(requirement.getId()), Field.Store.YES)); doc.add(new StringField("requirement", requirement.getRequirement(), Field.Store.YES)); doc.add(new StringField("department",requirement.getDepartment(),Field.Store.YES)); doc.add(new StringField("liaisonman", requirement.getLiaisonman(), Field.Store.YES)); doc.add(new StringField("requirementtype", Integer.toString(requirement.getRequirementtype()), Field.Store.YES)); doc.add(new StringField("technicalfield", requirement.getTechnicalfield(), Field.Store.YES)); doc.add(new TextField("requirementbackground", requirement.getRequirementbackground(), Field.Store.YES)); doc.add(new TextField("requirementcontents", requirement.getRequirementcontents(), Field.Store.YES)); // doc.add(new StringField("requirementtype", Integer.toString(requirement.getRequirementtype()), Field.Store.YES)); // doc.add(new TextField("requirementbackground", requirement.getRequirementbackground(), Field.Store.YES)); // doc.add(new TextField("requirementcontents", requirement.getRequirementcontents(), Field.Store.YES)); // writer.deleteDocuments(new Term("id",String.valueOf(teacher.getId()))); writer.updateDocument(new Term("id",String.valueOf(requirement.getId())), doc); } writer.close(); } // public void 
indexSingleRequirement(String indexreDir, RequirementInfo requirement) throws Exception { // dir = FSDirectory.open(Paths.get(indexreDir)); // try (IndexWriter writer = getWriter()) { // Document doc = new Document(); // doc.add(new StringField("id",String.valueOf(requirement.getId()), Field.Store.YES)); // doc.add(new StringField("requirement", requirement.getRequirement(), Field.Store.YES)); // doc.add(new StringField("department",requirement.getDepartment(),Field.Store.YES)); // doc.add(new StringField("liaisonman", requirement.getLiaisonman(), Field.Store.YES)); // doc.add(new StringField("requirementtype", Integer.toString(requirement.getRequirementtype()), Field.Store.YES)); // doc.add(new StringField("technicalfield", requirement.getTechnicalfield(), Field.Store.YES)); // doc.add(new TextField("requirementbackground", requirement.getRequirementbackground(), Field.Store.YES)); // doc.add(new TextField("requirementcontents", requirement.getRequirementcontents(), Field.Store.YES)); // // 添加其他字段... 
// writer.updateDocument(new Term("id", String.valueOf(requirement.getId())), doc); // } // try-with-resources 自动关闭writer // } } package com.boe.cim.teacher.luence; import java.util.List; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.beans.factory.annotation.Value; import org.springframework.boot.ApplicationArguments; import org.springframework.boot.ApplicationRunner; import org.springframework.core.annotation.Order; import org.springframework.retry.annotation.Backoff; import org.springframework.retry.annotation.Recover; import org.springframework.retry.annotation.Retryable; import org.springframework.stereotype.Component; import com.boe.cim.teacher.dao.TeacherInfoMapper; import com.boe.cim.teacher.pojo.TeacherInfo; import com.boe.cim.teacher.dao.RequirementInfoMapper; import com.boe.cim.teacher.pojo.RequirementInfo; import com.boe.cim.teacher.utils.FileOperationFunction; import lombok.extern.slf4j.Slf4j; @Slf4j @Component @Order(value=5) public class StartApplicationWithLuceneIndex implements ApplicationRunner{ @Value("${lucene.index.dir}") private String IndexDir; @Autowired private TeacherInfoMapper teacherInfoMapper; @Value("${lucenere.indexre.dir}") private String IndexreDir; @Autowired private RequirementInfoMapper requirementInfoMapper; @Override @Retryable(value= Exception.class,backoff=@Backoff(delay=2000)) //重试机制,默认重试三次,失败程序启动失败,Main函数run,异常直接捕获不了,因为被aop捕获了 public void run(ApplicationArguments args) { // TODO Auto-generated method stub FileOperationFunction.deleteFileFromPath(IndexDir); //有个文件权限太高,删不掉 log.info("生成教师索引文件ing..."); List<TeacherInfo> listTeacher = teacherInfoMapper.userTeacherInfoCreateLuceneIndex(); try { new LuceneIndex().index(IndexDir,listTeacher); } catch (Exception e) { // TODO Auto-generated catch block } log.info("生成教师索引文件成功,End"); // TODO Auto-generated method stub FileOperationFunction.deleteFileFromPath(IndexreDir); //有个文件权限太高,删不掉 log.info("生成需求索引文件ing..."); List<RequirementInfo> 
listRequirement = requirementInfoMapper.RequirementInfoCreateLuceneIndex(); try { new LuceneIndexRequirement().index(IndexreDir,listRequirement); } catch (Exception e) { // TODO Auto-generated catch block } log.info("生成需求索引文件成功,End"); } @Recover//重试多次皆失败回调方法 public void retryRecover(Exception e) { log.error("重试三次生成信息索引文件失败..."); log.error(e.getMessage()); } } 生成的是_0.fdt、_0.fdx
09-04
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值