Lucene 学习之一:索引操作(Fragments)

本文介绍如何使用 Lucene 进行文档索引及加权(boost),包括不同字段索引方式的选择、文档级与字段级权重(boost)的调整,以及数值字段和日期字段的处理等关键技术点。

摘要生成于 C知道 ,由 DeepSeek-R1 满血版支持, 前往体验 >

 
/**
 * Small, independent Lucene indexing snippets. Each method illustrates one
 * indexing technique; the regions between {@code // START} and {@code // END}
 * are the parts intended to be read as the example itself.
 *
 * <p>Several methods use {@code null} placeholders for directories and
 * analyzers; those methods are illustrations and are not expected to run
 * successfully as written.
 */
class Fragments {

  /** Number of milliseconds in one 24-hour day (for epoch-millis to day conversion). */
  private static final long MILLIS_PER_DAY = 24L * 3600L * 1000L;

  /**
   * Shows numbers and an author name being indexed as plain string fields
   * that are stored and marked {@code NOT_ANALYZED}. The fields are only
   * constructed, not added to any document.
   */
  public static void indexNumbersMethod() {
    // START
    new Field("size", "4096", Field.Store.YES, Field.Index.NOT_ANALYZED);
    new Field("price", "10.99", Field.Store.YES, Field.Index.NOT_ANALYZED);
    new Field("author", "Arthur C. Clark", Field.Store.YES, Field.Index.NOT_ANALYZED);
    // END
  }

  /** Sender domain whose documents receive a raised boost in {@link #docBoostMethod()}. */
  public static final String COMPANY_DOMAIN = "example.com";

  /** Sender domain whose documents receive a lowered boost in {@link #docBoostMethod()}. */
  public static final String BAD_DOMAIN = "yucky-domain.com";

  /** @return the fixed sample sender e-mail address */
  private String getSenderEmail() {
    return "bob@smith.com";
  }

  /** @return the fixed sample sender display name */
  private String getSenderName() {
    return "Bob Smith";
  }

  /** @return the sample sender domain, which is {@link #COMPANY_DOMAIN} */
  private String getSenderDomain() {
    return COMPANY_DOMAIN;
  }

  /** @return the fixed sample message subject */
  private String getSubject() {
    return "Hi there Lisa";
  }

  /** @return the fixed sample message body */
  private String getBody() {
    return "I don't have much to say";
  }

  /**
   * Tells whether {@code candidate} is exactly {@code domain} or one of its
   * subdomains. A plain {@code endsWith} would also accept unrelated domains
   * that merely share a suffix (e.g. "notexample.com" for "example.com"),
   * so a leading dot is required for the suffix match.
   *
   * @param candidate lower-cased domain to test
   * @param domain lower-case domain to match against
   * @return {@code true} if {@code candidate} equals or is a subdomain of {@code domain}
   */
  private static boolean isDomainOrSubdomain(String candidate, String domain) {
    return candidate.equals(domain) || candidate.endsWith("." + domain);
  }

  /**
   * @param lowerDomain sender domain, already lower-cased
   * @return {@code true} if the domain is {@link #COMPANY_DOMAIN} or a subdomain of it
   */
  private boolean isImportant(String lowerDomain) {
    return isDomainOrSubdomain(lowerDomain, COMPANY_DOMAIN);
  }

  /**
   * @param lowerDomain sender domain, already lower-cased
   * @return {@code true} if the domain is {@link #BAD_DOMAIN} or a subdomain of it
   */
  private boolean isUnimportant(String lowerDomain) {
    return isDomainOrSubdomain(lowerDomain, BAD_DOMAIN);
  }

  /**
   * Opens an {@code IndexWriter} on a new in-memory {@code RAMDirectory}
   * with a {@code WhitespaceAnalyzer}, then closes it.
   *
   * @throws Exception if the writer cannot be created or closed
   */
  public void ramDirExample() throws Exception {
    Analyzer analyzer = new WhitespaceAnalyzer();
    // START
    Directory ramDir = new RAMDirectory();
    IndexWriter writer = new IndexWriter(ramDir, analyzer,
                                IndexWriter.MaxFieldLength.UNLIMITED);
    // END
    // Release the writer so it is not leaked once the example is done.
    writer.close();
  }

  /**
   * Shows a {@code RAMDirectory} being constructed from another
   * {@code Directory}. The source directory here is a {@code null}
   * placeholder.
   *
   * @throws Exception if the directory cannot be constructed
   */
  public void dirCopy() throws Exception {
    Directory otherDir = null;

    // START
    Directory ramDir = new RAMDirectory(otherDir);
    // END
  }

  /**
   * Shows the contents of one directory being added to the index of another
   * via {@code addIndexesNoOptimize}. Directories and analyzer are
   * {@code null} placeholders.
   *
   * @throws Exception if the writer cannot be created, used, or closed
   */
  public void addIndexes() throws Exception {
    Directory otherDir = null;
    Directory ramDir = null;
    Analyzer analyzer = null;

    // START
    IndexWriter writer = new IndexWriter(otherDir, analyzer,
                                         IndexWriter.MaxFieldLength.UNLIMITED);
    try {
      writer.addIndexesNoOptimize(new Directory[] {ramDir});
    } finally {
      // Close even when the merge fails so the writer is not leaked.
      writer.close();
    }
    // END
  }

  /**
   * Indexes one sample e-mail document and adjusts the document boost
   * according to the sender's domain: 1.5 for {@link #COMPANY_DOMAIN},
   * 0.1 for {@link #BAD_DOMAIN}, unchanged otherwise.
   *
   * @throws IOException if the index cannot be written or the writer cannot be closed
   */
  public void docBoostMethod() throws IOException {

    Directory dir = new RAMDirectory();
    IndexWriter writer = new IndexWriter(dir, new StandardAnalyzer(Version.LUCENE_30), IndexWriter.MaxFieldLength.UNLIMITED);

    try {
      // START
      Document doc = new Document();
      String senderEmail = getSenderEmail();
      String senderName = getSenderName();
      String subject = getSubject();
      String body = getBody();
      doc.add(new Field("senderEmail", senderEmail,
                        Field.Store.YES,
                        Field.Index.NOT_ANALYZED));
      doc.add(new Field("senderName", senderName,
                        Field.Store.YES,
                        Field.Index.ANALYZED));
      doc.add(new Field("subject", subject,
                        Field.Store.YES,
                        Field.Index.ANALYZED));
      doc.add(new Field("body", body,
                        Field.Store.NO,
                        Field.Index.ANALYZED));
      // Use a fixed locale: default-locale lower-casing can change ASCII
      // letters (e.g. 'I' under a Turkish locale) and break the comparison.
      String lowerDomain = getSenderDomain().toLowerCase(java.util.Locale.ENGLISH);
      if (isImportant(lowerDomain)) {
        doc.setBoost(1.5F);     //1
      } else if (isUnimportant(lowerDomain)) {
        doc.setBoost(0.1F);    //2
      }
      writer.addDocument(doc);
      // END
    } finally {
      // Close even when indexing fails so the writer is not leaked.
      writer.close();
    }

    /*
      #1 Good domain boost factor: 1.5
      #2 Bad domain boost factor: 0.1
    */
  }

  /**
   * Shows a boost of 1.2 being set on a single field (the subject) rather
   * than on a whole document. The field is only constructed, not indexed.
   *
   * @throws IOException declared for interface compatibility; not thrown by this body
   */
  public void fieldBoostMethod() throws IOException {

    String subject = getSubject();

    // START
    Field subjectField = new Field("subject", subject,
                                   Field.Store.YES,
                                   Field.Index.ANALYZED);
    subjectField.setBoost(1.2F);
    // END
  }

  /** Adds a {@code NumericField} named "price" holding the double value 19.99 to a document. */
  public void numberField() {
    Document doc = new Document();
    // START
    doc.add(new NumericField("price").setDoubleValue(19.99));
    // END
  }

  /**
   * Adds three date-derived {@code NumericField}s to a document: the current
   * time in epoch milliseconds ("timestamp"), the number of whole days since
   * the epoch ("day"), and the calendar day of the month ("dayOfMonth").
   */
  public void numberTimestamp() {
    Document doc = new Document();
    // START
    doc.add(new NumericField("timestamp")
             .setLongValue(new Date().getTime()));
    // END

    // START
    // getTime() is in milliseconds, so divide by the millis in a day
    // (not just 24 * 3600) to obtain a day count.
    doc.add(new NumericField("day")
            .setIntValue((int) (new Date().getTime() / MILLIS_PER_DAY)));
    // END

    Date date = new Date();
    // START
    Calendar cal = Calendar.getInstance();
    cal.setTime(date);
    doc.add(new NumericField("dayOfMonth")
            .setIntValue(cal.get(Calendar.DAY_OF_MONTH)));
    // END
  }

  /**
   * Shows an {@code IndexWriter} being pointed at {@code System.out} through
   * {@code setInfoStream}. Directory and analyzer are {@code null}
   * placeholders.
   *
   * @throws Exception if the writer cannot be created or closed
   */
  public void setInfoStream() throws Exception {
    Directory dir = null;
    Analyzer analyzer = null;
    // START
    IndexWriter writer = new IndexWriter(dir, analyzer,
                                         true, IndexWriter.MaxFieldLength.UNLIMITED);
    writer.setInfoStream(System.out);
    // END
    // Release the writer so it is not leaked once the example is done.
    writer.close();
  }

  /**
   * Adds the current date, formatted by {@code DateTools} at day resolution,
   * as a stored, {@code NOT_ANALYZED} string field named "indexDate".
   */
  public void dateMethod() {
    Document doc = new Document();
    doc.add(new Field("indexDate",
                      DateTools.dateToString(new Date(), DateTools.Resolution.DAY),
                      Field.Store.YES,
                      Field.Index.NOT_ANALYZED));
  }

  /**
   * Builds a document with three {@code NumericField}s: a double "price",
   * a long "timestamp" (epoch milliseconds), and an int "birthday" parsed
   * from the day-resolution {@code DateTools} string of the current date.
   *
   * @throws Exception if the date string cannot be parsed as an int
   */
  public void numericField() throws Exception {
    Document doc = new Document();
    NumericField price = new NumericField("price");
    price.setDoubleValue(19.99);
    doc.add(price);

    NumericField timestamp = new NumericField("timestamp");
    timestamp.setLongValue(new Date().getTime());
    doc.add(timestamp);

    Date b = new Date();
    NumericField birthday = new NumericField("birthday");
    // NOTE(review): relies on the day-resolution string being all digits
    // (yyyyMMdd per the DateTools documentation) -- confirm for your version.
    String v = DateTools.dateToString(b, DateTools.Resolution.DAY);
    birthday.setIntValue(Integer.parseInt(v));
    doc.add(birthday);
  }

  /**
   * Shows a multi-valued field: one "author" field instance is added to the
   * same document for each author name.
   *
   * @throws Exception declared for interface compatibility
   */
  public void indexAuthors() throws Exception {
    String[] authors = new String[] {"lisa", "tom"};
    // START
    Document doc = new Document();
    for (String author: authors) {
      doc.add(new Field("author", author,
                        Field.Store.YES,
                        Field.Index.ANALYZED));
    }
    // END
  }
}


评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值