lucene入门-索引网页

最新推荐文章于 2025-12-03 11:42:11 发布

原创最新推荐文章于 2025-12-03 11:42:11 发布 · 373 阅读

0 ·

CC 4.0 BY-SA版权

文章标签：

#lucene #string #exception #path #file #class

搜索引擎与人工智能专栏收录该内容

217 篇文章

订阅专栏

本文介绍了一个使用Java和Lucene创建文档索引的例子。该示例读取两个HTML文件的原始文本，并将每个文件的文件名、内容和路径作为字段写入Lucene索引。此过程涉及创建索引写入器、定义文档字段并添加文档。

package bindex;
import java.io.File;
import tool.FileText;
import java.io.IOException;

import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.store.LockObtainFailedException;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
/**
 * Command-line example that indexes a fixed set of HTML files with Lucene.
 *
 * <p>Each file becomes its own Lucene {@link Document} with three fields:
 * <ul>
 *   <li>{@code name}   - the file name, stored and tokenized;</li>
 *   <li>{@code conent} - the file text as returned by {@code FileText.getText},
 *       stored and tokenized;</li>
 *   <li>{@code path}   - the file path, stored but not indexed.</li>
 * </ul>
 */
public class FileIndexer {

    /** Location handed to the {@link IndexWriter} as the index path. */
    private static final String INDEX_PATH = "indexes";

    /** Files to index, one Lucene document per entry. */
    private static final String[] SOURCE_FILES = {
        "htmls/hao123.htm",
        "htmls/home.htm",
    };

    /**
     * Indexes every file in {@link #SOURCE_FILES} and prints {@code OK!} once
     * the writer has been closed successfully. Any I/O failure is reported on
     * standard error via {@code printStackTrace()}.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        IndexWriter indexWriter = null;
        try {
            indexWriter = new IndexWriter(INDEX_PATH, new StandardAnalyzer());
            for (int i = 0; i < SOURCE_FILES.length; i++) {
                // A fresh Document per file: reusing one instance would make
                // every later document also carry the earlier files' fields.
                indexWriter.addDocument(buildDocument(new File(SOURCE_FILES[i])));
            }

            // Close on the success path first so that "OK!" is only printed
            // after the index has actually been flushed; clearing the
            // reference keeps the finally block from closing it twice.
            IndexWriter toClose = indexWriter;
            indexWriter = null;
            toClose.close();

            System.out.println("OK!");
        } catch (IOException e) {
            // CorruptIndexException and LockObtainFailedException are both
            // IOException subclasses, so one handler covers all three cases
            // the same way the separate handlers did.
            e.printStackTrace();
        } finally {
            // Failure path: still release the writer (and its index lock).
            if (indexWriter != null) {
                try {
                    indexWriter.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
    }

    /**
     * Builds the Lucene document describing a single file.
     *
     * @param f the file to describe
     * @return a new document holding the name, conent and path fields of {@code f}
     */
    private static Document buildDocument(File f) {
        Document doc = new Document();
        doc.add(new Field("name", f.getName(), Field.Store.YES, Field.Index.TOKENIZED));
        // NOTE(review): "conent" looks like a misspelling of "content". It is
        // kept as-is because renaming the field would break any existing index
        // or search code that queries it by this name - confirm before changing.
        doc.add(new Field("conent", FileText.getText(f), Field.Store.YES, Field.Index.TOKENIZED));
        // The path is only retrieved with search hits, never searched on.
        doc.add(new Field("path", f.getPath(), Field.Store.YES, Field.Index.NO));
        return doc;
    }

}

package tool;
import java.io.*;

/**
 * Helper for reading a whole text file into a {@code String}.
 *
 * <p>Reading is best-effort: a {@code null} argument or an I/O failure never
 * propagates to the caller; the methods return whatever text was read before
 * the failure (possibly the empty string).
 */
public class FileText {

    /**
     * Reads the text of a file.
     *
     * <p>Lines are joined with a single {@code '\n'} regardless of the line
     * terminator used in the file, and no terminator is appended after the
     * last line. Keeping a separator between lines matters for indexing:
     * without it the last word of one line fuses with the first word of the
     * next.
     *
     * @param f the file to read; may be {@code null}
     * @return the file text, or the part read before an I/O error occurred;
     *         {@code ""} if {@code f} is {@code null} or cannot be opened
     */
    public static String getText(File f){
        StringBuffer sb = new StringBuffer();
        if (f == null) {
            return sb.toString();
        }

        BufferedReader br = null;
        try {
            // FileReader decodes using the platform default charset.
            br = new BufferedReader(new FileReader(f));
            String line = br.readLine();
            while (line != null) {
                sb.append(line);
                line = br.readLine();
                if (line != null) {
                    // readLine() strips the terminator; put one back between lines.
                    sb.append('\n');
                }
            }
        } catch (IOException e) {
            // Deliberately best-effort: fall through and return what was read so far.
        } finally {
            // Always release the file handle, including when readLine() failed.
            if (br != null) {
                try {
                    br.close();
                } catch (IOException ignored) {
                    // Nothing useful can be done if close() itself fails.
                }
            }
        }
        return sb.toString();
    }

    /**
     * Reads the text of the file at the given path.
     *
     * @param s path of the file to read; may be {@code null}
     * @return the same result as {@link #getText(File)} for that path;
     *         {@code ""} if {@code s} is {@code null}
     */
    public static String getText(String s){
        if (s == null) {
            return "";
        }
        return getText(new File(s));
    }
}