Lucene初级教程1

最新推荐文章于 2023-11-27 20:06:00 发布

原创最新推荐文章于 2023-11-27 20:06:00 发布 · 560 阅读

0 ·

CC 4.0 BY-SA版权

文章标签：

#import #exception #string #file #query #filter

Luence 专栏收录该内容

24 篇文章

订阅专栏

所需jar包：

lucene-core-3.6.1.jar,lucene-test-framework-3.6.1.jar,ant-junit-1.7.1.jar,ant-1.7.1.jar,ant-launcher-1.7.1.jar,junit-4.10.jar,hamcrest-core-1.1.jar

####################################################################################################

package com.anjuke.Luence_01;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
/**
 * Static helpers shared by the Lucene examples: index/source locations,
 * analyzer, writer, searcher and query factories, and a small file reader.
 */
public class LuceneUtils {

    // Value of the "user.dir" system property; every other path is built from it.
    public static final String USERDIR = System.getProperty("user.dir");
    // Directory that holds the index files.
    private static final String INDEXPATH = USERDIR + File.separator + "index";
    // Data source: the text file whose content gets indexed.
    private static final String INDEXSOURCE = USERDIR + File.separator
            + "source" + File.separator + "lucene.txt";
    // Lucene version constant passed to the analyzer, writer config and query parser.
    public static final Version version = Version.LUCENE_35;

    /**
     * Creates the analyzer (tokenizer) used for both indexing and querying.
     *
     * @return a new {@link StandardAnalyzer} configured with {@link #version}
     */
    public static Analyzer getAnalyzer(){
        return new StandardAnalyzer(version);
    }

    /**
     * Creates an index writer on the index directory.
     * The caller owns the returned writer and must close it.
     *
     * @param openMode how the index is opened (create, append, ...)
     * @return a new writer over {@code INDEXPATH}
     * @throws Exception if the directory or the writer cannot be opened
     */
    public static IndexWriter createIndexWriter(OpenMode openMode)
            throws Exception {
        // Where the index is stored.
        Directory dir = FSDirectory.open(new File(INDEXPATH));
        try {
            // Writer configuration: version, analyzer and open mode.
            IndexWriterConfig iwc = new IndexWriterConfig(version,
                    getAnalyzer());
            iwc.setOpenMode(openMode);
            return new IndexWriter(dir, iwc);
        } catch (Exception e) {
            // Do not leave the directory open when the writer could not be created.
            closeQuietly(dir);
            throw e;
        }
    }

    /**
     * Creates a searcher over the index directory.
     * The searcher is built from an {@link IndexReader}; callers should close
     * the searcher and also the reader returned by
     * {@code searcher.getIndexReader()} when they are done.
     *
     * @return a new searcher over {@code INDEXPATH}
     * @throws IOException if the index cannot be read
     * @throws CorruptIndexException if the index is corrupt
     */
    public static IndexSearcher createIndexSearcher() throws CorruptIndexException, IOException {
        Directory dir = FSDirectory.open(new File(INDEXPATH));
        try {
            IndexReader reader = IndexReader.open(dir);
            return new IndexSearcher(reader);
        } catch (IOException e) {
            // Do not leave the directory open when the reader could not be opened.
            closeQuietly(dir);
            throw e;
        }
    }

    /**
     * Builds a query that is parsed against several fields at once.
     *
     * @param queryFileds the fields to search in
     * @param queryString the text to search for
     * @return the parsed query
     * @throws ParseException if {@code queryString} cannot be parsed
     */
    public static Query createQuery(String [] queryFileds,String queryString) throws ParseException{
         QueryParser parser = new MultiFieldQueryParser(version, queryFileds, getAnalyzer());
         return parser.parse(queryString);
    }

    /**
     * Reads a whole text file into a string, using the platform default charset.
     * Every line read is terminated with {@code "\n"} in the result.
     *
     * @param file the file to read
     * @return the file content
     * @throws RuntimeException wrapping any failure while opening or reading
     */
    public static String readFileContext(File file){
        BufferedReader br = null;
        try {
            br = new BufferedReader(new InputStreamReader(new FileInputStream(file)));
            StringBuilder content = new StringBuilder();
            for(String line = null; (line = br.readLine())!= null;){
                content.append(line).append("\n");
            }
            return content.toString();
        } catch (Exception e) {
            throw new RuntimeException(e);
        } finally {
            // Always release the file handle, on success and on failure.
            closeQuietly(br);
        }

    }

    /**
     * Prints several ways of locating the classpath root and working directory.
     *
     * @param args unused
     */
    public static void main(String[] args) {

        System.out.println(Thread.currentThread().getContextClassLoader()
                .getResource(""));
        System.out.println(LuceneUtils.class.getClassLoader().getResource(""));
        System.out.println(ClassLoader.getSystemResource(""));
        System.out.println(LuceneUtils.class.getResource(""));
        System.out.println(LuceneUtils.class.getResource("/")); // location of the class files
        System.out.println(new File("/").getAbsolutePath());
        System.out.println(System.getProperty("user.dir"));
    }

    /**
     * Returns the data source file that is to be indexed.
     *
     * @return a {@link File} pointing at {@code INDEXSOURCE}
     */
    public static File createSourceFile() {
        return new File(INDEXSOURCE);
    }

    /**
     * Closes a resource and ignores any failure while doing so; used on
     * cleanup paths where a close error must not mask the primary outcome.
     *
     * @param resource the resource to close, may be {@code null}
     */
    private static void closeQuietly(java.io.Closeable resource) {
        if (resource == null) {
            return;
        }
        try {
            resource.close();
        } catch (IOException ignored) {
            // Best-effort cleanup: nothing useful can be done here.
        }
    }
}

##############################################################################################################

package com.anjuke.Luence_01;
import java.io.File;
import java.io.IOException;

import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.Filter;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.junit.Test;
/**
 * JUnit examples that build an index from the source file and then search it.
 */
public class IndeSearchFiles {

    /**
     * Creates the index: one document holding the source file's name, path,
     * last-modified time and full content.
     *
     * @throws Exception if the index cannot be opened or written
     */
    @Test
    public void createIndex() throws Exception{

        // Writer used to add, delete and update documents in the index.
        IndexWriter writer = LuceneUtils.createIndexWriter(OpenMode.CREATE);
        try {
            // Location of the data source.
            File sourceFile = LuceneUtils.createSourceFile();
            System.out.println("文件路径：" + sourceFile.getAbsolutePath());
            // Build the document to write.
            Document doc = new Document();
            doc.add(new Field("name",sourceFile.getName(),Field.Store.YES, Field.Index.ANALYZED_NO_NORMS));
            // File path. It must be indexed (as a single un-analyzed term) because it is
            // the key used by updateDocument below; a non-indexed field can never match.
            Field pathField = new Field("path", sourceFile.getPath(), Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS);
            pathField.setIndexOptions(org.apache.lucene.index.FieldInfo.IndexOptions.DOCS_ONLY);
            doc.add(pathField);
            // Last-modified time of the file; stored only, not searchable.
            doc.add(new Field("modified",String.valueOf(sourceFile.lastModified()),Field.Store.YES, Field.Index.NO));
            // File content, stored and analyzed.
            String content = LuceneUtils.readFileContext(sourceFile);
            System.out.println("content: " + content);
            doc.add(new Field("contents",content,Field.Store.YES, Field.Index.ANALYZED));
            // The official demo instead indexes the content from a Reader (and closes the
            // stream after the writer):
            /* FileInputStream fis = new FileInputStream(sourceFile);
            doc.add(new Field("contents", new BufferedReader(new InputStreamReader(fis, "UTF-8"))));*/

            if (writer.getConfig().getOpenMode() == IndexWriterConfig.OpenMode.CREATE)
            {
                // Fresh index: the document cannot exist yet, so just add it.
                writer.addDocument(doc);
            }
            else
            {
                // Existing index: replace any document with the same path.
                writer.updateDocument(new Term("path", sourceFile.getPath()), doc);
            }
        } finally {
            // Release the writer even when reading or adding fails.
            writer.close();
        }

    }

    /**
     * Searches the "contents" field for a fixed query string and prints the
     * stored fields of every hit.
     *
     * @throws Exception if the index cannot be opened or the query cannot be parsed
     */
    @Test
    public void search() throws Exception{

        // Query text; a string that is not in the index (e.g. 中国) yields no hits.
        String queryString = "Lucene";
        // Fields to search in.
        String [] queryFileds = {"contents"};
        IndexSearcher searcher = LuceneUtils.createIndexSearcher();
        try {
            Query query = LuceneUtils.createQuery(queryFileds, queryString);
            // No additional filtering of the results.
            Filter filter = null;
            // Maximum number of hits fetched in one search.
            int queryCount = 100;
            TopDocs results = searcher.search(query,filter,queryCount);
            System.out.println("总符合: " + results.totalHits + "条数！");

            // Print each hit.
            for(ScoreDoc sr : results.scoreDocs){
                // Internal document number.
                int docID = sr.doc;
                // Load the stored document.
                Document doc = searcher.doc(docID);
                System.out.println("name = " + doc.get("name"));
                System.out.println("path = " + doc.get("path"));
                System.out.println("modified = " + doc.get("modified"));
                System.out.println("contents = " + doc.get("contents"));
            }
        } finally {
            // The searcher was built from an IndexReader, so the reader has to be
            // closed separately from the searcher.
            try {
                searcher.close();
            } finally {
                searcher.getIndexReader().close();
            }
        }
    }
}