全文检索引擎工具包Lucene初体验

最新推荐文章于 2025-12-03 11:42:11 发布

原创最新推荐文章于 2025-12-03 11:42:11 发布 · 1k 阅读

·

0

·

CC 4.0 BY-SA版权

版权声明：本文为博主原创文章，遵循 CC 4.0 BY-SA 版权协议，转载请附上原文出处链接和本声明。

文章标签：

#lucene #全文检索 #引擎 #工具 #import #file

Java相关专栏收录该内容

392 篇文章

订阅专栏

Lucene 是 Apache 软件基金会 Jakarta 项目组的一个子项目，是一个开放源代码的全文检索引擎工具包及架构。它提供了完整的查询引擎和索引引擎，实现了一些通用的分词算法，并预留了很多词法分析器接口。

下载地址：http://lucene.apache.org/ (本文使用最新的lucene2.3)

A.创建索引文件

package demo;

import java.io.File;

import org.apache.lucene.analysis.standard.StandardAnalyzer;

import org.apache.lucene.index.IndexWriter;

/**
 * Creates a new, empty Lucene index in a directory.
 *
 * <p>Methods report their outcome as an int: {@code 1} on success,
 * {@code 0} on any failure.
 */
public class CreateDataBase {

    /**
     * Initialises the index under {@code C:/temp} and reports the outcome.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        CreateDataBase temp = new CreateDataBase();
        if (temp.createDataBase("C:/temp") == 1) {
            System.out.println("db init succ");
        } else {
            // Make a failed run visible instead of exiting silently.
            System.err.println("db init failed");
        }
    }

    public CreateDataBase() {
    }

    /**
     * Creates an empty index in the given directory, creating the directory
     * first when it does not exist.
     *
     * <p>The writer is opened with {@code create == true}, which per the
     * Lucene API creates the index or overwrites an existing one.
     *
     * @param file the index directory
     * @return {@code 1} if the index was created, {@code 0} otherwise
     */
    public int createDataBase(File file) {
        if (file == null) {
            // Keep the 0/1 contract instead of leaking a NullPointerException.
            System.err.println("index directory must not be null");
            return 0;
        }
        if (!file.isDirectory() && !file.mkdirs()) {
            // mkdirs() reports failure through its return value only.
            System.err.println("cannot create index directory: " + file);
            return 0;
        }

        int returnValue = 0;
        try {
            IndexWriter indexWriter = new IndexWriter(file,
                    new StandardAnalyzer(), true);
            indexWriter.close();
            returnValue = 1;
        } catch (Exception ex) {
            ex.printStackTrace();
        }
        return returnValue;
    }

    /**
     * Convenience overload of {@link #createDataBase(File)} taking a path.
     *
     * @param file path of the index directory
     * @return {@code 1} if the index was created, {@code 0} otherwise
     */
    public int createDataBase(String file) {
        if (file == null) {
            // new File((String) null) would throw; report failure instead.
            System.err.println("index directory must not be null");
            return 0;
        }
        return this.createDataBase(new File(file));
    }

}

B.添加记录：

先在 C:/temp 目录下新建两个文本文件，文件名分别为 ugg1.txt 和 ugg2.txt（下面的代码会从该目录读取它们）。内容随意，譬如：
ugg1.txt：
good
book
luky

ugg2.txt：
look
book
meet

并运行如下代码：

package demo;

import java.io.File;

import java.io.FileReader;

import org.apache.lucene.analysis.standard.StandardAnalyzer;

import org.apache.lucene.document.Document;

import org.apache.lucene.document.Field;

import org.apache.lucene.index.IndexWriter;

/**
 * Adds text files as documents to an existing Lucene index.
 *
 * <p>Each file becomes one document with two fields: {@code filename}
 * (stored, not tokenized) and {@code contents} (read from the file).
 * Methods returning int use {@code 1} for success and {@code 0} for failure.
 */
public class InsertRecords {

    /**
     * Indexes the two sample files into the index under {@code C:/temp}.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        InsertRecords temp = new InsertRecords();
        String dbpath = "C:/temp";

        // Index the first sample file, ugg1.txt.
        if (temp.insertRecords(dbpath, "C:/temp/ugg1.txt") == 1) {
            System.out.println("add file1 succ");
        }

        // Index the second sample file, ugg2.txt.
        if (temp.insertRecords(dbpath, "C:/temp/ugg2.txt") == 1) {
            System.out.println("add file2 succ");
        }
    }

    public InsertRecords() {
    }

    /**
     * Appends one file to the index at {@code dbpath}.
     *
     * <p>The writer is opened with {@code create == false}, i.e. it appends
     * to an index that must already exist.
     *
     * @param dbpath path of the index directory
     * @param file   the text file to index
     * @return {@code 1} if the document was added and the writer closed
     *         cleanly, {@code 0} if anything failed
     */
    public int insertRecords(String dbpath, File file) {
        int returnValue = 0;
        try {
            IndexWriter indexWriter = new IndexWriter(dbpath,
                    new StandardAnalyzer(), false);
            // Use the throwing helper so a failure is not reported as success.
            this.addFileAndClose(indexWriter, file);
            returnValue = 1;
        } catch (Exception ex) {
            ex.printStackTrace();
        }
        return returnValue;
    }

    /**
     * Convenience overload of {@link #insertRecords(String, File)} taking
     * the file as a path.
     *
     * @param dbpath path of the index directory
     * @param file   path of the text file to index
     * @return {@code 1} on success, {@code 0} on failure
     */
    public int insertRecords(String dbpath, String file) {
        if (file == null) {
            // new File((String) null) would throw; report failure instead.
            System.err.println("file path must not be null");
            return 0;
        }
        return this.insertRecords(dbpath, new File(file));
    }

    /**
     * Adds {@code file} as a document and closes {@code indexWriter}.
     *
     * <p>Failures are printed and swallowed. The writer is closed whether or
     * not the document could be added, so it must not be used afterwards.
     *
     * @param indexWriter an open writer; closed by this method
     * @param file        the text file to index
     */
    public void addFiles(IndexWriter indexWriter, File file) {
        try {
            this.addFileAndClose(indexWriter, file);
        } catch (Exception ex) {
            ex.printStackTrace();
        }
    }

    /**
     * Builds the document for {@code file}, adds it, and always closes both
     * the file reader and the writer, propagating any failure.
     */
    private void addFileAndClose(IndexWriter indexWriter, File file)
            throws Exception {
        FileReader reader = null;
        try {
            Document doc = new Document();
            doc.add(new Field("filename", file.getName(), Field.Store.YES,
                    Field.Index.UN_TOKENIZED));

            // A Reader-based field is indexed but its text is not stored.
            reader = new FileReader(file);
            doc.add(new Field("contents", reader));

            indexWriter.addDocument(doc);
        } finally {
            try {
                // Closing an already-closed reader has no effect.
                if (reader != null) {
                    reader.close();
                }
            } finally {
                // Close on every path so the index write lock is released.
                indexWriter.close();
            }
        }
    }

}

C.进行查询

package demo;

import java.util.ArrayList;

import org.apache.lucene.analysis.standard.StandardAnalyzer;

import org.apache.lucene.document.Document;

import org.apache.lucene.queryParser.QueryParser;

import org.apache.lucene.search.Hits;

import org.apache.lucene.search.IndexSearcher;

import org.apache.lucene.search.Query;

import org.apache.lucene.search.Searcher;

/**
 * Runs a query against a Lucene index and lists the matching file names.
 */
public class QueryRecords {

    public QueryRecords() {
    }

    /**
     * Searches {@code searchfield} for {@code searchkey} and collects the
     * {@code filename} field of every hit.
     *
     * @param searchkey   the query text, parsed by {@code QueryParser}
     * @param dbpath      path of the index directory
     * @param searchfield default field the query is parsed against
     * @return the file names of all hits (possibly empty), or {@code null}
     *         if the search failed; the failure is printed to stderr
     */
    public ArrayList queryRecords(String searchkey, String dbpath,
            String searchfield) {
        ArrayList list = null;
        Searcher searcher = null;
        try {
            searcher = new IndexSearcher(dbpath);
            QueryParser parser = new QueryParser(searchfield,
                    new StandardAnalyzer());
            Query query = parser.parse(searchkey);
            Hits hits = searcher.search(query);

            if (hits != null) {
                // Collect into a local list and publish it only once every
                // hit was read, so a mid-loop failure cannot leak a partial
                // result.
                ArrayList names = new ArrayList();
                int hitCount = hits.length();
                for (int i = 0; i < hitCount; i++) {
                    Document doc = hits.doc(i);
                    names.add(doc.get("filename"));
                }
                list = names;
            }
        } catch (Exception ex) {
            ex.printStackTrace();
        } finally {
            // Close only after the hits were read: the loop above fetches
            // documents through the still-open searcher.
            if (searcher != null) {
                try {
                    searcher.close();
                } catch (Exception closeEx) {
                    closeEx.printStackTrace();
                }
            }
        }
        return list;
    }

    /**
     * Searches the {@code contents} field of the index under
     * {@code c:/temp} for "look" and prints each matching file name.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        QueryRecords temp = new QueryRecords();
        ArrayList list = temp.queryRecords("look", "c:/temp", "contents");

        if (list == null) {
            // queryRecords returns null on failure; avoid the NPE below.
            System.err.println("query failed");
            return;
        }

        for (int i = 0; i < list.size(); i++) {
            System.out.println((String) list.get(i));
        }
    }

}

评论

成就一亿技术人!

拼手气红包6.0元

还能输入1000个字符

添加红包

插入表情

表情包

代码片

HTML/XML
objective-c
Ruby
PHP
C
C++
JavaScript
Python
Java
CSS
SQL
其它

条评论被折叠查看

被折叠的条评论为什么被折叠?

到【灌水乐园】发言

查看更多评论

添加红包

成就一亿技术人!

hope_wisdom

发出的红包

实付元

使用余额支付

点击重新获取

扫码支付

钱包余额 0

抵扣说明：

1.余额是钱包充值的虚拟货币，按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载，可以购买VIP、付费专栏及课程。