Lucene(3.6.0)的简单示例_lucene 3.6.0 demo-CSDN博客

本文介绍了一个基于Lucene的简单书籍搜索应用实例，通过创建书籍实体类，并利用Lucene进行索引和搜索操作，演示了如何实现文本的索引建立及中文分词处理。

摘要生成于 C知道，由 DeepSeek-R1 满血版支持，前往体验 >

1. 在lucene官网lucene.apache.org下载相应版本的lucene。

2. 书籍实体：

package com.huey.entity;

/**
 * Book entity: a plain mutable bean holding the data of a single book
 * (id, title, author and publisher).
 *
 * @version 2014-03-15
 * @author huey2672
 */
public class Book {

	private Integer id;
	private String title;
	private String author;
	private String publisher;

	/**
	 * Creates an empty book; every property is initially {@code null}.
	 */
	public Book() {
	}

	/**
	 * Creates a book with the given title, author and publisher.
	 * The id is left {@code null}.
	 *
	 * @param title     the book title
	 * @param author    the book author
	 * @param publisher the book publisher
	 */
	public Book(String title, String author, String publisher) {
		// Assign the fields directly instead of going through the public
		// setters: calling overridable methods from a constructor would run
		// subclass code before the subclass has been initialised.
		this.title = title;
		this.author = author;
		this.publisher = publisher;
	}

	/** @return the book id, or {@code null} if none has been set */
	public Integer getId() {
		return id;
	}

	/** @param id the book id */
	public void setId(Integer id) {
		this.id = id;
	}

	/** @return the book title */
	public String getTitle() {
		return title;
	}

	/** @param title the book title */
	public void setTitle(String title) {
		this.title = title;
	}

	/** @return the book author */
	public String getAuthor() {
		return author;
	}

	/** @param author the book author */
	public void setAuthor(String author) {
		this.author = author;
	}

	/** @return the book publisher */
	public String getPublisher() {
		return publisher;
	}

	/** @param publisher the book publisher */
	public void setPublisher(String publisher) {
		this.publisher = publisher;
	}
}

2. Lucene示例：

package com.huey.lucene;

import java.io.IOException;

import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.Version;
import org.junit.Test;
import org.wltea.analyzer.lucene.IKAnalyzer;

import com.huey.entity.Book;

/**
 * Lucene demo: indexes a fixed set of books into an in-memory directory and
 * then runs a query against the {@code title} field.
 *
 * @version 2014-03-15
 * @author huey2672
 */
public class LuceneDemo {

	// Sample data. NOTE(review): several literals carry leading/trailing
	// whitespace; they are kept verbatim here, and because the author field is
	// indexed NOT_ANALYZED that whitespace becomes part of the indexed term.
	Book[] books = { new Book("倾我所有去生活", "苏美 ", "江苏文艺出版社"),
			new Book("那些年，我们在一起", "魏雪明 ", "现代出版社"),
			new Book("这个世界上的一切都是瘦子的", "荞麦", "中信出版社"),
			new Book("我们生活的风景", "中原慎一郎 ", "山东人民出版社"),
			new Book("水知道答案", "江本胜", "南海出版公司社"),
			new Book("十七个远方", "远子 ", "九州出版社"),
			new Book("柔软的距离", "邓安庆", "上海人民出版社"),
			new Book("一切都是最好的安排", " 辉姑娘", "中信出版社"),
			new Book("问题就是答案", "皮斯", " 九州出版社"),
			new Book("植物知道生命的答案", "丹尼尔•查莫维茨 ", "长江文艺出版社") };

	// Index storage location. The index is kept in memory here; any other
	// Directory subclass could be used instead.
	private Directory dir = new RAMDirectory();

	/**
	 * Builds the index: adds one document per entry of {@link #books} and
	 * prints the number of indexed documents.
	 *
	 * @throws IOException if writing to or closing the index fails
	 */
	public void index() throws IOException {

		// Configure and create the index writer.
		// NOTE(review): the boolean passed to IKAnalyzer presumably selects its
		// "smart" segmentation mode - confirm against the IKAnalyzer manual.
		IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_36,
				new IKAnalyzer(true));
		IndexWriter writer = new IndexWriter(dir, config);

		try {
			for (Book book : books) {
				// Create a new document
				Document doc = new Document();

				// Stored, indexed and analyzed
				Field titleField = new Field("title", book.getTitle(),
						Field.Store.YES, Field.Index.ANALYZED);
				// Stored and indexed, but NOT analyzed (indexed as one term)
				Field authorField = new Field("author", book.getAuthor(),
						Field.Store.YES, Field.Index.NOT_ANALYZED);
				// Not stored, but indexed and analyzed
				Field publisherField = new Field("publisher", book.getPublisher(),
						Field.Store.NO, Field.Index.ANALYZED);

				// Add the fields to the document
				doc.add(titleField);
				doc.add(authorField);
				doc.add(publisherField);

				// Add the document to the index
				writer.addDocument(doc);
			}

			System.out.println("被索引的文档个数：" + writer.numDocs());
		} finally {
			// Always close the writer, even when indexing fails, so that it
			// does not stay open on the directory.
			writer.close();
		}
	}

	/**
	 * Searches the index: parses a fixed query string against the
	 * {@code title} field and prints the author and title of the top 5 hits.
	 *
	 * @throws IOException    if reading the index fails
	 * @throws ParseException if the query string cannot be parsed
	 */
	public void search() throws IOException, ParseException {

		// Open the index
		IndexReader reader = IndexReader.open(dir);
		try {
			IndexSearcher searcher = new IndexSearcher(reader);
			try {
				// Parse the query string
				QueryParser parser = new QueryParser(Version.LUCENE_36, "title",
						new IKAnalyzer(true));
				Query query = parser.parse("我们的生活");

				// Search the index for the 5 most relevant documents
				TopDocs hits = searcher.search(query, 5);

				System.out.println("共搜索到" + hits.totalHits + "个文档。");
				System.out.println("列出相关性最高的5条记录：");
				// Walk through the result set and print the stored fields
				for (ScoreDoc scoreDoc : hits.scoreDocs) {
					Document doc = searcher.doc(scoreDoc.doc);
					System.out.println(doc.get("author") + "：" + doc.get("title"));
				}
			} finally {
				// Close the IndexSearcher
				searcher.close();
			}
		} finally {
			// A searcher built from an existing IndexReader does not close
			// that reader, so it has to be closed explicitly.
			reader.close();
		}
	}

	/**
	 * Test entry point: builds the index, then searches it.
	 *
	 * @throws Exception if indexing or searching fails
	 */
	@Test
	public void testLucene() throws Exception {
		index();
		search();
	}

}

3. 结果输出：

被索引的文档个数：10
共搜索到7个文档。
列出相关性最高的5条记录：
中原慎一郎 ：我们生活的风景
魏雪明 ：那些年，我们在一起
苏美 ：倾我所有去生活
荞麦：这个世界上的一切都是瘦子的
邓安庆：柔软的距离

4. 这里的分词器用到了中文分词器IKAnalyzer，需要到 http://code.google.com/p/ik-analyzer/downloads/list 下载相应版本的IKAnalyzer，导入相应的jar包，还需要配置IKAnalyzer.cfg.xml文件来扩充专有词典以及停用词词典（过滤词典），具体配置方法可以参考IKAnalyzer的使用手册。

IKAnalyzer.cfg.xml：

<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE properties SYSTEM "http://java.sun.com/dtd/properties.dtd">  
<properties>  
	<comment>IK Analyzer 扩展配置</comment>
	<!-- Users can configure their own extension dictionaries here (currently disabled):
	<entry key="ext_dict">ext.dic;</entry> 
	-->
	<!-- Users can configure their own extension stop-word dictionaries here -->
	<entry key="ext_stopwords">stopword.dic;</entry> 
	
</properties>