关于Lucene的学习日志

简单步骤描述:
1. 首先搜集数据(数据可以来自文件系统、数据库、网络、手工输入,或者像本例一样直接写在内存中)
2. 通过数据创建索引
3. 用户输入关键字
4. 通过关键字创建查询器
5. 根据查询器到索引里获取数据
6. 然后把查询结果展示在用户面前
思路图:

大致代码:

package com.how2java;

import java.io.IOException;
import java.io.StringReader;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import com.sun.applet2.AppletParameters;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;
import org.wltea.analyzer.lucene.IKAnalyzer;

/**
 * Minimal end-to-end Lucene demo: builds an in-memory index from a handful of
 * product titles, parses a keyword into a query, runs the search and prints
 * the scored hits.
 */
public class TestLucene {

	/**
	 * Runs the demo: index the sample products, search for a fixed keyword and
	 * print the results to standard output.
	 *
	 * @param args unused
	 * @throws Exception if indexing, query parsing, searching or highlighting fails
	 */
	public static void main(String[] args) throws Exception {
		// 1. Prepare the Chinese analyzer (tokenizer)
		IKAnalyzer analyzer = new IKAnalyzer();

		// 2. Build the index from id -> product title
		Map<Integer,String> map = new HashMap<>();
		map.put(3,"飞利浦led灯泡e27螺口暖白球泡灯家用照明超亮节能灯泡转色温灯泡");
		map.put(4,"飞利浦led灯泡e14螺口蜡烛灯泡3W尖泡拉尾节能灯泡暖黄光源Lamp");
		map.put(7,"雷士照明 LED灯泡 e27大螺口节能灯3W球泡灯 Lamp led节能灯泡");
		map.put(5,"飞利浦 led灯泡 e27螺口家用3w暖白球泡灯节能灯5W灯泡LED单灯7w");
		map.put(11,"飞利浦led小球泡e14螺口4.5w透明款led节能灯泡照明光源lamp单灯");
		map.put(13,"飞利浦蒲公英护眼台灯工作学习阅读节能灯具30508带光源");
		map.put(12,"欧普照明led灯泡蜡烛节能灯泡e14螺口球泡灯超亮照明单灯光源");
		map.put(15,"欧普照明led灯泡节能灯泡超亮光源e14e27螺旋螺口小球泡暖黄家用");
		map.put(32,"聚欧普照明led灯泡节能灯泡e27螺口球泡家用led照明单灯超亮光源");

		Directory index1 = createIndex1(analyzer,map);

		// 3. Build the query against the "name" field
		String keyword = "照明带光源";
		Query query = new QueryParser("name", analyzer).parse(keyword);

		// 4. Search; try-with-resources closes the reader even if searching
		//    or printing the results throws
		try (IndexReader reader = DirectoryReader.open(index1)) {
			IndexSearcher searcher = new IndexSearcher(reader);
			int numberPerPage = 1000;
			System.out.printf("当前一共有%d条数据%n",map.size());
			System.out.printf("查询关键字是:\"%s\"%n",keyword);
			ScoreDoc[] hits = searcher.search(query, numberPerPage).scoreDocs;

			// 5. Show the search results
			showSearchResults(searcher, hits, query, analyzer);
		}
	}

	/**
	 * Prints one line per hit: rank, score, then every stored field of the
	 * matching document. The "name" field is passed through the highlighter.
	 *
	 * @param searcher searcher used to load the stored documents
	 * @param hits     hits to print, in the order they should be displayed
	 * @param query    query that produced the hits; drives the highlighter scoring
	 * @param analyzer analyzer used to re-tokenize the stored text for highlighting
	 * @throws Exception if a document cannot be loaded or highlighting fails
	 */
	private static void showSearchResults(IndexSearcher searcher, ScoreDoc[] hits, Query query, IKAnalyzer analyzer)
			throws Exception {
		System.out.println("找到 " + hits.length + " 个命中.");
		System.out.println("序号\t匹配度得分\t结果");
		// Highlighting works by wrapping matched terms in the formatter's pre/post tags.
		// Both tags are empty here, so matched terms are printed without visible markup;
		// pass real tags to the formatter to make the matches stand out.
		SimpleHTMLFormatter simpleHTMLFormatter = new SimpleHTMLFormatter("","");
		Highlighter highlighter = new Highlighter(simpleHTMLFormatter,new QueryScorer(query));
		for (int i = 0; i < hits.length; ++i) {
			ScoreDoc scoreDoc = hits[i];
			Document d = searcher.doc(scoreDoc.doc);
			// Typed list: a raw List cannot be iterated as IndexableField
			List<IndexableField> fields = d.getFields();
			System.out.print((i + 1));
			System.out.print("\t" + scoreDoc.score);
			for (IndexableField f : fields) {
				String stored = d.get(f.name());
				if (f.name().equals("name")) {
					// Highlighted content replaces the raw stored value
					TokenStream tokenStream = analyzer.tokenStream(f.name(), new StringReader(stored));
					String fieldContent = highlighter.getBestFragment(tokenStream, stored);
					// getBestFragment returns null when no fragment scores; fall back
					// to the stored text instead of printing "null"
					System.out.print("\t" + (fieldContent != null ? fieldContent : stored));
				} else {
					System.out.print("\t" + stored);
				}
			}
			System.out.println();
		}
	}

	/**
	 * Builds an in-memory index containing one document per map entry.
	 *
	 * @param analyzer analyzer used to tokenize the indexed text
	 * @param products product titles keyed by product id
	 * @return the directory holding the committed index
	 * @throws IOException if any document cannot be written; the failure is
	 *                     propagated rather than logged so that callers never
	 *                     search a silently incomplete index
	 */
	private static Directory createIndex1(IKAnalyzer analyzer, Map<Integer,String> products) throws IOException {
		Directory index = new RAMDirectory();
		IndexWriterConfig config = new IndexWriterConfig(analyzer);
		// try-with-resources closes the writer even when adding a document fails
		try (IndexWriter writer = new IndexWriter(index, config)) {
			for (Map.Entry<Integer,String> product : products.entrySet()) {
				addDoc1(writer, product.getValue(), product.getKey());
			}
		}
		return index;
	}

	/**
	 * Adds a single product to the index as a document with a "name" and an
	 * "id" field, both stored.
	 *
	 * @param w    writer to add the document to
	 * @param name product title
	 * @param id   product id
	 * @throws IOException if the document cannot be written
	 */
	private static void addDoc1(IndexWriter w, String name,Integer id) throws IOException {
		Document doc = new Document();
		doc.add(new TextField("name", name, Field.Store.YES));
		// Each Document is one record; call doc.add once per additional field
		doc.add(new TextField("id", String.valueOf(id), Field.Store.YES));
		w.addDocument(doc);
	}
}

分页查询时可以使用:

	/**
	 * Returns the hits for a single page of results.
	 *
	 * <p>The first page is fetched directly. For later pages the method first
	 * fetches all hits that precede the page, then uses the last of them as the
	 * anchor for {@code searchAfter} so that only one page of hits is returned.
	 *
	 * @param query    query to execute
	 * @param searcher searcher to run the query with
	 * @param pageNow  1-based page number
	 * @param pageSize number of hits per page
	 * @return the hits on the requested page; empty when the page lies past the
	 *         last hit
	 * @throws IllegalArgumentException if {@code pageNow} or {@code pageSize} is less than 1
	 * @throws IOException if the search fails
	 */
	private static ScoreDoc[] pageSearch2(Query query, IndexSearcher searcher, int pageNow, int pageSize) throws IOException {
		if (pageNow < 1 || pageSize < 1) {
			throw new IllegalArgumentException(
					"pageNow and pageSize must be >= 1, got pageNow=" + pageNow + ", pageSize=" + pageSize);
		}
		// Computed as long so a large page number cannot overflow into a bogus offset
		long offset = (long) (pageNow - 1) * pageSize;
		if (offset == 0) {
			return searcher.search(query, pageSize).scoreDocs;
		}
		if (offset > Integer.MAX_VALUE) {
			// The offset cannot be passed to search(), so treat it as a page past the end
			return new ScoreDoc[0];
		}
		int start = (int) offset;
		// Fetch every hit before the requested page; only the last one is needed
		ScoreDoc[] preceding = searcher.search(query, start).scoreDocs;
		if (preceding.length < start) {
			// Fewer hits than the page offset: the requested page does not exist
			return new ScoreDoc[0];
		}
		// Last hit of the previous page
		ScoreDoc preScore = preceding[start - 1];
		// One page of hits ranked after that anchor
		return searcher.searchAfter(preScore, query, pageSize).scoreDocs;
	}

在实际的操作过程中一定会存在数据一致性的问题,所以需要对索引进行删除与更新操作
删除id为51173的数据:

        // Delete every document whose "id" field contains the term 51173.
        // try-with-resources closes the writer even if the delete or the commit throws.
        IndexWriterConfig config = new IndexWriterConfig(analyzer);
        try (IndexWriter indexWriter = new IndexWriter(index, config)) {
            indexWriter.deleteDocuments(new Term("id", "51173"));
            indexWriter.commit();
        }

利用条件删除数据:

    deleteDocuments(Query query):根据Query条件来删除单个或多个Document
    deleteDocuments(Query[] queries):根据Query条件来删除单个或多个Document
    deleteDocuments(Term term):根据Term来删除单个或多个Document
    deleteDocuments(Term[] terms):根据Term来删除单个或多个Document
    deleteAll():删除所有的Document

更新数据:

// Update the index: replace whatever documents match the term id=51173 with the
// new document built below. try-with-resources closes the writer even if the
// update or the commit throws.
IndexWriterConfig config = new IndexWriterConfig(analyzer);
try (IndexWriter indexWriter = new IndexWriter(index, config)) {
    Document doc = new Document();
    doc.add(new TextField("id", "51173", Field.Store.YES));
    doc.add(new TextField("name", "神鞭,鞭没了,神还在", Field.Store.YES));
    doc.add(new TextField("category", "道具", Field.Store.YES));
    doc.add(new TextField("price", "998", Field.Store.YES));
    doc.add(new TextField("place", "南海群岛", Field.Store.YES));
    doc.add(new TextField("code", "888888", Field.Store.YES));
    indexWriter.updateDocument(new Term("id", "51173"), doc);
    indexWriter.commit();
}

参考资料:how2j搜索引擎技术

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值