深入解析Lucene索引创建、搜索与更新技术-CSDN博客

为什么80%的码农都做不了架构师？>>>
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
import org.wltea.analyzer.lucene.IKAnalyzer;

/**
 * Demonstrates create / search / update / delete operations on a Lucene
 * index stored on the local file system.
 *
 * <p>Each test opens its own {@link IndexWriter} (when it needs one) on the
 * shared {@link Directory} prepared in {@link #init()}; {@link #close()}
 * releases both after every test and prints the elapsed time.
 *
 * <p>NOTE(review): {@code testSearch}, {@code testUpdate} and
 * {@code testDelete} expect an index that already exists in
 * {@code INDEX_SAVE_PATH}, i.e. {@code testCreate} must have been run
 * beforehand. JUnit does not guarantee method order.
 */
public class LuceneIndexCRUDTest {

	// Directory in which the index is stored.
	private static final String INDEX_SAVE_PATH = "f:\\lucene";
	// Search keyword. Original note: use "hello" or "word" after running the
	// update test. NOTE(review): the replacement field built by getSomeField()
	// uses a StringField type, which is presumably indexed as one untokenized
	// term ("helloword") - confirm before relying on it.
	private static final String KEYWORDS = "lucene";
	// Directory containing the source files to be indexed.
	private static final String DATA_PATH = "G:\\java\\javaEE";
	// Only files with this extension are indexed.
	private static final String INDEXED_EXTENSION = ".txt";

	private IndexWriter iw = null;
	// Open-source Chinese analyzer for Java ---> https://code.google.com/p/ik-analyzer/
	private IKAnalyzer analyzer = null;
	private Directory dir = null;
	// Wall-clock time (ms) at which the current test started.
	private long start;

	/**
	 * Records the start time, makes sure the index directory exists and
	 * opens it together with the analyzer.
	 *
	 * @throws IOException if the index directory cannot be created or opened
	 */
	@Before
	public void init() throws IOException {
		start = System.currentTimeMillis();
		File file = new File(INDEX_SAVE_PATH);
		// mkdirs() also creates missing parents; report failure instead of
		// letting FSDirectory.open fail later with a less obvious error.
		if (!file.exists() && !file.mkdirs()) {
			throw new IOException("cannot create index directory " + INDEX_SAVE_PATH);
		}
		// Lucene picks MMapDirectory, NIOFSDirectory or SimpleFSDirectory
		// depending on the underlying platform.
		dir = FSDirectory.open(file);
		analyzer = new IKAnalyzer();
	}

	/**
	 * Closes the writer (if a test opened one) and the directory, then
	 * prints the elapsed time of the test.
	 *
	 * @throws IOException if closing the writer or the directory fails
	 */
	@After
	public void close() throws IOException {
		try {
			if (iw != null) {
				iw.close();
			}
		} finally {
			// Release the directory even when closing the writer failed.
			if (dir != null) {
				dir.close();
			}
		}
		System.out.println("耗时："+(System.currentTimeMillis() - start));
	}

	/**
	 * Indexes every matching file found under the data directory.
	 *
	 * @throws IOException if the index cannot be written
	 */
	@Test
	public void testCreate() throws IOException {
		iw = openWriter();
		File fileDir = new File(DATA_PATH);
		if (!fileDir.exists() || !fileDir.isDirectory()) {
			throw new RuntimeException(DATA_PATH + " does not exists or is not directory");
		}
		create(iw, fileDir);
	}

	/**
	 * Opens an {@link IndexWriter} on the shared directory that appends to an
	 * existing index or creates a new one when none exists.
	 */
	private IndexWriter openWriter() throws IOException {
		IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_46, analyzer);
		iwc.setOpenMode(OpenMode.CREATE_OR_APPEND);
		return new IndexWriter(dir, iwc);
	}

	/**
	 * Recursively indexes the text files below {@code fileDir}
	 * (adapted from "Lucene in Action").
	 *
	 * <p>Each file becomes one document with an analyzed, unstored
	 * {@code contents} field and a stored {@code filename} field holding the
	 * canonical path. Hidden or unreadable files are skipped.
	 *
	 * @param indexWriter writer the documents are added to
	 * @param fileDir     directory to scan
	 * @throws IOException if reading a file or writing the index fails
	 */
	private static void create(IndexWriter indexWriter, File fileDir)
			throws IOException {
		File[] files = fileDir.listFiles();
		if (files == null) {
			// listFiles() returns null when the directory cannot be read;
			// skip it just like unreadable files are skipped.
			return;
		}
		for (File f : files) {
			if (f.isDirectory()) {
				create(indexWriter, f);
				continue;
			}
			if (!f.getName().endsWith(INDEXED_EXTENSION)) {
				continue;
			}
			if (f.isHidden() || !f.exists() || !f.canRead()) {
				// Skip only this file; the remaining entries must still be indexed.
				continue;
			}
			System.out.println("Indexing ：" + f.getCanonicalPath());
			// NOTE: FileReader decodes with the platform default charset.
			FileReader reader = new FileReader(f);
			try {
				Document doc = new Document();
				doc.add(new Field("contents", reader,
						TextField.TYPE_NOT_STORED));
				doc.add(new Field("filename", f.getCanonicalPath(),
						StringField.TYPE_STORED));
				indexWriter.addDocument(doc);
			} finally {
				// Guarantees the file handle is released even if indexing fails.
				reader.close();
			}
		}
	}

	/**
	 * Runs a term query for the keyword against the {@code contents} field
	 * and prints up to ten hits.
	 *
	 * @throws Exception if the index cannot be opened or searched
	 */
	@Test
	public void testSearch() throws Exception {
		IndexReader ir = DirectoryReader.open(dir);
		try {
			IndexSearcher is = new IndexSearcher(ir);
			Query query = new TermQuery(new Term("contents", KEYWORDS));
			TopDocs topDocs = is.search(query, 10);
			for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
				Document doc = is.doc(scoreDoc.doc);
				System.out.println("hits ： " + doc.toString());
			}
		} finally {
			ir.close();
		}
	}

	/**
	 * Replaces the documents whose {@code contents} field contains the
	 * keyword term with the document built by {@link #getSomeField()}.
	 *
	 * <p>An update is a delete of the old documents followed by an add of
	 * the new one.
	 *
	 * @throws Exception if the index cannot be written
	 */
	@Test
	public void testUpdate() throws Exception {
		iw = openWriter();
		Term t = new Term("contents", KEYWORDS);
		iw.updateDocument(t, getSomeField(), analyzer);
	}

	/**
	 * Builds the fields of the replacement document used by
	 * {@link #testUpdate()}: a single unstored {@code contents} field with
	 * the value {@code "helloword"}.
	 */
	private static List<Field> getSomeField() {
		List<Field> list = new ArrayList<Field>();
		list.add(new Field("contents", "helloword",
				StringField.TYPE_NOT_STORED));
		return list;
	}

	/**
	 * Deletes every document matching the keyword, parsed as a query
	 * against the {@code contents} field.
	 *
	 * @throws Exception if the query cannot be parsed or the index cannot be written
	 */
	@Test
	public void testDelete() throws Exception {
		iw = openWriter();
		QueryParser qp = new QueryParser(Version.LUCENE_46, "contents",
				analyzer);
		Query query = qp.parse(KEYWORDS);
		iw.deleteDocuments(query);
	}
}
转载于:https://my.oschina.net/lsw90/blog/186732