
IndexWriter | 创建和维护索引(向原索引中添加新Document,设置合并策略、优化等) |
FSDirectory | 最主要用来存储索引文件的类,表示将索引文件存储到文件系统 |
Document | 索引和查询的原子单元,一个Document包含一系列Field |
IndexReader | 一个抽象类,提供了访问索引的接口,当然访问索引也可以通过它的子类来完成 |
Analyzer | 分词类,它有一系列子类,都是用来将文本解析成TokenStream |
Searcher | 用于查询索引的核心类 |
- Directory dir = FSDirectory.open(new File("lucene.blog"));
- // create=true: build a brand-new index (overwrites any existing one at this path)
- IndexWriter writer = new IndexWriter(dir, new StandardAnalyzer(Version.LUCENE_29), true, IndexWriter.MaxFieldLength.UNLIMITED);
- Document doc = new Document();
- // NOT_ANALYZED (not NO): index the id as one token so the later deleteDocuments/updateDocument(new Term("id", "101")) can match it — Index.NO makes a field unsearchable
- doc.add(new Field("id", "101", Field.Store.YES, Field.Index.NOT_ANALYZED));
- // ANALYZED: tokenize the name so QueryParser searches like "kobe" can hit it
- doc.add(new Field("name", "kobe bryant", Field.Store.YES, Field.Index.ANALYZED));
- writer.addDocument(doc);
- writer.optimize();
- writer.close();
- Directory dir = FSDirectory.open(new File("lucene.blog"));
- // create=true: build a brand-new index (overwrites any existing one at this path)
- IndexWriter writer = new IndexWriter(dir, new StandardAnalyzer(Version.LUCENE_29), true, IndexWriter.MaxFieldLength.UNLIMITED);
- Document doc = new Document();
- // NOT_ANALYZED (not NO): index the id as one token so the later deleteDocuments/updateDocument(new Term("id", "101")) can match it — Index.NO makes a field unsearchable
- doc.add(new Field("id", "101", Field.Store.YES, Field.Index.NOT_ANALYZED));
- // ANALYZED: tokenize the name so QueryParser searches like "kobe" can hit it
- doc.add(new Field("name", "kobe bryant", Field.Store.YES, Field.Index.ANALYZED));
- writer.addDocument(doc);
- writer.optimize();
- writer.close();
- Directory dir = FSDirectory.open(new File("lucene.blog"));
- // create=false: open the EXISTING index for modification — the original passed true, which recreates (wipes) the index before the delete, so there would be nothing left to delete
- IndexWriter writer = new IndexWriter(dir, new StandardAnalyzer(Version.LUCENE_29), false, IndexWriter.MaxFieldLength.UNLIMITED);
- // removes every document whose indexed "id" field equals "101" (the id must have been indexed, e.g. NOT_ANALYZED)
- writer.deleteDocuments(new Term("id", "101"));
- writer.commit();
- writer.close();
- Directory dir = FSDirectory.open(new File("lucene.blog"));
- // create=false: open the EXISTING index for modification — the original passed true, which recreates (wipes) the index before the delete, so there would be nothing left to delete
- IndexWriter writer = new IndexWriter(dir, new StandardAnalyzer(Version.LUCENE_29), false, IndexWriter.MaxFieldLength.UNLIMITED);
- // removes every document whose indexed "id" field equals "101" (the id must have been indexed, e.g. NOT_ANALYZED)
- writer.deleteDocuments(new Term("id", "101"));
- writer.commit();
- writer.close();
- Directory dir = FSDirectory.open(new File("lucene.blog"));
- // create=false: open the EXISTING index — the original passed true, which wipes the index, leaving no old document for updateDocument to replace
- IndexWriter writer = new IndexWriter(dir, new StandardAnalyzer(Version.LUCENE_29), false, IndexWriter.MaxFieldLength.UNLIMITED);
- Document doc = new Document();
- doc.add(new Field("id", "101", Field.Store.YES, Field.Index.ANALYZED)); // Field.Index.ANALYZED
- doc.add(new Field("name", "kylin soong", Field.Store.YES, Field.Index.ANALYZED));
- // updateDocument = atomic delete-by-term + add: removes docs matching id:101, then adds the new doc
- writer.updateDocument(new Term("id", "101"), doc);
- writer.commit();
- writer.close();
- Directory dir = FSDirectory.open(new File("lucene.blog"));
- // create=false: open the EXISTING index — the original passed true, which wipes the index, leaving no old document for updateDocument to replace
- IndexWriter writer = new IndexWriter(dir, new StandardAnalyzer(Version.LUCENE_29), false, IndexWriter.MaxFieldLength.UNLIMITED);
- Document doc = new Document();
- // NOT_ANALYZED (not NO): if the new doc's id is not indexed, a future updateDocument/deleteDocuments by Term("id","101") can never find it again
- doc.add(new Field("id", "101", Field.Store.YES, Field.Index.NOT_ANALYZED));
- doc.add(new Field("name", "kylin soong", Field.Store.YES, Field.Index.NO));
- writer.updateDocument(new Term("id", "101"), doc);
- writer.commit();
- writer.close();
- // NOTE(review): Field(name, value, store, index, termVector) — "101" is the field NAME here, which looks like a value; confirm the intended field name ("id"/"name"?)
- Field field = new Field(
- "101",
- "kobe bryant",
- Field.Store.YES,
- Field.Index.ANALYZED,
- // TermVector.YES: additionally record this field's term vector in the index (see the table below)
- Field.TermVector.YES);
- // NOTE(review): Field(name, value, store, index, termVector) — "101" is the field NAME here, which looks like a value; confirm the intended field name ("id"/"name"?)
- Field field = new Field(
- "101",
- "kobe bryant",
- Field.Store.YES,
- Field.Index.ANALYZED,
- // TermVector.YES: additionally record this field's term vector in the index (see the table below)
- Field.TermVector.YES);
Field.Store.YES | 存储Field的原始值,检索命中后可通过Document.get("id")取回该值;注意:能否作为查询条件取决于Field.Index的设置,与是否存储无关 |
Field.Store.NO | 不存储 |
Field.Index.ANALYZED | 用Analyzer将Field的值分词成多个Token |
Field.Index.NOT_ANALYZED | 不对Field的值分词,将Field的值作为一个Token处理 |
Field.Index.ANALYZED_NO_NORMS | 类似ANALYZED,但不存norms(归一化因子)信息到索引文件 |
Field.Index.NOT_ANALYZED_NO_NORMS | 类似NOT_ANALYZED,但不存norms(归一化因子)信息到索引文件 |
Field.Index.NO | 不进行索引,Field的值不可被搜索 |
Field.TermVector.YES | 记录唯一的terms,当重复发生时记下重复数,但不做额外处理 |
Field.TermVector.WITH_POSITIONS | 在上面基础上记录下位置 |
Field.TermVector.WITH_OFFSETS | 在TermVector.YES基础上记录偏移量 |
Field.TermVector.WITH_POSITIONS_OFFSETS | 在TermVector.YES基础上记录偏移量和位置 |
Field.TermVector.NO | 不做任何处理 |
- Document doc = new Document();
- NumericField field1 = new NumericField("id");
- field1.setIntValue(101);
- doc.add(field1);
- NumericField field2 = new NumericField("price");
- // BUG FIX: the original called field1.setDoubleValue(123.50), clobbering the "id" value and leaving "price" with no value at all
- field2.setDoubleValue(123.50);
- doc.add(field2);
- Document doc = new Document();
- NumericField field1 = new NumericField("id");
- field1.setIntValue(101);
- doc.add(field1);
- NumericField field2 = new NumericField("price");
- // BUG FIX: the original called field1.setDoubleValue(123.50), clobbering the "id" value and leaving "price" with no value at all
- field2.setDoubleValue(123.50);
- doc.add(field2);
- Document doc = new Document();
- // getTime() returns milliseconds since the epoch
- doc.add(new NumericField("timestamp").setLongValue(new Date().getTime()));
- // BUG FIX: to get whole days the millisecond value must be divided by 1000*3600*24; the original /24/3600 left a millisecond-scale number, not a day count
- doc.add(new NumericField("day").setIntValue((int) (new Date().getTime()/1000/3600/24)));
- Calendar cal = Calendar.getInstance();
- cal.setTime(new Date());
- doc.add(new NumericField("dayOfMonth").setIntValue(cal.get(Calendar.DAY_OF_MONTH)));
- Document doc = new Document();
- // getTime() returns milliseconds since the epoch
- doc.add(new NumericField("timestamp").setLongValue(new Date().getTime()));
- // BUG FIX: to get whole days the millisecond value must be divided by 1000*3600*24; the original /24/3600 left a millisecond-scale number, not a day count
- doc.add(new NumericField("day").setIntValue((int) (new Date().getTime()/1000/3600/24)));
- Calendar cal = Calendar.getInstance();
- cal.setTime(new Date());
- doc.add(new NumericField("dayOfMonth").setIntValue(cal.get(Calendar.DAY_OF_MONTH)));
- Directory dir = FSDirectory.open(new File("lucene.blog"));
- // MaxFieldLength.LIMITED: cap the number of terms indexed per field (vs UNLIMITED above)
- IndexWriter writer = new IndexWriter(dir, new StandardAnalyzer(Version.LUCENE_29), true, IndexWriter.MaxFieldLength.LIMITED);
- // only the first 1 term of each field is indexed; the rest are silently dropped (demo of the limit)
- writer.setMaxFieldLength(1);
- // LogByteSizeMergePolicy chooses segments to merge based on their byte size
- MergePolicy policy = new LogByteSizeMergePolicy(writer);
- writer.setMergePolicy(policy);
- // optimize(5): merge segments until at most 5 remain
- writer.optimize(5);
- writer.close();
- Directory dir = FSDirectory.open(new File("lucene.blog"));
- // MaxFieldLength.LIMITED: cap the number of terms indexed per field (vs UNLIMITED above)
- IndexWriter writer = new IndexWriter(dir, new StandardAnalyzer(Version.LUCENE_29), true, IndexWriter.MaxFieldLength.LIMITED);
- // only the first 1 term of each field is indexed; the rest are silently dropped (demo of the limit)
- writer.setMaxFieldLength(1);
- // LogByteSizeMergePolicy chooses segments to merge based on their byte size
- MergePolicy policy = new LogByteSizeMergePolicy(writer);
- writer.setMergePolicy(policy);
- // optimize(5): merge segments until at most 5 remain
- writer.optimize(5);
- writer.close();
- // open the index read-only (second argument true = readOnly)
- IndexReader reader = IndexReader.open(FSDirectory.open(new File("lucene.blog")), true);
- IndexSearcher searcher = new IndexSearcher(reader);
- Term term = new Term("id", "101");
- Query query = new TermQuery(term);
- // fetch at most the top 10 hits
- TopDocs topDocs = searcher.search(query, 10);
- System.out.println(topDocs.totalHits);
- ScoreDoc[] docs = topDocs.scoreDocs;
- // BUG FIX: guard against an empty result — docs[0] throws ArrayIndexOutOfBoundsException when nothing matches
- if (docs.length > 0) {
- System.out.println(docs[0].doc + " " + docs[0].score);
- Document doc = searcher.doc(docs[0].doc);
- System.out.println(doc.get("id"));
- }
- // release the searcher and reader when done
- searcher.close();
- reader.close();
- // open the index read-only (second argument true = readOnly)
- IndexReader reader = IndexReader.open(FSDirectory.open(new File("lucene.blog")), true);
- IndexSearcher searcher = new IndexSearcher(reader);
- Term term = new Term("id", "101");
- Query query = new TermQuery(term);
- // fetch at most the top 10 hits
- TopDocs topDocs = searcher.search(query, 10);
- System.out.println(topDocs.totalHits);
- ScoreDoc[] docs = topDocs.scoreDocs;
- // BUG FIX: guard against an empty result — docs[0] throws ArrayIndexOutOfBoundsException when nothing matches
- if (docs.length > 0) {
- System.out.println(docs[0].doc + " " + docs[0].score);
- Document doc = searcher.doc(docs[0].doc);
- System.out.println(doc.get("id"));
- }
- // release the searcher and reader when done
- searcher.close();
- reader.close();
- IndexReader reader = IndexReader.open(FSDirectory.open(new File("lucene.blog")),true);
- IndexSearcher searcher = new IndexSearcher(reader);
- Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_29);
- // QueryParser parses a human-entered query string against the default field "name"
- QueryParser parser = new QueryParser(Version.LUCENE_29,"name",analyzer);
- String queryString = "kobe";
- Query query = parser.parse(queryString);
- // collect at most the top 10 hits; NOTE(review): the boolean is docsScoredInOrder — confirm against the TopScoreDocCollector javadoc
- TopScoreDocCollector collector = TopScoreDocCollector.create(10, false);
- searcher.search(query, collector);
- ScoreDoc[] hits = collector.topDocs().scoreDocs;
- // print the stored "name" field of every hit (safe on zero hits: the loop simply does not run)
- for(int i = 0 ; i < hits.length ; i ++) {
- Document doc = searcher.doc(hits[i].doc);
- String name = doc.get("name");
- if (name != null) {
- System.out.println(name);
- }
- }
- IndexReader reader = IndexReader.open(FSDirectory.open(new File("lucene.blog")),true);
- IndexSearcher searcher = new IndexSearcher(reader);
- Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_29);
- // QueryParser parses a human-entered query string against the default field "name"
- QueryParser parser = new QueryParser(Version.LUCENE_29,"name",analyzer);
- String queryString = "kobe";
- Query query = parser.parse(queryString);
- // collect at most the top 10 hits; NOTE(review): the boolean is docsScoredInOrder — confirm against the TopScoreDocCollector javadoc
- TopScoreDocCollector collector = TopScoreDocCollector.create(10, false);
- searcher.search(query, collector);
- ScoreDoc[] hits = collector.topDocs().scoreDocs;
- // print the stored "name" field of every hit (safe on zero hits: the loop simply does not run)
- for(int i = 0 ; i < hits.length ; i ++) {
- Document doc = searcher.doc(hits[i].doc);
- String name = doc.get("name");
- if (name != null) {
- System.out.println(name);
- }
- }
http://johnoyoung.blog.sohu.com/90691910.html
http://blog.sina.com.cn/s/blog_51e1d40e0100aadc.html
http://blog.21cn.com/johnoyoung/article/51468
http://hi.baidu.com/johnoyoung/blog/item/76c5be6008c1e0da8cb10d00.html
http://johnoyoung.bokee.com/viewdiary.32408821.html
http://blog.yesky.com/blog/junhay/archive/2008/06/21/1855338.html
http://www.diybl.com/course/3_program/java/javajs/2008622/127288.html
http://dev-club.esnai.com/club/bbs/announce,2580533.htm
http://q.yesky.com/album/welcome.do?userId=2475401
http://www.chinaaspx.com/Comm/Dotnetbbs/Showtopic.aspx?Forum_ID=9&Id=296558&PPage=1
http://www.bitscn.com/member/index.php?uid=junhay
http://my.codepub.com/space-49916-do-blog-id-17751.html