Lucene的常用属性:
1、Field
三种常用的字段类型(Field):TextField、StringField、StoredField;
TextField:常用于要分词、要搜索、要存储的数据库字段(是否存储由 Store.YES / Store.NO 决定)
StringField:常用于不分词(整个值作为一个词条索引)、要搜索的数据库字段;是否存储由 Store.YES / Store.NO 决定
StoredField:只存储、不参与搜索的数据库字段
2、分词器
// English sample text for analyzer experiments (not referenced by the tests shown here).
private String en = "oh my lady gaga";
// Chinese sample text fed to the Standard, Simple and CJK analyzer tests.
private String cn = "迅雷不及掩耳盗铃儿响叮当仁不让";
// Mixed Chinese/English sample text fed to the SmartChinese and IK analyzer tests.
private String str = "学习使我进步FullText Search Lucene学习的好好";
/**
 * Tokenizes {@code str} with {@code analyzer} and prints the token stream's state after
 * every token.
 *
 * <p>Follows the full TokenStream consumer workflow (reset, incrementToken, end, close) so
 * the analyzer's reusable components are released properly.
 *
 * @param analyzer analyzer under test; it is not closed by this method
 * @param str text to tokenize
 * @throws Exception if tokenizing fails
 */
public void testAnalyzer(Analyzer analyzer,String str) throws Exception {
    // try-with-resources guarantees close() even if tokenizing throws.
    try (TokenStream tokenStream = analyzer.tokenStream("content", new StringReader(str))) {
        // The stream must be reset before the first incrementToken() call.
        tokenStream.reset();
        while (tokenStream.incrementToken()) {
            // Prints the stream itself, i.e. the attributes of the current token.
            System.out.println(tokenStream);
        }
        // Signals end-of-stream so final offset/position state is recorded.
        tokenStream.end();
    }
}
/**
 * Standard analysis: prints the tokens {@code StandardAnalyzer} produces for the Chinese
 * sample text.
 *
 * @throws Exception if tokenizing fails
 */
@Test
public void testStandardAnalyzer() throws Exception {
    // Analyzers are Closeable; release this one as soon as the demo has run.
    try (Analyzer analyzer = new StandardAnalyzer()) {
        testAnalyzer(analyzer, cn);
    }
}
/**
 * Simple analysis: prints the tokens {@code SimpleAnalyzer} produces for the Chinese
 * sample text.
 *
 * @throws Exception if tokenizing fails
 */
@Test
public void testSimpleAnalyzer() throws Exception {
    // Analyzers are Closeable; release this one as soon as the demo has run.
    try (Analyzer analyzer = new SimpleAnalyzer()) {
        testAnalyzer(analyzer, cn);
    }
}
/**
 * Bigram segmentation: prints the tokens {@code CJKAnalyzer} produces for the Chinese
 * sample text.
 *
 * @throws Exception if tokenizing fails
 */
@Test
public void testCJKAnalyzer() throws Exception {
    // Analyzers are Closeable; release this one as soon as the demo has run.
    try (Analyzer analyzer = new CJKAnalyzer()) {
        testAnalyzer(analyzer, cn);
    }
}
/**
 * Dictionary-based segmentation: prints the tokens {@code SmartChineseAnalyzer} produces
 * for the mixed Chinese/English sample, using a custom stop-word set.
 *
 * @throws Exception if tokenizing fails
 */
@Test
public void testSmartCnAnalyzer() throws Exception {
    // Custom stop-word set (initial size 10, case-insensitive) holding a single entry.
    CharArraySet stopWords = new CharArraySet(10, true);
    stopWords.add("的");
    // Analyzers are Closeable; release this one as soon as the demo has run.
    try (Analyzer analyzer = new SmartChineseAnalyzer(stopWords)) {
        testAnalyzer(analyzer, str);
    }
}
/**
 * IK segmentation: prints the tokens {@code IKAnalyzer} produces for the mixed
 * Chinese/English sample.
 *
 * @throws Exception if tokenizing fails
 */
@Test
public void testIKAnalyzer() throws Exception {
    // NOTE(review): the `true` flag presumably selects IK's "smart" (coarse-grained) mode —
    // confirm against the IKAnalyzer version in use.
    // Analyzers are Closeable; release this one as soon as the demo has run.
    try (Analyzer analyzer = new IKAnalyzer(true)) {
        testAnalyzer(analyzer, str);
    }
}
//查询
/**
* 索引的添删改
* @author admin
*
*/
public class QueryTestDemo {
// 准备数据源
private String doc1 = "hello world";
private String doc2 = "hello java world";
private String doc3 = "hello lucene world";
// 索引目录
private String dirPath = "G:\\Sirius\\soft\\eclipse\\workspace\\Lucene\\queryIndex";
/**
* 索引创建
* @throws Exception
*/
@Test
public void createIndex() throws Exception {
Directory directory = FSDirectory.open(Paths.get(dirPath));
IKAnalyzer analyzer = new IKAnalyzer();
IndexWriterConfig config = new IndexWriterConfig(analyzer);
config.setOpenMode(OpenMode.CREATE); // 代表每次都新建(测试)
IndexWriter indexWriter = new IndexWriter(directory, config);
Document document1= new Document();
document1.add(new TextField("id", "1",Store.YES));
document1.add(new TextField("title", "doc1",Store.YES));
document1.add(new TextField("content", doc1,Store.YES));
document1.add(new TextField("inputtime", "20160812",Store.YES));
indexWriter.addDocument(document1);
Document document2= new Document();
document2.add(new TextField("id", "2",Store.YES));
document2.add(new TextField("title", "doc2",Store.YES));
document2.add(new TextField("content", doc2,Store.YES));
document2.add(new TextField("inputtime", "20160813",Store.YES));
indexWriter.addDocument(document2);
Document document3= new Document();
document3.add(new TextField("id", "3",Store.YES));
document3.add(new TextField("title", "doc3",Store.YES));
document3.add(new TextField("content", doc3,Store.YES));
document3.add(new TextField("inputtime", "20160814",Store.YES));
indexWriter.addDocument(document3);
// indexWriter.commit();
indexWriter.close();
}
/**
* 单词搜索
*/
@Test
public void testTermQuery() throws Exception {
search("content:hello");
System.out.println("==========================================");
TermQuery query = new TermQuery(new Term("content","hello"));
search(query );
}
/**
* 短语搜索
*/
@Test
public void testPhraseQuery() throws Exception {
search("\"hello world\"");
System.out.println("==========================================");
PhraseQuery.Builder builder = new PhraseQuery.Builder();
builder.add(new Term("content","hello"));
builder.add(new Term("content","world"));
PhraseQuery query = builder.build();
search(query );
}
/**
* 通配符搜索
* ? : 1个任意字符(站位)
* * : 0~N个任意字符
* @throws Exception
*/
@Test
public void testWildcardQuery() throws Exception {
search("lu*n?");
System.out.println("==========================================");
WildcardQuery query = new WildcardQuery(new Term("content","lu*n?"));
search(query);
}
/**
* 模糊搜索
* 在单字搜索的基础上,后面跟一个~【0,2】整数,代表最大容错数
* @throws Exception
*/
@Test
public void testQuery() throws Exception {
search("luXenX~2");
System.out.println("==========================================");
FuzzyQuery query = new FuzzyQuery(new Term("content","luXenX"));
search(query);
}
/**
* 临近查询
* 在“短语”搜索的基础上,后面跟一个~【0,100】整数,代表最大间隔数
* @throws Exception
*/
@Test
public void testPhraseQuery2() throws Exception {
search("\"hello world\"~2");
System.out.println("==========================================");
PhraseQuery.Builder builder = new PhraseQuery.Builder();
builder.add(new Term("content","hello"));
builder.add(new Term("content","world"));
builder.setSlop(2);// 最大间隔数
PhraseQuery query = builder.build();
search(query);
}
/**
* 匹配所有
* @throws Exception
*/
@Test
public void testMatchAll() throws Exception {
search("*:*");
System.out.println("==========================================");
search(new MatchAllDocsQuery());
}
/**
* 范围查询
* @throws Exception
*/
@Test
public void testTermRangeQuery() throws Exception {
// search("inputtime:[20160812 TO 20160814]");
// search("inputtime:{20160812 TO 20160814}");
search("inputtime:{20160812 TO 20160814]");
System.out.println("==========================================");
TermRangeQuery query = new TermRangeQuery("inputtime", new BytesRef("20160812"), new BytesRef("20160814"), false, true);
search(query);
}
//组合查询
@Test
public void testBooleanQuery() throws Exception {
search("+content:java -inputtime:[20160812 TO 20160814}");
System.out.println("==========================================");
Builder builder = new BooleanQuery.Builder();
Query qo1 = new TermQuery(new Term("content","java")); // 2,3
builder.add(qo1 , Occur.MUST);
TermRangeQuery qo2 = new TermRangeQuery("inputtime", new BytesRef("20160812"), new BytesRef("20160814"), true, false); // 1,2
builder.add(qo2, Occur.MUST_NOT);
BooleanQuery query = builder.build();
search(query);
}
public void search(String searchKey) throws Exception {
// 指定索引目录
Directory directory = new SimpleFSDirectory(Paths.get(dirPath));
// 创建一个索引读取器
IndexReader indexReader = DirectoryReader.open(directory);
// 索引搜索器
IndexSearcher indexSearcher = new IndexSearcher(indexReader);
// 创建分词器
Analyzer analyzer = new IKAnalyzer();
// 创建查询解析器
QueryParser queryParser = new QueryParser("content", analyzer);
// 创建查询对象
Query query = queryParser.parse(searchKey);
System.out.println("对应的查询对象:"+query.getClass().getName());
// 搜索
TopDocs tds = indexSearcher.search(query, 10000);
System.out.println("一共符合条件的有:" + tds.totalHits);
// 遍历集合
for (ScoreDoc scoreDoc : tds.scoreDocs) {
// 获取内部文档编号
int docId = scoreDoc.doc;
// 通过内部文档编号,获取文档
Document document = indexSearcher.doc(docId);
System.out.println("=====docId=====:" + docId);
System.out.println("=====score=====:" + scoreDoc.score);
System.out.println("===========>id:" + document.get("id")+",title:" + document.get("title")+",content:" + document.get("content")+",inputtime:" + document.get("inputtime"));
System.out.println();
}
}
public void search(Query query) throws Exception {
System.out.println("对应的查询语句:" + query.toString());
// 指定索引目录
Directory directory = new SimpleFSDirectory(Paths.get(dirPath));
// 创建一个索引读取器
IndexReader indexReader = DirectoryReader.open(directory);
// 索引搜索器
IndexSearcher indexSearcher = new IndexSearcher(indexReader);
// 创建分词器
Analyzer analyzer = new IKAnalyzer();
// 创建查询解析器
// 创建查询对象
// 搜索
TopDocs tds = indexSearcher.search(query, 10000);
System.out.println("一共符合条件的有:" + tds.totalHits);
// 遍历集合
for (ScoreDoc scoreDoc : tds.scoreDocs) {
// 获取内部文档编号
int docId = scoreDoc.doc;
// 通过内部文档编号,获取文档
Document document = indexSearcher.doc(docId);
System.out.println("=====docId=====:" + docId);
System.out.println("===========>id:" + document.get("id")+",title:" + document.get("title")+",content:" + document.get("content")+",inputtime:" + document.get("inputtime"));
System.out.println();
}
}
@Test
public void testBoost() throws Exception{
search("java lucene^10");
}
}