一、Elasticsearch是一个建立在全文搜索引擎 Apache Lucene™ 基础上的搜索引擎,可以说Lucene是当今最先进,最高效的全功能开源搜索引擎框架。
最近项目中用到了elasticsearch作为搜索引擎,引入的是最新的es5.3;
由于网上许多教程都是以前老版本的api,基本都不能用,所以只能翻墙去ElasticSearch官网看英文版的教程,贴下英文教程url:
https://www.google.com/url?sa=t&rct=j&q=&esrc=s&source=web&cd=3&ved=0ahUKEwi_9_L2vLrTAhUKNI8KHaCXABoQjBAIOjAC&url=%68%74%74%70%3a%2f%2f%77%77%77%2e%65%6c%61%73%74%69%63%2e%63%6f%2f%70%72%6f%64%75%63%74%73%2f%65%6c%61%73%74%69%63%73%65%61%72%63%68&usg=AFQjCNE7Yq8RM5DB_rCP91NLom135iwdig&cad=rjt
下面是关系型数据库和es对应的名词关系:
下面是spring整合es的maven依赖:
<dependency>
<groupId>org.springframework.data</groupId><artifactId>spring-data-elasticsearch</artifactId>
</dependency>
<dependency>
<groupId>org.elasticsearch</groupId>
<artifactId>elasticsearch</artifactId>
<version>5.3.0</version>
</dependency>
二、查询有两种方式:
es有9200,9300两个端口,9200 是服务地址,Elastic Search 默认利用 9200 作为 Rest 服务的端口
,9300 作为内部通信接口和 Java 客户端接口
1、用esClient客户端,客户端的方式其实也是把查询语句拼成json字符串,通过解析返回json对象来获取查询结果
2、直接发送http请求,相当于自己写客户端,只需要会写dsl查询语句,就能自己查询解析,不用再学client的api
首先介绍第一种方式:
引入esClient客户端依赖:
<dependency>
<groupId>org.elasticsearch.client</groupId>
<artifactId>transport</artifactId>
<version>5.3.0</version>
</dependency>
获取客户端的java代码:
/**
 * Builds a transport client connected to the cluster's Java API port.
 * Port 9300 is the transport/Java-client port (9200 serves the REST API).
 * NOTE(review): the caller owns the returned Client and should close() it.
 */
private Client getElkClient() throws Exception {
    TransportAddress nodeAddress =
            new InetSocketTransportAddress(InetAddress.getByName("ip地址"), 9300);
    // Sniffing lets the client discover the remaining cluster nodes from this one.
    Settings clientSettings = Settings.builder()
            .put("client.transport.sniff", true)
            .build();
    return new PreBuiltTransportClient(clientSettings).addTransportAddress(nodeAddress);
}
@Test // Bool-query example: type must be "3", title must NOT contain the phrase
      // "萨芬", and at least one of (_id == "5", price == 188) must match.
public void boolquery() throws Exception {
    // FIX: the original leaked the transport client; Client is Closeable in ES 5.x,
    // so try-with-resources guarantees it is released even if the search throws.
    try (Client client = getElkClient()) {
        QueryBuilder mustType = QueryBuilders.matchQuery("type", "3");
        QueryBuilder mustNotTitle = QueryBuilders.matchPhraseQuery("title", "萨芬");
        QueryBuilder shouldId = QueryBuilders.matchPhraseQuery("_id", "5");
        QueryBuilder shouldPrice = QueryBuilders.rangeQuery("price").gte(188).lte(188);
        QueryBuilder boolQuery = boolQuery()
                .must(mustType)
                .mustNot(mustNotTitle)
                .should(shouldId)
                .should(shouldPrice)
                // Deprecated in 5.x — minimumShouldMatch(1) is the replacement,
                // but kept here to match the tutorial's API level.
                .minimumNumberShouldMatch(1);
        SearchResponse response = client.prepareSearch("zmgindex")
                .setTypes("zmgtype")
                .setQuery(boolQuery)
                .setFrom(0)   // pagination offset
                .setSize(1)   // return at most one hit
                .execute()
                .actionGet();
        for (SearchHit hit : response.getHits()) { // each hit is one matching document
            log.debug(hit.getSource().get("title").toString());
        }
    }
}
重点介绍第二种方式,只要会dsl查询语句,自己写查询,然后拼成字符串,比用客户端方便的多:
来几个简单的查询,这些查询是用kibana写好的,
GET /log/user/_search
{
"query" : {
"query_string" : {
"query" : "_index:log" //此处既可以放_source中的,也可以放_index,_type等
}
}
}
分页
GET /log/user/_search
{
"version" : true, //返回版本号
"from" : 1, //从哪个文档开始(数组索引有0,填1则从第二条开始查)
"size" : 20,//返回多少个文档
"query" : {
"query_string" : {
"query" : "_index:log"
}
}
}
//匹配所有文档
GET /log/user/_search
{
"query" : {
"match_all" : {}
}
}
GET /log/user/_search
{
"query": {
"query_string" : {
"fields" : ["productname", "refproductname"],
"query" : "this AND that"
}
}
}// 查询的效果相当于: (productname = this or refproductname = this) and (productname=that or refproductname=that)
GET /_search
{
"query": {
"query_string" : { //将fields中的每个字段与query中的值作比较,field之间用or, 外部统一和query对接
"fields" : ["content", "name^5"], ^5指的是权重,name字段的权重越大,含有name的文档就会被优先检索
"query" : "this AND that OR thus",
"use_dis_max" : true
}
}
}
GET /_search
{
"query": {
"query_string" : {
"fields" : ["city.*"],
"query" : "this AND that OR thus", //查询文档中city的对象属性,city的属性中有一个为this,that,thus,则(true AND true OR true)
"use_dis_max" : true
}
}
}
联合查询
GET /zmgindex/zmgtype/_search
{
"query": {
"bool": {
"must": [
{
"match": {
"type": "3"
}
}
]
, "must_not": [
{
"match_phrase": {
"title": "萨芬"
}
}
]
, "should": [
{
"match_phrase": {
"_id": "5"
}
},{
"range": {
"price": {
"gte": 188,
"lte": 188
}
}
}
]
, "minimum_should_match": 1
}
}
}
聚合查询
聚合aggregation 返回"doc_count" from包头不包尾
GET /zmgindex/zmgtype/_search
{
"size": 1,
"aggs": {
"group_by_type": {
"range": {
"field": "type",
"ranges": [
{
"from": 1,
"to": 2
},{
"from": 2,
"to": 3
},
{
"from": 3,
"to": 4
},{
"from": 1,
"to":4
}
]
}
}
}
} 结果:
"aggregations": {
"group_by_type": {
"buckets": [
{
"key": "1.0-2.0",
"from": 1,
"to": 2,
"doc_count": 2
},
{
"key": "1.0-4.0",
"from": 1,
"to": 4,
"doc_count": 7
},
{
"key": "2.0-3.0",
"from": 2,
"to": 3,
"doc_count": 2
},
{
"key": "3.0-4.0",
"from": 3,
"to": 4,
"doc_count": 3
}
]
}
}
先查询type为3的结果集,然后以title.keyword进行分组聚合:title.keyword就是未分词的原始词汇
GET /zmgindex/zmgtype/_search
{
"query": {
"match": {
"type": "3"
}
}
, "aggs": {
"all_type": {
"terms": {
"field": "title.keyword" //此处指定分组字段的,此处的term和查询的term不同
}
}
}
}
"aggregations": {
"all_type": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "标签党1",
"doc_count": 1
},
{
"key": "标签党发生过的",
"doc_count": 1
},
{
"key": "萨芬标签党1",
"doc_count": 1
}
]
}
}
聚合的分级汇总 有了terms 就会分组为buckets ,在terms同一级别再次aggs,就可以在每个buckets中多一个aggs的name{"value":xx}
GET zmgindex/zmgtype/_search
{
"aggs": {
"group_type": {
"terms": {
"field": "type"
},
"aggs": {
"avg_price": {
"avg": {
"field": "price"
}
}
}
}
}
}
"aggregations": {
"group_type": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": 3,
"doc_count": 3,
"avg_price": {
"value": 188
}
},
{
"key": 1,
"doc_count": 2,
"avg_price": {
"value": 100
}
},
{
"key": 2,
"doc_count": 2,
"avg_price": {
"value": 142.5
}
}
]
}
}
//分组中再分组
GET zmgindex/zmgtype/_search
{
"aggs": {
"group_type": {
"terms": {
"field": "type"
},
"aggs": {
"avg_price": {
"avg": {
"field": "price"
}
}
,"make":{ //对此平均值分组中再根据另一个字段分组,
"terms": {
"field": "onSale"
}
}
}
}
}
}
{
"key": 2,
"doc_count": 2,
"avg_price": {
"value": 142.5
},
"make": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [ //此处会将根据价格分完的小组,一级小组下面再根据另一个字段分
{
"key": 0,
"key_as_string": "false",
"doc_count": 1
},
{
"key": 1,
"key_as_string": "true",
"doc_count": 1
}
]
}
}
根据type分成多个bucket,做聚合运算,再将每个bucket按照onSale分成多个bucket1,再把每个bucket1做聚合运算
GET zmgindex/zmgtype/_search
{
"aggs": { //开启聚合模式(不要在意这个aggs)
"group_type": { //第一次分组的名字
"terms": {
"field": "type" //以哪个字段分组
},
"aggs": { //分组之后的开启聚合运算
"avg_price": { //聚合运算结果的的名字以及模式
"avg": {
"field": "price"
}
}
,"onSale_group":{ 第二次分组的名字
"terms": { //以某个字段进行分组
"field": "onSale"
},
"aggs": { //第二次分组之后开启聚合运算
"最小价格":{"min": { 聚合结果名字以及方法
"field": "price"
}},
"最大价格":{"max": {
"field": "price"
}}
}
}
}
}
}
}
"aggregations": {
"group_type": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": 2,
"doc_count": 4,
"group_onSale": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": 0,
"key_as_string": "false",
"doc_count": 3,
"最小价格": {
"value": 155
},
"最大价格": {
"value": 422
}
},
{
"key": 1,
"key_as_string": "true",
"doc_count": 1,
"最小价格": {
"value": 130
},
"最大价格": {
"value": 130
}
}
]
},
"avg_price": {
"value": 215.5
}
},
{
"key": 3,
"doc_count": 4,
"group_onSale": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": 1,
"key_as_string": "true",
"doc_count": 3,
"最小价格": {
"value": 188
},
"最大价格": {
"value": 188
"value": 188
。。。。。。。。。。。。。。。。
以上是dsl查询的案例,学会写这种查询,java代码直接拼字符串,发http请求就行
@Test // Runs the nested aggregation above by POSTing raw DSL to the REST port (9200).
public void aggregationTest() throws Exception {
    // DSL body: group by "type", compute avg price per group, then sub-group each
    // bucket by "onSale" and compute min/max price; size=0 suppresses the hit list.
    String paramStr = "{\n" +
            "  \"size\": 0, \n" +
            "  \"aggs\": {\n" +
            "    \"group_type\": {\n" +
            "      \"terms\": {\n" +
            "        \"field\": \"type\"\n" +
            "      },\n" +
            "      \"aggs\": {\n" +
            "        \"avg_price\": {\n" +
            "          \"avg\": {\n" +
            "            \"field\": \"price\"\n" +
            "          }\n" +
            "        }\n" +
            "        ,\"group_onSale\":{ \n" +
            "          \"terms\": {\n" +
            "            \"field\": \"onSale\"\n" +
            "          },\n" +
            "          \"aggs\": {\n" +
            "            \"最小价格\":{\"min\": {\n" +
            "              \"field\": \"price\"\n" +
            "            }},\n" +
            "            \"最大价格\":{\"max\": {\n" +
            "              \"field\": \"price\"\n" +
            "            }}\n" +
            "          }\n" +
            "        }\n" +
            "      }\n" +
            "    }\n" +
            "  }\n" +
            "}";
    HttpPost httpPost = new HttpPost("http://172.16.2.113:9200/zmgindex/zmgtype/_search");
    StringEntity requestEntity = new StringEntity(paramStr, "utf-8");
    requestEntity.setContentEncoding("UTF-8");
    httpPost.setHeader("Content-type", "application/json");
    httpPost.setEntity(requestEntity);
    // FIX: the original leaked both the HTTP client and the response; both are
    // Closeable, so try-with-resources releases the connection deterministically.
    try (CloseableHttpClient httpClient = HttpClients.createDefault();
         CloseableHttpResponse response = httpClient.execute(httpPost)) {
        // Raw JSON response body; everything below is plain string/JSON handling.
        String s = EntityUtils.toString(response.getEntity());
        // FIX: removed the original's dead `HashMap o1 = (HashMap) new Object();`
        // — that cast always throws ClassCastException at runtime.
        ObjectMapper objm = new ObjectMapper();
        HashMap hashMap = objm.readValue(s, HashMap.class);
        HashMap aggregations = (HashMap) hashMap.get("aggregations");
        HashMap group_type = (HashMap) aggregations.get("group_type");
        ArrayList<HashMap> buckets = (ArrayList) group_type.get("buckets");
        for (HashMap bucket : buckets) {
            // Bucket "key" is the numeric `type` value Jackson deserialized as Integer.
            Integer key = (Integer) bucket.get("key");
            log.debug(key.toString());
        }
    }
}
以上是部分总结,后续的有时间再补上,谢谢阅读!!!