本文测试数据为官方提供的测试数据,导入方法在学习笔记本章节第一篇中:https://blog.youkuaiyun.com/qq_20051535/article/details/113242821
Aggregations执行聚合
聚合提供了从数据中分组和提取数据的能力。最简单的聚合方法大致等于 SQL GROUP BY 和 SQL 聚合函数。在 Elasticsearch 中,您有执行索返回 hits(命中结果),并且同时返回聚合结果,把一个响应中的所有hits(命中结果)隔开的能力。这是非常强大且有效的,您可以执行查询和多个聚合,并且在一次使用得到各自的(任何一个的)返回结果,使用一次简洁和简化的AP来避免网络往返。
【例子1】
搜索 address中包含mill的所有人的年龄分布以及平均年龄,但不显示这些人的详情。
GET /bank/_search
{
"query": {
"match": {
"address": "mill"
}
},
"aggs": { //获取聚合
"ageAgg": { //自定义的聚合名
"terms": { //获取结果的不同数据个数
"field": "age", //获取字段是age
"size": 10 //可能有很多很多可能,只获取前10种
}
},
"ageAvg":{ //自定义的聚合名
"avg": { //求平均值
"field": "age" //获取字段是age
}
}
}
}
返回的结果:
{
"took" : 27,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 4,
"relation" : "eq"
},
"max_score" : 5.4032025,
"hits" : [
{
"_index" : "bank",
"_type" : "account",
"_id" : "970",
"_score" : 5.4032025,
"_source" : {
"account_number" : 970,
"balance" : 19648,
"firstname" : "Forbes",
"lastname" : "Wallace",
"age" : 28,
"gender" : "M",
"address" : "990 Mill Road",
"employer" : "Pheast",
"email" : "forbeswallace@pheast.com",
"city" : "Lopezo",
"state" : "AK"
}
},
{
"_index" : "bank",
"_type" : "account",
"_id" : "136",
"_score" : 5.4032025,
"_source" : {
"account_number" : 136,
"balance" : 45801,
"firstname" : "Winnie",
"lastname" : "Holland",
"age" : 38,
"gender" : "M",
"address" : "198 Mill Lane",
"employer" : "Neteria",
"email" : "winnieholland@neteria.com",
"city" : "Urie",
"state" : "IL"
}
},
{
"_index" : "bank",
"_type" : "account",
"_id" : "345",
"_score" : 5.4032025,
"_source" : {
"account_number" : 345,
"balance" : 9812,
"firstname" : "Parker",
"lastname" : "Hines",
"age" : 38,
"gender" : "M",
"address" : "715 Mill Avenue",
"employer" : "Baluba",
"email" : "parkerhines@baluba.com",
"city" : "Blackgum",
"state" : "KY"
}
},
{
"_index" : "bank",
"_type" : "account",
"_id" : "472",
"_score" : 5.4032025,
"_source" : {
"account_number" : 472,
"balance" : 25571,
"firstname" : "Lee",
"lastname" : "Long",
"age" : 32,
"gender" : "F",
"address" : "288 Mill Street",
"employer" : "Comverges",
"email" : "leelong@comverges.com",
"city" : "Movico",
"state" : "MT"
}
}
]
},
"aggregations" : {
"ageAgg" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : 38,
"doc_count" : 2
},
{
"key" : 28,
"doc_count" : 1
},
{
"key" : 32,
"doc_count" : 1
}
]
},
"ageAvg" : {
"value" : 34.0
}
}
}
如果我们不希望返回数据,只需要分析结果,可以设置 size 为 0
GET /bank/_search
{
"query": {~},
"aggs": {~},
"size": 0
}
【例子2】
按照年龄聚合,并且请求这些年龄段的这些人的平均薪资
GET /bank/_search
{
"query": {
"match_all": {}
},
"aggs": {
"ageAgg": {
"terms": {
"field": "age",
"size": 100
},
"aggs": { //子聚合
"ageAvg": {
"avg": {
"field": "balance"
}
}
}
}
},
"size": 0
}
返回结果:
{
"took" : 16,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 1000,
"relation" : "eq"
},
"max_score" : null,
"hits" : [ ]
},
"aggregations" : {
"ageAgg" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : 31,
"doc_count" : 61,
"ageAvg" : {
"value" : 28312.918032786885
}
},
{
"key" : 39,
"doc_count" : 60,
"ageAvg" : {
"value" : 25269.583333333332
}
},
{
"key" : 26,
"doc_count" : 59,
"ageAvg" : {
"value" : 23194.813559322032
}
},
...
【例子3】
查询出所有年龄分布,并且这些 年龄段中 性别为 M 的平均薪资 和 性别为 F 的平均薪资 以及 这个年龄段的总体平均薪资
GET /bank/_search
{
"query": {
"match_all": {}
},
"aggs": {
"ageAgg": {
"terms": {
"field": "age",
"size": 100
},
"aggs": {
"genderAgg":{
"terms": {
"field": "gender.keyword",
"size": 10
},
"aggs": {
"balanceAvg": {
"avg": {
"field": "balance"
}
}
}
},
"ageBlanace":{
"avg": {
"field": "balance"
}
}
}
}
},
"size": 0
}
返回结果:
{
"took" : 16,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 1000,
"relation" : "eq"
},
"max_score" : null,
"hits" : [ ]
},
"aggregations" : {
"ageAgg" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : 31,
"doc_count" : 61,
"genderAgg" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : "M",
"doc_count" : 35,
"balanceAvg" : {
"value" : 29565.628571428573
}
},
{
"key" : "F",
"doc_count" : 26,
"balanceAvg" : {
"value" : 26626.576923076922
}
}
]
},
"ageBlanace" : {
"value" : 28312.918032786885
}
},
{
"key" : 39,
"doc_count" : 60,
"genderAgg" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : "F",
"doc_count" : 38,
"balanceAvg" : {
"value" : 26348.684210526317
}
},
{
"key" : "M",
"doc_count" : 22,
"balanceAvg" : {
"value" : 23405.68181818182
}
}
]
},
"ageBlanace" : {
"value" : 25269.583333333332
}
},
{
"key" : 26,
"doc_count" : 59,
"genderAgg" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : "M",
"doc_count" : 32,
"balanceAvg" : {
"value" : 25094.78125
}
},
{
"key" : "F",
"doc_count" : 27,
"balanceAvg" : {
"value" : 20943.0
}
}
]
},
"ageBlanace" : {
"value" : 23194.813559322032
}
},
...