1.连接es
from elasticsearch import Elasticsearch
from elasticsearch_dsl import Search,Q
# Connect to the Elasticsearch node at the given host:port.
ES_HOSTS = "127.0.0.1:9200"
es = Elasticsearch(hosts=ES_HOSTS)
# Create a Search instance bound to that client.
s = Search(using=es)
2.创建索引数据
# Source document describing a single car sale.
index_body = {
    "price": 30000,
    "color": "green",
    "make": "ford",
    "sold": "2014-05-18",
}
# Store the document in the "cars" index under the explicit id "bulk".
# NOTE(review): the id is fixed, so repeating this call with another body
# targets the same document id -- confirm that is intended when loading
# the remaining sample records.
es.index(
    index="cars",
    doc_type="transactions",
    id="bulk",
    body=index_body,
)
为了后续测试,同时创建几条数据:
{ "price" : 10000, "color" : "red", "make" : "honda", "sold" : "2014-10-28" }
{ "price" : 20000, "color" : "red", "make" : "honda", "sold" : "2014-11-05" }
{ "price" : 30000, "color" : "green", "make" : "ford", "sold" : "2014-05-18" }
{ "price" : 15000, "color" : "blue", "make" : "toyota", "sold" : "2014-07-02" }
{ "price" : 12000, "color" : "green", "make" : "toyota", "sold" : "2014-08-19" }
{ "price" : 20000, "color" : "red", "make" : "honda", "sold" : "2014-11-05" }
{ "price" : 80000, "color" : "red", "make" : "bmw", "sold" : "2014-01-01" }
{ "price" : 25000, "color" : "blue", "make" : "ford", "sold" : "2014-02-12" }
3.查询
3.1 index查询
# Restrict the base search to the "cars" index, then run it.
cars_search = s.index("cars")
res1 = cars_search.execute()
print(res1)
3.1 query查询
# Empty query: run a match_all query against the base search.
match_all_search = s.query("match_all")
res2 = match_all_search.execute()
print(res2)
# Exact-value (term) query; iterate over the hits returned by the search.
# Iterating a Search object executes it, so no explicit execute() is needed.
res3 = s.query("term", price=25000)
for hit in res3:
    # The loop body must be indented; without it the snippet does not parse.
    print(hit.color)
# Multi-condition query: chain a term query and a match query, then convert
# the response to a plain dict so it is easier to read.
combined_search = s.query("term", price=25000).query("match", color="blue")
response = combined_search.execute()
res4 = response.to_dict()
print(res4)
3.2 filter查询
# Apply a raw term clause as a filter on the base search.
price_filter = {"term": {"price": 25000}}
res5 = s.filter(price_filter).execute()
print(res5)
3.3 sort查询
# Sort the results by the "sold" field.
sorted_search = s.sort("sold")
res6 = sorted_search.execute()
print(res6)
3.4 分页查询
# Paginate by slicing the search: [10:20] -> {"from": 10, "size": 10}
page = s[10:20]
res7 = page.execute()
print(res7)
3.5 高亮查询
# Request highlighting on the "sold" field and print the response as a dict.
highlighted = s.highlight("sold")
res8 = highlighted.execute().to_dict()
print(res8)
3.6 限制返回字段查询 source
# Limit the returned _source to the listed fields.
wanted_fields = ['price', 'color']
price_search = s.query("match", price=25000).source(wanted_fields)
res9 = price_search.execute().to_dict()
print(res9)
3.2 Q查询
将原生的查询语句嵌入Q中,实现组合查询。比如:
# Wrap a raw constant_score/term query body in Q and run it.
raw_query = {"constant_score": {"filter": {"term": {"price": 25000}}}}
q = Q(raw_query)
res10 = s.query(q).execute()
print(res10)
或者:
# Bool query built from Q objects: one must clause, two should clauses,
# and minimum_should_match=1.
must_clauses = [Q('match', color='blue')]
should_clauses = [Q('match', make='ford'), Q('match', sold='2014-02-12')]
q = Q('bool', must=must_clauses, should=should_clauses, minimum_should_match=1)
res11 = s.query(q).execute()
print(res11)
4.聚合
聚合有 bucket() 和 metric() 两个方法:
- bucket: 又被称作桶,满足特定条件的文档集合,可以看作是一个数据分组。聚合开始后,Elasticsearch会根据文档的值计算出文档究竟符合哪个桶,如果匹配,则将文档放入相应的桶。当所有的文档都经过计算后,再分别对每个桶进行聚合操作。
- metric: 又被称作指标,对桶内的文档进行聚合分析操作,操作有若干种类别,如:求和、最大值、最小值、平均值等,通过对桶内文档进行相应的操作,我们可以得到想要的指标。
在 /cars/transactions/ 中加入数据:
_index | _type | _id | _score | price | color | make | sold |
---|---|---|---|---|---|---|---|
cars | transactions | v9VaYHIBPeXg5lVgIA9f | 1 | 10000 | red | honda | 2014-10-28 |
cars | transactions | wNVaYHIBPeXg5lVgIA-8 | 1 | 20000 | red | honda | 2014-11-05 |
cars | transactions | wdVaYHIBPeXg5lVgIA-8 | 1 | 30000 | green | ford | 2014-05-18 |
cars | transactions | wtVaYHIBPeXg5lVgIA_M | 1 | 15000 | blue | toyota | 2014-07-02 |
cars | transactions | w9VaYHIBPeXg5lVgIA_c | 1 | 12000 | green | toyota | 2014-08-19 |
cars | transactions | xNVaYHIBPeXg5lVgIA_r | 1 | 20000 | red | honda | 2014-11-05 |
cars | transactions | xdVaYHIBPeXg5lVgIA_7 | 1 | 80000 | red | bmw | 2014-01-01 |
cars | transactions | xtVaYHIBPeXg5lVgIA_7 | 1 | 25000 | blue | ford | 2014-02-12 |
4.1以价格分桶
GET /cars/transactions/_search
{
"size" : 0,
"aggs" : {
"price" : {
"terms" : {
"field" : "price"
}
}
}
}
elasticsearch-dsl:
# Bucket the documents by price with a terms aggregation.
# size=0 mirrors the raw request: only the aggregation result is wanted,
# so no hits are fetched.
s = Search(using=es, index="cars").extra(size=0)
s.aggs.bucket("price", "terms", field="price")
res1 = s.execute()
print(res1.to_dict())
4.2查询平均价格
GET /cars/transactions/_search
{
"size" : 0,
"aggs": {
"price": {
"terms": {
"field": "price"
},
"aggs": {
"avg_price": {
"avg": {
"field": "price"
}
}
}
}
}
}
elasticsearch-dsl:
# Bucket by price and compute the average price inside each bucket.
# size=0 mirrors the raw request: only the aggregation result is wanted,
# so no hits are fetched.
s = Search(using=es, index="cars").extra(size=0)
price_buckets = s.aggs.bucket("price", "terms", field="price")
price_buckets.metric("avg_price", "avg", field="price")
res2 = s.execute()
print(res2.to_dict())
4.3 以价格分桶,并计算每桶中车辆最先销售日期和最后销售日期
GET /cars/transactions/_search
{
"size" : 0,
"aggs": {
"price": {
"terms": {
"field": "price"
},
"aggs" : {
"min_sold" : { "min": { "field": "sold"} },
"max_sold" : { "max": { "field": "sold"} }
}
}
}
}
elasticsearch-dsl:
# Bucket by price and compute the earliest and latest sale date per bucket.
# size=0 mirrors the raw request: only the aggregation result is wanted,
# so no hits are fetched.
s = Search(using=es, index="cars").extra(size=0)
s.aggs.bucket("price", "terms", field="price")
# Attach both metrics to the existing "price" bucket.
s.aggs["price"].metric("min_sold", "min", field="sold")
s.aggs["price"].metric("max_sold", "max", field="sold")
res3 = s.execute()
print(res3.to_dict())
4.4 统计每个月销售多少台汽车
GET /cars/transactions/_search
{
"size" : 0,
"aggs": {
"sales": {
"date_histogram": {
"field": "sold",
"interval": "month",
"format": "yyyy-MM-dd" ,
"extended_bounds": {
"min": "2014-01-01",
"max": "2014-12-31"
}
}
}
}
}
elasticsearch-dsl:
# Count sales per month with a date_histogram over the "sold" field.
# size=0 mirrors the raw request: only the aggregation result is wanted,
# so no hits are fetched.
s = Search(using=es, index="cars").extra(size=0)
s.aggs.bucket(
    "sales",
    "date_histogram",
    field="sold",
    interval="month",
    format="yyyy-MM-dd",
    # Passed through as the extended_bounds of the histogram, as in the raw request.
    extended_bounds={"min": "2014-01-01", "max": "2014-12-31"},
)
res4 = s.execute()
print(res4.to_dict())
4.5限定范围内的聚合,福特车有几种价格?
GET /cars/transactions/_search
{
"query" : {
"match" : {
"make" : "ford"
}
},
"aggs" : {
"price" : {
"terms" : {
"field" : "price"
}
}
}
}
elasticsearch-dsl:
# Scope the search to documents whose make matches "ford", then bucket
# those documents by price.
s = Search(using=es, index="cars")
s = s.query("match", make="ford")
s.aggs.bucket("price", "terms", field="price")
res5 = s.execute()
print(res5.to_dict())
4.6全局桶(global),找出福特汽车与所有汽车平均售价
GET /cars/transactions/_search
{
"size" : 0,
"query" : {
"match" : {
"make" : "ford"
}
},
"aggs" : {
"single_avg_price": {
"avg" : { "field" : "price" }
},
"all": {
"global" : {},
"aggs" : {
"avg_price": {
"avg" : { "field" : "price" }
}
}
}
}
}
elasticsearch-dsl:
# Average price of the cars matching the query ("ford") next to the average
# price of all cars, the latter computed inside a global bucket.
# size=0 mirrors the raw request: only the aggregation result is wanted,
# so no hits are fetched.
s = Search(using=es, index="cars").query("match", make="ford").extra(size=0)
s.aggs.metric("single_avg_price", "avg", field="price")
s.aggs.bucket("all", "global").metric("avg_price", "avg", field="price")
res6 = s.execute()
print(res6.to_dict())
4.7过滤查找,找到售价不低于 10,000 美元的所有汽车,同时计算这些车的平均售价
GET /cars/transactions/_search
{
"size" : 0,
"query" : {
"constant_score": {
"filter": {
"range": {
"price": {
"gte": 10000
}
}
}
}
},
"aggs" : {
"single_avg_price": {
"avg" : { "field" : "price" }
}
}
}
elasticsearch-dsl:
# Average price of the cars whose price is >= 10000.
# The range clause is wrapped in constant_score/filter exactly as in the raw
# request, and size=0 suppresses the hits because only the aggregation
# result is wanted.
price_filter = Q("constant_score", filter=Q("range", price={"gte": 10000}))
s = Search(using=es, index="cars").query(price_filter).extra(size=0)
s.aggs.metric("single_avg_price", "avg", field="price")
res7 = s.execute()
print(res7.to_dict())
4.8内置排序。以价格分桶,且排序
GET /cars/transactions/_search
{
"size" : 0,
"aggs" : {
"prices" : {
"terms" : {
"field" : "price",
"order": {
"_count" : "asc"
}
}
}
}
}
elasticsearch-dsl:
# Bucket by price and order the buckets by ascending document count.
# size=0 mirrors the raw request: only the aggregation result is wanted,
# so no hits are fetched.
s = Search(using=es, index="cars").extra(size=0)
s.aggs.bucket("prices", "terms", field="price", order={"_count": "asc"})
res8 = s.execute()
print(res8.to_dict())