本文基于《python操作ElasticSearch–文档增删改查》进行创作
第一个分析需求:计算每个tag下的商品数量
"""
# Fielddata is disabled on text fields by default. Set fielddata=true on [tags] in order to load fielddata in memory by uninverting the inverted index. Note that this can however use significant memory. Alternatively use a keyword field instead.
# 方式一:在field后面加上`.keyword`
GET /ecommerce/product/_search
{
"aggs": {
"group_by_tags": {
"terms": { "field": "tags.keyword" }
}
}
}
# 方式二:将文本field的fielddata属性设置为true (会占用大量内存,不推荐)
PUT /ecommerce/_mapping/product
{
"properties": {
"tags": {
"type": "text",
"fielddata": true
}
}
}
import json
from elasticsearch import Elasticsearch
# Python counterpart of the PUT /ecommerce/_mapping/product request:
# switch fielddata on for the text field "tags".
tags_mapping = {"type": "text", "fielddata": True}
mapping_body = {"properties": {"tags": tags_mapping}}
# NOTE(review): "ip:port" looks like a placeholder for the node address.
es = Elasticsearch(hosts="ip:port")
ack = es.indices.put_mapping(index="ecommerce", doc_type="product", body=mapping_body)
print(ack)
"""
GET /ecommerce/product/_search
{
"aggs": {
"group_by_tags": {
"terms": { "field": "tags" }
}
}
}
from elasticsearch_dsl import Search
# A terms aggregation groups the documents by the given field and reports
# the number of documents in each group (bucket).
tag_search = Search(using=es, index="ecommerce")
terms_params = {"field": "tags"}
tag_search.aggs.bucket('group_by_tags', 'terms', **terms_params)
tag_counts = tag_search.execute()
print(tag_counts.to_dict())
GET /ecommerce/product/_search
{
"size": 0,
"aggs": {
"all_tags": {
"terms": { "field": "tags" }
}
}
}
from elasticsearch_dsl import A
# With size=0 only the aggregation result is shown.
tags_only = Search(using=es, index="ecommerce")
tags_only = tags_only.extra(size=0)
# The aggregation below corresponds to {"terms": {"field": "tags"}}
tags_only.aggs.bucket('all_tags', A('terms', field='tags'))
tags_result = tags_only.execute()
print(tags_result.to_dict())
第二个聚合分析的需求:对名称中包含yagao的商品,计算每个tag下的商品数量
GET /ecommerce/product/_search
{
"size": 0,
"query": {
"match": {
"name": "yagao"
}
},
"aggs": {
"all_tags": {
"terms": {
"field": "tags"
}
}
}
}
# Match query on name="yagao" combined with a per-tag terms aggregation;
# size=0 is passed as an extra request parameter.
name_filtered = (
    Search(using=es, index="ecommerce")
    .extra(size=0)
    .query("match", name="yagao")
)
# The aggregation below corresponds to {"terms": {"field": "tags"}}
name_filtered.aggs.bucket('all_tags', 'terms', field='tags')
counts_by_tag = name_filtered.execute().to_dict()
print(json.dumps(counts_by_tag, indent=2, ensure_ascii=False))
第三个聚合分析的需求:先分组,再算每组的平均值,计算每个tag下的商品的平均价格
GET /ecommerce/product/_search
{
"size": 0,
"aggs" : {
"group_by_tags" : {
"terms" : { "field" : "tags" },
"aggs" : {
"avg_price" : {
"avg" : { "field" : "price" }
}
}
}
}
}
# Group by tag first, then compute the average price inside every group.
avg_search = Search(using=es, index="ecommerce").extra(size=0)
# Outer level: {"terms": {"field": "tags"}}
tag_buckets = avg_search.aggs.bucket('group_by_tags', 'terms', field='tags')
# Inner level: {"avg": {"field": "price"}} nested under each tag bucket.
tag_buckets.metric('avg_price', 'avg', field='price')
avg_by_tag = avg_search.execute().to_dict()
print(json.dumps(avg_by_tag, indent=2, ensure_ascii=False))
第四个数据分析需求:计算每个tag下的商品的平均价格,并且按照平均价格降序排序
GET /ecommerce/product/_search
{
"size": 0,
"aggs" : {
"all_tags" : {
"terms" : { "field" : "tags", "order": { "avg_price": "desc" } },
"aggs" : {
"avg_price" : {
"avg" : { "field" : "price" }
}
}
}
}
}
# Average price per tag, with the tag buckets ordered by that average
# in descending order.
ranking_search = Search(using=es, index="ecommerce").extra(size=0)
bucket_order = {"avg_price": "desc"}
ranked_tags = ranking_search.aggs.bucket(
    'all_tags', 'terms', field='tags', order=bucket_order
)
# The order key above refers to this sub-aggregation by name.
ranked_tags.metric('avg_price', 'avg', field='price')
ranked_result = ranking_search.execute().to_dict()
print(json.dumps(ranked_result, indent=2, ensure_ascii=False))
第五个数据分析需求:按照指定的价格范围区间进行分组,然后在每组内再按照tag进行分组,最后再计算每组的平均价格
GET /ecommerce/product/_search
{
"size": 0,
"aggs": {
"group_by_price": {
"range": {
"field": "price",
"ranges": [
{
"from": 0,
"to": 20
},
{
"from": 20,
"to": 40
},
{
"from": 40,
"to": 50
}
]
},
"aggs": {
"group_by_tags": {
"terms": {
"field": "tags"
},
"aggs": {
"average_price": {
"avg": {
"field": "price"
}
}
}
}
}
}
}
}
# Three nested levels: price range -> tag -> average price.
s = Search(using=es, index="ecommerce").extra(size=0)
price_ranges = [{"from": 0, "to": 20}, {"from": 20, "to": 40}, {"from": 40, "to": 50}]
a = A('range', field='price', ranges=price_ranges)
tag_buckets = s.aggs.bucket('group_by_price', a).bucket('group_by_tags', 'terms', field='tags')
# "avg" is a metric aggregation, so it is attached with .metric() rather
# than .bucket(); the request body that is sent stays the same.
tag_buckets.metric('average_price', 'avg', field='price')
response = s.execute()
print(json.dumps(response.to_dict(), indent=2, ensure_ascii=False))
参考链接 https://elasticsearch-dsl.readthedocs.io/en/latest/index.html
本文详细介绍使用ElasticSearch进行商品数据的聚合分析,包括计算每个标签下的商品数量、特定名称商品的标签分布、标签下商品平均价格及排序、价格区间内商品的进一步分组分析等,提供丰富的代码示例。
1636

被折叠的 条评论
为什么被折叠?



