ES 自己做的例子汇总

最新推荐文章于 2024-03-22 17:09:00 发布

原创最新推荐文章于 2024-03-22 17:09:00 发布 · 503 阅读

0 ·

CC 4.0 BY-SA版权

ES 专栏收录该内容

2 篇文章

订阅专栏

本文深入探讨Elasticsearch中的复杂查询技术，包括精确日期、多条件组合、特定标签匹配及高级字段权重调整等，旨在帮助读者掌握高效数据检索的方法。

首先初始化数据

POST /forums/articles/_bulk
{"index": {"_id": 1}}
{"articleID": "XHDK-A-1293-#fJ3", "userID": 1, "hidden": false, "postDate": "2017-01-01"}
{"index": {"_id": 2}}
{"articleID": "KDKE-B-9947-#kL5", "userID": 1, "hidden": false, "postDate": "2017-01-02"}
{"index": {"_id": 3}}
{"articleID": "JODL-X-1937-#pV7", "userID": 2, "hidden": false, "postDate": "2017-01-01"}
{"index": {"_id": 4}}
{"articleID": "QQPX-R-3956-#aD8", "userID": 2, "hidden": true, "postDate": "2017-01-02"}

POST /forums/articles/_bulk
{"index": {"_id": 5}}
{"articleID": "XHDK-C-1293-#fJ3", "userID": 2, "hidden": false, "postDate": "2017-02-01"}
{"index": {"_id": 6}}
{"articleID": "KDKE-D-9947-#kL5", "userID": 3, "hidden": false, "postDate": "2017-02-02"}
{"index": {"_id": 7}}
{"articleID": "JODL-E-1937-#pV7", "userID": 4, "hidden": false, "postDate": "2017-02-01"}
{"index": {"_id": 8}}
{"articleID": "QQPX-F-3956-#aD8", "userID": 5, "hidden": true, "postDate": "2017-03-02"}

POST /forums/articles/_bulk
{ "update": { "_id": "1" } }
{ "doc": { "tag": ["java", "hadoop"] } }
{ "update": { "_id": "2" } }
{ "doc": { "tag": ["java"] } }
{ "update": { "_id": "3" } }
{ "doc": { "tag": ["hadoop"] } }
{ "update": { "_id": "4" } }
{ "doc": { "tag": ["java", "elasticsearch"] } }
{ "update": { "_id": "5" } }
{ "doc": { "tag": ["java", "hadoop"] } }
{ "update": { "_id": "6" } }
{ "doc": { "tag": ["java", "mysql"] } }
{ "update": { "_id": "7" } }
{ "doc": { "tag": ["hadoop"] } }
{ "update": { "_id": "8" } }
{ "doc": { "tag": ["java", "elasticsearch", "mysql"] } }

第一个简单的查询：查询postDate在2017-01-01的帖子

GET /forums/articles/_search
{
  "query": {
    "constant_score": {
      "boost": 1.2,
      "filter": {
        "bool": {
          "must": [
            { "term": { "postDate": "2017-01-01" } }
          ]
        }
      }
    }
  }
}

第二个查询：查询postDate在2017-01-01的帖子，或者是用户userid=2发的帖子

POST /forums/articles/_search
{
  "query": {
    "constant_score": {
      "filter": {
        "bool": {
          "should": [
            { "term": { "postDate": "2017-01-01" } },
            { "term": { "userID": 2 } }
          ]
        }
      }
    }
  }
}

第三个查询，查询用户userid=2 在2017-01-01或2017-01-02发的帖子

POST /forums/articles/_search
{
  "query": {
    "constant_score": {
      "filter": {
        "bool": {
          "must": [
            { "term": { "userID": 2 } }
          ],
          "should": [
            { "term": { "postDate": "2017-01-01" } },
            { "term": { "postDate": "2017-01-02" } }
          ],
          "minimum_should_match": 1
        }
      }
    }
  }
}

第4个需求：查询tag中包含java或hadoop的文章（terms 只要命中列表中任意一个值即匹配）

POST /forums/articles/_search
{
  "query": {
    "constant_score": {
      "filter": {
        "bool": {
          "must": [
            { "terms": { "tag": ["java", "hadoop"] } }
          ]
        }
      }
    }
  }
}

第5个需求，我们要查询tag只是java的数据

这时原有数据如果统计的话比较麻烦，我们更新一下原有数据，添加一个新的字段tag_cnt用来表示tag数量，这样只需要查tag中含有java并且tag_cnt=1的数据即可。

POST /forums/articles/_search
{
  "query": {
    "constant_score": {
      "filter": {
        "bool": {
          "must": [
            { "term": { "tag_cnt": 1 } },
            { "term": { "tag": "java" } }
          ]
        }
      }
    }
  }
}

第6个需求添加了字段view_cnt用来表示访问次数，查询访问次数在40到80的文章并且发帖日期在2017年2月1日后

POST /forums/articles/_search
{
  "query": {
    "constant_score": {
      "filter": {
        "bool": {
          "must": [
            { "range": { "view_cnt": { "gte": 40, "lte": 80 } } },
            { "range": { "postDate": { "gte": "2017-02-01" } } }
          ]
        }
      }
    }
  }
}

"postDate": {
  "gte": "2017-01-01||+30d"
}

上面这种是针对日期类型的一种用法（date math）："2017-01-01||+30d" 表示以2017-01-01为基准再加30天。

第7个需求：查询title中包含多个关键词的情况，比如在java、hadoop、mysql、elasticsearch这4个关键词中至少命中3个的文章

GET /forums/articles/_search
{
  "query": {
    "bool": {
      "minimum_should_match": 3,
      "should": [
        { "match": { "title": "java" } },
        { "match": { "title": "elasticsearch" } },
        { "match": { "title": "hadoop" } },
        { "match": { "title": "mysql" } }
      ]
    }
  }
}

boost 可以进行细粒度的权重控制。

多字段查询（dis_max / multi_match）的例子，先更新数据

POST /forums/articles/_bulk
{"update": {"_id": "1"}}
{"doc": {"content": "i like to write best elasticsearch article"}}
{"update": {"_id": "2"}}
{"doc": {"content": "i think java is the best programming language"}}
{"update": {"_id": "3"}}
{"doc": {"content": "i am only an elasticsearch beginner"}}
{"update": {"_id": "4"}}
{"doc": {"content": "elasticsearch and hadoop are all very good solution, i am a beginner"}}
{"update": {"_id": "5"}}
{"doc": {"content": "spark is best big data solution based on scala ,an programming language similar to java"}}

按单个字段的结果优先排序

POST /forums/articles/_search
{
  "query": {
    "dis_max": {
      "queries": [
        { "match": { "title": "java solution" } },
        { "match": { "content": "java solution" } }
      ]
    }
  }
}

加入字段权重同时加入 tie_breaker进行score分值折中，title权重为2

POST /forums/articles/_search
{
  "query": {
    "dis_max": {
      "tie_breaker": 0.7,
      "queries": [
        {
          "match": {
            "title": {
              "query": "java solution",
              "minimum_should_match": "50%",
              "boost": 2
            }
          }
        },
        { "match": { "content": "java solution" } }
      ]
    }
  }
}

cross_fields的例子：把多个字段当作一个整体字段，要求所有关键词都出现

POST /forums/articles/_search
{
  "query": {
    "multi_match": {
      "type": "cross_fields",
      "operator": "and",
      "query": "Peter Smith",
      "fields": ["new_author_first_name", "new_author_last_name"]
    }
  }
}