elasticsearch pipeline

1 使用 pipeline 简单的添加一些数据

例如,在录入员工信息时,公司信息基本上是固定的,可以交给 pipeline 统一写入,不必每条数据都重复填写。
 

创建一个公司信息的 pipeline=company_info_001
PUT _ingest/pipeline/company_info_001
{
  "description": "区域字段",
  "processors": [
    {
      "set": {
        "field": "companyId",
        "value": 1
      }
    },
    {
      "set": {
        "field": "companyName",
        "value": "baidu"
      }
    }
  ]
}

查看 pipeline 信息
GET _ingest/pipeline/company_info_001
{
  "company_info_001" : {
    "description" : "区域字段",
    "processors" : [
      {
        "set" : {
          "field" : "companyId",
          "value" : 1
        }
      },
      {
        "set" : {
          "field" : "companyName",
          "value" : "baidu"
        }
      }
    ]
  }
}

 

创建索引添加数据的同时使用 pipeline=company_info_001
PUT worker-001
{
  "settings": {
    "number_of_replicas": 2,
    "number_of_shards": 3
  },
  "mappings": {
    "properties": {
      "id": {
        "type": "keyword"
      },
      "name": {
        "type": "text",
        "fields": {
          "key":{
            "type":"keyword"
          }
        }
      },
      "age":{
        "type": "integer"
      },
      "workAge":{
        "type": "short"
      },
      "companyId":{
        "type":"keyword"
      },
      "companyName":{
        "type": "text",
        "fields": {
          "key": {
            "type": "keyword"
          }
        }
      }
    }
  }
}
POST _bulk?pipeline=company_info_001
{"index":{"_index":"worker-001","_id":1}}
{"id":1,"name":"张三","age":28,"workAge":2}
{"index":{"_index":"worker-001","_id":2}}
{"id":2,"name":"李四","age":29,"workAge":3}
{"index":{"_index":"worker-001","_id":3}}
{"id":3,"name":"王五","age":36,"workAge":5}

 

其实,最安全的方式是先模拟,查看效果。
模拟(_simulate)不会真正添加数据,只是把文档交给 pipeline 处理一遍并返回处理后的结果,用来查看添加数据时的效果。

模拟添加数据(请求),以及模拟出来的效果(请求体之后的第二段 JSON 为返回结果)
POST _ingest/pipeline/company_info_001/_simulate
{
  "docs": [
    {
      "_source": {
        "town": "tangXia"
      }
    },
    {
      "_source": {
        "id": 1,
        "name": "张三",
        "age": 28,
        "workAge": 2
      }
    }
  ]
}
{
  "docs" : [
    {
      "doc" : {
        "_index" : "_index",
        "_type" : "_doc",
        "_id" : "_id",
        "_source" : {
          "companyId" : 1,
          "town" : "tangXia",
          "companyName" : "baidu"
        },
        "_ingest" : {
          "timestamp" : "2021-03-30T06:50:54.832018962Z"
        }
      }
    },
    {
      "doc" : {
        "_index" : "_index",
        "_type" : "_doc",
        "_id" : "_id",
        "_source" : {
          "companyId" : 1,
          "companyName" : "baidu",
          "name" : "张三",
          "workAge" : 2,
          "id" : 1,
          "age" : 28
        },
        "_ingest" : {
          "timestamp" : "2021-03-30T06:50:54.83202477Z"
        }
      }
    }
  ]
}
 

 

添加数据,同时添加 pipeline=company_info_001

POST _bulk?pipeline=company_info_001
{"index":{"_index":"worker-001","_id":1}}
{"id":1,"name":"张三","age":28,"workAge":2}

查看添加后的数据
GET worker-001/_doc/1
{
  "_index" : "worker-001",
  "_type" : "_doc",
  "_id" : "1",
  "_version" : 4,
  "_seq_no" : 3,
  "_primary_term" : 1,
  "found" : true,
  "_source" : {
    "companyId" : 1,
    "companyName" : "baidu",
    "name" : "张三",
    "workAge" : 2,
    "id" : 1,
    "age" : 28
  }
}
 

 

可以对对象类型的字段进行创建与赋值
例如,可以把员工所在公司的信息存放在一个对象中

创建 pipeline,然后添加数据,同时使用 pipeline=company_info_002
PUT _ingest/pipeline/company_info_002
{
  "description": "区域字段",
  "processors": [
    {
      "set": {
        "field": "company.companyId",
        "value": "1"
      }
    },
    {
      "set": {
        "field": "company.companyName",
        "value": "baidu"
      }
    }
  ]
}
POST _bulk?pipeline=company_info_002
{"index":{"_index":"worker-003","_id":1}}
{"id":1,"name":"张三","age":28,"workAge":2}

 

查询结果

  
GET worker-003/_search
{
  "took" : 1,
  "timed_out" : false,
  "_shards" : {
    "total" : 3,
    "successful" : 3,
    "skipped" : 0,
    "failed" : 0
  },
  "hits" : {
    "total" : {
      "value" : 1,
      "relation" : "eq"
    },
    "max_score" : 1.0,
    "hits" : [
      {
        "_index" : "worker-003",
        "_type" : "_doc",
        "_id" : "1",
        "_score" : 1.0,
        "_source" : {
          "name" : "张三",
          "workAge" : 2,
          "company" : {
            "companyId" : "1",
            "companyName" : "baidu"
          },
          "id" : 1,
          "age" : 28
        }
      }
    ]
  }
}
 

 

可以看出,company 的信息是一个 object

2 读取源数据,创建新字段

可以读取源数据,放入另一个字段中,也就是利用源数据来创建新的字段

创建 pipeline(PUT 同名的 company_info_002 会覆盖前面的定义),然后添加数据
PUT _ingest/pipeline/company_info_002
{
  "description": "员工公司信息",
  "processors": [
    {
      "set": {
        "field": "company.companyId",
        "value": "{{_source.companyId}}"
      }
    },
    {
      "set": {
        "field": "company.companyName",
        "value": "{{_source.companyName}}"
      }
    }
  ]
}
PUT worker-003/_doc/4?pipeline=company_info_002
{
  "id":4,
  "name": "李四",
  "companyId": 1,
  "companyName": "baidu"
}

 

查看结果

查看数据结果
GET worker-003/_doc/4
{
  "_index" : "worker-003",
  "_type" : "_doc",
  "_id" : "4",
  "_version" : 1,
  "_seq_no" : 0,
  "_primary_term" : 1,
  "found" : true,
  "_source" : {
    "companyId" : 1,
    "companyName" : "baidu",
    "name" : "李四",
    "company" : {
      "companyId" : "1",
      "companyName" : "baidu"
    },
    "id" : 4
  }
}

 

可以看到,成功地利用 companyId 和 companyName 数据创建出 company 字段 object。

可以读取源数据,放入另一个字段中。同时,还可以把源数据字段删除。

创建 pipeline,然后添加数据
PUT _ingest/pipeline/company_info_003
{
  "description": "员工公司信息",
  "processors": [
    {
      "set": {
        "field": "company.companyId",
        "value": "{{_source.companyId}}"
      }
    },
    {
      "set": {
        "field": "company.companyName",
        "value": "{{_source.companyName}}"
      }
    },
    {
      "remove": {
        "field": ["companyId","companyName"]
      }
    }
  ]
}
PUT worker-003/_doc/5?pipeline=company_info_003
{
  "id":4,
  "name": "李四一",
  "companyId": 1,
  "companyName": "baidu"
}

查询数据

GET worker-003/_doc/5
{
  "_index" : "worker-003",
  "_type" : "_doc",
  "_id" : "5",
  "_version" : 1,
  "_seq_no" : 0,
  "_primary_term" : 1,
  "found" : true,
  "_source" : {
    "name" : "李四一",
    "company" : {
      "companyId" : "1",
      "companyName" : "baidu"
    },
    "id" : 4
  }
}

 

可以看到,companyId 和 companyName 字段只存在于 company 字段 object 中。

3 读取元数据

我们还可以读取索引的元数据:_index、_version 等一些信息

创建 pipeline,然后添加数据
PUT _ingest/pipeline/company_info_004
{
  "description": "员工公司信息",
  "processors": [
    {
      "set": {
        "field": "company.companyId",
        "value": "{{_source.companyId}}"
      }
    },
    {
      "set": {
        "field": "company.companyName",
        "value": "{{_source.companyName}}"
      }
    },
    {
      "set": {
        "field": "@timestamp",
        "value": "{{_ingest.timestamp}}"
      }
    },
    {
      "set": {
        "field": "indexInfo",
        "value": "索引名称:{{_index}},数据id={{_id}}数据version={{_version}}"
      }
    },
    {
      "remove": {
        "field": ["companyId","companyName"]
      }
    }
  ]
}
PUT worker-003/_doc/6?pipeline=company_info_004
{
  "id":4,
  "name": "李四二",
  "companyId": 1,
  "companyName": "baidu"
}

效果

GET worker-003/_doc/6
{
  "_index" : "worker-003",
  "_type" : "_doc",
  "_id" : "6",
  "_version" : 2,
  "_seq_no" : 2,
  "_primary_term" : 1,
  "found" : true,
  "_source" : {
    "@timestamp" : "2021-03-30T07:24:10.290252436Z",
    "name" : "李四二",
    "indexInfo" : "索引名称:worker-003,数据id=6数据version=-3",
    "company" : {
      "companyId" : "1",
      "companyName" : "baidu"
    },
    "id" : 4
  }
}
 

@timestamp 字段名称是自定义的,没什么特别的,目的是用来标明该字段的数据是系统生成的(这里取的是 {{_ingest.timestamp}})。

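indexInfo 字段已经成功创建,但 {{_version}} 读取出来的值是 -3。原因是写入请求没有显式指定 version 时,pipeline 中拿到的是 Elasticsearch 内部表示"匹配任意版本"的常量(Versions.MATCH_ANY = -3),并不是文档写入后最终的 _version。如果有哪位大神有更详细的说明,欢迎指导,我会非常感激。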

4 多管道处理

多管道处理就是通过 pipeline 处理器,把多个 pipeline 组合成一个 pipeline 来使用。

这是非常有用的。我们可以把功能细分来处理,就像写接口功能一样,尽量的单一,这样才简单。


 

创建 pipeline,然后添加数据

PUT _ingest/pipeline/programmer_001
{
  "description": "开发部门程序员码农",
  "processors": [
    {
      "set": {
        "field": "position.work",
        "value": "programmer"
      }
    },
    {
      "set": {
        "field": "position.lang",
        "value": "java+html5+css+mysql"
      }
    }
  ]
}

PUT _ingest/pipeline/multi_company_001
{
  "description": "多管道处理器",
  "processors": [
    {
      "pipeline": {
        "name": "programmer_001"
      }
    },
    {
      "pipeline": {
        "name": "company_info_004"
      }
    }
  ]
}

PUT worker-003/_doc/6?pipeline=multi_company_001
{
  "id":6,
  "name": "李四三",
  "companyId": 1,
  "companyName": "baidu"
}

查看数据

GET worker-003/_doc/6
{
  "_index" : "worker-003",
  "_type" : "_doc",
  "_id" : "6",
  "_version" : 1,
  "_seq_no" : 0,
  "_primary_term" : 1,
  "found" : true,
  "_source" : {
    "@timestamp" : "2021-03-30T07:53:34.394245938Z",
    "name" : "李四三",
    "indexInfo" : "索引名称:worker-003,数据id=6数据version=-3",
    "company" : {
      "companyId" : "1",
      "companyName" : "baidu"
    },
    "id" : 6,
    "position" : {
      "work" : "programmer",
      "lang" : "java+html5+css+mysql"
    }
  }
}

 

可以看到 position 和 company 都成功创建。

5 索引更新

如果对旧数据进行更新加工处理,可以使用 pipeline 进行加工处理。

使用查询更新的时候添加 pipeline 参数: POST 索引/_update_by_query?pipeline=<pipeline 名称>

POST worker-003/_update_by_query?pipeline=multi_company_001
{
  "query": {
    "match": {
      "id": "1"
    }
  }
}

 

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值