Logstash同步mysql一对多的数据

最新推荐文章于 2024-04-23 10:33:19 发布

爱代码的小呆

最新推荐文章于 2024-04-23 10:33:19 发布

阅读量611

点赞数 1

文章标签： Logstash es 嵌套文档 mysql 一对多

本文链接：https://blog.youkuaiyun.com/weixin_42388220/article/details/117299625

版权

给你们两篇博客参考一下
Logstash同步mysql一对多数据到ES（踩坑日记系列）
如何实现将mysql 1对多数据导入到es 的nested 嵌套字段中
我自己的配置文件
目的：
创建一个问题索引（问题中包括多个答案，多个评论，多个回复）
索引question：是嵌套文档类型

#创建问题index
POST /question/_doc
{
  "setting":{
    "number_of_shards":3,
    "number_of_replicas":2,
    "analysis":{
      "analyzer":{
        "default":{
          "type":"custom",
          "tokenizer":"ik_max_word",
          "filter":"stemmer"
        },
        "searchIk":{
          "type":"custom",
          "tokenizer":"ik_smart",
          "filter":"stemmer"
        }
      }
    }
  },
  "mapping":{
    "properties":{
      "qid":{
        "type":"integer"
      },
      "questiontitle":{
        "type":"text",
        "analyzer":"default",
        "search_analyzer":"searchIk",
        "fields":{
          "kw":{
            "type":"completion"
          },
          "py":{
            "type":"completion",
            "analyzer":"pinyin"
          }
        }
      },
      "questiondesc":{
        "type":"text",
         "analyzer":"default",
        "search_analyzer":"searchIk"
      },
      "qtype":{
        "type":"text",
         "analyzer":"default",
        "search_analyzer":"searchIk"
      },
      "questiontags":{
         "type":"text",
         "analyzer":"default",
        "search_analyzer":"searchIk"
      },
      "questionername":{
         "type":"keyword"
      },
      "questiontime":{
        "type":"date",
        "format":"yyyy-MM-dd hh:mm:ss"
      },
      "questionfileuuid":{
        "type":"keyword"
      },
      "platformname":{
        "type":"keyword"
      },
      "answer":{
        "type":"nested",
        "properties":{
          "aid":{
            "type":"integer"
          },
          "answername":{
            "type":"keyword"
          },
          "answertime":{
            "type":"date",
            "format":"yyyy-MM-dd hh:mm:ss"
          },
          "answerfileuuid":{
            "type":"keyword"
          },
          "vid":{
            "type":"integer"
          },
          "numversion":{
            "type":"double"
          },
          "greekversion":{
            "type":"keyword"
          }
        }
      },
       "comment":{
          "type":"nested",
          "properties":{
            "cid":{
              "type":"integer"
            },
            "c_aid":{
              "type":"integer"
            },
            "commentername":{
              "type":"keyword"
            },
            "commenttime":{
             "type":"date",
             "format":"yyyy-MM-dd hh:mm:ss"
            },
            "commentcontent":{
              "type":"keyword"
            }
          }
        },
      "reply":{
        "type":"nested",
        "properties":{
          "rid":{
            "type":"integer"
          },
          "r_aid":{
              "type":"integer"
            },
          "fromname":{
            "type":"keyword"
          },
          "toname":{
            "type":"keyword"
          },
          "replycontent":{
            "type":"keyword"
          },
          "replytime":{
            "type":"date",
           "format":"yyyy-MM-dd hh:mm:ss"
          }
        }
      }
    }
  }
}

input {
 stdin { }
    jdbc {
        #需要同步的数据库
        jdbc_connection_string => "jdbc:mysql://192.168.252.***:3306/knowledge?serverTimezone=GMT%2B8&useSSL=false"

        jdbc_user => "root"

        jdbc_password => "****"
        #本地jar包
        jdbc_driver_library => "mysql-connector-java-5.1.45.jar"

        jdbc_driver_class => "com.mysql.jdbc.Driver"

        jdbc_paging_enabled => "true"

        jdbc_page_size => "50000"
        #获取到记录的SQL查询语句
        statement => "select q.qid as qid,questiontitle,questiondesc,qtype,questiontags,questionername,questiontime,questionfileuuid,platformname,
a.aid,answername ,answertime ,answerfileuuid ,
a.vid ,numversion,greekversion,
c.cid,c.aid as c_aid,commentcontent ,commentername ,commenttime,
r.rid,r.aid as r_aid,fromname,toname,replycontent,replytime from question q LEFT JOIN answer a on a.qid=q.qid left JOIN version v on a.vid=v.vid  LEFT JOIN comment c on c.aid=a.aid LEFT JOIN reply r on r.aid=a.aid "
        #定时字段 各字段含义（由左至右）分、时、天、月、年，全部为*默认含义为每分钟都更新
        schedule => "* * * * *"
        #type=> "question"
        #tags=> "question"
    }
}


filter{
    ruby {
            code => "event.set('questiontime', event.get('questiontime').time.localtime + 8*60*60)"
    }
    aggregate{
        task_id=> "%{qid}"
        code => "
            map['qid'] = event.get('qid')
            map['questiontitle'] = event.get('questiontitle')
            map['questiondesc'] = event.get('questiondesc')
            map['qtype'] = event.get('qtype')
            map['questiontags'] = event.get('questiontags')
            map['questionername'] = event.get('questionername')
            map['questiontime'] = event.get('questiontime')
            map['questionfileuuid'] = event.get('questionfileuuid')
            map['platformname'] = event.get('platformname')
            map['answer_list'] ||=[]
            map['comment_list'] ||=[]
            map['reply_list'] ||=[]
            map['answer'] ||=[]
            map['comment'] ||=[]
            map['reply'] ||=[]
            if (event.get('aid') != nil)
                if !(map['answer_list'].include? event.get('aid'))
                    map['answer_list'] << event.get('aid')
                    map['answer'] << {
                        'aid' => event.get('aid'),
                        'answername' => event.get('answername'),
                        'answertime' => event.get('answertime'),
                        'answerfileuuid' => event.get('answerfileuuid'),
                        'vid' => event.get('vid'),
                        'numberversion' => event.get('numversion'),
                        'greekversion' => event.get('greekversion')
                    }
                end
            end
            if (event.get('cid') != nil)
                 if !(map['comment_list'].include? event.get('cid'))
                     map['comment_list'] << event.get('cid')
                     map['comment'] << {
                         'cid' => event.get('cid'),
                         'c_aid' =>event.get('c_aid'),
                         'commentername' => event.get('commentername'),
                         'commenttime' => event.get('commenttime'),
                         'commentcontent' => event.get('commentcontent')
                      }
                end
            end
             if (event.get('rid') != nil)
                if !(map['reply_list'].include? event.get('rid'))
                     map['reply_list'] << event.get('rid')
                     map['reply'] << {
                         'rid' => event.get('rid'),
                         'r_aid' => event.get('r_aid'),
                         'fromname' => event.get('fromname'),
                         'toname' => event.get('toname'),
                         'replycontent' => event.get('replycontent'),
                         'replytime' => event.get('replytime')
                      }
                end
            end
            event.cancel()
        "
       push_previous_map_as_event => true
       timeout => 3
    }
    json {
        source => "message"
        remove_field => ["message"]
        #remove_field => ["message", "type", "@timestamp", "@version"]
    }
    mutate  {
        #将不需要的JSON字段过滤，且不会被存入 ES 中
        remove_field => ["tags", "@timestamp", "@version"]
    }
  }

 output {
     stdout {
        codec => json_lines
    }
    elasticsearch {
        #ESIP地址与端口
        hosts => ["192.168.252.130:9200","192.168.252.132:9200","192.168.252.133:9200"]
        #ES索引名称（自己定义的）
        index => "question"
        #文档类型
        document_type => "_doc"
        #文档类型id，%{qid}意思是取查询出来的qid的值，并将其映射到es中_id字段中
        document_id => "%{qid}"
    }
}