给你们两篇博客参考一下
Logstash同步mysql一对多数据到ES(踩坑日记系列)
如何实现将mysql 1对多数据导入到es 的nested 嵌套字段中
我自己的配置文件
目的:
创建一个问题索引( 问题中包括多个答案,多个评论,多个回复)
索引question:是嵌套文档类型
#创建问题index
POST /question/_doc
{
"setting":{
"number_of_shards":3,
"number_of_replicas":2,
"analysis":{
"analyzer":{
"default":{
"type":"custom",
"tokenizer":"ik_max_word",
"filter":"stemmer"
},
"searchIk":{
"type":"custom",
"tokenizer":"ik_smart",
"filter":"stemmer"
}
}
}
},
"mapping":{
"properties":{
"qid":{
"type":"integer"
},
"questiontitle":{
"type":"text",
"analyzer":"default",
"search_analyzer":"searchIk",
"fields":{
"kw":{
"type":"completion"
},
"py":{
"type":"completion",
"analyzer":"pinyin"
}
}
},
"questiondesc":{
"type":"text",
"analyzer":"default",
"search_analyzer":"searchIk"
},
"qtype":{
"type":"text",
"analyzer":"default",
"search_analyzer":"searchIk"
},
"questiontags":{
"type":"text",
"analyzer":"default",
"search_analyzer":"searchIk"
},
"questionername":{
"type":"keyword"
},
"questiontime":{
"type":"date",
"format":"yyyy-MM-dd hh:mm:ss"
},
"questionfileuuid":{
"type":"keyword"
},
"platformname":{
"type":"keyword"
},
"answer":{
"type":"nested",
"properties":{
"aid":{
"type":"integer"
},
"answername":{
"type":"keyword"
},
"answertime":{
"type":"date",
"format":"yyyy-MM-dd hh:mm:ss"
},
"answerfileuuid":{
"type":"keyword"
},
"vid":{
"type":"integer"
},
"numversion":{
"type":"double"
},
"greekversion":{
"type":"keyword"
}
}
},
"comment":{
"type":"nested",
"properties":{
"cid":{
"type":"integer"
},
"c_aid":{
"type":"integer"
},
"commentername":{
"type":"keyword"
},
"commenttime":{
"type":"date",
"format":"yyyy-MM-dd hh:mm:ss"
},
"commentcontent":{
"type":"keyword"
}
}
},
"reply":{
"type":"nested",
"properties":{
"rid":{
"type":"integer"
},
"r_aid":{
"type":"integer"
},
"fromname":{
"type":"keyword"
},
"toname":{
"type":"keyword"
},
"replycontent":{
"type":"keyword"
},
"replytime":{
"type":"date",
"format":"yyyy-MM-dd hh:mm:ss"
}
}
}
}
}
}
input {
stdin { }
jdbc {
#需要同步的数据库
jdbc_connection_string => "jdbc:mysql://192.168.252.***:3306/knowledge?serverTimezone=GMT%2B8&useSSL=false"
jdbc_user => "root"
jdbc_password => "****"
#本地jar包
jdbc_driver_library => "mysql-connector-java-5.1.45.jar"
jdbc_driver_class => "com.mysql.jdbc.Driver"
jdbc_paging_enabled => "true"
jdbc_page_size => "50000"
#获取到记录的SQL查询语句
statement => "select q.qid as qid,questiontitle,questiondesc,qtype,questiontags,questionername,questiontime,questionfileuuid,platformname,
a.aid,answername ,answertime ,answerfileuuid ,
a.vid ,numversion,greekversion,
c.cid,c.aid as c_aid,commentcontent ,commentername ,commenttime,
r.rid,r.aid as r_aid,fromname,toname,replycontent,replytime from question q LEFT JOIN answer a on a.qid=q.qid left JOIN version v on a.vid=v.vid LEFT JOIN comment c on c.aid=a.aid LEFT JOIN reply r on r.aid=a.aid "
#定时字段 各字段含义(由左至右)分、时、天、月、年,全部为*默认含义为每分钟都更新
schedule => "* * * * *"
#type=> "question"
#tags=> "question"
}
}
filter{
ruby {
code => "event.set('questiontime', event.get('questiontime').time.localtime + 8*60*60)"
}
aggregate{
task_id=> "%{qid}"
code => "
map['qid'] = event.get('qid')
map['questiontitle'] = event.get('questiontitle')
map['questiondesc'] = event.get('questiondesc')
map['qtype'] = event.get('qtype')
map['questiontags'] = event.get('questiontags')
map['questionername'] = event.get('questionername')
map['questiontime'] = event.get('questiontime')
map['questionfileuuid'] = event.get('questionfileuuid')
map['platformname'] = event.get('platformname')
map['answer_list'] ||=[]
map['comment_list'] ||=[]
map['reply_list'] ||=[]
map['answer'] ||=[]
map['comment'] ||=[]
map['reply'] ||=[]
if (event.get('aid') != nil)
if !(map['answer_list'].include? event.get('aid'))
map['answer_list'] << event.get('aid')
map['answer'] << {
'aid' => event.get('aid'),
'answername' => event.get('answername'),
'answertime' => event.get('answertime'),
'answerfileuuid' => event.get('answerfileuuid'),
'vid' => event.get('vid'),
'numberversion' => event.get('numversion'),
'greekversion' => event.get('greekversion')
}
end
end
if (event.get('cid') != nil)
if !(map['comment_list'].include? event.get('cid'))
map['comment_list'] << event.get('cid')
map['comment'] << {
'cid' => event.get('cid'),
'c_aid' =>event.get('c_aid'),
'commentername' => event.get('commentername'),
'commenttime' => event.get('commenttime'),
'commentcontent' => event.get('commentcontent')
}
end
end
if (event.get('rid') != nil)
if !(map['reply_list'].include? event.get('rid'))
map['reply_list'] << event.get('rid')
map['reply'] << {
'rid' => event.get('rid'),
'r_aid' => event.get('r_aid'),
'fromname' => event.get('fromname'),
'toname' => event.get('toname'),
'replycontent' => event.get('replycontent'),
'replytime' => event.get('replytime')
}
end
end
event.cancel()
"
push_previous_map_as_event => true
timeout => 3
}
json {
source => "message"
remove_field => ["message"]
#remove_field => ["message", "type", "@timestamp", "@version"]
}
mutate {
#将不需要的JSON字段过滤,且不会被存入 ES 中
remove_field => ["tags", "@timestamp", "@version"]
}
}
output {
stdout {
codec => json_lines
}
elasticsearch {
#ESIP地址与端口
hosts => ["192.168.252.130:9200","192.168.252.132:9200","192.168.252.133:9200"]
#ES索引名称(自己定义的)
index => "question"
#文档类型
document_type => "_doc"
#文档类型id,%{qid}意思是取查询出来的qid的值,并将其映射到es中_id字段中
document_id => "%{qid}"
}
}
各个字段的功能网上都有解释,主要是filter这一块
截图:
我现在还没出现踩坑事件,如果有其他问题,可以参考上两篇博客!!!