Situation:
We consume data from Kafka, but the data in the topic arrives out of order: messages read on May 20 may also contain records from the 17th, 18th and 19th, and the queue can only be consumed forward, so we cannot rewind to an earlier offset and re-consume.
As a result, the old approach of naming the index after the current system time no longer works, because the sprintf date pattern below is expanded from the event's @timestamp, which by default is the time Logstash processes the event, not the time the record actually describes:
index => "ods_harmful_%{+YYYY-MM-dd}"
Modification:
Note the mapping between field positions and field names:
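For illustration, a hypothetical caret-delimited record (all values made up) might look like the line below; the field at index 8 is the event time in yyyyMMddHHmmss format:
13800001111^Guangdong^Shenzhen^0755^app01^channelA^1^2^20210518093000^3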
mutate{
# split the raw line on the literal "^" delimiter; "message" becomes an array of fields
split => ["message","^"]
add_field => {
# the field at index 8 holds the event time as yyyyMMddHHmmss
"my_time" => "%{[message][8]}"
"my_timest" => "%{[message][8]}"
}
}
# parse both copies of the time string into timestamp values
date {
match => ["my_time","yyyyMMddHHmmss"]
target => "my_time"
}
date {
match => ["my_timest","yyyyMMddHHmmss"]
target => "my_timest"
}
# reformat: my_time -> "yyyy-MM-dd HH:mm:ss" (kept as violation_time), my_timest -> "yyyy-MM-dd" (used as the index suffix)
ruby {
code => "event.set('my_time', (event.get('my_time').time.localtime).strftime('%Y-%m-%d %H:%M:%S'))"
}
ruby {
code => "event.set('my_timest', (event.get('my_timest').time.localtime).strftime('%Y-%m-%d'))"
}
elasticsearch {
hosts => "127.0.0.1:9200"
# the index name is now built from the event's own date instead of the system date
index => "whitelist_message_%{my_timest}"
retry_on_conflict => 5
document_id => "%{acc_num}"
codec => plain { charset => "UTF-8" }
}
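To make the time handling concrete, the short Ruby script below reproduces the same formatting outside Logstash (a minimal sketch; the timestamp value is made up):
require 'time'
raw = '20210518093000'                    # value taken from [message][8]
t = Time.strptime(raw, '%Y%m%d%H%M%S')    # parse the yyyyMMddHHmmss string
puts t.strftime('%Y-%m-%d %H:%M:%S')      # => 2021-05-18 09:30:00  (my_time / violation_time)
puts t.strftime('%Y-%m-%d')               # => 2021-05-18           (my_timest)
With my_timest resolved to a plain date, the elasticsearch output writes the event to whitelist_message_2021-05-18 no matter when the record was consumed.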
The full configuration is as follows (in this test setup the data is read back from local files; a kafka input sketch for reading directly from the topic is given at the end of this post):
input{
file{
path => "C:/Users/Administrator/Desktop/logstash_7.12.0_kafka_test/alldata/*"
type => "default"
add_field => {"flag"=>"topic1"}
start_position => "beginning"
codec => plain { charset => "UTF-8" }
}
}
filter {
mutate{
split => ["message","^"]
add_field => {
"my_time" => "%{[message][8]}"
"my_timest" => "%{[message][8]}"
}
}
date {
match => ["my_time","yyyyMMddHHmmss"]
target => "my_time"
}
date {
match => ["my_timest","yyyyMMddHHmmss"]
target => "my_timest"
}
ruby {
code => "event.set('my_time', (event.get('my_time').time.localtime).strftime('%Y-%m-%d %H:%M:%S'))"
}
ruby {
code => "event.set('my_timest', (event.get('my_timest').time.localtime).strftime('%Y-%m-%d'))"
}
if [message][0] {
mutate {
add_field => {
"acc_num" => "%{[message][0]}"
}
}
} else {
mutate {
add_field => {
"acc_num" => ""
}
}
}
if [message][1] {
mutate {
add_field => {
"province" => "%{[message][1]}"
}
}
} else {
mutate {
add_field => {
"province" => ""
}
}
}
if [message][2] {
mutate {
add_field => {
"city_name" => "%{[message][2]}"
}
}
} else {
mutate {
add_field => {
"city_name" => ""
}
}
}
if [message][3] {
mutate {
add_field => {
"area_code" => "%{[message][3]}"
}
}
} else {
mutate {
add_field => {
"area_code" => ""
}
}
}
if [message][4] {
mutate {
add_field => {
"application" => "%{[message][4]}"
}
}
} else {
mutate {
add_field => {
"application" => ""
}
}
}
if [message][5] {
mutate {
add_field => {
"channel_name" => "%{[message][5]}"
}
}
} else {
mutate {
add_field => {
"channel_name" => ""
}
}
}
if [message][6] {
mutate {
add_field => {
"channel_type" => "%{[message][6]}"
}
}
} else {
mutate {
add_field => {
"channel_type" => ""
}
}
}
if [message][7] {
mutate {
add_field => {
"violation_type" => "%{[message][7]}"
}
}
} else {
mutate {
add_field => {
"violation_type" => ""
}
}
}
if [message][8] {
mutate {
add_field => {
"violation_time" => "%{my_time}"
}
}
} else {
mutate {
add_field => {
"violation_time" => ""
}
}
}
if [message][9] {
mutate {
add_field => {
"exceed_orient_note_number" => "%{[message][9]}"
}
}
} else {
mutate {
add_field => {
"exceed_orient_note_number" => ""
}
}
}
mutate{
add_field => {
"scene_name" => "WHITELIST_MESSAGE"
"violation_type_code" => "21"
"kafka_topic" => "topic1"
}
remove_field => ["message","@version","my_time"]
}
}
output {
elasticsearch {
hosts => "127.0.0.1:9200"
index => "whitelist_message_%{my_timest}"
retry_on_conflict => 5
document_id => "%{acc_num}"
codec => plain { charset => "UTF-8" }
}
stdout { codec => rubydebug }
}
With this configuration, a single run produces multiple indices at once: records whose own timestamp falls on the 17th, 18th or 19th are each written to the whitelist_message_<date> index for that day, regardless of when they are consumed from Kafka.
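To confirm the result, the generated indices can be listed with the Elasticsearch cat API, for example:
curl -XGET "http://127.0.0.1:9200/_cat/indices/whitelist_message_*?v"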

This post has shown how, when Kafka delivers out-of-order data, to adjust the Logstash configuration so that a custom time field taken from the record itself is used to build date-partitioned Elasticsearch indices, ensuring every record is stored under its own date.
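Finally, the configuration above replays sample data from local files. When consuming directly from Kafka, the input block would use the kafka input plugin instead; a minimal sketch, assuming a local broker and the topic1 topic referenced above:
input {
kafka {
bootstrap_servers => "127.0.0.1:9092"   # assumed broker address
topics => ["topic1"]
codec => plain { charset => "UTF-8" }
}
}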