flume常用配置文件

本文介绍 Flume 的几个常见配置文件:exec-kafka.conf 将命令行输出发送到 Kafka;flume_pull_streaming.conf 与 flume_push_streaming.conf 分别对应 Spark Streaming 的拉取(pull)和推送(push)两种集成方式;taildir_kafka.conf 监控文件尾部并发送到 Kafka;taildir_hdfs.conf 将文件尾部数据写入 HDFS;kafka-hdfs.conf 实现 Kafka 到 HDFS 的数据流转。

摘要生成于 C知道 ,由 DeepSeek-R1 满血版支持, 前往体验 >

exec-kafka.conf

a1.sources = r1
a1.channels = c1
a1.sinks = k1

# Source: follow the log file with tail -F via an exec source
a1.sources.r1.type = exec
a1.sources.r1.command = tail -F /root/tmp/cmcc.log
a1.sources.r1.shell = /bin/bash -c

# Channel: in-memory buffer holding up to 1000 events
a1.channels.c1.type = memory
a1.channels.c1.capacity = 1000

# Sink: publish events to a Kafka topic
a1.sinks.k1.type = org.apache.flume.sink.kafka.KafkaSink
a1.sinks.k1.kafka.topic = kafka_streaming_topic
a1.sinks.k1.kafka.bootstrap.servers = hadoop01:9092
a1.sinks.k1.kafka.flumeBatchSize = 20
a1.sinks.k1.kafka.producer.acks = 1

# Wire the source and the sink to the channel
a1.sources.r1.channels = c1
a1.sinks.k1.channel = c1

flume_pull_streaming.conf

a1.sources = r1
a1.sinks = k1
a1.channels = c1

# Source: netcat listener for test input
a1.sources.r1.type = netcat
a1.sources.r1.bind = hadoop01
a1.sources.r1.port = 44444

# Sink: SparkSink that a Spark Streaming job pulls events from
# (fixed: removed "a1.sinks.k1.channel = memoryChannel" — it referenced a
#  channel that is never defined and duplicated the binding to c1 below,
#  with a parser-dependent last/first-wins outcome)
a1.sinks.k1.type = org.apache.spark.streaming.flume.sink.SparkSink
a1.sinks.k1.hostname = hadoop01
a1.sinks.k1.port = 41414

# Channel: in-memory buffer
a1.channels.c1.type = memory

# Wire the source and the sink to the channel
a1.sources.r1.channels = c1
a1.sinks.k1.channel = c1

flume_push_streaming.conf

# Name the components on this agent
# Name the components on this agent
a1.sources = r1
a1.sinks = k1
a1.channels = c1

# Source: netcat listener for test input
a1.sources.r1.type = netcat
a1.sources.r1.bind = hadoop01
a1.sources.r1.port = 44444

# Sink: avro client that pushes events to the Spark Streaming receiver.
# Replace the placeholder below with the IP of the machine running the
# Spark Streaming application (the original value was the literal Chinese
# text "本地ip", i.e. "local ip", which is not a resolvable hostname).
a1.sinks.k1.type = avro
a1.sinks.k1.hostname = <spark-receiver-ip>
a1.sinks.k1.port = 41414

# Channel: in-memory buffer
a1.channels.c1.type = memory

# Wire the source and the sink to the channel
a1.sources.r1.channels = c1
a1.sinks.k1.channel = c1

taildir_kafka.conf

a1.sources = r1
a1.channels = c1
a1.sinks = k1

# Source: TAILDIR tails every file matching the f1 pattern and
# persists read offsets in the position file for restart safety
a1.sources.r1.type = TAILDIR
a1.sources.r1.positionFile = /usr/local/apache-flume-1.8.0-bin/taildir_position.json
a1.sources.r1.filegroups = f1
a1.sources.r1.filegroups.f1 = /root/tmp/ip.*

# Channel: in-memory buffer
a1.channels.c1.type = memory

# Sink: publish events to a Kafka topic
a1.sinks.k1.type = org.apache.flume.sink.kafka.KafkaSink
a1.sinks.k1.kafka.topic = kafka_streaming_topic
a1.sinks.k1.kafka.bootstrap.servers = hadoop01:9092
a1.sinks.k1.kafka.flumeBatchSize = 20
a1.sinks.k1.kafka.producer.acks = 1

# Wire the source and the sink to the channel
a1.sources.r1.channels = c1
a1.sinks.k1.channel = c1

taildir_hdfs.conf

# Agent component names
a1.sources = r1
a1.channels = c1
a1.sinks = k1

# Source: TAILDIR tails the access log and persists read offsets.
# (fixed: "filePath", "posiFile", "interval" and "charset" are not valid
#  TAILDIR properties — without "filegroups" the source tails nothing;
#  replaced with positionFile / filegroups / filegroups.f1)
a1.sources.r1.type = TAILDIR
a1.sources.r1.positionFile = /usr/local/apache-flume-1.8.0-bin/taildir_position.json
a1.sources.r1.filegroups = f1
a1.sources.r1.filegroups.f1 = /root/myde/logs/access.log

# Channel: memory, 1000 events capacity, 100 events per transaction
a1.channels.c1.type = memory
a1.channels.c1.capacity = 1000
a1.channels.c1.transactionCapacity = 100

# Sink: HDFS, bucketed by date; the %y-%m-%d escapes need a timestamp
# header, which useLocalTimeStamp supplies from the agent's clock
a1.sinks.k1.type = hdfs
a1.sinks.k1.hdfs.useLocalTimeStamp = true
a1.sinks.k1.hdfs.path = hdfs://spark01:9000/flume/access-logs/%y-%m-%d
a1.sinks.k1.hdfs.filePrefix = events-
# Roll a new file every 30 seconds
a1.sinks.k1.hdfs.rollInterval = 30
# 0 disables rolling by event count
a1.sinks.k1.hdfs.rollCount = 0
# Roll when the file reaches 100 MB
a1.sinks.k1.hdfs.rollSize = 104857600
# Write plain text rather than SequenceFile
a1.sinks.k1.hdfs.fileType = DataStream

# Wire the source and the sink to the channel
a1.sources.r1.channels = c1
a1.sinks.k1.channel = c1

kafka-hdfs.conf

# Agent component names — no source: the Kafka channel is fed directly
# by the Kafka topic, so only a channel and a sink are declared
a1.channels = c1
a1.sinks = k1

# Channel: Kafka-backed channel consuming from helloTopic
a1.channels.c1.type = org.apache.flume.channel.kafka.KafkaChannel
a1.channels.c1.kafka.bootstrap.servers = spark01:9092,spark02:9092,spark03:9092
# Read records as plain text instead of Flume Avro events;
# otherwise the payload comes out garbled
a1.channels.c1.parseAsFlumeEvent = false
a1.channels.c1.kafka.topic = helloTopic
a1.channels.c1.kafka.consumer.group.id = flume-consumer

# Sink: HDFS, bucketed by date; the %y-%m-%d escapes need a timestamp
# header, which useLocalTimeStamp supplies from the agent's clock
a1.sinks.k1.type = hdfs
a1.sinks.k1.hdfs.useLocalTimeStamp = true
a1.sinks.k1.hdfs.path = hdfs://spark01:9000/flume/access-logs/%y-%m-%d
a1.sinks.k1.hdfs.filePrefix = events-
# Roll a new file every 30 seconds
a1.sinks.k1.hdfs.rollInterval = 30
# 0 disables rolling by event count
a1.sinks.k1.hdfs.rollCount = 0
# Roll when the file reaches 100 MB
a1.sinks.k1.hdfs.rollSize = 104857600
# Write plain text rather than SequenceFile
a1.sinks.k1.hdfs.fileType = DataStream

# Bind the sink to the channel
a1.sinks.k1.channel = c1

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值