# Name the three core components of this agent
agent1.sources = source1
agent1.sinks = sink1
agent1.channels = channel1
# Configure the source component
# Spooling Directory Source: it records how far it has read, so if the Flume agent goes down it can resume without losing data
agent1.sources.source1.type = spooldir
agent1.sources.source1.spoolDir = /home/hadoop/test/
# Skip files whose names match this pattern (error logs)
agent1.sources.source1.ignorePattern = ^error.*\.log$
agent1.sources.source1.fileHeader = false
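# Note on spooldir behavior (not set here): files dropped into spoolDir must be
# complete and immutable; once fully ingested, the source renames them with a
# .COMPLETED suffix, which is how progress survives an agent restart.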
# Configure an interceptor
agent1.sources.source1.interceptors = i1
agent1.sources.source1.interceptors.i1.type = timestamp
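# The timestamp interceptor stamps each event with a `timestamp` header; the HDFS
# sink relies on that header to expand the %y-%m-%d escapes in hdfs.path below.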
# Configure the sink component
# Data lands in HDFS
agent1.sinks.sink1.type = hdfs
agent1.sinks.sink1.hdfs.path = hdfs://master:9000/weblog/%y-%m-%d/
# Prefix the landed files with access_log
agent1.sinks.sink1.hdfs.filePrefix = access_log
# Maximum number of files the sink keeps open at once
agent1.sinks.sink1.hdfs.maxOpenFiles = 5000
# Write to HDFS in batches of 100 events
agent1.sinks.sink1.hdfs.batchSize = 100
agent1.sinks.sink1.hdfs.fileType = DataStream
agent1.sinks.sink1.hdfs.writeFormat = Text
# Roll files by size: 135000000 bytes (~128 MiB)
agent1.sinks.sink1.hdfs.rollSize = 135000000
# Roll files by event count
agent1.sinks.sink1.hdfs.rollCount = 1000000
# Roll files by time: every 60 seconds
agent1.sinks.sink1.hdfs.rollInterval = 60
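# Whichever roll trigger (rollSize, rollCount, rollInterval) is hit first wins;
# setting any of them to 0 disables that trigger.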
# Enable rounding of the event timestamp used to build the directory path
agent1.sinks.sink1.hdfs.round = true
# Round the timestamp down in steps of 10, i.e. one bucket every 10 minutes
agent1.sinks.sink1.hdfs.roundValue = 10
# Rounding unit is minutes
agent1.sinks.sink1.hdfs.roundUnit = minute
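# For example, an event stamped 11:47:22 is bucketed as 11:40:00 before escape
# expansion; since hdfs.path here only uses %y-%m-%d, all of a day's events still
# land in the same daily directory regardless of the minute-level rounding.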
# Use a channel which buffers events in memory
agent1.channels.channel1.type = memory
# Maximum number of events held in the channel
agent1.channels.channel1.capacity = 500000
# Maximum events per transaction; must be >= the sink's batchSize (100 here)
agent1.channels.channel1.transactionCapacity = 600
# Seconds a put/take will wait on a full/empty channel before timing out
agent1.channels.channel1.keep-alive = 120
# Bind the source and sink to the channel
agent1.sources.source1.channels = channel1
agent1.sinks.sink1.channel = channel1
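# A minimal sketch of starting this agent (assuming the file is saved as
# spooldir-hdfs.conf under Flume's conf directory; the file name is illustrative):
#   bin/flume-ng agent --conf conf --conf-file conf/spooldir-hdfs.conf \
#       --name agent1 -Dflume.root.logger=INFO,console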