编写flume配置
使用TAILDIR source
# Flume agent "a1": a TAILDIR source (r1) follows a log file, a memory
# channel (c1) buffers the events, and an HDFS sink (k1) writes them out.

# --- Component declarations ---------------------------------------------
# Source(s), channel(s) and sink(s) that belong to agent a1.
a1.sources = r1
a1.channels = c1
a1.sinks = k1

# --- Channel c1: in-memory buffer ---------------------------------------
a1.channels.c1.type = memory
# Upper bound on the number of events held in the channel.
a1.channels.c1.capacity = 1000
# Upper bound on the number of events moved per transaction.
a1.channels.c1.transactionCapacity = 100

# --- Source r1: tail the visit-action log -------------------------------
a1.sources.r1.type = TAILDIR
# One file group (f1) that points at the single log file to follow.
a1.sources.r1.filegroups = f1
a1.sources.r1.filegroups.f1 = /opt/logs/visitaction.log
# Events read by r1 are put into channel c1.
a1.sources.r1.channels = c1

# --- Sink k1: write to HDFS ---------------------------------------------
# The sink type is hdfs (not the console logger); it drains channel c1.
a1.sinks.k1.type = hdfs
a1.sinks.k1.channel = c1
# Target directory; %y, %m and %d are time escape sequences, so output is
# laid out as <2-digit year>-<month>/<day> under /flume/visitactionlog.
a1.sinks.k1.hdfs.path = /flume/visitactionlog/%y-%m/%d
# Every file created by the sink starts with this prefix.
a1.sinks.k1.hdfs.filePrefix = visitaction-
# Resolve the time escapes from the agent's local clock, so events do not
# need to carry a timestamp header.
a1.sinks.k1.hdfs.useLocalTimeStamp = true
# Write the event bodies as a plain stream instead of the sink's default
# file format.
a1.sinks.k1.hdfs.fileType = DataStream
将以上配置保存为 visitaction.conf,并放在 Flume 安装目录下的 job/ 目录中(启动命令读取的路径是 job/visitaction.conf)
启动flume
# Launch agent "a1": conf/ is the Flume configuration directory and
# job/visitaction.conf is the agent definition file to load.
bin/flume-ng agent \
  --name a1 \
  --conf conf/ \
  --conf-file job/visitaction.conf
启动打印日志程序
# Start the log-printing program in the background. nohup keeps it running
# after the terminal is closed; the trailing '&' returns the shell prompt
# immediately instead of blocking on the Java process. When stdout is a
# terminal, nohup appends the program's output to nohup.out.
nohup java -cp printlog.jar com.zj.mysparkproject.PrintVisitActionLog start &