1: Starter example (single-node configuration)
# File name: case1_example.conf
# Configuration:
# case1_example.conf: A single-node Flume configuration
# Name the components on this agent
a1.sources = r1
a1.sinks = k1
a1.channels = c1
# Describe/configure the source
a1.sources.r1.type = netcat
a1.sources.r1.bind = localhost
a1.sources.r1.port = 44444
# Describe the sink
a1.sinks.k1.type = logger
# Use a channel which buffers events in memory
a1.channels.c1.type = memory
a1.channels.c1.capacity = 1000
a1.channels.c1.transactionCapacity = 100
# Bind the source and sink to the channel
a1.sources.r1.channels = c1
a1.sinks.k1.channel = c1
# Start command
flume-ng agent -c conf -f conf/case1_example.conf -n a1 -Dflume.root.logger=INFO,console
# Start-up options explained
-c conf specifies conf as the configuration directory
-f conf/case1_example.conf specifies conf/case1_example.conf as the configuration file
-n a1 specifies the agent name a1, which must match the agent name used in case1_example.conf
-Dflume.root.logger=INFO,console sets the root logger to INFO level and sends its output to the console
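# The same command can be written with the equivalent long options that the flume-ng script also accepts:
flume-ng agent --conf conf --conf-file conf/case1_example.conf --name a1 -Dflume.root.logger=INFO,console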
# Test from another terminal
telnet 127.0.0.1 44444
Trying 127.0.0.1...
Connected to localhost.localdomain (127.0.0.1).
Escape character is '^]'.
hello world!
OK
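# If telnet is not installed, the same test can be run with nc (assuming a netcat binary is available):
echo "hello world!" | nc localhost 44444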
# Check the console output in the terminal where the agent was started
2013-05-24 00:00:24,306 (SinkRunner-PollingRunner-DefaultSinkProcessor) [INFO - org.apache.flume.sink.LoggerSink.process(LoggerSink.java:70)] Event: { headers:{} body: 68 65 6C 6C 6F 20 77 6F 72 6C 64 21 0D hello world!. }
2: Test Avro Source
# File name: case2_avro.conf
# Configuration:
# Name the components on this agent
a1.sources = r1
a1.sinks = k1
a1.channels = c1
# Describe/configure the source
a1.sources.r1.type = avro
a1.sources.r1.channels = c1
a1.sources.r1.bind = 0.0.0.0
a1.sources.r1.port = 4141
# Describe the sink
a1.sinks.k1.type = logger
a1.sinks.k1.channel = c1
# Use a channel which buffers events in memory
a1.channels.c1.type = memory
a1.channels.c1.capacity = 1000
a1.channels.c1.transactionCapacity = 100
# Start Flume agent a1
flume-ng agent -c . -f case2_avro.conf -n a1 -Dflume.root.logger=INFO,console
# Create the test file
echo "hello world" > /usr/logs/log.10
# Send the file with avro-client
flume-ng avro-client -c . -H localhost -p 4141 -F /usr/logs/log.10
# Check the console output in the agent terminal
2013-05-27 01:11:45,852 (SinkRunner-PollingRunner-DefaultSinkProcessor) [INFO - org.apache.flume.sink.LoggerSink.process(LoggerSink.java:70)] Event: { headers:{} body: 68 65 6C 6C 6F 20 77 6F 72 6C 64 hello world }
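# Per the Flume user guide, avro-client reads from standard input when -F is omitted, so the file step can be skipped:
echo "hello world" | flume-ng avro-client -c . -H localhost -p 4141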
3: Test Exec Source
# File name: case3_exec.conf
# Configuration:
# Name the components on this agent
a1.sources = r1
a1.sinks = k1
a1.channels = c1
# Describe/configure the source
a1.sources.r1.type = exec
a1.sources.r1.command = cat /usr/logs/log.10
a1.sources.r1.channels = c1
# Describe the sink
a1.sinks.k1.type = logger
a1.sinks.k1.channel = c1
# Use a channel which buffers events in memory
a1.channels.c1.type = memory
a1.channels.c1.capacity = 1000
a1.channels.c1.transactionCapacity = 100
# Start Flume agent a1
flume-ng agent -c . -f case3_exec.conf -n a1 -Dflume.root.logger=INFO,console
# Append content to the file
echo "exec test" >> /usr/logs/log.10
# Check the console output in the agent terminal
2018-10-27 01:50:12,825 (SinkRunner-PollingRunner-DefaultSinkProcessor) [INFO - org.apache.flume.sink.LoggerSink.process(LoggerSink.java:70)] Event: { headers:{} body: 68 65 6C 6C 6F 20 77 6F 72 6C 64 hello world }
2018-10-27 01:50:12,826 (SinkRunner-PollingRunner-DefaultSinkProcessor) [INFO - org.apache.flume.sink.LoggerSink.process(LoggerSink.java:70)] Event: { headers:{} body: 65 78 65 63 20 74 65 73 74 exec test }
# To use the tail command instead, the file must already contain enough lines for tail to produce output
a1.sources.r1.command = tail -F /usr/logs/log.10
# Generate enough content in the file
for i in {1..100};do echo "exec test$i" >> /usr/logs/log.10;echo $i;done
# The output appears on the console
2018-10-27 19:17:18,157 (lifecycleSupervisor-1-1) [INFO - org.apache.flume.source.ExecSource.start(ExecSource.java:155)] Exec source starting with command:tail -n 5 -F /usr/logs/log.10
2018-10-27 19:19:50,334 (SinkRunner-PollingRunner-DefaultSinkProcessor) [INFO - org.apache.flume.sink.LoggerSink.process(LoggerSink.java:70)] Event: { headers:{} body: 65 78 65 63 20 74 65 73 74 37 exec test7 }
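# The exec source offers no delivery guarantee: if the tail process dies, events silently stop. The user guide lists
# optional properties to restart the command and capture its stderr (values below are illustrative):
a1.sources.r1.restart = true
a1.sources.r1.restartThrottle = 10000
a1.sources.r1.logStdErr = true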
4: Test Spooling Directory Source
# File name: case4_spool.conf
# Configuration:
# Name the components on this agent
a1.sources = r1
a1.sinks = k1
a1.channels = c1
# Describe/configure the source
a1.sources.r1.type = spooldir
a1.sources.r1.spoolDir = /usr/logs/flumeSpool
a1.sources.r1.fileHeader = true
a1.sources.r1.channels = c1
# Describe the sink
a1.sinks.k1.type = logger
a1.sinks.k1.channel = c1
# Use a channel which buffers events in memory
a1.channels.c1.type = memory
a1.channels.c1.capacity = 1000
a1.channels.c1.transactionCapacity = 100
# Start Flume agent a1
flume-ng agent -c . -f case4_spool.conf -n a1 -Dflume.root.logger=INFO,console
# Drop a new file into the spool directory
echo "spool test1" > /usr/logs/flumeSpool/spool1.log
# Check the console output in the agent terminal
2018-10-27 22:49:06,098 (pool-4-thread-1) [INFO - org.apache.flume.client.avro.SpoolingFileLineReader.retireCurrentFile(SpoolingFileLineReader.java:229)] Preparing to move file /usr/logs/flumeSpool/spool1.log to /usr/logs/flumeSpool/spool1.log.COMPLETED
2018-10-27 22:49:06,101 (SinkRunner-PollingRunner-DefaultSinkProcessor) [INFO - org.apache.flume.sink.LoggerSink.process(LoggerSink.java:70)] Event: { headers:{file=/usr/logs/flumeSpool/spool1.log} body: 73 70 6F 6F 6C 20 74 65 73 74 31 spool test1 }
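# As the log shows, ingested files are renamed with a .COMPLETED suffix by default. Related spooldir options from the
# Flume user guide (availability varies by release; values below are illustrative):
a1.sources.r1.fileSuffix = .COMPLETED
a1.sources.r1.deletePolicy = never
a1.sources.r1.ignorePattern = ^$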
5: Test Syslog TCP Source
# File name: case5_syslog.conf
# Configuration:
# Name the components on this agent
a1.sources = r1
a1.sinks = k1
a1.channels = c1
# Describe/configure the source
a1.sources.r1.type = syslogtcp
a1.sources.r1.port = 5140
a1.sources.r1.host = localhost
a1.sources.r1.channels = c1
# Describe the sink
a1.sinks.k1.type = logger
a1.sinks.k1.channel = c1
# Use a channel which buffers events in memory
a1.channels.c1.type = memory
a1.channels.c1.capacity = 1000
a1.channels.c1.transactionCapacity = 100
# Start Flume agent a1
flume-ng agent -c . -f case5_syslog.conf -n a1 -Dflume.root.logger=INFO,console
# Generate a test syslog message. The <37> priority prefix is required because the source expects syslog wire-format data; without it the agent logs "Failed to extract syslog wire entry"
echo "<37>hello via syslog" | nc localhost 5140
# Check the console output in the agent terminal
2018-10-27 23:39:10,755 (SinkRunner-PollingRunner-DefaultSinkProcessor) [INFO - org.apache.flume.sink.LoggerSink.process(LoggerSink.java:70)] Event: { headers:{Severity=5, Facility=4} body: 68 65 6C 6C 6F 20 76 69 61 20 73 79 73 6C 6F 67 hello via syslog }
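# The headers above follow from the syslog priority formula: priority = facility * 8 + severity,
# so <37> = 4 * 8 + 5, i.e. Facility=4 (security/auth) and Severity=5 (notice).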
6: Test Syslog UDP Source
# File name: case6_syslogudp.conf
# Configuration:
# Name the components on this agent
a1.sources = r1
a1.sinks = k1
a1.channels = c1
# Describe/configure the source
a1.sources.r1.type = syslogudp
a1.sources.r1.port = 5140
a1.sources.r1.host = localhost
a1.sources.r1.channels = c1
# Describe the sink
a1.sinks.k1.type = logger
a1.sinks.k1.channel = c1
# Use a channel which buffers events in memory
a1.channels.c1.type = memory
a1.channels.c1.capacity = 1000
a1.channels.c1.transactionCapacity = 100
# Start Flume agent a1
flume-ng agent -c . -f case6_syslogudp.conf -n a1 -Dflume.root.logger=INFO,console
# Generate a test syslog message over UDP (note the -u flag to nc)
echo "<37>hello via syslogudp" | nc -u localhost 5140
# Check the console output in the agent terminal
2018-10-27 23:39:10,755 (SinkRunner-PollingRunner-DefaultSinkProcessor) [INFO - org.apache.flume.sink.LoggerSink.process(LoggerSink.java:70)] Event: { headers:{Severity=5, Facility=4} body: 68 65 6C 6C 6F 20 76 69 61 20 73 79 73 6C 6F 67 hello via syslogudp }
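# On systems whose util-linux logger supports remote logging, a well-formed syslog datagram can also be sent directly
# (assumes a recent util-linux; -n names the server, -P the port, -d selects UDP):
logger -n localhost -P 5140 -d "hello via logger"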
7: Test HTTP Source (JSONHandler)
# File name: case7_httppost.conf
# Configuration:
# Name the components on this agent
a1.sources = r1
a1.sinks = k1
a1.channels = c1
# Describe/configure the source
a1.sources.r1.type = org.apache.flume.source.http.HTTPSource
a1.sources.r1.port = 5140
a1.sources.r1.channels = c1
# Describe the sink
a1.sinks.k1.type = logger
a1.sinks.k1.channel = c1
# Use a channel which buffers events in memory
a1.channels.c1.type = memory
a1.channels.c1.capacity = 1000
a1.channels.c1.transactionCapacity = 100
# Start Flume agent a1
flume-ng agent -c . -f case7_httppost.conf -n a1 -Dflume.root.logger=INFO,console
# Send a JSON-formatted POST request
curl -X POST -d '[{ "headers" :{"namenode" : "namenode.example.com","datanode" : "random_datanode.example.com"},"body" : "really_random_body"}]' http://localhost:5140
# Check the console output in the agent terminal
2018-10-27 01:17:47,186 (SinkRunner-PollingRunner-DefaultSinkProcessor) [INFO - org.apache.flume.sink.LoggerSink.process(LoggerSink.java:70)] Event: { headers:{namenode=namenode.example.com, datanode=random_datanode.example.com} body: 72 65 61 6C 6C 79 5F 72 61 6E 64 6F 6D 5F 62 6F really_random_body }
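# The JSONHandler accepts a JSON array, so one POST can carry several events (illustrative payload):
curl -X POST -d '[{"headers":{"h1":"v1"},"body":"event one"},{"headers":{},"body":"event two"}]' http://localhost:5140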
8: Test HDFS Sink
# File name: case8_hdfs.conf
# Add the following to /usr/local/apache-flume-1.3.1-bin/conf/flume-env.sh:
export HADOOP_HOME=/usr/local/hadoop
# Configuration:
# Name the components on this agent
a1.sources = r1
a1.sinks = k1
a1.channels = c1
# Describe/configure the source
a1.sources.r1.type = syslogtcp
a1.sources.r1.bind = 0.0.0.0
a1.sources.r1.port = 5140
a1.sources.r1.channels = c1
# Describe the sink
a1.sinks.k1.type = hdfs
a1.sinks.k1.channel = c1
a1.sinks.k1.hdfs.path = hdfs://master:9000/user/hadoop/flume/collected/
a1.sinks.k1.hdfs.filePrefix = Syslog
a1.sinks.k1.hdfs.round = true
a1.sinks.k1.hdfs.roundValue = 10
a1.sinks.k1.hdfs.roundUnit = minute
# Use a channel which buffers events in memory
a1.channels.c1.type = memory
a1.channels.c1.capacity = 1000
a1.channels.c1.transactionCapacity = 100
# Start Flume agent a1
flume-ng agent -c . -f case8_hdfs.conf -n a1 -Dflume.root.logger=INFO,console
# Generate a test syslog message (the source is syslogtcp, so plain TCP nc is used here, not nc -u)
echo "<37>hello via syslog to hdfs testing one" | nc localhost 5140
# Check the console output in the agent terminal; the file is created successfully
2018-10-27 00:53:58,078 (hdfs-k1-call-runner-0) [INFO - org.apache.flume.sink.hdfs.BucketWriter.doOpen(BucketWriter.java:208)] Creating hdfs://master:9000/user/hadoop/flume/collected//Syslog.1369814037714.tmp
2018-10-27 00:54:28,220 (hdfs-k1-roll-timer-0) [INFO - org.apache.flume.sink.hdfs.BucketWriter.renameBucket(BucketWriter.java:427)] Renaming hdfs://master:9000/user/hadoop/flume/collected/Syslog.1369814037714.tmp to hdfs://master:9000/user/hadoop/flume/collected/Syslog.1369814037714
# View the file on Hadoop
./hadoop dfs -cat hdfs://172.25.4.35:9000/user/hadoop/flume/collected/Syslog.1369814037714
SEQ!org.apache.hadoop.io.LongWritable"org.apache.hadoop.io.BytesWritable^;>Gv$hello via syslog to hdfs testing one
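# The SEQ! prefix shows that the HDFS sink writes Hadoop SequenceFiles by default. To store plain text instead,
# the sink supports these options (from the Flume user guide):
a1.sinks.k1.hdfs.fileType = DataStream
a1.sinks.k1.hdfs.writeFormat = Text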
9: Test HDFS Sink (time-based bucketing)
# File name: case9_hdfs.conf
# Add the following to /usr/local/apache-flume-1.3.1-bin/conf/flume-env.sh:
export HADOOP_HOME=/usr/local/hadoop
# Configuration:
# Name the components on this agent
a1.sources = r1
a1.sinks = k1
a1.channels = c1
# Describe/configure the source
a1.sources.r1.type = org.apache.flume.source.http.HTTPSource
a1.sources.r1.bind = 0.0.0.0
a1.sources.r1.port = 5140
a1.sources.r1.channels = c1
# Describe the sink
a1.sinks.k1.type = hdfs
a1.sinks.k1.channel = c1
a1.sinks.k1.hdfs.path = hdfs://master:9000/user/hadoop/flume/collected/%Y-%m-%d/%H%M/%S
a1.sinks.k1.hdfs.filePrefix = Syslog.%{host}
a1.sinks.k1.hdfs.round = true
a1.sinks.k1.hdfs.roundValue = 10
a1.sinks.k1.hdfs.roundUnit = minute
# Use a channel which buffers events in memory
a1.channels.c1.type = memory
a1.channels.c1.capacity = 1000
a1.channels.c1.transactionCapacity = 100
# Start Flume agent a1
flume-ng agent -c . -f case9_hdfs.conf -n a1 -Dflume.root.logger=INFO,console
# Send a JSON-formatted POST request. If the timestamp header has the wrong format the event cannot be parsed:
# it must be a 13-digit Unix timestamp (milliseconds included) for the time escapes in hdfs.path to resolve correctly.
# Generate the current time as a 10-digit Unix timestamp on Linux
date +%s
# Generate the current time as a 13-digit Unix timestamp on Linux
date +%s%N|awk '{print substr($0,1,13)}'
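# With GNU date the same 13-digit value can be produced directly via the %N (nanoseconds) field:
date +%s%3N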
curl -X POST -d '[{ "headers":{"timestamp":"1369818213654","host":"cc-staging-loginmgr2"},"body": "hello via post"}]' http://localhost:5140
# Check the console output in the agent terminal; the file is created successfully
2018-10-27 02:03:38,646 (hdfs-k1-call-runner-4) [INFO - org.apache.flume.sink.hdfs.BucketWriter.doOpen(BucketWriter.java:208)] Creating hdfs://master:9000/user/hadoop/flume/collected/2018-10-27/0203/cc-staging-loginmgr2..1369818218614.tmp
2018-10-27 02:04:08,714 (hdfs-k1-roll-timer-0) [INFO - org.apache.flume.sink.hdfs.BucketWriter.renameBucket(BucketWriter.java:427)] Renaming hdfs://master:9000/user/hadoop/flume/collected/2018-10-27/0203/cc-staging-loginmgr2..1369818218614.tmp to hdfs://master:9000/user/hadoop/flume/collected/2018-10-27/0203/cc-staging-loginmgr2..1369818218614
# List the file on Hadoop
./hadoop dfs -ls hdfs://172.25.4.35:9000/user/hadoop/flume/collected/2018-10-27/0203
Found 1 items
-rw-r--r-- 3 root supergroup 129 2018-10-27 02:04 /user/hadoop/flume/collected/2018-10-27/0203/cc-staging-loginmgr2..1369818218614
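# If supplying a timestamp header from the client is inconvenient, newer Flume releases let the HDFS sink fall back
# to the local time for the path escapes (property from the Flume user guide):
a1.sinks.k1.hdfs.useLocalTimeStamp = true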
10: Test Avro Sink
# File names: case10_avro.conf, case10_avro_sink.conf
# Configuration:
# case10_avro.conf
# Name the components on this agent
a1.sources = r1
a1.sinks = k1
a1.channels = c1
# Describe/configure the source
a1.sources.r1.type = avro
a1.sources.r1.channels = c1
a1.sources.r1.bind = 0.0.0.0
a1.sources.r1.port = 4545
# Describe the sink
a1.sinks.k1.type = logger
a1.sinks.k1.channel = c1
# Use a channel which buffers events in memory
a1.channels.c1.type = memory
a1.channels.c1.capacity = 1000
a1.channels.c1.transactionCapacity = 100
# case10_avro_sink.conf
# Name the components on this agent
a2.sources = r1
a2.sinks = k1
a2.channels = c1
# Describe/configure the source
a2.sources.r1.type = syslogtcp
a2.sources.r1.port = 5140
a2.sources.r1.host = localhost
a2.sources.r1.channels = c1
# Describe the sink
a2.sinks.k1.type = avro
a2.sinks.k1.channel = c1
a2.sinks.k1.hostname = 172.25.4.23
a2.sinks.k1.port = 4545
# Use a channel which buffers events in memory
a2.channels.c1.type = memory
a2.channels.c1.capacity = 1000
a2.channels.c1.transactionCapacity = 100
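# Together the two configs form a two-hop pipeline (addresses as used in this example):
# nc (tcp 5140) -> a2 syslogtcp source -> c1 -> a2 avro sink -> 172.25.4.23:4545 -> a1 avro source -> c1 -> a1 logger sink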
# Start the Avro source agent first so it is listening on its port
flume-ng agent -c . -f case10_avro.conf -n a1 -Dflume.root.logger=INFO,console
# Then start the agent with the Avro sink
flume-ng agent -c . -f case10_avro_sink.conf -n a2 -Dflume.root.logger=INFO,console
# The connection is established
2018-10-27 19:23:00,237 (pool-5-thread-1) [INFO - org.apache.avro.ipc.NettyServer$NettyServerAvroHandler.handleUpstream(NettyServer.java:171)] [id: 0x7a0e28bf, /172.25.4.32:14894 => /172.25.4.23:4545] CONNECTED: /172.25.4.32:14894
# Generate a test log on the Avro sink side
echo "<37>hello via avro sink" | nc localhost 5140
# The event appears in the Avro source agent's console
2018-10-27 19:24:13,740 (SinkRunner-PollingRunner-DefaultSinkProcessor) [INFO - org.apache.flume.sink.LoggerSink.process(LoggerSink.java:70)] Event: { headers:{Severity=5, Facility=4} body: 68 65 6C 6C 6F 20 76 69 61 20 61 76 72 6F 20 73 hello via avro sink }