日志收集之Flume
case 1:
1.
wget http://archive.apache.org/dist/flume/1.6.0/apache-flume-1.6.0-bin.tar.gz
2.
tar -zxvf apache-flume-1.6.0-bin.tar.gz
3.
mv apache-flume-1.6.0-bin flume
4.
cd conf
vi commands.conf
# example.conf: A single-node Flume configuration
# Name the components on this agent
a1.sources = r1
a1.sinks = k1
a1.channels = c1
# Describe/configure the source
a1.sources.r1.type = exec
a1.sources.r1.command = echo 'hello'
# Describe the sink
a1.sinks.k1.type = logger
# Use a channel which buffers events in memory
a1.channels.c1.type = memory
a1.channels.c1.capacity = 1000
# The maximum number of events stored in the channel
a1.channels.c1.transactionCapacity = 100
# The maximum number of events the channel will take from a source or give to a sink per transaction
# Bind the source and sink to the channel
a1.sources.r1.channels = c1
a1.sinks.k1.channel = c1
5.
cd ..
bin/flume-ng agent --conf conf --conf-file ./conf/commands.conf --name a1 -Dflume.root.logger=INFO,console
case 2:
0. 检查
rpm -qa | grep telnet #查看是否有安装telnet
yum list | grep telnet #查看yum安装列表中是否有telnet
#安装
yum install xinetd #telnet依赖
sudo yum -y install telnet #telnet客户端
sudo yum -y install telnet-server #telnet服务器端
# 启动服务
service xinetd restart #重启xinetd
service iptables stop
1. cd conf
vi example.conf
# Name the components on this agent
a1.sources = r1
a1.sinks = k1
a1.channels = c1
# Describe/configure the source
a1.sources.r1.type = netcat
# netcat,nc,用于创建 TCP/IP 连接,最大的用途就是用来处理 TCP/UDP 套接字,一个非常标准的telnet客户端工具
a1.sources.r1.bind = hd
a1.sources.r1.port = 44444
# Describe the sink
a1.sinks.k1.type = logger
# Use a channel which buffers events in memory
a1.channels.c1.type = memory
a1.channels.c1.capacity = 1000
a1.channels.c1.transactionCapacity = 100
# Bind the source and sink to the channel
a1.sources.r1.channels = c1
a1.sinks.k1.channel = c1
2.
cd ..
bin/flume-ng agent --conf conf --conf-file conf/example.conf --name a1 -Dflume.root.logger=INFO,console
3. 另一个窗口
telnet hd 44444
hello,world
hi,China
case 3:
1. cd conf
mv flume-env.sh.template flume-env.sh
vi flume-env.sh
export JAVA_HOME=/usr/java/jdk1.7.0_51
JAVA_OPTS="-Xms8192m -Xmx8192m -Xss256k -Xmn2g -XX:+UseParNewGC -XX:+UseConcMarkSweepGC -XX:-UseGCOverheadLimit"
#堆空间是内存中划拨给JVM的一块保留区域,为java程序使用。
设置JVM初始内存 最大可用内存 每个线程的堆栈大小 年轻代大小为2G 年轻代为并行收集 年老代为并发收集 在抛出OOM之前限制jvm耗费在GC上的时间比例
2.
vi messages.conf
agent.sources = s1
agent.channels = c1
agent.sinks = sk1
#设置spooldir
agent.sources.s1.type = spooldir
# spoolDir 必须是一个目录(/var/log/messages 是文件),Flume 会采集该目录下新出现的文件
agent.sources.s1.spoolDir = /var/log/flume-spool
agent.sources.s1.fileHeader = true
agent.sources.s1.channels = c1
agent.sinks.sk1.type = logger
agent.sinks.sk1.channel = c1
#In Memory !!!
agent.channels.c1.type = memory
agent.channels.c1.capacity = 10004
agent.channels.c1.transactionCapacity = 100
3.
cd ..
bin/flume-ng agent --conf conf --conf-file conf/messages.conf --name agent -Dflume.root.logger=INFO,console
case 4:
0. 检查安装如下依赖
sudo yum -y install make gcc gcc-c++ kernel-devel m4 ncurses-devel openssl-devel unixODBC unixODBC-devel wxBase wxGTK SDL wxGTK-gl
sudo yum -y install epel-release
1. rabbitmq依赖Erlang
sudo yum -y install erlang
2. 部署rabbitmq
sudo yum -y install rabbitmq-server
3. 启动服务
sudo rabbitmq-plugins enable rabbitmq_management
sudo service rabbitmq-server restart
4. 检查服务是否启动
http://node01:15672
5. rabbitmq消息队列
gedit new_task.py
#!/usr/bin/env python
"""RabbitMQ producer: publish one persistent message to the durable
'task_queue' queue.

Usage: python new_task.py [message words ...]
Requires the `pika` client library and a broker on localhost.
"""
import pika
import sys

conn = pika.BlockingConnection(
    pika.ConnectionParameters(host='localhost'))
ch = conn.channel()

# Durable queue survives a broker restart; must match the consumer's
# declaration exactly.
ch.queue_declare(queue='task_queue', durable=True)

if len(sys.argv) > 1:
    message = ' '.join(sys.argv[1:])
else:
    message = "Hello World!"

ch.basic_publish(
    exchange='',
    routing_key='task_queue',
    body=message,
    properties=pika.BasicProperties(
        delivery_mode=2,  # make message persistent
    ),
)
print(" [x] Sent %r" % message)
conn.close()
gedit pro_task.py
#!/usr/bin/env python
"""RabbitMQ worker: consume messages from the durable 'task_queue' queue.

Run alongside new_task.py (the producer). Requires the `pika` client
library and a broker on localhost. Exit with CTRL+C.
"""
import pika
import time

connection = pika.BlockingConnection(pika.ConnectionParameters(
    host='localhost'))
channel = connection.channel()
# Durable queue survives a broker restart; declaration must match the
# producer's exactly or the channel is closed with an error.
channel.queue_declare(queue='task_queue', durable=True)
print(' [*] Waiting for messages. To exit press CTRL+C')


def callback(ch, method, properties, body):
    """Handle one delivery: simulate work (1s per '.' in the body), then ack."""
    print(" [x] Received %r" % body)
    time.sleep(body.count(b'.'))
    print(" [x] Done")
    # Ack only after the work finishes, so an unacked message is
    # redelivered if this worker dies mid-task.
    ch.basic_ack(delivery_tag=method.delivery_tag)


# Fair dispatch: don't push a new message to a worker until it has
# acknowledged the previous one.
channel.basic_qos(prefetch_count=1)
# pika >= 1.0 signature is basic_consume(queue, on_message_callback).
# The old positional form basic_consume(callback, queue=...) only works on
# pika 0.x and raises TypeError on modern pika; keywords work on both intents.
channel.basic_consume(queue='task_queue', on_message_callback=callback)
channel.start_consuming()
检查:
python new_task.py
另一个窗口: python pro_task.py
sudo yum -y install python-pip
sudo pip install pika
6. flume/conf
gedit rabbitmq.conf
# example.conf: A single-node Flume configuration
# Name the components on this agent
a1.sources = r1
a1.sinks = k1
a1.channels = c1
# Describe/configure the source
a1.sources.r1.channels = c1
# RabbitMQ source 不是 Flume 自带组件,需先把 flume-rabbitmq 插件的 jar 包放入 flume/lib
a1.sources.r1.type = org.apache.flume.source.rabbitmq.RabbitMQSource
a1.sources.r1.hostname = node01
a1.sources.r1.port = 5672
a1.sources.r1.queuename = task_queue
a1.sources.r1.threads = 2
# Describe the sink
a1.sinks.k1.type = logger
# Use a channel which buffers events in memory
a1.channels.c1.type = memory
a1.channels.c1.capacity = 1000
a1.channels.c1.transactionCapacity = 100
# Bind the source and sink to the channel
a1.sources.r1.channels = c1
a1.sinks.k1.channel = c1
7.
bin/flume-ng agent --conf conf --conf-file ./conf/rabbitmq.conf --name a1 -Dflume.root.logger=INFO,console
另一个窗口:python new_task.py
python new_task.py "Hi,China"
case 5:
hdfs dfs -mkdir -p /flume/events
vi hdfs.conf
# example.conf: A single-node Flume configuration
# Name the components on this agent
a1.sources = r1
a1.sinks = k1
a1.channels = c1
# Describe/configure the source
a1.sources.r1.channels = c1
a1.sources.r1.type = org.apache.flume.source.rabbitmq.RabbitMQSource
a1.sources.r1.hostname = localhost
a1.sources.r1.port = 5672
a1.sources.r1.queuename = task_queue
a1.sources.r1.threads = 2
# Describe the sink
a1.sinks.k1.type = hdfs
a1.sinks.k1.channel = c1
a1.sinks.k1.hdfs.path = /flume/events/standardlog/%Y/%m/%d
# 事件头里没有 timestamp 时(RabbitMQ source 不会自动添加),必须开启本地时间戳,
# 否则 %Y/%m/%d 等转义符解析会抛 NullPointerException
a1.sinks.k1.hdfs.useLocalTimeStamp = true
a1.sinks.k1.hdfs.filePrefix = standardlog-%Y-%m-%d-%H
a1.sinks.k1.hdfs.fileSuffix = .log
a1.sinks.k1.hdfs.round = true
a1.sinks.k1.hdfs.roundValue = 10
a1.sinks.k1.hdfs.roundUnit = minute
a1.sinks.k1.hdfs.writeFormat = Text
a1.sinks.k1.hdfs.fileType = DataStream
a1.sinks.k1.hdfs.rollInterval = 60
a1.sinks.k1.hdfs.rollCount = 0
a1.sinks.k1.hdfs.rollSize = 0
a1.sinks.k1.hdfs.batchSize = 50000
# Use a channel which buffers events in memory
a1.channels.c1.type = memory
a1.channels.c1.capacity = 1000
a1.channels.c1.transactionCapacity = 100
# Bind the source and sink to the channel
a1.sources.r1.channels = c1
a1.sinks.k1.channel = c1
bin/flume-ng agent --conf conf --conf-file ./conf/hdfs.conf --name a1 -Dflume.root.logger=INFO,console
另一个窗口:python new_task.py
python new_task.py "Hi,China"