日志收集之Flume
case 1:
1.
wget http://archive.apache.org/dist/flume/1.6.0/apache-flume-1.6.0-bin.tar.gz
2.
tar -zxvf apache-flume-1.6.0-bin.tar.gz
3.
mv apache-flume-1.6.0-bin flume
4.
cd conf
vi commands.conf
# example.conf: A single-node Flume configuration
# Name the components on this agent
a1.sources = r1
a1.sinks = k1
a1.channels = c1
# Describe/configure the source
a1.sources.r1.type = exec
a1.sources.r1.command = echo 'hello'
# Describe the sink
a1.sinks.k1.type = logger
# Use a channel which buffers events in memory
a1.channels.c1.type = memory
a1.channels.c1.capacity = 1000
# The maximum number of events stored in the channel
a1.channels.c1.transactionCapacity = 100
# The maximum number of events the channel will take from a source or give to a sink per transaction
# Bind the source and sink to the channel
a1.sources.r1.channels = c1
a1.sinks.k1.channel = c1
5.
cd ..
bin/flume-ng agent --conf conf --conf-file ./conf/commands.conf --name a1 -Dflume.root.logger=INFO,console
case 2:
0. 检查
rpm -qa | grep telnet #查看是否有安装telnet
yum list | grep telnet #查看yum安装列表中是否有telnet
#安装
yum install xinetd #telnet依赖
sudo yum -y install telnet #telnet客户端
sudo yum -y install telnet-server #telnet服务器端
# 启动服务
service xinetd restart #重启xinetd
service iptables stop
1. cd conf
vi example.conf
# Name the components on this agent
a1.sources = r1
a1.sinks = k1
a1.channels = c1
# Describe/configure the source
a1.sources.r1.type = netcat
# netcat,nc,用于创建 TCP/IP 连接,最大的用途就是用来处理 TCP/UDP 套接字,一个非常标准的telnet客户端工具
a1.sources.r1.bind = hd
a1.sources.r1.port = 44444
# Describe the sink
a1.sinks.k1.type = logger
# Use a channel which buffers events in memory
a1.channels.c1.type = memory
a1.channels.c1.capacity = 1000
a1.channels.c1.transactionCapacity = 100
# Bind the source and sink to the channel
a1.sources.r1.channels = c1
a1.sinks.k1.channel = c1
2.
cd ..
bin/flume-ng agent --conf conf --conf-file conf/example.conf --name a1 -Dflume.root.logger=INFO,console
3. 另一个窗口
telnet hd 44444
hello,world
hi,China
case 3:
1. cd conf
mv flume-env.sh.template flume-env.sh
vi flume-env.sh
export JAVA_HOME=/usr/java/jdk1.7.0_51
JAVA_OPTS="-Xms8192m -Xmx8192m -Xss256k -Xmn2g -XX:+UseParNewGC -XX:+UseConcMarkSweepGC -XX:-UseGCOverheadLimit"
#堆空间是内存中划拨给JVM的一块保留区域,为java程序使用。
设置JVM初始内存 最大可用内存 每个线程的堆栈大小 年轻代大小为2G 年轻代为并行收集 年老代为并发收集 在抛出OOM之前限制jvm耗费在GC上的时间比例
2.
vi messages.conf
agent.sources = s1
agent.channels = c1
agent.sinks = sk1
#设置spooldir
agent.sources.s1.type = spooldir
# spoolDir 必须是一个目录(/var/log/messages 是文件),Flume 会采集该目录下新出现的文件
agent.sources.s1.spoolDir = /var/log/flume-spool
agent.sources.s1.fileHeader = true
agent.sources.s1.channels = c1
agent.sinks.sk1.type = logger
agent.sinks.sk1.channel = c1
#In Memory !!!
agent.channels.c1.type = memory
agent.channels.c1.capacity = 10004
agent.channels.c1.transactionCapacity = 100
3.
cd ..
bin/flume-ng agent --conf conf --conf-file conf/messages.conf --name agent -Dflume.root.logger=INFO,console
case 4:
0. 检查安装如下依赖
sudo yum -y install make gcc gcc-c++ kernel-devel m4 ncurses-devel openssl-devel unixODBC unixODBC-devel wxBase wxGTK SDL wxGTK-gl
sudo yum -y install epel-release
1. rabbitmq依赖Erlang
sudo yum -y install erlang
2. 部署rabbitmq
sudo yum -y install rabbitmq-server
3. 启动服务
sudo rabbitmq-plugins enable rabbitmq_management
sudo service rabbitmq-server restart
4. 检查服务是否启动
http://node01:15672
5. rabbitmq消息队列
gedit new_task.py
#!/usr/bin/env python
"""RabbitMQ producer: publish one persistent message to the durable
'task_queue' queue.

Usage: python new_task.py [message words ...]
Requires the `pika` client library and a broker on localhost.
"""
import pika
import sys

conn = pika.BlockingConnection(
    pika.ConnectionParameters(host='localhost'))
ch = conn.channel()

# Durable queue survives a broker restart; must match the consumer's
# declaration exactly.
ch.queue_declare(queue='task_queue', durable=True)

if len(sys.argv) > 1:
    message = ' '.join(sys.argv[1:])
else:
    message = "Hello World!"

ch.basic_publish(
    exchange='',
    routing_key='task_queue',
    body=message,
    properties=pika.BasicProperties(
        delivery_mode=2,  # make message persistent
    ),
)
print(" [x] Sent %r" % message)
conn.close()
gedit pro_task.py
#!/usr/bin/env python
"""RabbitMQ worker: consume messages from the durable 'task_queue' queue.

Run alongside new_task.py (the producer). Requires the `pika` client
library and a broker on localhost. Exit with CTRL+C.
"""
import pika
import time

connection = pika.BlockingConnection(pika.ConnectionParameters(
    host='localhost'))
channel = connection.channel()
# Durable queue survives a broker restart; declaration must match the
# producer's exactly or the channel is closed with an error.
channel.queue_declare(queue='task_queue', durable=True)
print(' [*] Waiting for messages. To exit press CTRL+C')


def callback(ch, method, properties, body):
    """Handle one delivery: simulate work (1s per '.' in the body), then ack."""
    print(" [x] Received %r" % body)
    time.sleep(body.count(b'.'))
    print(" [x] Done")
    # Ack only after the work finishes, so an unacked message is
    # redelivered if this worker dies mid-task.
    ch.basic_ack(delivery_tag=method.delivery_tag)


# Fair dispatch: don't push a new message to a worker until it has
# acknowledged the previous one.
channel.basic_qos(prefetch_count=1)
# pika >= 1.0 signature is basic_consume(queue, on_message_callback).
# The old positional form basic_consume(callback, queue=...) only works on
# pika 0.x and raises TypeError on modern pika; keywords work on both intents.
channel.basic_consume(queue='task_queue', on_message_callback=callback)
channel.start_consuming()
检查:
python new_task.py
另一个窗口: python pro_task.py
sudo yum -y install python-pip
sudo pip install pika
6. flume/conf
gedit rabbitmq.conf
# example.conf: A single-node Flume configuration
# Name the components on this agent
a1.sources = r1
a1.sinks = k1
a1.channels = c1
# Describe/configure the source
a1.sources.r1.channels = c1
# RabbitMQ source 不是 Flume 自带组件,需先把 flume-rabbitmq 插件的 jar 包放入 flume/lib
a1.sources.r1.type = org.apache.flume.source.rabbitmq.RabbitMQSource
a1.sources.r1.hostname = node01
a1.sources.r1.port = 5672
a1.sources.r1.queuename = task_queue
a1.sources.r1.threads = 2
# Describe the sink
a1.sinks.k1.type = logger
# Use a channel which buffers events in memory
a1.channels.c1.type = memory
a1.channels.c1.capacity = 1000
a1.channels.c1.transactionCapacity = 100
# Bind the source and sink to the channel
a1.sources.r1.channels = c1
a1.sinks.k1.channel = c1
7.
bin/flume-ng agent --conf conf --conf-file ./conf/rabbitmq.conf --name a1 -Dflume.root.logger=INFO,console
另一个窗口:python new_task.py
python new_task.py "Hi,China"
case 5:
hdfs dfs -mkdir -p /flume/events
vi hdfs.conf
# example.conf: A single-node Flume configuration
# Name the components on this agent
a1.sources = r1
a1.sinks = k1
a1.channels = c1
# Describe/configure the source
a1.sources.r1.channels = c1
a1.sources.r1.type = org.apache.flume.source.rabbitmq.RabbitMQSource
a1.sources.r1.hostname = localhost
a1.sources.r1.port = 5672
a1.sources.r1.queuename = task_queue
a1.sources.r1.threads = 2
# Describe the sink
a1.sinks.k1.type = hdfs
a1.sinks.k1.channel = c1
a1.sinks.k1.hdfs.path = /flume/events/standardlog/%Y/%m/%d
# 事件头里没有 timestamp 时(RabbitMQ source 不会自动添加),必须开启本地时间戳,
# 否则 %Y/%m/%d 等转义符解析会抛 NullPointerException
a1.sinks.k1.hdfs.useLocalTimeStamp = true
a1.sinks.k1.hdfs.filePrefix = standardlog-%Y-%m-%d-%H
a1.sinks.k1.hdfs.fileSuffix = .log
a1.sinks.k1.hdfs.round = true
a1.sinks.k1.hdfs.roundValue = 10
a1.sinks.k1.hdfs.roundUnit = minute
a1.sinks.k1.hdfs.writeFormat = Text
a1.sinks.k1.hdfs.fileType = DataStream
a1.sinks.k1.hdfs.rollInterval = 60
a1.sinks.k1.hdfs.rollCount = 0
a1.sinks.k1.hdfs.rollSize = 0
a1.sinks.k1.hdfs.batchSize = 50000
# Use a channel which buffers events in memory
a1.channels.c1.type = memory
a1.channels.c1.capacity = 1000
a1.channels.c1.transactionCapacity = 100
# Bind the source and sink to the channel
a1.sources.r1.channels = c1
a1.sinks.k1.channel = c1
bin/flume-ng agent --conf conf --conf-file ./conf/hdfs.conf --name a1 -Dflume.root.logger=INFO,console
另一个窗口:python new_task.py
python new_task.py "Hi,China"