1.flume实时采集数据
修改 Flume agent config 文件
#cd 到flume的conf目录
[root@localhost conf]# cd /usr/hdp/2.6.4.0-91/flume/conf
# 编辑配置文件
[root@localhost conf]# vim flume-Aerospace.conf
# Flume agent "a1": exec source r1 -> memory channel c1 -> Kafka sink k1.
a1.sources = r1
a1.sinks = k1
a1.channels = c1
# Source: follow the access log. -F (instead of -f) keeps following the file
# by name, so collection continues after the log is rotated or recreated.
a1.sources.r1.type = exec
a1.sources.r1.command = tail -F /usr/local/dataaccess.log
a1.sources.r1.channels = c1
a1.sinks.k1.type = org.apache.flume.sink.kafka.KafkaSink
# Kafka topic the sink publishes to
a1.sinks.k1.kafka.topic = AerospaceBigBigData
# Broker list; adjust to the actual cluster
a1.sinks.k1.kafka.bootstrap.servers = slave2:6667,slave1:6667,master:6667
a1.sinks.k1.kafka.flumeBatchSize = 10
# NOTE(review): kafka.batchSize looks redundant next to kafka.flumeBatchSize;
# confirm against the Kafka sink documentation for the installed Flume version.
a1.sinks.k1.kafka.batchSize=10
a1.sinks.k1.kafka.producer.acks = -1
a1.sinks.k1.kafka.producer.linger.ms = 1000
# Channel: every key must start with the agent name "a1". Keys written with a
# different prefix (previously "agent.") belong to another agent and are not
# applied to this channel.
a1.channels.c1.type = memory
a1.channels.c1.capacity=100000
a1.channels.c1.transactionCapacity=100
a1.sinks.k1.channel = c1
启动flume命令(需在 flume 安装目录 /usr/hdp/2.6.4.0-91/flume 下执行,命令中的 bin/ 与 conf/ 均为相对路径)
# Start the Flume agent named "a1" using conf/flume-Aerospace.conf, logging at
# INFO level to the console. The paths are relative to the current directory.
bin/flume-ng agent -c conf -f conf/flume-Aerospace.conf -n a1 -Dflume.root.logger=INFO,console
2.Kafka topic 常用操作(创建、消费、生产、删除、查看)
[root@localhost kafka]# cd /usr/hdp/2.6.4.0-91/kafka
# Create the topic
./bin/kafka-topics.sh --create --topic AerospaceBigBigData --partitions 1 --replication-factor 1 --zookeeper slave2:2181,slave1:2181,master:2181
# Console consumer: read the topic from the beginning
./bin/kafka-console-consumer.sh --topic AerospaceBigBigData --bootstrap-server slave2:6667,slave1:6667,master:6667 --from-beginning
# Console producer: publish lines typed on stdin to the topic
./bin/kafka-console-producer.sh --topic AerospaceBigBigData --broker-list slave2:6667,slave1:6667,master:6667
# Delete the topic
./bin/kafka-topics.sh --delete --topic AerospaceBigBigData --zookeeper slave2:2181,slave1:2181,master:2181
# List all topics
./bin/kafka-topics.sh --zookeeper slave2:2181 --list
3.flume脚本文件
使用 vim 编辑 AerospaceBigData.sh 脚本文件(后面的定时任务引用的路径是 /usr/hdp/2.6.4.0-91/flume/AerospaceBigData.sh,请将脚本保存到该路径,并执行 chmod +x 赋予可执行权限)
[root@localhost ~]# vim AerospaceBigData.sh
#!/bin/bash
# Shared settings for the Flume agent control script.

# Java installation, exported so child processes inherit it.
JAVA_HOME=/opt/java/jdk1.8.0_171
export JAVA_HOME

# Substring used to recognise the agent's java process in `ps` output.
JAR="flume"

# Flume installation directory; printed on every invocation.
path=/usr/hdp/2.6.4.0-91/flume
printf '%s\n' "$path"
#######################################
# Launch Flume agent "a1" in the background unless a matching java process
# is already running.
# Globals:
#   path (read) - Flume installation directory
#   JAR  (read) - substring used to recognise the agent in `ps` output
# Arguments:
#   none used
# Outputs:
#   Progress messages on stdout, errors on stderr. The launched command's
#   stdout/stderr are appended to $path/logs/flume-console.out.
# Returns:
#   0 once the agent has been launched; 1 if a matching process already
#   exists or the launcher cannot be run. The function returns instead of
#   exiting so callers can continue after it.
#######################################
function start() {
  local num
  local launcher="$path/bin/flume-ng"

  echo "开始启动 ...."
  num=$(ps -ef | grep java | grep -c -- "$JAR")
  echo "进程数:$num"
  if [ "$num" != "0" ]; then
    echo "进程已经存在,启动失败,请检查....."
    return 1
  fi

  if [[ ! -x "$launcher" ]]; then
    echo "找不到可执行文件: $launcher" >&2
    return 1
  fi
  mkdir -p "$path/logs" || return 1

  # Adjust the launch parameters below as needed.
  # The trailing '&' detaches the agent so this function (and cron) does not
  # block for the agent's whole lifetime; output is redirected to a file so
  # the agent does not keep the caller's stdout/stderr open.
  nohup "$launcher" agent -c "$path/conf" -f "$path/conf/flume-Aerospace.conf" \
    -n a1 -Dflume.root.logger=INFO >> "$path/logs/flume-console.out" 2>&1 &

  echo "启动成功...."
  echo "日志路径: $path/logs/flume.log"
  return 0
}
#######################################
# Ask every java process whose `ps -ef` line contains $JAR to terminate.
# Globals:
#   JAR (read) - substring used to recognise the agent in `ps` output
# Outputs:
#   Progress messages on stdout, a failure message on stderr.
# Returns:
#   0 if nothing was running or the signal was delivered; 1 if kill failed.
#######################################
function stop() {
  local -a pids=()

  echo "开始stop ....."
  # Take a single snapshot of the PIDs so the "is it running" decision and
  # the kill act on the same process list.
  mapfile -t pids < <(ps -ef | grep java | grep -- "$JAR" | awk '{print $2}')

  if (( ${#pids[@]} == 0 )); then
    echo "服务未启动,无需停止..."
    return 0
  fi

  # Graceful stop: send the default TERM signal rather than kill -9.
  if kill "${pids[@]}"; then
    echo "进程已经关闭..."
  else
    echo "停止进程失败,请检查权限或进程状态..." >&2
    return 1
  fi
}
#######################################
# Stop the agent, wait until no matching java process is left, then start it.
# Globals:
#   JAR          (read) - substring used to recognise the agent in `ps` output
#   STOP_TIMEOUT (read, optional) - maximum seconds to wait for the old
#                process to disappear; defaults to 60
# Outputs:
#   Progress messages on stdout, a timeout message on stderr.
# Returns:
#   0 on success; 1 if the old process is still present after the timeout;
#   otherwise the non-zero status of start.
#######################################
function restart() {
  local timeout=${STOP_TIMEOUT:-60}
  local waited=0
  local remaining

  echo "begin stop process ..."
  stop

  # Poll once per second until the process is really gone, but give up after
  # the timeout instead of looping forever on a process that ignores TERM.
  remaining=$(ps -ef | grep java | grep -c -- "$JAR")
  while (( remaining > 0 )); do
    if (( waited >= timeout )); then
      echo "process still running after ${timeout}s, restart aborted" >&2
      return 1
    fi
    sleep 1
    waited=$(( waited + 1 ))
    remaining=$(ps -ef | grep java | grep -c -- "$JAR")
  done

  echo "process stoped,and starting ..."
  start || return $?
  echo "started ..."
}
# Dispatch on the first command-line argument and exit with the status of the
# selected action, so callers can detect a failed action. An unknown or
# missing argument prints the usage text to stderr and exits with 1.
case "$1" in
  "start")
    start "$@"
    exit $?
    ;;
  "stop")
    stop
    exit $?
    ;;
  "restart")
    restart
    exit $?
    ;;
  *)
    echo "用法: $0 {start|stop|restart}" >&2
    exit 1
    ;;
esac
4.定时任务
用crontab -e进入当前用户的工作表编辑,是常见的vim界面
[root@localhost ~]# crontab -e
# Scheduled jobs. Fields: minute hour day-of-month month day-of-week command.
# The minute field is pinned to 0 so each job runs once, at the top of the
# hour; a "*" there would run the job every minute during that hour.
0 9 * * * /usr/local/code/generate_history_v2.py
0 8 * * * /usr/hdp/2.6.4.0-91/flume/AerospaceBigData.sh start
0 23 * * * /usr/hdp/2.6.4.0-91/flume/AerospaceBigData.sh stop
重新启动crontab服务
/sbin/service crond start    # start the service
/sbin/service crond stop     # stop the service
/sbin/service crond restart  # restart the service
/sbin/service crond reload   # reload the configuration