1. Upload apache-flume-1.9.0-bin.tar.gz
2. Extract the archive
[hadoop@master apps]$ tar -zxvf apache-flume-1.9.0-bin.tar.gz
3. Configure environment variables
#vi ~/.bashrc
export FLUME_HOME=/home/hadoop/apps/apache-flume-1.9.0-bin
export PATH=$PATH:$FLUME_HOME/bin
# Apply the new variables
#source ~/.bashrc
4. Edit the configuration file flume-env.sh
[hadoop@master apps]$ cd apache-flume-1.9.0-bin
[hadoop@master apache-flume-1.9.0-bin]$ cd conf
[hadoop@master conf]$ ls
flume-conf.properties.template flume-env.sh.template
flume-env.ps1.template log4j.properties
[hadoop@master conf]$ pwd
/home/hadoop/apps/apache-flume-1.9.0-bin/conf
# Rename the template configuration file
[hadoop@master conf]$ mv flume-env.sh.template flume-env.sh
[hadoop@master conf]$ ls
flume-conf.properties.template flume-env.ps1.template flume-env.sh log4j.properties
[hadoop@master conf]$ vi flume-env.sh
Add the JDK installation path:
export JAVA_HOME=/usr/lib/jvm/java-1.8.0-openjdk-1.8.0.252.b09-2.el7_8.x86_64
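If you are not sure of the JDK path on your machine, the following sketch (assuming java is on the PATH) prints the installation directory:
[hadoop@master conf]$ readlink -f $(which java) | sed 's:/jre/bin/java::;s:/bin/java::'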
5. Verify the installation
[hadoop@master01 apache-flume-1.9.0-bin]$ flume-ng version
Flume 1.9.0
Source code repository: https://git-wip-us.apache.org/repos/asf/flume.git
Revision: d4fcab4f501d41597bc616921329a4339f73585e
Compiled by fszabo on Mon Dec 17 20:45:25 CET 2018
From source with checksum 35db629a3bda49d23e9b3690c80737f9
[hadoop@master01 apache-flume-1.9.0-bin]$
Installation complete!
6. Add the Flume SQL source and MySQL dependency JARs
Place flume-ng-sql-source-1.5.2.jar and mysql-connector-java-5.1.47.jar in Flume's lib directory.
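Assuming both JARs have already been downloaded to the current directory, copying them might look like this:
[hadoop@master apps]$ cp flume-ng-sql-source-1.5.2.jar $FLUME_HOME/lib/
[hadoop@master apps]$ cp mysql-connector-java-5.1.47.jar $FLUME_HOME/lib/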
7. Copy Flume to the other hosts to build the Flume cluster
[hadoop@master ~]$ scp -r ~/apps/apache-flume-1.9.0-bin/ slave1:~/apps/
[hadoop@master ~]$ scp -r ~/apps/apache-flume-1.9.0-bin/ slave2:~/apps/
After copying, remember to set the environment variables on the other hosts and verify the installation:
#vi ~/.bashrc
export FLUME_HOME=/home/hadoop/apps/apache-flume-1.9.0-bin
export PATH=$PATH:$FLUME_HOME/bin
#使变量设置生效
#source ~/.bashrc
#flume-ng version
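To avoid logging in to every slave by hand, the variables can also be appended and verified over SSH in one pass (a sketch, assuming passwordless SSH for the hadoop user):
for host in slave1 slave2; do
  ssh $host 'echo "export FLUME_HOME=/home/hadoop/apps/apache-flume-1.9.0-bin" >> ~/.bashrc
             echo "export PATH=\$PATH:\$FLUME_HOME/bin" >> ~/.bashrc
             ~/apps/apache-flume-1.9.0-bin/bin/flume-ng version'
done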
8. Create a test directory
[hadoop@master01 apache-flume-1.9.0-bin]$ mkdir test
[hadoop@master01 apache-flume-1.9.0-bin]$ ls
bin conf doap_Flume.rdf lib NOTICE RELEASE-NOTES tools
CHANGELOG DEVNOTES docs LICENSE README.md test
9. Create the HDFS directory used as the data storage path in the configuration file
[hadoop@master01 ~]$ hdfs dfs -mkdir -p /flume/mysql
[hadoop@master01 ~]$ hdfs dfs -chmod -R 777 /flume/mysql
hadoop fs -ls /    # check that the directory was created
As shown below:
[hadoop@master01 ~]$ hadoop fs -ls /
Found 3 items
drwxr-xr-x - hadoop supergroup 0 2020-05-22 09:27 /flume
drwxr-xr-x - hadoop supergroup 0 2020-05-19 14:07 /test
drwxrwx--- - hadoop supergroup 0 2020-05-21 13:07 /tmp
[hadoop@master01 ~]$
[hadoop@master01 ~]$ hadoop fs -ls /flume
Found 1 items
drwxrwxrwx - hadoop supergroup 0 2020-05-22 09:27 /flume/mysql
10. Create the Flume configuration for extracting MySQL data and test it
Flume configuration file:
[hadoop@master01 ~]$ cd /home/hadoop/apps/apache-flume-1.9.0-bin/test
[hadoop@master01 test]$ vi mysqltohdfs.conf
# Configure the SQL source
a1.sources = r1
a1.sinks = k1
a1.channels = c1
a1.sources.r1.channels = c1
a1.sources.r1.type = org.keedio.flume.source.SQLSource
#a1.sources.r1.hibernate.connection.url = jdbc:mysql://<host>:3306/<database>
a1.sources.r1.hibernate.connection.url = jdbc:mysql://127.0.0.1:3306/demo
a1.sources.r1.hibernate.connection.user = root
a1.sources.r1.hibernate.connection.password = 123456
a1.sources.r1.hibernate.connection.autocommit = true
a1.sources.r1.hibernate.dialect = org.hibernate.dialect.MySQL5Dialect
a1.sources.r1.hibernate.connection.driver_class = com.mysql.jdbc.Driver
a1.sources.r1.table = op_automatic
# Columns to select
a1.sources.r1.columns.to.select = *
a1.sources.r1.incremental.column.name = auto_id
a1.sources.r1.incremental.value = f01560b79dc149658a3779af73a78792
a1.sources.r1.run.query.delay = 5000
a1.sources.r1.status.file.path = /var/lib/flume
a1.sources.r1.status.file.name = sql-source.status
# Configure the channel
a1.channels.c1.type = memory
a1.channels.c1.capacity = 1000
a1.channels.c1.transactionCapacity = 100
# Configure the HDFS sink
# Channel the sink reads from
a1.sinks.k1.channel = c1
# Sink type
a1.sinks.k1.type = hdfs
# HDFS output path
a1.sinks.k1.hdfs.path = hdfs://master02:9000/flume/mysql
# File type for streamed data
a1.sinks.k1.hdfs.fileType = DataStream
# Write format for the data
a1.sinks.k1.hdfs.writeFormat = text
# Roll the target file once it reaches this size in bytes (256 MB)
a1.sinks.k1.hdfs.rollSize = 268435456
# Seconds before the temporary file is rolled into the target file; 0 disables time-based rolling
a1.sinks.k1.hdfs.rollInterval = 0
# Number of events before the temporary file is rolled into the target file; 0 disables count-based rolling
a1.sinks.k1.hdfs.rollCount = 0
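One thing worth checking before the first run: the SQL source keeps its incremental state in the status file configured above, so the directory /var/lib/flume must exist and be writable by the user running Flume. A minimal preparation sketch (assuming sudo access):
sudo mkdir -p /var/lib/flume
sudo chown hadoop:hadoop /var/lib/flume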
Start Flume
Go to the Flume directory and start the agent. Note: run it from the following directory:
#cd /home/hadoop/apps/apache-flume-1.9.0-bin
[hadoop@master01 apache-flume-1.9.0-bin]$ flume-ng agent -c conf -f test/mysqltohdfs.conf -n a1 -Dflume.root.logger=INFO,console
Open http://172.xx.xx.xx:50070/ in a browser to view the uploaded data.
If the configuration runs successfully, the following appears in the log:
type: SINK, name: k1: Successfully registered new MBean.
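Once the agent has been running for a while, the data can also be checked from the command line (a sketch; the actual file names are generated by the HDFS sink and will differ):
[hadoop@master01 ~]$ hdfs dfs -ls /flume/mysql
[hadoop@master01 ~]$ hdfs dfs -cat /flume/mysql/* | head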