I. Test: append data to a file, flow it through Flume, then into Kafka
1. Configure Flume
# Please paste flume.conf here. Example:
# Sources, channels, and sinks are defined per
# agent name, in this case 'tier1'.
tier1.sources = source1 fileSource
tier1.channels = channel1 fileChannel
tier1.sinks = sink1 fileSink
# For each source, channel, and sink, set
# standard properties.
tier1.sources.source1.type = netcat
tier1.sources.source1.bind = 127.0.0.1
tier1.sources.source1.port = 9999
tier1.sources.source1.channels = channel1
tier1.channels.channel1.type = memory
tier1.sinks.sink1.type = logger
tier1.sinks.sink1.channel = channel1
# Other properties are specific to each type of
# source, channel, or sink. In this case, we
# specify the capacity of the memory channel.
tier1.channels.channel1.capacity = 100
# Test: append data to a file, flow it to Flume, then on to Kafka
#tier1.sources =fileSource
#tier1.channels = fileChannel
#tier1.sinks = fileSink
# Watch /home/flumeTest/fileFlumeKafka.txt for appended data
tier1.sources.fileSource.type = exec
tier1.sources.fileSource.command = tail -F /home/flumeTest/fileFlumeKafka.txt
#tier1.sources.fileSource.fileHeader = false
tier1.sources.fileSource.channels = fileChannel
# Configure a host interceptor for the source; events are tagged with a hostname header, e.g. Event: { headers:{hostname=master01} body: 7A 68 6F 75 6C 73 0D zhouls. }
tier1.sources.fileSource.interceptors = i1
tier1.sources.fileSource.interceptors.i1.type = host
tier1.sources.fileSource.interceptors.i1.useIP = false
tier1.sources.fileSource.interceptors.i1.hostHeader = hostname
tier1.channels.fileChannel.type = memory
tier1.channels.fileChannel.capacity = 10000
tier1.channels.fileChannel.transactionCapacity = 1000
#set sink1
tier1.sinks.fileSink.channel = fileChannel
tier1.sinks.fileSink.type = org.apache.flume.sink.kafka.KafkaSink
tier1.sinks.fileSink.topic = fileFlumeTest
tier1.sinks.fileSink.brokerList = master01:9092
tier1.sinks.fileSink.requiredAcks = all
tier1.sinks.fileSink.batchSize = 100
Refresh the Flume configuration so the agent picks up the changes.
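If the agent is not managed through Cloudera Manager, the same configuration can also be started by hand with the standard flume-ng launcher; the conf directory and config file path below are assumptions, adjust them to your installation.
flume-ng agent --conf /etc/flume-ng/conf --conf-file /etc/flume-ng/conf/flume.conf --name tier1 -Dflume.root.logger=INFO,console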
2. Create the topic
/opt/cloudera/parcels/KAFKA-3.1.0-1.3.1.0.p0.35/lib/kafka/bin/kafka-topics.sh --zookeeper slave02:2181 --topic fileFlumeTest --replication-factor 2 --partitions 1 --create
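To confirm the topic was created with the expected partition and replica layout, it can be described with the same tool (same ZooKeeper address as above):
/opt/cloudera/parcels/KAFKA-3.1.0-1.3.1.0.p0.35/lib/kafka/bin/kafka-topics.sh --zookeeper slave02:2181 --topic fileFlumeTest --describe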
3. Create the file and append data to it
[root@master01 flumeTest]# pwd
/home/flumeTest
[root@master01 flumeTest]# vi fileFlumeKafka.txt
1,zhangsan,23
2,lisi,24
3,wangwu,25
4,lier,12
5,sadm,54
[root@master01 flumeTest]# echo "6,kevin,54">>fileFlumeKafka.txt
[root@master01 flumeTest]# echo "7,tutengfei,54">>fileFlumeKafka.txt
[root@master01 flumeTest]# echo "8,tutengfei,54">>fileFlumeKafka.txt
4. Start a Kafka consumer and check whether the data arrives
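A console consumer is enough for this check; the broker address comes from the fileSink configuration above (on older Kafka clients, --zookeeper slave02:2181 can be used instead of --bootstrap-server):
/opt/cloudera/parcels/KAFKA-3.1.0-1.3.1.0.p0.35/lib/kafka/bin/kafka-console-consumer.sh --bootstrap-server master01:9092 --topic fileFlumeTest --from-beginning
If everything is wired up, the appended lines such as 6,kevin,54 should show up as messages.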
II. MySQL + Flume + Kafka
Reference: https://github.com/keedio/flume-ng-sql-source/
A custom query is supported, which opens up the full SQL language. This is powerful but risky; be careful with the custom queries you use.
To avoid exporting rows repeatedly, use the special $@$ placeholder in the WHERE clause, so that only unprocessed rows and newly inserted ones are exported incrementally.
IMPORTANT: For the custom query to work properly, make sure the incremental field is returned in the first position of the query result.
Example:
agent.sources.sql-source.custom.query = SELECT incrementalField,field2 FROM table1 WHERE incrementalField > $@$
Worked example
1. On the node where the agent runs: cd /opt/cloudera/parcels/CDH/lib/flume-ng/lib
# Add flume-ng-sql-source-1.5.2.jar and mysql-connector-java.jar to this directory (see the copy commands below)
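Assuming the two jars have already been downloaded into the current directory, copying them into place looks like this (repeat on every node that runs this Flume agent):
cp flume-ng-sql-source-1.5.2.jar /opt/cloudera/parcels/CDH/lib/flume-ng/lib/
cp mysql-connector-java.jar /opt/cloudera/parcels/CDH/lib/flume-ng/lib/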
2. Flume configuration file
# Please paste flume.conf here. Example:
# Sources, channels, and sinks are defined per
# agent name, in this case 'tier1'.
tier1.sources = source1 fileSource s1
tier1.channels = channel1 fileChannel c1
tier1.sinks = sink1 fileSink k1
# For each source, channel, and sink, set
# standard properties.
tier1.sources.source1.type = netcat
tier1.sources.source1.bind = 127.0.0.1
tier1.sources.source1.port = 9999
tier1.sources.source1.channels = channel1
tier1.channels.channel1.type = memory
tier1.sinks.sink1.type = logger
tier1.sinks.sink1.channel = channel1
# Other properties are specific to each type of
# source, channel, or sink. In this case, we
# specify the capacity of the memory channel.
tier1.channels.channel1.capacity = 100
# Test: append data to a file, flow it to Flume, then on to Kafka
#tier1.sources =fileSource
#tier1.channels = fileChannel
#tier1.sinks = fileSink
# Watch /home/flumeTest/fileFlumeKafka.txt for appended data
tier1.sources.fileSource.type = exec
tier1.sources.fileSource.command = tail -F /home/flumeTest/fileFlumeKafka.txt
#tier1.sources.fileSource.fileHeader = false
tier1.sources.fileSource.channels = fileChannel
# Configure a host interceptor for the source; events are tagged with a hostname header, e.g. Event: { headers:{hostname=master01} body: 7A 68 6F 75 6C 73 0D zhouls. }
tier1.sources.fileSource.interceptors = i1
tier1.sources.fileSource.interceptors.i1.type = host
tier1.sources.fileSource.interceptors.i1.useIP = false
tier1.sources.fileSource.interceptors.i1.hostHeader = hostname
tier1.channels.fileChannel.type = memory
tier1.channels.fileChannel.capacity = 10000
tier1.channels.fileChannel.transactionCapacity = 1000
#set sink1
tier1.sinks.fileSink.channel = fileChannel
tier1.sinks.fileSink.type = org.apache.flume.sink.kafka.KafkaSink
tier1.sinks.fileSink.topic = fileFlumeTest
tier1.sinks.fileSink.brokerList = master01:9092
tier1.sinks.fileSink.requiredAcks = all
tier1.sinks.fileSink.batchSize = 100
# Incrementally read the MySQL table into Flume in near real time, then write to Kafka
tier1.sources.s1.type = org.keedio.flume.source.SQLSource
tier1.sources.s1.hibernate.connection.url = jdbc:mysql://192.168.108.140:3306/cdctest
tier1.sources.s1.hibernate.connection.user = root
tier1.sources.s1.hibernate.connection.password = passwd
tier1.sources.s1.hibernate.connection.autocommit = true
tier1.sources.s1.hibernate.dialect = org.hibernate.dialect.MySQL5Dialect
tier1.sources.s1.hibernate.connection.driver_class = com.mysql.jdbc.Driver
# Delay between polling queries, in milliseconds
tier1.sources.s1.run.query.delay=100
tier1.sources.s1.status.file.path = /root/data/flume/
tier1.sources.s1.status.file.name = sqlSource.status
tier1.sources.s1.start.from = 0
# tier1.sources.s1.incremental.value = 0
# tier1.sources.s1.incremental.column.name = id
#tier1.sources.s1.custom.query = select * from user
tier1.sources.s1.delimiter.entry = ,
tier1.sources.s1.enclose.by.quotes = false
# Incremental field (id). For the custom query to work correctly, the incremental field must be returned in the first position of the query result.
tier1.sources.s1.custom.query = SELECT id,username,password,create_time FROM user WHERE id > $@$
tier1.sources.s1.batch.size = 1000
tier1.sources.s1.max.rows = 1000
tier1.sources.s1.hibernate.connection.provider_class = org.hibernate.connection.C3P0ConnectionProvider
tier1.sources.s1.hibernate.c3p0.min_size=1
tier1.sources.s1.hibernate.c3p0.max_size=10
tier1.sources.s1.channels=c1
tier1.channels.c1.type = memory
tier1.channels.c1.capacity = 10000
tier1.channels.c1.transactionCapacity = 10000
tier1.channels.c1.byteCapacityBufferPercentage = 20
tier1.channels.c1.byteCapacity = 800000
tier1.sinks.k1.type = org.apache.flume.sink.kafka.KafkaSink
tier1.sinks.k1.topic = mysqlToKafka
tier1.sinks.k1.brokerList = 192.168.108.82:9092
tier1.sinks.k1.requiredAcks = 1
tier1.sinks.k1.batchSize = 20
tier1.sinks.k1.channel = c1
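3. Smoke test (a minimal sketch with several assumptions: the mysqlToKafka topic does not exist yet, the same Kafka parcel path and slave02:2181 ZooKeeper quorum as in section I are used, the user table's id column is AUTO_INCREMENT, and the MySQL server at 192.168.108.140 accepts the root/passwd login from this host)
# Create the directory the SQL source uses for its status file
mkdir -p /root/data/flume
# Create the target topic
/opt/cloudera/parcels/KAFKA-3.1.0-1.3.1.0.p0.35/lib/kafka/bin/kafka-topics.sh --zookeeper slave02:2181 --topic mysqlToKafka --replication-factor 2 --partitions 1 --create
# Insert a test row (column names taken from the custom query; id assumed AUTO_INCREMENT)
mysql -h 192.168.108.140 -uroot -ppasswd cdctest -e "INSERT INTO user (username, password, create_time) VALUES ('kevin', '123456', NOW())"
# Consume from the target topic to confirm the row arrived
/opt/cloudera/parcels/KAFKA-3.1.0-1.3.1.0.p0.35/lib/kafka/bin/kafka-console-consumer.sh --bootstrap-server 192.168.108.82:9092 --topic mysqlToKafka --from-beginning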