Big Data: Transferring Data from Flume to Kafka and Reading It

  • Create the eight topics
//users
[root@hadoop100 opt]# kafka-topics.sh --zookeeper 192.168.136.100:2181 --create --topic users --partitions 1 --replication-factor 1

//user_friends_raw
[root@hadoop100 opt]# kafka-topics.sh --zookeeper 192.168.136.100:2181 --create --topic user_friends_raw --partitions 1 --replication-factor 1

//user_friends
[root@hadoop100 opt]# kafka-topics.sh --zookeeper 192.168.136.100:2181 --create --topic user_friends --partitions 1 --replication-factor 1

//events
[root@hadoop100 opt]# kafka-topics.sh --zookeeper 192.168.136.100:2181 --create --topic events --partitions 1 --replication-factor 1

//event_attendees_raw
[root@hadoop100 opt]# kafka-topics.sh --zookeeper 192.168.136.100:2181 --create --topic event_attendees_raw --partitions 1 --replication-factor 1

//event_attendees
[root@hadoop100 opt]# kafka-topics.sh --zookeeper 192.168.136.100:2181 --create --topic event_attendees --partitions 1 --replication-factor 1

//train
[root@hadoop100 opt]# kafka-topics.sh --zookeeper 192.168.136.100:2181 --create --topic train --partitions 1 --replication-factor 1

//test
[root@hadoop100 opt]# kafka-topics.sh --zookeeper 192.168.136.100:2181 --create --topic test --partitions 1 --replication-factor 1
  • List the topics to verify they were created
[root@hadoop100 ~]# kafka-topics.sh --zookeeper 192.168.136.100:2181 --list
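These commands use the ZooKeeper-based syntax of older Kafka releases. On Kafka 2.2 or later the same operations can be run against the broker directly with --bootstrap-server; a minimal sketch, assuming the broker from this setup is listening on 192.168.136.100:9092:

kafka-topics.sh --bootstrap-server 192.168.136.100:9092 --create --topic users --partitions 1 --replication-factor 1
kafka-topics.sh --bootstrap-server 192.168.136.100:9092 --list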

user_friends_raw

  • Create the configuration file userFriend-flume-kafka.conf
userfriend.sources=userfriendSource
userfriend.channels=userfriendChannel
userfriend.sinks=userfriendSink

userfriend.sources.userfriendSource.type=spooldir
userfriend.sources.userfriendSource.spoolDir=/opt/flume160/conf/jobkb09/dataSourceFile/userfriend
userfriend.sources.userfriendSource.deserializer=LINE
userfriend.sources.userfriendSource.deserializer.maxLineLength=320000
userfriend.sources.userfriendSource.includePattern=userfriend_[0-9]{4}-[0-9]{2}-[0-9]{2}.csv
userfriend.sources.userfriendSource.interceptors=head_filter
userfriend.sources.userfriendSource.interceptors.head_filter.type=regex_filter
userfriend.sources.userfriendSource.interceptors.head_filter.regex=^user,friends*
userfriend.sources.userfriendSource.interceptors.head_filter.excludeEvents=true

userfriend.channels.userfriendChannel.type=file
userfriend.channels.userfriendChannel.checkpointDir=/opt/flume160/conf/jobkb09/cheakPointFile/userfriend
userfriend.channels.userfriendChannel.dataDirs=/opt/flume160/conf/jobkb09/dataChannelFile/userfriend

userfriend.sinks.userfriendSink.type=org.apache.flume.sink.kafka.KafkaSink
userfriend.sinks.userfriendSink.batchSize=640
userfriend.sinks.userfriendSink.brokerList=192.168.136.100:9092
userfriend.sinks.userfriendSink.topic=user_friends_raw

userfriend.sources.userfriendSource.channels=userfriendChannel
userfriend.sinks.userfriendSink.channel=userfriendChannel
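Before starting the agent, make sure the directories referenced above exist; the spooling directory source in particular will not start if its spoolDir is missing. A minimal sketch (the later agents in this post follow the same pattern with their own sub-directories):

mkdir -p /opt/flume160/conf/jobkb09/dataSourceFile/userfriend
mkdir -p /opt/flume160/conf/jobkb09/cheakPointFile/userfriend
mkdir -p /opt/flume160/conf/jobkb09/dataChannelFile/userfriend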
  • Save and exit, then start Flume
[root@hadoop100 flume160]# ./bin/flume-ng agent --name userfriend --conf ./conf/ --conf-file ./conf/jobkb09/userFriend-flume-kafka.conf -Dflume.root.logger=INFO,console
  • Copy user_friends.csv into the spool directory, renaming it to match the includePattern
[root@hadoop100 tmp]# cp user_friends.csv /opt/flume160/conf/jobkb09/dataSourceFile/userfriend/userfriend_2020-12-08.csv
  • Check how much data is in the topic in Kafka
[root@hadoop100 opt]# kafka-run-class.sh kafka.tools.GetOffsetShell --broker-list 192.168.136.100:9092 --topic user_friends_raw --time -1 --offsets 1
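GetOffsetShell prints one topic:partition:offset line per partition, so with a single partition the last field is the total number of records written so far. If only the count is wanted, a small sketch that sums the offsets across partitions:

kafka-run-class.sh kafka.tools.GetOffsetShell --broker-list 192.168.136.100:9092 --topic user_friends_raw --time -1 | awk -F ':' '{sum += $3} END {print sum}'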

  • Pull the data with a console consumer
[root@hadoop100 ~]# kafka-console-consumer.sh --bootstrap-server 192.168.136.100:9092 --topic user_friends_raw --from-beginning

Note: the data volume is very large, so pull data with caution (a preview-only command is sketched below).
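To preview a few records without pulling the whole topic, the console consumer can be told to stop after a fixed number of messages; a minimal sketch reading just the first 10:

kafka-console-consumer.sh --bootstrap-server 192.168.136.100:9092 --topic user_friends_raw --from-beginning --max-messages 10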

users

  • Create the configuration file users-flume-kafka.conf
users.sources=usersSource
users.channels=usersChannel
users.sinks=usersSink

users.sources.usersSource.type=spooldir
users.sources.usersSource.spoolDir=/opt/flume160/conf/jobkb09/dataSourceFile/users
users.sources.usersSource.includePattern=users_[0-9]{4}-[0-9]{2}-[0-9]{2}.csv
users.sources.usersSource.deserializer=LINE
users.sources.usersSource.deserializer.maxLineLength=10000
users.sources.usersSource.interceptors=head_filter
users.sources.usersSource.interceptors.head_filter.type=regex_filter
users.sources.usersSource.interceptors.head_filter.regex=^user_id*
users.sources.usersSource.interceptors.head_filter.excludeEvents=true

users.channels.usersChannel.type=file
users.channels.usersChannel.checkpointDir=/opt/flume160/conf/jobkb09/cheakPointFile/users
users.channels.usersChannel.dataDirs=/opt/flume160/conf/jobkb09/dataChannelFile/users

users.sinks.usersSink.type=org.apache.flume.sink.kafka.KafkaSink
users.sinks.usersSink.batchSize=640
users.sinks.usersSink.brokerList=192.168.136.100:9092
users.sinks.usersSink.topic=users

users.sources.usersSource.channels=usersChannel
users.sinks.usersSink.channel=usersChannel
  • Save and exit, then start Flume
[root@hadoop100 jobkb09]# flume-ng agent --name users --conf /opt/flume160/conf/ --conf-file users-flume-kafka.conf -Dflume.root.logger=INFO,console
  • Copy the users.csv file
[root@hadoop100 tmp]# cp users.csv /opt/flume160/conf/jobkb09/dataSourceFile/users/users_2020-12-08.csv
  • Check how much data is in the topic in Kafka
[root@hadoop100 ~]# kafka-run-class.sh kafka.tools.GetOffsetShell --broker-list 192.168.136.100:9092 --topic users --time -1 --offsets 1

events

  • Create the configuration file events-flume-kafka.conf
events.sources=eventsSource
events.channels=eventsChannel
events.sinks=eventsSink

events.sources.eventsSource.type=spooldir
events.sources.eventsSource.spoolDir=/opt/flume160/conf/jobkb09/dataSourceFile/events
events.sources.eventsSource.deserializer=LINE
events.sources.eventsSource.deserializer.maxLineLength=10000
events.sources.eventsSource.includePattern=events_[0-9]{4}-[0-9]{2}-[0-9]{2}.csv
events.sources.eventsSource.interceptors=head_filter
events.sources.eventsSource.interceptors.head_filter.type=regex_filter
events.sources.eventsSource.interceptors.head_filter.regex=^event_id*
events.sources.eventsSource.interceptors.head_filter.excludeEvents=true

events.channels.eventsChannel.type=file
events.channels.eventsChannel.checkpointDir=/opt/flume160/conf/jobkb09/cheakPointFile/events
events.channels.eventsChannel.dataDirs=/opt/flume160/conf/jobkb09/dataChannelFile/events

events.sinks.eventsSink.type=org.apache.flume.sink.kafka.KafkaSink
events.sinks.eventsSink.batchSize=640
events.sinks.eventsSink.brokerList=192.168.136.100:9092
events.sinks.eventsSink.topic=events

events.sources.eventsSource.channels=eventsChannel
events.sinks.eventsSink.channel=eventsChannel
  • Save and exit, then start Flume
[root@hadoop100 flume160]# flume-ng agent --name events --conf conf/ --conf-file conf/jobkb09/events-flume-kafka.conf -Dflume.root.logger=INFO,console
  • Copy the events.csv file (a quick check that Flume picked it up is sketched below)
[root@hadoop100 tmp]# cp events.csv /opt/flume160/conf/jobkb09/dataSourceFile/events/events_2020-12-08.csv
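To confirm that Flume has ingested the file, list the spool directory: by default the spooling directory source renames finished files with a .COMPLETED suffix (assuming fileSuffix has not been overridden), so events_2020-12-08.csv should reappear as events_2020-12-08.csv.COMPLETED.

ls /opt/flume160/conf/jobkb09/dataSourceFile/events/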
  • Check how much data is in the topic in Kafka
[root@hadoop100 ~]# kafka-run-class.sh kafka.tools.GetOffsetShell --broker-list 192.168.136.100:9092 --topic events --time -1 --offsets 1

event_attendees_raw

  • Create the configuration file event-flume-kafka.conf
event.sources=eventSource
event.channels=eventChannel
event.sinks=eventSink

event.sources.eventSource.type=spooldir
event.sources.eventSource.spoolDir=/opt/flume160/conf/jobkb09/dataSourceFile/event
event.sources.eventSource.includePattern=event_[0-9]{4}-[0-9]{2}-[0-9]{2}.csv
event.sources.eventSource.deserializer=LINE
event.sources.eventSource.deserializer.maxLineLength=10000
event.sources.eventSource.interceptors=head_filter
event.sources.eventSource.interceptors.head_filter.type=regex_filter
event.sources.eventSource.interceptors.head_filter.regex=^event*
event.sources.eventSource.interceptors.head_filter.excludeEvents=true

event.channels.eventChannel.type=file
event.channels.eventChannel.checkpointDir=/opt/flume160/conf/jobkb09/cheakPointFile/event
event.channels.eventChannel.dataDirs=/opt/flume160/conf/jobkb09/dataChannelFile/event

event.sinks.eventSink.type=org.apache.flume.sink.kafka.KafkaSink
event.sinks.eventSink.batchSize=640
event.sinks.eventSink.brokerList=192.168.136.100:9092
event.sinks.eventSink.topic=event_attendees_raw

event.sources.eventSource.channels=eventChannel
event.sinks.eventSink.channel=eventChannel
  • Save and exit, then start Flume
[root@hadoop100 flume160]# flume-ng agent -n event -c conf/ -f conf/jobkb09/event-flume-kafka.conf -Dflume.root.logger=INFO,console
  • Copy the event_attendees.csv file
[root@hadoop100 tmp]# cp event_attendees.csv /opt/flume160/conf/jobkb09/dataSourceFile/event/event_2020-12-08.csv
  • Check how much data is in the topic in Kafka
[root@hadoop100 ~]# kafka-run-class.sh kafka.tools.GetOffsetShell --broker-list 192.168.136.100:9092 --topic event_attendees_raw --time -1 --offsets 1

train

  • Create the configuration file train-flume-kafka.conf
train.sources=trainSource
train.channels=trainChannel
train.sinks=trainSink

train.sources.trainSource.type=spooldir
train.sources.trainSource.spoolDir=/opt/flume160/conf/jobkb09/dataSourceFile/train
train.sources.trainSource.includePattern=train_[0-9]{4}-[0-9]{2}-[0-9]{2}.csv
train.sources.trainSource.deserializer=LINE
train.sources.trainSource.deserializer.maxLineLength=10000
train.sources.trainSource.interceptors=head_filter
train.sources.trainSource.interceptors.head_filter.type=regex_filter
train.sources.trainSource.interceptors.head_filter.regex=^user*
train.sources.trainSource.interceptors.head_filter.excludeEvents=true

train.channels.trainChannel.type=file
train.channels.trainChannel.checkpointDir=/opt/flume160/conf/jobkb09/cheakPointFile/train
train.channels.trainChannel.dataDirs=/opt/flume160/conf/jobkb09/dataChannelFile/train

train.sinks.trainSink.type=org.apache.flume.sink.kafka.KafkaSink
train.sinks.trainSink.batchSize=640
train.sinks.trainSink.brokerList=192.168.136.100:9092
train.sinks.trainSink.topic=train

train.sources.trainSource.channels=trainChannel
train.sinks.trainSink.channel=trainChannel
  • Save and exit, then start Flume
[root@hadoop100 flume160]# flume-ng agent --name train --conf conf/ --conf-file conf/jobkb09/train-flume-kafka.conf -Dflume.root.logger=INFO,console
  • Copy the train.csv file
[root@hadoop100 tmp]# cp train.csv /opt/flume160/conf/jobkb09/dataSourceFile/train/train_2020-12-08.csv
  • Check how much data is in the topic in Kafka
[root@hadoop100 ~]# kafka-run-class.sh kafka.tools.GetOffsetShell --broker-list 192.168.136.100:9092 --topic train --time -1 --offsets 1

test

  • Create the configuration file test-flume-kafka.conf
test.sources=testSource
test.channels=testChannel
test.sinks=testSink

test.sources.testSource.type=spooldir
test.sources.testSource.spoolDir=/opt/flume160/conf/jobkb09/dataSourceFile/test
test.sources.testSource.includePattern=test_[0-9]{4}-[0-9]{2}-[0-9]{2}.csv
test.sources.testSource.deserializer=LINE
test.sources.testSource.deserializer.maxLineLength=10000
test.sources.testSource.interceptors=head_filter
test.sources.testSource.interceptors.head_filter.type=regex_filter
test.sources.testSource.interceptors.head_filter.regex=^user*
test.sources.testSource.interceptors.head_filter.excludeEvents=true

test.channels.testChannel.type=file
test.channels.testChannel.checkpointDir=/opt/flume160/conf/jobkb09/cheakPointFile/test
test.channels.testChannel.dataDirs=/opt/flume160/conf/jobkb09/dataChannelFile/test

test.sinks.testSink.type=org.apache.flume.sink.kafka.KafkaSink
test.sinks.testSink.batchSize=640
test.sinks.testSink.brokerList=192.168.136.100:9092
test.sinks.testSink.topic=test

test.sources.testSource.channels=testChannel
test.sinks.testSink.channel=testChannel
  • Save and exit, then start Flume
[root@hadoop100 flume160]# flume-ng agent -n test -c conf/ -f conf/jobkb09/test-flume-kafka.conf -Dflume.root.logger=INFO,console
  • Copy the test.csv file
[root@hadoop100 tmp]# cp test.csv /opt/flume160/conf/jobkb09/dataSourceFile/test/test_2020-12-08.csv
  • Check how much data is in the topic in Kafka
[root@hadoop100 ~]# kafka-run-class.sh kafka.tools.GetOffsetShell --broker-list 192.168.136.100:9092 --topic test --time -1 --offsets 1
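As a final check, the offsets of all eight topics can be listed in one pass; a minimal sketch looping over the topic names created at the start:

for t in users user_friends_raw user_friends events event_attendees_raw event_attendees train test; do
  kafka-run-class.sh kafka.tools.GetOffsetShell --broker-list 192.168.136.100:9092 --topic $t --time -1
done

The user_friends and event_attendees topics have not been written to yet, so their offsets should still be 0.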
