Writing files to Kafka
userfriends.sources=userfriendsSource
userfriends.channels=userfriendsChannel
userfriends.sinks=userfriendsSink
userfriends.sources.userfriendsSource.type=spooldir
userfriends.sources.userfriendsSource.spoolDir=/opt/ev
userfriends.sources.userfriendsSource.deserializer=LINE
userfriends.sources.userfriendsSource.deserializer.maxLineLength=64000
userfriends.sources.userfriendsSource.includePattern=user_friends-[0-9]{4}-[0-9]{2}-[0-9]{2}\.csv
userfriends.sources.userfriendsSource.interceptors=head_filter
userfriends.sources.userfriendsSource.interceptors.head_filter.type=regex_filter
userfriends.sources.userfriendsSource.interceptors.head_filter.regex=^user
userfriends.sources.userfriendsSource.interceptors.head_filter.excludeEvents=true
userfriends.channels.userfriendsChannel.type=file
userfriends.channels.userfriendsChannel.checkpointDir=/opt/checkpoint/friend
userfriends.channels.userfriendsChannel.dataDirs=/opt/data/friend
userfriends.sinks.userfriendsSink.type=org.apache.flume.sink.kafka.KafkaSink
userfriends.sinks.userfriendsSink.batchSize=640
userfriends.sinks.userfriendsSink.brokerList=192.168.232.211:9092
userfriends.sinks.userfriendsSink.topic=user_friends
userfriends.sources.userfriendsSource.channels=userfriendsChannel
userfriends.sinks.userfriendsSink.channel=userfriendsChannel
./bin/flume-ng agent --name userfriends --conf ./conf --conf-file ./conf/kb11job/user_friends-kafka.conf -Dflume.root.logger=INFO,console
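To check that events actually reached Kafka, the console consumer that ships with Kafka can tail the topic (broker address and topic name taken from the sink settings above; if topic auto-creation is disabled, create user_friends first, e.g. on older Kafka versions with the zookeeper flag):

kafka-topics.sh --create --zookeeper 192.168.232.211:2181 --topic user_friends --partitions 1 --replication-factor 1
kafka-console-consumer.sh --bootstrap-server 192.168.232.211:9092 --topic user_friends --from-beginning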

Fanning one source out through different channels into different sinks
train.sources=trainSource
train.channels=kafkaChannel hdfsChannel
train.sinks=kafkaSink hdfsSink
train.sources.trainSource.type=spooldir
train.sources.trainSource.spoolDir=/opt/ev
train.sources.trainSource.deserializer=LINE
train.sources.trainSource.deserializer.maxLineLength=64000
train.sources.trainSource.includePattern=train-[0-9]{4}-[0-9]{2}-[0-9]{2}\.csv
train.sources.trainSource.interceptors=head_filter
train.sources.trainSource.interceptors.head_filter.type=regex_filter
train.sources.trainSource.interceptors.head_filter.regex=^user
train.sources.trainSource.interceptors.head_filter.excludeEvents=true
train.channels.kafkaChannel.type=file
train.channels.kafkaChannel.checkpointDir=/opt/checkpoint/train
train.channels.kafkaChannel.dataDirs=/opt/data/train
train.channels.hdfsChannel.type=memory
train.channels.hdfsChannel.capacity=64000
train.channels.hdfsChannel.transactionCapacity=16000
train.sinks.kafkaSink.type=org.apache.flume.sink.kafka.KafkaSink
train.sinks.kafkaSink.batchSize=640
train.sinks.kafkaSink.brokerList=192.168.232.211:9092
train.sinks.kafkaSink.topic=train
train.sinks.hdfsSink.type=hdfs
train.sinks.hdfsSink.hdfs.fileType=DataStream
train.sinks.hdfsSink.hdfs.filePrefix=train
train.sinks.hdfsSink.hdfs.fileSuffix=.csv
train.sinks.hdfsSink.hdfs.path=hdfs://192.168.232.211:9000/kb11file/train/%Y-%m-%d
train.sinks.hdfsSink.hdfs.useLocalTimeStamp=true
train.sinks.hdfsSink.hdfs.batchSize=640
train.sinks.hdfsSink.hdfs.rollCount=0
train.sinks.hdfsSink.hdfs.rollSize=6400000
train.sinks.hdfsSink.hdfs.rollInterval=30
train.sources.trainSource.channels=kafkaChannel hdfsChannel
train.sinks.kafkaSink.channel=kafkaChannel
train.sinks.hdfsSink.channel=hdfsChannel
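No selector is configured for trainSource, so Flume's default replicating selector applies: every event is copied to both kafkaChannel and hdfsChannel, which is what lets one file feed Kafka and HDFS at the same time. The agent can then be started the same way as above (the config file name here is just a placeholder for whatever you saved this as):

./bin/flume-ng agent --name train --conf ./conf --conf-file ./conf/kb11job/train-kafka-hdfs.conf -Dflume.root.logger=INFO,console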
Interceptors
An interceptor is a class that implements the Interceptor interface.
1. Create a class that implements the Interceptor interface
package cn.kb11;

import org.apache.flume.Context;
import org.apache.flume.Event;
import org.apache.flume.interceptor.Interceptor;

import java.util.ArrayList;
import java.util.List;
import java.util.Map;

/**
 * Inspects each event received by the source.
 * An event consists of headers and a body:
 * if the body starts with "hello", tag the event header with type=hello;
 * if the body starts with "hi", tag it with type=hi;
 * otherwise tag it with type=other.
 */
public class InterceptorDemo implements Interceptor {
    private ArrayList<Event> addHeaderEvent = null;

    @Override
    public void initialize() {
        addHeaderEvent = new ArrayList<>();
    }

    @Override
    public Event intercept(Event event) {
        // Tag the event header according to the body prefix
        Map<String, String> headers = event.getHeaders();
        String bodyStr = new String(event.getBody());
        if (bodyStr.startsWith("hello")) {
            headers.put("type", "hello");
        } else if (bodyStr.startsWith("hi")) {
            headers.put("type", "hi");
        } else {
            headers.put("type", "other");
        }
        return event;
    }

    @Override
    public List<Event> intercept(List<Event> list) {
        addHeaderEvent.clear();
        for (Event event : list) {
            addHeaderEvent.add(intercept(event));
        }
        return addHeaderEvent;
    }

    @Override
    public void close() {
        addHeaderEvent.clear();
        addHeaderEvent = null;
    }

    // Flume instantiates the interceptor through this builder,
    // referenced in the config as cn.kb11.InterceptorDemo$Builder
    public static class Builder implements Interceptor.Builder {
        @Override
        public Interceptor build() {
            return new InterceptorDemo();
        }

        @Override
        public void configure(Context context) {
        }
    }
}
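Before packaging, a quick local check of the header tagging can save a deploy cycle. This is a throwaway main method, not part of the agent, and assumes flume-ng-core is on the classpath:

package cn.kb11;

import org.apache.flume.Event;
import org.apache.flume.event.EventBuilder;

public class InterceptorDemoCheck {
    public static void main(String[] args) {
        InterceptorDemo interceptor = new InterceptorDemo();
        interceptor.initialize();
        // Each body prefix should map to the matching type header
        Event hello = EventBuilder.withBody("hello world".getBytes());
        Event hi = EventBuilder.withBody("hi there".getBytes());
        Event other = EventBuilder.withBody("something else".getBytes());
        System.out.println(interceptor.intercept(hello).getHeaders()); // {type=hello}
        System.out.println(interceptor.intercept(hi).getHeaders());    // {type=hi}
        System.out.println(interceptor.intercept(other).getHeaders()); // {type=other}
        interceptor.close();
    }
}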
2. Package the class into a JAR and upload it to Flume's lib directory (/flume/lib)
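With Maven this might look like the following (the JAR name is whatever your project actually produces; host and lib path follow the setup above):

mvn clean package
scp target/interceptordemo-1.0-SNAPSHOT.jar root@192.168.232.211:/flume/lib/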
3. Create a new configuration file
vi netcat-interceptor.conf
interceptordemo.sources=interceptorDemoSource
interceptordemo.channels=helloChannel hiChannel otherChannel
interceptordemo.sinks=helloSink hiSink otherSink
interceptordemo.sources.interceptorDemoSource.type=netcat
interceptordemo.sources.interceptorDemoSource.bind=localhost
interceptordemo.sources.interceptorDemoSource.port=44444
interceptordemo.sources.interceptorDemoSource.interceptors=interceptor1
interceptordemo.sources.interceptorDemoSource.interceptors.interceptor1.type=cn.kb11.InterceptorDemo$Builder
interceptordemo.sources.interceptorDemoSource.selector.type=multiplexing
interceptordemo.sources.interceptorDemoSource.selector.mapping.hello=helloChannel
interceptordemo.sources.interceptorDemoSource.selector.mapping.hi=hiChannel
interceptordemo.sources.interceptorDemoSource.selector.mapping.other=otherChannel
interceptordemo.sources.interceptorDemoSource.selector.header=type
interceptordemo.channels.helloChannel.type=memory
interceptordemo.channels.helloChannel.capacity=1000
interceptordemo.channels.helloChannel.transactionCapacity=1000
interceptordemo.channels.hiChannel.type=memory
interceptordemo.channels.hiChannel.capacity=1000
interceptordemo.channels.hiChannel.transactionCapacity=1000
interceptordemo.channels.otherChannel.type=memory
interceptordemo.channels.otherChannel.capacity=1000
interceptordemo.channels.otherChannel.transactionCapacity=1000
interceptordemo.sinks.helloSink.type=hdfs
interceptordemo.sinks.helloSink.hdfs.fileType=DataStream
interceptordemo.sinks.helloSink.hdfs.filePrefix=hello
interceptordemo.sinks.helloSink.hdfs.fileSuffix=.csv
interceptordemo.sinks.helloSink.hdfs.path=hdfs://192.168.232.211:9000/kb11file/hello/%Y-%m-%d
interceptordemo.sinks.helloSink.hdfs.useLocalTimeStamp=true
interceptordemo.sinks.helloSink.hdfs.batchSize=640
interceptordemo.sinks.helloSink.hdfs.rollCount=0
interceptordemo.sinks.helloSink.hdfs.rollSize=6400000
interceptordemo.sinks.helloSink.hdfs.rollInterval=3
interceptordemo.sinks.hiSink.type=org.apache.flume.sink.kafka.KafkaSink
interceptordemo.sinks.hiSink.batchSize=640
interceptordemo.sinks.hiSink.brokerList=192.168.232.211:9092
interceptordemo.sinks.hiSink.topic=hi
interceptordemo.sinks.otherSink.type=logger
interceptordemo.sources.interceptorDemoSource.channels=helloChannel hiChannel otherChannel
interceptordemo.sinks.helloSink.channel=helloChannel
interceptordemo.sinks.hiSink.channel=hiChannel
interceptordemo.sinks.otherSink.channel=otherChannel
4. Run and verify
./bin/flume-ng agent --name interceptordemo --conf ./conf --conf-file ./conf/kb11job/netcat-interceptor.conf -Dflume.root.logger=INFO,console

telnet localhost 44444
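Each line typed into the telnet session is routed by the type header the interceptor added:

hello ...       -> helloChannel -> HDFS files under /kb11file/hello/<date>
hi ...          -> hiChannel    -> Kafka topic hi
anything else   -> otherChannel -> logger sink (printed in the agent console)

Note that the hi topic must exist, or topic auto-creation must be enabled on the Kafka side, before those events can be delivered.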






