Flume自定义拦截器
代码示例
-----------------------自定义拦截器-------------------------
/**
 * Flume interceptor that tags every event with a {@code type} header.
 *
 * <p>Events whose body starts with {@code "hello"} get {@code type=hello};
 * all other events get {@code type=other}. The header can then be used by a
 * multiplexing channel selector to route events to different channels.
 *
 * <p>The interceptor keeps no per-instance state, so batches never share
 * data with each other.
 */
public class InterceptorDemo implements Interceptor {

    /** Nothing to set up: the interceptor holds no state. */
    @Override
    public void initialize() {
    }

    /**
     * Sets the {@code type} header of a single event based on its body.
     *
     * @param event the event to tag; its header map is modified in place
     * @return the same event instance, with the {@code type} header set
     */
    @Override
    public Event intercept(Event event) {
        final Map<String, String> headers = event.getHeaders();
        // Decode with an explicit charset so the result does not depend on
        // the JVM's platform default encoding.
        final String body =
                new String(event.getBody(), java.nio.charset.StandardCharsets.UTF_8);
        headers.put("type", body.startsWith("hello") ? "hello" : "other");
        return event;
    }

    /**
     * Tags every event of a batch, preserving the batch order.
     *
     * @param list the events to tag
     * @return a new list holding the tagged events in their original order
     */
    @Override
    public List<Event> intercept(List<Event> list) {
        // A fresh list per call: the previous shared field was never
        // initialised (NullPointerException on first use), and reusing one
        // list would clear a result the caller may still be holding.
        final List<Event> tagged = new java.util.ArrayList<Event>(list.size());
        for (Event event : list) {
            tagged.add(intercept(event));
        }
        return tagged;
    }

    /** Nothing to release. */
    @Override
    public void close() {
    }

    /**
     * Builder for {@link InterceptorDemo}. This is the class to name in the
     * agent configuration, as {@code InterceptorDemo$Builder}.
     */
    public static class Builder implements Interceptor.Builder {

        /** @return a new, stateless {@link InterceptorDemo} */
        @Override
        public Interceptor build() {
            return new InterceptorDemo();
        }

        /** This interceptor has no configurable properties. */
        @Override
        public void configure(Context context) {
        }
    }
}
然后编写 Flume 的配置文件。
先将拦截器打成 jar 包，放进 Flume 的 lib 文件夹下。
作用：以 hello 开头的输入会写入 HDFS 的 /user/hello/日期 目录下以 hello 为前缀的 .csv 文件，其余输入则写入 /user/other/日期 目录下以 other 为前缀的 .csv 文件。
# Agent a3: one netcat source, routed by the "type" header into two
# memory channels, each drained by its own HDFS sink.
a3.sources = r1
a3.channels = c1 c2
a3.sinks = k1 k2

# Source: line-oriented netcat listener on localhost:44444.
a3.sources.r1.type = netcat
a3.sources.r1.bind = localhost
a3.sources.r1.port = 44444

# Interceptor: sets the "type" header to "hello" or "other" on every event.
a3.sources.r1.interceptors = i1
a3.sources.r1.interceptors.i1.type = nj.zb.kb05.InterceptorDemo$Builder

# Selector: route on the "type" header written by the interceptor.
a3.sources.r1.selector.type = multiplexing
a3.sources.r1.selector.header = type
a3.sources.r1.selector.mapping.hello = c1
a3.sources.r1.selector.mapping.other = c2

# Sink k1: events tagged "hello".
a3.sinks.k1.type = hdfs
a3.sinks.k1.hdfs.fileType = DataStream
a3.sinks.k1.hdfs.filePrefix = hello
a3.sinks.k1.hdfs.fileSuffix = .csv
a3.sinks.k1.hdfs.path = hdfs://192.168.126.166:9000/user/hello/%Y-%m-%d
a3.sinks.k1.hdfs.useLocalTimeStamp = true
# Must not exceed the channel's transactionCapacity (100): the sink takes up
# to batchSize events in a single channel transaction.
a3.sinks.k1.hdfs.batchSize = 100
a3.sinks.k1.hdfs.rollCount = 0
a3.sinks.k1.hdfs.rollSize = 100000000
a3.sinks.k1.hdfs.rollInterval = 3

# Sink k2: events tagged "other".
a3.sinks.k2.type = hdfs
a3.sinks.k2.hdfs.fileType = DataStream
a3.sinks.k2.hdfs.filePrefix = other
a3.sinks.k2.hdfs.fileSuffix = .csv
a3.sinks.k2.hdfs.path = hdfs://192.168.126.166:9000/user/other/%Y-%m-%d
a3.sinks.k2.hdfs.useLocalTimeStamp = true
# Must not exceed the channel's transactionCapacity (100), see sink k1.
a3.sinks.k2.hdfs.batchSize = 100
a3.sinks.k2.hdfs.rollCount = 0
a3.sinks.k2.hdfs.rollSize = 100000000
a3.sinks.k2.hdfs.rollInterval = 3

# Channels: in-memory buffers, one per route.
a3.channels.c1.type = memory
a3.channels.c1.capacity = 1000
a3.channels.c1.transactionCapacity = 100
a3.channels.c2.type = memory
a3.channels.c2.capacity = 1000
a3.channels.c2.transactionCapacity = 100

# Wiring: the source feeds both channels; each sink drains exactly one.
a3.sources.r1.channels = c1 c2
a3.sinks.k1.channel = c1
a3.sinks.k2.channel = c2