一、intervalJoin
流A intervalJoin 流B,只要满足以下条件即为join成功。
流B的时间戳大于等于流A的时间戳加下界(下界通常取负值,例如 -3 秒,即相当于"流A的时间戳减 3 秒")
且流B的时间戳小于等于流A的时间戳加上界,即:A.ts + lowerBound <= B.ts <= A.ts + upperBound
且流A的key等于流B的key。
ProcessJoinFunction
public class TwoStreamJoinStream {
public static void main(String[] args) throws Exception {
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
env.setParallelism(3);
env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
DataStreamSource<String> sourceStream1 = env.addSource(new FlinkKafkaConsumer010<String>("stream1", new SimpleStringSchema(), KafkaUtils.comsumerProps()));
DataStreamSource<String> sourceStream2 = env.addSource(new FlinkKafkaConsumer010<String>("stream2", new SimpleStringSchema(), KafkaUtils.comsumerProps()));
KeyedStream<String, String> streamJoin1 = sourceStream1
.assignTimestampsAndWatermarks(new EventTimeExtractor())
.keyBy(new KeySelector<String, String>() {
/**
 * Extracts the join key for a record of the first stream.
 *
 * <p>The incoming line is parsed as a JSON object and the value stored
 * under the "uuid" field is used as the key.
 *
 * @param line one raw record, expected to be a JSON object string
 * @return the value of the "uuid" field of the parsed object
 * @throws Exception if parsing the line or reading the field fails
 */
@Override
public String getKey(String line) throws Exception {
    // Parse and read the key field in one expression; no intermediate state is needed.
    return JSON.parseObject(line).getString("uuid");
}
});
KeyedStream<String, String> streamJoin2 = sourceStream2
.assignTimestampsAndWatermarks(new EventTimeExtractor())
.keyBy(new KeySelector<String, String>() {
/**
 * Extracts the join key for a record of the second stream.
 *
 * <p>The incoming line is parsed as a JSON object and the value stored
 * under the "uuid" field is used as the key, so that it can be matched
 * against keys produced the same way on the other side of the join.
 *
 * @param line one raw record, expected to be a JSON object string
 * @return the value of the "uuid" field of the parsed object
 * @throws Exception if parsing the line or reading the field fails
 */
@Override
public String getKey(String line) throws Exception {
    final JSONObject parsed = JSON.parseObject(line);
    final String uuid = parsed.getString("uuid");
    return uuid;
}
});
SingleOutputStreamOperator<String> resuleStream = streamJoin1
.intervalJoin(streamJoin2)
// streamJoin2的时间戳 >= streamJoin1的时间戳-3 并且 streamJoin2的时间戳 <= streamJoin1的时间戳+3
// 且 streamJoin1的key = streamJoin2的key 即可关联上
.between(Time.seconds(-3), Time.se