一、基于两条流的connect,和CoProcessFunction
1.将一条有限流与一条无限流进行connect,利用处理时间定时器(Timer)控制打印输出的持续时长
import com.atguigu.day2.{
SensorReading, SensorSource}
import org.apache.flink.api.common.state.ValueStateDescriptor
import org.apache.flink.api.scala.typeutils.Types
import org.apache.flink.streaming.api.functions.co.CoProcessFunction
import org.apache.flink.streaming.api.scala._
import org.apache.flink.util.Collector
/** Example of connecting two keyed streams and processing them with a `CoProcessFunction`.
  *
  * A stream of sensor readings is connected with a small control stream of
  * `(sensorId, durationMs)` "filter switches". A reading is printed only while the
  * switch for its sensor id is enabled.
  */
object CoProcessFunctionExample {

  /** Builds the job graph and runs it.
    *
    * @param args command-line arguments (unused)
    */
  def main(args: Array[String]): Unit = {
    val env = StreamExecutionEnvironment.getExecutionEnvironment
    env.setParallelism(1)

    // Sensor readings keyed by sensor id (presumably an unbounded source — see SensorSource).
    val readings = env
      .addSource(new SensorSource)
      .keyBy(_.id)

    // Bounded control stream: (sensor id, how long to forward readings, in milliseconds).
    val filterSwitches = env
      .fromElements(
        ("sensor_2", 10 * 1000L),
        ("sensor_7", 30 * 1000L)
      )
      .keyBy(_._1)

    readings
      .connect(filterSwitches)
      .process(new ReadingFilter)
      .print()

    env.execute()
  }

  /** Forwards readings of a sensor only while forwarding is enabled for that key.
    *
    * Input 1 carries the readings, input 2 carries `(sensorId, durationMs)` switches.
    * A switch enables forwarding for its key and schedules a processing-time timer that
    * disables it again. When several switches arrive for the same key, forwarding stays
    * enabled until the latest requested deadline; an earlier pending timer is removed so
    * it cannot cut the window short.
    */
  class ReadingFilter extends CoProcessFunction[SensorReading, (String, Long), SensorReading] {

    // Per-key flag: true while readings of this key should be forwarded.
    // An unset state reads as `false` because Scala unboxes the null value to false.
    lazy val forwardingEnabled = getRuntimeContext.getState(
      new ValueStateDescriptor[Boolean]("filter-switch", Types.of[Boolean])
    )

    // Per-key timestamp of the currently registered disable timer (reads as 0 when unset).
    private lazy val disableTimer = getRuntimeContext.getState(
      new ValueStateDescriptor[Long]("disable-timer", Types.of[Long])
    )

    /** Emits the reading if forwarding is currently enabled for its key. */
    override def processElement1(value: SensorReading, ctx: CoProcessFunction[SensorReading, (String, Long), SensorReading]#Context, out: Collector[SensorReading]): Unit = {
      if (forwardingEnabled.value()) {
        out.collect(value)
      }
    }

    /** Enables forwarding for the key and (re)schedules the timer that disables it.
      *
      * @param value `(sensorId, durationMs)`; the duration is added to the current
      *              processing time to obtain the disable deadline
      */
    override def processElement2(value: (String, Long), ctx: CoProcessFunction[SensorReading, (String, Long), SensorReading]#Context, out: Collector[SensorReading]): Unit = {
      forwardingEnabled.update(true)

      val timerService = ctx.timerService()
      val timerTs = timerService.currentProcessingTime() + value._2
      val pendingTimerTs = disableTimer.value()

      // Only move the deadline forward; a shorter later switch must not shrink the window.
      if (timerTs > pendingTimerTs) {
        // Drop the superseded timer so it cannot disable forwarding prematurely.
        if (pendingTimerTs > 0L) {
          timerService.deleteProcessingTimeTimer(pendingTimerTs)
        }
        timerService.registerProcessingTimeTimer(timerTs)
        disableTimer.update(timerTs)
      }
    }

    /** Disables forwarding for the key once its deadline is reached and releases the state. */
    override def onTimer(timestamp: Long, ctx: CoProcessFunction[SensorReading, (String, Long), SensorReading]#OnTimerContext, out: Collector[SensorReading]): Unit = {
      // Clearing (rather than storing `false`) frees the per-key state; an empty flag
      // is read back as `false` in processElement1.
      forwardingEnabled.clear()
      disableTimer.clear()
    }
  }
}
二、基于时间间隔的Join
1.用户点击日志和用户浏览日志的Join
import org.apache.flink.streaming.api.TimeCharacteristic
import org.apache.flink.streaming.api.functions.co.ProcessJoinFunction
import org.apache.flink.streaming.api.scala._
import org.apache.flink.streaming.api.windowing.time