1、滚动窗口(TumblingEventTimeWindows)
/**
 * Tumbling event-time window demo.
 *
 * Reads text lines from a socket on localhost:7777, splits each line on a single
 * space into `(key, timestamp, 1)`, uses the second field as the event timestamp,
 * keys the stream by the first tuple field and, for every 2-second tumbling window,
 * folds the timestamps seen for a key into a set that is then printed.
 *
 * @param args command-line arguments (not used)
 */
def main(args: Array[String]): Unit = {
  // Execution environment: event-time characteristic, parallelism 1.
  val env = StreamExecutionEnvironment.getExecutionEnvironment
  env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime)
  env.setParallelism(1)

  val lines = env.socketTextStream("localhost", 7777)

  // Each line is expected to look like "<key> <timestamp>"; the constant 1 is a counter slot.
  val records: DataStream[(String, Long, Int)] = lines.map { line =>
    val fields = line.split(" ")
    (fields(0), fields(1).toLong, 1)
  }

  // The event timestamp is the tuple's second field; the extractor is built with a 1000 ms bound.
  // NOTE(review): the timestamp is presumably epoch milliseconds - confirm against the input data.
  val timestampExtractor =
    new BoundedOutOfOrdernessTimestampExtractor[(String, Long, Int)](Time.milliseconds(1000)) {
      override def extractTimestamp(element: (String, Long, Int)): Long = element._2
    }
  val recordsWithEventTime = records.assignTimestampsAndWatermarks(timestampExtractor)

  // Key by tuple position 0 and echo every keyed record for inspection.
  val keyed = recordsWithEventTime.keyBy(0)
  keyed.print("textkey:")

  // Non-overlapping 2-second windows; collect the distinct timestamps of each key per window.
  val windowed = keyed.window(TumblingEventTimeWindows.of(Time.seconds(2)))
  val timestampsPerWindow: DataStream[mutable.HashSet[Long]] =
    windowed.fold(new mutable.HashSet[Long]()) { (seen, record) => seen += record._2 }

  timestampsPerWindow.print("window::::").setParallelism(1)
  env.execute()
}
结果是按照 Event Time 的时间窗口计算得出的,与系统时间无关(包括输入的快慢)。
2、滑动窗口(SlidingEventTimeWindows)
/**
 * Sliding event-time window demo.
 *
 * Reads text lines from a socket on localhost:7777, splits each line on a single
 * space into `(key, timestamp, 1)`, uses the second field as the event timestamp,
 * keys the stream by the first tuple field and applies sliding windows of size
 * 2 seconds with a 500 ms slide. For each key and window the timestamps are folded
 * into a set that is then printed.
 *
 * @param args command-line arguments (not used)
 */
def main(args: Array[String]): Unit = {
  // Execution environment: event-time characteristic, parallelism 1.
  val env = StreamExecutionEnvironment.getExecutionEnvironment
  env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime)
  env.setParallelism(1)

  val lines = env.socketTextStream("localhost", 7777)

  // Each line is expected to look like "<key> <timestamp>"; the constant 1 is a counter slot.
  val records: DataStream[(String, Long, Int)] = lines.map { line =>
    val fields = line.split(" ")
    (fields(0), fields(1).toLong, 1)
  }

  // The event timestamp is the tuple's second field; the extractor is built with a 1000 ms bound.
  // NOTE(review): the timestamp is presumably epoch milliseconds - confirm against the input data.
  val timestampExtractor =
    new BoundedOutOfOrdernessTimestampExtractor[(String, Long, Int)](Time.milliseconds(1000)) {
      override def extractTimestamp(element: (String, Long, Int)): Long = element._2
    }
  val recordsWithEventTime = records.assignTimestampsAndWatermarks(timestampExtractor)

  // Key by tuple position 0 and echo every keyed record for inspection.
  val keyed = recordsWithEventTime.keyBy(0)
  keyed.print("textkey:")

  // Overlapping windows: size 2 s, slide 500 ms, so one record can fall into several windows.
  val slidingAssigner = SlidingEventTimeWindows.of(Time.seconds(2), Time.milliseconds(500))
  val windowed = keyed.window(slidingAssigner)
  val timestampsPerWindow: DataStream[mutable.HashSet[Long]] =
    windowed.fold(new mutable.HashSet[Long]()) { (seen, record) => seen += record._2 }

  timestampsPerWindow.print("window::::").setParallelism(1)
  env.execute()
}
3、会话窗口(EventTimeSessionWindows)
当相邻两条数据的 EventTime 之差超过指定的时间间隔(gap)时,前一个会话窗口就会触发执行。如果引入了 Watermark,窗口不会在满足触发条件时立即执行,而是延迟到 Watermark 到达相应水位之后再触发。
/**
 * Session event-time window demo.
 *
 * Reads text lines from a socket on localhost:7777, splits each line on a single
 * space into `(key, timestamp, 1)`, uses the second field as the event timestamp,
 * keys the stream by the first tuple field and groups records into session windows
 * with a 500 ms gap. Within each session the counter field is summed and the
 * resulting count is printed.
 *
 * @param args command-line arguments (not used)
 */
def main(args: Array[String]): Unit = {
  // Execution environment: event-time characteristic, parallelism 1.
  val env: StreamExecutionEnvironment = StreamExecutionEnvironment.getExecutionEnvironment
  env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime)
  env.setParallelism(1)

  val dstream: DataStream[String] = env.socketTextStream("localhost", 7777)

  // Each line is expected to look like "<key> <timestamp>"; the constant 1 is a counter slot.
  val textWithTsDstream: DataStream[(String, Long, Int)] = dstream.map { text =>
    val arr: Array[String] = text.split(" ")
    (arr(0), arr(1).toLong, 1)
  }

  // The constructor argument list must be closed before the anonymous-class body opens;
  // the original snippet was missing that ")" and did not compile.
  // NOTE(review): the timestamp is presumably epoch milliseconds - confirm against the input data.
  val textWithEventTimeDstream: DataStream[(String, Long, Int)] =
    textWithTsDstream.assignTimestampsAndWatermarks(
      new BoundedOutOfOrdernessTimestampExtractor[(String, Long, Int)](Time.milliseconds(1000)) {
        override def extractTimestamp(element: (String, Long, Int)): Long = element._2
      })

  // Key by tuple position 0 and echo every keyed record for inspection.
  val textKeyStream: KeyedStream[(String, Long, Int), Tuple] = textWithEventTimeDstream.keyBy(0)
  textKeyStream.print("textkey:")

  // Session windows with a 500 ms gap between consecutive event timestamps.
  val windowStream: WindowedStream[(String, Long, Int), Tuple, TimeWindow] =
    textKeyStream.window(EventTimeSessionWindows.withGap(Time.milliseconds(500)))

  // Sum the counter field per session (the timestamp slot is reset to 0L) and print only the count.
  windowStream
    .reduce((text1, text2) => (text1._1, 0L, text1._3 + text2._3))
    .map(_._3)
    .print("windows:::")
    .setParallelism(1)

  env.execute()
}