Flink 中 Trigger 用来定义窗口的触发策略,Evictor 用来定义窗口中元素的清除策略。
Trigger
注册的定时器会保存在一个优先队列中
* onElement() 每次往 window 增加一个元素的时候都会触发
* onEventTime() 当 event-time timer 被触发的时候会调用
* onProcessingTime() 当 processing-time timer 被触发的时候会调用
* onMerge() 对两个 trigger 的 state 进行 merge 操作,用来处理 session window
* clear() window 销毁的时候被调用
/**
 * Base class for window triggers. A trigger is called back for every element added to a window
 * and for every timer it registered, and answers each callback with a {@link TriggerResult}.
 * Merging of trigger state is rejected by default ({@link #canMerge()} returns {@code false}).
 */
public abstract class Trigger<T, W extends Window> implements Serializable {
private static final long serialVersionUID = -4104633972991191369L;
// Called for every element that is added to a window.
public abstract TriggerResult onElement(T element, long timestamp, W window, TriggerContext ctx)
throws Exception;
// Called when a processing-time timer registered through the context fires.
public abstract TriggerResult onProcessingTime(long time, W window, TriggerContext ctx)
throws Exception;
// Called when an event-time timer registered through the context fires.
public abstract TriggerResult onEventTime(long time, W window, TriggerContext ctx)
throws Exception;
// Whether this trigger supports merging of trigger state (consulted for session windows).
// The default is false.
public boolean canMerge() {
return false;
}
// Merge hook for trigger state. The default implementation rejects merging by throwing.
public void onMerge(W window, OnMergeContext ctx) throws Exception {
throw new UnsupportedOperationException("This trigger does not support merging.");
}
// Clean-up hook: releases whatever the trigger still holds for the given window.
public abstract void clear(W window, TriggerContext ctx) throws Exception;
// ------------------------------------------------------------------------
// Context handed to the trigger callbacks: time, metrics, timers and state access.
public interface TriggerContext {
// Returns the current processing time.
long getCurrentProcessingTime();
// Returns the metric group.
MetricGroup getMetricGroup();
// Returns the current watermark.
long getCurrentWatermark();
// Registers a processing-time timer.
void registerProcessingTimeTimer(long time);
// Registers an event-time timer.
void registerEventTimeTimer(long time);
// Deletes the processing-time timer for the given time.
void deleteProcessingTimeTimer(long time);
// Deletes the event-time timer for the given time.
void deleteEventTimeTimer(long time);
// Gives access to fault-tolerant state through a state descriptor. As the Javadoc of the
// deprecated accessors below states, partitioned state is only available when the function
// is part of a KeyedStream; otherwise an UnsupportedOperationException is thrown.
<S extends State> S getPartitionedState(StateDescriptor<S, ?> stateDescriptor);
/**
* Retrieves a {@link ValueState} object that can be used to interact with fault-tolerant
* state that is scoped to the window and key of the current trigger invocation.
*
* @param name The name of the key/value state.
* @param stateType The class of the type that is stored in the state. Used to generate
* serializers for managed memory and checkpointing.
* @param defaultState The default state value, returned when the state is accessed and no
* value has yet been set for the key. May be null.
* @param <S> The type of the state.
* @return The partitioned state object.
* @throws UnsupportedOperationException Thrown, if no partitioned state is available for
* the function (function is not part of a KeyedStream).
* @deprecated Use {@link #getPartitionedState(StateDescriptor)}.
*/
@Deprecated
<S extends Serializable> ValueState<S> getKeyValueState(
String name, Class<S> stateType, S defaultState);
/**
* Retrieves a {@link ValueState} object that can be used to interact with fault-tolerant
* state that is scoped to the window and key of the current trigger invocation.
*
* @param name The name of the key/value state.
* @param stateType The type information for the type that is stored in the state. Used to
* create serializers for managed memory and checkpoints.
* @param defaultState The default state value, returned when the state is accessed and no
* value has yet been set for the key. May be null.
* @param <S> The type of the state.
* @return The partitioned state object.
* @throws UnsupportedOperationException Thrown, if no partitioned state is available for
* the function (function is not part of a KeyedStream).
* @deprecated Use {@link #getPartitionedState(StateDescriptor)}.
*/
@Deprecated
<S extends Serializable> ValueState<S> getKeyValueState(
String name, TypeInformation<S> stateType, S defaultState);
}
/**
* Extension of {@link TriggerContext} that is given to {@link Trigger#onMerge(Window,
* OnMergeContext)}.
*/
public interface OnMergeContext extends TriggerContext {
// Merges the partitioned state identified by the descriptor.
// NOTE(review): presumably folds the state of the merged windows into the resulting
// window; confirm against the Flink Javadoc.
<S extends MergingState<?, ?>> void mergePartitionedState(
StateDescriptor<S, ?> stateDescriptor);
}
}
触发器返回值
org.apache.flink.streaming.api.windowing.triggers.TriggerResult
* CONTINUE 不做任何事情
* FIRE 触发 window
* PURGE 清空整个 window 的元素并销毁窗口(不会触发窗口计算,也不输出结果)
* FIRE_AND_PURGE 触发窗口,然后销毁窗口
/**
 * Demo of a custom count-based {@link Trigger} on a keyed global window.
 *
 * <p>The window is evaluated every {@code FIRE_EVERY} elements. When {@code WINDOW_SIZE} elements
 * have arrived it is evaluated one last time and its contents are purged, after which counting
 * starts over.
 */
public class TriggerDemo {

    /** Number of elements after which the window is evaluated and purged. */
    private static final long WINDOW_SIZE = 50L;

    /** The window is evaluated (without purging) every this many elements. */
    private static final long FIRE_EVERY = 20L;

    /**
     * Entry point: builds and runs the demo pipeline.
     *
     * @param args unused
     * @throws Exception if the job fails
     */
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(1);

        env
            .addSource(new ClickSource())
            .assignTimestampsAndWatermarks(
                WatermarkStrategy
                    .<Event>forBoundedOutOfOrderness(Duration.ofSeconds(0L))
                    .withTimestampAssigner(
                        new SerializableTimestampAssigner<Event>() {
                            @Override
                            public long extractTimestamp(Event element, long recordTimestamp) {
                                return element.timestamp;
                            }
                        }
                    )
            )
            .keyBy(event -> true)
            .countWindow(WINDOW_SIZE)
            .trigger(
                new Trigger<Event, GlobalWindow>() {
                    /** Per-key, per-window counter of the elements seen since the last purge. */
                    private final ValueStateDescriptor<Long> countDescriptor =
                        new ValueStateDescriptor<>("countState", Types.LONG);

                    /**
                     * Counts the element and decides immediately whether the window fires.
                     *
                     * <p>The decision is taken here rather than in a timer callback: a timer
                     * would re-read the counter after further elements may have arrived, and
                     * would hit a null counter once an earlier timer had cleared it.
                     */
                    @Override
                    public TriggerResult onElement(
                            Event element, long timestamp, GlobalWindow window, TriggerContext ctx)
                            throws Exception {
                        ValueState<Long> countState = ctx.getPartitionedState(countDescriptor);
                        Long previous = countState.value();
                        // A null counter means this is the first element since the last purge.
                        long count = (previous == null) ? 1L : previous + 1L;

                        if (count >= WINDOW_SIZE) {
                            // Window is full: emit one last time, drop the contents, restart counting.
                            countState.clear();
                            return TriggerResult.FIRE_AND_PURGE;
                        }

                        countState.update(count);
                        return (count % FIRE_EVERY == 0) ? TriggerResult.FIRE : TriggerResult.CONTINUE;
                    }

                    /** No processing-time timers are registered, so there is nothing to do. */
                    @Override
                    public TriggerResult onProcessingTime(
                            long time, GlobalWindow window, TriggerContext ctx) throws Exception {
                        return TriggerResult.CONTINUE;
                    }

                    /** No event-time timers are registered, so there is nothing to do. */
                    @Override
                    public TriggerResult onEventTime(
                            long time, GlobalWindow window, TriggerContext ctx) throws Exception {
                        return TriggerResult.CONTINUE;
                    }

                    /** Releases the counter held for the window and logs a separator line. */
                    @Override
                    public void clear(GlobalWindow window, TriggerContext ctx) throws Exception {
                        ctx.getPartitionedState(countDescriptor).clear();
                        System.out.println("=============================");
                    }
                }
            )
            .process(
                new ProcessWindowFunction<Event, String, Boolean, GlobalWindow>() {
                    @Override
                    public void process(
                            Boolean s, Context context, Iterable<Event> elements, Collector<String> out)
                            throws Exception {
                        out.collect("窗口中有 " + countElements(elements) + " 条元素");
                    }
                }
            )
            .print();

        env.execute();
    }

    /**
     * Counts the elements of an iterable by walking it, so the result is correct even when the
     * iterable cannot report its size up front.
     */
    private static long countElements(Iterable<?> elements) {
        long total = 0L;
        for (Object ignored : elements) {
            total++;
        }
        return total;
    }
}
Evictor
在 Trigger 触发之后,在窗口被处理之前(evictBefore())或处理之后(evictAfter()),Evictor(如果有 Evictor 的话)会用来剔除窗口中不需要的元素,相当于一个 filter
org.apache.flink.streaming.api.windowing.triggers.Trigger(注:下面贴出的仍是 Trigger 的源码;Evictor 接口本身位于 org.apache.flink.streaming.api.windowing.evictors.Evictor)
/**
 * Base class for window triggers. A trigger is called back for every element added to a pane and
 * for every timer it registered; the {@link TriggerResult} it returns determines whether the pane
 * is evaluated to emit results.
 *
 * @param <T> The type of elements this trigger works on.
 * @param <W> The type of windows this trigger works on.
 */
public abstract class Trigger<T, W extends Window> implements Serializable {
private static final long serialVersionUID = -4104633972991191369L;
/**
* Called for every element that gets added to a pane. The result of this will determine whether
* the pane is evaluated to emit results.
*
* @param element The element that arrived.
* @param timestamp The timestamp of the element that arrived.
* @param window The window to which the element is being added.
* @param ctx A context object that can be used to register timer callbacks.
*/
public abstract TriggerResult onElement(T element, long timestamp, W window, TriggerContext ctx)
throws Exception;
/**
* Called when a processing-time timer that was set using the trigger context fires.
*
* @param time The timestamp at which the timer fired.
* @param window The window for which the timer fired.
* @param ctx A context object that can be used to register timer callbacks.
*/
public abstract TriggerResult onProcessingTime(long time, W window, TriggerContext ctx)
throws Exception;
/**
* Called when an event-time timer that was set using the trigger context fires.
*
* @param time The timestamp at which the timer fired.
* @param window The window for which the timer fired.
* @param ctx A context object that can be used to register timer callbacks.
*/
public abstract TriggerResult onEventTime(long time, W window, TriggerContext ctx)
throws Exception;
/**
* Returns true if this trigger supports merging of trigger state and can therefore be used with
* a {@link org.apache.flink.streaming.api.windowing.assigners.MergingWindowAssigner}.
*
* <p>If this returns {@code true} you must properly implement {@link #onMerge(Window,
* OnMergeContext)}
*/
public boolean canMerge() {
return false;
}
/**
* Called when several windows have been merged into one window by the {@link
* org.apache.flink.streaming.api.windowing.assigners.WindowAssigner}.
*
* <p>The default implementation throws {@link UnsupportedOperationException}.
*
* @param window The new window that results from the merge.
* @param ctx A context object that can be used to register timer callbacks and access state.
*/
public void onMerge(W window, OnMergeContext ctx) throws Exception {
throw new UnsupportedOperationException("This trigger does not support merging.");
}
/**
* Clears any state that the trigger might still hold for the given window. This is called when
* a window is purged. Timers set using {@link TriggerContext#registerEventTimeTimer(long)} and
* {@link TriggerContext#registerProcessingTimeTimer(long)} should be deleted here as well as
* state acquired using {@link TriggerContext#getPartitionedState(StateDescriptor)}.
*/
public abstract void clear(W window, TriggerContext ctx) throws Exception;
// ------------------------------------------------------------------------
/**
* A context object that is given to {@link Trigger} methods to allow them to register timer
* callbacks and deal with state.
*/
public interface TriggerContext {
/** Returns the current processing time. */
long getCurrentProcessingTime();
/**
* Returns the metric group for this {@link Trigger}. This is the same metric group that
* would be returned from {@link RuntimeContext#getMetricGroup()} in a user function.
*
* <p>You must not call methods that create metric objects (such as {@link
* MetricGroup#counter(int)}) multiple times but instead call once and store the metric
* object in a field.
*/
MetricGroup getMetricGroup();
/** Returns the current watermark time. */
long getCurrentWatermark();
/**
* Register a system time callback. When the current system time passes the specified time
* {@link Trigger#onProcessingTime(long, Window, TriggerContext)} is called with the time
* specified here.
*
* @param time The time at which to invoke {@link Trigger#onProcessingTime(long, Window,
* TriggerContext)}
*/
void registerProcessingTimeTimer(long time);
/**
* Register an event-time callback. When the current watermark passes the specified time
* {@link Trigger#onEventTime(long, Window, TriggerContext)} is called with the time
* specified here.
*
* @param time The watermark at which to invoke {@link Trigger#onEventTime(long, Window,
* TriggerContext)}
* @see org.apache.flink.streaming.api.watermark.Watermark
*/
void registerEventTimeTimer(long time);
/** Delete the processing time trigger for the given time. */
void deleteProcessingTimeTimer(long time);
/** Delete the event-time trigger for the given time. */
void deleteEventTimeTimer(long time);
/**
* Retrieves a {@link State} object that can be used to interact with fault-tolerant state
* that is scoped to the window and key of the current trigger invocation.
*
* @param stateDescriptor The StateDescriptor that contains the name and type of the state
* that is being accessed.
* @param <S> The type of the state.
* @return The partitioned state object.
* @throws UnsupportedOperationException Thrown, if no partitioned state is available for
* the function (function is not part of a KeyedStream).
*/
<S extends State> S getPartitionedState(StateDescriptor<S, ?> stateDescriptor);
/**
* Retrieves a {@link ValueState} object that can be used to interact with fault-tolerant
* state that is scoped to the window and key of the current trigger invocation.
*
* @param name The name of the key/value state.
* @param stateType The class of the type that is stored in the state. Used to generate
* serializers for managed memory and checkpointing.
* @param defaultState The default state value, returned when the state is accessed and no
* value has yet been set for the key. May be null.
* @param <S> The type of the state.
* @return The partitioned state object.
* @throws UnsupportedOperationException Thrown, if no partitioned state is available for
* the function (function is not part of a KeyedStream).
* @deprecated Use {@link #getPartitionedState(StateDescriptor)}.
*/
@Deprecated
<S extends Serializable> ValueState<S> getKeyValueState(
String name, Class<S> stateType, S defaultState);
/**
* Retrieves a {@link ValueState} object that can be used to interact with fault-tolerant
* state that is scoped to the window and key of the current trigger invocation.
*
* @param name The name of the key/value state.
* @param stateType The type information for the type that is stored in the state. Used to
* create serializers for managed memory and checkpoints.
* @param defaultState The default state value, returned when the state is accessed and no
* value has yet been set for the key. May be null.
* @param <S> The type of the state.
* @return The partitioned state object.
* @throws UnsupportedOperationException Thrown, if no partitioned state is available for
* the function (function is not part of a KeyedStream).
* @deprecated Use {@link #getPartitionedState(StateDescriptor)}.
*/
@Deprecated
<S extends Serializable> ValueState<S> getKeyValueState(
String name, TypeInformation<S> stateType, S defaultState);
}
/**
* Extension of {@link TriggerContext} that is given to {@link Trigger#onMerge(Window,
* OnMergeContext)}.
*/
public interface OnMergeContext extends TriggerContext {
/**
* Merges the partitioned state identified by the given descriptor.
*
* <p>NOTE(review): presumably folds the state of the merged windows into the resulting
* window; confirm against the Flink Javadoc.
*
* @param stateDescriptor The descriptor of the state to merge.
* @param <S> The type of the mergeable state.
*/
<S extends MergingState<?, ?>> void mergePartitionedState(
StateDescriptor<S, ?> stateDescriptor);
}
}
* evictBefore() 窗口函数调用前调用
* evictAfter() 窗口函数执行完后调用
驱逐器上下文(EvictorContext)的内容
interface EvictorContext {
*getCurrentProcessingTime()
*getMetricGroup()
*getCurrentWatermark()
}
public class WindowEvictor {
public static void main(String[] args) throws Exception {
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
env.setParallelism(1);
env
.addSource(new ClickSource())
.keyBy(event -> 1)
.windowAll(GlobalWindows.create())
// 窗口 只 保存 20 条最近数据
.trigger(
// 1min 触发一次
new Trigger<Event, GlobalWindow>() {
@Override
public TriggerResult onElement(Event element, long timestamp, GlobalWindow window, TriggerContext ctx) throws Exception {
ValueState<Boolean> firstSeen = ctx.getPartitionedState(new ValueStateDescriptor<Boolean>("first-seen", Types.BOOLEAN));
if (!Optional.ofNullable(firstSeen.value()).orElseGet(() -> false)) {
// 窗口的第一个元素
//注册一个 1min 的定时器
// System.out.println("timestamp = " + timestamp); // -9223372036854775808
long ts = ctx.getCurrentProcessingTime() + 5 * 1000L;
System.out.println("register timer = " + ts);
ctx.registerProcessingTimeTimer(ts);
firstSeen.update(true);
}
return TriggerResult.CONTINUE;
}
@Override
public TriggerResult onProcessingTime(long time, GlobalWindow window, TriggerContext ctx) throws Exception {
System.out.println("timer trigger = " + time);
ctx.registerProcessingTimeTimer(time + 1L + 5 * 1000L);
return TriggerResult.FIRE;
}
@Override
public TriggerResult onEventTime(long time, GlobalWindow window, TriggerContext ctx) throws Exception {
return TriggerResult.CONTINUE;
}
@Override
public void clear(GlobalWindow window, TriggerContext ctx) throws Exception {
ValueState<Boolean> firstSeen = ctx.getPartitionedState(new ValueStateDescriptor<Boolean>("first-seen", Types.BOOLEAN));
firstSeen.clear();
}
}
)
// org.apache.flink.streaming.api.windowing.evictors.CountEvictor
.evictor(
// 保留最近 20 条数据
new Evictor<Event, GlobalWindow>() {
private int maxCount = 20;
@Override
public void evictBefore(Iterable<TimestampedValue<Event>> elements, int size, GlobalWindow window, EvictorContext evictorContext) {
// System.out.println("class = "+elements.getClass()); // class org.apache.flink.shaded.guava30.com.google.common.collect.Iterables$5
long total = elements.spliterator().getExactSizeIfKnown();
if (total < maxCount) {
return;
} else {
// 需要移除窗口元素的数量 evictedCount = total - maxCount
int evictedCount = 0;
for (Iterator<TimestampedValue<Event>> iterator = elements.iterator(); iterator.hasNext();) {
iterator.next();
evictedCount++;
if (evictedCount > total - maxCount) {
break;
} else {
iterator.remove();
}
}
}
}
@Override
public void evictAfter(Iterable<TimestampedValue<Event>> elements, int size, GlobalWindow window, EvictorContext evictorContext) {
}
}
)
.process(
new ProcessAllWindowFunction<Event, String, GlobalWindow>() {
@Override
public void process(Context context, Iterable<Event> elements, Collector<String> out) throws Exception {
out.collect("窗口中有 " + elements.spliterator().getExactSizeIfKnown() + " 条元素");
}
}
)
.print();
env.execute();
}
}