Notes:
Requirement: compute statistics over two fixed intervals: per day (00:00:00-23:59:59) and per hour (minute 00:00 through 59:59 of each hour).
A sliding time window does not satisfy this; after some research I found the window assigner TumblingEventTimeWindows, which can aggregate over exactly these kinds of aligned, non-overlapping intervals.
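As a minimal sketch (assuming a DataStream named stream that already carries event-time timestamps; this is my own illustration, not part of the original code), the two assigners look like this:

// hourly windows aligned to the full hour (default offset 0)
stream.windowAll(TumblingEventTimeWindows.of(Time.hours(1)));
// daily windows: tumbling windows are aligned to the UTC epoch, so for data in UTC+8
// an offset of -8 hours makes each window start at local midnight
stream.windowAll(TumblingEventTimeWindows.of(Time.days(1), Time.hours(-8)));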
Example code:
Count the records in each hourly window.
package flink.stream.streamAPI;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.api.java.tuple.Tuple4;
import org.apache.flink.api.java.tuple.Tuple5;
import org.apache.flink.streaming.api.TimeCharacteristic;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.AssignerWithPeriodicWatermarks;
import org.apache.flink.streaming.api.functions.windowing.ProcessAllWindowFunction;
import org.apache.flink.streaming.api.watermark.Watermark;
import org.apache.flink.streaming.api.windowing.assigners.TumblingEventTimeWindows;
import org.apache.flink.streaming.api.windowing.time.Time;
import org.apache.flink.streaming.api.windowing.triggers.ContinuousEventTimeTrigger;
import org.apache.flink.streaming.api.windowing.windows.TimeWindow;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer011;
import org.apache.flink.util.Collector;
import javax.annotation.Nullable;
import java.util.Properties;
import static flink.GetTime.dateToTimestamp;
import static stranger.PropertyLoader.getPropertiesConfig;
/**
 * @author
 * @description Count the records that fall into each hourly window
 * @date 2019/6/6
 */
public class EventTimeStreamWindowAll {
    public static void main(String[] args) {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
        final String configPath = "config.properties";
        final Properties pro = getPropertiesConfig(configPath);
        final String topic = "stranger";
        final String groupId = "mainStranger";
        String bootstrapServers = pro.getProperty("bootstrap.servers");
        Properties properties = new Properties();
        properties.setProperty("bootstrap.servers", bootstrapServers); // Kafka broker IPs or host names, comma-separated
        properties.setProperty("group.id", groupId); // group.id of the Flink consumer
        FlinkKafkaConsumer011<String> kafkaSource = new FlinkKafkaConsumer011<>(topic, new SimpleStringSchema(), properties);
        kafkaSource.setStartFromLatest();
        SingleOutputStreamOperator<String> mainStream = env.addSource(kafkaSource).assignTimestampsAndWatermarks(new AssignerWithPeriodicWatermarks<String>() {
            @Nullable
            @Override
            public Watermark getCurrentWatermark() {
                // watermark derived from the wall clock, lagging 5 seconds behind
                return new Watermark(System.currentTimeMillis() - 5000);
            }

            @Override
            public long extractTimestamp(String s, long l) {
                // event time is parsed from the first tab-separated field of the record
                String[] split = s.split("\\t");
                return dateToTimestamp(split[0]);
            }
        });
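        // Note: the assigner above derives the watermark from the wall clock rather than from the
        // observed event timestamps. A sketch of an alternative using Flink's built-in
        // BoundedOutOfOrdernessTimestampExtractor (my own suggestion, not part of the original code):
        //   .assignTimestampsAndWatermarks(new BoundedOutOfOrdernessTimestampExtractor<String>(Time.seconds(5)) {
        //       @Override
        //       public long extractTimestamp(String s) {
        //           return dateToTimestamp(s.split("\\t")[0]);
        //       }
        //   })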
        // sample input record (fields are tab-separated in the actual data): 2019-06-05 15:13:32 / people6 / 进入 (enter) / place3
        DataStream<Tuple5<String, String, String, String, Long>> mainCount = mainStream
                .map(new MapFunction<String, Tuple4<String, String, String, String>>() {
                    @Override
                    public Tuple4<String, String, String, String> map(String s) throws Exception {
                        String[] split = s.split("\\t");
                        return new Tuple4<>(split[0], split[1], split[2], split[3]);
                    }
                })
                // note: windowAll below creates a non-parallel all-window, so this keying is effectively ignored
                .keyBy(2)
                // tumbling one-hour windows aligned to the full hour (default offset 0)
                .windowAll(TumblingEventTimeWindows.of(Time.hours(1)))
                // per-day windows instead; the -8h offset aligns the window to local midnight in UTC+8
                // .windowAll(TumblingEventTimeWindows.of(Time.days(1), Time.hours(-8)))
                // fire (emit the current aggregate) every 5 seconds of event time
                .trigger(ContinuousEventTimeTrigger.of(Time.seconds(5)))
                // or fire after every N elements:
                // .trigger(CountTrigger.of(1))
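                // the all-window function below counts the elements in the current window and emits
                // the count together with the fields of the last record it iterated over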
                .process(new ProcessAllWindowFunction<Tuple4<String, String, String, String>, Tuple5<String, String, String, String, Long>, TimeWindow>() {
                    @Override
                    public void process(Context context, Iterable<Tuple4<String, String, String, String>> iterables, Collector<Tuple5<String, String, String, String, Long>> collector) throws Exception {
                        Long sum = 0L;
                        String time = null;
                        String people = null;
                        String behavior = null;
                        String place = null;
                        for (Tuple4<String, String, String, String> iterable : iterables) {
                            sum += 1;
                            time = iterable.f0;
                            people = iterable.f1;
                            behavior = iterable.f2;
                            place = iterable.f3;
                        }
                        collector.collect(new Tuple5<>(time, people, behavior, place, sum));
                    }
                });
        mainCount.print();
        try {
            env.execute("test count");
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
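Two things worth noting (my own summary, not from the original code): tumbling windows are aligned to the UTC epoch, which is why the daily window needs the Time.hours(-8) offset to start at local midnight for UTC+8 data; and ContinuousEventTimeTrigger fires on the watermark every 5 seconds of event time without purging the window state, so each firing emits the count accumulated so far, with the final count emitted when the hourly (or daily) window closes.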