flink timeWindow

Notes:

Requirement: compute statistics over two kinds of intervals: per day (00:00:00-23:59:59) and per hour (00:00-59:59, minute:second within each hour).
A sliding time window does not satisfy this requirement; after some research I found a window assigner that does: TumblingEventTimeWindows.

TumblingEventTimeWindows can aggregate over exactly this kind of aligned interval window; the code below shows how.
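As a minimal sketch (the class and variable names here are only illustrative, not part of the original job): the hourly window needs no offset because tumbling windows are aligned to the epoch, while a daily window needs a -8 hour offset so that it covers 00:00:00-23:59:59 in UTC+8 (Beijing time) rather than a UTC day.

import org.apache.flink.streaming.api.windowing.assigners.TumblingEventTimeWindows;
import org.apache.flink.streaming.api.windowing.time.Time;

public class WindowAssignerSketch {
    public static void main(String[] args) {
        // Tumbling one-hour window, aligned to the top of each hour (offset defaults to 0)
        TumblingEventTimeWindows hourly = TumblingEventTimeWindows.of(Time.hours(1));

        // Tumbling one-day window shifted by -8 hours so that each window covers a
        // full calendar day in UTC+8 instead of a UTC day
        TumblingEventTimeWindows dailyUtc8 = TumblingEventTimeWindows.of(Time.days(1), Time.hours(-8));
    }
}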

Sample code:

Count the records in each hourly window.

package flink.stream.streamAPI;

import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.api.java.tuple.Tuple4;
import org.apache.flink.api.java.tuple.Tuple5;
import org.apache.flink.streaming.api.TimeCharacteristic;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.AssignerWithPeriodicWatermarks;
import org.apache.flink.streaming.api.functions.windowing.ProcessAllWindowFunction;
import org.apache.flink.streaming.api.watermark.Watermark;
import org.apache.flink.streaming.api.windowing.assigners.TumblingEventTimeWindows;
import org.apache.flink.streaming.api.windowing.time.Time;
import org.apache.flink.streaming.api.windowing.triggers.ContinuousEventTimeTrigger;
import org.apache.flink.streaming.api.windowing.windows.TimeWindow;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer011;
import org.apache.flink.util.Collector;

import javax.annotation.Nullable;
import java.util.Properties;

import static flink.GetTime.dateToTimestamp;
import static stranger.PropertyLoader.getPropertiesConfig;

/**
 * @author 
 * @description Counts the records falling into each hourly window
 * @date 2019/6/6
 */
public class EventTimeStreamWindowAll {
    public static void main(String[] args) {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);

        final String configPath = "config.properties";
        final Properties pro = getPropertiesConfig(configPath);
        final String topic = "stranger";
        final String groupId = "mainStranger";
        String bootstrapServers = pro.getProperty("bootstrap.servers");
        Properties properties = new Properties();
        properties.setProperty("bootstrap.servers", bootstrapServers);//kafka的节点的IP或者hostName,多个使用逗号分隔
        properties.setProperty("group.id", groupId);//flink consumer flink的消费者的group.id
        FlinkKafkaConsumer011<String> kafkaSource = new FlinkKafkaConsumer011<>(topic, new SimpleStringSchema(), properties);
        kafkaSource.setStartFromLatest();
        SingleOutputStreamOperator<String> mainStream = env.addSource(kafkaSource).assignTimestampsAndWatermarks(new AssignerWithPeriodicWatermarks<String>() {
            @Nullable
            @Override
            public Watermark getCurrentWatermark() {
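                // Emits a watermark 5 seconds behind the wall clock. This assumes event
                // timestamps roughly track processing time; a more robust variant would
                // track the maximum extracted timestamp and subtract the allowed lateness.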
                return new Watermark(System.currentTimeMillis() - 5000);
            }

            @Override
            public long extractTimestamp(String s, long l) {
                String[] split = s.split("\\t");
                long timestamp = dateToTimestamp(split[0]);
                return timestamp;
            }
        });

        // Sample record: 2019-06-05 15:13:32,people6,进入,place3  (进入 = "enter"; the code splits fields on "\t")
        DataStream<Tuple5<String, String, String, String, Long>> mainCount = mainStream.map(new MapFunction<String, Tuple4<String, String, String, String>>() {
            @Override
            public Tuple4<String, String, String, String> map(String s) throws Exception {
                String[] split = s.split("\\t");
                return new Tuple4<>(split[0], split[1], split[2], split[3]);
            }
        })

                // Non-parallel, hour-aligned tumbling window over the whole stream (offset defaults to 0)
                .windowAll(TumblingEventTimeWindows.of(Time.hours(1)))
                // Daily window: the -8h offset shifts the UTC-aligned boundary so each window
                // covers 00:00:00-23:59:59 in UTC+8 (Beijing time)
//                .windowAll(TumblingEventTimeWindows.of(Time.days(1), Time.hours(-8)))
                // Fire every 5 seconds of event time, emitting the partial count for the current window
                .trigger(ContinuousEventTimeTrigger.of(Time.seconds(5)))
                // Alternatively, fire once every N elements
//                .trigger(CountTrigger.of(1))
                .process(new ProcessAllWindowFunction<Tuple4<String, String, String, String>, Tuple5<String, String, String, String, Long>, TimeWindow>() {
                    @Override
                    public void process(Context context, Iterable<Tuple4<String, String, String, String>> iterables, Collector<Tuple5<String, String, String, String, Long>> collector) throws Exception {
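                        // Count every element in the window; the fields of the last element
                        // seen are kept so the emitted Tuple5 also carries a sample record.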
                        Long sum = 0L;
                        String time = null;
                        String people = null;
                        String behavior = null;
                        String place = null;
                        for (Tuple4<String, String, String, String> iterable : iterables) {
                            sum += 1;
                            time = iterable.f0;
                            people = iterable.f1;
                            behavior = iterable.f2;
                            place = iterable.f3;
                        }
                        collector.collect(new Tuple5<>(time, people, behavior, place, sum));
                    }
                });

        mainCount.print();
        try {
            env.execute("test count");
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
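The job imports dateToTimestamp from flink.GetTime and getPropertiesConfig from stranger.PropertyLoader, but neither class is shown. The sketch below is a hypothetical version of dateToTimestamp, assuming the first field of each record is formatted like the sample above ("2019-06-05 15:13:32"); getPropertiesConfig is assumed to simply load config.properties into a java.util.Properties.

package flink;

import java.text.ParseException;
import java.text.SimpleDateFormat;

public class GetTime {

    // Parses a "yyyy-MM-dd HH:mm:ss" string into epoch milliseconds.
    // Returns 0 if the input cannot be parsed, so a bad record does not kill the job.
    public static long dateToTimestamp(String date) {
        SimpleDateFormat format = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
        try {
            return format.parse(date).getTime();
        } catch (ParseException e) {
            e.printStackTrace();
            return 0L;
        }
    }
}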
