Flink-Table Streaming 时间属性-Event time-实践编程（五）下

最新推荐文章于 2025-02-10 17:27:38 发布

springk

最新推荐文章于 2025-02-10 17:27:38 发布

阅读量761

点赞数

CC 4.0 BY-SA版权

分类专栏： flink 文章标签： flink

本文链接：https://blog.youkuaiyun.com/springk/article/details/104265666

flink 专栏收录该内容

23 篇文章

订阅专栏

针对 Flink Table Streaming 时间属性中的 Event time（事件时间），实践编程演示如下：
需要依赖的公共实体类：

/**
 * One student score record, populated from a comma-separated input line
 * ({@code name,sex,course,score,timestamp}) by the parsing code in the jobs of this article.
 *
 * <p>Getters and setters are not written out here; they are presumably generated by
 * Lombok's {@code @Data} (the annotation's import is not shown).
 */
@Data
public class StudentInfo{
    // Student name; the windowed aggregations group by this field.
    private String name;
    // Second input field, stored as-is.
    private String sex;
    // Third input field, stored as-is.
    private String course;
    // Fourth input field parsed as a float; summed per window by the queries.
    private Float score;
    // Fifth input field parsed as a long; used as the record's event-time timestamp
    // (presumably epoch milliseconds -- confirm against the producer of the input).
    private Long timestamp;
}

/**
 * Result row of the windowed aggregation queries: one student name together with the sum
 * of that student's scores. Used as the target class of {@code toRetractStream}, so the
 * field names mirror the column names produced by
 * {@code select("name,sum(score) as sum_total_score")}.
 */
@Data
public class StudentScoreResult {
    // Grouping key of the aggregation.
    public String name;
    // Sum of scores for the group; the name must stay aligned with the select alias.
    public float sum_total_score;
}

1、在数据流（DataStream）到表（Table）的转换过程中定义事件时间属性，具体实现代码如下：

import javax.annotation.Nullable;

/**
 * Demo job: declares an event-time (rowtime) attribute while converting a
 * {@code DataStream} into a {@code Table}.
 *
 * <p>Lines of the form {@code name,sex,course,score,timestamp} are read from a socket,
 * parsed into {@link StudentInfo}, given timestamps and watermarks, converted to a table
 * whose {@code timestamp} column is the rowtime attribute, and aggregated per student in
 * 2-minute tumbling windows. The result is printed as a retract stream.
 */
public class TableStreamFlinkStudentTimerTest {

    /** Number of comma-separated fields a valid input line must contain. */
    private static final int EXPECTED_FIELD_COUNT = 5;

    /**
     * Builds and runs the streaming job.
     *
     * @param args unused
     * @throws Exception if the job fails to execute
     */
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        StreamTableEnvironment streamTableEnvironment = StreamTableEnvironment.create(env);
        env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);

        DataStreamSource<String> text = env.socketTextStream("127.0.0.1", 9999, "\n");

        SingleOutputStreamOperator<StudentInfo> dataStreamStudent = text.flatMap(new FlatMapFunction<String, StudentInfo>() {
            @Override
            public void flatMap(String s, Collector<StudentInfo> collector){
                StudentInfo studentInfo = parseStudentInfo(s);
                // Malformed lines are skipped instead of failing the whole job.
                if (studentInfo != null) {
                    collector.collect(studentInfo);
                }
            }
        });

        // The stream's watermarks are only kept if a watermark-preserving strategy is defined;
        // otherwise only the value of the table source's rowtime attribute is relevant.
        DataStream<StudentInfo> dataStream = dataStreamStudent.assignTimestampsAndWatermarks(new AssignerWithPeriodicWatermarks<StudentInfo>() {
            private final long maxTimeLag = 5000; // 5 seconds

            // NOTE(review): the watermark trails the wall clock, not the observed event
            // timestamps. That only works when records carry timestamps close to their arrival
            // time; replayed or hand-typed historical timestamps will be treated as late.
            @Nullable
            @Override
            public Watermark getCurrentWatermark() {
                return new Watermark(System.currentTimeMillis() - maxTimeLag);
            }

            @Override
            public long extractTimestamp(StudentInfo studentInfo, long l) {
                return studentInfo.getTimestamp();
            }
        });

        // "timestamp.rowtime" marks the timestamp field as the table's event-time attribute.
        Table tableEvent = streamTableEnvironment.fromDataStream(dataStream, "name,score,timestamp.rowtime");
        Table eventResultTable = tableEvent.window(Tumble.over("2.minutes")
                                            .on("timestamp")
                                            .as("userEventActionWindow"))
                                            .groupBy("userEventActionWindow,name")
                                            .select("name,sum(score) as sum_total_score");

        DataStream<Tuple2<Boolean, StudentScoreResult>> studentScoreResultDataStream = streamTableEnvironment.toRetractStream(eventResultTable,StudentScoreResult.class);
        studentScoreResultDataStream.print();

        env.execute("studentScoreAnalyse");
    }

    /**
     * Parses one {@code name,sex,course,score,timestamp} line.
     *
     * @param line raw input line; may be {@code null} or blank
     * @return the parsed record, or {@code null} when the line is blank, does not have exactly
     *         {@value #EXPECTED_FIELD_COUNT} fields, or has a non-numeric score or timestamp
     */
    private static StudentInfo parseStudentInfo(String line) {
        if (StringUtils.isBlank(line)) {
            return null;
        }
        String[] infos = line.split(",");
        if (infos.length != EXPECTED_FIELD_COUNT) {
            return null;
        }

        final float score;
        final long timestamp;
        try {
            score = Float.parseFloat(infos[3]);
            // Long.parseLong rejects surrounding whitespace, e.g. the '\r' left over when the
            // client sends CRLF line endings, so trim the last field first.
            timestamp = Long.parseLong(infos[4].trim());
        } catch (NumberFormatException ignored) {
            // Same policy as for a wrong field count: drop the line and keep the job running.
            return null;
        }

        StudentInfo studentInfo = new StudentInfo();
        studentInfo.setName(infos[0]);
        studentInfo.setSex(infos[1]);
        studentInfo.setCourse(infos[2]);
        studentInfo.setScore(score);
        studentInfo.setTimestamp(timestamp);
        return studentInfo;
    }
}

输入数据：
在这里插入图片描述
输出数据汇总信息：

2、使用TableSource，具体代码实现如下：

import org.apache.commons.lang3.StringUtils;
import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.api.java.typeutils.TypeExtractor;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.AssignerWithPeriodicWatermarks;
import org.apache.flink.streaming.api.watermark.Watermark;
import org.apache.flink.table.api.TableSchema;
import org.apache.flink.table.api.Types;
import org.apache.flink.table.sources.DefinedRowtimeAttributes;
import org.apache.flink.table.sources.RowtimeAttributeDescriptor;
import org.apache.flink.table.sources.StreamTableSource;
import org.apache.flink.table.sources.tsextractors.ExistingField;
import org.apache.flink.table.sources.wmstrategies.AscendingTimestamps;
import org.apache.flink.table.sources.wmstrategies.PreserveWatermarks;
import org.apache.flink.util.Collector;

import javax.annotation.Nullable;
import java.util.Collections;
import java.util.List;

/**
 * Streaming table source that reads {@code name,sex,course,score,timestamp} lines from a
 * socket and exposes them as a table whose {@code timestamp} column is the rowtime
 * (event-time) attribute.
 *
 * <p>Timestamps and watermarks are assigned on the underlying {@code DataStream} with a
 * bounded out-of-orderness of 5 seconds. The rowtime descriptor uses
 * {@code PreserveWatermarks} so that those watermarks are kept rather than replaced.
 */
public class StudentActionSource implements  StreamTableSource<StudentInfo>, DefinedRowtimeAttributes {

    /** Number of comma-separated fields a valid input line must contain. */
    private static final int EXPECTED_FIELD_COUNT = 5;

    /**
     * Builds the socket-backed stream of parsed records with timestamps and watermarks assigned.
     *
     * @param execEnv environment used to create the socket source
     * @return stream of parsed {@link StudentInfo} records
     */
    @Override
    public DataStream<StudentInfo> getDataStream(StreamExecutionEnvironment execEnv) {

        DataStreamSource<String> text = execEnv.socketTextStream("127.0.0.1", 9999, "\n");

        SingleOutputStreamOperator<StudentInfo> dataStreamStudent = text.flatMap(new FlatMapFunction<String, StudentInfo>() {
            @Override
            public void flatMap(String s, Collector<StudentInfo> collector){
                StudentInfo studentInfo = parseStudentInfo(s);
                // Malformed lines are skipped instead of failing the whole job.
                if (studentInfo != null) {
                    collector.collect(studentInfo);
                }
            }
        });

        // The stream's watermarks are only kept if a watermark-preserving strategy is defined
        // (see getRowtimeAttributeDescriptors).
        DataStream<StudentInfo> dataStream = dataStreamStudent.assignTimestampsAndWatermarks(new AssignerWithPeriodicWatermarks<StudentInfo>() {
            private final long maxOutOfOrderness = 5000; // maximum tolerated out-of-orderness: 5 s
            // Primitive on purpose: avoids boxing on every record.
            private long currentMaxTimestamp = 0L;

            @Nullable
            @Override
            public Watermark getCurrentWatermark() {
                return new Watermark(currentMaxTimestamp - maxOutOfOrderness);
            }

            @Override
            public long extractTimestamp(StudentInfo studentInfo, long l) {
                long eventTimestamp = studentInfo.getTimestamp();
                currentMaxTimestamp = Math.max(eventTimestamp, currentMaxTimestamp);
                System.out.println("currentMaxTimestamp:"+currentMaxTimestamp);
                return eventTimestamp;
            }
        });

        return dataStream;
    }

    /**
     * Declares the table schema.
     *
     * <p>Important: the {@code timestamp} column is the rowtime attribute and must therefore be
     * declared as {@code SQL_TIMESTAMP}.
     *
     * @return schema with columns name, sex, course, score and timestamp
     */
    @Override
    public TableSchema getTableSchema() {
        String[] names = new String[] {"name" , "sex" , "course" , "score" , "timestamp"};
        // The element types differ per column, so the array is typed with a wildcard.
        TypeInformation<?>[] types = new TypeInformation<?>[] {Types.STRING(), Types.STRING(), Types.STRING(), Types.FLOAT(), Types.SQL_TIMESTAMP()};
        return new TableSchema(names, types);
    }

    /**
     * Declares {@code timestamp} as the rowtime attribute, taken from the existing
     * {@code timestamp} field of the stream.
     *
     * @return a single-element list with the rowtime descriptor
     */
    @Override
    public List<RowtimeAttributeDescriptor> getRowtimeAttributeDescriptors() {
        // PreserveWatermarks keeps the watermarks already assigned in getDataStream; a periodic
        // strategy such as AscendingTimestamps would re-assign them and drop the 5 s tolerance.
        RowtimeAttributeDescriptor rowtimeAttrDescr = new RowtimeAttributeDescriptor(
                "timestamp",
                new ExistingField("timestamp"),
                new PreserveWatermarks());
        return Collections.singletonList(rowtimeAttrDescr);
    }

    /**
     * @return type information of the records produced by {@link #getDataStream}
     */
    @Override
    public TypeInformation<StudentInfo> getReturnType() {
        return TypeExtractor.createTypeInfo(StudentInfo.class);
    }

    /**
     * Parses one {@code name,sex,course,score,timestamp} line.
     *
     * @param line raw input line; may be {@code null} or blank
     * @return the parsed record, or {@code null} when the line is blank, does not have exactly
     *         {@value #EXPECTED_FIELD_COUNT} fields, or has a non-numeric score or timestamp
     */
    private static StudentInfo parseStudentInfo(String line) {
        if (StringUtils.isBlank(line)) {
            return null;
        }
        String[] infos = line.split(",");
        if (infos.length != EXPECTED_FIELD_COUNT) {
            return null;
        }

        final float score;
        final long timestamp;
        try {
            score = Float.parseFloat(infos[3]);
            // Long.parseLong rejects surrounding whitespace, e.g. the '\r' left over when the
            // client sends CRLF line endings, so trim the last field first.
            timestamp = Long.parseLong(infos[4].trim());
        } catch (NumberFormatException ignored) {
            // Same policy as for a wrong field count: drop the line and keep the job running.
            return null;
        }

        StudentInfo studentInfo = new StudentInfo();
        studentInfo.setName(infos[0]);
        studentInfo.setSex(infos[1]);
        studentInfo.setCourse(infos[2]);
        studentInfo.setScore(score);
        studentInfo.setTimestamp(timestamp);
        return studentInfo;
    }
}

import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.api.TimeCharacteristic;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.table.api.Table;
import org.apache.flink.table.api.Tumble;
import org.apache.flink.table.api.java.StreamTableEnvironment;

/**
 * Demo job: registers {@link StudentActionSource} as the table {@code StudentActions},
 * sums scores per student over 2-minute tumbling windows on the {@code timestamp} column,
 * and prints the result as a retract stream.
 */
public class TableStreamFlinkStudentTableSourceTimerTest {

    /**
     * Builds and runs the streaming job.
     *
     * @param args unused
     * @throws Exception if the job fails to execute
     */
    public static void main(String[] args) throws Exception {

        final StreamExecutionEnvironment execEnv = StreamExecutionEnvironment.getExecutionEnvironment();
        final StreamTableEnvironment tableEnv = StreamTableEnvironment.create(execEnv);
        execEnv.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);

        // Make the custom source queryable under a table name.
        tableEnv.registerTableSource("StudentActions", new StudentActionSource());

        // Per-student score totals for each 2-minute tumbling window.
        final Table studentActions = tableEnv.scan("StudentActions");
        final Table scoreTotals = studentActions
                .window(Tumble.over("2.minutes").on("timestamp").as("studentActionWindow"))
                .groupBy("studentActionWindow,name")
                .select("name,sum(score) as sum_total_score");

        final DataStream<Tuple2<Boolean, StudentScoreResult>> retractStream =
                tableEnv.toRetractStream(scoreTotals, StudentScoreResult.class);
        retractStream.print();

        execEnv.execute("studentScoreAnalyse");
    }
}