Flink-Table Streaming 时间属性-Event time-实践编程(五)下

针对 Flink Table Streaming 时间属性中的 Event time(事件时间),实践编程演示如下:
需要依赖的公共实体类:

/**
 * One student score record, built from a single comma-separated input line
 * (name, sex, course, score, timestamp — in that order).
 *
 * <p>Accessors such as {@code setName} / {@code getTimestamp} used by the
 * streaming jobs are not written out here; they are expected to come from the
 * {@code @Data} annotation.
 */
@Data
public class StudentInfo{
    /** Student name; used as the grouping key in the windowed queries. */
    private String name;
    /** Second input field, stored as-is. */
    private String sex;
    /** Third input field, stored as-is. */
    private String course;
    /** Score parsed from the fourth input field; summed per window. */
    private Float score;
    /** Event timestamp parsed from the fifth input field; presumably epoch milliseconds — TODO confirm against the input data. */
    private Long timestamp;
}
/**
 * Result row of the windowed aggregation: one student's total score for a window.
 *
 * <p>The field names match the column names produced by the queries
 * ({@code "name,sum(score) as sum_total_score"}), which is how the table
 * result is mapped onto this class.
 */
@Data
public class StudentScoreResult {
    /** Student name (grouping key of the aggregation). */
    public String name;
    /** Sum of {@code score} for this student within one window. */
    public float sum_total_score;
}

1、在数据流(DataStream)到表(Table)的转换过程中定义事件时间属性,具体实现代码如下:

import javax.annotation.Nullable;

/**
 * Demo job: defines the event-time (rowtime) attribute while converting a
 * {@code DataStream} into a {@code Table}, then sums each student's score over
 * 2-minute tumbling windows.
 *
 * <p>Input is read from a socket at 127.0.0.1:9999, one record per line in the form
 * {@code name,sex,course,score,timestamp}.
 */
public class TableStreamFlinkStudentTimerTest {

    /** Number of comma-separated fields expected on every input line. */
    private static final int FIELD_COUNT = 5;

    /**
     * Builds and runs the streaming job.
     *
     * @param args unused
     * @throws Exception if the job fails to execute
     */
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        // The rowtime attribute and the tumbling window below rely on event-time semantics.
        env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
        StreamTableEnvironment streamTableEnvironment = StreamTableEnvironment.create(env);

        DataStreamSource<String> text = env.socketTextStream("127.0.0.1", 9999, "\n");

        SingleOutputStreamOperator<StudentInfo> dataStreamStudent = text.flatMap(new FlatMapFunction<String, StudentInfo>() {
            @Override
            public void flatMap(String s, Collector<StudentInfo> collector) {
                StudentInfo studentInfo = parseStudentInfo(s);
                // Malformed lines are skipped instead of failing the whole job.
                if (studentInfo != null) {
                    collector.collect(studentInfo);
                }
            }
        });

        // Watermarks here follow the wall clock (current time minus a fixed lag), not the
        // event timestamps. NOTE(review): this only suits input whose timestamps are close
        // to "now"; records that are much older than the wall clock will presumably be
        // treated as late by the window — confirm against the test data.
        DataStream<StudentInfo> dataStream = dataStreamStudent.assignTimestampsAndWatermarks(new AssignerWithPeriodicWatermarks<StudentInfo>() {
            private final long maxTimeLag = 5000; // 5 seconds

            @Nullable
            @Override
            public Watermark getCurrentWatermark() {
                return new Watermark(System.currentTimeMillis() - maxTimeLag);
            }

            @Override
            public long extractTimestamp(StudentInfo studentInfo, long l) {
                return studentInfo.getTimestamp();
            }
        });

        // "timestamp.rowtime" turns the timestamp field into the table's event-time attribute.
        Table tableEvent = streamTableEnvironment.fromDataStream(dataStream, "name,score,timestamp.rowtime");
        Table eventResultTable = tableEvent
                .window(Tumble.over("2.minutes")
                        .on("timestamp")
                        .as("userEventActionWindow"))
                .groupBy("userEventActionWindow,name")
                .select("name,sum(score) as sum_total_score");

        DataStream<Tuple2<Boolean, StudentScoreResult>> studentScoreResultDataStream =
                streamTableEnvironment.toRetractStream(eventResultTable, StudentScoreResult.class);
        studentScoreResultDataStream.print();

        env.execute("studentScoreAnalyse");
    }

    /**
     * Parses one input line of the form {@code name,sex,course,score,timestamp}.
     *
     * @param line raw text line received from the socket
     * @return the parsed record, or {@code null} when the line is blank, does not have
     *         exactly five fields, or its score/timestamp is not a valid number
     */
    private static StudentInfo parseStudentInfo(String line) {
        if (StringUtils.isBlank(line)) {
            return null;
        }
        String[] infos = line.split(",");
        if (infos.length != FIELD_COUNT) {
            return null;
        }
        try {
            // trim() guards against stray whitespace such as a trailing '\r' sent by
            // clients that terminate lines with CRLF; Long.parseLong rejects it otherwise.
            float score = Float.parseFloat(infos[3].trim());
            long timestamp = Long.parseLong(infos[4].trim());

            StudentInfo studentInfo = new StudentInfo();
            studentInfo.setName(infos[0]);
            studentInfo.setSex(infos[1]);
            studentInfo.setCourse(infos[2]);
            studentInfo.setScore(score);
            studentInfo.setTimestamp(timestamp);
            return studentInfo;
        } catch (NumberFormatException e) {
            // Same best-effort policy as for lines with a wrong field count: drop the line.
            return null;
        }
    }
}

输入数据:
在这里插入图片描述
输出数据汇总信息:
在这里插入图片描述
2、使用TableSource,具体代码实现如下:

import org.apache.commons.lang3.StringUtils;
import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.api.java.typeutils.TypeExtractor;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.AssignerWithPeriodicWatermarks;
import org.apache.flink.streaming.api.watermark.Watermark;
import org.apache.flink.table.api.TableSchema;
import org.apache.flink.table.api.Types;
import org.apache.flink.table.sources.DefinedRowtimeAttributes;
import org.apache.flink.table.sources.RowtimeAttributeDescriptor;
import org.apache.flink.table.sources.StreamTableSource;
import org.apache.flink.table.sources.tsextractors.ExistingField;
import org.apache.flink.table.sources.wmstrategies.AscendingTimestamps;
import org.apache.flink.table.sources.wmstrategies.PreserveWatermarks;
import org.apache.flink.util.Collector;

import javax.annotation.Nullable;
import java.util.Collections;
import java.util.List;

/**
 * Table source that reads student score records from a socket and exposes the
 * {@code timestamp} field as the table's event-time (rowtime) attribute.
 *
 * <p>Watermarks are generated on the underlying {@code DataStream} (bounded
 * out-of-orderness of 5 seconds) and handed to the table unchanged via the
 * {@code PreserveWatermarks} strategy.
 */
public class StudentActionSource implements StreamTableSource<StudentInfo>, DefinedRowtimeAttributes {

    /** Number of comma-separated fields expected on every input line. */
    private static final int FIELD_COUNT = 5;

    /** Maximum tolerated out-of-orderness of event timestamps: 5 seconds. */
    private static final long MAX_OUT_OF_ORDERNESS_MS = 5000L;

    /**
     * Builds the stream backing this table: socket text lines parsed into
     * {@link StudentInfo} records, with timestamps and watermarks assigned.
     *
     * @param execEnv environment used to create the socket source
     * @return the record stream carrying event timestamps and watermarks
     */
    @Override
    public DataStream<StudentInfo> getDataStream(StreamExecutionEnvironment execEnv) {

        DataStreamSource<String> text = execEnv.socketTextStream("127.0.0.1", 9999, "\n");

        // The socket source is a single reader. Parsing and watermarking are pinned to one
        // subtask as well: with more subtasks, those that receive no records would never
        // advance their watermark and downstream windows would not fire.
        SingleOutputStreamOperator<StudentInfo> dataStreamStudent = text
                .flatMap(new FlatMapFunction<String, StudentInfo>() {
                    @Override
                    public void flatMap(String s, Collector<StudentInfo> collector) {
                        StudentInfo studentInfo = parseStudentInfo(s);
                        // Malformed lines are skipped instead of failing the whole job.
                        if (studentInfo != null) {
                            collector.collect(studentInfo);
                        }
                    }
                })
                .setParallelism(1);

        // These watermarks reach the table only because the rowtime descriptor below uses
        // the PreserveWatermarks strategy.
        return dataStreamStudent
                .assignTimestampsAndWatermarks(new AssignerWithPeriodicWatermarks<StudentInfo>() {
                    // Largest event timestamp seen so far (primitive to avoid boxing per record).
                    private long currentMaxTimestamp = 0L;

                    @Nullable
                    @Override
                    public Watermark getCurrentWatermark() {
                        return new Watermark(currentMaxTimestamp - MAX_OUT_OF_ORDERNESS_MS);
                    }

                    @Override
                    public long extractTimestamp(StudentInfo studentInfo, long l) {
                        currentMaxTimestamp = Math.max(studentInfo.getTimestamp(), currentMaxTimestamp);
                        System.out.println("currentMaxTimestamp:"+currentMaxTimestamp);
                        return studentInfo.getTimestamp();
                    }
                })
                .setParallelism(1);
    }

    /**
     * Describes the table's columns.
     *
     * <p>Important: {@code timestamp} is the rowtime attribute, so its type in the table
     * schema must be {@code SQL_TIMESTAMP} even though the record field is a {@code Long}.
     *
     * @return schema with columns name, sex, course, score and timestamp
     */
    @Override
    public TableSchema getTableSchema() {
        String[] names = new String[] {"name" , "sex" , "course" , "score" , "timestamp"};
        TypeInformation<?>[] types = new TypeInformation<?>[] {
                Types.STRING(), Types.STRING(), Types.STRING(), Types.FLOAT(), Types.SQL_TIMESTAMP()};
        return new TableSchema(names, types);
    }

    /**
     * Declares {@code timestamp} as the rowtime attribute, taken from the existing
     * record field of the same name, keeping the watermarks of the underlying stream.
     *
     * @return a single-element list with the rowtime descriptor
     */
    @Override
    public List<RowtimeAttributeDescriptor> getRowtimeAttributeDescriptors() {
        RowtimeAttributeDescriptor rowtimeAttrDescr = new RowtimeAttributeDescriptor(
                "timestamp",
                new ExistingField("timestamp"),
                new PreserveWatermarks());
        return Collections.singletonList(rowtimeAttrDescr);
    }

    /**
     * @return type information of the records produced by {@link #getDataStream}
     */
    @Override
    public TypeInformation<StudentInfo> getReturnType() {
        return TypeExtractor.createTypeInfo(StudentInfo.class);
    }

    /**
     * Parses one input line of the form {@code name,sex,course,score,timestamp}.
     *
     * @param line raw text line received from the socket
     * @return the parsed record, or {@code null} when the line is blank, does not have
     *         exactly five fields, or its score/timestamp is not a valid number
     */
    private static StudentInfo parseStudentInfo(String line) {
        if (StringUtils.isBlank(line)) {
            return null;
        }
        String[] infos = line.split(",");
        if (infos.length != FIELD_COUNT) {
            return null;
        }
        try {
            // trim() guards against stray whitespace such as a trailing '\r' sent by
            // clients that terminate lines with CRLF; Long.parseLong rejects it otherwise.
            float score = Float.parseFloat(infos[3].trim());
            long timestamp = Long.parseLong(infos[4].trim());

            StudentInfo studentInfo = new StudentInfo();
            studentInfo.setName(infos[0]);
            studentInfo.setSex(infos[1]);
            studentInfo.setCourse(infos[2]);
            studentInfo.setScore(score);
            studentInfo.setTimestamp(timestamp);
            return studentInfo;
        } catch (NumberFormatException e) {
            // Same best-effort policy as for lines with a wrong field count: drop the line.
            return null;
        }
    }
}
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.api.TimeCharacteristic;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.table.api.Table;
import org.apache.flink.table.api.Tumble;
import org.apache.flink.table.api.java.StreamTableEnvironment;

/**
 * Demo job: registers {@code StudentActionSource} as a table whose rowtime attribute
 * is defined by the table source, then sums each student's score over 2-minute
 * tumbling event-time windows and prints the result.
 */
public class TableStreamFlinkStudentTableSourceTimerTest {

    /**
     * Builds and runs the streaming job.
     *
     * @param args unused
     * @throws Exception if the job fails to execute
     */
    public static void main(String[] args) throws Exception {

        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
        // The input comes from a single socket. With the default parallelism, subtasks that
        // receive no records never advance their watermark, the combined watermark stays at
        // its minimum and no window result is ever printed. One subtask avoids that.
        env.setParallelism(1);
        StreamTableEnvironment streamTableEnvironment = StreamTableEnvironment.create(env);

        // register table source
        streamTableEnvironment.registerTableSource("StudentActions", new StudentActionSource());

        // A window's result is emitted only once the watermark passes the window end,
        // i.e. after a record with a sufficiently later timestamp has been received.
        Table windowedTable = streamTableEnvironment
                .scan("StudentActions")
                .window(Tumble.over("2.minutes")
                        .on("timestamp")
                        .as("studentActionWindow"))
                .groupBy("studentActionWindow,name")
                .select("name,sum(score) as sum_total_score");

        DataStream<Tuple2<Boolean, StudentScoreResult>> studentScoreResultDataStream =
                streamTableEnvironment.toRetractStream(windowedTable, StudentScoreResult.class);
        studentScoreResultDataStream.print();

        env.execute("studentScoreAnalyse");
    }
}

上面的实例在运行过程中,print 一直没有输出结果,统计数据没有计算出来。代码是参考官方文档编写的,写法应该是正确的,还请看到的小伙伴指点具体的错误原因在哪里。

评论 2
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

springk

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值