针对Flink-Table Streaming 时间属性 Event time 事件属性,实践编程如下,演示编码如下:
需要依赖的公共实体类:
@Data
public class StudentInfo{
private String name;
private String sex;
private String course;
private Float score;
private Long timestamp;
}
@Data
public class StudentScoreResult {
public String name;
public float sum_total_score;
}
1、在数据流到表转换,具体实现代码如下:
import javax.annotation.Nullable;
public class TableStreamFlinkStudentTimerTest {
public static void main(String[] args) throws Exception {
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
StreamTableEnvironment streamTableEnvironment = StreamTableEnvironment.create(env);
env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
DataStreamSource<String> text = env.socketTextStream("127.0.0.1", 9999, "\n");
SingleOutputStreamOperator<StudentInfo> dataStreamStudent = text.flatMap(new FlatMapFunction<String, StudentInfo>() {
@Override
public void flatMap(String s, Collector<StudentInfo> collector){
String infos[] = s.split(",");
if(StringUtils.isNotBlank(s) && infos.length==5){
StudentInfo studentInfo = new StudentInfo();
studentInfo.setName(infos[0]);
studentInfo.setSex(infos[1]);
studentInfo.setCourse(infos[2]);
studentInfo.setScore(Float.parseFloat(infos[3]));
studentInfo.setTimestamp(Long.parseLong(infos[4]));
collector.collect(studentInfo);
}
}
});
//只有定义了保留水印的水印策略,才能保留数据流的水印。否则,只有表源的rowtime属性的值是相关的。
DataStream<StudentInfo> dataStream = dataStreamStudent.assignTimestampsAndWatermarks(new AssignerWithPeriodicWatermarks<StudentInfo>() {
private final long maxTimeLag = 5000; // 5 seconds
@Nullable
@Override
public Watermark getCurrentWatermark() {
return new Watermark(System.currentTimeMillis() - maxTimeLag);
}
@Override
public long extractTimestamp(StudentInfo studentInfo, long l) {
return studentInfo.getTimestamp();
}
});
Table tableEvent = streamTableEnvironment.fromDataStream(dataStream, "name,score,timestamp.rowtime");
Table eventResultTable = tableEvent.window(Tumble.over("2.minutes")
.on("timestamp")
.as("userEventActionWindow"))
.groupBy("userEventActionWindow,name")
.select("name,sum(score) as sum_total_score");
DataStream<Tuple2<Boolean, StudentScoreResult>> studentScoreResultDataStream = streamTableEnvironment.toRetractStream(eventResultTable,StudentScoreResult.class);
studentScoreResultDataStream.print();
env.execute("studentScoreAnalyse");
}
}
输入数据:
输出数据汇总信息:
2、使用TableSource,具体代码实现如下:
import org.apache.commons.lang3.StringUtils;
import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.api.java.typeutils.TypeExtractor;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.AssignerWithPeriodicWatermarks;
import org.apache.flink.streaming.api.watermark.Watermark;
import org.apache.flink.table.api.TableSchema;
import org.apache.flink.table.api.Types;
import org.apache.flink.table.sources.DefinedRowtimeAttributes;
import org.apache.flink.table.sources.RowtimeAttributeDescriptor;
import org.apache.flink.table.sources.StreamTableSource;
import org.apache.flink.table.sources.tsextractors.ExistingField;
import org.apache.flink.table.sources.wmstrategies.AscendingTimestamps;
import org.apache.flink.util.Collector;
import javax.annotation.Nullable;
import java.util.Collections;
import java.util.List;
public class StudentActionSource implements StreamTableSource<StudentInfo>, DefinedRowtimeAttributes {
@Override
public DataStream<StudentInfo> getDataStream(StreamExecutionEnvironment execEnv) {
DataStreamSource<String> text = execEnv.socketTextStream("127.0.0.1", 9999, "\n");
SingleOutputStreamOperator<StudentInfo> dataStreamStudent = text.flatMap(new FlatMapFunction<String, StudentInfo>() {
@Override
public void flatMap(String s, Collector<StudentInfo> collector){
String infos[] = s.split(",");
if(StringUtils.isNotBlank(s) && infos.length==5){
StudentInfo studentInfo = new StudentInfo();
studentInfo.setName(infos[0]);
studentInfo.setSex(infos[1]);
studentInfo.setCourse(infos[2]);
studentInfo.setScore(Float.parseFloat(infos[3]));
studentInfo.setTimestamp(Long.parseLong(infos[4]));
collector.collect(studentInfo);
}
}
});
//只有定义了保留水印的水印策略,才能保留数据流的水印
DataStream<StudentInfo> dataStream = dataStreamStudent.assignTimestampsAndWatermarks(new AssignerWithPeriodicWatermarks<StudentInfo>() {
private final long maxOutOfOrderness = 5000; // 最大允许的乱序时间是5s
private Long currentMaxTimestamp = 0L;
@Nullable
@Override
public Watermark getCurrentWatermark() {
return new Watermark(currentMaxTimestamp - maxOutOfOrderness);
}
@Override
public long extractTimestamp(StudentInfo studentInfo, long l) {
currentMaxTimestamp = Math.max(studentInfo.getTimestamp(), currentMaxTimestamp);
System.out.println("currentMaxTimestamp:"+currentMaxTimestamp);
return studentInfo.getTimestamp();
}
});
return dataStream;
}
//重要:timestamp作为proctimeAttribute返回类型,必须设置为SQL_TIMESTAMP类型
@Override
public TableSchema getTableSchema() {
String[] names = new String[] {"name" , "sex" , "course" , "score" , "timestamp"};
TypeInformation<StudentInfo>[] types = new TypeInformation[] {Types.STRING(), Types.STRING(), Types.STRING(), Types.FLOAT(), Types.SQL_TIMESTAMP()};
TableSchema tableSchema = new TableSchema(names,types);
return tableSchema;
}
@Override
public List<RowtimeAttributeDescriptor> getRowtimeAttributeDescriptors() {
RowtimeAttributeDescriptor rowtimeAttrDescr = new RowtimeAttributeDescriptor(
"timestamp",
new ExistingField("timestamp"),
new AscendingTimestamps());
List<RowtimeAttributeDescriptor> listRowtimeAttrDescr = Collections.singletonList(rowtimeAttrDescr);
return listRowtimeAttrDescr;
}
@Override
public TypeInformation<StudentInfo> getReturnType() {
return TypeExtractor.createTypeInfo(StudentInfo.class);
}
}
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.api.TimeCharacteristic;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.table.api.Table;
import org.apache.flink.table.api.Tumble;
import org.apache.flink.table.api.java.StreamTableEnvironment;
public class TableStreamFlinkStudentTableSourceTimerTest {
public static void main(String[] args) throws Exception {
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
StreamTableEnvironment streamTableEnvironment = StreamTableEnvironment.create(env);
env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
// register table source
streamTableEnvironment.registerTableSource("StudentActions", new StudentActionSource());
Table windowedTable = streamTableEnvironment
.scan("StudentActions")
.window((Tumble.over("2.minutes"))
.on("timestamp")
.as("studentActionWindow"))
.groupBy("studentActionWindow,name")
.select("name,sum(score) as sum_total_score");
DataStream<Tuple2<Boolean, StudentScoreResult>> studentScoreResultDataStream = streamTableEnvironment.toRetractStream(windowedTable,StudentScoreResult.class);
studentScoreResultDataStream.print();
env.execute("studentScoreAnalyse");
}
}
上面实例在运行过程中,print一直没有结果信息,数据信息没有统计出来,参考文档书写正确,还要看到的小伙伴指点,具体错误原因在哪里