针对 Flink Table Streaming 中的 Processing time(处理时间)属性,下面通过示例代码演示其两种定义方式:
需要依赖的公共实体类:
import lombok.Data;
/**
 * One student score record shared by the examples in this document.
 *
 * <p>The example parsers build it from a comma-separated input line of exactly four
 * fields ({@code name,sex,course,score}) using the Lombok-generated setters.
 */
@Data
public class StudentInfo {
// Student name; populated from the first comma-separated field.
private String name;
// Student sex; populated from the second comma-separated field.
private String sex;
// Course name; populated from the third comma-separated field.
private String course;
// Score for the course; parsed with Float.parseFloat from the fourth field.
private Float score;
// Set to System.currentTimeMillis() by the DataStream-to-Table example's parser;
// the TableSource example's parser leaves it unset.
private Long timestamp;
}
1、在数据流到表转换期间定义处理时间属性
具体实现代码如下:
/**
 * Result row of the windowed aggregation in the DataStream-to-Table example.
 *
 * <p>The field names match the column aliases used in that example's {@code select}
 * expression ({@code name}, {@code sum_total_score}, {@code current5SecondTime}).
 */
@Data
public class StudentScoreResult {
// Student name the window result was grouped by.
public String name;
// Sum of the student's scores within one window.
public float sum_total_score;
// Selected as the constant 0L by the query, then overwritten with
// System.currentTimeMillis() in the example's post-processing step.
public Long current5SecondTime;
// NOTE(review): presumably kept explicit so the framework can instantiate the class
// reflectively when converting table rows — confirm.
public StudentScoreResult() {}
}
/**
 * Demo: declares a processing-time attribute while converting a DataStream into a Table,
 * then sums each student's score over 5-second tumbling processing-time windows.
 *
 * <p>Input is read from a socket at 127.0.0.1:9999, one record per line, in the form
 * {@code name,sex,course,score}. Lines that are blank, do not have exactly four fields,
 * or carry a non-numeric score are skipped instead of failing the job.
 */
public class TableStreamFlinkStudentTimerTest1 {

    /**
     * Builds and runs the streaming job.
     *
     * @param args unused
     * @throws Exception if job execution fails
     */
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        StreamTableEnvironment streamTableEnvironment = StreamTableEnvironment.create(env);
        // Processing time is the default characteristic; it is set explicitly for clarity.
        env.setStreamTimeCharacteristic(TimeCharacteristic.ProcessingTime);

        // Use a socket text stream to simulate the input data.
        DataStreamSource<String> text = env.socketTextStream("127.0.0.1", 9999, "\n");

        // Convert each input line into a StudentInfo for the later computation.
        SingleOutputStreamOperator<StudentInfo> dataStreamStudent = text.flatMap(new FlatMapFunction<String, StudentInfo>() {
            @Override
            public void flatMap(String s, Collector<StudentInfo> collector) {
                if (StringUtils.isBlank(s)) {
                    return;
                }
                String[] infos = s.split(",");
                if (infos.length != 4) {
                    return;
                }
                float score;
                try {
                    score = Float.parseFloat(infos[3]);
                } catch (NumberFormatException ignored) {
                    // A malformed score is treated like any other malformed line: the record
                    // is dropped so one bad input line cannot fail the whole streaming job.
                    return;
                }
                StudentInfo studentInfo = new StudentInfo();
                studentInfo.setName(infos[0]);
                studentInfo.setSex(infos[1]);
                studentInfo.setCourse(infos[2]);
                studentInfo.setScore(score);
                studentInfo.setTimestamp(System.currentTimeMillis());
                collector.collect(studentInfo);
            }
        });

        // The ".proctime" suffix declares the processing-time attribute; it must be the
        // last entry in the field list.
        Table table = streamTableEnvironment.fromDataStream(dataStreamStudent, "name,score,process_time.proctime");

        // Tumbling window of 5 seconds on processing time, grouped by window and student name.
        Table windowedTable = table
            .window(Tumble.over("5.seconds")
                .on("process_time")
                .as("userActionWindow"))
            .groupBy("userActionWindow,name")
            .select("name,sum(score) as sum_total_score,0L as current5SecondTime");

        DataStream<Tuple2<Boolean, StudentScoreResult>> studentScoreResultDataStream =
            streamTableEnvironment.toRetractStream(windowedTable, StudentScoreResult.class);

        // Extra conversion step: stamp each result with the current wall-clock time so the
        // printed output can be compared against the window boundaries.
        SingleOutputStreamOperator<Tuple2<Boolean, StudentScoreResult>> result = studentScoreResultDataStream.flatMap(
            new FlatMapFunction<Tuple2<Boolean, StudentScoreResult>, Tuple2<Boolean, StudentScoreResult>>() {
                @Override
                public void flatMap(Tuple2<Boolean, StudentScoreResult> flaggedResult,
                                    Collector<Tuple2<Boolean, StudentScoreResult>> collector) throws Exception {
                    flaggedResult.f1.setCurrent5SecondTime(System.currentTimeMillis());
                    collector.collect(flaggedResult);
                }
            });

        result.print();
        env.execute("studentScoreAnalyse");
    }
}
输入数据信息:前四条记录处于同一个时间窗口,输出结果汇总在一起;后面的两条记录分属不同的时间窗口。
输出汇总数据信息:
2、使用TableSource具体实现
具体代码实现如下:
import lombok.Data;
/**
 * Result row of the windowed aggregation in the TableSource example.
 *
 * <p>The field names match the column aliases used in that example's {@code select}
 * expression ({@code name}, {@code sum_total_score}).
 */
@Data
public class StudentScoreResult {
// Student name the window result was grouped by.
public String name;
// Sum of the student's scores within one window.
public float sum_total_score;
}
import org.apache.commons.lang3.StringUtils;
import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.api.java.typeutils.TypeExtractor;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.table.api.TableSchema;
import org.apache.flink.table.api.Types;
import org.apache.flink.table.sources.DefinedProctimeAttribute;
import org.apache.flink.table.sources.StreamTableSource;
import org.apache.flink.util.Collector;
import javax.annotation.Nullable;
/**
 * Table source that reads student score lines from a socket and exposes a
 * processing-time attribute named {@code timestamp}.
 *
 * <p>Each input line is expected in the form {@code name,sex,course,score}. Lines that
 * are blank, do not have exactly four fields, or carry a non-numeric score are skipped.
 */
public class StudentActionSource implements StreamTableSource<StudentInfo>, DefinedProctimeAttribute {

    /** Name of the schema field that serves as the processing-time attribute. */
    private static final String PROCTIME_FIELD = "timestamp";

    /**
     * Returns the name of the processing-time attribute.
     *
     * @return the field name {@code "timestamp"}
     */
    @Nullable
    @Override
    public String getProctimeAttribute() {
        return PROCTIME_FIELD;
    }

    /**
     * Creates the underlying stream by reading lines from 127.0.0.1:9999 and parsing
     * them into {@link StudentInfo} records.
     *
     * @param execEnv the execution environment used to create the socket stream
     * @return the stream of parsed student records
     */
    @Override
    public DataStream<StudentInfo> getDataStream(StreamExecutionEnvironment execEnv) {
        DataStreamSource<String> text = execEnv.socketTextStream("127.0.0.1", 9999, "\n");
        return text.flatMap(new FlatMapFunction<String, StudentInfo>() {
            @Override
            public void flatMap(String s, Collector<StudentInfo> collector) {
                if (StringUtils.isBlank(s)) {
                    return;
                }
                String[] infos = s.split(",");
                if (infos.length != 4) {
                    return;
                }
                float score;
                try {
                    score = Float.parseFloat(infos[3]);
                } catch (NumberFormatException ignored) {
                    // A malformed score is treated like any other malformed line: the record
                    // is dropped so one bad input line cannot fail the whole streaming job.
                    return;
                }
                StudentInfo studentInfo = new StudentInfo();
                studentInfo.setName(infos[0]);
                studentInfo.setSex(infos[1]);
                studentInfo.setCourse(infos[2]);
                studentInfo.setScore(score);
                collector.collect(studentInfo);
            }
        });
    }

    /**
     * Describes the table schema.
     *
     * <p>Important: the field returned by {@link #getProctimeAttribute()} must be declared
     * with the SQL_TIMESTAMP type.
     *
     * @return schema with fields name, sex, course, score and timestamp
     */
    @Override
    public TableSchema getTableSchema() {
        String[] names = {"name", "sex", "course", "score", PROCTIME_FIELD};
        // The element types differ per column, so the array is typed with a wildcard
        // rather than with the record type.
        TypeInformation<?>[] types = {
            Types.STRING(), Types.STRING(), Types.STRING(), Types.FLOAT(), Types.SQL_TIMESTAMP()
        };
        return new TableSchema(names, types);
    }

    /**
     * Returns the type information of the records produced by {@link #getDataStream}.
     *
     * @return type information extracted from {@link StudentInfo}
     */
    @Override
    public TypeInformation<StudentInfo> getReturnType() {
        return TypeExtractor.createTypeInfo(StudentInfo.class);
    }
}
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.table.api.Table;
import org.apache.flink.table.api.Tumble;
import org.apache.flink.table.api.java.StreamTableEnvironment;
/**
 * Demo: registers {@code StudentActionSource} as a table and sums each student's score
 * over 5-second tumbling windows on the source's {@code timestamp} attribute.
 */
public class TableStreamFlinkStudentTableSourceTimerTest {

    /**
     * Builds and runs the streaming job.
     *
     * @param args unused
     * @throws Exception if job execution fails
     */
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        StreamTableEnvironment tableEnv = StreamTableEnvironment.create(env);

        // Make the socket-backed source available under a table name.
        tableEnv.registerTableSource("StudentActions", new StudentActionSource());

        // Group by 5-second tumbling window and student name, then total the scores.
        Table studentActions = tableEnv.scan("StudentActions");
        Table scoreTotals = studentActions
            .window(Tumble.over("5.seconds").on("timestamp").as("studentActionWindow"))
            .groupBy("studentActionWindow,name")
            .select("name,sum(score) as sum_total_score");

        // Convert the result table to an append-only stream and print every row.
        DataStream<StudentScoreResult> scoreTotalStream =
            tableEnv.toAppendStream(scoreTotals, StudentScoreResult.class);
        scoreTotalStream.print();

        env.execute("studentScoreAnalyse");
    }
}
输入数据信息:分两次输入,上面的数据在前五秒内输入,五秒之后再输入后面的两条数据。
输出结果信息: