/**
 * Builds and runs a Flink streaming job that demonstrates a skewed aggregation feeding a
 * deliberately slow sink.
 *
 * <p>Pipeline, in order:
 * <ol>
 *   <li>Registers a {@code datagen} source table with columns {@code f_sequence INT} and
 *       {@code f_random_str STRING} (options: 50000 rows per second, {@code f_sequence}
 *       min 1 / max 100, {@code f_random_str} length 4).</li>
 *   <li>Runs a grouped aggregation whose key {@code aa} is {@code 1} for every row with
 *       {@code f_sequence < 50} and {@code f_sequence} otherwise, so the key {@code 1}
 *       receives far more rows than any other key.</li>
 *   <li>Converts the result to a retract stream and writes it to a sink that sleeps
 *       10 ms per record before printing it.</li>
 * </ol>
 *
 * @param args command-line arguments; not read by this method
 * @throws Exception if registering the table, building the pipeline, or executing the job fails
 */
public static void main(String[] args) throws Exception {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    StreamTableEnvironment tableEnvironment = StreamTableEnvironment.create(env);

    // Operator chaining is switched off for the whole job.
    // NOTE(review): presumably so each operator runs as its own task and its
    // backpressure can be inspected separately - confirm.
    env.disableOperatorChaining();

    // DDL for the generated source table. The literal pieces are kept exactly as they
    // were, including their embedded newlines and spacing.
    String createSourceSql = "CREATE TABLE datagen (\n" +
            " f_sequence INT,\n" +
            " f_random_str STRING\n" +
            ") WITH ( " +
            " 'connector' = 'datagen',\n" +
            " 'rows-per-second'='50000',\n" +
            " 'fields.f_sequence.min'='1',\n" +
            " 'fields.f_sequence.max'='100',\n" +
            " 'fields.f_random_str.length'='4'" +
            ")";
    // The returned TableResult is not needed for a CREATE TABLE statement here.
    tableEnvironment.executeSql(createSourceSql);

    // Skewed aggregation: every f_sequence below 50 collapses onto the single key 1,
    // while values 50 and above keep their own key. pv counts rows per key, uv counts
    // distinct f_random_str values per key.
    String skewedAggSql = " select aa, count(*) as pv,count(distinct f_random_str ) as uv from (select case when f_sequence < 50 then 1 else f_sequence end as aa , f_random_str from datagen ) as tmps group by aa ";
    Table table = tableEnvironment.sqlQuery(skewedAggSql);

    // Convert the aggregate result to a retract stream: each element pairs a Boolean
    // flag with the Row (per the Flink API, true marks an add message and false a
    // retract message).
    DataStream<Tuple2<Boolean, Row>> retractStream =
            tableEnvironment.toRetractStream(table, Row.class);

    retractStream.addSink(new RichSinkFunction<Tuple2<Boolean, Row>>() {
        @Override
        public void invoke(Tuple2<Boolean, Row> value, Context context) throws Exception {
            // Intentional 10 ms stall per record so the sink consumes slowly.
            Thread.sleep(10);
            System.out.println(value);
        }
    }).setParallelism(3).name("zcSink");

    /*
    Alternative scenario (disabled): make one task CPU-bound so that this task becomes
    backpressured. This variant uses toAppendStream only.
    DataStream<Row> rowDataStream = tableEnvironment.toAppendStream(table, Row.class);
    SingleOutputStreamOperator<String> map = rowDataStream.map(new RichMapFunction<Row, String>() {
        public boolean process;
        @Override
        public void open(Configuration parameters) throws Exception {
            int indexOfThisSubtask = getRuntimeContext().getIndexOfThisSubtask();
            if(indexOfThisSubtask==0){
                process=true;
            }else {
                process=false;
            }
        }
        @Override
        public String map(Row row) throws Exception {
            if(process){
                for (int i = 0; i < 100000; i++) {
                    //MessageDigest.getInstance("nd").digest(row.toString().getBytes());
                    String s="1";
                    s=s+1;
                }}
            return "222";
        }
    }).setParallelism(3);
    map.print().setParallelism(3);
    */

    // Print both forms of the class name, then submit the job under the simple name.
    System.out.println(FlinkTest.class.getSimpleName());
    System.out.println(FlinkTest.class.getName());
    env.execute(FlinkTest.class.getSimpleName());
}
Flink SQL 构造数据倾斜、某个 task 背压的示例
最新推荐文章于 2025-11-24 14:38:27 发布
该博客演示了如何使用 Apache Flink 的 SQL 接口创建 datagen 源表并执行查询,以及将结果转换为 DataStream 进行处理。内容包括创建 datagen 源表、定义打印目标表、执行 SQL 查询、对数据进行分组聚合,并通过自定义的 Sink 函数处理结果流。此外,还展示了如何构造某个 task 的 CPU 负载过高、从而造成该 task 背压的场景。
2068

被折叠的 条评论
为什么被折叠?



