Flink UDF

BatchTableEnvironment: custom scalar functions

Scalar functions: map scalar values to a new scalar value
Table functions: map scalar values to new rows
Aggregate functions: map the scalar values of multiple rows to a new scalar value
Table aggregate functions: map the scalar values of multiple rows to new rows (a minimal sketch follows this list)
Async table functions: a special kind of function for table sources that performs lookup queries asynchronously
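
Of the five kinds, the sections below demonstrate scalar, table, and aggregate functions. For completeness, here is a minimal table aggregate function, a sketch modeled on the well-known Top-2 example from the Flink documentation (class and field names are illustrative):

import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.table.functions.TableAggregateFunction;
import org.apache.flink.util.Collector;

// Emits up to two rows per group: the two largest values seen so far, with their rank
public class Top2 extends TableAggregateFunction<Tuple2<Integer, Integer>, Top2.Top2Accumulator> {

    // Mutable accumulator holding the two largest values seen so far
    public static class Top2Accumulator {
        public Integer first = Integer.MIN_VALUE;
        public Integer second = Integer.MIN_VALUE;
    }

    @Override
    public Top2Accumulator createAccumulator() {
        return new Top2Accumulator();
    }

    // Called once per input row to update the accumulator
    public void accumulate(Top2Accumulator acc, Integer value) {
        if (value > acc.first) {
            acc.second = acc.first;
            acc.first = value;
        } else if (value > acc.second) {
            acc.second = value;
        }
    }

    // Emits the result rows as (value, rank)
    public void emitValue(Top2Accumulator acc, Collector<Tuple2<Integer, Integer>> out) {
        if (acc.first != Integer.MIN_VALUE) {
            out.collect(Tuple2.of(acc.first, 1));
        }
        if (acc.second != Integer.MIN_VALUE) {
            out.collect(Tuple2.of(acc.second, 2));
        }
    }
}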

  Maven dependencies:

        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-java</artifactId>
            <version>1.9.1</version>
        </dependency>

        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-streaming-java_2.11</artifactId>
            <version>1.9.1</version>
        </dependency>
        <!-- Table & SQL API bridge for the DataStream/DataSet APIs in Java -->
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-table-api-java-bridge_2.11</artifactId>
            <version>1.9.1</version>
            <!--<scope>provided</scope>-->
        </dependency>
        <!-- table program planner and runtime -->
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-table-planner_2.11</artifactId>
            <version>1.9.1</version>
            <!--<scope>provided</scope>-->
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-jdbc_2.11</artifactId>
            <version>1.9.1</version>
            <!--<scope>provided</scope>-->
        </dependency>
        <dependency>
            <groupId>org.projectlombok</groupId>
            <artifactId>lombok</artifactId>
            <version>1.16.18</version>
        </dependency>

        <dependency>
            <groupId>mysql</groupId>
            <artifactId>mysql-connector-java</artifactId>
            <version>5.1.49</version>
        </dependency>
// Custom function 1: parse a date-time string into a Unix timestamp (seconds)
import org.apache.flink.table.functions.ScalarFunction;

import java.util.Date;
import java.text.SimpleDateFormat;
import java.util.Calendar;

// A simple scalar function that parses a "yyyy-MM-dd HH:mm:ss" string into epoch seconds
public class StrToTimestamp extends ScalarFunction {
    // Note: SimpleDateFormat is not thread-safe; each parallel function instance gets its own copy
    private SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");

    public Long eval(String dateStr) {
        long timestamp = 0L;
        try {
            Date date = sdf.parse(dateStr);
            // The Calendar round-trip below is equivalent to date.getTime() / 1000
            Calendar calendar = Calendar.getInstance();
            calendar.setTime(date);
            timestamp = calendar.getTimeInMillis() / 1000;
        } catch (Exception e) {
            e.printStackTrace();
        }
        return timestamp;
    }
}

// Custom function 2: format a java.sql.Date as a "yyyy-MM-dd" string
import org.apache.flink.table.functions.ScalarFunction;

import java.sql.Date;
import java.text.SimpleDateFormat;

public class DateToStr extends ScalarFunction {
    private SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd");

    public String eval(Date date) {
        return sdf.format(date);
    }
}

// Custom function 3
import org.apache.flink.table.functions.ScalarFunction;

// Coalesce-style helper: returns the max total when present, otherwise the first total
public class FirstOrMaxQuantity extends ScalarFunction {
    public Float eval(Float firstTotalChargeQuantity, Float maxTotalChargeQuantity) {
        // Equivalent to COALESCE(maxTotalChargeQuantity, firstTotalChargeQuantity)
        return maxTotalChargeQuantity == null ? firstTotalChargeQuantity : maxTotalChargeQuantity;
    }
}

// Custom function 4: round the time-of-day of a timestamp up to the next half-hour
import org.apache.flink.table.functions.ScalarFunction;

import java.sql.Timestamp;
import java.text.DecimalFormat;

public class TimeCeil extends ScalarFunction {
    // Rounds the time-of-day up to the next half-hour boundary,
    // e.g. 10:15:23 -> "10:30:00", 10:45:00 -> "11:00:00"
    public String eval(Timestamp column) {
        String field = column.toString().split(" ")[1];
        String[] timeSplit = field.split(":");
        // Zero-pad the hour
        DecimalFormat g1 = new DecimalFormat("00");
        String hour = timeSplit[0];
        String standard;
        // Ceil to the half-hour mark
        if (Integer.parseInt(timeSplit[1]) >= 30) {
            hour = g1.format(Integer.parseInt(hour) + 1);
            standard = "00";
        } else {
            standard = "30";
        }
        // Clamp the overflow past midnight (rounding up from 23:3x) to the last second of the day
        if ((hour + ":" + standard + ":00").equals("24:00:00")) {
            return "23:59:59";
        } else {
            return hour + ":" + standard + ":00";
        }
    }
}

// Custom function 5: seconds from a half-hour slot to the next slot boundary
import org.apache.flink.table.functions.ScalarFunction;

public class TsMinusHalf extends ScalarFunction {
    // Normally a half-hour slot spans 1800 s; the day's last slot ("23:30:00") spans only
    // 1799 s because TimeCeil clamps its end to 23:59:59
    public Long eval(String dateStr) {
        long timestamp = 0L;
        try {
            if (dateStr.equals("23:30:00")) {
                timestamp = 1799;
            } else {
                timestamp = 1800;
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
        return timestamp;
    }
}

  Note: BatchTableEnvironment still works in Flink 1.9.1, is deprecated as of 1.13.6, and has been removed entirely in 1.17.2.
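
For newer versions, the rough equivalent is the unified TableEnvironment in batch mode; a minimal sketch (Flink 1.13+), reusing the function class from above:

import org.apache.flink.table.api.EnvironmentSettings;
import org.apache.flink.table.api.TableEnvironment;

public class ModernBatchEnvSketch {
    public static void main(String[] args) {
        // Unified TableEnvironment in batch mode, the replacement for BatchTableEnvironment
        EnvironmentSettings settings = EnvironmentSettings.newInstance().inBatchMode().build();
        TableEnvironment tEnv = TableEnvironment.create(settings);
        // registerFunction(...) is likewise superseded by createTemporarySystemFunction(...)
        tEnv.createTemporarySystemFunction("StrToTimestamp", StrToTimestamp.class);
    }
}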

import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.api.common.typeinfo.Types;
import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.io.jdbc.JDBCInputFormat;
import org.apache.flink.api.java.typeutils.RowTypeInfo;
import org.apache.flink.table.api.Table;
import org.apache.flink.table.api.java.BatchTableEnvironment;
import org.apache.flink.types.Row;

import java.sql.Date;
import java.sql.Timestamp;

public class FlinkTest {

    public static void main(String[] args) throws Exception {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        BatchTableEnvironment bTableEnv = BatchTableEnvironment.create(env);

        bTableEnv.registerFunction("TsMinusHalf", new TsMinusHalf());
        bTableEnv.registerFunction("DateToStr", new DateToStr());
        bTableEnv.registerFunction("TimeCeil", new TimeCeil());
        bTableEnv.registerFunction("StrToTimestamp", new StrToTimestamp());
        bTableEnv.registerFunction("FirstOrMaxQuantity", new FirstOrMaxQuantity());

        // Option 1: build the test data inline
//        DataSet<Row> sourceData = env.fromElements(
//                Row.of("cabinet", "0723HBGCES5050-ssgj", "0723HBGCES5050", "des-1691575468267", new java.sql.Date(new java.util.Date().getTime()), new Timestamp(System.currentTimeMillis()), "des-1691575468267-subSys-1", "9", 189501.0f, 179381.0f),
//                Row.of("cabinet", "0723HBGCES5050-ssgj", "0723HBGCES5050", "des-1691575468267", new java.sql.Date(new java.util.Date().getTime()), new Timestamp(System.currentTimeMillis()), "des-1691575468267-subSys-1", "9", 189505.0f, 179387.0f),
//                Row.of("cabinet", "0723HBGCES5050-ssgj", "0723HBGCES5050", "des-1691575468267", new java.sql.Date(new java.util.Date().getTime()), new Timestamp(System.currentTimeMillis()), "des-1691575468267-subSys-1", "9", 189505.0f, 179387.0f)
//        );

        // Option 2: read the data from StarRocks via JDBC
        TypeInformation[] fieldTypes = {
                Types.STRING,
                Types.STRING,
                Types.STRING,
                Types.STRING,
                Types.SQL_DATE,
                Types.SQL_TIMESTAMP,
                Types.STRING,
                Types.INT,
                Types.FLOAT,
                Types.FLOAT
        };

        RowTypeInfo rowTypeInfo = new RowTypeInfo(fieldTypes);

        JDBCInputFormat jdbcInputFormat = JDBCInputFormat.buildJDBCInputFormat().setDrivername("com.mysql.jdbc.Driver")
                .setDBUrl("jdbc:mysql://xxx.xxx.xxx.xxx:9030/dws?useUnicode=true&characterEncoding=utf-8&useSSL=false&serverTimezone=Asia/Shanghai")
                .setUsername("root")
                .setPassword("")
                .setQuery("select student_type, student_id, teacher_id, school_id, data_date, data_fact_time, class_id, status, english_score_total, math_score_total from dwd.dwd_student where student_id = '4145646564erww-heheda' and data_date = '2024-12-16'").setRowTypeInfo(rowTypeInfo).finish();

        DataSet<Row> sourceData = env.createInput(jdbcInputFormat);

        Table dwdstudent = bTableEnv.fromDataSet(sourceData);

        bTableEnv.registerTable("dwd_student", dwdstudent);

        Table result = bTableEnv.sqlQuery("select\n" +
                "    studentType,\n" +
                "    studentId,\n" +
                "    teacherId,\n" +
                "    schoolId,\n" +
                "    classId,\n" +
                "    DateStr,\n" +
                "    HalfHour,\n" +
                "    StrToTimestamp(concat(DateToStr(DateStr), ' ', HalfHour)) as ts,\n" +
                "    min(englishScoreTotal) as first_englishScoreTotal,\n" +
                "    max(englishScoreTotal) as last_englishScoreTotal,\n" +
                "    min(mathScoreTotal) as first_mathScoreTotal,\n" +
                "    max(mathScoreTotal) as last_mathScoreTotal\n" +
                "from\n" +
                "    (\n" +
                "        select\n" +
                "            f0 as studentType,\n" +
                "            f1 as studentId,\n" +
                "            f2 as teacherId,\n" +
                "            f3 as schoolId,\n" +
                "            f4 as DateStr,\n" +
                "            f5 as TimeStr,\n" +
                "            TimeCeil(f5) as HalfHour,\n" +
                "            f6 as classId,\n" +
                "            f7 as status,\n" +
                "            f8 as englishScoreTotal,\n" +
                "            f9 as mathScoreTotal\n" +
                "        from\n" +
                "            dwd_student\n" +
                "    )\n" +
                "where\n" +
                "    englishScoreTotal <> 0 and mathScoreTotal <> 0\n" +
                "group by\n" +
                "    studentType, studentId, teacherId, schoolId, classId, DateStr, HalfHour");

//        result.printSchema();

        Table result2 = result
                .select("classId as classIdTmp, ts as tsTmp, ts + TsMinusHalf(HalfHour) as tsMinusHalf, last_englishScoreTotal as max_englishScoreTotal, last_mathScoreTotal as max_mathScoreTotal");

        Table result3 = result
                .leftOuterJoin(result2, "classIdTmp = classId && tsMinusHalf = ts")
                .select("studentType, studentId, teacherId, schoolId, classId, DateStr, HalfHour, ts, FirstOrMaxQuantity(first_englishScoreTotal, max_englishScoreTotal) as first_englishScoreTotal, last_englishScoreTotal, FirstOrMaxQuantity(first_mathScoreTotal, max_mathScoreTotal) as first_mathScoreTotal, last_mathScoreTotal");

        Table result4 = result3.select("studentType, studentId, teacherId, schoolId, classId, DateStr, ts, HalfHour, last_englishScoreTotal - first_englishScoreTotal as _diff_englishScoreTotal, last_mathScoreTotal - first_mathScoreTotal as _diff_mathScoreTotal");

        DataSet<Row> rowDataset = bTableEnv.toDataSet(result4, Row.class);

        rowDataset.print();
    }
}

  Execution result:

4145646564erww-heheda,fasdfasdf,new,student-abcdefg,student-abcdefg-class-1,2024-12-16,1734346800,19:00:00,0.0,0.0
4145646564erww-heheda,fasdfasdf,new,student-abcdefg,student-abcdefg-class-1,2024-12-16,1734352200,20:30:00,0.0,0.0
4145646564erww-heheda,fasdfasdf,new,student-abcdefg,student-abcdefg-class-1,2024-12-16,1734318000,11:00:00,0.0,51.40625
4145646564erww-heheda,fasdfasdf,new,student-abcdefg,student-abcdefg-class-1,2024-12-16,1734334200,15:30:00,0.0,50.890625
4145646564erww-heheda,fasdfasdf,new,student-abcdefg,student-abcdefg-class-1,2024-12-16,1734355800,21:30:00,0.0,0.0
4145646564erww-heheda,fasdfasdf,new,student-abcdefg,student-abcdefg-class-1,2024-12-16,1734361200,23:00:00,0.0,0.0
4145646564erww-heheda,fasdfasdf,new,student-abcdefg,student-abcdefg-class-1,2024-12-16,1734319800,11:30:00,0.0,50.796875
4145646564erww-heheda,fasdfasdf,new,student-abcdefg,student-abcdefg-class-1,2024-12-16,1734345000,18:30:00,0.0,0.0
4145646564erww-heheda,fasdfasdf,new,student-abcdefg,student-abcdefg-class-1,2024-12-16,1734283800,01:30:00,0.0,0.0
4145646564erww-heheda,fasdfasdf,new,student-abcdefg,student-abcdefg-class-1,2024-12-16,1734289200,03:00:00,0.0,0.0
4145646564erww-heheda,fasdfasdf,new,student-abcdefg,student-abcdefg-class-1,2024-12-16,1734298200,05:30:00,28.3125,0.0
4145646564erww-heheda,fasdfasdf,new,student-abcdefg,student-abcdefg-class-1,2024-12-16,1734364800,24:00:00,0.0,0.0
4145646564erww-heheda,fasdfasdf,new,student-abcdefg,student-abcdefg-class-1,2024-12-16,1734300000,06:00:00,28.09375,0.0
4145646564erww-heheda,fasdfasdf,new,student-abcdefg,student-abcdefg-class-1,2024-12-16,1734316200,10:30:00,0.0,49.09375
4145646564erww-heheda,fasdfasdf,new,student-abcdefg,student-abcdefg-class-1,2024-12-16,1734336000,16:00:00,0.0,28.109375
4145646564erww-heheda,fasdfasdf,new,student-abcdefg,student-abcdefg-class-1,2024-12-16,1734343200,18:00:00,0.0,0.0
4145646564erww-heheda,fasdfasdf,new,student-abcdefg,student-abcdefg-class-1,2024-12-16,1734303600,07:00:00,28.296875,0.0
4145646564erww-heheda,fasdfasdf,new,student-abcdefg,student-abcdefg-class-1,2024-12-16,1734354000,21:00:00,0.0,0.0
4145646564erww-heheda,fasdfasdf,new,student-abcdefg,student-abcdefg-class-1,2024-12-16,1734357600,22:00:00,0.0,0.0
4145646564erww-heheda,fasdfasdf,new,student-abcdefg,student-abcdefg-class-1,2024-12-16,1734339600,17:00:00,0.0,0.0
4145646564erww-heheda,fasdfasdf,new,student-abcdefg,student-abcdefg-class-1,2024-12-16,1734350400,20:00:00,0.0,0.0
4145646564erww-heheda,fasdfasdf,new,student-abcdefg,student-abcdefg-class-1,2024-12-16,1734292800,04:00:00,27.203125,0.0
4145646564erww-heheda,fasdfasdf,new,student-abcdefg,student-abcdefg-class-1,2024-12-16,1734310800,09:00:00,0.0,0.0
4145646564erww-heheda,fasdfasdf,new,student-abcdefg,student-abcdefg-class-1,2024-12-16,1734314400,10:00:00,0.0,0.0
4145646564erww-heheda,fasdfasdf,new,student-abcdefg,student-abcdefg-class-1,2024-12-16,1734363000,23:30:00,0.0,0.0
4145646564erww-heheda,fasdfasdf,new,student-abcdefg,student-abcdefg-class-1,2024-12-16,1734296400,05:00:00,28.390625,0.0
4145646564erww-heheda,fasdfasdf,new,student-abcdefg,student-abcdefg-class-1,2024-12-16,1734327000,13:30:00,53.09375,0.0
4145646564erww-heheda,fasdfasdf,new,student-abcdefg,student-abcdefg-class-1,2024-12-16,1734330600,14:30:00,0.0,49.296875
4145646564erww-heheda,fasdfasdf,new,student-abcdefg,student-abcdefg-class-1,2024-12-16,1734309000,08:30:00,0.0,0.0
4145646564erww-heheda,fasdfasdf,new,student-abcdefg,student-abcdefg-class-1,2024-12-16,1734312600,09:30:00,0.0,0.0
4145646564erww-heheda,fasdfasdf,new,student-abcdefg,student-abcdefg-class-1,2024-12-16,1734332400,15:00:00,0.0,51.40625
4145646564erww-heheda,fasdfasdf,new,student-abcdefg,student-abcdefg-class-1,2024-12-16,1734359400,22:30:00,0.0,0.0
4145646564erww-heheda,fasdfasdf,new,student-abcdefg,student-abcdefg-class-1,2024-12-16,1734285600,02:00:00,0.0,0.0
4145646564erww-heheda,fasdfasdf,new,student-abcdefg,student-abcdefg-class-1,2024-12-16,1734291000,03:30:00,0.0,0.0
4145646564erww-heheda,fasdfasdf,new,student-abcdefg,student-abcdefg-class-1,2024-12-16,1734301800,06:30:00,28.296875,0.0
4145646564erww-heheda,fasdfasdf,new,student-abcdefg,student-abcdefg-class-1,2024-12-16,1734305400,07:30:00,4.203125,0.0
4145646564erww-heheda,fasdfasdf,new,student-abcdefg,student-abcdefg-class-1,2024-12-16,1734321600,12:00:00,0.0,34.90625
4145646564erww-heheda,fasdfasdf,new,student-abcdefg,student-abcdefg-class-1,2024-12-16,1734337800,16:30:00,0.0,0.0
4145646564erww-heheda,fasdfasdf,new,student-abcdefg,student-abcdefg-class-1,2024-12-16,1734280200,00:30:00,0.0,0.0
4145646564erww-heheda,fasdfasdf,new,student-abcdefg,student-abcdefg-class-1,2024-12-16,1734282000,01:00:00,0.0,0.0
4145646564erww-heheda,fasdfasdf,new,student-abcdefg,student-abcdefg-class-1,2024-12-16,1734307200,08:00:00,0.0,0.0
4145646564erww-heheda,fasdfasdf,new,student-abcdefg,student-abcdefg-class-1,2024-12-16,1734323400,12:30:00,51.296875,0.0
4145646564erww-heheda,fasdfasdf,new,student-abcdefg,student-abcdefg-class-1,2024-12-16,1734348600,19:30:00,0.0,0.0
4145646564erww-heheda,fasdfasdf,new,student-abcdefg,student-abcdefg-class-1,2024-12-16,1734328800,14:00:00,35.40625,0.0
4145646564erww-heheda,fasdfasdf,new,student-abcdefg,student-abcdefg-class-1,2024-12-16,1734341400,17:30:00,0.0,0.0
4145646564erww-heheda,fasdfasdf,new,student-abcdefg,student-abcdefg-class-1,2024-12-16,1734325200,13:00:00,50.8125,0.0
4145646564erww-heheda,fasdfasdf,new,student-abcdefg,student-abcdefg-class-1,2024-12-16,1734287400,02:30:00,0.0,0.0
4145646564erww-heheda,fasdfasdf,new,student-abcdefg,student-abcdefg-class-1,2024-12-16,1734294600,04:30:00,28.0,0.0

References:
flink 定义一个全局的状态 flink 自定义udf
Flink1.9 UDF使用教程
在Apache Flink中,Java UDF(用户自定义函数)的使用涉及几个关键步骤
Flink UDF使用指南,你竟然不会用?

  The future of the DataSet API: note that Flink's official roadmap no longer prioritizes new DataSet API features; development is focused on the DataStream API, and even batch processing will eventually be served through the DataStream API. New projects should therefore prefer the DataStream API over the DataSet API where possible. Flink's Table API and SQL API likewise cover both batch and stream processing, and these higher-level APIs offer more concise syntax and stronger optimization. Source: 大数据-118 - Flink DataSet 基本介绍 核心特性 创建、转换、输出等
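
As a concrete illustration of that direction, bounded inputs can be processed with the DataStream API in batch execution mode (available since Flink 1.12); a minimal sketch:

import org.apache.flink.api.common.RuntimeExecutionMode;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

public class BatchModeSketch {
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        // Bounded input processed with batch semantics, replacing a DataSet program
        env.setRuntimeMode(RuntimeExecutionMode.BATCH);
        env.fromElements(1, 2, 3).print();
        env.execute("DataStream batch-mode sketch");
    }
}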

Streaming: custom aggregate functions

  Maven dependencies:

    <dependencies>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-table-common</artifactId>
            <version>1.13.6</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-streaming-java_2.12</artifactId>
            <version>1.13.6</version>
        </dependency>
        <!-- table planner and runtime -->
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-table-planner-blink_2.12</artifactId>
            <version>1.13.6</version>
            <!--<scope>provided</scope>-->
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-clients_2.12</artifactId> <!-- the _2.12 Scala suffix must match your project -->
            <version>1.13.6</version>
        </dependency>
    </dependencies>

  Custom aggregate function: AggregateFunction requires the following methods to be implemented (depending on the use case, e.g. OVER windows or session windows, retract() and merge() may also be required):

  • createAccumulator()
  • accumulate()
  • getValue()

import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.table.functions.AggregateFunction;

// Custom AggregateFunction<result type, accumulator (intermediate state) type>
public class AvgTemp extends AggregateFunction<Double, Tuple2<Double,Integer>> {
    // Compute the final result from the accumulator
    @Override
    public Double getValue(Tuple2<Double, Integer> acc) {
        return acc.f0/acc.f1;
    }
    // Initialize the accumulator
    @Override
    public Tuple2<Double, Integer> createAccumulator() {
        return new Tuple2<Double,Integer>(0.0,0);
    }
    // accumulate(accumulator, input value) is mandatory; it updates the state for each incoming row
    public void accumulate(Tuple2<Double,Integer> acc, Double temp){
        acc.f0 += temp;
        acc.f1 += 1;
    }
}

  Testing from a main method:

import org.apache.flink.api.java.tuple.Tuple3;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.table.api.EnvironmentSettings;
import org.apache.flink.table.api.Table;
import org.apache.flink.table.api.bridge.java.StreamTableEnvironment;
import org.apache.flink.types.Row;

public class FlinkTest {

    public static void main(String[] args) throws Exception {
        // Set up the streaming execution environment
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(1);

        // Create the streaming table environment
        EnvironmentSettings environmentSettings = EnvironmentSettings
                .newInstance()
                .build();
        StreamTableEnvironment tableEnv = StreamTableEnvironment.create(env, environmentSettings);

        // Create a data stream to convert into a table
        DataStreamSource<Tuple3<String,Long,Double>> dataStream = env.fromElements(
                new Tuple3<>("hehe", 1L, 0.23),
                new Tuple3<>("haha", 2L, 3.5)
        );

        // Convert the stream (Tuple3<String, Long, Double>) into a table
        Table sourceTable = tableEnv.fromDataStream(dataStream, "f0 as id, f1 as ts, f2 as temp, pt.proctime");
        // Register the UDAF (registerFunction is deprecated in 1.13;
        // createTemporarySystemFunction("avgTemp", AvgTemp.class) is the newer equivalent)
        AvgTemp avgTemp = new AvgTemp();
        tableEnv.registerFunction("avgTemp", avgTemp);
        // Table API
        Table resultTable = sourceTable.groupBy("id")
                .aggregate("avgTemp(temp) as avgtemp")
                .select("id, avgtemp");
        tableEnv.toRetractStream(resultTable, Row.class).print();
        // Execute the Flink job
        env.execute("Flink AggregateFunction Test");

        System.out.println("---------------------");

        // SQL
        tableEnv.createTemporaryView("sensor", sourceTable);
        Table resultSqlTable = tableEnv.sqlQuery("select id, avgTemp(temp) as avgtemp from sensor group by id");
        tableEnv.toRetractStream(resultSqlTable, Row.class).print();
        // Execute the second Flink job
        env.execute("Flink AggregateFunction SQL Test");
    }
}

  Execution result:

(true,+I[hehe, 0.23])
(true,+I[haha, 3.5])
---------------------
(true,+I[hehe, 0.23])
(true,+I[haha, 3.5])

Reference: FlinkSQL-自定义聚合函数AggregateFunction

Streaming: custom table functions

  Maven dependencies:

<!-- Flink 1.13.6 -->
    <dependencies>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-table-common</artifactId>
            <version>1.13.6</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-streaming-java_2.12</artifactId>
            <version>1.13.6</version>
        </dependency>
        <!-- table planner and runtime -->
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-table-planner-blink_2.12</artifactId>
            <version>1.13.6</version>
            <!--<scope>provided</scope>-->
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-clients_2.12</artifactId> <!-- the _2.12 Scala suffix must match your project -->
            <version>1.13.6</version>
        </dependency>
    </dependencies>

<!-- Flink 1.17.2 -->
    <dependencies>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-table-common</artifactId>
            <version>1.17.2</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-streaming-java</artifactId>
            <version>1.17.2</version>
        </dependency>
        <!-- table planner and runtime -->
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-table-planner-loader</artifactId>
            <version>1.17.2</version>
            <!--<scope>provided</scope>-->
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-table-api-java-bridge</artifactId>
            <version>1.17.2</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-clients</artifactId> <!-- no Scala suffix as of Flink 1.15+ -->
            <version>1.17.2</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-table-runtime</artifactId>
            <version>1.17.2</version>
        </dependency>
    </dependencies>

  Custom UDF (table function): string splitting

import org.apache.flink.table.annotation.DataTypeHint;
import org.apache.flink.table.annotation.FunctionHint;
import org.apache.flink.table.functions.TableFunction;
import org.apache.flink.types.Row;

@FunctionHint(output = @DataTypeHint("ROW<`str_value` STRING>"))
public class SplitFunction extends TableFunction<Row> {
    // The eval method splits the input string and emits one row per token
    public void eval(String str, String regex) {
        if (str != null) {
            // Split the input on the given regex
            for (String s : str.split(regex)) {
                // collect(...) emits one row
                collect(Row.of(s));
            }
        }
    }
}
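
  Besides the SQL LATERAL TABLE join used in the test below, the same function can be applied through the Table API. A sketch of the equivalent call, meant to be dropped into the main method that follows (it assumes the same table variable and needs these static imports):

import static org.apache.flink.table.api.Expressions.$;
import static org.apache.flink.table.api.Expressions.call;
import static org.apache.flink.table.api.Expressions.lit;

// Table API equivalent of "LEFT JOIN LATERAL TABLE(SplitFunction(f0, ',')) ON TRUE"
Table result = table
        .leftOuterJoinLateral(call(SplitFunction.class, $("f0"), lit(",")))
        .select($("f0"), $("str_value"));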

  Testing from a main method:

import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.table.api.EnvironmentSettings;
import org.apache.flink.table.api.Table;
import org.apache.flink.table.api.bridge.java.StreamTableEnvironment;
import org.apache.flink.types.Row;

public class FlinkTest {

    public static void main(String[] args) throws Exception {
        // Set up the streaming execution environment
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(1);

        // Create the streaming table environment
        EnvironmentSettings environmentSettings = EnvironmentSettings
                .newInstance()
                .build();
        StreamTableEnvironment tableEnv = StreamTableEnvironment.create(env, environmentSettings);

        // Create a data stream and convert it into a table
        DataStreamSource<String> dataStream = env.fromElements("hello,world");
        Table table = tableEnv.fromDataStream(dataStream);
        table.printSchema(); // print the table schema

        // Create a temporary view
        tableEnv.createTemporaryView("MyTable", table);

        // Register the custom function SplitFunction
        tableEnv.createTemporarySystemFunction("SplitFunction", SplitFunction.class);

        // Run a SQL query that calls SplitFunction to split the string
        Table result = tableEnv.sqlQuery(
                "SELECT f0, str_value " +
                        "FROM MyTable " +
                        "LEFT JOIN LATERAL TABLE(SplitFunction(f0, ',')) ON TRUE");

        // Convert the result to a data stream and print it
        tableEnv.toDataStream(result, Row.class).print();

        // Execute the Flink job
        env.execute("Flink sql SplitFunction Test");
    }

}

  Execution result:

(
  `f0` STRING
)
+I[hello,world, hello]
+I[hello,world, world]

Process finished with exit code 0

Reference: Flink SQL 自定义函数 - 字符串拆分

Examples of operations supported by the Flink Table API

    <dependencies>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-table-common</artifactId>
            <version>1.17.2</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-streaming-java</artifactId>
            <version>1.17.2</version>
        </dependency>
        <!-- table planner and runtime -->
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-table-planner-loader</artifactId>
            <version>1.17.2</version>
            <!--<scope>provided</scope>-->
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-table-api-java-bridge</artifactId>
            <version>1.17.2</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-clients</artifactId> <!-- no Scala suffix as of Flink 1.15+ -->
            <version>1.17.2</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-table-runtime</artifactId>
            <version>1.17.2</version>
        </dependency>
        <dependency>
            <groupId>org.projectlombok</groupId>
            <artifactId>lombok</artifactId>
            <version>1.18.26</version>
        </dependency>
    </dependencies>

  GroupBy Window Aggregation:

import lombok.AllArgsConstructor;
import lombok.Data;
import lombok.NoArgsConstructor;
import org.apache.flink.api.common.eventtime.WatermarkStrategy;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.table.api.Table;
import org.apache.flink.table.api.Tumble;
import org.apache.flink.table.api.bridge.java.StreamTableEnvironment;
import org.apache.flink.types.Row;

import java.time.Duration;
import java.util.Arrays;
import java.util.List;

import static org.apache.flink.table.api.Expressions.$;
import static org.apache.flink.table.api.Expressions.lit;

/**
 * @author alanchan
 *
 */
public class TestTableAPIOperationDemo2 {
    final static List<User> userList = Arrays.asList(
            new User(1L, "alan", 18, 1698742358391L),
            new User(2L, "alan", 19, 1698742359396L),
            new User(3L, "alan", 25, 1698742360407L),
            new User(4L, "alanchan", 28, 1698742361409L),
            new User(5L, "alanchan", 29, 1698742362424L)
    );

    /**
     * @param args
     * @throws Exception
     */
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        StreamTableEnvironment tenv = StreamTableEnvironment.create(env);

        DataStream<User> users = env.fromCollection(userList)
                .assignTimestampsAndWatermarks(
                        WatermarkStrategy
                                .<User>forBoundedOutOfOrderness(Duration.ofSeconds(1))
                                .withTimestampAssigner((user, recordTimestamp) -> user.getRowtime())
                )
                ;

        Table usersTable = tenv.fromDataStream(users, $("id"), $("name"), $("balance"),$("rowtime").rowtime());

        // Group and aggregate the table over a group window combined with one or more grouping keys
        Table result = usersTable
                .window(Tumble.over(lit(5).minutes()).on($("rowtime")).as("w")) // define a tumbling window
                .groupBy($("name"), $("w")) // group by key and window
                // access window properties and aggregate
                .select(
                        $("name"),
                        $("w").start(),
                        $("w").end(),
                        $("w").rowtime(),
                        $("balance").sum().as("sum(balance)")
                );

        DataStream<Tuple2<Boolean, Row>> resultDS = tenv.toRetractStream(result, Row.class);
        resultDS.print();
//		2> (true,+I[alan, 2023-10-31T08:50, 2023-10-31T08:55, 2023-10-31T08:54:59.999, 62])
//		16> (true,+I[alanchan, 2023-10-31T08:50, 2023-10-31T08:55, 2023-10-31T08:54:59.999, 57])
        env.execute();
    }

    @Data
    @NoArgsConstructor
    @AllArgsConstructor
    public static class User {
        private long id;
        private String name;
        private int balance;
        private Long rowtime;
    }

}

  Execution result:

2> (true,+I[alan, 2023-10-31T08:50, 2023-10-31T08:55, 2023-10-31T08:54:59.999, 62])
16> (true,+I[alanchan, 2023-10-31T08:50, 2023-10-31T08:55, 2023-10-31T08:54:59.999, 57])
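
  For comparison, the same tumbling-window aggregation can be expressed in SQL; a sketch, assuming usersTable from the example above is registered as a view named users (to be dropped into the same main method):

// SQL equivalent of the Table API window aggregation above
tenv.createTemporaryView("users", usersTable);
Table sqlResult = tenv.sqlQuery(
        "SELECT name, " +
        "       TUMBLE_START(rowtime, INTERVAL '5' MINUTE) AS w_start, " +
        "       TUMBLE_END(rowtime, INTERVAL '5' MINUTE) AS w_end, " +
        "       SUM(balance) AS sum_balance " +
        "FROM users " +
        "GROUP BY name, TUMBLE(rowtime, INTERVAL '5' MINUTE)");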

Sources:
【flink番外篇】9、Flink Table API 支持的操作示例(1)-完整版
【flink番外篇】9、Flink Table API 支持的操作示例(6)- 表的聚合(group by、Distinct、GroupBy/Over Window Aggregation)操作
Flink教程(16)- Flink Table与SQL

Flink job submission commands

  Submitting a job from the command line:

./bin/flink run -c com.my.program.Main /path/to/user/jar/my-program.jar
# -c gives the fully qualified main class; the .jar path that follows is the user program's JAR package.

  Submitting a job with arguments:

./bin/flink run -c com.my.program.Main -p 5 /path/to/user/jar/my-program.jar --arg1 value1 --arg2 value2
# -p sets the job parallelism (5 parallel subtasks here); --arg1 value1 --arg2 value2 are passed through to the main class.
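
  A sketch of how such --key value arguments might be read inside the main class, using Flink's ParameterTool (the class and argument names mirror the hypothetical ones above):

import org.apache.flink.api.java.utils.ParameterTool;

public class Main {
    public static void main(String[] args) {
        // Parses "--arg1 value1 --arg2 value2" style arguments
        ParameterTool params = ParameterTool.fromArgs(args);
        String arg1 = params.get("arg1");             // "value1"
        String arg2 = params.get("arg2", "fallback"); // with a default value
        System.out.println(arg1 + ", " + arg2);
    }
}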

  Submitting to YARN:

./bin/flink run -m yarn-cluster -c com.my.program.Main /path/to/user/jar/my-program.jar
# -m specifies the submission target; yarn-cluster runs the job on a YARN cluster.

  Submitting to Kubernetes:

./bin/flink run-application -t kubernetes-application -Dkubernetes.cluster-id=my-cluster -Dkubernetes.namespace=default -c com.my.program.Main /path/to/user/jar/my-program.jar
# -t specifies the deployment target; kubernetes-application runs the job in Kubernetes application mode.