Flink SQL: Executing Flink SQL Statements with the Table API, Explained Through the Code

This article's style is to walk through the logic via the code itself. If you have questions, leave a comment; I reply to everything I see.

  • What exactly are Catalogs?
    Catalog is an interface defined inside the Flink Table API. It declares many methods for accessing and manipulating metadata, such as dropDatabase(), createDatabase(), and listTables(), so that metadata can be managed in one uniform place (a small sketch follows this list).
    Metadata means databases, views, partitions, functions, and so on. Most Flink SQL metadata is held only temporarily and is gone once the session closes.

  • Does Flink SQL store data?
    Flink SQL itself stores no data at all; under the hood it simply parses the SQL and wraps it into a Flink program that does the computation.
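
As a quick illustration of the Catalog interface, here is a minimal, hypothetical sketch of inspecting metadata through a TableEnvironment (class name and printed output are illustrative, not part of the driver below):

import org.apache.flink.table.api.EnvironmentSettings;
import org.apache.flink.table.api.TableEnvironment;
import org.apache.flink.table.catalog.Catalog;

public class CatalogPeek {
    public static void main(String[] args) throws Exception {
        TableEnvironment tEnv = TableEnvironment.create(
                EnvironmentSettings.newInstance().inStreamingMode().build());
        // every session starts with a built-in in-memory catalog (default_catalog)
        Catalog catalog = tEnv.getCatalog(tEnv.getCurrentCatalog()).get();
        // Catalog interface methods manage metadata: databases, tables, functions, ...
        System.out.println(catalog.listDatabases());                 // [default_database]
        System.out.println(catalog.listTables("default_database"));  // empty until tables are created
    }
}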

The code, with a comment on every step; it runs as shown:

package cn.cgnb.flinksql;

import org.apache.commons.lang3.StringUtils;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.table.api.EnvironmentSettings;
import org.apache.flink.table.api.StatementSet;
import org.apache.flink.table.api.bridge.java.StreamTableEnvironment;

import java.io.File;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.List;


public class FlinkSqlDriver {
    public static void main(String[] args) throws IOException {
        // create a streaming execution environment so the packaged jar can run on a Flink cluster
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        // the Java Table API is used via org.apache.flink.table.api.bridge.java.*
        StreamTableEnvironment tableEnv = StreamTableEnvironment.create(env);

        // Alternatively, you can build an EnvironmentSettings yourself and pick your own configuration.
        // If you don't, a default EnvironmentSettings is created internally, and the same
        // StreamTableEnvironment.create(env, settings) method is called anyway:
        /**
         *         EnvironmentSettings settings = EnvironmentSettings.newInstance()
         *                 .inStreamingMode()
         *                 .build();
         *         StreamTableEnvironment tableEnv = StreamTableEnvironment.create(env, settings);
         */
        // StatementSet is also an interface; createStatementSet() returns an implementation of it
        StatementSet statementSet = tableEnv.createStatementSet();
        // directory containing the .sql files
        String allFilesPath = "<project path>/flinksql/";
        File allFiles = new File(allFilesPath);
        File[] files = allFiles.listFiles();
        assert files != null;
        for (File oneFile : files) {
            String oneFilePath = oneFile.getPath();
            List<String> allLines = Files.readAllLines(Paths.get(oneFilePath));
            StringBuilder stringBuilder = new StringBuilder();
            for (String line : allLines) {
                // skip blank lines
                if (StringUtils.isBlank(line)) {
                    continue;
                }
                // skip comment lines
                if (line.startsWith("--")) {
                    continue;
                }
                // read Flink SQL configuration lines of the form: set key=value;
                if (line.startsWith("set")) {
                    line = line.replace(";", "");
                    String[] split = line.split("\\s+");
                    String key = split[1].split("=")[0];
                    String value = split[1].split("=")[1];
                    tableEnv.getConfig().set(key, value);
                    continue;
                }
                // statements are delimited by semicolons: a trailing semicolon completes the statement
                if (line.endsWith(";")) {
                    line = line.replace(";", " ");
                    String sql = stringBuilder.append(line).toString();
                    if (sql.trim().toLowerCase().startsWith("insert")) {
                        // buffer INSERTs in the StatementSet so they are submitted as one job
                        statementSet.addInsertSql(sql);
                    } else {
                        // DDL and other statements execute immediately
                        tableEnv.executeSql(sql);
                    }
                    stringBuilder = new StringBuilder();
                    // don't let the terminating line leak into the next statement's buffer
                    continue;
                }
                stringBuilder.append(line);
                stringBuilder.append("\n");
            }

        }
        // submit all buffered INSERT statements as a single Flink job
        statementSet.execute();
    }
}
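
To make the parsing loop concrete, here is a hypothetical settings file that could sit in the flinksql/ directory (the file name and option values are made up; the two option keys are real Flink options, chosen only for illustration). Blank lines and -- comment lines are skipped, each set key=value; line is pushed into tableEnv.getConfig(), and every other line is buffered until a line ending in a semicolon completes the statement:

-- 00_settings.sql (hypothetical)
set pipeline.name=flinksql-demo;
set table.exec.sink.not-null-enforcer=DROP;

One caveat: File.listFiles() does not guarantee any ordering, so if statements in one file depend on another file having run first, sort the files array before looping.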


Flink SQL example:

-- Create a catalog to hold the metadata. This is optional; the default is default_catalog.
-- (CREATE CATALOG expects a 'type'; the built-in in-memory catalog is used here.)
create catalog ctlg with ('type' = 'generic_in_memory');
-- Create a database; the default is default_database. Note this is not a real database on disk.
create database ctlg.db;

-- Create a table backed by Kafka. Column types are matched automatically:
-- e.g. if Kafka holds a string but the column is declared int, the value is parsed as int;
-- dirty data that cannot be parsed raises an error. (A sample record follows this script.)
drop table if exists ctlg.db.tbSource;
create table ctlg.db.tbSource -- source table
(
    dataid string, -- ID card number
    cnname string, -- name
    busstp string, -- transaction type
    bussgn string, -- transaction institution
    amount double, -- balance
    acctct int     -- number of accounts
) with (
    'connector' = 'kafka', -- Kafka connector
    'topic' = 'my_name_topic', -- input topic
    'format' = 'json', -- serialization format; avro is also available
    'properties.bootstrap.servers' = '90.1.1.1:9092', -- Kafka address
    'scan.startup.mode' = 'earliest-offset', -- where to start consuming
    'properties.group.id' = 'flink-my_name_topic-2030' -- consumer group
);

drop table if exists ctlg.db.tbSink;
create table ctlg.db.tbSink -- sink table
(
    dataid string, -- primary key
    Z00001 double, -- total balance
    Z00002 int,    -- total number of accounts
    Z00003 double, -- account balance where transaction type is '车贷' (car loan)
    Z00004 int,    -- number of accounts where transaction type is '车贷'
    Z00005 int,    -- number of institutions where transaction type is '车贷'
    primary key (dataid) NOT ENFORCED -- gives upsert semantics keyed on dataid
) with (
    'connector' = 'elasticsearch-7', -- works against Elasticsearch 7 (and 8)
    'index' = 'my_name_index', -- target index; created automatically, or pre-create a template to control mappings
    'username' = 'elastic', -- user name
    'password' = 'elastic', -- password
    'hosts' = 'https://90.1.1.1:9210' -- Elasticsearch address
);

-- Insert statement.
-- Note: count() returns bigint; cast it if you need int.
insert into ctlg.db.tbSink
select
    a.dataid as dataid,
    sum(a.amount) as Z00001,
    sum(a.acctct) as Z00002,
    sum(case when a.busstp = '车贷' then a.amount else 0 end) as Z00003,
    sum(case when a.busstp = '车贷' then a.acctct else 0 end) as Z00004,
    cast(count(distinct case when a.busstp = '车贷' then a.bussgn end) as int) as Z00005
from ctlg.db.tbSource a
group by a.dataid;
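
For reference, one (entirely hypothetical) input record on my_name_topic matching the tbSource schema could look like this; the json format maps top-level fields to columns by name:

{"dataid": "110101199001011234", "cnname": "张三", "busstp": "车贷", "bussgn": "B001", "amount": 15000.50, "acctct": 2}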

The pom.xml file:

<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <groupId>cn.cgnb.flinksql</groupId>
    <artifactId>local-flinksql</artifactId>
    <version>1.0-SNAPSHOT</version>

    <properties>
        <maven.compiler.source>8</maven.compiler.source>
        <maven.compiler.target>8</maven.compiler.target>
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
        <java.version>1.8</java.version>
        <flink.version>1.15.1</flink.version>
        <scala.binary.version>2.11</scala.binary.version>
        <log4j.version>2.17.1</log4j.version>
        <fastjson.version>1.2.83</fastjson.version>
        <kafkaAppender.version>1.0</kafkaAppender.version>
        <mysql.version>5.1.39</mysql.version>
        <avroBean.version>1.0</avroBean.version>
        <httpClient.version>4.5.3</httpClient.version>
        <druid.version>1.2.8</druid.version>
    </properties>
    <profiles>
        <profile>
            <id>dev</id>
            <properties>
                <profile.id>dev</profile.id>
                <dep_scope>compile</dep_scope>
            </properties>
            <activation>
                <activeByDefault>true</activeByDefault>
            </activation>
        </profile>
    </profiles>

    <dependencies>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-runtime-web</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-clients</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-streaming-java</artifactId>
            <version>${flink.version}</version>
            <scope>provided</scope>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-connector-kafka</artifactId>
            <version>${flink.version}</version>
            <scope>provided</scope>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-connector-base</artifactId>
            <version>${flink.version}</version>
            <scope>provided</scope>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-connector-jdbc</artifactId>
            <version>${flink.version}</version>
            <scope>provided</scope>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <!-- since Flink 1.15 the Java bridge has no Scala suffix and must match flink.version -->
            <artifactId>flink-table-api-java-bridge</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-table-api-java</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-json</artifactId>
            <version>${flink.version}</version>
            <scope>compile</scope>
        </dependency>
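        <!-- Presumably also needed for this setup (an assumption based on Flink 1.15
             packaging): the elasticsearch-7 connector used by the SQL above, the planner
             loader and table runtime that Flink 1.15 loads at runtime for Table API
             programs, and commons-lang3, which the driver uses via StringUtils. Note
             that provided-scope connector jars must be supplied on the cluster, e.g. in
             its lib/ directory, or switched to compile scope so the shade plugin
             bundles them. -->
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-connector-elasticsearch7</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-table-planner-loader</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-table-runtime</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.commons</groupId>
            <artifactId>commons-lang3</artifactId>
            <version>3.12.0</version>
        </dependency>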
</dependencies>
    <build>
        <plugins>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-compiler-plugin</artifactId>
                <configuration>
                    <source>${maven.compiler.source}</source>
                    <target>${maven.compiler.target}</target>
                </configuration>
            </plugin>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-shade-plugin</artifactId>
                <version>3.0.0</version>
                <configuration>
                    <createDependencyReducedPom>false</createDependencyReducedPom>
                </configuration>
                <executions>
                    <execution>
                        <phase>package</phase>
                        <goals>
                            <goal>shade</goal>
                        </goals>
                        <configuration>
                            <artifactSet>
                                <excludes>
                                    <exclude>org.apache.flink:force-shading</exclude>
                                    <exclude>com.google.code.findbugs:jsr305</exclude>
                                    <exclude>org.slf4j:*</exclude>
                                    <exclude>log4j:*</exclude>
                                </excludes>
                            </artifactSet>
                            <filters>
                                <filter>
                                    <artifact>*:*</artifact>
                                    <excludes>
                                        <exclude>META-INF/*.SF</exclude>
                                        <exclude>META-INF/*.DSA</exclude>
                                        <exclude>META-INF/*.RSA</exclude>
                                    </excludes>
                                </filter>
                            </filters>
                            <transformers>
                                <transformer
                                        implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
                                    <mainClass>cn.cgnb.flinksql.FlinkSqlDriver</mainClass>
                                </transformer>
                            </transformers>
                        </configuration>
                    </execution>
                </executions>
            </plugin>
        </plugins>
    </build>
</project>

Questions are welcome in the comments: debugging errors, anything at all.
