This article follows my usual style: the logic is explained through the code. If anything is unclear, leave a comment and I will reply when I see it.
What exactly are Catalogs?
Catalog is an interface defined inside the Flink Table API. It declares many methods for accessing and manipulating databases, such as dropDatabase(), createDatabase(), listTables(), and so on, and it is the mechanism for managing metadata in one place.
Metadata means databases, tables, views, partitions, functions and similar information. Most Flink SQL metadata is held only temporarily: once the session is closed it is gone.
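To make this concrete, here is a minimal, self-contained sketch (the names demo_catalog and demo_db are made up for illustration, and it assumes the Flink table planner is on the classpath, as it is in a Flink distribution) that registers the built-in GenericInMemoryCatalog and calls a few of the interface methods:

import org.apache.flink.table.api.EnvironmentSettings;
import org.apache.flink.table.api.TableEnvironment;
import org.apache.flink.table.catalog.Catalog;
import org.apache.flink.table.catalog.CatalogDatabaseImpl;
import org.apache.flink.table.catalog.GenericInMemoryCatalog;

import java.util.HashMap;

public class CatalogDemo {
    public static void main(String[] args) throws Exception {
        TableEnvironment tableEnv = TableEnvironment.create(EnvironmentSettings.inStreamingMode());

        // GenericInMemoryCatalog is the built-in, session-scoped implementation of the Catalog interface
        Catalog catalog = new GenericInMemoryCatalog("demo_catalog");
        tableEnv.registerCatalog("demo_catalog", catalog);

        // A few of the metadata methods declared by the Catalog interface
        catalog.createDatabase("demo_db",
                new CatalogDatabaseImpl(new HashMap<>(), "a demo database"), true);
        System.out.println(catalog.listDatabases());        // e.g. [default, demo_db]
        System.out.println(catalog.listTables("demo_db"));  // [] until tables are created in it
    }
}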
Does Flink SQL store any data?
Flink SQL itself does not store any data. Under the hood it simply parses the SQL and translates it into a Flink program that does the computation.
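You can see this translation step directly with explainSql(), which only produces an execution plan and materializes nothing. A minimal sketch, assuming the Flink table planner and the built-in datagen connector are on the classpath (both ship with the Flink distribution); the table t and its columns are placeholders:

import org.apache.flink.table.api.EnvironmentSettings;
import org.apache.flink.table.api.TableEnvironment;

public class ExplainDemo {
    public static void main(String[] args) {
        TableEnvironment tableEnv = TableEnvironment.create(EnvironmentSettings.inStreamingMode());

        // Registering a table only stores metadata; the datagen connector generates rows on the fly
        tableEnv.executeSql(
                "CREATE TABLE t (id INT, name STRING) WITH ('connector' = 'datagen')");

        // The SQL text is parsed, optimized and turned into a Flink program; printing the plan
        // shows that Flink SQL itself holds no data, it only describes the computation
        System.out.println(tableEnv.explainSql("SELECT name, COUNT(*) FROM t GROUP BY name"));
    }
}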
Code walkthrough, with comments throughout; the code runs as shown:
package cn.cgnb.flinksql;

import org.apache.commons.lang3.StringUtils;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.table.api.EnvironmentSettings;
import org.apache.flink.table.api.StatementSet;
import org.apache.flink.table.api.bridge.java.StreamTableEnvironment;

import java.io.File;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.List;

public class FlinkSqlDriver {
    public static void main(String[] args) throws IOException {
        // Create a streaming execution environment so the jar can run on a Flink cluster
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        // The Java Table API bridge is imported from org.apache.flink.table.api.bridge.java.*
        StreamTableEnvironment tableEnv = StreamTableEnvironment.create(env);
        // Alternatively, build an EnvironmentSettings yourself and pick your own configuration.
        // If you don't, the defaults are used; internally Flink still calls the
        // StreamTableEnvironment.create(env, settings) method with a default EnvironmentSettings.
        /*
         * EnvironmentSettings settings = EnvironmentSettings.newInstance()
         *         .inStreamingMode()
         *         .build();
         * StreamTableEnvironment tableEnv = StreamTableEnvironment.create(env, settings);
         */
        // StatementSet is also an interface; createStatementSet() returns an implementation of it
        StatementSet statementSet = tableEnv.createStatementSet();
        // Directory that holds the SQL files
        String allFilePath = "project_path/flinksql/";
        File allFiles = new File(allFilePath);
        File[] files = allFiles.listFiles();
        assert files != null;
        for (File oneFile : files) {
            String oneFilePath = oneFile.getPath();
            List<String> allLines = Files.readAllLines(Paths.get(oneFilePath));
            StringBuilder stringBuilder = new StringBuilder();
            for (String line : allLines) {
                // Skip blank lines
                if (StringUtils.isBlank(line)) {
                    continue;
                }
                // Skip comment lines
                if (line.startsWith("--")) {
                    continue;
                }
                // Lines like "set key=value;" are treated as Flink SQL configuration
                if (line.startsWith("set")) {
                    line = line.replaceAll(";", "");
                    String[] split = line.split("\\s+");
                    String key = split[1].split("=")[0];
                    String value = split[1].split("=")[1];
                    tableEnv.getConfig().set(key, value);
                    continue;
                }
                // Statements are separated by semicolons; a trailing ";" closes the current statement
                if (line.endsWith(";")) {
                    line = line.replaceAll(";", " ");
                    String sql = stringBuilder.append(line).toString();
                    if (sql.toLowerCase().startsWith("insert")) {
                        // INSERTs are collected and submitted together at the end
                        statementSet.addInsertSql(sql);
                    } else {
                        // DDL and other statements are executed immediately
                        tableEnv.executeSql(sql);
                    }
                    stringBuilder = new StringBuilder();
                    continue;
                }
                stringBuilder.append(line);
                stringBuilder.append("\n");
            }
        }
        statementSet.execute();
    }
}
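Why collect the INSERT statements into a StatementSet instead of running each one with executeSql()? Every addInsertSql() call only buffers the statement; execute() then submits all of them as a single Flink job, so several sinks end up in one pipeline rather than one job per INSERT. A minimal, self-contained sketch (the table names src, sink_a and sink_b are made up; datagen and blackhole are the built-in test connectors shipped with the Flink distribution):

import org.apache.flink.table.api.EnvironmentSettings;
import org.apache.flink.table.api.StatementSet;
import org.apache.flink.table.api.TableEnvironment;

public class StatementSetDemo {
    public static void main(String[] args) {
        TableEnvironment tableEnv = TableEnvironment.create(EnvironmentSettings.inStreamingMode());
        tableEnv.executeSql("CREATE TABLE src (id INT) WITH ('connector' = 'datagen')");
        tableEnv.executeSql("CREATE TABLE sink_a (id INT) WITH ('connector' = 'blackhole')");
        tableEnv.executeSql("CREATE TABLE sink_b (id INT) WITH ('connector' = 'blackhole')");

        StatementSet statementSet = tableEnv.createStatementSet();
        statementSet.addInsertSql("INSERT INTO sink_a SELECT id FROM src"); // buffered only
        statementSet.addInsertSql("INSERT INTO sink_b SELECT id FROM src"); // buffered only
        statementSet.execute(); // one job that feeds both sinks
    }
}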
Flink SQL example:
-- Create a catalog to hold the metadata. This is optional; the built-in default_catalog is used otherwise.
-- 'generic_in_memory' is the built-in in-memory catalog type.
create catalog ctlg with ('type' = 'generic_in_memory');
-- Create a database; the default one is default_database. This is only a metadata namespace, not a real database.
create database ctlg.db;
-- Create a table whose data comes from Kafka. The declared column types are applied when the data is read:
-- if Kafka holds a string but the column is declared as int, the value is converted to int;
-- dirty data that cannot be converted causes an error.
drop table if exists ctlg.db.tbSource;
create table ctlg.db.tbSource -- source table
(
    dataid string, -- ID card number
    cnname string, -- name
    busstp string, -- transaction type
    bussgn string, -- transaction institution
    amount double, -- balance
    acctct int     -- number of accounts
) with (
    'connector' = 'kafka',                              -- Kafka connector
    'topic' = 'my_name_topic',                          -- input topic
    'format' = 'json',                                  -- message format; avro is also available
    'properties.bootstrap.servers' = '90.1.1.1:9092',   -- Kafka address
    'scan.startup.mode' = 'earliest-offset',            -- startup / consumption mode
    'properties.group.id' = 'flink-my_name_topic-2030'  -- consumer group
);
drop table if exists ctlg.db.tbSink; -- sink table
create table ctlg.db.tbSink -- sink table
(
    dataid string, -- primary key
    Z00001 double, -- total balance
    Z00002 int,    -- total number of accounts
    Z00003 double, -- account balance where the transaction type is '车贷' (auto loan)
    Z00004 int,    -- number of accounts where the transaction type is '车贷'
    Z00005 int,    -- number of institutions where the transaction type is '车贷'
    primary key (dataid) NOT ENFORCED
) with (
    'connector' = 'elasticsearch-7',   -- Elasticsearch connector; works for version 7 and also against version 8 clusters
    'index' = 'my_name_index',         -- target index, created automatically; create a template first if you want one applied
    'username' = 'elastic',            -- username
    'password' = 'elastic',            -- password
    'hosts' = 'https://90.1.1.1:9210'  -- Elasticsearch address
);
-- Insert statement.
-- Note that count() returns bigint; cast it if the sink column is declared as int.
insert into ctlg.db.tbSink
select
    a.dataid as dataid,
    sum(a.amount) as Z00001,
    sum(a.acctct) as Z00002,
    sum(case when a.busstp = '车贷' then a.amount else 0 end) as Z00003,
    sum(case when a.busstp = '车贷' then a.acctct else 0 end) as Z00004,
    cast(count(distinct case when a.busstp = '车贷' then a.bussgn end) as int) as Z00005
from ctlg.db.tbSource a
group by a.dataid;
The pom.xml file:
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>cn.cgnb.flinksql</groupId>
<artifactId>local-flinksql</artifactId>
<version>1.0-SNAPSHOT</version>
<properties>
<maven.compiler.source>8</maven.compiler.source>
<maven.compiler.target>8</maven.compiler.target>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<java.version>1.8</java.version>
<flink.version>1.15.1</flink.version>
<scala.binary.version>2.11</scala.binary.version>
<log4j.version>2.17.1</log4j.version>
<fastjson.version>1.2.83</fastjson.version>
<kafkaAppender.version>1.0</kafkaAppender.version>
<mysql.version>5.1.39</mysql.version>
<avroBean.version>1.0</avroBean.version>
<httpClient.version>4.5.3</httpClient.version>
<druid.version>1.2.8</druid.version>
</properties>
<profiles>
<profile>
<id>dev</id>
<properties>
<profile.id>dev</profile.id>
<dep_scope>compile</dep_scope>
</properties>
<activation>
<activeByDefault>true</activeByDefault>
</activation>
</profile>
</profiles>
<dependencies>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-runtime-web</artifactId>
<version>${flink.version}</version>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-clients</artifactId>
<version>${flink.version}</version>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-streaming-java</artifactId>
<version>${flink.version}</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-connector-kafka</artifactId>
<version>${flink.version}</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-connector-base</artifactId>
<version>${flink.version}</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-connector-jdbc</artifactId>
<version>${flink.version}</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-table-api-java-bridge</artifactId>
<version>${flink.version}</version>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-table-api-java</artifactId>
<version>${flink.version}</version>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-json</artifactId>
<version>${flink.version}</version>
<scope>compile</scope>
</dependency>
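<!-- Assumption: the SQL example above uses 'connector'='elasticsearch-7', which needs the
     Elasticsearch connector on the classpath; nothing else in this pom pulls it in. -->
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-connector-elasticsearch7</artifactId>
<version>${flink.version}</version>
</dependency>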
</dependencies>
<build>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
<configuration>
<source>${maven.compiler.source}</source>
<target>${maven.compiler.target}</target>
</configuration>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-shade-plugin</artifactId>
<version>3.0.0</version>
<configuration>
<createDependencyReducedPom>false</createDependencyReducedPom>
</configuration>
<executions>
<execution>
<phase>package</phase>
<goals>
<goal>shade</goal>
</goals>
<configuration>
<artifactSet>
<excludes>
<exclude>org.apache.flink:force-shading</exclude>
<exclude>com.google.code.findbugs:jsr305</exclude>
<exclude>org.slf4j:*</exclude>
<exclude>log4j:*</exclude>
</excludes>
</artifactSet>
<filters>
<filter>
<artifact>*:*</artifact>
<excludes>
<exclude>META-INF/*.SF</exclude>
<exclude>META-INF/*.DSA</exclude>
<exclude>META-INF/*.RSA</exclude>
</excludes>
</filter>
</filters>
<transformers>
<transformer
implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
<mainClass>cn.cgnb.flinksql.FlinkSqlDriver</mainClass>
</transformer>
</transformers>
</configuration>
</execution>
</executions>
</plugin>
</plugins>
</build>
</project>
If you run into problems, feel free to leave a comment: debugging, build errors, anything.