1. Code Implementation
(1) Reading JSON data and writing it to HDFS
package cn.flink.opt1;

import org.apache.flink.table.api.EnvironmentSettings;
import org.apache.flink.table.api.TableEnvironment;

public class FlinkJson2HDFSCsv {
    public static void main(String[] args) {
        EnvironmentSettings environmentSettings = EnvironmentSettings.newInstance()
                .build();
        TableEnvironment tEnv = TableEnvironment.create(environmentSettings);

        // Source table: JSON records read from the local filesystem
        String source_sql = "CREATE TABLE json_table (\n" +
                "  id INT,\n" +
                "  name STRING,\n" +
                "  email STRING,\n" +
                "  date_time STRING\n" +
                ") WITH (\n" +
                "  'connector' = 'filesystem',\n" +
                "  'path' = 'input/userbase.json',\n" +
                "  'format' = 'json'\n" +
                ")";

        // Sink table: CSV files written to HDFS
        String sink_sql = "CREATE TABLE sink_hdfs (\n" +
                "  id INT,\n" +
                "  name STRING,\n" +
                "  email STRING,\n" +
                "  date_time STRING\n" +
                ") WITH (\n" +
                "  'connector' = 'filesystem',\n" +
                "  'path' = 'hdfs://localhost:9000/output_csv/',\n" +
                "  'format' = 'csv'\n" +
                ")";

        // Column order matches the sink schema (id, name, email, date_time)
        String insert_sql = "insert into sink_hdfs select id, name, email, date_time from json_table";

        tEnv.executeSql(source_sql);
        tEnv.executeSql(sink_sql);
        tEnv.executeSql(insert_sql);
    }
}
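The json format reads input/userbase.json as newline-delimited JSON, one object per line with fields matching the declared columns. Note that executeSql submits the INSERT job asynchronously and returns immediately; a minimal sketch for blocking until the CSV files are fully written (an addition, not part of the original example) is to keep the returned TableResult and await it. This assumes an extra import of org.apache.flink.table.api.TableResult and a throws Exception clause on main():

// Sketch: wait for the INSERT job to finish before the JVM exits.
TableResult result = tEnv.executeSql(insert_sql);
result.await();  // blocks until the filesystem sink job reaches a terminal state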

(2) Reading HDFS data and writing it to HBase
package cn.flink.opt1;

import org.apache.flink.table.api.EnvironmentSettings;
import org.apache.flink.table.api.TableEnvironment;

public class FlinkWithHDFSCSV2HBase {
    public static void main(String[] args) {
        EnvironmentSettings environmentSettings = EnvironmentSettings.newInstance()
                .build();
        TableEnvironment tEnv = TableEnvironment.create(environmentSettings);

        // Source table: the CSV files produced by the previous job on HDFS
        String source_sql = "CREATE TABLE source_hdfs (\n" +
                "  id INT,\n" +
                "  name STRING,\n" +
                "  email STRING,\n" +
                "  date_time STRING\n" +
                ") WITH (\n" +
                "  'connector' = 'filesystem',\n" +
                "  'path' = 'hdfs://localhost:9000/output_csv/',\n" +
                "  'format' = 'csv'\n" +
                ")";

        // Sink table: HBase table 'hTable' with column family f1
        String sink_sql = "CREATE TABLE sink_table (\n" +
                "  rowkey INT,\n" +
                "  f1 ROW<name STRING, email STRING, date_time STRING>,\n" +
                "  PRIMARY KEY (rowkey) NOT ENFORCED\n" +
                ") WITH (\n" +
                "  'connector' = 'hbase-2.2',\n" +
                "  'table-name' = 'hTable',\n" +
                "  'zookeeper.quorum' = 'bigdata01:2181,bigdata02:2181,bigdata03:2181'\n" +
                ")";

        // Wrap the non-key columns into a ROW that maps to column family f1
        String execute_sql = "insert into sink_table select id as rowkey, ROW(name, email, date_time) from source_hdfs";

        tEnv.executeSql(source_sql);
        tEnv.executeSql(sink_sql);
        tEnv.executeSql(execute_sql);
    }
}
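The HBase connector writes into an existing table, so 'hTable' with column family f1 has to be created before the job runs (for example via the HBase shell). A minimal sketch using the HBase 2.x client Admin API is shown below; it is a hypothetical helper class, not part of the original example, and the quorum hosts are taken from the connector options above:

package cn.flink.opt1;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.TableDescriptorBuilder;

// Hypothetical helper: creates the target HBase table with column family 'f1'
// if it does not exist yet.
public class CreateHTableIfAbsent {
    public static void main(String[] args) throws Exception {
        Configuration conf = HBaseConfiguration.create();
        // Assumed ZooKeeper settings, matching the connector options used above.
        conf.set("hbase.zookeeper.quorum", "bigdata01,bigdata02,bigdata03");
        conf.set("hbase.zookeeper.property.clientPort", "2181");

        try (Connection connection = ConnectionFactory.createConnection(conf);
             Admin admin = connection.getAdmin()) {
            TableName tableName = TableName.valueOf("hTable");
            if (!admin.tableExists(tableName)) {
                admin.createTable(TableDescriptorBuilder.newBuilder(tableName)
                        .setColumnFamily(ColumnFamilyDescriptorBuilder.of("f1"))
                        .build());
            }
        }
    }
}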
(3) Reading HBase data and writing it to Kafka
package cn.flink.opt1;

import org.apache.flink.table.api.EnvironmentSettings;
import org.apache.flink.table.api.TableEnvironment;

public class FlinkTableWithHBase2Kafka {
    public static void main(String[] args) {
        EnvironmentSettings environmentSettings = EnvironmentSettings.newInstance()
                .build();
        TableEnvironment tEnv = TableEnvironment.create(environmentSettings);

        // Source table: HBase table 'opt_log' with column family f1
        String source_table = "CREATE TABLE opt_log (\n" +
                "  rowkey INT,\n" +
                "  f1 ROW<username STRING, email STRING, date_time STRING>,\n" +
                "  PRIMARY KEY (rowkey) NOT ENFORCED\n" +
                ") WITH (\n" +
                "  'connector' = 'hbase-2.2',\n" +
                "  'table-name' = 'opt_log',\n" +
                "  'zookeeper.quorum' = 'bigdata01:2181,bigdata02:2181,bigdata03:2181'\n" +
                ")";

        // Sink table: Kafka topic 'user_output' written as JSON
        String sink_table = "CREATE TABLE KafkaTable (\n" +
                "  `username` STRING,\n" +
                "  `email` STRING,\n" +
                "  `date_time` STRING\n" +
                ") WITH (\n" +
                "  'connector' = 'kafka',\n" +
                "  'topic' = 'user_output',\n" +
                "  'properties.bootstrap.servers' = 'bigdata01:9092,bigdata02:9092,bigdata03:9092',\n" +
                "  'format' = 'json'\n" +
                ")";

        // The columns are nested inside the f1 ROW, so they are read with dot notation
        String execute_sql = "insert into KafkaTable select f1.username, f1.email, f1.date_time from opt_log";

        tEnv.executeSql(source_table);
        tEnv.executeSql(sink_table);
        tEnv.executeSql(execute_sql);
    }
}
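The sink topic should exist before the job writes to it, unless the brokers auto-create topics. A minimal sketch using the Kafka AdminClient (already on the classpath via flink-connector-kafka) follows; it is a hypothetical helper class, and the partition count and replication factor are assumptions to adjust to the actual cluster:

package cn.flink.opt1;

import java.util.Collections;
import java.util.Properties;
import org.apache.kafka.clients.admin.AdminClient;
import org.apache.kafka.clients.admin.AdminClientConfig;
import org.apache.kafka.clients.admin.NewTopic;

// Hypothetical helper: creates the 'user_output' topic used by the sink table.
public class CreateUserOutputTopic {
    public static void main(String[] args) throws Exception {
        Properties props = new Properties();
        props.put(AdminClientConfig.BOOTSTRAP_SERVERS_CONFIG,
                "bigdata01:9092,bigdata02:9092,bigdata03:9092");
        try (AdminClient admin = AdminClient.create(props)) {
            // 3 partitions, replication factor 2 -- assumed values.
            NewTopic topic = new NewTopic("user_output", 3, (short) 2);
            admin.createTopics(Collections.singleton(topic)).all().get();
        }
    }
}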
(4) Reading Kafka data and writing it to MySQL
package cn.flink.opt1;

import org.apache.flink.table.api.EnvironmentSettings;
import org.apache.flink.table.api.TableEnvironment;

public class FlinkTableWithKafka2MySQL {
    public static void main(String[] args) {
        EnvironmentSettings environmentSettings = EnvironmentSettings.newInstance()
                .build();
        TableEnvironment tEnv = TableEnvironment.create(environmentSettings);

        // Source table: JSON records consumed from the Kafka topic 'usr_opt'
        String source_table = "CREATE TABLE KafkaTable (\n" +
                "  `id` INT,\n" +
                "  `name` STRING,\n" +
                "  `email` STRING,\n" +
                "  `date_time` STRING\n" +
                ") WITH (\n" +
                "  'connector' = 'kafka',\n" +
                "  'topic' = 'usr_opt',\n" +
                "  'properties.bootstrap.servers' = 'bigdata01:9092,bigdata02:9092,bigdata03:9092',\n" +
                "  'properties.group.id' = 'user_opt_group',\n" +
                "  'scan.startup.mode' = 'earliest-offset',\n" +
                "  'format' = 'json',\n" +
                "  'json.fail-on-missing-field' = 'false',\n" +
                "  'json.ignore-parse-errors' = 'true'\n" +
                ")";

        // Sink table: MySQL table 'clicklog' accessed through the JDBC connector
        String sink_sql = "CREATE TABLE mysql_sink (\n" +
                "  id INT,\n" +
                "  username STRING,\n" +
                "  email STRING,\n" +
                "  date_time STRING\n" +
                ") WITH (\n" +
                "  'connector' = 'jdbc',\n" +
                "  'url' = 'jdbc:mysql://bigdata03:3306/user_log?characterEncoding=utf-8&serverTimezone=GMT%2B8',\n" +
                "  'driver' = 'com.mysql.jdbc.Driver',\n" +
                "  'table-name' = 'clicklog',\n" +
                "  'username' = 'root',\n" +
                "  'password' = '123456'\n" +
                ")";

        String execute_sql = "insert into mysql_sink select id, name as username, email, date_time from KafkaTable";

        tEnv.executeSql(source_table);
        tEnv.executeSql(sink_sql);
        tEnv.executeSql(execute_sql);
    }
}
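The JDBC connector does not create the physical MySQL table, so clicklog must already exist in the user_log database before the job runs. A minimal sketch that creates it over plain JDBC is shown below; it is a hypothetical helper class, the column lengths are assumptions, and no primary key is declared because the Flink sink table above has no PRIMARY KEY and therefore writes in append-only mode:

package cn.flink.opt1;

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.Statement;

// Hypothetical helper: creates the MySQL table the JDBC sink writes to.
public class CreateClicklogTable {
    public static void main(String[] args) throws Exception {
        String url = "jdbc:mysql://bigdata03:3306/user_log"
                + "?characterEncoding=utf-8&serverTimezone=GMT%2B8";
        try (Connection conn = DriverManager.getConnection(url, "root", "123456");
             Statement stmt = conn.createStatement()) {
            // Assumed column lengths; adjust to the real data.
            stmt.execute("CREATE TABLE IF NOT EXISTS clicklog (\n"
                    + "  id INT,\n"
                    + "  username VARCHAR(100),\n"
                    + "  email VARCHAR(200),\n"
                    + "  date_time VARCHAR(50)\n"
                    + ")");
        }
    }
}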
2. pom.xml
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>
    <groupId>cn.flinksql</groupId>
    <artifactId>flink_sql_study</artifactId>
    <version>1.0-SNAPSHOT</version>

    <properties>
        <maven.compiler.source>8</maven.compiler.source>
        <maven.compiler.target>8</maven.compiler.target>
        <scala.binary.version>2.12</scala.binary.version>
        <flink.version>1.14.3</flink.version>
        <hadoop.version>3.1.4</hadoop.version>
        <hbase.version>2.2.7</hbase.version>
        <hive.version>3.1.2</hive.version>
    </properties>

    <dependencies>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-table-api-java-bridge_${scala.binary.version}</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-table-planner_${scala.binary.version}</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-streaming-scala_${scala.binary.version}</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-table-common</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-clients_${scala.binary.version}</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <dependency>
            <groupId>org.projectlombok</groupId>
            <artifactId>lombok</artifactId>
            <version>1.18.22</version>
            <scope>provided</scope>
        </dependency>
        <dependency>
            <groupId>com.alibaba</groupId>
            <artifactId>fastjson</artifactId>
            <version>1.2.73</version>
        </dependency>
        <dependency>
            <groupId>org.slf4j</groupId>
            <artifactId>slf4j-simple</artifactId>
            <version>1.7.15</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-csv</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-json</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-hadoop-compatibility_${scala.binary.version}</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-client</artifactId>
            <version>${hadoop.version}</version>
            <exclusions>
                <exclusion>
                    <groupId>org.apache.commons</groupId>
                    <artifactId>commons-math3</artifactId>
                </exclusion>
                <exclusion>
                    <groupId>org.apache.commons</groupId>
                    <artifactId>commons-compress</artifactId>
                </exclusion>
            </exclusions>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-auth</artifactId>
            <version>${hadoop.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.commons</groupId>
            <artifactId>commons-math3</artifactId>
            <version>3.5</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-connector-hbase-2.2_${scala.binary.version}</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hbase</groupId>
            <artifactId>hbase-server</artifactId>
            <version>${hbase.version}</version>
            <exclusions>
                <exclusion>
                    <groupId>org.apache.commons</groupId>
                    <artifactId>commons-math3</artifactId>
                </exclusion>
            </exclusions>
        </dependency>
        <dependency>
            <groupId>org.apache.hbase</groupId>
            <artifactId>hbase-client</artifactId>
            <version>${hbase.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-connector-kafka_${scala.binary.version}</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <!-- use the same Scala suffix (2.12) as the other Flink connectors -->
            <artifactId>flink-connector-jdbc_${scala.binary.version}</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <dependency>
            <groupId>mysql</groupId>
            <artifactId>mysql-connector-java</artifactId>
            <version>5.1.38</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-connector-hive_${scala.binary.version}</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hive</groupId>
            <artifactId>hive-exec</artifactId>
            <version>${hive.version}</version>
            <exclusions>
                <exclusion>
                    <groupId>org.apache.hadoop</groupId>
                    <artifactId>hadoop-hdfs</artifactId>
                </exclusion>
                <exclusion>
                    <groupId>org.apache.commons</groupId>
                    <artifactId>commons-compress</artifactId>
                </exclusion>
            </exclusions>
        </dependency>
        <dependency>
            <groupId>commons-io</groupId>
            <artifactId>commons-io</artifactId>
            <version>2.11.0</version>
        </dependency>
        <dependency>
            <groupId>org.antlr</groupId>
            <artifactId>antlr-runtime</artifactId>
            <version>3.5.2</version>
        </dependency>
        <dependency>
            <groupId>org.apache.thrift</groupId>
            <artifactId>libfb303</artifactId>
            <version>0.9.3</version>
        </dependency>
    </dependencies>

    <build>
        <plugins>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-compiler-plugin</artifactId>
                <version>3.5.1</version>
                <configuration>
                    <source>1.8</source>
                    <target>1.8</target>
                </configuration>
            </plugin>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-shade-plugin</artifactId>
                <version>3.1.1</version>
                <executions>
                    <execution>
                        <phase>package</phase>
                        <goals>
                            <goal>shade</goal>
                        </goals>
                    </execution>
                </executions>
            </plugin>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-dependency-plugin</artifactId>
                <version>2.10</version>
                <executions>
                    <execution>
                        <id>copy-dependencies</id>
                        <phase>package</phase>
                        <goals>
                            <goal>copy-dependencies</goal>
                        </goals>
                        <configuration>
                            <outputDirectory>${project.build.directory}/lib</outputDirectory>
                        </configuration>
                    </execution>
                </executions>
            </plugin>
            <plugin>
                <groupId>org.scala-tools</groupId>
                <artifactId>maven-scala-plugin</artifactId>
                <version>2.15.2</version>
                <executions>
                    <execution>
                        <goals>
                            <goal>compile</goal>
                            <goal>testCompile</goal>
                        </goals>
                    </execution>
                </executions>
            </plugin>
        </plugins>
    </build>
</project>