Flink CDC监听Mysql及SQLserver

Flink CDC实时监听mysql及SQLserver

1.在sqlserver中开启cdc模式
-- 开启cdc日志
EXEC sys.sp_cdc_enable_table
@source_schema = 'dbo',
@source_name = 'table_name',
@role_name = '用户名';
-- 查询是否开启成功,结果为1表示成功
select is_tracked_by_cdc from sys.tables where name = 'table_name';
-- 关闭表CDC
EXEC sys.sp_cdc_disable_table
@source_schema = 'dbo',
@source_name = 'table_name',
@capture_instance = '用户名';

监听SQLSERVER

package com.example.flink;

import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.JSONObject;
import com.ververica.cdc.connectors.sqlserver.SqlServerSource;
import com.ververica.cdc.connectors.sqlserver.table.StartupOptions;
import com.ververica.cdc.debezium.DebeziumSourceFunction;
import com.ververica.cdc.debezium.JsonDebeziumDeserializationSchema;
import okhttp3.*;
import org.apache.commons.lang3.StringUtils;
import org.apache.flink.api.common.RuntimeExecutionMode;
import org.apache.flink.api.common.restartstrategy.RestartStrategies;
import org.apache.flink.api.common.time.Time;
import org.apache.flink.shaded.zookeeper3.org.apache.jute.compiler.JString;
import org.apache.flink.streaming.api.TimeCharacteristic;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.sink.SinkFunction;

import java.sql.*;
import java.io.IOException;
import java.sql.Connection;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.TimeUnit;

public class flink_cdc_sqlserver {
    // JDBC connection settings. NOTE(review): these are not referenced by the CDC
    // source below (which configures its own host/credentials); kept for callers
    // that may use them for direct JDBC access.
    public static final String url = "jdbc:sqlserver://ip:1433;databaseName=数据库";
    public static final String user = "root";
    public static final String password = "password";

    /**
     * Entry point: streams SQL Server CDC change events as JSON strings and
     * dispatches on the Debezium operation code in a sink.
     *
     * @param args unused
     * @throws Exception if the Flink job fails to start or execute
     */
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(1);
        env.setRestartStrategy(RestartStrategies.fixedDelayRestart(
                3, // number of restart attempts
                Time.of(10, TimeUnit.SECONDS) // delay between attempts
        ));
        env.setRuntimeMode(RuntimeExecutionMode.STREAMING);
        // Deprecated since Flink 1.12 (event time is the default); kept to preserve
        // the original behavior on this Flink version.
        env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);

        // CDC source: emits each captured change as a Debezium-formatted JSON string.
        DebeziumSourceFunction<String> source = SqlServerSource.<String>builder()
                .hostname("ip")
                .port(1433)
                .username("root")
                .password("password")
                .database("数据库")
                .tableList("表名")
                //.tableList("表名,表名") multiple tables: comma-separated
                //.tableList("dbo.test_lhh")
                .startupOptions(StartupOptions.latest()) // only new changes; recommended for production
                .deserializer(new JsonDebeziumDeserializationSchema()) // converts SourceRecord to JSON String
                .build();

        DataStreamSource<String> ds = env.addSource(source);
        // Any operator chain (process, map, ...) can be inserted here before the sink.
//        ds.process()
        ds.addSink(new SinkFunction<String>() {
            @Override
            public void invoke(String value, Context context) throws Exception {
                JSONObject record = JSON.parseObject(value);
                // "op" is the change type: c = insert, u = update, d = delete.
                // Inserts/updates carry the new row in "after"; deletes carry the
                // old row in "before".
                String op = record.getString("op");
                if (StringUtils.equalsIgnoreCase("c", op)) {
                    // Insert handling. Read "after" directly — the original code
                    // re-parsed it through String.valueOf for no benefit, and
                    // getString(...).toString() would NPE on a missing field.
                    JSONObject after = record.getJSONObject("after");
                    String no = (after == null) ? null : after.getString("xxx");
                    // TODO: apply insert business logic using `no`
                }
            }
        });
        env.execute("flink cdc sqlserver");
    }
}

-- 检查 MySQL 服务器是否支持 CDC
SHOW VARIABLES LIKE 'binlog_row_image';
-- 检查二进制日志是否开启
SHOW VARIABLES LIKE 'log_bin';
-- 检查是否有足够的权限读取二进制日志
SHOW BINLOG EVENTS;
如果上述查询返回了结果,并且 log_bin 的值为 ON,那么 CDC 就被启用了。如果你无法看到任何事件或者收到权限错误,你可能需要联系数据库管理员来启用 CDC 或者调整权限
监听Mysql

package com.example.flink;


import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.JSONObject;
import com.ververica.cdc.connectors.mysql.MySqlSource;
import com.ververica.cdc.connectors.mysql.table.StartupOptions;
import com.ververica.cdc.debezium.DebeziumSourceFunction;
import com.ververica.cdc.debezium.JsonDebeziumDeserializationSchema;
import okhttp3.MediaType;
import okhttp3.OkHttpClient;
import okhttp3.Request;
import okhttp3.RequestBody;
import org.apache.commons.lang3.StringUtils;
import org.apache.flink.api.common.RuntimeExecutionMode;
import org.apache.flink.api.common.restartstrategy.RestartStrategies;
import org.apache.flink.api.common.time.Time;
import org.apache.flink.streaming.api.TimeCharacteristic;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.sink.SinkFunction;

import java.io.IOException;
import java.util.concurrent.TimeUnit;
public class flink_cdc_bz_stk {
    public static void main(String[] args) throws Exception {

        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(1);
        env.setRestartStrategy(RestartStrategies.fixedDelayRestart(
                3, // 尝试重启的次数
                Time.of(10, TimeUnit.SECONDS) // 两次尝试之间的延迟
        ));
        env.setRuntimeMode(RuntimeExecutionMode.STREAMING);
        env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
        DebeziumSourceFunction<String> source = MySqlSource.<String>builder()
                .hostname("ip")
                .port(3306)
                .username("root")
                .password("password")
                .databaseList("库")
                .tableList("table_name")
                .startupOptions(StartupOptions.latest()) //生产用这个
                .deserializer(new JsonDebeziumDeserializationSchema()) // converts SourceRecord to JSON String
                .build();
        DataStreamSource<String> ds = env.addSource(source);
        //国家
        // process算子、map算子等等都可以
//        ds.process()
        ds.addSink(new SinkFunction<String>() {
            @Override
            public void invoke(String value, Context context) throws Exception {
                JSONObject jsonObject = JSON.parseObject(value);
                String op = jsonObject.getString("op");
                //op就是你监控的表操作的状态,c-新增,u-修改,d-删除
                //新增 修改都是获取after中的值
                //删除是获取before的值
                if (StringUtils.equalsIgnoreCase("c", op)) {
                    //insert操作逻辑
                    JSONObject after = jsonObject.getJSONObject("after");
                    JSONObject jsonObject1 = JSON.parseObject(String.valueOf(after));
                    //获取值
                    String no= jsonObject1.getString("xxx").toString(); 
                }
            }
        });
        env.execute("flink_cdc_nysql");
    }

依赖

<dependency>
    <groupId>mysql</groupId>
    <artifactId>mysql-connector-java</artifactId>
    <version>8.0.24</version>
</dependency>

<dependency>
    <groupId>com.ververica</groupId>
    <artifactId>flink-connector-mysql-cdc</artifactId>
    <version>2.1.0</version>
    <exclusions>
        <exclusion>
            <artifactId>flink-shaded-guava</artifactId>
            <groupId>org.apache.flink</groupId>
        </exclusion>
    </exclusions>
</dependency>

POM依赖

<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>
<!--    下载jar包-->
    <repositories>
        <repository>
            <id>alimaven</id>
            <url>https://maven.aliyun.com/repository/public</url>
        </repository>
    </repositories>
    <pluginRepositories>
        <pluginRepository>
            <id>alimaven</id>
            <url>https://maven.aliyun.com/repository/public</url>
        </pluginRepository>
    </pluginRepositories>

    <groupId>com.example</groupId>
    <artifactId>flink</artifactId>
    <version>1.0-SNAPSHOT</version>
    <name>flink</name>
    <packaging>jar</packaging>

    <properties>
        <flink.version>1.13.5</flink.version>
        <scala.version>2.11</scala.version>
        <maven.compiler.source>1.8</maven.compiler.source>
        <maven.compiler.target>1.8</maven.compiler.target>
    </properties>
    <dependencies>
        <!-- https://mvnrepository.com/artifact/mysql/mysql-connector-java -->
        <dependency>
            <groupId>mysql</groupId>
            <artifactId>mysql-connector-java</artifactId>
            <version>8.0.28</version>
        </dependency>

        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-table-planner_2.11</artifactId>
            <version>${flink.version}</version>
        </dependency>

        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-table-planner-blink_2.11</artifactId>
            <version>${flink.version}</version>
        </dependency>

        <!-- https://mvnrepository.com/artifact/com.ververica/flink-sql-connector-sqlserver-cdc -->
        <dependency>
            <groupId>com.ververica</groupId>
            <artifactId>flink-sql-connector-sqlserver-cdc</artifactId>
            <version>2.3.0</version>
            <exclusions>
                <exclusion>
                    <artifactId>flink-shaded-guava</artifactId>
                    <groupId>org.apache.flink</groupId>
                </exclusion>
            </exclusions>
        </dependency>


        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-streaming-java_2.11</artifactId>
            <version>${flink.version}</version>
        </dependency>

        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-streaming-scala_2.11</artifactId>
            <version>${flink.version}</version>
            <exclusions>
                <exclusion>
                    <artifactId>scala-library</artifactId>
                    <groupId>org.scala-lang</groupId>
                </exclusion>
            </exclusions>
            <!--            <scope>provided</scope>-->
        </dependency>

        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-runtime-web_2.11</artifactId>
            <version>${flink.version}</version>
            <exclusions>
                <exclusion>
                    <artifactId>flink-shaded-guava</artifactId>
                    <groupId>org.apache.flink</groupId>
                </exclusion>
            </exclusions>
            <!--            <scope>provided</scope>-->
        </dependency>
        <!--        &lt;!&ndash; https://mvnrepository.com/artifact/org.apache.flink/flink-java &ndash;&gt;-->
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-java</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-statebackend-rocksdb_2.11</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <dependency>
            <groupId>joda-time</groupId>
            <artifactId>joda-time</artifactId>
            <version>2.7</version>
        </dependency>

        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-json</artifactId>
            <!-- Keep in sync with the Flink version used by all other Flink
                 dependencies; 1.19.1 mixed with 1.13.5 risks classpath conflicts. -->
            <version>${flink.version}</version>
            <!--            <scope>test</scope>-->
        </dependency>

        <dependency>
            <groupId>com.alibaba</groupId>
            <artifactId>fastjson</artifactId>
            <version>1.2.76</version>
        </dependency>

        <!-- https://mvnrepository.com/artifact/com.squareup.okhttp3/okhttp -->
        <dependency>
            <groupId>com.squareup.okhttp3</groupId>
            <artifactId>okhttp</artifactId>
            <version>4.0.0</version>
        </dependency>

    </dependencies>


    <build>
        <finalName>real_time_broadcast_of_new_orders</finalName>
        <plugins>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-shade-plugin</artifactId>
                <version>3.2.4</version> <!-- 使用最新版本 -->
                <executions>
                    <execution>
                        <phase>package</phase>
                        <goals>
                            <goal>shade</goal>
                        </goals>
                        <configuration>
                            <transformers>
                                <transformer
                                        implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
                                    <mainClass>com.example.flink.flink_cdc_sqlserver</mainClass> <!-- 设置主类 -->
                                </transformer>
                            </transformers>
                        </configuration>
                    </execution>
                </executions>
            </plugin>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-jar-plugin</artifactId>
                <version>3.4.2</version>
                <configuration>
                    <archive>
                        <manifest>
                            <addClasspath>true</addClasspath>
                            <classpathPrefix>lib/</classpathPrefix>
                            <!-- Was com.example.flink.flink_cdc, which does not exist;
                                 aligned with the shade plugin's main class. -->
                            <mainClass>com.example.flink.flink_cdc_sqlserver</mainClass>
                            <useUniqueVersions>false</useUniqueVersions>
                        </manifest>
                    </archive>
                </configuration>
                <executions>
                    <execution>
                        <configuration>
                            <!-- NOTE(review): <filters> and <transformers> are
                                 maven-shade-plugin configuration; maven-jar-plugin
                                 ignores them. They likely belong in the shade
                                 plugin's <configuration> block above. -->
                            <filters>
                                <filter>
                                    <artifact>*:*</artifact>
                                    <excludes>
                                        <!-- 滤掉这些文件,避免影响-->
                                        <exclude>META-INF/*.SF</exclude>
                                        <exclude>META-INF/*.DSA</exclude>
                                        <exclude>META-INF/*.RSA</exclude>
                                    </excludes>
                                </filter>
                            </filters>
                            <transformers>
                                <transformer
                                        implementation="org.apache.maven.plugins.shade.resource.AppendingTransformer">
                                    <resource>reference.conf</resource>
                                </transformer>
                            </transformers>
                        </configuration>
                    </execution>
                </executions>
            </plugin>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-surefire-plugin</artifactId>
                <version>3.3.0</version>
                <configuration>
                    <argLine>--add-opens java.base/java.lang=ALL-UNNAMED</argLine>
                </configuration>
            </plugin>
        </plugins>
    </build>

</project>

如果有疑问,可以私信我

### Flink CDC MySQL 数据变更监听配置方法 Flink CDC 提供了一种高效、实时的方式来捕获 MySQL 数据库中的数据变更。以下是使用 Flink CDC 监听 MySQL 数据库变更的具体配置方法,适用于基于 Flink 的流处理应用。 #### 1. 环境准备与依赖配置 首先,确保已经安装并配置好了 Apache Flink 运行环境。然后,需要将 Flink CDCMySQL 连接器添加到项目中。如果你使用的是 Maven 项目,可以在 `pom.xml` 中添加如下依赖: ```xml <dependency> <groupId>com.alibaba.ververica</groupId> <artifactId>flink-connector-mysql-cdc_2.11</artifactId> <version>2.4.0</version> </dependency> ``` 如果使用 Flink SQL,则可以通过 Flink SQL CLI 或者 Table API 来实现,此时需确保 `flink-sql-connector-mysql-cdc` 已经被正确加载[^2]。 #### 2. 配置 MySQL 源数据库 在 MySQL 方面,需要确保以下几点: - 启用二进制日志(Binary Log),并且格式为 `ROW` 模式。 - 创建一个用于连接 MySQL 的用户,并授予相应的权限,包括 `REPLICATION SLAVE`, `REPLICATION CLIENT`, `SELECT`, `RELOAD` 等权限。 - 配置 MySQL 的服务器 ID,确保每个 CDC 实例拥有唯一的 server-id。 示例的 MySQL 用户授权语句如下: ```sql CREATE USER 'flink_user'@'%' IDENTIFIED BY 'flink_pwd'; GRANT REPLICATION SLAVE, REPLICATION CLIENT, SELECT, RELOAD ON *.* TO 'flink_user'@'%'; FLUSH PRIVILEGES; ``` #### 3. 编写 Flink CDC 数据流程序 以下是一个基于 Flink DataStream API 的 Java 示例代码,展示如何使用 Flink CDC 监听 MySQL 的变更数据: ```java import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; import org.apache.flink.table.api.bridge.java.StreamTableEnvironment; public class FlinkCDCMySQLExample { public static void main(String[] args) throws Exception { StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); StreamTableEnvironment tEnv = StreamTableEnvironment.create(env); // 创建 MySQL CDC tEnv.executeSql( "CREATE TABLE mysql_cdc_source ( " + " id INT PRIMARY KEY," + " name STRING," + " age INT" + ") WITH ( " + " 'connector' = 'mysql-cdc'," + " 'hostname' = 'localhost'," + " 'port' = '3306'," + " 'database-name' = 'test_db'," + " 'table-name' = 'user_table'," + " 'username' = 'flink_user'," + " 'password' = 'flink_pwd'" + ")" ); // 查询并打印结果 tEnv.from("mysql_cdc_source").executeInsert("print").print(); env.execute("Flink CDC MySQL Example"); } } ``` 上述代码通过 Flink SQL 的方式定义了一个 MySQL CDC 数据源,并将其内容输出到控制台[^2]。 #### 4. 
配置参数说明 | 参数名 | 描述 | |--------|------| | `connector` | 指定连接器类型,此处为 `mysql-cdc` | | `hostname` | MySQL 数据库的主机地址 | | `port` | MySQL 数据库的端口号,默认为 `3306` | | `database-name` | 要监听的数据库名称 | | `table-name` | 要监听的数据名称 | | `username` | MySQL 数据库的用户名 | | `password` | MySQL 数据库的密码 | 此外,还可以设置其他高级参数,如 `scan.startup.mode`(指定初始读取模式,如 latest、initial)、`debezium.database.server.id`(避免多个 CDC 实例冲突)等[^3]。 #### 5. 启动与监控 将程序打包并提交至 Flink 集群运行。可以通过 Flink Web UI 查看任务状态和日志信息,确保 CDC 连接正常建立,并能够持续捕获 MySQL 数据库中的变更事件。 ---
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

热心市民爱抽烟屁

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值