Flink-Kafka-MySQL

This article shows how to do real-time data processing with Flink: start ZooKeeper and Kafka, receive messages sent by a business system, process them with Flink, and finally store the results in a MySQL database.


I started working on big-data workloads in 2018. As a rising star in stream processing, Flink plays an ever larger role in real-time computing. This article summarizes a pipeline from past development work: Flink consumes data from Kafka as the source, processes it, and then writes the results to MySQL.
Prerequisites
Start ZooKeeper and start Kafka.
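For a standard Kafka binary distribution the startup steps typically look like the following (paths and the broker address are taken from the producer code below; adjust them to your environment), including creating the flink-kafka topic used in this article:

bin/zookeeper-server-start.sh config/zookeeper.properties
bin/kafka-server-start.sh config/server.properties
bin/kafka-topics.sh --create --bootstrap-server 192.168.100.10:9092 --topic flink-kafka --partitions 1 --replication-factor 1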

The business system sends messages to Kafka, which serves as the data source:

import com.alibaba.fastjson.JSON;
import com.example.flinkdemo.model.User;
import lombok.extern.slf4j.Slf4j;
import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.clients.producer.ProducerRecord;

import java.util.Properties;

@Slf4j
public class KafkaSender {

    private static final String kafkaTopic = "flink-kafka";
    private static final String brokerAddress = "192.168.100.10:9092";
    private static Properties properties;
    private static void init() {
        properties = new Properties();
        properties.put("bootstrap.servers", brokerAddress);
        properties.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer");
        properties.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer");
    }
    public static void main(String[] args) {
        init();
        sendUrlToKafka(); // send test messages to Kafka
    }
    private static void sendUrlToKafka() {
        KafkaProducer<String, String> producer = new KafkaProducer<>(properties);
        User user = new User();
        for (int i = 0; i < 100000; i++) {
            user.setId(i + "");
            user.setName("test-flink-kafka-mysql" + i);
            user.setAge(i + "");
            // serialize the User to a JSON string so it matches the String value serializer
            String msgContent = JSON.toJSONString(user);
            ProducerRecord<String, String> record = new ProducerRecord<>(kafkaTopic, msgContent);
            producer.send(record);
            log.info("send msg:" + msgContent);
        }
        producer.flush();
        producer.close();
    }
}
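Each record lands on the flink-kafka topic as a plain JSON string, e.g. {"id":"0","name":"test-flink-kafka-mysql0","age":"0"} (field order may vary). The com.example.flinkdemo.model.User class itself is not shown in the post; a minimal sketch consistent with the setters used above (all fields assumed to be String, Lombok assumed since the project already uses @Slf4j):

import lombok.Data;

/**
 * Simple POJO carried through Kafka and Flink; field names must match the JSON keys.
 */
@Data
public class User {
    private String id;
    private String name;
    private String age;
}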

Flink reads the data from Kafka and performs the computation

import com.alibaba.fastjson.JSON;
import com.example.flinkdemo.flink1.MySQLSink;
import com.example.flinkdemo.model.User;
import org.apache.commons.lang3.StringUtils;
import org.apache.flink.api.common.functions.FilterFunction;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.common.serialization.SimpleStringSchema;
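
Only the import list of the original consumer job survived extraction; the class body was cut off. Below is a minimal sketch of a complete job consistent with those imports, assuming the FlinkKafkaConsumer connector API, the flink-kafka topic and broker address from KafkaSender above, and the author's custom MySQLSink. The class name KafkaToMySQLJob and the consumer group id are illustrative.

import java.util.Properties;

import com.alibaba.fastjson.JSON;
import com.example.flinkdemo.flink1.MySQLSink;
import com.example.flinkdemo.model.User;
import org.apache.commons.lang3.StringUtils;
import org.apache.flink.api.common.functions.FilterFunction;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer;

public class KafkaToMySQLJob {

    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        Properties props = new Properties();
        props.put("bootstrap.servers", "192.168.100.10:9092");
        props.put("group.id", "flink-kafka-mysql");

        // Source: consume the raw JSON strings written by KafkaSender
        FlinkKafkaConsumer<String> consumer =
                new FlinkKafkaConsumer<>("flink-kafka", new SimpleStringSchema(), props);

        env.addSource(consumer)
                // drop empty or blank messages before parsing
                .filter(new FilterFunction<String>() {
                    @Override
                    public boolean filter(String value) {
                        return StringUtils.isNotBlank(value);
                    }
                })
                // deserialize each JSON string back into a User object
                .map(new MapFunction<String, User>() {
                    @Override
                    public User map(String value) {
                        return JSON.parseObject(value, User.class);
                    }
                })
                // Sink: write every parsed record to MySQL
                .addSink(new MySQLSink());

        env.execute("flink-kafka-mysql");
    }
}

MySQLSink is likewise not shown in the post; a sketch of a straightforward JDBC sink (the JDBC URL, credentials, and table t_user are placeholder assumptions):

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;

import com.example.flinkdemo.model.User;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.functions.sink.RichSinkFunction;

public class MySQLSink extends RichSinkFunction<User> {

    private transient Connection connection;
    private transient PreparedStatement statement;

    @Override
    public void open(Configuration parameters) throws Exception {
        // Placeholder connection details; assumed table:
        // CREATE TABLE t_user (id VARCHAR(32), name VARCHAR(64), age VARCHAR(8));
        connection = DriverManager.getConnection(
                "jdbc:mysql://192.168.100.10:3306/flink_demo?useSSL=false", "root", "password");
        statement = connection.prepareStatement("INSERT INTO t_user (id, name, age) VALUES (?, ?, ?)");
    }

    @Override
    public void invoke(User user, Context context) throws Exception {
        statement.setString(1, user.getId());
        statement.setString(2, user.getName());
        statement.setString(3, user.getAge());
        statement.executeUpdate();
    }

    @Override
    public void close() throws Exception {
        if (statement != null) {
            statement.close();
        }
        if (connection != null) {
            connection.close();
        }
    }
}

In a real deployment you would typically batch the inserts or use Flink's JdbcSink rather than issuing one executeUpdate per record.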