Flink Two-Phase Commit to MySQL (Connection-Pool Version, Stable in Production)


Preface

Most of the Flink two-phase-commit write-ups you can find online are nearly identical: they serialize the JDBC connection directly and work through a single direct connection. This article optimizes some of that public code, resolves the various exceptions that show up once a connection pool is introduced, and avoids the drawbacks of naively serializing the connection. The result has been running on the author's production pipeline stably and without errors.


I. Key Code

1. Main class

package com.baidu.twoPhaseCommit.twoPC;

import com.baidu.twoPhaseCommit.sink.MysqlSinkPlus;
import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.api.common.typeinfo.Types;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.runtime.state.filesystem.FsStateBackend;
import org.apache.flink.streaming.api.CheckpointingMode;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.CheckpointConfig;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer;
import org.apache.kafka.clients.consumer.ConsumerConfig;

import java.util.Properties;

/**
 * @author Mu
 * @ClassName MysqlTwoPhaseCommit
 * @Description Flink-to-MySQL two-phase-commit example
 * @date 2022/4/14 10:01
 * @Version 1.0
 */
public class MysqlTwoPhaseCommit {
    //topic
    private static final String topic_ExactlyOnce = "TwoPhaseCommit";
    private static final String group_id = "TwoPhaseCommitConsumer";
    private static final String bootstrap_servers = "HM-DATA04:9092,HM-DATA05:9092,HM-DATA06:9092";
    private static final String statebackend_address = "file:///D:/code/wangmu/test/ckeckpoint";

    public static void main(String[] args) throws Exception {
        final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        //Set parallelism to 1 so the message order is easy to follow while testing; raise it for multi-parallelism runs
        env.setParallelism(1);
        //Checkpoint settings
        //Trigger a checkpoint every 10s [checkpoint interval]
        env.enableCheckpointing(10000);
        //Use exactly-once checkpointing semantics
        env.getCheckpointConfig().setCheckpointingMode(CheckpointingMode.EXACTLY_ONCE);
        //Keep at least 1s between checkpoints [minimum pause between checkpoints]
        env.getCheckpointConfig().setMinPauseBetweenCheckpoints(1000);
        //A checkpoint must finish within 10s or it is discarded [checkpoint timeout]
        env.getCheckpointConfig().setCheckpointTimeout(10000);
        //Allow only one checkpoint at a time
        env.getCheckpointConfig().setMaxConcurrentCheckpoints(1);
        //Retain checkpoint data after the job is cancelled, so the job can later be restored from a chosen checkpoint
        env.getCheckpointConfig().enableExternalizedCheckpoints(CheckpointConfig.ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION);
        //Set the state backend; checkpoints are kept in memory by default, in production point this at HDFS, here a local path is used
        //Note: FsStateBackend is deprecated in Flink 1.13 in favor of HashMapStateBackend plus setCheckpointStorage, but it still works
        env.setStateBackend(new FsStateBackend(statebackend_address));


        //Kafka consumer properties
        Properties properties = new Properties();
        properties.setProperty(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrap_servers);
        properties.setProperty(ConsumerConfig.GROUP_ID_CONFIG, group_id);

        /*SimpleStringSchema returns just the message value; JSONKeyValueDeserializationSchema would also expose the key, value and metadata (topic, partition, offset, ...)*/
        FlinkKafkaConsumer<String> kafkaConsumer011 = new FlinkKafkaConsumer<>(
                topic_ExactlyOnce,
                new SimpleStringSchema(),
                properties);

        //Add the Kafka source
        DataStreamSource<String> streamSource = env.addSource(kafkaConsumer011);

        SingleOutputStreamOperator<Tuple2<String, Integer>> tupleStream = streamSource.map(str -> Tuple2.of(str, 1)).returns(Types.TUPLE(Types.STRING, Types.INT));

        tupleStream.print();
        //Send the data downstream to the two-phase-commit sink
        tupleStream.addSink(new MysqlSinkPlus()).name("MySqlTwoPhaseCommitSink");
        //Trigger execution
        env.execute("Kafka2MysqlDemo");
    }
}
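
To feed the job with test data, publish plain strings to the TwoPhaseCommit topic. Below is a minimal, hypothetical producer sketch (not part of the original article) using the kafka-clients API already pulled in by the Kafka connector; the broker list is reused from the constants above and the payload is arbitrary:

package com.baidu.twoPhaseCommit.twoPC;

import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.clients.producer.ProducerConfig;
import org.apache.kafka.clients.producer.ProducerRecord;
import org.apache.kafka.common.serialization.StringSerializer;

import java.util.Properties;

//Hypothetical helper, used only to generate local test traffic
public class TestDataProducer {
    public static void main(String[] args) {
        Properties props = new Properties();
        props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, "HM-DATA04:9092,HM-DATA05:9092,HM-DATA06:9092");
        props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, StringSerializer.class.getName());
        props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, StringSerializer.class.getName());
        try (KafkaProducer<String, String> producer = new KafkaProducer<>(props)) {
            for (int i = 0; i < 10; i++) {
                //Each message becomes one Tuple2<String, Integer> and, after the checkpoint commits, one row in t_test
                producer.send(new ProducerRecord<>("TwoPhaseCommit", "test-message-" + i));
            }
        }
    }
}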

2. Custom sink

package com.baidu.twoPhaseCommit.sink;

import com.baidu.twoPhaseCommit.util.HikariUtil;
import org.apache.flink.api.common.ExecutionConfig;
import org.apache.flink.api.common.typeutils.base.VoidSerializer;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.api.java.typeutils.runtime.kryo.KryoSerializer;
import org.apache.flink.streaming.api.functions.sink.TwoPhaseCommitSinkFunction;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.sql.PreparedStatement;
import java.sql.Timestamp;

/**
 * @author Mu
 * @ClassName MysqlSinkPlus
 * @Description Two-phase-commit sink that writes to MySQL through HikariUtil
 * @date 2022/4/14 10:03
 * @Version 1.0
 */
public class MysqlSinkPlus extends TwoPhaseCommitSinkFunction<Tuple2<String,Integer>, HikariUtil,Void> {
    private static final Logger log = LoggerFactory.getLogger(MysqlSinkPlus.class);

    public MysqlSinkPlus() {
        //The transaction handle (HikariUtil) is serialized with Kryo; the pool and connection live in static fields of HikariUtil, so they are never written into state
        super(new KryoSerializer<>(HikariUtil.class,new ExecutionConfig()), VoidSerializer.INSTANCE);
    }
    /**
     * Write one record to the database; called for every incoming element
     * @param hikariUtil
     * @param tuple
     * @param context
     * @throws Exception
     */
    @Override
    protected void invoke(HikariUtil hikariUtil, Tuple2<String, Integer> tuple, Context context) throws Exception {
        log.info("start invoke...");
        String value = tuple.f0;
        Integer total = tuple.f1;
        String sql = "insert into `t_test` (`value`,`total`,`insert_time`) values (?,?,?)";
        log.info("====executing SQL: {}===", sql);
        PreparedStatement ps = hikariUtil.getconn().prepareStatement(sql);
        ps.setString(1, value);
        ps.setInt(2, total);
        ps.setTimestamp(3, new Timestamp(System.currentTimeMillis()));
        log.info("record to insert: {}----{}", value, total);
        //Log the statement with its bound values (the MySQL driver's toString() appends them after a colon)
        String sqlStr = ps.toString().substring(ps.toString().indexOf(":") + 2);
        log.info("rendered SQL: {}", sqlStr);
        //Execute the insert; the row only becomes visible once commit() runs after the checkpoint completes
        ps.execute();
    }
    /**
     * Begin a new transaction: create the transaction handle; the connection is obtained
     * and switched to manual commit inside getconn()
     * @return
     * @throws Exception
     */
    @Override
    protected HikariUtil beginTransaction() throws Exception {
        log.info("start beginTransaction.......");
        return new HikariUtil();
    }
    /**
     * Pre-commit; here the pre-commit work (writing the rows) has already been done in invoke()
     * @param hikariUtil
     * @throws Exception
     */
    @Override
    protected void preCommit(HikariUtil hikariUtil) throws Exception {
        log.info("start preCommit...");
    }
    /**
     * Commit the transaction once invoke() has run without errors and the checkpoint has completed
     * @param hikariUtil
     */
    @Override
    protected void commit(HikariUtil hikariUtil) {
        log.info("start commit...");
        hikariUtil.commit();
    }
    /**
     * Roll back the transaction if invoke() throws; the pending checkpoint will not complete either
     * @param hikariUtil
     */
    @Override
    protected void abort(HikariUtil hikariUtil) {
        log.info("start abort rollback...");
        hikariUtil.rollback();
    }
}
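
The sink assumes a MySQL table named t_test with the three columns used in the insert statement. The original article does not include the DDL; the sketch below is an assumption (column types and the surrogate key are illustrative, adjust them to your data):

-- Assumed schema for the target table (not taken from the original article)
CREATE TABLE `t_test` (
  `id`          BIGINT NOT NULL AUTO_INCREMENT,
  `value`       VARCHAR(255),
  `total`       INT,
  `insert_time` TIMESTAMP,
  PRIMARY KEY (`id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;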

3. Connection pool utility class

package com.baidu.twoPhaseCommit.util;

import com.zaxxer.hikari.HikariConfig;
import com.zaxxer.hikari.HikariDataSource;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import javax.sql.DataSource;
import java.io.FileReader;
import java.io.IOException;
import java.sql.Connection;
import java.sql.SQLException;
import java.util.Objects;
import java.util.Properties;

/**
 * @author Mu
 * @ClassName HikariUtil
 * @Description JDBC utility class that uses HikariCP as the connection pool
 * @date 2022/4/18 14:21
 * @Version 1.0
 */
public class HikariUtil {
    private static final Logger log = LoggerFactory.getLogger(HikariUtil.class);
    private final static HikariUtil HIKARI_CP_UTI = new HikariUtil();

    private static Properties properties = null;
    private static HikariDataSource dataSource = null;
    private static transient Connection conn = null;

    //The constructor stays public because MysqlSinkPlus instantiates HikariUtil as its transaction handle;
    //the pool itself is still shared process-wide through the static fields
    public HikariUtil() {}

    /**
     * 1. Load the database connection configuration
     * 2. Configure HikariCP from it
     * 3. Expose the pooled connection to callers
     */
    private void initConfig() throws IOException {
        String filePath = Objects.requireNonNull(HikariUtil.class.getClassLoader().getResource("hikaricp.properties")).getFile();
        FileReader fileReader = new FileReader(filePath);
        properties = new Properties();
        properties.load(fileReader);
        properties.forEach((k, v) -> log.debug(String.format("key:%s value:%s", k, v)));
        log.info("Configuration file loaded successfully");
    }

    /**
     * Register HikariCP: build the DataSource from the loaded properties
     */
    private void registerHikariCP() {
        if (null != dataSource) {
            return;
        }
        HikariConfig config = new HikariConfig(properties);
        dataSource = new HikariDataSource(config);
    }

    /**
     * Expose the HikariCP DataSource, initializing it on first use
     * @return
     */
    public static DataSource getHikariCPDataSource() {
        if (null != dataSource) {
            return dataSource;
        }
        try {
            HIKARI_CP_UTI.initConfig();
            HIKARI_CP_UTI.registerHikariCP();
        } catch (IOException e) {
            e.printStackTrace();
        }
        return dataSource;
    }

    /**
     * Expose a pooled connection with auto-commit disabled
     */
    public static Connection getconn() {
        try {
            if(conn == null) {
                if (dataSource != null) {
                    conn = dataSource.getConnection();
                } else {
                    conn = getHikariCPDataSource().getConnection();
                }
            }
            //Disable auto-commit so the two-phase-commit sink controls when the transaction commits
            conn.setAutoCommit(false);
        } catch (SQLException e) {
            e.printStackTrace();
        }
        return conn;
    }

    /**
     * Commit the transaction
     */
    public static void commit() {
        if (conn != null) {
            try {
                conn.commit();
            } catch (SQLException e) {
                log.error("提交事务失败,Connection:" + conn);
                e.printStackTrace();
            }
        }
    }

    /**
     * Roll back the transaction
     */
    public static void rollback() {
        if (conn != null) {
            try {
                conn.rollback();
            } catch (SQLException e) {
                log.error("事务回滚失败,Connection:" + conn);
                e.printStackTrace();
            }
        }
    }

    /**
     * Close the connection (return it to the pool)
     */
    public static void close() {
        if (conn != null) {
            try {
                conn.close();
            } catch (SQLException e) {
                log.error("关闭连接失败,Connection:" + conn);
                e.printStackTrace();
            }
        }
    }

}
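
One caveat about initConfig(): getResource().getFile() only works when hikaricp.properties is a real file on disk, e.g. when running from the IDE. Once the job is packaged into a jar the resource is no longer a plain file path and the FileReader will fail. A small alternative sketch using getResourceAsStream, assuming the same hikaricp.properties is on the classpath:

    //Alternative to initConfig() that also works when the properties file is packaged inside the job jar
    private void initConfig() throws IOException {
        properties = new Properties();
        try (java.io.InputStream in = HikariUtil.class.getClassLoader().getResourceAsStream("hikaricp.properties")) {
            if (in == null) {
                throw new IOException("hikaricp.properties not found on the classpath");
            }
            properties.load(in);
        }
        log.info("Configuration file loaded successfully");
    }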


II. Configuration Files

1. pom.xml

    <properties>
        <maven.compiler.source>1.8</maven.compiler.source>
        <maven.compiler.target>1.8</maven.compiler.target>
        <scala.binary.version>2.12</scala.binary.version>
        <flink.version>1.13.0</flink.version>
        <target.java.version>1.8</target.java.version>
    </properties>

    <dependencies>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-java</artifactId>
            <version>${flink.version}</version>
            <exclusions>
                <exclusion>
                    <artifactId>slf4j-api</artifactId>
                    <groupId>org.slf4j</groupId>
                </exclusion>
            </exclusions>
        </dependency>

        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-streaming-java_2.12</artifactId>
            <version>${flink.version}</version>
            <exclusions>
                <exclusion>
                    <artifactId>slf4j-api</artifactId>
                    <groupId>org.slf4j</groupId>
                </exclusion>
            </exclusions>
        </dependency>

        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-clients_2.12</artifactId>
            <version>${flink.version}</version>
            <exclusions>
                <exclusion>
                    <artifactId>slf4j-api</artifactId>
                    <groupId>org.slf4j</groupId>
                </exclusion>
            </exclusions>
        </dependency>

        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-client</artifactId>
            <version>3.1.3</version>
            <exclusions>
                <exclusion>
                    <artifactId>log4j</artifactId>
                    <groupId>log4j</groupId>
                </exclusion>
                <exclusion>
                    <artifactId>slf4j-api</artifactId>
                    <groupId>org.slf4j</groupId>
                </exclusion>
            </exclusions>
        </dependency>

        <dependency>
            <groupId>mysql</groupId>
            <artifactId>mysql-connector-java</artifactId>
            <version>5.1.49</version>
        </dependency>
        
        <dependency>
            <groupId>com.zaxxer</groupId>
            <artifactId>HikariCP</artifactId>
            <version>2.7.2</version>
            <exclusions>
                <exclusion>
                    <artifactId>slf4j-api</artifactId>
                    <groupId>org.slf4j</groupId>
                </exclusion>
            </exclusions>
        </dependency>

        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-connector-kafka_2.11</artifactId>
            <version>${flink.version}</version>
            <exclusions>
                <exclusion>
                    <artifactId>slf4j-api</artifactId>
                    <groupId>org.slf4j</groupId>
                </exclusion>
            </exclusions>
        </dependency>

        <dependency>
            <groupId>com.alibaba.ververica</groupId>
            <artifactId>flink-connector-mysql-cdc</artifactId>
            <version>1.2.0</version>
        </dependency>
        <dependency>
            <groupId>com.ververica</groupId>
            <artifactId>flink-connector-oracle-cdc</artifactId>
            <version>2.1.1</version>
            <exclusions>
                <exclusion>
                    <artifactId>slf4j-api</artifactId>
                    <groupId>org.slf4j</groupId>
                </exclusion>
            </exclusions>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-connector-jdbc_2.12</artifactId>
            <version>${flink.version}</version>
        </dependency>

        <dependency>
            <groupId>com.alibaba</groupId>
            <artifactId>fastjson</artifactId>
            <version>1.2.75</version>
        </dependency>

        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-table-planner-blink_2.12</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-table-api-scala-bridge_2.12</artifactId>
            <version>${flink.version}</version>
            <exclusions>
                <exclusion>
                    <artifactId>slf4j-api</artifactId>
                    <groupId>org.slf4j</groupId>
                </exclusion>
            </exclusions>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-table-api-java-bridge_2.12</artifactId>
            <version>${flink.version}</version>
            <exclusions>
                <exclusion>
                    <artifactId>slf4j-api</artifactId>
                    <groupId>org.slf4j</groupId>
                </exclusion>
            </exclusions>
        </dependency>

        <!-- blink执行计划,1.11+默认的-->
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-table-planner-blink_2.12</artifactId>
            <version>1.12.0</version>
            <exclusions>
                <exclusion>
                    <artifactId>slf4j-api</artifactId>
                    <groupId>org.slf4j</groupId>
                </exclusion>
            </exclusions>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-table-common</artifactId>
            <version>${flink.version}</version>
            <exclusions>
                <exclusion>
                    <artifactId>slf4j-api</artifactId>
                    <groupId>org.slf4j</groupId>
                </exclusion>
            </exclusions>
        </dependency>

        <dependency>
            <groupId>org.slf4j</groupId>
            <artifactId>slf4j-api</artifactId>
            <version>1.7.15</version>
        </dependency>
        <dependency>
            <groupId>org.slf4j</groupId>
            <artifactId>slf4j-log4j12</artifactId>
            <version>1.7.15</version>
        </dependency>
        <dependency>
            <groupId>org.slf4j</groupId>
            <artifactId>slf4j-nop</artifactId>
            <version>1.7.15</version>
            <type>jar</type>
        </dependency>

    </dependencies>

Note: my project also contains other code, so pom.xml carries some extra dependencies; exclude the ones you do not need.

2. hikaricp.properties

driverClassName=com.mysql.jdbc.Driver
jdbcUrl=jdbc:mysql://localhost:3306/test?useUnicode=true&characterEncoding=UTF-8&zeroDateTimeBehavior=convertToNull&useSSL=false&autoReconnect=true
username=root
password=123456
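
HikariCP accepts further pool-tuning keys in the same properties file. The values below are illustrative examples, not taken from the original article; size them for your own workload:

# Optional pool tuning (example values)
maximumPoolSize=10
minimumIdle=2
connectionTimeout=30000
idleTimeout=600000
maxLifetime=1800000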

Summary

This article combines existing public demos with the author's own adjustments; the result has run stably in production without errors.
Main references:

1.https://www.jianshu.com/p/1df61bb6801e
2.https://blog.youkuaiyun.com/zc19921215/article/details/117934640
