Preface
Most of the Flink two-phase-commit material you can find online is much the same: it serializes the JDBC connection directly and works through a single, directly created connection. This article takes some of that public code and improves it, fixing the various exceptions that show up once a connection pool is involved and avoiding the drawbacks of simply serializing a connection. The result has been running on the author's production pipeline stably and without exceptions.
一、Key code
1. Main class
package com.baidu.twoPhaseCommit.twoPC;
import com.baidu.twoPhaseCommit.sink.MysqlSinkPlus;
import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.api.common.typeinfo.Types;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.runtime.state.filesystem.FsStateBackend;
import org.apache.flink.streaming.api.CheckpointingMode;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.CheckpointConfig;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer;
import org.apache.kafka.clients.consumer.ConsumerConfig;
import java.util.Properties;
/**
* @author Mu
* @ClassName MysqlTwoPhaseCommit
* @Description Flink-to-MySQL two-phase commit demo
* @date 2022/4/14 10:01
* @Version 1.0
*/
public class MysqlTwoPhaseCommit {
//topic
private static final String topic_ExactlyOnce = "TwoPhaseCommit";
private static final String group_id = "TwoPhaseCommitConsumer";
private static final String bootstrap_servers = "HM-DATA04:9092,HM-DATA05:9092,HM-DATA06:9092";
private static final String statebackend_address = "file:///D:/code/wangmu/test/ckeckpoint";
public static void main(String[] args) throws Exception {
final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
//Set the parallelism. It is 1 here so the message order is easy to follow while testing; it can be raised for real workloads
env.setParallelism(1);
//Checkpoint settings
//Trigger a checkpoint every 10 s (checkpoint interval)
env.enableCheckpointing(10000);
//Set the mode to EXACTLY_ONCE (exactly-once semantics)
env.getCheckpointConfig().setCheckpointingMode(CheckpointingMode.EXACTLY_ONCE);
//Leave at least 1 s between the end of one checkpoint and the start of the next (minimum pause)
env.getCheckpointConfig().setMinPauseBetweenCheckpoints(1000);
//A checkpoint must finish within 10 s, otherwise it is discarded (checkpoint timeout)
env.getCheckpointConfig().setCheckpointTimeout(10000);
//Allow only one checkpoint in flight at a time
env.getCheckpointConfig().setMaxConcurrentCheckpoints(1);
//Retain checkpoint data after the job is cancelled, so the job can later be restored from a chosen checkpoint
env.getCheckpointConfig().enableExternalizedCheckpoints(CheckpointConfig.ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION);
//Set the state backend. Checkpoints live in memory by default; in production they would go to HDFS, but for this demo they are written to a local path
env.setStateBackend(new FsStateBackend(statebackend_address));
//Kafka consumer properties
Properties properties = new Properties();
properties.setProperty(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrap_servers);
properties.setProperty(ConsumerConfig.GROUP_ID_CONFIG, group_id);
/* SimpleStringSchema returns only the message value; JSONKeyValueDeserializationSchema would also expose the key, value and metadata (topic, partition, offset) */
FlinkKafkaConsumer<String> kafkaConsumer011 = new FlinkKafkaConsumer<>(
topic_ExactlyOnce,
new SimpleStringSchema(),
properties);
//Add the Kafka source
DataStreamSource<String> streamSource = env.addSource(kafkaConsumer011);
SingleOutputStreamOperator<Tuple2<String, Integer>> tupleStream = streamSource.map(str -> Tuple2.of(str, 1)).returns(Types.TUPLE(Types.STRING, Types.INT));
tupleStream.print();
//Send the data to the downstream sink
tupleStream.addSink(new MysqlSinkPlus()).name("MySqlTwoPhaseCommitSink");
//Trigger execution
env.execute("Kafka2MysqlDemo");
}
}
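For a quick local test, one way to feed the TwoPhaseCommit topic is a small standalone producer like the sketch below. It assumes the same broker list as the main class and that kafka-clients is on the classpath (flink-connector-kafka pulls it in transitively); the class name TestDataProducer is illustrative only.
package com.baidu.twoPhaseCommit.twoPC;

import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.clients.producer.ProducerConfig;
import org.apache.kafka.clients.producer.ProducerRecord;
import org.apache.kafka.common.serialization.StringSerializer;

import java.util.Properties;

/** Hypothetical helper for local testing only; not part of the Flink job itself. */
public class TestDataProducer {
    public static void main(String[] args) {
        Properties props = new Properties();
        props.setProperty(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, "HM-DATA04:9092,HM-DATA05:9092,HM-DATA06:9092");
        props.setProperty(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, StringSerializer.class.getName());
        props.setProperty(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, StringSerializer.class.getName());
        try (KafkaProducer<String, String> producer = new KafkaProducer<>(props)) {
            for (int i = 0; i < 10; i++) {
                // plain string values, matching the SimpleStringSchema used by the consumer
                producer.send(new ProducerRecord<>("TwoPhaseCommit", "msg-" + i));
            }
            producer.flush();
        }
    }
}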
2. Custom sink
package com.baidu.twoPhaseCommit.sink;
import com.baidu.twoPhaseCommit.util.HikariUtil;
import org.apache.flink.api.common.ExecutionConfig;
import org.apache.flink.api.common.typeutils.base.VoidSerializer;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.api.java.typeutils.runtime.kryo.KryoSerializer;
import org.apache.flink.streaming.api.functions.sink.TwoPhaseCommitSinkFunction;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.sql.PreparedStatement;
import java.sql.Timestamp;
/**
* @author Mu
* @ClassName MysqlSinkPlus
* @Description Two-phase-commit sink that writes Tuple2<String, Integer> records to MySQL through a HikariCP pool
* @date 2022/4/14 10:03
* @Version 1.0
*/
public class MysqlSinkPlus extends TwoPhaseCommitSinkFunction<Tuple2<String,Integer>, HikariUtil,Void> {
private static final Logger log = LoggerFactory.getLogger(MysqlSinkPlus.class);
public MysqlSinkPlus() {
super(new KryoSerializer<>(HikariUtil.class,new ExecutionConfig()), VoidSerializer.INSTANCE);
}
/**
* Writes one record into the database; called for every element that reaches the sink
* @param hikariUtil
* @param tuple
* @param context
* @throws Exception
*/
@Override
protected void invoke(HikariUtil hikariUtil, Tuple2<String, Integer> tuple, Context context) throws Exception {
log.info("start invoke...");
String value = tuple.f0;
Integer total = tuple.f1;
String sql = "insert into `t_test` (`value`,`total`,`insert_time`) values (?,?,?)";
log.info("====执行SQL:{}===",sql);
PreparedStatement ps = hikariUtil.getconn().prepareStatement(sql);
ps.setString(1, value);
ps.setInt(2, total);
ps.setTimestamp(3, new Timestamp(System.currentTimeMillis()));
log.info("要插入的数据:{}----{}",value,total);
if (ps != null) {
String sqlStr = ps.toString().substring(ps.toString().indexOf(":")+2);
log.error("执行的SQL语句:{}",sqlStr);
}
//execute the insert and close the statement so statements do not pile up on the long-lived pooled connection
ps.execute();
ps.close();
}
/**
* Starts a transaction: obtains the transaction handle; auto-commit is disabled inside HikariUtil.getconn()
* @return
* @throws Exception
*/
@Override
protected HikariUtil beginTransaction() throws Exception {
log.info("start beginTransaction.......");
return new HikariUtil();
}
/**
* Pre-commit. The per-record work has already been done in invoke(), so nothing extra is needed here
* @param hikariUtil
* @throws Exception
*/
@Override
protected void preCommit(HikariUtil hikariUtil) throws Exception {
log.info("start preCommit...");
}
/**
* Commits the transaction once the checkpoint completes, i.e. when every invoke() in this transaction succeeded
* @param hikariUtil
*/
@Override
protected void commit(HikariUtil hikariUtil) {
log.info("start commit...");
hikariUtil.commit();
}
/**
* Rolls back the transaction if invoke() throws; the in-flight checkpoint is abandoned as well
* @param hikariUtil
*/
@Override
protected void abort(HikariUtil hikariUtil) {
log.info("start abort rollback...");
hikariUtil.rollback();
}
}
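The insert statement in invoke() expects a t_test table with value, total and insert_time columns. The sink does not dictate the exact DDL; one minimal possible definition (column types and the surrogate key are assumptions) looks like this:
CREATE TABLE `t_test` (
  `id`          BIGINT NOT NULL AUTO_INCREMENT,
  `value`       VARCHAR(255),
  `total`       INT,
  `insert_time` DATETIME,
  PRIMARY KEY (`id`)
) ENGINE = InnoDB DEFAULT CHARSET = utf8mb4;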
3. Connection pool utility class
package com.baidu.twoPhaseCommit.util;
import com.zaxxer.hikari.HikariConfig;
import com.zaxxer.hikari.HikariDataSource;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import javax.sql.DataSource;
import java.io.FileReader;
import java.io.IOException;
import java.sql.Connection;
import java.sql.SQLException;
import java.util.Objects;
import java.util.Properties;
/**
* @author Mu
* @ClassName HikariUtil
* @Description JDBC utility class backed by a HikariCP connection pool
* @date 2022/4/18 14:21
* @Version 1.0
*/
public class HikariUtil {
private static final Logger log = LoggerFactory.getLogger(HikariUtil.class);
private final static HikariUtil HIKARI_CP_UTI = new HikariUtil();
private static Properties properties = null;
private static HikariDataSource dataSource = null;
//static, so it is never serialized as part of the sink's transaction object
private static Connection conn = null;
//The constructor stays public because MysqlSinkPlus#beginTransaction() creates a new instance per transaction;
//all pool state lives in static fields, so an instance itself carries no state
public HikariUtil() {}
/**
* 1. Load the database connection settings
* 2. Configure HikariCP with them
* 3. Hand out database connections to callers
*/
private void initConfig() throws IOException {
String filePath = Objects.requireNonNull(HikariUtil.class.getClassLoader().getResource("hikaricp.properties")).getFile();
FileReader fileReader = new FileReader(filePath);
properties = new Properties();
properties.load(fileReader);
properties.forEach((k, v) -> {
log.debug(String.format("key:%s value:%s", k, v));
});
log.info("Configuration file loaded successfully");
}
/**
* Build the HikariCP data source
*/
private void registerHikariCP() {
if (null != dataSource) {
return;
}
HikariConfig config = new HikariConfig(properties);
dataSource = new HikariDataSource(config);
}
/**
* Expose the HikariCP DataSource
* @return
*/
public static DataSource getHikariCPDataSource() {
if (null != dataSource) {
return dataSource;
}
try {
HIKARI_CP_UTI.initConfig();
HIKARI_CP_UTI.registerHikariCP();
} catch (IOException e) {
e.printStackTrace();
}
return dataSource;
}
/**
* Expose a pooled connection
*/
public static Connection getconn() {
try {
if (conn == null || conn.isClosed()) { //re-borrow from the pool if the cached connection has been closed
if (dataSource != null) {
conn = dataSource.getConnection();
} else {
conn = getHikariCPDataSource().getConnection();
}
}
//disable auto-commit so the two-phase-commit sink decides when to commit
conn.setAutoCommit(false);
} catch (SQLException e) {
e.printStackTrace();
}
return conn;
}
/**
* Commit the transaction
*/
public static void commit() {
if (conn != null) {
try {
conn.commit();
} catch (SQLException e) {
log.error("提交事务失败,Connection:" + conn);
e.printStackTrace();
}
}
}
/**
* Roll back the transaction
*/
public static void rollback() {
if (conn != null) {
try {
conn.rollback();
} catch (SQLException e) {
log.error("事务回滚失败,Connection:" + conn);
e.printStackTrace();
}
}
}
/**
* Close the connection and return it to the pool
*/
public static void close() {
if (conn != null) {
try {
conn.close();
} catch (SQLException e) {
log.error("Failed to close connection, Connection:" + conn);
e.printStackTrace();
} finally {
//drop the cached reference so the next getconn() borrows a fresh connection
conn = null;
}
}
}
}
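Outside of Flink, the same borrow / commit / rollback cycle that MysqlSinkPlus drives can be exercised directly against HikariUtil. Below is a minimal smoke-test sketch, assuming the hikaricp.properties file from the next section is on the classpath; the class name HikariUtilSmokeTest is illustrative only and simply mirrors what the sink already does.
package com.baidu.twoPhaseCommit.util;

import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.Timestamp;

/** Hypothetical standalone check of the pool utility; not part of the Flink job. */
public class HikariUtilSmokeTest {
    public static void main(String[] args) {
        try {
            // lazily builds the pool and disables auto-commit, exactly as beginTransaction() relies on
            Connection conn = HikariUtil.getconn();
            try (PreparedStatement ps = conn.prepareStatement(
                    "insert into `t_test` (`value`,`total`,`insert_time`) values (?,?,?)")) {
                ps.setString(1, "smoke-test");
                ps.setInt(2, 1);
                ps.setTimestamp(3, new Timestamp(System.currentTimeMillis()));
                ps.execute();
            }
            HikariUtil.commit();      // same call the sink makes in commit()
        } catch (Exception e) {
            HikariUtil.rollback();    // same call the sink makes in abort()
        } finally {
            HikariUtil.close();
        }
    }
}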
二、Configuration files
1.pom.xml
<properties>
<maven.compiler.source>1.8</maven.compiler.source>
<maven.compiler.target>1.8</maven.compiler.target>
<scala.binary.version>2.12</scala.binary.version>
<flink.version>1.13.0</flink.version>
<target.java.version>1.8</target.java.version>
</properties>
<dependencies>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-java</artifactId>
<version>${flink.version}</version>
<exclusions>
<exclusion>
<artifactId>slf4j-api</artifactId>
<groupId>org.slf4j</groupId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-streaming-java_2.12</artifactId>
<version>${flink.version}</version>
<exclusions>
<exclusion>
<artifactId>slf4j-api</artifactId>
<groupId>org.slf4j</groupId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-clients_2.12</artifactId>
<version>${flink.version}</version>
<exclusions>
<exclusion>
<artifactId>slf4j-api</artifactId>
<groupId>org.slf4j</groupId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-client</artifactId>
<version>3.1.3</version>
<exclusions>
<exclusion>
<artifactId>log4j</artifactId>
<groupId>log4j</groupId>
</exclusion>
<exclusion>
<artifactId>slf4j-api</artifactId>
<groupId>org.slf4j</groupId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>mysql</groupId>
<artifactId>mysql-connector-java</artifactId>
<version>5.1.49</version>
</dependency>
<dependency>
<groupId>com.zaxxer</groupId>
<artifactId>HikariCP</artifactId>
<version>2.7.2</version>
<exclusions>
<exclusion>
<artifactId>slf4j-api</artifactId>
<groupId>org.slf4j</groupId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-connector-kafka_2.12</artifactId>
<version>${flink.version}</version>
<exclusions>
<exclusion>
<artifactId>slf4j-api</artifactId>
<groupId>org.slf4j</groupId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>com.alibaba.ververica</groupId>
<artifactId>flink-connector-mysql-cdc</artifactId>
<version>1.2.0</version>
</dependency>
<dependency>
<groupId>com.ververica</groupId>
<artifactId>flink-connector-oracle-cdc</artifactId>
<version>2.1.1</version>
<exclusions>
<exclusion>
<artifactId>slf4j-api</artifactId>
<groupId>org.slf4j</groupId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-connector-jdbc_2.12</artifactId>
<version>${flink.version}</version>
</dependency>
<dependency>
<groupId>com.alibaba</groupId>
<artifactId>fastjson</artifactId>
<version>1.2.75</version>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-table-planner-blink_2.12</artifactId>
<version>${flink.version}</version>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-table-api-scala-bridge_2.12</artifactId>
<version>${flink.version}</version>
<exclusions>
<exclusion>
<artifactId>slf4j-api</artifactId>
<groupId>org.slf4j</groupId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-table-api-java-bridge_2.12</artifactId>
<version>${flink.version}</version>
<exclusions>
<exclusion>
<artifactId>slf4j-api</artifactId>
<groupId>org.slf4j</groupId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-table-common</artifactId>
<version>${flink.version}</version>
<exclusions>
<exclusion>
<artifactId>slf4j-api</artifactId>
<groupId>org.slf4j</groupId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-api</artifactId>
<version>1.7.15</version>
</dependency>
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-log4j12</artifactId>
<version>1.7.15</version>
</dependency>
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-nop</artifactId>
<version>1.7.15</version>
<type>jar</type>
</dependency>
</dependencies>
Note: my project also contains other code, so this pom.xml carries extra dependencies; prune the ones you do not need (and keep only one SLF4J binding on the classpath, for instance).
2.hikaricp.properties
driverClassName=com.mysql.jdbc.Driver
jdbcUrl=jdbc:mysql://localhost:3306/test?useUnicode=true&characterEncoding=UTF-8&zeroDateTimeBehavior=convertToNull&useSSL=false&autoReconnect=true
username=root
password=123456
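Only the four mandatory keys are shown above. HikariCP also accepts its standard pool-tuning keys in the same file; the values below are placeholders to adjust for your own load, not settings taken from this job:
maximumPoolSize=5
minimumIdle=1
connectionTimeout=30000
maxLifetime=1800000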
Summary
This article combines publicly available demos with the author's own adjustments; the code has run stably in production without exceptions.
Main references:
1.https://www.jianshu.com/p/1df61bb6801e
2.https://blog.youkuaiyun.com/zc19921215/article/details/117934640