case class Canal(emptyCount: Long,        // operation count
                 logFileName: String,     // binlog file name
                 dbName: String,          // database name
                 logFileOffset: Long,     // binlog offset
                 eventType: String,       // operation type
                 columnValueList: String, // list of column values
                 tableName: String,       // table name
                 timestamp: Long)         // timestamp
import com.alibaba.fastjson.JSON

object Canal {
  // Parse a Canal JSON message into the case class using fastjson
  def apply(json: String): Canal = {
    val jsonObject = JSON.parseObject(json)
    Canal(
      jsonObject.getLong("emptyCount"),
      jsonObject.getString("logFileName"),
      jsonObject.getString("dbName"),
      jsonObject.getLong("logFileOffset"),
      jsonObject.getString("eventType"),
      jsonObject.getString("columnValueList"),
      jsonObject.getString("tableName"),
      jsonObject.getLong("timestamp")
    )
  }
}
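A quick usage sketch of the JSON-parsing apply above. The payload below is hypothetical sample data for illustration only; the shape of real Canal output should be verified against your setup.

// Hypothetical sample message; all field values are made up
val sample =
  """{"emptyCount":1,"logFileName":"mysql-bin.000001","dbName":"testdb",
    |"logFileOffset":250,"eventType":"INSERT",
    |"columnValueList":"[]","tableName":"user","timestamp":1553741346000}""".stripMargin

val canal = Canal(sample)
println(s"${canal.dbName}_${canal.tableName}") // prints: testdb_user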
/*
 * Preprocess the Canal case class into the HbaseOperation case class:
 * this packages up the parameters required by the HBaseUtil operations.
 */
case class HbaseOperation(
  opType: String,    // operation type: INSERT / DELETE / UPDATE
  tableName: String, // HBase table name: <binlog database name>_<binlog table name>
  cfName: String,    // column family name, fixed as "info"
  rowkey: String,    // unique primary key (the first column's value in the binlog column data)
  colName: String,   // column name from the binlog
  colValue: String   // column value from the binlog
)
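Based on the field mapping above, a minimal sketch of the preprocessing step. It assumes columnValueList is a JSON array of {"columnName": ..., "columnValue": ...} objects; the actual Canal payload format may differ, and the helper name toHBaseOperations is made up for illustration.

import com.alibaba.fastjson.JSON

// Hypothetical helper: expand one Canal message into one HbaseOperation per column
def toHBaseOperations(canal: Canal): Seq[HbaseOperation] = {
  val columns = JSON.parseArray(canal.columnValueList)
  // rowkey: take the first column's value from the binlog column data
  val rowkey = columns.getJSONObject(0).getString("columnValue")
  (0 until columns.size()).map { i =>
    val obj = columns.getJSONObject(i)
    HbaseOperation(
      opType    = canal.eventType,                       // INSERT / DELETE / UPDATE
      tableName = s"${canal.dbName}_${canal.tableName}", // dbName_tableName
      cfName    = "info",                                // fixed column family
      rowkey    = rowkey,
      colName   = obj.getString("columnName"),
      colValue  = obj.getString("columnValue")
    )
  }
}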
==========
import java.util.Properties

import org.apache.flink.api.common.serialization.SimpleStringSchema
import org.apache.flink.runtime.state.filesystem.FsStateBackend
import org.apache.flink.streaming.api.environment.CheckpointConfig
import org.apache.flink.streaming.api.scala._
import org.apache.flink.streaming.api.{CheckpointingMode, TimeCharacteristic}
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer09

object App {
  def main(args: Array[String]): Unit = {
    // 2. Create the main method and obtain the StreamExecutionEnvironment
    val env = StreamExecutionEnvironment.getExecutionEnvironment
    // 3. Use EventTime for stream processing, i.e. process records by the time the data was generated
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime)
    // 4. Set the parallelism of the development environment (here: 3)
    env.setParallelism(3)

    // Enable checkpointing so the program can safely run for a long time
    // Trigger a checkpoint every 5 seconds
    env.enableCheckpointing(5000)
    // Use exactly-once checkpointing semantics
    env.getCheckpointConfig.setCheckpointingMode(CheckpointingMode.EXACTLY_ONCE)
    // Minimum pause between two checkpoints
    env.getCheckpointConfig.setMinPauseBetweenCheckpoints(1000)
    // Checkpoint timeout
    env.getCheckpointConfig.setCheckpointTimeout(60000)
    // Maximum number of concurrent checkpoints
    env.getCheckpointConfig.setMaxConcurrentCheckpoints(1)
    // Retain externalized checkpoints when the job is cancelled, so it can be restored from them
    env.getCheckpointConfig.enableExternalizedCheckpoints(CheckpointConfig.ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION)
    // Where to store the checkpoint data
    env.setStateBackend(new FsStateBackend("hdfs://node01:8020/flink-checkpoint/"))

    // Integrate with Kafka
    val properties = new Properties()
    properties.setProperty("bootstrap.servers", GlobalConfigUtil.bootstrapServers)
    properties.setProperty("zookeeper.connect", GlobalConfigUtil.zookeeperConnect)
    properties.setProperty("group.id", GlobalConfigUtil.groupId)
    properties.setProperty("enable.auto.commit", GlobalConfigUtil.enableAutoCommit)
    properties.setProperty("auto.commit.interval.ms", GlobalConfigUtil.autoCommitIntervalMs)
    properties.setProperty("auto.offset.reset", GlobalConfigUtil.autoOffsetReset)
    // Configure key serialization/deserialization
    properties.setProperty("key.serializer", "org.apache.kafka.common.serialization.StringSerializer")
    properties.setProperty("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer")

    val consumer: FlinkKafkaConsumer09[String] = new FlinkKafkaConsumer09[String](
      GlobalConfigUtil.inputTopic,
      new SimpleStringSchema(),
      properties
    )
    // Get the data from Kafka
    val kafkaDataStream: DataStream[String] = env.addSource(consumer)
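From here, one plausible continuation (a sketch, not necessarily the original author's exact next step) is to map each JSON message from Kafka into the Canal case class defined earlier:

    // Sketch: parse each Kafka JSON message into a Canal case class
    val canalDataStream: DataStream[Canal] = kafkaDataStream.map { json =>
      Canal(json)
    }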