Debezium系列-Confluent 集成debezium hdfs-sink 将binlog导入Hive

环境准备

安装包

connector 列表:https://docs.confluent.io/5.5.1/connect/managing/connectors.html

安装步骤

## 解压安装包
tar -zxvf confluent-5.5.1-2.12.tar.gz
tar -zxvf debezium-connector-mysql-1.2.0.Final-plugin.tar.gz
unzip confluentinc-kafka-connect-hdfs-5.5.1.zip
## 将插件移入插件目录
mv debezium-connector-mysql confluent-5.5.1/share/java/
mv confluentinc-kafka-connect-hdfs-5.5.1 confluent-5.5.1/share/java/

启动和停止命令(开发者模式启动,只适合开发测试使用)

$confluent_home/bin/confluent local stop 
$confluent_home/bin/confluent local start
## 数据文件存储目录放在 /tmp/confluent.xxxx 下 xxxx为随机字符串
启动成功显示如下
Using CONFLUENT_CURRENT: /tmp/confluent.asfIvpl3
Starting zookeeper
zookeeper is [UP]
Starting kafka
kafka is [UP]
Starting schema-registry
schema-registry is [UP]
Starting kafka-rest
kafka-rest is [UP]
Starting connect
connect is [UP]
Starting ksql-server
ksql-server is [UP]
Starting control-center
control-center is [UP]

启动完成后可以通过 http://$confluent_host:9021/ 登录 control-center 管理页面。

connector配置

mysql source connect

{
    "name": "json-inventory-customers",
    "config": {
        "connector.class": "io.debezium.connector.mysql.MySqlConnector",
        "tasks.max": "1",
        "database.hostname": "psd-hadoop039",
        "database.port": "3306",
        "database.user": "debezium",
        "database.password": "dbz",
        "database.serverTimezone": "UTC",
        "database.server.name": "json",
        "database.whitelist": "inventory",
        "database.history.kafka.bootstrap.servers": "psd-hadoop039:9092",
        "database.history.kafka.topic": "dbhistory.json.inventory",
        "table.whitelist": "inventory.customers",
        "key.converter": "org.apache.kafka.connect.json.JsonConverter",
        "value.converter": "org.apache.kafka.connect.json.JsonConverter",
        "key.converter.schemas.enable":"true",
        "value.converter.schemas.enable":"true",
        "include.schema.changes": "true",
        "transforms": "unwrap",
        "binary.handling.mode": "hex",
        "time.precision.mode": "connect",
        "transforms.unwrap.type": "io.debezium.transforms.ExtractNewRecordState",
        "transforms.unwrap.drop.tombstones": "true",
        "transforms.unwrap.delete.handling.mode": "rewrite",
        "transforms.unwrap.add.headers": "name,db,table,op",
        "transforms.unwrap.add.fields": "name,db,table,op,file,pos,row,ts_ms,source.ts_ms"
    }
}

hdfs sink connect 配置

{
	"name": "json.inventory.customers.sink",
	"config": {
		"connector.class": "io.confluent.connect.hdfs.HdfsSinkConnector",
		"format.class": "io.confluent.connect.hdfs.parquet.ParquetFormat",
		"tasks.max": "1",
		"topics": "json.inventory.customers",
        "hadoop.conf.dir":"/etc/hadoop/conf",
		"store.url": "hdfs://cdhtest",
		"logs.dir": "/user/dts/logs",
        "topics.dir":"/user/dts/topics",
		"flush.size": "1",
		"rotate.interval.ms":"10000",
        "hive.integration":true,
        "hive.database":"dts",
        "hive.metastore.uris":"thrift://cdh-10-21-17-95:9083",
        "partitioner.class":"io.confluent.connect.hdfs.partitioner.HourlyPartitioner",
        "locale":"zh",
        "timezone":"Asia/Shanghai",
        "path.format":"YYYYMMddHH/",
        "schema.compatibility":"BACKWARD"
	}
}

配置参数详情可以从 https://docs.confluent.io/5.5.1/connect/managing/connectors.html 对应connector 查询

评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值