环境准备
安装包
- confluent-5.5.1-2.12.tar.gz
- debezium-connector-mysql-1.2.0.Final-plugin.tar.gz
- confluentinc-kafka-connect-hdfs-5.5.1.zip
connector 列表:https://docs.confluent.io/5.5.1/connect/managing/connectors.html
安装步骤
## 解压安装包
tar -zxvf confluent-5.5.1-2.12.tar.gz
tar -zxvf debezium-connector-mysql-1.2.0.Final-plugin.tar.gz
unzip confluentinc-kafka-connect-hdfs-5.5.1.zip
## 将插件移入插件目录
mv debezium-connector-mysql confluent-5.5.1/share/java/
mv confluentinc-kafka-connect-hdfs-5.5.1 confluent-5.5.1/share/java/
启动和停止命令(开发者模式启动,只适合开发测试使用)
$confluent_home/bin/confluent local stop
$confluent_home/bin/confluent local start
## 数据文件存储目录放在 /tmp/confluent.xxxx 下 xxxx为随机字符串
启动成功显示如下
Using CONFLUENT_CURRENT: /tmp/confluent.asfIvpl3
Starting zookeeper
zookeeper is [UP]
Starting kafka
kafka is [UP]
Starting schema-registry
schema-registry is [UP]
Starting kafka-rest
kafka-rest is [UP]
Starting connect
connect is [UP]
Starting ksql-server
ksql-server is [UP]
Starting control-center
control-center is [UP]
启动完成后可以通过 http://$confluent_host:9021/ 登录 control-center 管理页面。
connector配置
mysql source connect
{
"name": "json-inventory-customers",
"config": {
"connector.class": "io.debezium.connector.mysql.MySqlConnector",
"tasks.max": "1",
"database.hostname": "psd-hadoop039",
"database.port": "3306",
"database.user": "debezium",
"database.password": "dbz",
"database.serverTimezone": "UTC",
"database.server.name": "json",
"database.whitelist": "inventory",
"database.history.kafka.bootstrap.servers": "psd-hadoop039:9092",
"database.history.kafka.topic": "dbhistory.json.inventory",
"table.whitelist": "inventory.customers",
"key.converter": "org.apache.kafka.connect.json.JsonConverter",
"value.converter": "org.apache.kafka.connect.json.JsonConverter",
"key.converter.schemas.enable":"true",
"value.converter.schemas.enable":"true",
"include.schema.changes": "true",
"transforms": "unwrap",
"binary.handling.mode": "hex",
"time.precision.mode": "connect",
"transforms.unwrap.type": "io.debezium.transforms.ExtractNewRecordState",
"transforms.unwrap.drop.tombstones": "true",
"transforms.unwrap.delete.handling.mode": "rewrite",
"transforms.unwrap.add.headers": "name,db,table,op",
"transforms.unwrap.add.fields": "name,db,table,op,file,pos,row,ts_ms,source.ts_ms"
}
}
hdfs sink connect 配置
{
"name": "json.inventory.customers.sink",
"config": {
"connector.class": "io.confluent.connect.hdfs.HdfsSinkConnector",
"format.class": "io.confluent.connect.hdfs.parquet.ParquetFormat",
"tasks.max": "1",
"topics": "json.inventory.customers",
"hadoop.conf.dir":"/etc/hadoop/conf",
"store.url": "hdfs://cdhtest",
"logs.dir": "/user/dts/logs",
"topics.dir":"/user/dts/topics",
"flush.size": "1",
"rotate.interval.ms":"10000",
"hive.integration":true,
"hive.database":"dts",
"hive.metastore.uris":"thrift://cdh-10-21-17-95:9083",
"partitioner.class":"io.confluent.connect.hdfs.partitioner.HourlyPartitioner",
"locale":"zh",
"timezone":"Asia/Shanghai",
"path.format":"YYYYMMddHH/",
"schema.compatibility":"BACKWARD"
}
}
配置参数详情可以从 https://docs.confluent.io/5.5.1/connect/managing/connectors.html 对应connector 查询