canal 环境搭建(docker)
官网:https://github.com/alibaba/canal/wiki/Docker-QuickStart
docker 仓库:https://hub.docker.com/r/canal/canal-server/tags
配置文件:https://github.com/alibaba/canal/wiki/AdminGuide
*********************
架构设计
单机:canal server、canal client单节点直连
客户端连接
public class CanalConnectors {
/**
 * Builds a connector that talks directly to one canal server (single-node setup).
 *
 * @param address     socket address handed to the SimpleCanalConnector constructor
 * @param destination destination handed to the connector (per these notes, the instance name)
 * @param username    user name handed to the connector
 * @param password    password handed to the connector
 * @return the SimpleCanalConnector after its timeouts have been set
 */
public static CanalConnector newSingleConnector(SocketAddress address, String destination, String username, String password) {
// Note the constructor's argument order differs from this method's: destination goes last.
SimpleCanalConnector canalConnector = new SimpleCanalConnector(address, username, password, destination);
// Socket timeout 60000 and idle timeout 3600000 — presumably milliseconds (60 s / 1 h); TODO confirm against the canal client API.
canalConnector.setSoTimeout(60000);
canalConnector.setIdleTimeout(3600000);
return canalConnector;
}
canal server高可用:canal server集群部署、canal client直连canal server服务列表
canal server:集群部署,将节点信息存储在zookeeper中,可实现高可用
canal client:通过canal server静态服务列表建立连接
public class CanalConnectors {
/**
 * Builds a cluster connector from a fixed list of canal server addresses.
 *
 * @param addresses   server addresses, wrapped in a SimpleNodeAccessStrategy
 * @param destination destination handed to the connector (per these notes, the instance name)
 * @param username    user name handed to the connector
 * @param password    password handed to the connector
 * @return the ClusterCanalConnector after its timeouts have been set
 */
public static CanalConnector newClusterConnector(List<? extends SocketAddress> addresses, String destination, String username, String password) {
// The address list is supplied by the caller; no zookeeper client is created in this variant.
ClusterCanalConnector canalConnector = new ClusterCanalConnector(username, password, destination, new SimpleNodeAccessStrategy(addresses));
// Socket timeout 60000 and idle timeout 3600000 — presumably milliseconds (60 s / 1 h); TODO confirm against the canal client API.
canalConnector.setSoTimeout(60000);
canalConnector.setIdleTimeout(3600000);
return canalConnector;
}
canal server、canal client高可用:canal server集群部署、canal client开启多个
canal server:集群部署,节点信息存储在zookeeper中,可实现高可用
canal client:canal client注册到zookeeper中,从zookeeper中获取canal server信息
public class CanalConnectors {
/**
 * Builds a cluster connector whose node access strategy is backed by zookeeper.
 *
 * @param zkServers   zookeeper server string passed to ZkClientx.getZkClient
 * @param destination destination handed to both the access strategy and the connector
 * @param username    user name handed to the connector
 * @param password    password handed to the connector
 * @return the ClusterCanalConnector after its timeouts have been set
 */
public static CanalConnector newClusterConnector(String zkServers, String destination, String username, String password) {
// ClusterNodeAccessStrategy receives the destination plus a zookeeper client obtained from zkServers.
ClusterCanalConnector canalConnector = new ClusterCanalConnector(username, password, destination, new ClusterNodeAccessStrategy(destination, ZkClientx.getZkClient(zkServers)));
// Socket timeout 60000 and idle timeout 3600000 — presumably milliseconds (60 s / 1 h); TODO confirm against the canal client API.
canalConnector.setSoTimeout(60000);
canalConnector.setIdleTimeout(3600000);
return canalConnector;
}
*********************
canal server 配置
canal配置加载方式:ManagerCanalInstanceGenerator、SpringCanalInstanceGenerator
ManagerCanalInstanceGenerator:可视化界面配置canal参数
SpringCanalInstanceGenerator:本地文件配置(xxx-instance.xml、canal.properties、instance.properties)
*********************
xxx-instance.xml
创建CanalInstanceWithSpring实例,可选文件如下
memory-instance.xml:元数据在内存存储
file-instance.xml:元数据持久化到文件,log parser position优先在内存中查找,查找不到则到文件中查找
default-instance.xml:元数据保存到zookeeper,log parser position优先在内存中查找,查找不到则到zookeeper中查找
group-instance.xml:将多个parser组合成一个parser,可用于将分库分表后的数据导入同一地方存储分析,元数据默认保存在内存中
public class CanalInstanceWithSpring extends AbstractCanalInstance {
private static final Logger logger = LoggerFactory.getLogger(CanalInstanceWithSpring.class);
public CanalInstanceWithSpring() {
}
**************
AbstractCanalInstance
public class AbstractCanalInstance extends AbstractCanalLifeCycle implements CanalInstance {
private static final Logger logger = LoggerFactory.getLogger(AbstractCanalInstance.class);
protected Long canalId; // canal identifier
protected String destination; // name of this instance; one canal can host multiple instances
protected CanalEventStore<Event> eventStore; // event store that holds the fetched data
protected CanalEventParser eventParser; // parses the data source
protected CanalEventSink<List<Entry>> eventSink; // processes and transforms the data
protected CanalMetaManager metaManager; // metadata manager (parser log position, cursor position, etc.)
protected CanalAlarmHandler alarmHandler; // alarm handler
protected CanalMQConfig mqConfig; // MQ configuration; supports rocketmq, kafka, rabbitmq
public AbstractCanalInstance() {
}
*********************
properties 文件
canal.properties:配置canal server上instance的公共属性
instance.properties:配置instance的属性,若有同名配置,instance.properties优先级更高
canal.properties
#################################################
######### common argument #############
#################################################
canal.id = 1 #canal server的唯一标识,默认为1
# canal server用户名、密码,canal user、password如果不设置,则不开启密码功能
canal.user = canal
canal.passwd = E3619321C1A937C46A0D8BD1DAC39F93B27D4458
canal.ip = #canal server绑定的ip地址
canal.port = 11111 #canal server tcp连接端口,供客户端使用,默认为11111
canal.metrics.pull.port = 11112 #canal server指标数据端口,默认为11112
canal.register.ip = #canal server注册到zookeeper中的ip信息
canal.zkServers = #canal server连接的zookeeper集群,如:10.20.144.22:2181,10.20.144.51:2181
canal.zookeeper.flush.period = 1000 #数据持久化到zookeeper的周期,默认为1000毫秒
# canal admin配置
canal.admin.manager = 127.0.0.1:8089
canal.admin.port = 11110
canal.admin.user = admin
canal.admin.passwd = 4ACFE3202A5FF5CF467898FC58AAB1D615029441
# admin自动注册
#canal.admin.register.auto = true
#canal.admin.register.cluster =
#canal.admin.register.name =
canal.withoutNetty = false
# canal服务端模式,可选值:tcp, kafka, rocketMQ, rabbitMQ
canal.serverMode = tcp
# flush meta cursor/parse position to file
# 将元数据cursor、parse position保存到文件
canal.file.data.dir = ${canal.conf.dir}
canal.file.flush.period = 1000
# eventStore内存空间设置
canal.instance.memory.batch.mode = MEMSIZE #ITEMSIZE:buffer.size表示记录数量
#MEMSIZE(默认值):buffer.size * buffer.memunit限制存储空间大小
canal.instance.memory.buffer.size = 16384 #记录数或者记录大小
canal.instance.memory.buffer.memunit = 1024 #存储单位,默认为1KB(1024字节)
canal.instance.memory.rawEntry = true #存储原始字符串,不做序列化处理
## 心跳检查mysql是否可用
canal.instance.detecting.enable = false #是否开启心跳检查,默认为false
#canal.instance.detecting.sql = insert into retl.xdual values(1,now()) on duplicate key update x=now()
#心跳检查sql
canal.instance.detecting.interval.time = 3 #心跳检查时间间隔,单位为秒,默认为3
canal.instance.detecting.retry.threshold = 3 #心跳检查重试次数,默认为3
canal.instance.detecting.heartbeatHaEnable = false #心跳检查mysql不可用时,是否自动切换到备用数据库
#默认为false
# support maximum transaction size, more than the size of the transaction will be cut into multiple transactions delivery
# 支持的最大事务长度,超过该长度后,可能会切割存储到eventStore中,无法保证事务的完整可见性
canal.instance.transaction.size = 1024
# mysql fallback connected to new master should fallback times
# canal server切换新的mysql后,需要往前回退查找binlog的时间,默认为60s
# mysql 主从库同步存在延时,需要往前查找,保证数据不丢失
canal.instance.fallbackIntervalInSeconds = 60
# 网络配置
canal.instance.network.receiveBufferSize = 16384 #canal server接收数据的最大缓存(从mysql解析的数据)
canal.instance.network.sendBufferSize = 16384 #canal server发送数据的最大缓存(发送给canal client的数据)
canal.instance.network.soTimeout = 30 #canal server读取数据超时时间,默认为30s
# binlog过滤配置(binlog filter config)
canal.instance.filter.druid.ddl = true #是否使用druid解析ddl语句,来获取数据库名、表名
canal.instance.filter.query.dcl = false #是否忽略dcl语句(grant、create user等)
canal.instance.filter.query.dml = false #是否忽略dml语句(insert、delete、update等)
canal.instance.filter.query.ddl = false #是否忽略ddl语句(create table、create view等)
canal.instance.filter.table.error = false #是否忽略binlog表结构获取失败的异常
#主要解决回溯binlog时,对应的表已被删除,
#或者表结构和binlog不一致的情况
canal.instance.filter.rows = false #是否忽略dml导致的数据变更,默认为false
#主要针对用户只订阅ddl、dcl操作
canal.instance.filter.transaction.entry = false #是否忽略事务头,事务尾,默认为false
canal.instance.filter.dml.insert = false #是否忽略dml insert操作,默认为false
canal.instance.filter.dml.update = false #是否忽略dml update操作,默认为false
canal.instance.filter.dml.delete = false #是否忽略dml delete操作,默认为false
# binlog format/image检查(binlog format/image check)
canal.instance.binlog.format = ROW,STATEMENT,MIXED #默认支持ROW、STATEMENT、MIXED
canal.instance.binlog.image = FULL,MINIMAL,NOBLOB #默认支持FULL、MINIMAL、NOBLOB
# binlog ddl isolation
canal.instance.get.ddl.isolation = false #ddl语句是否使用单独的batch返回,默认为false
#如果和其他ddl/dml在同一batch返回,并发处理时前后顺序不能保证,可能会改变表结构
# 并行配置
canal.instance.parser.parallel = true #eventParser是否并行解析binlog,默认为true
canal.instance.parser.parallelThreadSize = 16 #并行处理线程数,默认为可用处理器(cpu核)数的60%
canal.instance.parser.parallelBufferSize = 256 #并行解析的ringBuffer队列数,需为2的幂
# table meta tsdb info
# tableMetaTSDB:处理ddl语句造成的表结构变更
canal.instance.tsdb.enable = true #是否开启tablemeta tsdb
# 全局tsdb配置文件(二选一:默认使用h2;如需将表结构保存到mysql,则改用下面被注释的一行)
canal.instance.tsdb.spring.xml = classpath:spring/tsdb/h2-tsdb.xml
#canal.instance.tsdb.spring.xml = classpath:spring/tsdb/mysql-tsdb.xml
canal.instance.tsdb.dir = ${canal.file.data.dir:../conf}/${canal.instance.destination:}