改写 DataX,实现业务层 MySQL 到 Kudu 的全量、增量及多表合并同步

一.中间表

在这里插入图片描述

-- Sync-status ("intermediate") table: one row per source table, identified by
-- the unique (dbname, tbname) pair.
CREATE TABLE `xx_datax_status` (
  `id` int(11) NOT NULL AUTO_INCREMENT COMMENT '自增id',
  `dbname` varchar(64) NOT NULL COMMENT '数据库名',
  `tbname` varchar(64) NOT NULL COMMENT '表名',
  -- High-water mark: the largest data-creation time recorded for the business table.
  `xx_xx_data_create_time` datetime DEFAULT NULL COMMENT '业务层表中数据创建最大时间',
  -- 0 = not started, 1 = finished, 2 = incremental sync in progress.
  `status` int(1) DEFAULT '0' COMMENT '是否操作完成 0未开始 1已完成 2正在增量同步',
  `create_time` datetime DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间',
  -- ON UPDATE makes MySQL refresh this column on every row change; with only a
  -- DEFAULT it would stay equal to create_time unless each writer set it by hand.
  `update_time` datetime DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '更新时间',
  PRIMARY KEY (`id`),
  UNIQUE KEY `dbname_tbname` (`dbname`,`tbname`),
  KEY `xx_xx_data_create_time` (`xx_xx_data_create_time`)
) ENGINE=InnoDB AUTO_INCREMENT=62 DEFAULT CHARSET=utf8;
二.datax同步代码
# -*- coding: utf-8 -*-
# @Time    : 2021/10/27 19:04
# @Author  :

import sys
import logging
import os
import signal
import subprocess
import time
import re
import socket
import json

import codecs
import platform
import random
from traceback import format_exc
from datetime import datetime
from string import Template

from dingtalkchatbot.chatbot import DingtalkChatbot
from apscheduler.schedulers.blocking import BlockingScheduler

from db import mysql_helper

# Root logger: INFO and above, rendered as "<time> - <level> - <message>".
logging.basicConfig(level=logging.INFO,
                    format="%(asctime)s - %(levelname)s - %(message)s")


def isWindows():
    """Return True when ``platform.system()`` reports ``'Windows'``."""
    system_name = platform.system()
    return system_name == 'Windows'


# DataX home directory: the parent of the directory that contains this script.
DATAX_HOME = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))

DATAX_VERSION = 'DATAX-OPENSOURCE-3.0'

# Java classpath under DATAX_HOME; the non-Windows form also appends ":.".
if not isWindows():
    CLASS_PATH = "%s/lib/*:." % DATAX_HOME
else:
    # Resolve the codec name 'cp65001' to the UTF-8 codec; any other name
    # is left to the remaining codec search functions.
    codecs.register(lambda name: codecs.lookup('utf-8') if name == 'cp65001' else None)
    CLASS_PATH = "%s/lib/*" % DATAX_HOME

LOGBACK_FILE = "%s/conf/logback.xml" % DATAX_HOME

# Default JVM flags; heap dumps go to <DATAX_HOME>/log.
DEFAULT_JVM = "-Xms1g -Xmx1g -XX:+HeapDumpOnOutOfMemoryError -XX:HeapDumpPath=%s/log" % DATAX_HOME

# -D system properties handed to the JVM (encoding, logback setup, datax.home).
DEFAULT_PROPERTY_CONF = "-Dfile.encoding=UTF-8 -Dlogback.statusListenerClass=ch.qos.logback.core.status.NopStatusListener -Djava.security.egd=file:///dev/urandom -Ddatax.home=%s -Dlogback.configurationFile=%s" % (
    DATAX_HOME,
    LOGBACK_FILE,
)

# Java launch command. The %s slots are filled here; the ${...} placeholders
# (jvm, params, mode, jobid, job) are left in the string for later substitution.
ENGINE_COMMAND = "/usr/local/java/bin/java -server ${jvm} %s -classpath %s  ${params} com.alibaba.datax.core.Engine -mode ${mode} -jobid ${jobid} -job ${job}" % (
    DEFAULT_PROPERTY_CONF,
    CLASS_PATH,
)

# JVM flags for attaching a remote debugger on port 9999.
REMOTE_DEBUG_CONFIG = "-Xdebug -Xrunjdwp:transport=dt_socket,server=y,address=9999"

# Named state codes used by this script.
# NOTE(review): 143 presumably mirrors a SIGTERM exit status (128 + 15) - confirm.
RET_STATE = {"KILL": 143, "FAIL": -1, "OK": 0, "RUN": 1, "RETRY": 2}

# JVM options string; same flags as DEFAULT_JVM but with a fixed /opt/datax/log
# heap-dump path.
jvmParameters = '-Xms1g -Xmx1g -XX:+HeapDumpOnOutOfMemoryError -XX:HeapDumpPath=/opt/datax/log'

# DingTalk robot webhook URL.
# NOTE(review): token and DB credentials below are hard-coded in source -
# consider loading them from configuration instead.
dingding_url = 'https://oapi.dingtalk.com/robot/send?access_token=b'

# MySQL helper for the database that holds the intermediate (status) table.
db_1 = mysql_helper.MysqlHelper(
    host='xx.x.x.x',
    port=3306,
    user='user',
    password="password",
    database='sitemap',
    charset='utf8mb4',
)


class DingDingBot(object):
    """Small wrapper around ``DingtalkChatbot`` for sending text notifications."""

    def __init__(self,
                 webhook="https://oapi.dingtalk.com/robot/send?access_token=f047e"):
        """Create the underlying DingTalk robot client.

        :param webhook: DingTalk robot webhook URL (including its access token).
        """
        self.xiaoding = DingtalkChatbot(webhook)

    def send_message(self, msg):
        """Send ``msg`` as a text message, without @-mentioning everyone.

        Delivery is best-effort: an ordinary failure is logged with its
        traceback and not re-raised, and the method returns ``None``.

        :param msg: object to send; it is rendered with ``'{}'.format(msg)``.
        """
        try:
            self.xiaoding.send_text(msg='{}'.format(msg), is_at_all=False)
        except Exception:
            # Catch Exception rather than using a bare ``except`` so that
            # KeyboardInterrupt / SystemExit still propagate and the process
            # can be stopped while a notification is in flight.
            logging.error(format_exc())


def getLocalIp():
    """Return the IP address that the local host name resolves to.

    The host name is expanded to its fully qualified form and then resolved
    with ``socket.gethostbyname``.

    :return: the resolved address as a string, or ``"Unknown"`` when the
        lookup fails.
    """
    try:
        return socket.gethostbyname(socket.getfqdn(socket.gethostname()))
    except Exception:
        # Resolution failures are expected on hosts without DNS/hosts entries.
        # Catch Exception (not a bare ``except``) so KeyboardInterrupt and
        # SystemExit are not swallowed.
        return "Unknown"


def isUrl(path):
    """Tell whether ``path`` starts with an http:// or https:// URL.

    The check is case-insensitive because ``path`` is lower-cased first.

    :param path: candidate string; any falsy value yields ``False``.
    :return: ``True`` if the lower-cased value matches the URL pattern at
        its start, otherwise ``False``.
    :raises AssertionError: if ``path`` is truthy but not a ``str``.
    """
    if not path:
        return False

    assert (isinstance(path, str))
    return re.match(r"^http[s]?://\S+\w*", path.lower()) is not None


def buildStartCommand
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

Cocktail_py

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值