1.问题描述
使用DataX将PostgreSQL数据同步到Doris时,如果表的数量很多,逐个手写DataX配置文件会非常麻烦。鉴于此,我编写了一个DataX配置文件生成脚本,可以灵活地一键生成配置文件,提高生产效率。
废话不多说,脚本如下
2.问题解决
vim gen_import_psql_config_simple.py
批量生成datax同步JSON(postgresql到doris)
# coding=utf-8
import json
import getopt
import os
import sys
import psycopg2
# PostgreSQL (source) connection settings; adjust to your environment.
# These values are passed to psycopg2.connect() in get_connection().
psql_host = "xxx"
psql_port = "xxx"
psql_user = "xxx"
psql_passwd = "xxx"
# Doris (sink) connection settings; adjust to your environment.
# NOTE(review): their consumers are not visible in this section - the meaning
# of each value is inferred from its name only; confirm against the code below.
doris_host = "xxx"
doris_port = "xxx"
doris_http_port = "xxx"
doris_user = "xxx"
doris_passwd = "xxx"
# Presumably the target Doris database name - TODO confirm against its usage.
sink_database = "xxx"
# NOTE(review): purpose of this flag is not visible in this section.
condition = True
# Target directory for the generated config files; adjust to your environment.
output_path = "/data/job"
def get_connection(database):
    """Open a psycopg2 connection to ``database`` on the configured server.

    Host, port, user and password come from the module-level ``psql_*``
    settings; the port is converted to ``int`` before connecting.

    The session is started with ``search_path=information_schema,public``
    so that unqualified table names are looked up in ``information_schema``
    first, then in ``public``.

    Args:
        database: Name of the PostgreSQL database to connect to.

    Returns:
        The connection object returned by ``psycopg2.connect``.
    """
    connect_kwargs = {
        "host": psql_host,
        "port": int(psql_port),
        "user": psql_user,
        "password": psql_passwd,
        "database": database,
        # Passed through to the server as a startup option.
        "options": "-c search_path=information_schema,public",
    }
    return psycopg2.connect(**connect_kwargs)
def get_psql_meta(database, schema,table):
connection = get_connection(database)
cursor = connection.cursor()
sql = "SELECT COLUMN_NAME from columns WHERE TABLE_SCHEMA=%s AND TABLE_NAME=%s ORDER BY ORDINAL_POSITION"
curs