使用DATAX同步postgresql到postgresql的脚本

DATAX同步PostgreSQL到PostgreSQL的脚本示例:

1. 基础全量同步脚本
json
{
  "job": {
    "content": [
      {
        "reader": {
          "name": "postgresqlreader",
          "parameter": {
            "username": "source_user",
            "password": "source_password",
            "column": ["id", "name", "email", "created_at"],
            "splitPk": "id",
            "connection": [
              {
                "table": ["users"],
                "jdbcUrl": ["jdbc:postgresql://source_host:5432/source_db"]
              }
            ]
          }
        },
        "writer": {
          "name": "postgresqlwriter",
          "parameter": {
            "username": "target_user",
            "password": "target_password",
            "column": ["id", "name", "email", "created_at"],
            "preSql": ["TRUNCATE TABLE users"],
            "postSql": [],
            "connection": [
              {
                "table": ["users"],
                "jdbcUrl": "jdbc:postgresql://target_host:5432/target_db"
              }
            ],
            "batchSize": 1024
          }
        }
      }
    ],
    "setting": {
      "speed": {
        "channel": 3
      }
    }
  }
}

2. 增量同步脚本(基于时间戳)
json
{
  "job": {
    "content": [
      {
        "reader": {
          "name": "postgresqlreader",
          "parameter": {
            "username": "source_user",
            "password": "source_password",
            "column": ["*"],
            "where": "updated_at > '2024-01-01 00:00:00'",
            "connection": [
              {
                "table": ["orders"],
                "jdbcUrl": ["jdbc:postgresql://source_host:5432/source_db"]
              }
            ]
          }
        },
        "writer": {
          "name": "postgresqlwriter",
          "parameter": {
            "username": "target_user",
            "password": "target_password",
            "column": ["id", "order_no", "amount", "updated_at"],
            "writeMode": "update",
            "preSql": [],
            "postSql": ["UPDATE sync_log SET last_sync_time = NOW() WHERE table_name = 'orders'"],
            "connection": [
              {
                "table": ["orders"],
                "jdbcUrl": "jdbc:postgresql://target_host:5432/target_db"
              }
            ]
          }
        }
      }
    ],
    "setting": {
      "speed": {
        "channel": 2
      }
    }
  }
}

3. 多表同步脚本
json
{
  "job": {
    "content": [
      {
        "reader": {
          "name": "postgresqlreader",
          "parameter": {
            "username": "source_user",
            "password": "source_password",
            "connection": [
              {
                "table": ["users"],
                "jdbcUrl": ["jdbc:postgresql://source_host:5432/source_db"]
              }
            ]
          }
        },
        "writer": {
          "name": "postgresqlwriter",
          "parameter": {
            "username": "target_user",
            "password": "target_password",
            "connection": [
              {
                "table": ["users"],
                "jdbcUrl": "jdbc:postgresql://target_host:5432/target_db"
              }
            ]
          }
        }
      },
      {
        "reader": {
          "name": "postgresqlreader",
          "parameter": {
            "username": "source_user",
            "password": "source_password",
            "connection": [
              {
                "table": ["products"],
                "jdbcUrl": ["jdbc:postgresql://source_host:5432/source_db"]
              }
            ]
          }
        },
        "writer": {
          "name": "postgresqlwriter",
          "parameter": {
            "username": "target_user",
            "password": "target_password",
            "connection": [
              {
                "table": ["products"],
                "jdbcUrl": "jdbc:postgresql://target_host:5432/target_db"
              }
            ]
          }
        }
      }
    ],
    "setting": {
      "speed": {
        "channel": 4
      }
    }
  }
}

4. 带数据转换的同步脚本
json
{
  "job": {
    "content": [
      {
        "reader": {
          "name": "postgresqlreader",
          "parameter": {
            "username": "source_user",
            "password": "source_password",
            "column": [
              "id",
              "name",
              "email",
              "age",
              "created_date"
            ],
            "where": "status = 'active'",
            "connection": [
              {
                "table": ["customers"],
                "jdbcUrl": ["jdbc:postgresql://source_host:5432/source_db"]
              }
            ]
          }
        },
        "writer": {
          "name": "postgresqlwriter",
          "parameter": {
            "username": "target_user",
            "password": "target_password",
            "column": [
              "customer_id",
              "customer_name",
              "customer_email",
              "customer_age",
              "registration_date"
            ],
            "preSql": [
              "DELETE FROM target_customers WHERE registration_date >= CURRENT_DATE - INTERVAL '30 days'"
            ],
            "connection": [
              {
                "table": ["target_customers"],
                "jdbcUrl": "jdbc:postgresql://target_host:5432/target_db"
              }
            ],
            "transform": [
              {
                "type": "function",
                "name": "substring",
                "parameter": ["email", 1, 10]
              }
            ]
          }
        }
      }
    ],
    "setting": {
      "speed": {
        "channel": 3,
        "byte": 1048576
      },
      "errorLimit": {
        "record": 100
      }
    }
  }
}

5. 使用Shell脚本执行DATAX任务
bash
#!/bin/bash

# DATAX PostgreSQL同步脚本
SOURCE_DB="jdbc:postgresql://localhost:5432/source_db"
TARGET_DB="jdbc:postgresql://localhost:5432/target_db"
SOURCE_USER="source_user"
SOURCE_PASS="source_password"
TARGET_USER="target_user"
TARGET_PASS="target_password"

# 生成DATAX配置文件
generate_config() {
    local table_name=$1
    cat > /tmp/datax_${table_name}.json << EOF
{
    "job": {
        "content": [{
            "reader": {
                "name": "postgresqlreader",
                "parameter": {
                    "username": "$SOURCE_USER",
                    "password": "$SOURCE_PASS",
                    "column": ["*"],
                    "connection": [{
                        "table": ["$table_name"],
                        "jdbcUrl": ["$SOURCE_DB"]
                    }]
                }
            },
            "writer": {
                "name": "postgresqlwriter",
                "parameter": {
                    "username": "$TARGET_USER",
                    "password": "$TARGET_PASS",
                    "column": ["*"],
                    "connection": [{
                        "table": ["$table_name"],
                        "jdbcUrl": "$TARGET_DB"
                    }]
                }
            }
        }],
        "setting": {
            "speed": {"channel": 3}
        }
    }
}
EOF
}

# 同步单个表
sync_table() {
    local table=$1
    echo "开始同步表: $table"
    generate_config $table
    python /path/to/datax/bin/datax.py /tmp/datax_${table}.json
    if [ $? -eq 0 ]; then
        echo "表 $table 同步成功"
    else
        echo "表 $table 同步失败"
    fi
}

# 主函数
main() {
    # 要同步的表列表
    tables=("users" "products" "orders" "customers")
    
    for table in "${tables[@]}"; do
        sync_table $table
    done
}

main "$@"



关键参数说明
Reader参数:
username/password: 源数据库认证信息
column: 指定要同步的列
splitPk: 数据分片字段,用于并行同步
where: 数据过滤条件
querySql: 自定义查询SQL

Writer参数:
writeMode: 写入模式(insert/update)
batchSize: 批量提交大小
preSql/postSql: 同步前后执行的SQL
session: 数据库会话参数

性能调优参数:
channel: 并发通道数
byte/record: 流量控制
errorLimit: 错误记录限制
这些脚本可以根据您的具体需求进行修改和扩展。建议先在小数据量上测试,确认无误后再进行全量同步。

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值