DATAX同步PostgreSQL到PostgreSQL的脚本示例:
1. 基础全量同步脚本
json
{
"job": {
"content": [
{
"reader": {
"name": "postgresqlreader",
"parameter": {
"username": "source_user",
"password": "source_password",
"column": ["id", "name", "email", "created_at"],
"splitPk": "id",
"connection": [
{
"table": ["users"],
"jdbcUrl": ["jdbc:postgresql://source_host:5432/source_db"]
}
]
}
},
"writer": {
"name": "postgresqlwriter",
"parameter": {
"username": "target_user",
"password": "target_password",
"column": ["id", "name", "email", "created_at"],
"preSql": ["TRUNCATE TABLE users"],
"postSql": [],
"connection": [
{
"table": ["users"],
"jdbcUrl": "jdbc:postgresql://target_host:5432/target_db"
}
],
"batchSize": 1024
}
}
}
],
"setting": {
"speed": {
"channel": 3
}
}
}
}
2. 增量同步脚本(基于时间戳)
json
{
"job": {
"content": [
{
"reader": {
"name": "postgresqlreader",
"parameter": {
"username": "source_user",
"password": "source_password",
"column": ["*"],
"where": "updated_at > '2024-01-01 00:00:00'",
"connection": [
{
"table": ["orders"],
"jdbcUrl": ["jdbc:postgresql://source_host:5432/source_db"]
}
]
}
},
"writer": {
"name": "postgresqlwriter",
"parameter": {
"username": "target_user",
"password": "target_password",
"column": ["id", "order_no", "amount", "updated_at"],
"writeMode": "update",
"preSql": [],
"postSql": ["UPDATE sync_log SET last_sync_time = NOW() WHERE table_name = 'orders'"],
"connection": [
{
"table": ["orders"],
"jdbcUrl": "jdbc:postgresql://target_host:5432/target_db"
}
]
}
}
}
],
"setting": {
"speed": {
"channel": 2
}
}
}
}
3. 多表同步脚本
json
{
"job": {
"content": [
{
"reader": {
"name": "postgresqlreader",
"parameter": {
"username": "source_user",
"password": "source_password",
"connection": [
{
"table": ["users"],
"jdbcUrl": ["jdbc:postgresql://source_host:5432/source_db"]
}
]
}
},
"writer": {
"name": "postgresqlwriter",
"parameter": {
"username": "target_user",
"password": "target_password",
"connection": [
{
"table": ["users"],
"jdbcUrl": "jdbc:postgresql://target_host:5432/target_db"
}
]
}
}
},
{
"reader": {
"name": "postgresqlreader",
"parameter": {
"username": "source_user",
"password": "source_password",
"connection": [
{
"table": ["products"],
"jdbcUrl": ["jdbc:postgresql://source_host:5432/source_db"]
}
]
}
},
"writer": {
"name": "postgresqlwriter",
"parameter": {
"username": "target_user",
"password": "target_password",
"connection": [
{
"table": ["products"],
"jdbcUrl": "jdbc:postgresql://target_host:5432/target_db"
}
]
}
}
}
],
"setting": {
"speed": {
"channel": 4
}
}
}
}
4. 带数据转换的同步脚本
json
{
"job": {
"content": [
{
"reader": {
"name": "postgresqlreader",
"parameter": {
"username": "source_user",
"password": "source_password",
"column": [
"id",
"name",
"email",
"age",
"created_date"
],
"where": "status = 'active'",
"connection": [
{
"table": ["customers"],
"jdbcUrl": ["jdbc:postgresql://source_host:5432/source_db"]
}
]
}
},
"writer": {
"name": "postgresqlwriter",
"parameter": {
"username": "target_user",
"password": "target_password",
"column": [
"customer_id",
"customer_name",
"customer_email",
"customer_age",
"registration_date"
],
"preSql": [
"DELETE FROM target_customers WHERE registration_date >= CURRENT_DATE - INTERVAL '30 days'"
],
"connection": [
{
"table": ["target_customers"],
"jdbcUrl": "jdbc:postgresql://target_host:5432/target_db"
}
],
"transform": [
{
"type": "function",
"name": "substring",
"parameter": ["email", 1, 10]
}
]
}
}
}
],
"setting": {
"speed": {
"channel": 3,
"byte": 1048576
},
"errorLimit": {
"record": 100
}
}
}
}
5. 使用Shell脚本执行DATAX任务
bash
#!/bin/bash
# DATAX PostgreSQL同步脚本
SOURCE_DB="jdbc:postgresql://localhost:5432/source_db"
TARGET_DB="jdbc:postgresql://localhost:5432/target_db"
SOURCE_USER="source_user"
SOURCE_PASS="source_password"
TARGET_USER="target_user"
TARGET_PASS="target_password"
# 生成DATAX配置文件
generate_config() {
local table_name=$1
cat > /tmp/datax_${table_name}.json << EOF
{
"job": {
"content": [{
"reader": {
"name": "postgresqlreader",
"parameter": {
"username": "$SOURCE_USER",
"password": "$SOURCE_PASS",
"column": ["*"],
"connection": [{
"table": ["$table_name"],
"jdbcUrl": ["$SOURCE_DB"]
}]
}
},
"writer": {
"name": "postgresqlwriter",
"parameter": {
"username": "$TARGET_USER",
"password": "$TARGET_PASS",
"column": ["*"],
"connection": [{
"table": ["$table_name"],
"jdbcUrl": "$TARGET_DB"
}]
}
}
}],
"setting": {
"speed": {"channel": 3}
}
}
}
EOF
}
# 同步单个表
sync_table() {
local table=$1
echo "开始同步表: $table"
generate_config $table
python /path/to/datax/bin/datax.py /tmp/datax_${table}.json
if [ $? -eq 0 ]; then
echo "表 $table 同步成功"
else
echo "表 $table 同步失败"
fi
}
# 主函数
main() {
# 要同步的表列表
tables=("users" "products" "orders" "customers")
for table in "${tables[@]}"; do
sync_table $table
done
}
main "$@"
关键参数说明
Reader参数:
username/password: 源数据库认证信息
column: 指定要同步的列
splitPk: 数据分片字段,用于并行同步
where: 数据过滤条件
querySql: 自定义查询SQL
Writer参数:
writeMode: 写入模式(insert/update)
batchSize: 批量提交大小
preSql/postSql: 同步前后执行的SQL
session: 数据库会话参数
性能调优参数:
channel: 并发通道数
byte/record: 流量控制
errorLimit: 错误记录限制
这些脚本可以根据您的具体需求进行修改和扩展。建议先在小数据量上测试,确认无误后再进行全量同步。
07-01
592
592
10-13
635
635
05-15
08-07
1407
1407
08-19
611
611

被折叠的 条评论
为什么被折叠?



