DataX 同步 SQL Server 到 SQL Server 的详细配置脚本和模板。
基础配置模板
创建一个 JSON 文件,例如 sqlserver2sqlserver.json:
json
{
"job": {
"content": [
{
"reader": {
"name": "sqlserverreader",
"parameter": {
"username": "sa",
"password": "your_source_password",
"column": [
"ID",
"Name",
"Age",
"Email",
"CreateTime"
],
"splitPk": "ID",
"where": "",
"connection": [
{
"table": [
"Users"
],
"jdbcUrl": [
"jdbc:sqlserver://source-server:1433;DatabaseName=SourceDB"
]
}
],
"fetchSize": 1000
}
},
"writer": {
"name": "sqlserverwriter",
"parameter": {
"username": "sa",
"password": "your_target_password",
"column": [
"ID",
"Name",
"Age",
"Email",
"CreateTime"
],
"preSql": [
"TRUNCATE TABLE Users"
],
"postSql": [],
"batchSize": 1024,
"connection": [
{
"table": "Users",
"jdbcUrl": "jdbc:sqlserver://target-server:1433;DatabaseName=TargetDB"
}
],
"writeMode": "insert"
}
}
}
],
"setting": {
"speed": {
"channel": 3,
"byte": 10485760
},
"errorLimit": {
"record": 0,
"percentage": 0.02
}
}
}
}
详细参数说明
1. SQL Server Reader 配置详解
json
"reader": {
"name": "sqlserverreader",
"parameter": {
// 源数据库用户名
"username": "sa",
// 源数据库密码
"password": "YourSourcePassword123",
// 要同步的列名数组
"column": ["ID", "Name", "Age", "Email", "CreateTime"],
// 使用通配符同步所有列
// "column": ["*"],
// 切分键,用于数据分片(提高大表同步速度)
"splitPk": "ID",
// WHERE条件,用于数据过滤
"where": "CreateTime > '2024-01-01'",
// 自定义查询SQL(如果使用querySql,则table和column配置失效)
// "querySql": ["SELECT ID, Name FROM Users WHERE Status = 1"],
"connection": [
{
// 要同步的表名(数组,支持多表)
"table": ["Users"],
// SQL Server JDBC连接字符串
// 格式: jdbc:sqlserver://host:port;DatabaseName=database_name
"jdbcUrl": [
"jdbc:sqlserver://192.168.1.100:1433;DatabaseName=SourceDB"
]
}
],
// 每次读取的数据量
"fetchSize": 1000
}
}
2. SQL Server Writer 配置详解
json
"writer": {
"name": "sqlserverwriter",
"parameter": {
// 目标数据库用户名
"username": "sa",
// 目标数据库密码
"password": "YourTargetPassword123",
// 目标表列名(需要与reader的column顺序一致)
"column": ["ID", "Name", "Age", "Email", "CreateTime"],
// 同步前执行的SQL
"preSql": [
"TRUNCATE TABLE Users"
// 或者 "DELETE FROM Users WHERE CreateTime < DATEADD(day, -30, GETDATE())"
],
// 同步后执行的SQL
"postSql": [
"UPDATE SyncLog SET LastSyncTime = GETDATE() WHERE TableName = 'Users'"
],
// 批量提交大小
"batchSize": 1024,
// 写入模式:insert/update
"writeMode": "insert",
"connection": [
{
// 目标表名
"table": "Users",
// 目标数据库连接
"jdbcUrl": "jdbc:sqlserver://192.168.1.101:1433;DatabaseName=TargetDB"
}
]
}
}
不同场景的配置示例
场景1:全量同步(表结构一致)
json
{
"job": {
"content": [
{
"reader": {
"name": "sqlserverreader",
"parameter": {
"username": "source_user",
"password": "source_password",
"column": ["*"],
"connection": [
{
"table": ["Employees"],
"jdbcUrl": ["jdbc:sqlserver://source-sql:1433;DatabaseName=HR"]
}
],
"fetchSize": 2000
}
},
"writer": {
"name": "sqlserverwriter",
"parameter": {
"username": "target_user",
"password": "target_password",
"column": ["*"],
"preSql": ["TRUNCATE TABLE Employees"],
"batchSize": 1000,
"connection": [
{
"table": "Employees",
"jdbcUrl": "jdbc:sqlserver://target-sql:1433;DatabaseName=HR"
}
],
"writeMode": "insert"
}
}
}
],
"setting": {
"speed": {"channel": 4}
}
}
}
场景2:增量同步(基于时间戳)
json
{
"job": {
"content": [
{
"reader": {
"name": "sqlserverreader",
"parameter": {
"username": "source_user",
"password": "source_password",
"column": ["ID", "OrderNumber", "Amount", "UpdateTime"],
"where": "UpdateTime > '2024-01-01 00:00:00'",
"splitPk": "ID",
"connection": [
{
"table": ["Orders"],
"jdbcUrl": ["jdbc:sqlserver://source-sql:1433;DatabaseName=Sales"]
}
]
}
},
"writer": {
"name": "sqlserverwriter",
"parameter": {
"username": "target_user",
"password": "target_password",
"column": ["ID", "OrderNumber", "Amount", "UpdateTime"],
"writeMode": "insert", // 或者使用merge
"batchSize": 500,
"connection": [
{
"table": "Orders",
"jdbcUrl": "jdbc:sqlserver://target-sql:1433;DatabaseName=Sales"
}
]
}
}
}
],
"setting": {
"speed": {"channel": 2}
}
}
}
场景3:复杂查询同步
json
{
"job": {
"content": [
{
"reader": {
"name": "sqlserverreader",
"parameter": {
"username": "source_user",
"password": "source_password",
// 使用querySql进行复杂查询
"querySql": [
"SELECT " +
"c.CustomerID, " +
"c.CustomerName, " +
"o.OrderID, " +
"o.OrderDate, " +
"SUM(oi.Quantity * oi.UnitPrice) as TotalAmount " +
"FROM Customers c " +
"INNER JOIN Orders o ON c.CustomerID = o.CustomerID " +
"INNER JOIN OrderItems oi ON o.OrderID = oi.OrderID " +
"WHERE o.OrderDate >= DATEADD(month, -12, GETDATE()) " +
"GROUP BY c.CustomerID, c.CustomerName, o.OrderID, o.OrderDate"
],
"connection": [
{
"jdbcUrl": ["jdbc:sqlserver://source-sql:1433;DatabaseName=Sales"]
}
]
}
},
"writer": {
"name": "sqlserverwriter",
"parameter": {
"username": "target_user",
"password": "target_password",
"column": ["CustomerID", "CustomerName", "OrderID", "OrderDate", "TotalAmount"],
"preSql": ["TRUNCATE TABLE CustomerOrderSummary"],
"connection": [
{
"table": "CustomerOrderSummary",
"jdbcUrl": "jdbc:sqlserver://target-sql:1433;DatabaseName=Reporting"
}
],
"writeMode": "insert"
}
}
}
]
}
}
场景4:多表同步
json
{
"job": {
"content": [
{
"reader": {
"name": "sqlserverreader",
"parameter": {
"username": "source_user",
"password": "source_password",
"connection": [
{
"table": ["Customers", "Products", "Orders"],
"jdbcUrl": ["jdbc:sqlserver://source-sql:1433;DatabaseName=Business"]
}
]
}
},
"writer": {
"name": "sqlserverwriter",
"parameter": {
"username": "target_user",
"password": "target_password",
"connection": [
{
"table": "Customers",
"jdbcUrl": "jdbc:sqlserver://target-sql:1433;DatabaseName=Backup"
}
]
}
}
}
],
"setting": {
"speed": {"channel": 3}
}
}
}
自动化增量同步脚本
bash
#!/bin/bash
# sqlserver_incremental_sync.sh
# 配置变量
SOURCE_USER="source_user"
SOURCE_PASSWORD="source_password"
TARGET_USER="target_user"
TARGET_PASSWORD="target_password"
SOURCE_URL="jdbc:sqlserver://source-sql:1433;DatabaseName=SourceDB"
TARGET_URL="jdbc:sqlserver://target-sql:1433;DatabaseName=TargetDB"
TABLE_NAME="SalesData"
# 获取上次同步时间(从目标数据库的状态表)
LAST_SYNC=$(sqlcmd -S target-sql -d TargetDB -U $TARGET_USER -P $TARGET_PASSWORD -h -1 -Q "
SET NOCOUNT ON;
SELECT ISNULL(CONVERT(VARCHAR, MAX(LastSyncTime), 120), '1970-01-01 00:00:00')
FROM SyncStatus WHERE TableName = '$TABLE_NAME'
")
# 清理输出
LAST_SYNC=$(echo $LAST_SYNC | tr -d '\r\n' | sed 's/^ *//;s/ *$//')
# 当前时间
CURRENT_TIME=$(date +"%Y-%m-%d %H:%M:%S")
# 生成DataX配置
cat > /tmp/sqlserver_incremental_${TABLE_NAME}.json <<EOF
{
"job": {
"content": [
{
"reader": {
"name": "sqlserverreader",
"parameter": {
"username": "$SOURCE_USER",
"password": "$SOURCE_PASSWORD",
"column": ["*"],
"where": "UpdateTime > '$LAST_SYNC' AND UpdateTime <= GETDATE()",
"connection": [
{
"table": ["$TABLE_NAME"],
"jdbcUrl": ["$SOURCE_URL"]
}
]
}
},
"writer": {
"name": "sqlserverwriter",
"parameter": {
"username": "$TARGET_USER",
"password": "$TARGET_PASSWORD",
"column": ["*"],
"writeMode": "insert",
"batchSize": 1000,
"connection": [
{
"table": "$TABLE_NAME",
"jdbcUrl": "$TARGET_URL"
}
]
}
}
}
],
"setting": {
"speed": {"channel": 3}
}
}
}
EOF
# 执行同步
echo "开始同步表: $TABLE_NAME, 时间范围: $LAST_SYNC 至 $CURRENT_TIME"
python /opt/datax/bin/datax.py /tmp/sqlserver_incremental_${TABLE_NAME}.json
# 更新同步状态
if [ $? -eq 0 ]; then
sqlcmd -S target-sql -d TargetDB -U $TARGET_USER -P $TARGET_PASSWORD -Q "
MERGE SyncStatus AS target
USING (SELECT '$TABLE_NAME' AS TableName) AS source
ON (target.TableName = source.TableName)
WHEN MATCHED THEN
UPDATE SET LastSyncTime = '$CURRENT_TIME'
WHEN NOT MATCHED THEN
INSERT (TableName, LastSyncTime)
VALUES ('$TABLE_NAME', '$CURRENT_TIME');"
echo "同步完成: $TABLE_NAME"
else
echo "同步失败: $TABLE_NAME"
exit 1
fi
Windows批处理脚本
batch
@echo off
REM sqlserver_sync.bat
set SOURCE_USER=sa
set SOURCE_PASSWORD=source_password
set TARGET_USER=sa
set TARGET_PASSWORD=target_password
set TABLE_NAME=Users
REM 生成DataX配置
echo {
echo "job": {
echo "content": [
echo {
echo "reader": {
echo "name": "sqlserverreader",
echo "parameter": {
echo "username": "%SOURCE_USER%",
echo "password": "%SOURCE_PASSWORD%",
echo "column": ["*"],
echo "connection": [
echo {
echo "table": ["%TABLE_NAME%"],
echo "jdbcUrl": ["jdbc:sqlserver://source-sql:1433;DatabaseName=SourceDB"]
echo }
echo ]
echo }
echo },
echo "writer": {
echo "name": "sqlserverwriter",
echo "parameter": {
echo "username": "%TARGET_USER%",
echo "password": "%TARGET_PASSWORD%",
echo "column": ["*"],
echo "preSql": ["TRUNCATE TABLE %TABLE_NAME%"],
echo "connection": [
echo {
echo "table": "%TABLE_NAME%",
echo "jdbcUrl": "jdbc:sqlserver://target-sql:1433;DatabaseName=TargetDB"
echo }
echo ],
echo "writeMode": "insert"
echo }
echo }
echo }
echo ]
echo }
echo } > sync_config.json
REM 执行同步
python C:\datax\bin\datax.py sync_config.json
if %errorlevel% equ 0 (
echo 同步成功!
) else (
echo 同步失败!
exit /b 1
)
重要注意事项
SQL Server驱动:确保 DataX 的 plugin/reader/sqlserverreader 和 plugin/writer/sqlserverwriter
目录中存在正确的 JDBC 驱动(如 mssql-jdbc.jar)
身份验证模式:
Windows身份验证:需要在连接字符串中配置 integratedSecurity=true
SQL Server身份验证:使用用户名密码
连接字符串格式:
javascript
// 标准格式
"jdbc:sqlserver://host:port;DatabaseName=db_name"
// 使用实例名
"jdbc:sqlserver://host\\instance_name:port;DatabaseName=db_name"
// Windows身份验证
"jdbc:sqlserver://host:port;DatabaseName=db_name;integratedSecurity=true"
性能优化:
大表同步配置合适的 splitPk
调整 fetchSize 和 batchSize
根据网络和服务器性能调整 channel 数量
数据类型兼容性:确保源表和目标表的数据类型兼容
执行命令
bash
# 执行同步任务
python /opt/datax/bin/datax.py /path/to/sqlserver2sqlserver.json
# 查看详细执行日志
python /opt/datax/bin/datax.py /path/to/sqlserver2sqlserver.json > sync.log 2>&1
# 后台执行
nohup python /opt/datax/bin/datax.py /path/to/sqlserver2sqlserver.json > sync.log 2>&1 &
这些模板涵盖了 SQL Server 到 SQL Server 同步的主要场景,您可以根据实际需求进行调整。
特别要注意连接字符串的格式和身份验证方式的配置。

03-13
890
890

被折叠的 条评论
为什么被折叠?



