import cx_Oracle
import pyodbc
import time
from datetime import datetime, timedelta
import threading
import queue
# 多源Oracle数据库配置
ORACLE_CONFIGS = [
{
'system_id': '12', # 源系统标识
'desc': '品质情报数据12',
'user': 'toyota',
'password': 'HISTORY',
'dsn': '192.168.154.6:1521/UALC',
'line_default': 12 # 源1的line默认值
},
{
'system_id': '34',
'desc': '品质情报数据34',
'user': 'toyota',
'password': 'HISTORY',
'dsn': '192.168.151.6:1521/UALC',
'line_default':34 # 源1的line默认值
}
]
# SQL Server目标数据库配置
SQLSERVER_CONFIG = {
'server': '192.168.10.79',
'database': 'TNGA现场DB',
'user': 'sa',
'password': 'zhglbQG@)!$6600',
'driver': 'ODBC Driver 17 for SQL Server'
}
# 全局配置
INCREMENTAL_COLUMN = 'last_updated' # 时间戳字段
SOURCE_TABLE = 'T_MEASUREMENTS' # 源表名
TARGET_TABLE = 'T_MEASUREMENTS' # 目标表名
STATE_TABLE = 'etl_state_tracking' # 状态表名
BATCH_SIZE = 5000 # 每批处理数据量
MAX_THREADS = 4 # 最大并发线程数
def create_state_table_if_not_exists(conn):
"""创建状态跟踪表(如果不存在)"""
cursor = conn.cursor()
try:
cursor.execute(f"""
IF NOT EXISTS (SELECT * FROM sys.tables WHERE name = '{STATE_TABLE}')
CREATE TABLE {STATE_TABLE} (
id INT IDENTITY(1,1) PRIMARY KEY,
source_system VARCHAR(100) NOT NULL,
source_table VARCHAR(255) NOT NULL,
last_state VARCHAR(100) NOT NULL,
last_update DATETIME DEFAULT GETDATE()
)
""")
conn.commit()
except Exception as e:
print(f"Error creating state table: {e}")
finally:
cursor.close()
def ensure_target_table_structure(sql_conn, oracle_desc):
"""
确保目标表结构包含必备字段
不对源数据库做任何修改,仅在目标库操作
"""
cursor = sql_conn.cursor()
try:
# 获取目标表列信息
cursor.execute(f"""
SELECT COLUMN_NAME, DATA_TYPE
FROM INFORMATION_SCHEMA.COLUMNS
WHERE TABLE_NAME = '{TARGET_TABLE}'
""")
existing_columns = {row[0].lower(): row[1] for row in cursor.fetchall()}
# 添加缺失的必要字段(不修改源数据库)
if 'source_system' not in existing_columns:
cursor.execute(f"""
ALTER TABLE {TARGET_TABLE}
ADD source_system VARCHAR(100) NOT NULL DEFAULT 'UNKNOWN'
""")
print(f"Added 'source_system' column to {TARGET_TABLE}")
if 'line' not in existing_columns:
cursor.execute(f"""
ALTER TABLE {TARGET_TABLE}
ADD line INT NOT NULL DEFAULT 0
""")
print(f"Added 'line' column to {TARGET_TABLE}")
# 添加索引优化查询
if 'source_system' in existing_columns:
cursor.execute(f"""
IF NOT EXISTS (
SELECT * FROM sys.indexes
WHERE name='idx_target_source_system' AND object_id = OBJECT_ID('{TARGET_TABLE}')
)
CREATE INDEX idx_target_source_system
ON {TARGET_TABLE}(source_system)
""")
conn.commit()
return True
except Exception as e:
print(f"Error verifying table structure: {e}")
return False
finally:
cursor.close()
def get_last_state(sql_conn, source_system, source_table):
"""获取指定源系统的最后状态值"""
cursor = sql_conn.cursor()
try:
cursor.execute(f"""
SELECT TOP 1 last_state
FROM {STATE_TABLE}
WHERE source_system = ? AND source_table = ?
ORDER BY last_update DESC
""", source_system, source_table)
row = cursor.fetchone()
if row:
return row[0]
# 首次运行初始化状态
init_time = (datetime.now() - timedelta(days=1)).strftime('%Y-%m-%d %H:%M:%S')
cursor.execute(f"""
INSERT INTO {STATE_TABLE} (source_system, source_table, last_state)
VALUES (?, ?, ?)
""", source_system, source_table, init_time)
sql_conn.commit()
return init_time
except Exception as e:
print(f"Error getting last state for {source_system}: {e}")
return None
finally:
cursor.close()
def save_last_state(sql_conn, source_system, source_table, state):
"""保存指定源系统的最新状态值"""
cursor = sql_conn.cursor()
try:
cursor.execute(f"""
INSERT INTO {STATE_TABLE} (source_system, source_table, last_state)
VALUES (?, ?, ?)
""", source_system, source_table, state.strftime('%Y-%m-%d %H:%M:%S'))
sql_conn.commit()
return True
except Exception as e:
print(f"Error saving state for {source_system}: {e}")
sql_conn.rollback()
return False
finally:
cursor.close()
def worker(task_queue, sql_conn_lock):
"""工作线程处理源数据库同步任务"""
while True:
task = task_queue.get()
if task is None:
break
oracle_config = task
source_system = oracle_config['system_id']
line_default = oracle_config['line_default']
try:
# 获取SQL Server连接(线程安全)
with sql_conn_lock:
sql_conn = pyodbc.connect(
f"DRIVER={SQLSERVER_CONFIG['driver']};"
f"SERVER={SQLSERVER_CONFIG['server']};"
f"DATABASE={SQLSERVER_CONFIG['database']};"
f"UID={SQLSERVER_CONFIG['user']};"
f"PWD={SQLSERVER_CONFIG['password']}",
autocommit=False
)
# 获取上次抽取状态
last_state = get_last_state(sql_conn, source_system, SOURCE_TABLE)
if last_state is None:
print(f"Failed to get state for {source_system}, skipping")
continue
print(f"[{source_system}] Starting sync from: {last_state}")
# 连接Oracle(只读操作)
oracle_conn = cx_Oracle.connect(
user=oracle_config['user'],
password=oracle_config['password'],
dsn=oracle_config['dsn']
)
oracle_cursor = oracle_conn.cursor()
# 查询增量数据(只读,不修改源数据库)
query = f"""
SELECT /*+ PARALLEL(4) */ *
FROM {SOURCE_TABLE} t
WHERE t.{INCREMENTAL_COLUMN} > TO_TIMESTAMP(:state, 'YYYY-MM-DD HH24:MI:SS')
ORDER BY t.{INCREMENTAL_COLUMN}
"""
oracle_cursor.execute(query, state=last_state)
# 获取源表描述信息
oracle_desc = oracle_cursor.description
source_columns = [desc[0] for desc in oracle_desc]
# 获取SQL Server游标(使用连接锁)
with sql_conn_lock:
# 确保目标表结构(线程安全)
ensure_target_table_structure(sql_conn, oracle_desc)
sql_cursor = sql_conn.cursor()
target_columns = source_columns + ['source_system', 'line']
placeholders = ', '.join(['?' for _ in target_columns])
insert_sql = f"INSERT INTO {TARGET_TABLE} ({', '.join(target_columns)}) VALUES ({placeholders})"
# 重置批处理变量
batch = []
max_state = datetime.strptime(last_state, '%Y-%m-%d %H:%M:%S')
row_count = 0
# 处理数据批次
while True:
rows = oracle_cursor.fetchmany(BATCH_SIZE)
if not rows:
break
for row in rows:
# 使用该源的line默认值(不修改源数据)
row_data = tuple(row) + (source_system, line_default)
batch.append(row_data)
# 更新最大状态值
if row[-1] > max_state:
max_state = row[-1]
# 批量插入SQL Server
sql_cursor.fast_executemany = True
sql_cursor.executemany(insert_sql, batch)
row_count += len(batch)
print(f"[{source_system}] Inserted {len(batch)} rows (line={line_default}), total: {row_count}")
batch = []
# 保存状态并提交
if max_state > datetime.strptime(last_state, '%Y-%m-%d %H:%M:%S'):
save_last_state(sql_conn, source_system, SOURCE_TABLE, max_state)
sql_conn.commit()
sql_cursor.close()
except Exception as e:
print(f"[{source_system}] Error: {str(e)}")
if 'sql_conn' in locals() and sql_conn:
sql_conn.rollback()
finally:
# 清理资源(不修改源数据库)
if 'oracle_cursor' in locals():
oracle_cursor.close()
if 'oracle_conn' in locals():
oracle_conn.close()
if 'sql_conn' in locals():
sql_conn.close()
task_queue.task_done()
def multi_source_sync():
"""多源数据库同步主函数(线程池版)"""
# 创建SQL Server连接锁
sql_conn_lock = threading.Lock()
# 创建任务队列
task_queue = queue.Queue()
for config in ORACLE_CONFIGS:
task_queue.put(config)
# 创建工作线程
threads = []
for i in range(min(MAX_THREADS, len(ORACLE_CONFIGS))):
t = threading.Thread(target=worker, args=(task_queue, sql_conn_lock))
t.start()
threads.append(t)
# 等待所有任务完成
task_queue.join()
# 停止工作线程
for _ in threads:
task_queue.put(None)
for t in threads:
t.join()
print("All sources processed")
if __name__ == "__main__":
# 初始SQL Server连接创建状态表
try:
sql_conn = pyodbc.connect(
f"DRIVER={SQLSERVER_CONFIG['driver']};"
f"SERVER={SQLSERVER_CONFIG['server']};"
f"DATABASE={SQLSERVER_CONFIG['database']};"
f"UID={SQLSERVER_CONFIG['user']};"
f"PWD={SQLSERVER_CONFIG['password']}"
)
create_state_table_if_not_exists(sql_conn)
sql_conn.close()
except Exception as e:
print(f"Initialization error: {str(e)}")
# 主循环
while True:
start_time = time.time()
multi_source_sync()
elapsed = time.time() - start_time
sleep_time = max(300 - elapsed, 0) # 保持5分钟间隔
print(f"Sync completed in {elapsed:.2f}s, next in {sleep_time:.1f}s")
time.sleep(sleep_time)
def create_state_table_if_not_exists(conn):
"""创建状态跟踪表(如果不存在)"""
cursor = conn.cursor()
try:
cursor.execute(f"""
IF NOT EXISTS (SELECT * FROM sys.tables WHERE name = '{STATE_TABLE}')
CREATE TABLE {STATE_TABLE} (
id INT IDENTITY(1,1) PRIMARY KEY,
source_system VARCHAR(100) NOT NULL, -- 源系统标识
source_table VARCHAR(255) NOT NULL,
last_state VARCHAR(100) NOT NULL,
last_update DATETIME DEFAULT GETDATE()
)
""")
# 添加索引优化查询性能
cursor.execute(f"""
IF NOT EXISTS (
SELECT * FROM sys.indexes
WHERE name='idx_state_system_table' AND object_id = OBJECT_ID('{STATE_TABLE}')
)
CREATE INDEX idx_state_system_table
ON {STATE_TABLE}(source_system, source_table)
""")
conn.commit()
except Exception as e:
print(f"Error creating state table: {e}")
finally:
cursor.close()
def ensure_target_table_structure(sql_conn, oracle_desc, source_system=None):
"""确保目标表结构包含必要字段"""
cursor = sql_conn.cursor()
try:
# 获取目标表列信息
cursor.execute(f"""
SELECT COLUMN_NAME
FROM INFORMATION_SCHEMA.COLUMNS
WHERE TABLE_NAME = '{TARGET_TABLE}'
""")
existing_columns = [row[0].lower() for row in cursor.fetchall()]
# 获取源表列名(Oracle描述信息)
source_columns = [desc[0].lower() for desc in oracle_desc]
# 检查并添加源系统标识字段
if 'source_system' not in existing_columns:
cursor.execute(f"""
ALTER TABLE {TARGET_TABLE}
ADD source_system VARCHAR(100) NOT NULL DEFAULT 'UNKNOWN'
""")
print(f"Added 'source_system' column to {TARGET_TABLE}")
existing_columns.append('source_system')
# 检查并添加line字段
if 'line' not in existing_columns:
cursor.execute(f"""
ALTER TABLE {TARGET_TABLE}
ADD line INT DEFAULT {DEFAULT_LINE_VALUE} NOT NULL
""")
print(f"Added 'line' column to {TARGET_TABLE} with default value {DEFAULT_LINE_VALUE}")
existing_columns.append('line')
# 检查源表和目标表的列是否匹配(忽略新增字段)
for col in source_columns:
if col not in existing_columns:
print(f"Warning: Column {col} exists in source but not in target table")
# 添加源系统索引(如果不存在)
if 'source_system' in existing_columns:
cursor.execute(f"""
IF NOT EXISTS (
SELECT * FROM sys.indexes
WHERE name='idx_target_source_system' AND object_id = OBJECT_ID('{TARGET_TABLE}')
)
CREATE INDEX idx_target_source_system
ON {TARGET_TABLE}(source_system)
""")
conn.commit()
return True
except Exception as e:
print(f"Error verifying table structure: {e}")
return False
finally:
cursor.close()
def get_last_state(sql_conn, source_system, source_table):
"""获取指定源系统的最后状态值"""
cursor = sql_conn.cursor()
try:
cursor.execute(f"""
SELECT TOP 1 last_state
FROM {STATE_TABLE}
WHERE source_system = ? AND source_table = ?
ORDER BY last_update DESC
""", source_system, source_table)
row = cursor.fetchone()
if row:
return row[0]
# 首次运行初始化状态
init_time = (datetime.now() - timedelta(days=1)).strftime('%Y-%m-%d %H:%M:%S')
cursor.execute(f"""
INSERT INTO {STATE_TABLE} (source_system, source_table, last_state)
VALUES (?, ?, ?)
""", source_system, source_table, init_time)
sql_conn.commit()
return init_time
except Exception as e:
print(f"Error getting last state for {source_system}: {e}")
return None
finally:
cursor.close()
def save_last_state(sql_conn, source_system, source_table, state):
"""保存指定源系统的最新状态值"""
cursor = sql_conn.cursor()
try:
cursor.execute(f"""
INSERT INTO {STATE_TABLE} (source_system, source_table, last_state)
VALUES (?, ?, ?)
""", source_system, source_table, state.strftime('%Y-%m-%d %H:%M:%S'))
sql_conn.commit()
return True
except Exception as e:
print(f"Error saving state for {source_system}: {e}")
sql_conn.rollback()
return False
finally:
cursor.close()
def process_source(oracle_config, sql_conn):
"""处理单个源数据库的抽取和加载"""
source_system = oracle_config['system_id']
print(f"Processing source system: {source_system} ({oracle_config['desc']})")
try:
# 获取上次抽取状态
last_state = get_last_state(sql_conn, source_system, SOURCE_TABLE)
if last_state is None:
print(f"Failed to get state for {source_system}, skipping")
return False
print(f"Starting incremental sync from: {last_state}")
# 连接Oracle
oracle_conn = cx_Oracle.connect(
user=oracle_config['user'],
password=oracle_config['password'],
dsn=oracle_config['dsn']
)
oracle_cursor = oracle_conn.cursor()
# 使用绑定变量防止SQL注入
query = f"""
SELECT /*+ PARALLEL(4) */ *
FROM {SOURCE_TABLE} t
WHERE t.{INCREMENTAL_COLUMN} > TO_TIMESTAMP(:state, 'YYYY-MM-DD HH24:MI:SS')
ORDER BY t.{INCREMENTAL_COLUMN}
"""
oracle_cursor.execute(query, state=last_state)
# 获取源表描述信息
oracle_desc = oracle_cursor.description
# 确保目标表结构正确
if not ensure_target_table_structure(sql_conn, oracle_desc, source_system):
print(f"Skipping {source_system} due to table structure issues")
return False
# 准备SQL Server插入
sql_cursor = sql_conn.cursor()
# 获取源表列名
source_columns = [desc[0] for desc in oracle_desc]
# 目标表列名 = 源表列 + 系统标识 + line
target_columns = source_columns + ['line']
# 创建插入SQL语句
placeholders = ', '.join(['?' for _ in target_columns])
insert_sql = f"INSERT INTO {TARGET_TABLE} ({', '.join(target_columns)}) VALUES ({placeholders})"
# 批量处理
batch = []
max_state = datetime.strptime(last_state, '%Y-%m-%d %H:%M:%S')
row_count = 0
while True:
rows = oracle_cursor.fetchmany(BATCH_SIZE)
if not rows:
break
for row in rows:
# 转换行数据:源列 + 系统标识 + line默认值
row_data = tuple(row) + (source_system, DEFAULT_LINE_VALUE)
batch.append(row_data)
# 更新最大状态值(假设增量字段在最后位置)
if row[-1] > max_state:
max_state = row[-1]
# 批量插入
sql_cursor.fast_executemany = True
sql_cursor.executemany(insert_sql, batch)
row_count += len(batch)
print(f"Inserted {len(batch)} rows from {source_system}, total: {row_count}")
batch = []
# 保存新状态
if max_state > datetime.strptime(last_state, '%Y-%m-%d %H:%M:%S'):
if save_last_state(sql_conn, source_system, SOURCE_TABLE, max_state):
print(f"New state saved for {source_system}: {max_state}")
return True
except Exception as e:
print(f"Error processing {source_system}: {str(e)}")
return False
finally:
# 清理Oracle资源
if 'oracle_cursor' in locals():
oracle_cursor.close()
if 'oracle_conn' in locals():
oracle_conn.close()
# 清理SQL Server游标
if 'sql_cursor' in locals():
sql_cursor.close()
def multi_source_sync():
"""多源数据库同步主函数"""
try:
# 连接SQL Server
sql_conn_str = f"DRIVER={SQLSERVER_CONFIG['driver']};SERVER={SQLSERVER_CONFIG['server']};" \
f"DATABASE={SQLSERVER_CONFIG['database']};UID={SQLSERVER_CONFIG['user']};" \
f"PWD={SQLSERVER_CONFIG['password']}"
sql_conn = pyodbc.connect(sql_conn_str, autocommit=False)
# 确保状态表存在
create_state_table_if_not_exists(sql_conn)
# 处理每个源系统
success_count = 0
for oracle_config in ORACLE_CONFIGS:
start_time = time.time()
if process_source(oracle_config, sql_conn):
success_count += 1
elapsed = time.time() - start_time
print(f"Processed {oracle_config['system_id']} in {elapsed:.2f}s")
# 提交所有变更
sql_conn.commit()
print(f"Completed: {success_count}/{len(ORACLE_CONFIGS)} sources processed successfully")
return success_count == len(ORACLE_CONFIGS)
except Exception as e:
print(f"Global error occurred: {str(e)}")
if 'sql_conn' in locals():
sql_conn.rollback()
return False
finally:
if 'sql_conn' in locals():
sql_conn.close()
if __name__ == "__main__":
while True:
start_time = time.time()
sync_result = multi_source_sync()
elapsed = time.time() - start_time
sleep_time = max(300 - elapsed, 0) # 保持5分钟间隔
status = "SUCCESS" if sync_result else "PARTIAL/FAILED"
print(f"Sync {status}, took {elapsed:.2f}s, next in {sleep_time:.1f}s")
time.sleep(sleep_time)
DPI-1047: Cannot locate a 64-bit Oracle Client library: "The specified module could not be found".
最新发布