import concurrent.futures
import logging
import os
import re
import threading
import time
from datetime import datetime
from queue import Empty, Full, Queue
from typing import List, Dict, Tuple, Any, Union, Optional, Set

import psycopg2
import pyodbc
import schedule
# Logging configuration
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)
# Source tables to process (one andon table per line/station)
TABLE_NAMES = ["line1block", "line1cam", "line1crank", "line1head", "line1hsg",
               "line2block", "line2cam", "line2crank", "line2head"]
# Build the data-source list: every table is polled on BOTH hosts
DATA_SOURCES = []
for table_name in TABLE_NAMES:
    # Host 192.168.154.11
    DATA_SOURCES.append({
        'type': 'postgresql',
        'host': '192.168.154.11',
        'port': '5432',
        'database': 'andondata',
        'user': 'powertrain',
        'password': 'andromeda',
        'table': table_name,
        'host_ip': '192.168.154.11'  # stored explicitly for result mapping
    })
    # Host 192.168.151.11
    DATA_SOURCES.append({
        'type': 'postgresql',
        'host': '192.168.151.11',
        'port': '5432',
        'database': 'andondata',
        'user': 'powertrain',
        'password': 'andromeda',
        'table': table_name,
        'host_ip': '192.168.151.11'  # stored explicitly for result mapping
    })
# SQL Server target configuration
TARGET_CONFIG = {
    'server': '192.168.10.116',
    'database': '现场DB',
    'username': 'sa',
    'password': 'gtec_6600'
}
# Table that receives the address-check results
CHECK_RESULT_TABLE = "dwd_web安东状态"
# Retry policy for direct (non-pooled) DB connections
DB_RETRY_CONFIG = {
    'max_retries': 3,
    'delay': 5  # seconds
}
# Connection-pool configuration
MAX_POOL_SIZE = 20
postgres_pools = {}
sqlserver_pool = None
sqlserver_pool_lock = threading.Lock()
postgres_pool_locks = {}
# Precompiled regex: extracts (address, value) pairs from raw string payloads
ADDRESS_VALUE_PATTERN = re.compile(r'[\{\[]\s*[\{\[]?\s*"?([^,"\s]+)"?\s*,\s*"?([^,"\}\]]+)"?\s*[\}\]]?\s*[\}\]]')
class SimpleConnectionPool:
    """A minimal thread-aware database connection pool.

    Idle connections are parked in an internal queue.  New connections are
    created lazily via ``create_func`` until ``max_size`` is reached, after
    which callers block until a connection is released.
    """

    def __init__(self, create_func, max_size: int = 10):
        # Factory returning a fresh DB connection on each call.
        self.create_func = create_func
        self.max_size = max_size
        # Idle connections wait here; maxsize bounds the pool.
        self.pool = Queue(maxsize=max_size)
        self.lock = threading.Lock()
        # Count of live connections this pool has created.
        self.connections_created = 0

    def get_connection(self):
        """Get a connection: reuse an idle one, create one, or block.

        Unlike the original, a failure inside ``create_func`` now propagates
        to the caller instead of being swallowed by a bare ``except`` and
        silently falling through to a blocking (potentially dead-locking)
        ``Queue.get()``.
        """
        # Fast path: grab an idle connection if one is available.
        try:
            return self.pool.get_nowait()
        except Empty:
            pass
        # Below the cap: create a new connection.
        with self.lock:
            if self.connections_created < self.max_size:
                conn = self.create_func()
                self.connections_created += 1
                return conn
        # At capacity: block until someone releases a connection.
        return self.pool.get()

    def release_connection(self, conn):
        """Return a connection to the pool, closing it if the pool is full."""
        try:
            self.pool.put_nowait(conn)
        except Full:
            # Queue is full: close the surplus connection AND free its
            # creation slot.  The original leaked the slot
            # (connections_created was never decremented), so the pool could
            # eventually dead-lock in get_connection().
            with self.lock:
                self.connections_created -= 1
            try:
                conn.close()
            except Exception:
                pass
def create_sqlserver_connection():
    """Open a brand-new SQL Server connection using TARGET_CONFIG."""
    # Assemble the ODBC connection string from the module-level settings.
    parts = [
        "DRIVER={ODBC Driver 17 for SQL Server}",
        f"SERVER={TARGET_CONFIG['server']}",
        f"DATABASE={TARGET_CONFIG['database']}",
        f"UID={TARGET_CONFIG['username']}",
        f"PWD={TARGET_CONFIG['password']}",
    ]
    return pyodbc.connect(";".join(parts))
def create_postgres_connection(host, port, database, user, password):
    """Open a brand-new PostgreSQL connection with the given credentials."""
    params = {
        'host': host,
        'port': port,
        'database': database,
        'user': user,
        'password': password,
    }
    return psycopg2.connect(**params)
def init_connection_pools():
    """Create the SQL Server pool and one PostgreSQL pool per distinct host.

    A pool that fails to initialise is stored as ``None`` so that
    ``get_db_connection`` can fall back to direct connections.
    """
    global sqlserver_pool
    # SQL Server pool
    try:
        sqlserver_pool = SimpleConnectionPool(
            create_func=create_sqlserver_connection,
            max_size=MAX_POOL_SIZE
        )
        logger.info("SQL Server连接池初始化成功")
    except Exception as e:
        logger.error(f"SQL Server连接池初始化失败: {str(e)}")
        sqlserver_pool = None
    # PostgreSQL pools, one per distinct host
    unique_hosts = set(source['host'] for source in DATA_SOURCES)
    for host in unique_hosts:
        source = next(s for s in DATA_SOURCES if s['host'] == host)
        try:
            # BUGFIX: bind the per-host config as a default argument.  The
            # original lambda bound `h=host` but then read the loop variable
            # `source` late, so every pool created connections to the
            # configuration of the LAST host iterated.
            create_func = lambda s=source: create_postgres_connection(
                host=s['host'],
                port=s['port'],
                database=s['database'],
                user=s['user'],
                password=s['password']
            )
            pool = SimpleConnectionPool(
                create_func=create_func,
                max_size=MAX_POOL_SIZE
            )
            postgres_pools[host] = pool
            logger.info(f"PostgreSQL连接池初始化成功: {host}")
        except Exception as e:
            logger.error(f"PostgreSQL连接池初始化失败 ({host}): {str(e)}")
            postgres_pools[host] = None
def get_db_connection(config: Dict, db_type: str):
    """Get a database connection, preferring the matching pool.

    Falls back to a direct connection with up to
    ``DB_RETRY_CONFIG['max_retries']`` attempts when no pool exists or the
    pool hand-out fails.

    Args:
        config: connection settings.  PostgreSQL keys: host/port/database/
            user/password; SQL Server keys: server/database/username/password.
        db_type: 'postgresql' or 'sqlserver'.

    Raises:
        ConnectionError: when every attempt fails.  Credentials are redacted
            from the message (the original embedded the full config, leaking
            the password into logs and tracebacks).
    """
    if db_type == 'postgresql' and config['host'] in postgres_pools and postgres_pools[config['host']]:
        try:
            return postgres_pools[config['host']].get_connection()
        except Exception as e:
            logger.error(f"从连接池获取PostgreSQL连接失败: {str(e)}")
    # Fallback: direct connection with retries
    for attempt in range(DB_RETRY_CONFIG['max_retries']):
        try:
            if db_type == 'postgresql':
                return psycopg2.connect(
                    host=config['host'],
                    port=config['port'],
                    database=config['database'],
                    user=config['user'],
                    password=config['password']
                )
            elif db_type == 'sqlserver':
                return pyodbc.connect(
                    f"DRIVER={{ODBC Driver 17 for SQL Server}};"
                    f"SERVER={config['server']};"
                    f"DATABASE={config['database']};"
                    f"UID={config['username']};"
                    f"PWD={config['password']}"
                )
        except Exception as e:
            logger.warning(f"数据库连接失败(尝试 {attempt + 1}/{DB_RETRY_CONFIG['max_retries']}): {str(e)}")
            # Sleep only BETWEEN attempts; the original also slept after the
            # final failure, delaying the error for no benefit.
            if attempt + 1 < DB_RETRY_CONFIG['max_retries']:
                time.sleep(DB_RETRY_CONFIG['delay'])
    # Redact credentials before surfacing the config in the error message.
    safe_config = {k: ('***' if 'password' in k.lower() else v) for k, v in config.items()}
    raise ConnectionError(f"无法连接到数据库: {safe_config}")
def release_db_connection(conn, config: Dict, db_type: str):
    """Hand a connection back to its pool, or close it when no pool exists."""
    try:
        target_pool = None
        if db_type == 'postgresql':
            target_pool = postgres_pools.get(config['host'])
        elif db_type == 'sqlserver':
            target_pool = sqlserver_pool
        if target_pool:
            target_pool.release_connection(conn)
        else:
            # No usable pool for this connection: just close it.
            try:
                conn.close()
            except:
                pass
    except Exception as e:
        logger.error(f"释放数据库连接失败: {str(e)}")
        # Last resort: make sure the connection does not leak.
        try:
            conn.close()
        except:
            pass
def get_address_set(target_config: Dict) -> Set[str]:
    """Load the set of addresses that must be checked from SQL Server.

    Returns an empty set on any failure so callers can bail out gracefully.
    """
    conn = None
    try:
        conn = get_db_connection(target_config, 'sqlserver')
        cursor = conn.cursor()
        cursor.execute("SELECT DISTINCT 地址 FROM T_web安东地址 WHERE 项目='状态' and 线组='加工' ")
        # Keep only non-empty addresses, stripped of surrounding whitespace.
        addresses = {row[0].strip() for row in cursor.fetchall() if row[0]}
        logger.info(f"成功获取 {len(addresses)} 个需要校验的地址")
        return addresses
    except Exception as e:
        logger.error(f"获取地址集合失败: {str(e)}")
        return set()
    finally:
        if conn:
            release_db_connection(conn, target_config, 'sqlserver')
def extract_data(source_config: Dict) -> List[Tuple]:
    """Fetch the newest row (less than 5 minutes old) from one source table."""
    logger.info(f"从 {source_config['host']}.{source_config['table']} 抽取数据...")
    conn = None
    try:
        conn = get_db_connection(source_config, source_config['type'])
        cursor = conn.cursor()
        # NOTE: the table name is interpolated directly into the SQL; this is
        # acceptable only because it comes from the hard-coded TABLE_NAMES
        # constant, not from user input (identifiers cannot be parameterized).
        query = f"""
        SELECT data, "createdAt"
        FROM {source_config['table']}
        WHERE "createdAt" > CURRENT_TIMESTAMP - INTERVAL '5 minutes'
        ORDER BY "createdAt" DESC LIMIT 1
        """
        cursor.execute(query)
        rows = cursor.fetchall()
        logger.info(f"获取 {len(rows)} 条记录")
        return rows
    except Exception as e:
        logger.error(f"数据抽取失败: {str(e)}")
        return []
    finally:
        if conn:
            release_db_connection(conn, source_config, source_config['type'])
def fast_check_address_value(
value_data: Any,
address: str
) -> int:
"""快速检查地址值是否符合条件(第8位和第16位同时为1)"""
try:
# 1. 处理None值
if value_data is None:
return 0
# 2. 直接处理整数类型
if isinstance(value_data, int):
# 获取第8位(从0开始计数,第7位)和第16位(第15位)
bit8 = (value_data >> 7) & 1
bit16 = (value_data >> 15) & 1
return 1 if bit8 == 1 and bit16 == 1 else 0
# 3. 尝试转换为整数处理
if isinstance(value_data, str):
# 移除所有空白字符
clean_value = re.sub(r'\s+', '', value_data)
# 如果只包含数字,尝试转换为整数
if clean_value.isdigit():
try:
int_value = int(clean_value)
bit8 = (int_value >> 7) & 1
bit16 = (int_value >> 15) & 1
return 1 if bit8 == 1 and bit16 == 1 else 0
except (ValueError, OverflowError):
pass # 回退到字符串处理
# 确保长度为16位(不足补0,超过则截断)
if len(clean_value) < 16:
clean_value = clean_value.zfill(16)
elif len(clean_value) > 16:
clean_value = clean_value[:16]
# 检查第8位(索引7)和第16位(索引15)
bit8 = clean_value[7] if len(clean_value) > 7 else '0'
bit16 = clean_value[15] if len(clean_value) > 15 else '0'
# 如果第8位和第16位都为1,则返回1,否则返回0
return 1 if bit8 == '1' and bit16 == '1' else 0
# 4. 其他类型转换为字符串处理
value_str = str(value_data).strip()
clean_value = re.sub(r'\s+', '', value_str)
if len(clean_value) < 16:
clean_value = clean_value.zfill(16)
elif len(clean_value) > 16:
clean_value = clean_value[:16]
bit8 = clean_value[7] if len(clean_value) > 7 else '0'
bit16 = clean_value[15] if len(clean_value) > 15 else '0'
return 1 if bit8 == '1' and bit16 == '1' else 0
except Exception as e:
logger.error(f"地址校验错误: {address}: {str(e)}")
return -1 # 错误标志
def process_source_data(
    data: Any,
    created_at: datetime,
    address_set: Set[str],
    source_ip: str,
    source_table: str
) -> Union[Tuple, None]:
    """Reduce one source record to a single aggregate check-result row.

    ``data`` may arrive as a list of ``[address, value]`` pairs, a raw
    string/bytes blob (parsed with the precompiled ADDRESS_VALUE_PATTERN
    regex), or a single dict.  Every address found in ``address_set`` is
    bit-checked via ``fast_check_address_value``; the first passing address
    sets the overall result to 1 and stops the scan.

    Returns:
        ``(source_ip, source_table, overall_result, created_at)`` or ``None``
        when no address from ``address_set`` appears in the payload.

    NOTE(review): only ONE aggregate row is produced per source even when
    several addresses (or bit indices) match; if one row per matching
    address/bit is required, the results must be collected instead of
    breaking on the first hit — confirm the intended contract with the
    consumers of the result table.
    """
    overall_result = 0
    found_address = False
    # Case 1: list of [address, value] pairs
    if isinstance(data, list):
        for item in data:
            # Skip malformed entries
            if not isinstance(item, list) or len(item) < 2:
                continue
            address = str(item[0]).strip() if item[0] is not None else ""
            # Fast-skip addresses we are not interested in
            if not address or address not in address_set:
                continue
            found_address = True
            result = fast_check_address_value(item[1], address)
            if result == 1:
                overall_result = 1
                break  # one passing address is enough
            elif result == -1:
                continue  # check errored; move on to the next pair
    # Case 2: raw string/bytes payload parsed via regex
    elif isinstance(data, (str, bytes)):
        raw_str = data.decode('utf-8', errors='ignore') if isinstance(data, bytes) else data
        # Extract (address, value) tuples with the precompiled pattern
        matches = ADDRESS_VALUE_PATTERN.findall(raw_str)
        for match in matches:
            if len(match) < 2:
                continue
            address = match[0].strip()
            value_data = match[1].strip()
            # Fast-skip addresses we are not interested in
            if not address or address not in address_set:
                continue
            found_address = True
            result = fast_check_address_value(value_data, address)
            if result == 1:
                overall_result = 1
                break  # one passing address is enough
            elif result == -1:
                continue
    # Case 3: a single dict payload
    elif isinstance(data, dict):
        # Probe the common key spellings for the address field
        address = ""
        for key in ['code', 'Code', 'address', 'Address']:
            if key in data:
                address = str(data[key]).strip()
                break
        # Bail out early when the address is absent or irrelevant
        if not address or address not in address_set:
            return None
        found_address = True
        # Probe the common key spellings for the value field
        value_data = None
        for key in ['bin', 'Bin', 'value', 'Value', 'data', 'Data']:
            if key in data:
                value_data = data[key]
                break
        if value_data is None:
            logger.warning(f"字典数据中未找到有效的值字段: {data}")
            return None
        result = fast_check_address_value(value_data, address)
        if result == 1:
            overall_result = 1
    # No relevant address appeared anywhere in the payload
    if not found_address:
        logger.debug(f"数据源 {source_ip}.{source_table} 中没有需要校验的地址")
        return None
    # Aggregate result row for this source
    return (
        source_ip,
        source_table,
        overall_result,
        created_at
    )
def clear_check_table():
    """Delete previous check results (the line1assembly rows are preserved)."""
    conn = None
    try:
        conn = get_db_connection(TARGET_CONFIG, 'sqlserver')
        cursor = conn.cursor()
        # Note: this is a filtered DELETE, not a TRUNCATE — rows belonging to
        # 'line1assembly' are intentionally kept.
        delete_sql = f"DELETE FROM {CHECK_RESULT_TABLE} WHERE TABLE_NAME !='line1assembly'"
        cursor.execute(delete_sql)
        conn.commit()
        logger.info(f"已清空地址校验结果表 {CHECK_RESULT_TABLE}")
        return True
    except Exception as e:
        logger.error(f"清空结果表失败: {str(e)}")
        return False
    finally:
        if conn:
            release_db_connection(conn, TARGET_CONFIG, 'sqlserver')
def ensure_check_table():
    """Create the check-result table and its indexes if they do not exist."""
    conn = None
    try:
        conn = get_db_connection(TARGET_CONFIG, 'sqlserver')
        cursor = conn.cursor()
        # Idempotent DDL: guarded by a sys.tables existence check.
        ddl = f"""
        IF NOT EXISTS (SELECT * FROM sys.tables WHERE name = '{CHECK_RESULT_TABLE}')
        BEGIN
        CREATE TABLE {CHECK_RESULT_TABLE} (
        id INT IDENTITY(1,1) PRIMARY KEY,
        host_ip VARCHAR(50) NOT NULL, -- 源主机IP
        table_name VARCHAR(50) NOT NULL, -- 源表名称
        check_result TINYINT NOT NULL, -- 校验结果(1/0/-1)
        data_created_at DATETIME NOT NULL -- 数据创建时间
        );
        CREATE INDEX idx_host_table ON {CHECK_RESULT_TABLE} (host_ip, table_name);
        CREATE INDEX idx_result ON {CHECK_RESULT_TABLE} (check_result);
        END
        """
        cursor.execute(ddl)
        conn.commit()
        logger.info(f"地址校验结果表 {CHECK_RESULT_TABLE} 检查/创建完成")
        return True
    except Exception as e:
        logger.error(f"创建地址校验结果表失败: {str(e)}")
        return False
    finally:
        if conn:
            release_db_connection(conn, TARGET_CONFIG, 'sqlserver')
def save_check_results(results: List[Tuple]):
    """Persist the check results: ensure table, clear old rows, batch-insert.

    Each result is a ``(host_ip, table_name, check_result, created_at)``
    tuple.  Returns True on success (or when there is nothing to save).
    """
    if not results:
        logger.info("无校验结果需要保存")
        return True
    if not ensure_check_table():
        logger.error("无法确保校验结果表存在,跳过保存")
        return False
    # Wipe previous results before inserting the new batch.
    if not clear_check_table():
        logger.error("清空结果表失败,跳过保存")
        return False
    conn = None
    try:
        conn = get_db_connection(TARGET_CONFIG, 'sqlserver')
        cursor = conn.cursor()
        insert_query = f"""
        INSERT INTO {CHECK_RESULT_TABLE}
        (host_ip, table_name, check_result, data_created_at)
        VALUES (?, ?, ?, ?)
        """
        # Single round of executemany for the whole batch.
        cursor.executemany(insert_query, [tuple(row) for row in results])
        conn.commit()
        logger.info(f"成功保存 {len(results)} 条地址校验结果")
        return True
    except Exception as e:
        logger.error(f"保存校验结果失败: {str(e)}")
        return False
    finally:
        if conn:
            release_db_connection(conn, TARGET_CONFIG, 'sqlserver')
def process_data_source(source: Dict, address_set: Set[str]) -> Union[Tuple, None]:
    """Extract and validate the latest record of a single data source."""
    try:
        rows = extract_data(source)
        if not rows:
            logger.debug(f"数据源 {source['table']}@{source['host']} 无新数据")
            return None
        # The extraction query LIMITs to 1, so the first row is the newest.
        payload, created_at = rows[0]
        return process_source_data(
            payload,
            created_at,
            address_set,
            source['host_ip'],
            source['table'],
        )
    except Exception as e:
        logger.error(f"处理数据源 {source['host']}.{source['table']} 时出错: {str(e)}")
        return None
def run_address_check():
    """Run one full address-check pass across all data sources in parallel.

    Returns True when the pass (including result persistence) succeeded.
    """
    start_time = time.time()
    logger.info("▶▶▶ 开始地址校验任务 ◀◀◀")
    try:
        # Addresses to validate; without them there is nothing to do.
        address_set = get_address_set(TARGET_CONFIG)
        if not address_set:
            logger.warning("未获取到需要校验的地址集合,任务终止")
            return False
        all_check_results = []
        # Thread count scales with CPU cores but never exceeds the source count.
        max_workers = min(len(DATA_SOURCES), 4 * (os.cpu_count() or 1))
        with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
            # Fan out one task per data source.
            future_map = {}
            for src in DATA_SOURCES:
                future_map[executor.submit(process_data_source, src, address_set)] = src
            # Gather results as they complete.
            for done in concurrent.futures.as_completed(future_map):
                src = future_map[done]
                try:
                    outcome = done.result()
                except Exception as e:
                    logger.error(f"处理数据源 {src['host']}.{src['table']} 时出错: {str(e)}")
                else:
                    if outcome:
                        all_check_results.append(outcome)
        # Persist whatever was collected.
        if all_check_results:
            logger.info(f"准备保存 {len(all_check_results)} 条地址校验结果")
            save_success = save_check_results(all_check_results)
        else:
            logger.info("无有效的地址校验结果需要保存")
            save_success = True
        elapsed = time.time() - start_time
        logger.info(f"▶▶▶ 地址校验任务完成,耗时 {elapsed:.2f} 秒 ◀◀◀")
        return save_success
    except Exception as e:
        logger.exception(f"地址校验任务严重错误: {str(e)}")
        return False
def cleanup_connection_pools():
    """Drain every pool and close all idle connections."""
    logger.info("清理连接池资源...")

    def _drain(pool):
        # Close every connection currently parked in the pool's queue.
        while not pool.pool.empty():
            try:
                pool.pool.get_nowait().close()
            except:
                pass

    for host, pool in postgres_pools.items():
        if pool:
            logger.info(f"清理PostgreSQL连接池: {host}")
            _drain(pool)
    if sqlserver_pool:
        logger.info("清理SQL Server连接池")
        _drain(sqlserver_pool)
def run_scheduler():
    """Initialise pools, run an immediate check, then repeat every minute."""
    logger.info("启动定时调度器,每分钟执行一次地址校验")
    init_connection_pools()
    # Run once right away instead of waiting for the first scheduled tick.
    run_address_check()
    schedule.every(1).minutes.do(run_address_check)
    keep_running = True
    try:
        while keep_running:
            try:
                schedule.run_pending()
                time.sleep(1)
            except KeyboardInterrupt:
                logger.info("用户中断,退出程序")
                keep_running = False
            except Exception as e:
                logger.error(f"调度器错误: {str(e)}")
                time.sleep(60)  # back off for a minute after an error
    finally:
        # Always release pooled connections on the way out.
        cleanup_connection_pools()
if __name__ == "__main__":
    # Entry point: runs the forever-looping minute scheduler.
    run_scheduler()
# NOTE(review): the two lines below were stray pasted review text (a syntax
# error in this module); preserved here as a comment, translated:
# "One address maps to multiple bit indices, so multiple rows should be
#  returned, but the result only contains one row per address." / "Latest post"