
 
class BaseTransformer:
    """Asynchronous data transformation coordinator."""

    def __init__(self, db_utils: DBUtils, max_workers: int = 4):
        self.db_utils = db_utils
        self.reader_converter_pairs: List[Tuple[BaseReader, BaseConverter]] = []
        self.name = self.__class__.__name__
        self.max_workers = max_workers  # thread pool size
        self.truncated_tables = set()

    def register_reader_converter_pair(
        self, reader: BaseReader, converter: BaseConverter
    ):
        """Register a Reader/Converter pair.

        Args:
            reader: data reader instance
            converter: data converter instance
        """
        self.reader_converter_pairs.append((reader, converter))
        logger.info(f"Registered Reader-Converter pair: {reader.name} -> {converter.name}")

    async def transform_async(self) -> bool:
        """Run the whole data transformation flow asynchronously.

        Returns:
            bool: whether the transformation succeeded
        """
        try:
            logger.info(f"{self.name} starting async data transformation")

            # Run synchronous I/O operations in a thread pool executor
            executor = ThreadPoolExecutor(max_workers=self.max_workers)
            loop = asyncio.get_running_loop()

            # Process every Reader-Converter pair
            tasks = [
                self._process_pair_async(reader, converter, loop, executor)
                for reader, converter in self.reader_converter_pairs
            ]

            # Wait for all tasks to finish
            await asyncio.gather(*tasks)

            logger.info(f"{self.name} async data transformation finished")
            return True
        except Exception as e:
            logger.exception(f"Async transformation failed: {str(e)}")
            return False

    async def _process_pair_async(
        self,
        reader: BaseReader,
        converter: BaseConverter,
        loop: asyncio.AbstractEventLoop,
        executor: ThreadPoolExecutor,
    ) -> None:
        """Process a single Reader-Converter pair asynchronously."""
        logger.info(f"Processing pair: {reader.name} -> {converter.name}")

        # Prepare the target table
        target_table = converter.table_name
        schema, table_name = (
            target_table.split(".", 1) if "." in target_table else (None, target_table)
        )
        table_obj = await loop.run_in_executor(
            executor, self.db_utils.create_table_obj, table_name, schema
        )

        # Truncate the target table.
        # Improvement: make sure each target table is truncated only once.
        if converter.truncate_before_convert and target_table not in self.truncated_tables:
            logger.info(f"Truncating target table for the first time: {target_table}")
            await loop.run_in_executor(
                executor,
                self.db_utils.execute_ddl,
                text(f"TRUNCATE TABLE {target_table}"),
            )
            # Remember that this table has already been truncated
            self.truncated_tables.add(target_table)

        # Build the async data processing pipeline
        async for batch_data in self._create_async_pipeline(reader, converter, loop, executor):
            await self._write_batch_async(converter, table_obj, batch_data, loop, executor)

    async def _create_async_pipeline(
        self,
        reader: BaseReader,
        converter: BaseConverter,
        loop: asyncio.AbstractEventLoop,
        executor: ThreadPoolExecutor,
    ) -> AsyncGenerator[Tuple[Any, ...], None]:
        """Build the async data processing pipeline."""
        # Read asynchronously
        logger.info(f"Starting async read: {reader.name}")
        data_gen = await loop.run_in_executor(executor, reader.read)

        # Convert asynchronously
        logger.info(f"Starting async conversion: {converter.name}")
        converter_gen = await loop.run_in_executor(
            executor, lambda: list(converter.convert(data_gen))
        )

        # Yield the converted data batch by batch
        for batch in converter_gen:
            yield batch

    async def _write_batch_async(
        self,
        converter: BaseConverter,
        table_obj: Table,
        batch_data: Tuple[Any, ...],
        loop: asyncio.AbstractEventLoop,
        executor: ThreadPoolExecutor,
    ) -> None:
        """Write one data batch asynchronously."""
        converted_batch, upsert_keys, delete_columns, delete_values = batch_data
        if not converted_batch:
            return

        # Bulk delete
        if delete_columns and delete_values:
            await loop.run_in_executor(
                executor,
                self.db_utils.batch_delete,
                table_obj,
                delete_columns,
                delete_values,
            )

        # Bulk write
        if upsert_keys:
            success = await loop.run_in_executor(
                executor,
                self.db_utils.batch_upsert,
                table_obj,
                converted_batch,
                upsert_keys,
            )
        else:
            success = await loop.run_in_executor(
                executor, self.db_utils.batch_insert, table_obj, converted_batch
            )

        if not success:
            raise Exception(f"Async write to table {converter.table_name} failed")

        logger.debug(f"Processed {len(converted_batch)} records asynchronously")

It still does not truncate each target table only once.
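That complaint is consistent with the code as written: between the `target_table not in self.truncated_tables` check and `self.truncated_tables.add(target_table)` there is an `await`, so two pairs launched together by `asyncio.gather` that share a target table can both pass the check before either one records the truncate. A minimal sketch of one way to close that gap, assuming the `BaseTransformer` and `DBUtils.execute_ddl` interface shown above (the subclass name and the `_truncate_once` helper are illustrative, not part of the original code):

import asyncio

from sqlalchemy import text


class TruncateOnceTransformer(BaseTransformer):
    """Sketch: serialize the truncate-once check so concurrent pairs cannot both truncate."""

    def __init__(self, db_utils, max_workers: int = 4):
        super().__init__(db_utils, max_workers)
        self.truncated_tables = set()
        self._truncate_lock = asyncio.Lock()

    async def _truncate_once(self, target_table, loop, executor):
        # Hold the lock across check + DDL + bookkeeping, so a second task that is
        # waiting on the lock sees the table already recorded and skips the TRUNCATE.
        async with self._truncate_lock:
            if target_table in self.truncated_tables:
                return
            await loop.run_in_executor(
                executor,
                self.db_utils.execute_ddl,
                text(f"TRUNCATE TABLE {target_table}"),
            )
            self.truncated_tables.add(target_table)

Inside `_process_pair_async`, the inline truncate block would then be replaced by a single `await self._truncate_once(target_table, loop, executor)` call.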
import asyncio
import asyncssh
import logging
from typing import Optional, Dict, Any, Tuple, Union

from src.auth.manager import AuthManager
from src.session.manager import SessionManager
from src.command.filter import CommandFilter
from src.cluster.manager import ClusterManager
from src.utils.logger import SessionLogger
from src.config.manager import ConfigManager

logger = logging.getLogger('ssh-proxy.server')


class SSHProxyServer(asyncssh.SSHServer):
    def __init__(self, config: ConfigManager, cluster: Optional[ClusterManager] = None):
        self.config = config
        self.cluster = cluster
        self.auth_manager = AuthManager(config)
        self.session_manager = SessionManager(config)

        filter_config = self.config.get_filter_config()
        self.command_filter = CommandFilter(
            forbidden_commands=filter_config['forbidden_commands'],
            warning_commands=filter_config['warning_commands']
        )

        self.username = None
        self.session_id = None
        self.session_logger = None
        self.backend_node_id = None
        self.conn = None  # current SSH connection

        self.forbidden_commands = filter_config["forbidden_commands"]
        self.warning_commands = filter_config["warning_commands"]
        self.session_log_commands = self.config.get_session_config()["log_commands"]

    def connection_made(self, conn: asyncssh.SSHServerConnection):
        """Called when a client connection is established."""
        self.conn = conn
        peer = conn.get_extra_info('peername')
        logger.info(f"New connection from {peer[0]}:{peer[1]}")

    async def handle_command(self, command: str, client_ip: str):
        """Filter a command and log it (async)."""
        for forbidden in self.forbidden_commands:
            if forbidden in command:
                await self.session_logger.log_event(
                    "forbidden_command", f"Blocked command: {command} from {client_ip}"
                )
                return False

        for warning in self.warning_commands:
            if warning in command:
                await self.session_logger.log_event(
                    "warning_command", f"Warning command: {command} from {client_ip}"
                )

        if self.session_log_commands:
            await self.session_logger.log_command(command, client_ip)

        return True

    async def get_backend_node(self) -> Optional[Dict[str, Any]]:
        """Get a backend node (cluster mode)."""
        if not self.cluster:
            return None
        node = await self.cluster.get_backend()
        if node:
            self.backend_node_id = node.get('id')
        return node

    async def validate_password(self, username: str, password: str) -> bool:
        """Asynchronous password authentication."""
        try:
            auth_info = await self.auth_manager.get_auth_info(username)
            stored_password, _ = auth_info or ('fjy', b'')  # fall back to the default password
            if stored_password == password:
                self.username = username
                await self._init_session(username, "password")
                return True
            return False
        except Exception as e:
            logger.error(f"Password authentication failed for {username}: {e}")
            return False

    async def validate_public_key(self, username: str, key: asyncssh.PublicKey) -> bool:
        """Asynchronous public key authentication."""
        try:
            auth_info = await self.auth_manager.get_auth_info(username)
            if not auth_info:
                return False
            stored_password, stored_pubkey = auth_info
            provided_pubkey = key.export_public_key()

            # Normalize and compare the public keys
            normalized_stored = self.auth_manager.normalize_key(stored_pubkey)
            normalized_provided = self.auth_manager.normalize_key(provided_pubkey)

            if normalized_stored == normalized_provided:
                self.username = username
                await self._init_session(username, "publickey")
                return True
            return False
        except Exception as e:
            logger.error(f"Public key authentication failed for {username}: {e}")
            return False

    async def _init_session(self, username: str, auth_method: str):
        """Initialize the session (async)."""
        peer = self.conn.get_extra_info('peername')
        client_ip = peer[0]
        self.session_id = await self.session_manager.create_session(client_ip, username)
        self.session_logger = SessionLogger(
            session_id=self.session_id,
            log_dir=self.config.get_session_config().get('record_dir', 'session_records')
        )
        await self.session_logger.log_event(
            "authentication", f"User {username} authenticated successfully via {auth_method}"
        )

    async def sftp_requested(self) -> bool:
        """Handle an SFTP subsystem request."""
        asyncio.create_task(self.forward_sftp_to_backend())
        return True

    async def forward_sftp_to_backend(self):
        """Asynchronous SFTP forwarding."""
        try:
            backend_info = await self.establish_backend_connection()
            if not backend_info:
                return

            auth_info = await self.auth_manager.get_auth_info(self.username)
            stored_password, _ = auth_info or ('fjy', b'')

            async with asyncssh.connect(
                host=backend_info['host'],
                port=backend_info['port'],
                username=backend_info['username'],
                password=stored_password,
                known_hosts=None
            ) as conn:
                async with conn.start_sftp_client() as sftp:
                    chan = await self.conn.create_sftp_channel()
                    async with chan:
                        while True:
                            # SFTP data forwarding goes here
                            await asyncio.sleep(0.1)
        except Exception as e:
            logger.error(f"SFTP forwarding failed: {e}")

    async def establish_backend_connection(self) -> Optional[Dict[str, Any]]:
        """Establish the backend connection (async)."""
        if not self.cluster:
            backend_config = self.config.get_backend_config()
            return {
                'host': backend_config['default_host'],
                'port': backend_config['default_port'],
                'username': backend_config['default_user']
            }

        backend_node = await self.get_backend_node()
        if not backend_node:
            logger.error("No available backend node in the cluster")
            return None

        return {
            'host': backend_node['host'],
            'port': backend_node['port'],
            'username': backend_node['username']
        }

    async def session_closed(self):
        """Clean up resources when the connection closes (async)."""
        if self.cluster and self.backend_node_id:
            await self.cluster.release_backend(self.backend_node_id)
            logger.info(f"Released backend node: {self.backend_node_id}")
        if self.session_logger:
            await self.session_logger.close()


class SSHProxyServerController:
    def __init__(self, config: ConfigManager, cluster: Optional[ClusterManager] = None):
        self.config = config
        self.cluster = cluster
        self._server = None

    async def start(self):
        """Start the asynchronous SSH server."""
        server_config = self.config.get_server_config()
        host = server_config.get('listen_host', '0.0.0.0')
        port = int(server_config.get('listen_port', 2222))

        try:
            self._server = await asyncssh.create_server(
                lambda: SSHProxyServer(self.config, self.cluster),
                host, port,
                server_host_keys=[self.config.get_host_key_path()],
                password_auth_enabled=True,
                public_key_auth_enabled=True,
                process_factory=self._handle_client_process
            )
            logger.info(f"SSH proxy listening on {host}:{port}")
        except Exception as e:
            logger.error(f"Server start failed: {e}")
            raise

    async def _handle_client_process(self, process: asyncssh.SSHServerProcess):
        """Handle a client process."""
        if process.command:
            await self._handle_command(process)
        elif process.subsystem == 'sftp':
            await self._handle_sftp(process)

    async def stop(self):
        """Stop the server."""
        if self._server:
            self._server.close()
            await self._server.wait_closed()
            logger.info("SSH proxy server stopped")

Please revise it based on the previous question and give the complete code.
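For completeness, a minimal, hypothetical entry point that starts the `SSHProxyServerController` above; how `ConfigManager` (and, in cluster mode, `ClusterManager`) is constructed is not shown in the snippet and is assumed here:

# Hypothetical launcher for the controller above. The ConfigManager() constructor
# arguments are an assumption; adjust to the real project configuration.
import asyncio

from src.config.manager import ConfigManager


async def main():
    config = ConfigManager()                       # assumed no-arg constructor
    controller = SSHProxyServerController(config)  # cluster=None: single-backend mode
    await controller.start()
    try:
        await asyncio.Event().wait()               # serve until the process is cancelled
    finally:
        await controller.stop()


if __name__ == "__main__":
    asyncio.run(main())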
# Design
# 1. Implement two base classes, BaseReader and BaseConverter.
# 2. BaseReader has abstract methods: get_raw_sql for querying with raw SQL, and get_query for querying
#    with a SQLAlchemy query.
# 3. BaseReader implements a batch_read method that reads data in batches through a cursor and yields each batch.
# 4. Based on BaseReader, implement a subclass BaseRelationReader that receives the main table's id list and reads
#    related-table data. A BaseRelationReader must be attached to a BaseReader; a BaseReader can manage 0..N
#    BaseRelationReaders. Inside BaseReader.batch_read, each BaseRelationReader's batch_read is called in turn so
#    that the related data for the same batch is fetched and yielded together.
# 5. The data yielded by a Reader has the shape Dict[str, List]: the key is the Reader's name and the value is the
#    list of rows read.
# 6. BaseConverter implements a batch_convert method that receives the data yielded by a BaseReader, converts it,
#    and yields the result.
# 7. Implement a BaseTransformer class that manages writes to the target table as well as one or more
#    BaseReader/BaseConverter pairs, and implements a transform method that drives the core flow.
# 8. Readers and Converters come in pairs; the converted result of each pair is bulk-upserted into the target table.

from abc import ABC, abstractmethod
from typing import Callable, Generator, List, Dict, Any, Optional, Set, Tuple, AsyncGenerator
import asyncio
import time
from concurrent.futures import ThreadPoolExecutor
from functools import wraps

from sqlalchemy import MetaData, Table, text

from ddwh.config import setup_logging
from ddwh.utils.db_utils import DBUtils

logger = setup_logging()


def timer_decorator(func):
    @wraps(func)
    def wrapper(*args, **kwargs):
        instance_name = args[0].name if hasattr(args[0], 'name') else func.__name__
        start = time.perf_counter()
        result = func(*args, **kwargs)
        duration = time.perf_counter() - start

        # Batch size, if available
        batch_size = 0
        if result:
            batch_size = len(result) if isinstance(result, list) else 1

        # Log the elapsed time, including the converter name and batch size
        logger.info(
            f"Converter {instance_name} - {func.__name__} took {duration:.4f} s | "
            f"batch size: {batch_size} | average per record: {duration / max(1, batch_size):.6f} s"
        )
        return result
    return wrapper


class BaseReader(ABC):
    """Base class for data readers.

    Reads raw data from the data source, supporting batched reads and cursor-based processing of
    large data sets. It can manage multiple BaseRelationReaders so that related data is read
    together with the main-table data.
    """

    def __init__(self, db_utils: DBUtils):
        """Initialize the reader.

        Args:
            db_utils: database utility instance
        """
        self.db_utils: DBUtils = db_utils
        self.relation_readers: List[BaseRelationReader] = []

    @property
    def name(self) -> str:
        return self.__class__.__name__

    @property
    def batch_size(self) -> int:
        return 5000

    @property
    def order_fields(self) -> List[str]:
        return []

    @property
    @abstractmethod
    def get_query(self) -> Callable:
        pass


class BaseMainReader(BaseReader):
    """Base class for main-table data readers."""

    def __init__(self, db_utils: DBUtils):
        super().__init__(db_utils)

    def add_relation_reader(self, relation_reader: "BaseRelationReader"):
        """Add a related-table reader.

        Args:
            relation_reader: related-table reader instance
        """
        self.relation_readers.append(relation_reader)
        logger.info(f"Added relation reader: {relation_reader.__class__.__name__}")

    def read(self) -> Generator[Dict[str, List], None, None]:
        """Read data in batches.

        Reads data with a cursor in batches and fetches the related data for each batch.

        Yields:
            Dict[str, List]: data dict whose key is the reader name and whose value is the row list
        """
        try:
            query = self.get_query()
            logger.info(f"Starting batched read, batch size: {self.batch_size}")
            logger.debug(
                f"{self.name} query: {query.compile(compile_kwargs={'literal_binds': True})}"
            )
            for batch in self.db_utils.fetch_data_with_cursor(
                query, batch_size=self.batch_size
            ):
                if batch:
                    logger.info(f"Read {len(batch)} records from main table {self.name}")
                    # Build the result dict
                    result_data = {self.name: batch}

                    # Read the related data
                    for relation_reader in self.relation_readers:
                        try:
                            # Refresh the relation reader's id sets from this batch
                            relation_reader.extract_relation_ids(batch)
                            # Read the related data
                            relation_batch = []
                            for relation_data_batch in relation_reader.read():
                                relation_batch.extend(relation_data_batch)
                            if relation_batch:
                                result_data[relation_reader.name] = relation_batch
                                logger.info(
                                    f"Read {len(relation_batch)} records from related table {relation_reader.name}"
                                )
                        except Exception as e:
                            logger.exception(f"Failed to read related data: {str(e)}")
                            # Keep processing the remaining relation readers
                            continue

                    yield result_data
        except Exception as e:
            logger.exception(f"Batched read failed: {str(e)}")
            raise


class BaseRelationReader(BaseReader):
    """Related-table data reader.

    Reads related-table data based on the main table's id list; it must be attached to a BaseReader.
    """

    def __init__(self, db_utils: DBUtils):
        """Initialize the related-table reader.

        Args:
            db_utils: database utility instance
        """
        super().__init__(db_utils)
        self.relation_ids: Dict[str, Set[Any]] = {}

    @property
    @abstractmethod
    def relations(self) -> Dict[str, str]:
        """Return the join mapping: the key is the main table's column name, the value is this table's column name."""
        pass

    def extract_relation_ids(self, batch: List[Dict[str, Any]]):
        """Update the id sets from a batch of main-table rows.

        Args:
            batch: a batch of main-table rows
        """
        for current_table_column in self.relations.values():
            self.relation_ids[current_table_column] = set()
        for record in batch:
            for main_table_column, current_table_column in self.relations.items():
                self.relation_ids[current_table_column].add(record[main_table_column])

    def read(self) -> Generator[List[Dict[str, Any]], None, None]:
        """Read related data in batches.

        Yields:
            List[Dict[str, Any]]: a batch of related rows
        """
        if not self.relation_ids:
            return
        try:
            query = self.get_query()
            logger.info(f"Starting related-data read, number of id sets: {len(self.relation_ids)}")
            logger.debug(
                f"{self.name} query: {query.compile(compile_kwargs={'literal_binds': True})}"
            )
            for batch in self.db_utils.fetch_data_with_cursor(
                query, batch_size=self.batch_size
            ):
                if batch:
                    logger.debug(f"Read {len(batch)} related records")
                    yield batch
        except Exception as e:
            logger.exception(f"Batched read of related data failed: {str(e)}")
            raise


class BaseConverter(ABC):
    """Base class for data converters.

    Converts raw data into the target format.
    """

    def __init__(self, name: str = None):
        """Initialize the converter.

        Args:
            name: converter name
        """
        self.name = name or self.__class__.__name__

    @property
    @abstractmethod
    def upsert_keys(self) -> Optional[List[str]]:
        """Return the upsert key column names (single column or composite key).

        If None, plain inserts are used instead of upserts.
        """
        pass

    @property
    @abstractmethod
    def table_name(self) -> str:
        """Return the target table name including the schema, in the form schema.table_name."""
        pass

    @property
    def truncate_before_convert(self) -> bool:
        """Return whether the target table should be truncated before converting."""
        return False

    def batch_delete_conditions(
        self, converted_batch: List[Dict[str, Any]]
    ) -> Tuple[Optional[List[str]], Optional[List[List[Any]]]]:
        """Return the delete conditions for the rows that correspond to this batch.

        Args:
            converted_batch: the converted data batch

        Returns:
            Tuple[Optional[List[str]], Optional[List[List[Any]]]]: the delete condition column names
            and the delete condition value lists

        Example:
            converted_batch = [
                {"user_id": 1, "data_type": "A", "data_value": 100},
                {"user_id": 2, "data_type": "B", "data_value": 200},
            ]
            return ["user_id", "data_type"], [
                [1, "A"],
                [2, "B"],
            ]
        """
        return None, None

    @abstractmethod
    def convert_a_batch(self, data_batch: Dict[str, List]) -> List[Dict[str, Any]]:
        """Convert one data batch.

        Args:
            data_batch: raw data batch in the form Dict[str, List]

        Returns:
            List[Dict[str, Any]]: the converted rows
        """
        pass

    def convert(
        self, data_batches: Generator[Dict[str, List], None, None]
    ) -> Generator[
        Tuple[List[Dict[str, Any]], Optional[List[str]], Optional[List[str]], Optional[List[List[Any]]]],
        None,
        None,
    ]:
        """Convert batches of data.

        Args:
            data_batches: generator of raw data batches

        Yields:
            the converted batch, the upsert keys, the delete condition columns, and the delete condition values
        """
        try:
            for data_batch in data_batches:
                try:
                    start_time = time.perf_counter()
                    converted_batch = self.convert_a_batch(data_batch)
                    elapsed = time.perf_counter() - start_time
                    logger.info(
                        f"Converter {self.name} batch conversion took {elapsed:.4f} s, "
                        f"batch size: {len(converted_batch) if converted_batch else 0}"
                    )
                    if converted_batch:
                        logger.info(f"Converted {len(converted_batch)} records")
                        delete_columns, delete_values = self.batch_delete_conditions(
                            converted_batch
                        )
                        yield converted_batch, self.upsert_keys, delete_columns, delete_values
                except Exception as e:
                    logger.exception(f"Failed to convert a data batch: {str(e)}")
                    # Keep processing the remaining batches without aborting the whole flow
                    continue
        except Exception as e:
            logger.exception(f"Batched conversion failed: {str(e)}")
            raise


class BaseTransformer:
    """Asynchronous data transformation coordinator."""

    def __init__(self, db_utils: DBUtils, max_workers: int = 4):
        self.db_utils = db_utils
        self.reader_converter_pairs: List[Tuple[BaseReader, BaseConverter]] = []
        self.name = self.__class__.__name__
        self.max_workers = max_workers  # thread pool size

    def register_reader_converter_pair(
        self, reader: BaseReader, converter: BaseConverter
    ):
        """Register a Reader/Converter pair.

        Args:
            reader: data reader instance
            converter: data converter instance
        """
        self.reader_converter_pairs.append((reader, converter))
        logger.info(f"Registered Reader-Converter pair: {reader.name} -> {converter.name}")

    async def transform_async(self) -> bool:
        """Run the whole data transformation flow asynchronously.

        Returns:
            bool: whether the transformation succeeded
        """
        try:
            logger.info(f"{self.name} starting async data transformation")

            # Run synchronous I/O operations in a thread pool executor
            executor = ThreadPoolExecutor(max_workers=self.max_workers)
            loop = asyncio.get_running_loop()

            # Process every Reader-Converter pair
            tasks = [
                self._process_pair_async(reader, converter, loop, executor)
                for reader, converter in self.reader_converter_pairs
            ]

            # Wait for all tasks to finish
            await asyncio.gather(*tasks)

            logger.info(f"{self.name} async data transformation finished")
            return True
        except Exception as e:
            logger.exception(f"Async transformation failed: {str(e)}")
            return False

    async def _process_pair_async(
        self,
        reader: BaseReader,
        converter: BaseConverter,
        loop: asyncio.AbstractEventLoop,
        executor: ThreadPoolExecutor,
    ) -> None:
        """Process a single Reader-Converter pair asynchronously."""
        logger.info(f"Processing pair: {reader.name} -> {converter.name}")

        # Prepare the target table
        target_table = converter.table_name
        schema, table_name = (
            target_table.split(".", 1) if "." in target_table else (None, target_table)
        )
        table_obj = await loop.run_in_executor(
            executor, self.db_utils.create_table_obj, table_name, schema
        )

        # Truncate the target table if required
        if converter.truncate_before_convert:
            logger.info(f"Truncating target table: {target_table}")
            await loop.run_in_executor(
                executor,
                self.db_utils.execute_ddl,
                text(f"TRUNCATE TABLE {target_table}"),
            )

        # Build the async data processing pipeline
        async for batch_data in self._create_async_pipeline(reader, converter, loop, executor):
            await self._write_batch_async(converter, table_obj, batch_data, loop, executor)

    async def _create_async_pipeline(
        self,
        reader: BaseReader,
        converter: BaseConverter,
        loop: asyncio.AbstractEventLoop,
        executor: ThreadPoolExecutor,
    ) -> AsyncGenerator[Tuple[Any, ...], None]:
        """Build the async data processing pipeline."""
        # Read asynchronously
        logger.info(f"Starting async read: {reader.name}")
        data_gen = await loop.run_in_executor(executor, reader.read)

        # Convert asynchronously
        logger.info(f"Starting async conversion: {converter.name}")
        converter_gen = await loop.run_in_executor(
            executor, lambda: list(converter.convert(data_gen))
        )

        # Yield the converted data batch by batch
        for batch in converter_gen:
            yield batch

    async def _write_batch_async(
        self,
        converter: BaseConverter,
        table_obj: Table,
        batch_data: Tuple[Any, ...],
        loop: asyncio.AbstractEventLoop,
        executor: ThreadPoolExecutor,
    ) -> None:
        """Write one data batch asynchronously."""
        converted_batch, upsert_keys, delete_columns, delete_values = batch_data

        # Bulk delete
        if delete_columns and delete_values:
            await loop.run_in_executor(
                executor,
                self.db_utils.batch_delete,
                table_obj,
                delete_columns,
                delete_values,
            )

        # Bulk write
        if upsert_keys:
            success = await loop.run_in_executor(
                executor,
                self.db_utils.batch_upsert,
                table_obj,
                converted_batch,
                upsert_keys,
            )
        else:
            success = await loop.run_in_executor(
                executor, self.db_utils.batch_insert, table_obj, converted_batch
            )

        if not success:
            raise Exception(f"Async write to table {converter.table_name} failed")

        logger.debug(f"Processed {len(converted_batch)} records asynchronously")

It still does not write any data.
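To make the design points above concrete, here is a minimal usage sketch for the framework, assuming it lives in the same module so the imports above are in scope. The table and column names ("src.users", "dw.dim_user") and the DBUtils constructor are hypothetical; only the hooks defined by the base classes (get_query, upsert_keys, table_name, convert_a_batch) are used.

class UserReader(BaseMainReader):
    @property
    def get_query(self) -> Callable:
        # get_query is a property that returns a callable which builds the query,
        # matching how read() invokes it as self.get_query()
        return lambda: text("SELECT id, name, email FROM src.users")


class UserDimConverter(BaseConverter):
    @property
    def upsert_keys(self) -> Optional[List[str]]:
        return ["id"]  # upsert on the primary key

    @property
    def table_name(self) -> str:
        return "dw.dim_user"

    def convert_a_batch(self, data_batch: Dict[str, List]) -> List[Dict[str, Any]]:
        # Rows arrive keyed by the reader's name, per design point 5
        return [
            {"id": row["id"], "name": row["name"], "email": row["email"]}
            for row in data_batch.get("UserReader", [])
        ]


async def run() -> None:
    db_utils = DBUtils()  # constructor arguments are project-specific (assumed)
    transformer = BaseTransformer(db_utils, max_workers=4)
    transformer.register_reader_converter_pair(UserReader(db_utils), UserDimConverter())
    success = await transformer.transform_async()
    logger.info(f"Transformation finished, success={success}")


if __name__ == "__main__":
    asyncio.run(run())

As for the "still does not write any data" complaint: the snippet itself only signals write failure through the boolean returned by DBUtils.batch_upsert / batch_insert (raising when it is falsy), so verifying what those DBUtils methods, which are not shown here, actually return and commit is the natural first place to look.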