Using ESCAPE in SQL Server -- 【叶子】

This article explains how to use the ESCAPE keyword in SQL Server to deal with special characters in LIKE (fuzzy) queries. Concrete examples show how to designate an escape character with ESCAPE so that the pattern matches exactly what is intended.

1. What is ESCAPE for?

ESCAPE designates a character in a LIKE pattern as an escape character, so that wildcards can be matched literally.
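
A quick illustration (the table variable @t, its sample rows, and the choice of '!' as the escape character are made up for this sketch): with ESCAPE, a wildcard such as _ can be matched as a literal character.

declare @t table (col varchar(20))
insert into @t select 'a_b' union all select 'axb'
-- '!' is declared as the escape character, so '!_' matches a literal underscore;
-- only 'a_b' is returned (without ESCAPE, '_' would also match the 'x' in 'axb')
select * from @t where col like '%!_%' escape '!'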

2. When is ESCAPE needed?

An example makes this clear. Suppose we want to run an ordinary fuzzy (LIKE) query:

-- test data
declare @tablea table (id int, col varchar(20))
insert into @tablea
select 1,'maco' union all
select 2,'mao' union all
select 3,'micro'

-- fuzzy query
select * from @tablea where col like '%ma%'

/* Results:
id          col
----------- --------------------
1           maco
2           mao
*/

This is the most ordinary kind of fuzzy query.

But what happens when the col column itself contains special characters such as %?

declare @tableb table (id int, col varchar(20))
insert into @tableb
select 1,'m%a%co' union all
select 2,'m%a%o' union all
select 3,'miacro'

select * from @tableb where col like '%%a%%'

The query above no longer does what we want: since % matches any sequence of characters (including an empty one), '%%a%%' is equivalent to '%a%', so every row containing an a comes back.

/* Results:
id          col
----------- --------------------
1           m%a%co
2           m%a%o
3           miacro
*/

In this case we can use ESCAPE:

declare @tablec table (id int, col varchar(20))
insert into @tablec
select 1,'m%a%co' union all
select 2,'m%a%o' union all
select 3,'miacro'

-- fuzzy query with '$' as the escape character: '$%' matches a literal %
select * from @tablec where col like '%$%a$%%' escape '$'

/* Results:
id          col
----------- --------------------
1           m%a%co
2           m%a%o
*/
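
As a side note (this variant is not in the original post), SQL Server's LIKE also lets a wildcard be matched literally by enclosing it in square brackets, which avoids declaring an escape character. The following query returns the same two rows from @tablec as the ESCAPE version above:

select * from @tablec where col like '%[%]a[%]%'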

@【叶子】 http://blog.youkuaiyun.com/maco_wang Original work. Please credit the author and source when reposting, and keep this notice.
