验证Guid

using System.Text.RegularExpressions;
 
        public static bool IsGuidValid(string inputGuid)
        {
            string pattern = @"^[0-9a-fA-F]{8}\-[0-9a-fA-F]{4}\-[0-9a-fA-F]{4}\-[0-9a-fA-F]{4}\-[0-9a-fA-F]{12}$";
            return Regex.IsMatch(inputGuid, pattern);
        }

转载于:https://www.cnblogs.com/puzi0315/archive/2012/08/08/2628933.html

import pyodbc import pandas as pd from sqlalchemy import create_engine, text from urllib.parse import quote_plus import uuid import numpy as np from tqdm import tqdm import time import re def get_table_columns(engine, table_name): """获取表的实际列名""" with engine.connect() as conn: result = conn.execute(text(f"SHOW COLUMNS FROM {table_name}")) columns = [row[0] for row in result] return columns def safe_get_column_name(columns, possible_names): """安全获取列名(处理大小写和前缀)""" lower_columns = [col.lower() for col in columns] for name in possible_names: # 尝试精确匹配 if name in columns: return name # 尝试小写匹配 if name.lower() in lower_columns: return columns[lower_columns.index(name.lower())] # 尝试模糊匹配 for col in columns: if any(name in col for name in possible_names): return col return None def compare_records(row, existing_row, fields_to_compare): """比较两个记录,返回需要更新的字段和一致字段数量""" changes = {} consistent_fields = 0 total_fields = 0 for field in fields_to_compare: total_fields += 1 # 处理可能的NaN值 new_val = row[field] if pd.notna(row[field]) else None old_val = existing_row[field] if field in existing_row and pd.notna(existing_row[field]) else None # 比较值是否不同 if new_val != old_val: changes[field] = new_val else: consistent_fields += 1 return changes, consistent_fields, total_fields def generate_unique_guids(count): """生成唯一的小写GUID列表""" return [str(uuid.uuid4()).lower() for _ in range(count)] def main(): print("="*80) print("数据同步程序启动") print(f"开始时间: {time.strftime('%Y-%m-%d %H:%M:%S')}") print("="*80) # 1. 安全处理包含特殊字符的密码 password = "@Aa.1234" encoded_password = quote_plus(password) # 对特殊字符进行URL编码 # 2. 创建MySQL连接字符串 mysql_conn_str = ( f"mysql+pymysql://lpsoft:{encoded_password}@192.168.3.1:3306/OrderManage" "?charset=utf8mb4" ) # 3. 创建SQLAlchemy引擎 mysql_engine = create_engine( mysql_conn_str, pool_size=5, max_overflow=10, pool_timeout=30, connect_args={'connect_timeout': 15} ) # 4. 测试MySQL连接 try: with mysql_engine.connect() as test_conn: test_conn.execute(text("SELECT 1")) print("✅ MySQL连接测试成功") except Exception as e: print(f"❌ MySQL连接失败: {str(e)}") return # 5. 获取MySQL表结构 print("正在获取MySQL表结构...") try: table_columns = get_table_columns(mysql_engine, "T_Customer") print(f"表字段: {', '.join(table_columns)}") # 安全获取电话字段名 phone_field = safe_get_column_name(table_columns, ["phone", "Phone", "telephone", "tel"]) if not phone_field: raise ValueError("找不到电话字段") print(f"✅ 确定电话字段: {phone_field}") except Exception as e: print(f"❌ 表结构获取失败: {str(e)}") return # 6. 连接SQL Server (SAP数据) try: sap_conn = pyodbc.connect( 'DRIVER={ODBC Driver 17 for SQL Server};' 'SERVER=192.168.0.229;' 'DATABASE=SINO_SAP;' 'UID=SAPReader;' 'PWD=Sino2025zyq;' ) print("✅ SQL Server连接成功") except Exception as e: print(f"❌ SQL Server连接失败: {str(e)}") return # 7. 从SQL Server获取SAP数据(使用新的查询) sap_query = """ SELECT CAST( CASE WHEN ZY_TB_CustomerProfile.[客户类型] = 'VIP' THEN '703192237850693' ELSE '703192237846597' END AS varchar(20)) AS CustomerTypeCode, CAST(ZY_TB_CustomerPhone.CardCode AS varchar(50)) AS S6, CAST(ZY_TB_CustomerPhone.Cardname AS varchar(100)) AS CompanyName, CAST(ZY_TB_CustomerPhone.Name AS varchar(50)) AS Name, CAST(LEFT(ZY_TB_CustomerPhone.Telephone, 15) AS varchar(15)) AS Phone, CAST(FORMAT(ZY_TB_CustomerProfile.近一年总毛利, '0.00') AS varchar(50)) AS S4, CAST(FORMAT(ZY_TB_CustomerProfile.预收款金额, '0.00') AS varchar(50)) AS S2, CAST(FORMAT(ZY_TB_CustomerProfile.应收款, '0.00') AS varchar(50)) AS S1, CAST(FORMAT(ZY_TB_CustomerProfile.全部库存金额, '0.00') AS varchar(50)) AS S3, CAST((CAST(ZY_TB_CustomerProfile.客户等级 AS varchar(50)) + ZY_TB_CustomerProfile.等级名称) AS varchar(50)) AS S5 FROM ZY_TB_CustomerPhone LEFT JOIN ZY_TB_CustomerProfile ON ZY_TB_CustomerPhone.CardCode = ZY_TB_CustomerProfile.[客户编号] WHERE ZY_TB_CustomerPhone.CardCode IS NOT NULL AND ZY_TB_CustomerPhone.Cardname IS NOT NULL AND ZY_TB_CustomerPhone.Telephone NOT LIKE '8441%' AND ZY_TB_CustomerPhone.Cardname NOT LIKE '%中源合聚生物%' """ print("正在从SAP数据库加载数据...") start_time = time.time() try: df_sap = pd.read_sql(sap_query, sap_conn) # 确保电话字段格式正确 df_sap['Phone'] = df_sap['Phone'].astype(str).str.slice(0, 15) sap_duration = time.time() - start_time print(f"✅ 从SAP加载了 {len(df_sap)} 条记录 (耗时: {sap_duration:.2f}秒)") except Exception as e: print(f"❌ SAP数据加载失败: {str(e)}") return # 8. 从MySQL获取现有客户数据(使用动态字段名) print("正在从MySQL加载现有客户数据...") start_time = time.time() try: # 动态生成查询 query = f"SELECT * FROM T_Customer" df_existing = pd.read_sql(query, mysql_engine) # 确保电话字段存在 if phone_field not in df_existing.columns: raise ValueError(f"电话字段 '{phone_field}' 在查询结果中不存在") # 处理电话字段 df_existing['Phone'] = df_existing[phone_field].astype(str).str.slice(0, 15) # 创建以Phone为键的字典,便于快速查找 existing_dict = df_existing.set_index('Phone').to_dict('index') mysql_duration = time.time() - start_time print(f"✅ MySQL现有 {len(df_existing)} 条记录 (耗时: {mysql_duration:.2f}秒)") except Exception as e: print(f"❌ MySQL数据加载失败: {str(e)}") print("请检查表结构和字段名") return # 9. 定义需要比较的字段 fields_to_compare = [ 'CompanyName', 'S6', 'CustomerTypeCode', 'S5', 'S1', 'S2', 'S3', 'S4' ] # 10. 数据对比分析 print("\n" + "="*80) print("开始数据对比分析...") print(f"比较字段: {', '.join(fields_to_compare)}") # 初始化统计变量 stats = { 'total_records': len(df_sap), 'existing_records': 0, 'new_records': 0, 'consistent_records': 0, 'inconsistent_records': 0, 'total_fields_compared': 0, 'consistent_fields': 0, 'inconsistent_fields': 0, 'max_consistent_fields': 0, 'min_consistent_fields': len(fields_to_compare), # 初始设为最大字段数 'update_candidates': 0 } # 存储需要更新和插入的记录 updates_to_apply = [] # 存储需要更新的记录和变化字段 inserts_to_apply = [] # 存储需要插入的新记录(仅业务字段) # 进度条显示 pbar = tqdm(total=len(df_sap), desc="分析记录") # 11. 遍历所有SAP记录 for idx, row in df_sap.iterrows(): phone = row['Phone'] # 检查电话是否存在于现有数据中 if phone in existing_dict and phone: # 确保phone不为空 stats['existing_records'] += 1 existing_row = existing_dict[phone] # 比较记录并获取变化 changes, consistent_fields, total_fields = compare_records(row, existing_row, fields_to_compare) stats['total_fields_compared'] += total_fields stats['consistent_fields'] += consistent_fields stats['inconsistent_fields'] += (total_fields - consistent_fields) # 更新最大/最小一致字段数 stats['max_consistent_fields'] = max(stats['max_consistent_fields'], consistent_fields) stats['min_consistent_fields'] = min(stats['min_consistent_fields'], consistent_fields) # 检查是否完全一致 if len(changes) == 0: stats['consistent_records'] += 1 else: stats['inconsistent_records'] += 1 stats['update_candidates'] += 1 changes['Phone'] = phone # 添加标识符 updates_to_apply.append(changes) else: # 新记录 - 只保存业务字段 stats['new_records'] += 1 new_record = { 'CustomerTypeCode': row['CustomerTypeCode'], 'S6': row['S6'], 'CompanyName': row['CompanyName'], 'Name': row['Name'], 'Phone': row['Phone'], 'S4': row['S4'], 'S2': row['S2'], 'S1': row['S1'], 'S3': row['S3'], 'S5': row['S5'] } inserts_to_apply.append(new_record) pbar.update(1) pbar.close() # 12. 打印详细统计信息 print("\n" + "="*80) print("数据对比统计结果:") print(f"总记录数: {stats['total_records']}") print(f"已存在记录数: {stats['existing_records']} ({stats['existing_records']/stats['total_records']:.1%})") print(f"新增记录数: {stats['new_records']} ({stats['new_records']/stats['total_records']:.1%})") print(f"一致记录数: {stats['consistent_records']} ({stats['consistent_records']/stats['existing_records']:.1%})") print(f"不一致记录数: {stats['inconsistent_records']} ({stats['inconsistent_records']/stats['existing_records']:.1%})") print(f"需要更新记录数: {stats['update_candidates']}") print(f"需要插入记录数: {stats['new_records']}") print(f"\n字段级别对比:") print(f"总比较字段数: {stats['total_fields_compared']}") print(f"一致字段数: {stats['consistent_fields']} ({stats['consistent_fields']/stats['total_fields_compared']:.1%})") print(f"不一致字段数: {stats['inconsistent_fields']} ({stats['inconsistent_fields']/stats['total_fields_compared']:.1%})") print(f"字段一致性范围: {stats['min_consistent_fields']}-{stats['max_consistent_fields']} (每记录一致字段数)") print("="*80 + "\n") # 13. 执行批量更新操作(只更新有变化的字段) if updates_to_apply: print(f"开始批量更新 {len(updates_to_apply)} 条有变化的记录...") start_time = time.time() try: # 创建包含变化的DataFrame df_changes = pd.DataFrame(updates_to_apply) # 创建临时表存储更新数据 temp_table_name = "temp_customer_updates" with mysql_engine.begin() as conn: # 先删除可能存在的旧临时表 conn.execute(text(f"DROP TABLE IF EXISTS {temp_table_name}")) # 创建临时表(只包含变化的字段) df_changes.to_sql( temp_table_name, mysql_engine, if_exists='replace', index=False ) # 添加索引加速更新 with mysql_engine.begin() as conn: conn.execute(text(f"CREATE INDEX idx_phone ON {temp_table_name} (Phone)")) # 动态生成SET子句 set_clauses = [] for col in df_changes.columns: if col != 'Phone': # 跳过标识符字段 # 使用实际电话字段名 set_clauses.append(f"t.{col} = tmp.{col}") set_clause = ", ".join(set_clauses) # 使用JOIN执行批量更新 update_query = f""" UPDATE T_Customer AS t JOIN {temp_table_name} AS tmp ON LEFT(t.{phone_field}, 15) = tmp.Phone SET {set_clause} """ with mysql_engine.begin() as conn: result = conn.execute(text(update_query)) update_duration = time.time() - start_time print(f"✅ 成功更新 {result.rowcount} 条记录 (耗时: {update_duration:.2f}秒)") except Exception as e: print(f"❌ 更新操作失败: {str(e)}") # 14. 执行批量插入操作(使用呼叫中心生成的ID和GUID) if inserts_to_apply: print(f"开始准备插入 {len(inserts_to_apply)} 条新记录...") start_time = time.time() try: # 准备插入数据 df_insert = pd.DataFrame(inserts_to_apply) # 获取当前最大ID max_id_query = "SELECT COALESCE(MAX(id), 111100000048210) AS max_id FROM T_Customer" with mysql_engine.connect() as conn: max_id = conn.execute(text(max_id_query)).scalar() # 生成新ID(从最大ID+1开始) new_ids = range(max_id + 1, max_id + 1 + len(df_insert)) # 生成唯一的小写GUID new_guids = generate_unique_guids(len(df_insert)) # 添加呼叫中心系统生成的ID和GUID df_insert['id'] = list(new_ids) df_insert['GUID'] = new_guids df_insert['Domain'] = 'ipcc.org' df_insert['Remark'] = '' # 选择并排序列以匹配表结构 insert_columns = [ 'id', 'GUID', 'Domain', 'CustomerTypeCode', 'Remark', 'S6', 'CompanyName', 'Name', 'Phone', 'S4', 'S2', 'S1', 'S3', 'S5' ] df_insert = df_insert[insert_columns] # 重命名列以匹配实际表结构 column_mapping = {'Phone': phone_field} df_insert = df_insert.rename(columns=column_mapping) # 批量插入新记录(分块处理) print(f"正在批量插入 {len(df_insert)} 条新记录...") df_insert.to_sql( 'T_Customer', mysql_engine, if_exists='append', index=False, chunksize=2000, # 分块插入提高性能 method='multi' # 使用多值插入 ) insert_duration = time.time() - start_time print(f"✅ 成功插入 {len(df_insert)} 条新记录 (耗时: {insert_duration:.2f}秒)") # 验证GUID唯一性 guid_check_query = """ SELECT COUNT(*) AS total, COUNT(DISTINCT GUID) AS distinct_guids FROM T_Customer """ with mysql_engine.connect() as conn: guid_check = conn.execute(text(guid_check_query)).fetchone() if guid_check[0] == guid_check[1]: print("✅ GUID唯一性验证通过") else: print(f"⚠️ GUID唯一性警告: 总数={guid_check[0]}, 唯一数={guid_check[1]}") except Exception as e: print(f"❌ 插入操作失败: {str(e)}") # 15. 清理临时表 try: with mysql_engine.begin() as conn: conn.execute(text(f"DROP TABLE IF EXISTS {temp_table_name}")) print("✅ 临时表清理完成") except: print("⚠️ 临时表清理失败,请手动检查") # 16. 最终统计 print("\n" + "="*80) print("数据同步完成!") print(f"总耗时: {time.time() - start_time:.2f}秒") print(f"处理记录统计:") print(f" 一致记录: {stats['consistent_records']}") print(f" 更新记录: {len(updates_to_apply)}") print(f" 新增记录: {len(inserts_to_apply)}") print(f" 总处理记录: {stats['consistent_records'] + len(updates_to_apply) + len(inserts_to_apply)}") print("="*80) if __name__ == "__main__": main() 要注意的是 新增数据的时候 就不用sap查出来的id 和 GUID了 直接根据呼叫中心数据的id和guid增加吧 一定要保证呼叫中心的id和guid不一致 这个代码保证了最后更新和新增后的呼叫中心的所有的id是不一致的,GUID也是不一致的么?
08-12
import pyodbc import pandas as pd from sqlalchemy import create_engine, text from urllib.parse import quote_plus import uuid import numpy as np from tqdm import tqdm import time import re def get_table_columns(engine, table_name): """获取表的实际列名""" with engine.connect() as conn: result = conn.execute(text(f"SHOW COLUMNS FROM {table_name}")) columns = [row[0] for row in result] return columns def safe_get_column_name(columns, possible_names): """安全获取列名(处理大小写和前缀)""" lower_columns = [col.lower() for col in columns] for name in possible_names: # 尝试精确匹配 if name in columns: return name # 尝试小写匹配 if name.lower() in lower_columns: return columns[lower_columns.index(name.lower())] # 尝试模糊匹配 for col in columns: if any(name in col for name in possible_names): return col return None def compare_records(row, existing_row, fields_to_compare): """比较两个记录,返回需要更新的字段和一致字段数量""" changes = {} consistent_fields = 0 total_fields = 0 for field in fields_to_compare: total_fields += 1 # 处理可能的NaN值 new_val = row[field] if pd.notna(row[field]) else None old_val = existing_row[field] if field in existing_row and pd.notna(existing_row[field]) else None # 比较值是否不同 if new_val != old_val: changes[field] = new_val else: consistent_fields += 1 return changes, consistent_fields, total_fields def generate_unique_guids(count): """生成唯一的小写GUID列表并确保唯一性""" guids = set() while len(guids) < count: guids.add(str(uuid.uuid4()).lower()) return list(guids) def main(): print("="*80) print("数据同步程序启动") print(f"开始时间: {time.strftime('%Y-%m-%d %H:%M:%S')}") print("="*80) # 1. 安全处理包含特殊字符的密码 password = "@Aa.1234" encoded_password = quote_plus(password) # 对特殊字符进行URL编码 # 2. 创建MySQL连接字符串 mysql_conn_str = ( f"mysql+pymysql://lpsoft:{encoded_password}@192.168.3.1:3306/OrderManage" "?charset=utf8mb4" ) # 3. 创建SQLAlchemy引擎 mysql_engine = create_engine( mysql_conn_str, pool_size=5, max_overflow=10, pool_timeout=30, connect_args={'connect_timeout': 15} ) # 4. 测试MySQL连接 try: with mysql_engine.connect() as test_conn: test_conn.execute(text("SELECT 1")) print("✅ MySQL连接测试成功") except Exception as e: print(f"❌ MySQL连接失败: {str(e)}") return # 5. 获取MySQL表结构 print("正在获取MySQL表结构...") try: table_columns = get_table_columns(mysql_engine, "T_Customer") print(f"表字段: {', '.join(table_columns)}") # 安全获取电话字段名 phone_field = safe_get_column_name(table_columns, ["phone", "Phone", "telephone", "tel"]) if not phone_field: raise ValueError("找不到电话字段") print(f"✅ 确定电话字段: {phone_field}") except Exception as e: print(f"❌ 表结构获取失败: {str(e)}") return # 6. 连接SQL Server (SAP数据) try: sap_conn = pyodbc.connect( 'DRIVER={ODBC Driver 17 for SQL Server};' 'SERVER=192.168.0.229;' 'DATABASE=SINO_SAP;' 'UID=SAPReader;' 'PWD=Sino2025zyq;' ) print("✅ SQL Server连接成功") except Exception as e: print(f"❌ SQL Server连接失败: {str(e)}") return # 7. 从SQL Server获取SAP数据(使用新的查询) sap_query = """ SELECT CAST( CASE WHEN ZY_TB_CustomerProfile.[客户类型] = 'VIP' THEN '703192237850693' ELSE '703192237846597' END AS varchar(20)) AS CustomerTypeCode, CAST(ZY_TB_CustomerPhone.CardCode AS varchar(50)) AS S6, CAST(ZY_TB_CustomerPhone.Cardname AS varchar(100)) AS CompanyName, CAST(ZY_TB_CustomerPhone.Name AS varchar(50)) AS Name, CAST(LEFT(ZY_TB_CustomerPhone.Telephone, 15) AS varchar(15)) AS Phone, CAST(FORMAT(ZY_TB_CustomerProfile.近一年总毛利, '0.00') AS varchar(50)) AS S4, CAST(FORMAT(ZY_TB_CustomerProfile.预收款金额, '0.00') AS varchar(50)) AS S2, CAST(FORMAT(ZY_TB_CustomerProfile.应收款, '0.00') AS varchar(50)) AS S1, CAST(FORMAT(ZY_TB_CustomerProfile.全部库存金额, '0.00') AS varchar(50)) AS S3, CAST((CAST(ZY_TB_CustomerProfile.客户等级 AS varchar(50)) + ZY_TB_CustomerProfile.等级名称) AS varchar(50)) AS S5 FROM ZY_TB_CustomerPhone LEFT JOIN ZY_TB_CustomerProfile ON ZY_TB_CustomerPhone.CardCode = ZY_TB_CustomerProfile.[客户编号] WHERE ZY_TB_CustomerPhone.CardCode IS NOT NULL AND ZY_TB_CustomerPhone.Cardname IS NOT NULL AND ZY_TB_CustomerPhone.Telephone NOT LIKE '8441%' AND ZY_TB_CustomerPhone.Cardname NOT LIKE '%中源合聚生物%' """ print("正在从SAP数据库加载数据...") start_time = time.time() try: df_sap = pd.read_sql(sap_query, sap_conn) # 确保电话字段格式正确 df_sap['Phone'] = df_sap['Phone'].astype(str).str.slice(0, 15) sap_duration = time.time() - start_time print(f"✅ 从SAP加载了 {len(df_sap)} 条记录 (耗时: {sap_duration:.2f}秒)") except Exception as e: print(f"❌ SAP数据加载失败: {str(e)}") return # 8. 从MySQL获取现有客户数据(使用动态字段名) print("正在从MySQL加载现有客户数据...") start_time = time.time() try: # 动态生成查询 query = f"SELECT * FROM T_Customer" df_existing = pd.read_sql(query, mysql_engine) # 确保电话字段存在 if phone_field not in df_existing.columns: raise ValueError(f"电话字段 '{phone_field}' 在查询结果中不存在") # 处理电话字段 df_existing['Phone'] = df_existing[phone_field].astype(str).str.slice(0, 15) # 创建以Phone为键的字典,便于快速查找 existing_dict = df_existing.set_index('Phone').to_dict('index') mysql_duration = time.time() - start_time print(f"✅ MySQL现有 {len(df_existing)} 条记录 (耗时: {mysql_duration:.2f}秒)") except Exception as e: print(f"❌ MySQL数据加载失败: {str(e)}") print("请检查表结构和字段名") return # 9. 定义需要比较的字段 fields_to_compare = [ 'CompanyName', 'S6', 'CustomerTypeCode', 'S5', 'S1', 'S2', 'S3', 'S4' ] # 10. 数据对比分析 print("\n" + "="*80) print("开始数据对比分析...") print(f"比较字段: {', '.join(fields_to_compare)}") # 初始化统计变量 stats = { 'total_records': len(df_sap), 'existing_records': 0, 'new_records': 0, 'consistent_records': 0, 'inconsistent_records': 0, 'total_fields_compared': 0, 'consistent_fields': 0, 'inconsistent_fields': 0, 'max_consistent_fields': 0, 'min_consistent_fields': len(fields_to_compare), # 初始设为最大字段数 'update_candidates': 0 } # 存储需要更新和插入的记录 updates_to_apply = [] # 存储需要更新的记录和变化字段 inserts_to_apply = [] # 存储需要插入的新记录(仅业务字段) # 进度条显示 pbar = tqdm(total=len(df_sap), desc="分析记录") # 11. 遍历所有SAP记录 for idx, row in df_sap.iterrows(): phone = row['Phone'] # 检查电话是否存在于现有数据中 if phone in existing_dict and phone: # 确保phone不为空 stats['existing_records'] += 1 existing_row = existing_dict[phone] # 比较记录并获取变化 changes, consistent_fields, total_fields = compare_records(row, existing_row, fields_to_compare) stats['total_fields_compared'] += total_fields stats['consistent_fields'] += consistent_fields stats['inconsistent_fields'] += (total_fields - consistent_fields) # 更新最大/最小一致字段数 stats['max_consistent_fields'] = max(stats['max_consistent_fields'], consistent_fields) stats['min_consistent_fields'] = min(stats['min_consistent_fields'], consistent_fields) # 检查是否完全一致 if len(changes) == 0: stats['consistent_records'] += 1 else: stats['inconsistent_records'] += 1 stats['update_candidates'] += 1 changes['Phone'] = phone # 添加标识符 updates_to_apply.append(changes) else: # 新记录 - 只保存业务字段 stats['new_records'] += 1 new_record = { 'CustomerTypeCode': row['CustomerTypeCode'], 'S6': row['S6'], 'CompanyName': row['CompanyName'], 'Name': row['Name'], 'Phone': row['Phone'], 'S4': row['S4'], 'S2': row['S2'], 'S1': row['S1'], 'S3': row['S3'], 'S5': row['S5'] } inserts_to_apply.append(new_record) pbar.update(1) pbar.close() # 12. 打印详细统计信息 print("\n" + "="*80) print("数据对比统计结果:") print(f"总记录数: {stats['total_records']}") print(f"已存在记录数: {stats['existing_records']} ({stats['existing_records']/stats['total_records']:.1%})") print(f"新增记录数: {stats['new_records']} ({stats['new_records']/stats['total_records']:.1%})") print(f"一致记录数: {stats['consistent_records']} ({stats['consistent_records']/stats['existing_records']:.1%})") print(f"不一致记录数: {stats['inconsistent_records']} ({stats['inconsistent_records']/stats['existing_records']:.1%})) print(f"需要更新记录数: {stats['update_candidates']}") print(f"需要插入记录数: {stats['new_records']}") print(f"\n字段级别对比:") print(f"总比较字段数: {stats['total_fields_compared']}") print(f"一致字段数: {stats['consistent_fields']} ({stats['consistent_fields']/stats['total_fields_compared']:.1%})) print(f"不一致字段数: {stats['inconsistent_fields']} ({stats['inconsistent_fields']/stats['total_fields_compared']:.1%})") print(f"字段一致性范围: {stats['min_consistent_fields']}-{stats['max_consistent_fields']} (每记录一致字段数)") print("="*80 + "\n") # 13. 执行批量更新操作(只更新有变化的字段) if updates_to_apply: print(f"开始批量更新 {len(updates_to_apply)} 条有变化的记录...") start_time = time.time() try: # 创建包含变化的DataFrame df_changes = pd.DataFrame(updates_to_apply) # 创建临时表存储更新数据 temp_table_name = "temp_customer_updates" with mysql_engine.begin() as conn: # 先删除可能存在的旧临时表 conn.execute(text(f"DROP TABLE IF EXISTS {temp_table_name}")) # 创建临时表(只包含变化的字段) df_changes.to_sql( temp_table_name, mysql_engine, if_exists='replace', index=False ) # 添加索引加速更新 with mysql_engine.begin() as conn: conn.execute(text(f"CREATE INDEX idx_phone ON {temp_table_name} (Phone)")) # 动态生成SET子句 set_clauses = [] for col in df_changes.columns: if col != 'Phone': # 跳过标识符字段 # 使用实际电话字段名 set_clauses.append(f"t.{col} = tmp.{col}") set_clause = ", ".join(set_clauses) # 使用JOIN执行批量更新 update_query = f""" UPDATE T_Customer AS t JOIN {temp_table_name} AS tmp ON LEFT(t.{phone_field}, 15) = tmp.Phone SET {set_clause} """ with mysql_engine.begin() as conn: result = conn.execute(text(update_query)) update_duration = time.time() - start_time print(f"✅ 成功更新 {result.rowcount} 条记录 (耗时: {update_duration:.2f}秒)") except Exception as e: print(f"❌ 更新操作失败: {str(e)}") # 14. 执行批量插入操作(使用呼叫中心生成的ID和GUID) if inserts_to_apply: print(f"开始准备插入 {len(inserts_to_apply)} 条新记录...") start_time = time.time() try: # 准备插入数据 df_insert = pd.DataFrame(inserts_to_apply) # 获取当前最大ID max_id_query = "SELECT COALESCE(MAX(id), 111100000048210) AS max_id FROM T_Customer" with mysql_engine.connect() as conn: max_id = conn.execute(text(max_id_query)).scalar() # 生成新ID(从最大ID+1开始) new_ids = range(max_id + 1, max_id + 1 + len(df_insert)) # 生成唯一的小写GUID(确保在集合中唯一) new_guids = generate_unique_guids(len(df_insert)) # 添加呼叫中心系统生成的ID和GUID df_insert['id'] = list(new_ids) df_insert['GUID'] = new_guids df_insert['Domain'] = 'ipcc.org' df_insert['Remark'] = '' # 选择并排序列以匹配表结构 insert_columns = [ 'id', 'GUID', 'Domain', 'CustomerTypeCode', 'Remark', 'S6', 'CompanyName', 'Name', 'Phone', 'S4', 'S2', 'S1', 'S3', 'S5' ] df_insert = df_insert[insert_columns] # 重命名列以匹配实际表结构 column_mapping = {'Phone': phone_field} df_insert = df_insert.rename(columns=column_mapping) # 批量插入新记录(分块处理) print(f"正在批量插入 {len(df_insert)} 条新记录...") df_insert.to_sql( 'T_Customer', mysql_engine, if_exists='append', index=False, chunksize=2000, # 分块插入提高性能 method='multi' # 使用多值插入 ) insert_duration = time.time() - start_time print(f"✅ 成功插入 {len(df_insert)} 条新记录 (耗时: {insert_duration:.2f}秒)") # 验证ID和GUID唯一性 id_check_query = "SELECT COUNT(*) AS total, COUNT(DISTINCT id) AS distinct_ids FROM T_Customer" guid_check_query = "SELECT COUNT(*) AS total, COUNT(DISTINCT GUID) AS distinct_guids FROM T_Customer" with mysql_engine.connect() as conn: # 验证ID唯一性 id_check = conn.execute(text(id_check_query)).fetchone() if id_check[0] == id_check[1]: print("✅ ID唯一性验证通过") else: print(f"❌ ID唯一性警告: 总数={id_check[0]}, 唯一数={id_check[1]}") # 验证GUID唯一性 guid_check = conn.execute(text(guid_check_query)).fetchone() if guid_check[0] == guid_check[1]: print("✅ GUID唯一性验证通过") else: print(f"❌ GUID唯一性警告: 总数={guid_check[0]}, 唯一数={guid_check[1]}") except Exception as e: print(f"❌ 插入操作失败: {str(e)}") # 15. 清理临时表 try: with mysql_engine.begin() as conn: conn.execute(text(f"DROP TABLE IF EXISTS {temp_table_name}")) print("✅ 临时表清理完成") except: print("⚠️ 临时表清理失败,请手动检查") # 16. 最终统计 print("\n" + "="*80) print("数据同步完成!") print(f"总耗时: {time.time() - start_time:.2f}秒") print(f"处理记录统计:") print(f" 一致记录: {stats['consistent_records']}") print(f" 更新记录: {len(updates_to_apply)}") print(f" 新增记录: {len(inserts_to_apply)}") print(f" 总处理记录: {stats['consistent_records'] + len(updates_to_apply) + len(inserts_to_apply)}") print("="*80) if __name__ == "__main__": main() 这个代码在满足什么条件下才会更新数据
最新发布
08-12
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值