import pyodbc
import pandas as pd
from sqlalchemy import create_engine, text
from urllib.parse import quote_plus
import uuid
import numpy as np
from tqdm import tqdm
import time
import re
def get_table_columns(engine, table_name):
"""获取表的实际列名"""
with engine.connect() as conn:
result = conn.execute(text(f"SHOW COLUMNS FROM {table_name}"))
columns = [row[0] for row in result]
return columns
def safe_get_column_name(columns, possible_names):
"""安全获取列名(处理大小写和前缀)"""
lower_columns = [col.lower() for col in columns]
for name in possible_names:
# 尝试精确匹配
if name in columns:
return name
# 尝试小写匹配
if name.lower() in lower_columns:
return columns[lower_columns.index(name.lower())]
# 尝试模糊匹配
for col in columns:
if any(name in col for name in possible_names):
return col
return None
def compare_records(row, existing_row, fields_to_compare):
"""比较两个记录,返回需要更新的字段和一致字段数量"""
changes = {}
consistent_fields = 0
total_fields = 0
for field in fields_to_compare:
total_fields += 1
# 处理可能的NaN值
new_val = row[field] if pd.notna(row[field]) else None
old_val = existing_row[field] if field in existing_row and pd.notna(existing_row[field]) else None
# 比较值是否不同
if new_val != old_val:
changes[field] = new_val
else:
consistent_fields += 1
return changes, consistent_fields, total_fields
def generate_unique_guids(count):
"""生成唯一的小写GUID列表并确保唯一性"""
guids = set()
while len(guids) < count:
guids.add(str(uuid.uuid4()).lower())
return list(guids)
def main():
print("="*80)
print("数据同步程序启动")
print(f"开始时间: {time.strftime('%Y-%m-%d %H:%M:%S')}")
print("="*80)
# 1. 安全处理包含特殊字符的密码
password = "@Aa.1234"
encoded_password = quote_plus(password) # 对特殊字符进行URL编码
# 2. 创建MySQL连接字符串
mysql_conn_str = (
f"mysql+pymysql://lpsoft:{encoded_password}@192.168.3.1:3306/OrderManage"
"?charset=utf8mb4"
)
# 3. 创建SQLAlchemy引擎
mysql_engine = create_engine(
mysql_conn_str,
pool_size=5,
max_overflow=10,
pool_timeout=30,
connect_args={'connect_timeout': 15}
)
# 4. 测试MySQL连接
try:
with mysql_engine.connect() as test_conn:
test_conn.execute(text("SELECT 1"))
print("✅ MySQL连接测试成功")
except Exception as e:
print(f"❌ MySQL连接失败: {str(e)}")
return
# 5. 获取MySQL表结构
print("正在获取MySQL表结构...")
try:
table_columns = get_table_columns(mysql_engine, "T_Customer")
print(f"表字段: {', '.join(table_columns)}")
# 安全获取电话字段名
phone_field = safe_get_column_name(table_columns, ["phone", "Phone", "telephone", "tel"])
if not phone_field:
raise ValueError("找不到电话字段")
print(f"✅ 确定电话字段: {phone_field}")
except Exception as e:
print(f"❌ 表结构获取失败: {str(e)}")
return
# 6. 连接SQL Server (SAP数据)
try:
sap_conn = pyodbc.connect(
'DRIVER={ODBC Driver 17 for SQL Server};'
'SERVER=192.168.0.229;'
'DATABASE=SINO_SAP;'
'UID=SAPReader;'
'PWD=Sino2025zyq;'
)
print("✅ SQL Server连接成功")
except Exception as e:
print(f"❌ SQL Server连接失败: {str(e)}")
return
# 7. 从SQL Server获取SAP数据(使用新的查询)
sap_query = """
SELECT
CAST(
CASE
WHEN ZY_TB_CustomerProfile.[客户类型] = 'VIP'
THEN '703192237850693'
ELSE '703192237846597'
END
AS varchar(20)) AS CustomerTypeCode,
CAST(ZY_TB_CustomerPhone.CardCode AS varchar(50)) AS S6,
CAST(ZY_TB_CustomerPhone.Cardname AS varchar(100)) AS CompanyName,
CAST(ZY_TB_CustomerPhone.Name AS varchar(50)) AS Name,
CAST(LEFT(ZY_TB_CustomerPhone.Telephone, 15) AS varchar(15)) AS Phone,
CAST(FORMAT(ZY_TB_CustomerProfile.近一年总毛利, '0.00') AS varchar(50)) AS S4,
CAST(FORMAT(ZY_TB_CustomerProfile.预收款金额, '0.00') AS varchar(50)) AS S2,
CAST(FORMAT(ZY_TB_CustomerProfile.应收款, '0.00') AS varchar(50)) AS S1,
CAST(FORMAT(ZY_TB_CustomerProfile.全部库存金额, '0.00') AS varchar(50)) AS S3,
CAST((CAST(ZY_TB_CustomerProfile.客户等级 AS varchar(50)) + ZY_TB_CustomerProfile.等级名称) AS varchar(50)) AS S5
FROM ZY_TB_CustomerPhone
LEFT JOIN ZY_TB_CustomerProfile
ON ZY_TB_CustomerPhone.CardCode = ZY_TB_CustomerProfile.[客户编号]
WHERE
ZY_TB_CustomerPhone.CardCode IS NOT NULL
AND ZY_TB_CustomerPhone.Cardname IS NOT NULL
AND ZY_TB_CustomerPhone.Telephone NOT LIKE '8441%'
AND ZY_TB_CustomerPhone.Cardname NOT LIKE '%中源合聚生物%'
"""
print("正在从SAP数据库加载数据...")
start_time = time.time()
try:
df_sap = pd.read_sql(sap_query, sap_conn)
# 确保电话字段格式正确
df_sap['Phone'] = df_sap['Phone'].astype(str).str.slice(0, 15)
sap_duration = time.time() - start_time
print(f"✅ 从SAP加载了 {len(df_sap)} 条记录 (耗时: {sap_duration:.2f}秒)")
except Exception as e:
print(f"❌ SAP数据加载失败: {str(e)}")
return
# 8. 从MySQL获取现有客户数据(使用动态字段名)
print("正在从MySQL加载现有客户数据...")
start_time = time.time()
try:
# 动态生成查询
query = f"SELECT * FROM T_Customer"
df_existing = pd.read_sql(query, mysql_engine)
# 确保电话字段存在
if phone_field not in df_existing.columns:
raise ValueError(f"电话字段 '{phone_field}' 在查询结果中不存在")
# 处理电话字段
df_existing['Phone'] = df_existing[phone_field].astype(str).str.slice(0, 15)
# 创建以Phone为键的字典,便于快速查找
existing_dict = df_existing.set_index('Phone').to_dict('index')
mysql_duration = time.time() - start_time
print(f"✅ MySQL现有 {len(df_existing)} 条记录 (耗时: {mysql_duration:.2f}秒)")
except Exception as e:
print(f"❌ MySQL数据加载失败: {str(e)}")
print("请检查表结构和字段名")
return
# 9. 定义需要比较的字段
fields_to_compare = [
'CompanyName', 'S6', 'CustomerTypeCode',
'S5', 'S1', 'S2', 'S3', 'S4'
]
# 10. 数据对比分析
print("\n" + "="*80)
print("开始数据对比分析...")
print(f"比较字段: {', '.join(fields_to_compare)}")
# 初始化统计变量
stats = {
'total_records': len(df_sap),
'existing_records': 0,
'new_records': 0,
'consistent_records': 0,
'inconsistent_records': 0,
'total_fields_compared': 0,
'consistent_fields': 0,
'inconsistent_fields': 0,
'max_consistent_fields': 0,
'min_consistent_fields': len(fields_to_compare), # 初始设为最大字段数
'update_candidates': 0
}
# 存储需要更新和插入的记录
updates_to_apply = [] # 存储需要更新的记录和变化字段
inserts_to_apply = [] # 存储需要插入的新记录(仅业务字段)
# 进度条显示
pbar = tqdm(total=len(df_sap), desc="分析记录")
# 11. 遍历所有SAP记录
for idx, row in df_sap.iterrows():
phone = row['Phone']
# 检查电话是否存在于现有数据中
if phone in existing_dict and phone: # 确保phone不为空
stats['existing_records'] += 1
existing_row = existing_dict[phone]
# 比较记录并获取变化
changes, consistent_fields, total_fields = compare_records(row, existing_row, fields_to_compare)
stats['total_fields_compared'] += total_fields
stats['consistent_fields'] += consistent_fields
stats['inconsistent_fields'] += (total_fields - consistent_fields)
# 更新最大/最小一致字段数
stats['max_consistent_fields'] = max(stats['max_consistent_fields'], consistent_fields)
stats['min_consistent_fields'] = min(stats['min_consistent_fields'], consistent_fields)
# 检查是否完全一致
if len(changes) == 0:
stats['consistent_records'] += 1
else:
stats['inconsistent_records'] += 1
stats['update_candidates'] += 1
changes['Phone'] = phone # 添加标识符
updates_to_apply.append(changes)
else:
# 新记录 - 只保存业务字段
stats['new_records'] += 1
new_record = {
'CustomerTypeCode': row['CustomerTypeCode'],
'S6': row['S6'],
'CompanyName': row['CompanyName'],
'Name': row['Name'],
'Phone': row['Phone'],
'S4': row['S4'],
'S2': row['S2'],
'S1': row['S1'],
'S3': row['S3'],
'S5': row['S5']
}
inserts_to_apply.append(new_record)
pbar.update(1)
pbar.close()
# 12. 打印详细统计信息
print("\n" + "="*80)
print("数据对比统计结果:")
print(f"总记录数: {stats['total_records']}")
print(f"已存在记录数: {stats['existing_records']} ({stats['existing_records']/stats['total_records']:.1%})")
print(f"新增记录数: {stats['new_records']} ({stats['new_records']/stats['total_records']:.1%})")
print(f"一致记录数: {stats['consistent_records']} ({stats['consistent_records']/stats['existing_records']:.1%})")
print(f"不一致记录数: {stats['inconsistent_records']} ({stats['inconsistent_records']/stats['existing_records']:.1%}))
print(f"需要更新记录数: {stats['update_candidates']}")
print(f"需要插入记录数: {stats['new_records']}")
print(f"\n字段级别对比:")
print(f"总比较字段数: {stats['total_fields_compared']}")
print(f"一致字段数: {stats['consistent_fields']} ({stats['consistent_fields']/stats['total_fields_compared']:.1%}))
print(f"不一致字段数: {stats['inconsistent_fields']} ({stats['inconsistent_fields']/stats['total_fields_compared']:.1%})")
print(f"字段一致性范围: {stats['min_consistent_fields']}-{stats['max_consistent_fields']} (每记录一致字段数)")
print("="*80 + "\n")
# 13. 执行批量更新操作(只更新有变化的字段)
if updates_to_apply:
print(f"开始批量更新 {len(updates_to_apply)} 条有变化的记录...")
start_time = time.time()
try:
# 创建包含变化的DataFrame
df_changes = pd.DataFrame(updates_to_apply)
# 创建临时表存储更新数据
temp_table_name = "temp_customer_updates"
with mysql_engine.begin() as conn:
# 先删除可能存在的旧临时表
conn.execute(text(f"DROP TABLE IF EXISTS {temp_table_name}"))
# 创建临时表(只包含变化的字段)
df_changes.to_sql(
temp_table_name,
mysql_engine,
if_exists='replace',
index=False
)
# 添加索引加速更新
with mysql_engine.begin() as conn:
conn.execute(text(f"CREATE INDEX idx_phone ON {temp_table_name} (Phone)"))
# 动态生成SET子句
set_clauses = []
for col in df_changes.columns:
if col != 'Phone': # 跳过标识符字段
# 使用实际电话字段名
set_clauses.append(f"t.{col} = tmp.{col}")
set_clause = ", ".join(set_clauses)
# 使用JOIN执行批量更新
update_query = f"""
UPDATE T_Customer AS t
JOIN {temp_table_name} AS tmp ON LEFT(t.{phone_field}, 15) = tmp.Phone
SET {set_clause}
"""
with mysql_engine.begin() as conn:
result = conn.execute(text(update_query))
update_duration = time.time() - start_time
print(f"✅ 成功更新 {result.rowcount} 条记录 (耗时: {update_duration:.2f}秒)")
except Exception as e:
print(f"❌ 更新操作失败: {str(e)}")
# 14. 执行批量插入操作(使用呼叫中心生成的ID和GUID)
if inserts_to_apply:
print(f"开始准备插入 {len(inserts_to_apply)} 条新记录...")
start_time = time.time()
try:
# 准备插入数据
df_insert = pd.DataFrame(inserts_to_apply)
# 获取当前最大ID
max_id_query = "SELECT COALESCE(MAX(id), 111100000048210) AS max_id FROM T_Customer"
with mysql_engine.connect() as conn:
max_id = conn.execute(text(max_id_query)).scalar()
# 生成新ID(从最大ID+1开始)
new_ids = range(max_id + 1, max_id + 1 + len(df_insert))
# 生成唯一的小写GUID(确保在集合中唯一)
new_guids = generate_unique_guids(len(df_insert))
# 添加呼叫中心系统生成的ID和GUID
df_insert['id'] = list(new_ids)
df_insert['GUID'] = new_guids
df_insert['Domain'] = 'ipcc.org'
df_insert['Remark'] = ''
# 选择并排序列以匹配表结构
insert_columns = [
'id', 'GUID', 'Domain', 'CustomerTypeCode', 'Remark',
'S6', 'CompanyName', 'Name', 'Phone',
'S4', 'S2', 'S1', 'S3', 'S5'
]
df_insert = df_insert[insert_columns]
# 重命名列以匹配实际表结构
column_mapping = {'Phone': phone_field}
df_insert = df_insert.rename(columns=column_mapping)
# 批量插入新记录(分块处理)
print(f"正在批量插入 {len(df_insert)} 条新记录...")
df_insert.to_sql(
'T_Customer',
mysql_engine,
if_exists='append',
index=False,
chunksize=2000, # 分块插入提高性能
method='multi' # 使用多值插入
)
insert_duration = time.time() - start_time
print(f"✅ 成功插入 {len(df_insert)} 条新记录 (耗时: {insert_duration:.2f}秒)")
# 验证ID和GUID唯一性
id_check_query = "SELECT COUNT(*) AS total, COUNT(DISTINCT id) AS distinct_ids FROM T_Customer"
guid_check_query = "SELECT COUNT(*) AS total, COUNT(DISTINCT GUID) AS distinct_guids FROM T_Customer"
with mysql_engine.connect() as conn:
# 验证ID唯一性
id_check = conn.execute(text(id_check_query)).fetchone()
if id_check[0] == id_check[1]:
print("✅ ID唯一性验证通过")
else:
print(f"❌ ID唯一性警告: 总数={id_check[0]}, 唯一数={id_check[1]}")
# 验证GUID唯一性
guid_check = conn.execute(text(guid_check_query)).fetchone()
if guid_check[0] == guid_check[1]:
print("✅ GUID唯一性验证通过")
else:
print(f"❌ GUID唯一性警告: 总数={guid_check[0]}, 唯一数={guid_check[1]}")
except Exception as e:
print(f"❌ 插入操作失败: {str(e)}")
# 15. 清理临时表
try:
with mysql_engine.begin() as conn:
conn.execute(text(f"DROP TABLE IF EXISTS {temp_table_name}"))
print("✅ 临时表清理完成")
except:
print("⚠️ 临时表清理失败,请手动检查")
# 16. 最终统计
print("\n" + "="*80)
print("数据同步完成!")
print(f"总耗时: {time.time() - start_time:.2f}秒")
print(f"处理记录统计:")
print(f" 一致记录: {stats['consistent_records']}")
print(f" 更新记录: {len(updates_to_apply)}")
print(f" 新增记录: {len(inserts_to_apply)}")
print(f" 总处理记录: {stats['consistent_records'] + len(updates_to_apply) + len(inserts_to_apply)}")
print("="*80)
if __name__ == "__main__":
main()
这个代码在满足什么条件下才会更新数据
最新发布