import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import os
import tkinter as tk
from tkinter import ttk, filedialog, messagebox, scrolledtext
import matplotlib.pyplot as plt
from matplotlib.backends.backend_tkagg import FigureCanvasTkAgg
from matplotlib.figure import Figure
import seaborn as sns
import re
import chardet
import threading
import time
import traceback
from tkinter import simpledialog
class TransactionAnalyzerApp:
def __init__(self, root):
self.root = root
self.root.title("交易数据分析工具 - 增强版")
self.root.geometry("1400x800")
self.root.configure(bg='#f0f0f0')
self.root.minsize(1200, 700)
self.df = None
self.results = None
self.analysis_thread = None
self.stop_analysis = False
self.db_connection = None
self.schemas = []
self.total_rows = 0
self.loaded_rows = 0
self.opponent_tags = {} # 存储交易对手标签 {标签名: 条件}
self.setup_ui()
def setup_ui(self):
# 创建主框架和滚动条
main_frame = ttk.Frame(self.root, padding="10")
main_frame.grid(row=0, column=0, sticky=(tk.W, tk.E, tk.N, tk.S))
# 创建画布和滚动条
canvas = tk.Canvas(main_frame)
scrollbar = ttk.Scrollbar(main_frame, orient="vertical", command=canvas.yview)
scrollable_frame = ttk.Frame(canvas)
scrollable_frame.bind(
"<Configure>",
lambda e: canvas.configure(scrollregion=canvas.bbox("all"))
)
canvas.create_window((0, 0), window=scrollable_frame, anchor="nw")
canvas.configure(yscrollcommand=scrollbar.set)
canvas.grid(row=0, column=0, sticky=(tk.W, tk.E, tk.N, tk.S))
scrollbar.grid(row=0, column=1, sticky=(tk.N, tk.S))
# 配置网格权重
self.root.columnconfigure(0, weight=1)
self.root.rowconfigure(0, weight=1)
main_frame.columnconfigure(0, weight=1)
main_frame.rowconfigure(0, weight=1)
scrollable_frame.columnconfigure(1, weight=1)
# 标题
title_label = ttk.Label(scrollable_frame, text="交易数据分析工具 - 增强版(解决连续进账分析问题)",
font=("Arial", 16, "bold"))
title_label.grid(row=0, column=0, columnspan=4, pady=(0, 20))
# 数据库连接区域
db_frame = ttk.LabelFrame(scrollable_frame, text="数据库连接", padding="10")
db_frame.grid(row=1, column=0, columnspan=4, sticky=(tk.W, tk.E), pady=(0, 10))
db_frame.columnconfigure(1, weight=1)
# 数据库类型选择
ttk.Label(db_frame, text="数据库类型:").grid(row=0, column=0, sticky=tk.W, padx=(0, 5))
self.db_type_var = tk.StringVar(value="postgresql")
db_combo = ttk.Combobox(db_frame, textvariable=self.db_type_var,
values=["mysql", "postgresql"], width=10)
db_combo.grid(row=0, column=1, padx=(0, 10))
db_combo.bind("<<ComboboxSelected>>", self.on_db_type_change)
# 连接参数
ttk.Label(db_frame, text="主机:").grid(row=0, column=2, sticky=tk.W, padx=(0, 5))
self.db_host_var = tk.StringVar(value="localhost")
ttk.Entry(db_frame, textvariable=self.db_host_var, width=15).grid(row=0, column=3, padx=(0, 10))
ttk.Label(db_frame, text="端口:").grid(row=0, column=4, sticky=tk.W, padx=(0, 5))
self.db_port_var = tk.StringVar(value="5433")
ttk.Entry(db_frame, textvariable=self.db_port_var, width=10).grid(row=0, column=5, padx=(0, 10))
ttk.Label(db_frame, text="数据库:").grid(row=1, column=0, sticky=tk.W, padx=(0, 5))
self.db_name_var = tk.StringVar(value="postgres")
ttk.Entry(db_frame, textvariable=self.db_name_var, width=15).grid(row=1, column=1, padx=(0, 10))
ttk.Label(db_frame, text="模式:").grid(row=1, column=2, sticky=tk.W, padx=(0, 5))
self.db_schema_var = tk.StringVar(value="xinpujing")
self.schema_combo = ttk.Combobox(db_frame, textvariable=self.db_schema_var, width=15)
self.schema_combo.grid(row=1, column=3, padx=(0, 10))
ttk.Button(db_frame, text="刷新模式", command=self.refresh_schemas).grid(row=1, column=4, padx=(5, 5))
ttk.Label(db_frame, text="用户名:").grid(row=2, column=0, sticky=tk.W, padx=(0, 5))
self.db_user_var = tk.StringVar(value="postgres")
ttk.Entry(db_frame, textvariable=self.db_user_var, width=15).grid(row=2, column=1, padx=(0, 10))
ttk.Label(db_frame, text="密码:").grid(row=2, column=2, sticky=tk.W, padx=(0, 5))
self.db_password_var = tk.StringVar(value="")
ttk.Entry(db_frame, textvariable=self.db_password_var, width=15, show="*").grid(row=2, column=3, padx=(0, 10))
ttk.Label(db_frame, text="表名:").grid(row=2, column=4, sticky=tk.W, padx=(0, 5))
self.db_table_var = tk.StringVar(value="secsflc_JYMX")
ttk.Entry(db_frame, textvariable=self.db_table_var, width=15).grid(row=2, column=5, padx=(0, 10))
# 数据加载选项
ttk.Label(db_frame, text="加载行数:").grid(row=2, column=6, sticky=tk.W, padx=(0, 5))
self.load_limit_var = tk.StringVar(value="50000")
ttk.Entry(db_frame, textvariable=self.load_limit_var, width=10).grid(row=2, column=7, padx=(0, 10))
ttk.Button(db_frame, text="测试连接", command=self.test_db_connection).grid(row=2, column=8, padx=(10, 5))
ttk.Button(db_frame, text="从数据库加载", command=self.load_from_database).grid(row=2, column=9, padx=(5, 0))
# 文件选择区域
file_frame = ttk.LabelFrame(scrollable_frame, text="文件导入", padding="10")
file_frame.grid(row=2, column=0, columnspan=4, sticky=(tk.W, tk.E), pady=(0, 10))
file_frame.columnconfigure(1, weight=1)
ttk.Label(file_frame, text="文件路径:").grid(row=0, column=0, sticky=tk.W, padx=(0, 5))
self.file_path_var = tk.StringVar()
file_entry = ttk.Entry(file_frame, textvariable=self.file_path_var, width=50)
file_entry.grid(row=0, column=1, sticky=(tk.W, tk.E), padx=(0, 5))
ttk.Button(file_frame, text="浏览", command=self.browse_file).grid(row=0, column=2, padx=(0, 5))
ttk.Button(file_frame, text="加载数据", command=self.load_data).grid(row=0, column=3)
# 字段映射区域
mapping_frame = ttk.LabelFrame(scrollable_frame, text="字段映射设置", padding="10")
mapping_frame.grid(row=3, column=0, columnspan=4, sticky=(tk.W, tk.E), pady=(0, 10))
# 创建字段映射的网格
ttk.Label(mapping_frame, text="原始字段", font=("Arial", 10, "bold")).grid(row=0, column=0, padx=5)
ttk.Label(mapping_frame, text="→", font=("Arial", 10, "bold")).grid(row=0, column=1, padx=5)
ttk.Label(mapping_frame, text="目标字段", font=("Arial", 10, "bold")).grid(row=0, column=2, padx=5)
# 必需的字段映射
required_fields = ['交易卡号', '交易时间', '交易金额', '收付标志', '对方卡号']
self.mapping_vars = {}
for i, field in enumerate(required_fields, 1):
ttk.Label(mapping_frame, text=field).grid(row=i, column=2, sticky=tk.W, padx=5)
self.mapping_vars[field] = tk.StringVar()
mapping_combo = ttk.Combobox(mapping_frame, textvariable=self.mapping_vars[field], width=20)
mapping_combo.grid(row=i, column=0, padx=5)
ttk.Label(mapping_frame, text="→").grid(row=i, column=1, padx=5)
# 参数设置区域
param_frame = ttk.LabelFrame(scrollable_frame, text="分析参数", padding="10")
param_frame.grid(row=4, column=0, columnspan=4, sticky=(tk.W, tk.E), pady=(0, 10))
ttk.Label(param_frame, text="最小连续次数:").grid(row=0, column=0, sticky=tk.W, padx=(0, 5))
self.min_continuous_var = tk.StringVar(value="3")
ttk.Entry(param_frame, textvariable=self.min_continuous_var, width=10).grid(row=0, column=1, padx=(0, 20))
ttk.Label(param_frame, text="最大交易时间间隔(分钟):").grid(row=0, column=2, sticky=tk.W, padx=(0, 5))
self.max_gap_var = tk.StringVar(value="10")
ttk.Entry(param_frame, textvariable=self.max_gap_var, width=10).grid(row=0, column=3, padx=(0, 20))
ttk.Label(param_frame, text="金额匹配阈值(%):").grid(row=0, column=4, sticky=tk.W, padx=(0, 5))
self.amount_threshold_var = tk.StringVar(value="20")
ttk.Entry(param_frame, textvariable=self.amount_threshold_var, width=10).grid(row=0, column=5, padx=(0, 20))
ttk.Label(param_frame, text="时间窗口(分钟):").grid(row=0, column=6, sticky=tk.W, padx=(0, 5))
self.time_window_var = tk.StringVar(value="1440") # 24小时 = 1440分钟
ttk.Entry(param_frame, textvariable=self.time_window_var, width=10).grid(row=0, column=7, padx=(0, 20))
# 新增参数:最大出账时间差
ttk.Label(param_frame, text="最大出账时间差(分钟):").grid(row=1, column=0, sticky=tk.W, padx=(0, 5))
self.max_out_gap_var = tk.StringVar(value="1440") # 24小时 = 1440分钟
ttk.Entry(param_frame, textvariable=self.max_out_gap_var, width=10).grid(row=1, column=1, padx=(0, 20))
# 交易对手标签设置
ttk.Label(param_frame, text="交易对手标签条件:").grid(row=1, column=2, sticky=tk.W, padx=(0, 5))
self.opponent_tag_var = tk.StringVar(value="")
ttk.Entry(param_frame, textvariable=self.opponent_tag_var, width=15, state='disabled').grid(row=1, column=3,
padx=(0, 5))
ttk.Button(param_frame, text="设置标签条件", command=self.set_opponent_tag_conditions).grid(row=1, column=4,
padx=(5, 5))
ttk.Button(param_frame, text="管理标签", command=self.manage_opponent_tags).grid(row=1, column=5, padx=(5, 5))
ttk.Button(param_frame, text="开始分析", command=self.start_analysis).grid(row=2, column=0, padx=(20, 5),
pady=(10, 0))
ttk.Button(param_frame, text="停止分析", command=self.stop_analysis_process).grid(row=2, column=1, padx=(5, 5),
pady=(10, 0))
ttk.Button(param_frame, text="导出结果", command=self.export_results).grid(row=2, column=2, padx=(5, 0),
pady=(10, 0))
ttk.Button(param_frame, text="调试模式", command=self.debug_analysis).grid(row=2, column=3, padx=(20, 5),
pady=(10, 0))
# 进度条区域
progress_frame = ttk.Frame(scrollable_frame)
progress_frame.grid(row=5, column=0, columnspan=4, sticky=(tk.W, tk.E), pady=(0, 10))
self.progress_var = tk.DoubleVar()
self.progress_bar = ttk.Progressbar(progress_frame, variable=self.progress_var, maximum=100)
self.progress_bar.grid(row=0, column=0, sticky=(tk.W, tk.E), padx=(0, 10))
progress_frame.columnconfigure(0, weight=1)
self.progress_label = ttk.Label(progress_frame, text="就绪")
self.progress_label.grid(row=0, column=1)
# 数据显示区域
data_frame = ttk.LabelFrame(scrollable_frame, text="数据预览", padding="10")
data_frame.grid(row=6, column=0, columnspan=2, sticky=(tk.W, tk.E, tk.N, tk.S), pady=(0, 10), padx=(0, 10))
data_frame.columnconfigure(0, weight=1)
data_frame.rowconfigure(0, weight=1)
self.data_text = scrolledtext.ScrolledText(data_frame, height=12, width=70)
self.data_text.grid(row=0, column=0, sticky=(tk.W, tk.E, tk.N, tk.S))
# 结果显示区域
result_frame = ttk.LabelFrame(scrollable_frame, text="分析结果", padding="10")
result_frame.grid(row=6, column=2, columnspan=2, sticky=(tk.W, tk.E, tk.N, tk.S), pady=(0, 10))
result_frame.columnconfigure(0, weight=1)
result_frame.rowconfigure(0, weight=1)
self.result_text = scrolledtext.ScrolledText(result_frame, height=12, width=60)
self.result_text.grid(row=0, column=0, sticky=(tk.W, tk.E, tk.N, tk.S))
# 配置权重
scrollable_frame.rowconfigure(6, weight=1)
data_frame.rowconfigure(0, weight=1)
data_frame.columnconfigure(0, weight=1)
result_frame.rowconfigure(0, weight=1)
result_frame.columnconfigure(0, weight=1)
# 绑定鼠标滚轮事件
canvas.bind("<Enter>", lambda e: canvas.bind_all("<MouseWheel>", lambda event: canvas.yview_scroll(
int(-1 * (event.delta / 120)), "units")))
canvas.bind("<Leave>", lambda e: canvas.unbind_all("<MouseWheel>"))
# 初始化模式列表
self.refresh_schemas()
def set_opponent_tag_conditions(self):
"""设置交易对手标签条件"""
tag_window = tk.Toplevel(self.root)
tag_window.title("设置交易对手标签条件")
tag_window.geometry("500x400")
# 标签列表
ttk.Label(tag_window, text="可用标签:").pack(pady=(10, 5))
listbox = tk.Listbox(tag_window, selectmode=tk.MULTIPLE)
for tag in self.opponent_tags.keys():
listbox.insert(tk.END, tag)
listbox.pack(fill=tk.BOTH, expand=True, padx=10, pady=5)
# 条件设置区域
condition_frame = ttk.Frame(tag_window)
condition_frame.pack(fill=tk.X, padx=10, pady=5)
ttk.Label(condition_frame, text="最小出现次数:").grid(row=0, column=0, padx=(0, 5))
self.min_count_var = tk.StringVar(value="1")
ttk.Entry(condition_frame, textvariable=self.min_count_var, width=5).grid(row=0, column=1, padx=(0, 10))
ttk.Label(condition_frame, text="最大出现次数:").grid(row=0, column=2, padx=(0, 5))
self.max_count_var = tk.StringVar(value="999")
ttk.Entry(condition_frame, textvariable=self.max_count_var, width=5).grid(row=0, column=3)
# 操作按钮
button_frame = ttk.Frame(tag_window)
button_frame.pack(pady=10)
ttk.Button(button_frame, text="添加条件", command=lambda: self.add_tag_condition(listbox)).pack(side=tk.LEFT,padx=5)
ttk.Button(button_frame, text="清除条件", command=self.clear_tag_conditions).pack(side=tk.LEFT, padx=5)
ttk.Button(button_frame, text="完成", command=tag_window.destroy).pack(side=tk.LEFT, padx=5)
# 显示当前条件
ttk.Label(tag_window, text="当前条件:").pack(pady=(10, 5))
self.condition_text = scrolledtext.ScrolledText(tag_window, height=5)
self.condition_text.pack(fill=tk.BOTH, expand=True, padx=10, pady=5)
self.update_condition_display()
def add_tag_condition(self, listbox):
"""添加标签条件"""
selection = listbox.curselection()
if not selection:
messagebox.showwarning("警告", "请选择至少一个标签")
return
min_count = int(self.min_count_var.get())
max_count = int(self.max_count_var.get())
for idx in selection:
tag = listbox.get(idx)
self.opponent_tags[tag] = {"min": min_count, "max": max_count}
self.update_condition_display()
messagebox.showinfo("成功", "条件已添加")
def clear_tag_conditions(self):
"""清除所有标签条件"""
for tag in self.opponent_tags.keys():
self.opponent_tags[tag] = {"min": 0, "max": 999}
self.update_condition_display()
def update_condition_display(self):
"""更新条件显示"""
self.condition_text.delete(1.0, tk.END)
conditions = []
for tag, condition in self.opponent_tags.items():
if condition["min"] > 0 or condition["max"] < 999:
conditions.append(f"{tag}: {condition['min']}-{condition['max']}次")
if conditions:
self.condition_text.insert(tk.END, "\n".join(conditions))
self.opponent_tag_var.set("; ".join(conditions))
else:
self.condition_text.insert(tk.END, "无条件")
self.opponent_tag_var.set("")
def manage_opponent_tags(self):
"""管理交易对手标签"""
tag_window = tk.Toplevel(self.root)
tag_window.title("管理交易对手标签")
tag_window.geometry("400x300")
# 标签列表
ttk.Label(tag_window, text="当前标签:").pack(pady=(10, 5))
listbox = tk.Listbox(tag_window)
for tag in self.opponent_tags.keys():
listbox.insert(tk.END, tag)
listbox.pack(fill=tk.BOTH, expand=True, padx=10, pady=5)
# 添加新标签
add_frame = ttk.Frame(tag_window)
add_frame.pack(fill=tk.X, padx=10, pady=5)
ttk.Label(add_frame, text="新标签:").pack(side=tk.LEFT, padx=(0, 5))
new_tag_var = tk.StringVar()
ttk.Entry(add_frame, textvariable=new_tag_var, width=15).pack(side=tk.LEFT, padx=(0, 5))
ttk.Button(add_frame, text="添加", command=lambda: self.add_new_tag(new_tag_var, listbox)).pack(side=tk.LEFT)
# 操作按钮
button_frame = ttk.Frame(tag_window)
button_frame.pack(pady=10)
ttk.Button(button_frame, text="删除选中", command=lambda: self.delete_tag(listbox)).pack(side=tk.LEFT, padx=5)
ttk.Button(button_frame, text="关闭", command=tag_window.destroy).pack(side=tk.LEFT, padx=5)
def add_new_tag(self, new_tag_var, listbox):
"""添加新标签"""
tag = new_tag_var.get().strip()
if not tag:
messagebox.showwarning("警告", "请输入标签名称")
return
if tag not in self.opponent_tags:
self.opponent_tags[tag] = {"min": 0, "max": 999}
listbox.insert(tk.END, tag)
new_tag_var.set("")
else:
messagebox.showwarning("警告", "标签已存在")
def delete_tag(self, listbox):
"""删除选中的标签"""
selection = listbox.curselection()
if not selection:
return
tag = listbox.get(selection[0])
if tag in self.opponent_tags:
del self.opponent_tags[tag]
listbox.delete(selection[0])
def on_db_type_change(self, event):
"""数据库类型改变时的处理"""
db_type = self.db_type_var.get()
if db_type == "mysql":
self.db_port_var.set("3306")
self.db_schema_var.set("") # MySQL不使用schema概念
self.schema_combo.config(state="disabled")
elif db_type == "postgresql":
self.db_port_var.set("5432")
self.db_schema_var.set("public")
self.schema_combo.config(state="readonly")
self.refresh_schemas()
def refresh_schemas(self):
"""刷新数据库模式列表"""
try:
db_type = self.db_type_var.get()
host = self.db_host_var.get()
port = self.db_port_var.get()
database = self.db_name_var.get()
username = self.db_user_var.get()
password = self.db_password_var.get()
if not all([host, port, database, username]):
return
if db_type == "mysql":
import mysql.connector
conn = mysql.connector.connect(
host=host,
port=int(port),
user=username,
password=password,
database=database
)
cursor = conn.cursor()
cursor.execute("SHOW DATABASES")
self.schemas = [db[0] for db in cursor.fetchall()]
cursor.close()
conn.close()
elif db_type == "postgresql":
import psycopg2
try:
conn = psycopg2.connect(
host=host,
port=port,
user=username,
password=password,
database=database,
connect_timeout=5
)
conn.autocommit = True
cursor = conn.cursor()
cursor.execute("""
SELECT schema_name
FROM information_schema.schemata
WHERE schema_name NOT LIKE 'pg_%'
AND schema_name != 'information_schema'
""")
self.schemas = [schema[0] for schema in cursor.fetchall()]
cursor.close()
conn.close()
except Exception as e:
messagebox.showwarning("警告", f"获取模式列表失败: {str(e)}")
self.schemas = ['public']
self.schema_combo['values'] = self.schemas
if self.schemas:
if self.db_schema_var.get() in self.schemas:
pass
elif 'public' in self.schemas:
self.db_schema_var.set('public')
else:
self.db_schema_var.set(self.schemas[0])
except ImportError:
messagebox.showerror("错误",
"请安装相应的数据库驱动:\nMySQL: pip install mysql-connector-python\nPostgreSQL: pip install psycopg2-binary")
except Exception as e:
print(f"刷新模式列表失败: {e}")
def test_db_connection(self):
"""测试数据库连接"""
try:
db_type = self.db_type_var.get()
host = self.db_host_var.get()
port = self.db_port_var.get()
database = self.db_name_var.get()
username = self.db_user_var.get()
password = self.db_password_var.get()
if db_type == "mysql":
import mysql.connector
conn = mysql.connector.connect(
host=host,
port=int(port),
user=username,
password=password,
database=database
)
conn.close()
messagebox.showinfo("成功", "MySQL数据库连接测试成功!")
elif db_type == "postgresql":
import psycopg2
conn = psycopg2.connect(
host=host,
port=port,
user=username,
password=password,
database=database,
connect_timeout=5
)
conn.autocommit = True
conn.close()
messagebox.showinfo("成功", "PostgreSQL数据库连接测试成功!")
except ImportError:
messagebox.showerror("错误",
"请安装相应的数据库驱动:\nMySQL: pip install mysql-connector-python\nPostgreSQL: pip install psycopg2-binary")
except Exception as e:
messagebox.showerror("错误", f"数据库连接失败: {str(e)}")
def load_from_database(self):
"""从数据库加载数据"""
try:
self.update_progress("正在连接数据库...", 0)
db_type = self.db_type_var.get()
host = self.db_host_var.get()
port = self.db_port_var.get()
database = self.db_name_var.get()
username = self.db_user_var.get()
password = self.db_password_var.get()
table_name = self.db_table_var.get()
schema = self.db_schema_var.get()
load_limit = int(self.load_limit_var.get())
if not table_name:
messagebox.showerror("错误", "请输入表名")
return
if db_type == "mysql":
import mysql.connector
conn = mysql.connector.connect(
host=host,
port=int(port),
user=username,
password=password,
database=database
)
full_table_name = f"`{database}`.`{table_name}`" if database else f"`{table_name}`"
query = f"SELECT * FROM {full_table_name} LIMIT {load_limit}"
self.df = self.read_mysql_data(conn, query)
conn.close()
elif db_type == "postgresql":
import psycopg2
conn = psycopg2.connect(
host=host,
port=port,
user=username,
password=password,
database=database,
connect_timeout=10
)
conn.autocommit = True
try:
if schema:
full_table_name = f'"{schema}"."{table_name}"'
else:
full_table_name = f'"{table_name}"'
query = f"SELECT * FROM {full_table_name} LIMIT {load_limit}"
self.df = self.read_postgresql_data(conn, query)
except Exception as e:
try:
cursor = conn.cursor()
cursor.execute("""
SELECT table_name
FROM information_schema.tables
WHERE table_schema = %s
AND LOWER(table_name) = LOWER(%s)
""", (schema, table_name))
result = cursor.fetchone()
if result:
actual_table_name = result[0]
full_table_name = f'"{schema}"."{actual_table_name}"'
query = f"SELECT * FROM {full_table_name} LIMIT {load_limit}"
self.df = self.read_postgresql_data(conn, query)
else:
if schema:
full_table_name = f"{schema}.{table_name}"
else:
full_table_name = table_name
query = f"SELECT * FROM {full_table_name} LIMIT {load_limit}"
self.df = self.read_postgresql_data(conn, query)
except Exception as inner_e:
raise Exception(f"无法加载表数据: {str(inner_e)}")
finally:
try:
conn.close()
except:
pass
else:
messagebox.showerror("错误", "不支持的数据库类型")
return
self.update_progress("数据处理中...", 80)
# 更新字段映射下拉框
self.update_mapping_comboboxes()
# 显示数据预览
self.data_text.delete(1.0, tk.END)
self.data_text.insert(tk.END, f"从数据库表 {table_name} 加载数据成功!\n\n")
self.data_text.insert(tk.END, f"数据形状: {self.df.shape}\n\n")
self.data_text.insert(tk.END, f"所有字段: {list(self.df.columns)}\n\n")
self.data_text.insert(tk.END, self.df.head(20).to_string())
self.update_progress("数据加载完成", 100)
except ImportError:
messagebox.showerror("错误",
"请安装相应的数据库驱动:\nMySQL: pip install mysql-connector-python\nPostgreSQL: pip install psycopg2-binary")
except Exception as e:
error_msg = f"从数据库加载数据失败: {str(e)}\n\n详细错误信息:\n{traceback.format_exc()}"
messagebox.showerror("错误", error_msg)
self.update_progress("加载失败", 0)
def read_mysql_data(self, conn, query):
"""从MySQL读取数据"""
cursor = conn.cursor()
cursor.execute(query)
columns = [desc[0] for desc in cursor.description]
data = []
for row in cursor.fetchall():
data.append(row)
cursor.close()
return pd.DataFrame(data, columns=columns)
def read_postgresql_data(self, conn, query):
"""从PostgreSQL读取数据"""
cursor = conn.cursor()
try:
cursor.execute(query)
columns = [desc[0] for desc in cursor.description]
data = []
for row in cursor.fetchall():
data.append(row)
return pd.DataFrame(data, columns=columns)
finally:
try:
cursor.close()
except:
pass
def browse_file(self):
file_path = filedialog.askopenfilename(
title="选择数据文件",
filetypes=[("CSV文件", "*.csv"), ("Excel文件", "*.xlsx;*.xls"), ("文本文件", "*.txt"), ("所有文件", "*.*")]
)
if file_path:
self.file_path_var.set(file_path)
def detect_encoding(self, file_path):
"""检测文件编码"""
try:
with open(file_path, 'rb') as f:
raw_data = f.read(10000) # 读取前10000字节来检测编码
result = chardet.detect(raw_data)
return result['encoding']
except:
return 'utf-8'
def auto_detect_fields(self, df):
"""自动检测字段类型并设置映射"""
column_types = {}
for col in df.columns:
col_lower = col.lower()
# 检查是否是金额字段
if any(x in col_lower for x in ['金额', 'amount', 'money', 'amt', '交易金额']):
column_types[col] = '交易金额'
# 检查是否是时间字段
elif any(x in col_lower for x in ['时间', 'time', 'date', '日期', '交易时间']):
column_types[col] = '交易时间'
# 检查是否是卡号字段
elif any(x in col_lower for x in ['卡号', 'account', 'card', '账号', '交易卡号']):
column_types[col] = '交易卡号'
# 检查是否是对方卡号字段
elif any(x in col_lower for x in ['对方', 'counter', 'target', '对手', '对方卡号']):
column_types[col] = '对方卡号'
# 检查是否是收付标志字段
elif any(x in col_lower for x in ['标志', 'type', 'direction', '收付', '收付标志']):
column_types[col] = '收付标志'
return column_types
def load_data(self):
"""加载文件数据"""
file_path = self.file_path_var.get()
if not file_path:
messagebox.showerror("错误", "请先选择数据文件")
return
try:
self.update_progress("正在加载数据...", 0)
if file_path.endswith('.csv'):
# 检测文件编码
encoding = self.detect_encoding(file_path)
chunk_size = 10000
chunks = []
total_rows = 0
with open(file_path, 'r', encoding=encoding, errors='ignore') as f:
total_rows = sum(1 for _ in f) - 1
self.total_rows = total_rows
self.loaded_rows = 0
for chunk in pd.read_csv(file_path, chunksize=chunk_size, low_memory=False, encoding=encoding):
chunks.append(chunk)
self.loaded_rows += len(chunk)
progress = min(80, (self.loaded_rows / self.total_rows) * 80)
self.update_progress(f"正在加载数据: {self.loaded_rows}/{self.total_rows} 行", progress)
if self.loaded_rows >= int(self.load_limit_var.get()):
break
self.df = pd.concat(chunks, ignore_index=True)
elif file_path.endswith(('.xlsx', '.xls')):
load_limit = int(self.load_limit_var.get())
self.df = pd.read_excel(file_path, nrows=load_limit)
else:
encoding = self.detect_encoding(file_path)
with open(file_path, 'r', encoding=encoding, errors='ignore') as f:
lines = f.readlines()
self.df = self.parse_text_data(lines)
self.update_progress("数据加载完成", 30)
# 自动检测字段并设置映射
auto_mapping = self.auto_detect_fields(self.df)
for field, var in self.mapping_vars.items():
for col, col_type in auto_mapping.items():
if col_type == field:
var.set(col)
break
self.update_mapping_comboboxes()
self.data_text.delete(1.0, tk.END)
self.data_text.insert(tk.END, f"数据形状: {self.df.shape}\n\n")
self.data_text.insert(tk.END, f"所有字段: {list(self.df.columns)}\n\n")
self.data_text.insert(tk.END, self.df.head(20).to_string())
self.update_progress("就绪", 100)
except Exception as e:
messagebox.showerror("错误", f"读取文件失败: {str(e)}")
print(f"错误详情: {str(e)}")
self.update_progress("加载失败", 0)
def update_mapping_comboboxes(self):
"""更新字段映射下拉框"""
if self.df is None:
return
all_columns = list(self.df.columns)
for field, var in self.mapping_vars.items():
for widget in self.root.winfo_children():
if isinstance(widget, ttk.Combobox) and widget.cget('textvariable') == var._name:
widget['values'] = all_columns
# 如果当前没有设置值,尝试自动匹配
if not var.get():
for col in all_columns:
col_lower = col.lower()
if field == '交易卡号' and any(x in col_lower for x in ['交易卡号', 'account', 'card']):
var.set(col)
break
elif field == '交易时间' and any(x in col_lower for x in ['交易时间', 'time', 'date']):
var.set(col)
break
elif field == '交易金额' and any(x in col_lower for x in ['交易金额', 'amount', 'money']):
var.set(col)
break
elif field == '收付标志' and any(x in col_lower for x in ['标志', 'type', 'direction']):
var.set(col)
break
elif field == '对方卡号' and any(x in col_lower for x in ['对方', 'counter', 'target']):
var.set(col)
break
def parse_text_data(self, lines):
"""解析文本数据"""
transactions = []
load_limit = int(self.load_limit_var.get())
for i, line in enumerate(lines[:load_limit + 1]):
if i == 0:
headers = line.strip().split(',')
continue
line = line.strip()
if not line:
continue
parts = re.split(r'\s+', line)
if len(parts) >= 4:
transaction = {
'原始字段1': parts[0] if len(parts) > 0 else '',
'原始字段2': parts[1] if len(parts) > 1 else '',
'原始字段3': parts[2] if len(parts) > 2 else '',
'原始字段4': parts[3] if len(parts) > 3 else '',
'原始字段5': parts[4] if len(parts) > 4 else ''
}
transactions.append(transaction)
return pd.DataFrame(transactions)
def update_progress(self, message, value):
"""更新进度条和标签"""
self.progress_var.set(value)
self.progress_label.config(text=message)
self.root.update_idletasks()
def start_analysis(self):
"""开始分析"""
if self.df is None or self.df.empty:
messagebox.showerror("错误", "请先加载数据")
return
for field, var in self.mapping_vars.items():
if not var.get():
messagebox.showerror("错误", f"请设置'{field}'的字段映射")
return
self.stop_analysis = False
self.analysis_thread = threading.Thread(target=self.analyze_data_thread)
self.analysis_thread.daemon = True
self.analysis_thread.start()
def stop_analysis_process(self):
"""停止分析过程"""
self.stop_analysis = True
self.update_progress("正在停止分析...", 0)
def debug_analysis(self):
"""调试模式:显示详细的分析过程"""
if self.df is None or self.df.empty:
messagebox.showerror("错误", "请先加载数据")
return
# 检查字段映射
for field, var in self.mapping_vars.items():
if not var.get():
messagebox.showerror("错误", f"请设置'{field}'的字段映射")
return
try:
# 创建字段映射
column_mapping = {var.get(): field for field, var in self.mapping_vars.items()}
# 选择需要的字段
selected_columns = list(self.mapping_vars.values())
selected_columns = [var.get() for var in selected_columns]
analysis_df = self.df[selected_columns].copy()
analysis_df.columns = list(self.mapping_vars.keys())
# 显示数据信息
debug_info = f"数据总行数: {len(analysis_df)}\n"
debug_info += f"唯一卡号数量: {analysis_df['交易卡号'].nunique()}\n"
debug_info += f"收付标志分布:\n{analysis_df['收付标志'].value_counts()}\n\n"
# 检查前几个账户的交易模式
sample_accounts = analysis_df['交易卡号'].unique()[:5]
for account in sample_accounts:
account_data = analysis_df[analysis_df['交易卡号'] == account].copy()
debug_info += f"\n账户 {account} 的交易记录:\n"
debug_info += f"总交易数: {len(account_data)}\n"
debug_info += f"收款交易数: {len(account_data[account_data['收付标志'].astype(str).str.contains('收|1|收入|收款', case=False, na=False)])}\n"
debug_info += f"付款交易数: {len(account_data[account_data['收付标志'].astype(str).str.contains('出|付|0|支出|付款', case=False, na=False)])}\n"
# 显示前几条交易记录
debug_info += "前5条交易记录:\n"
debug_info += account_data.head().to_string() + "\n"
# 显示调试信息
self.result_text.delete(1.0, tk.END)
self.result_text.insert(tk.END, "=== 调试信息 ===\n\n")
self.result_text.insert(tk.END, debug_info)
except Exception as e:
messagebox.showerror("错误", f"调试过程中出现错误: {str(e)}")
def clean_and_transform_data(self, df):
"""数据清洗和转换"""
# 转换交易金额为数值类型 - 修复语法错误
df['交易金额'] = pd.to_numeric(df['交易金额'], errors='coerce')
# 转换交易时间为datetime
df['交易时间'] = pd.to_datetime(df['交易时间'], errors='coerce')
# 清理收付标志
df['收付标志'] = df['收付标志'].astype(str).str.strip().str.upper()
# 移除无效数据
df = df.dropna(subset=['交易卡号', '交易时间', '交易金额', '收付标志'])
return df
def analyze_data_thread(self):
"""在新线程中执行分析"""
try:
self.update_progress("开始分析数据...", 0)
# 创建字段映射
column_mapping = {var.get(): field for field, var in self.mapping_vars.items()}
# 选择需要的字段
selected_columns = list(self.mapping_vars.values())
selected_columns = [var.get() for var in selected_columns]
analysis_df = self.df[selected_columns].copy()
analysis_df.columns = list(self.mapping_vars.keys())
# 数据清洗和转换
analysis_df = self.clean_and_transform_data(analysis_df)
min_continuous = int(self.min_continuous_var.get())
max_gap_minutes = float(self.max_gap_var.get()) # 允许小数分钟
amount_threshold = float(self.amount_threshold_var.get()) / 100
time_window_minutes = float(self.time_window_var.get()) # 使用分钟
max_out_gap_minutes = float(self.max_out_gap_var.get()) # 使用分钟
results = []
all_traders = analysis_df['交易卡号'].unique()
total_traders = len(all_traders)
for idx, trader in enumerate(all_traders):
if self.stop_analysis:
self.update_progress("分析已停止", 0)
return
progress = (idx / total_traders) * 80
self.update_progress(f"分析账户 {idx + 1}/{total_traders}: {trader}", progress)
if pd.isna(trader):
continue
trader_data = analysis_df[analysis_df['交易卡号'] == trader].copy()
trader_data = trader_data.sort_values('交易时间')
if len(trader_data) < min_continuous + 1:
continue
# 找出所有收款交易
receives = trader_data[
trader_data['收付标志'].isin(['收', '进', '1', '收入', '收款', 'IN', 'INCOME', 'RECEIVE'])].copy()
if len(receives) < min_continuous:
continue
# 按时间窗口分组查找连续收款序列
sequences = self.find_continuous_sequences(receives, min_continuous, max_gap_minutes,
time_window_minutes)
# 分析每个序列
for seq in sequences:
if self.stop_analysis:
break
# 检查交易对手标签条件
if not self.check_opponent_tag_conditions(seq):
continue
seq_end_time = seq['交易时间'].max()
total_received = seq['交易金额'].sum()
# 查找时间窗口内的后续付款交易
time_window_end = seq_end_time + timedelta(minutes=max_out_gap_minutes)
out_transactions = trader_data[
(trader_data['收付标志'].isin(['出', '付', '0', '支出', '付款', 'OUT', 'PAY', 'EXPENSE'])) &
(trader_data['交易时间'] > seq_end_time) &
(trader_data['交易时间'] <= time_window_end)
]
for _, out_row in out_transactions.iterrows():
out_amount = out_row['交易金额']
# 检查金额匹配(允许一定误差)
if abs(out_amount - total_received) / max(total_received, out_amount) <= amount_threshold:
# 计算时间差(分钟)
time_diff = (out_row['交易时间'] - seq_end_time).total_seconds() / 60
result = {
'交易卡号': trader,
'连续接收次数': len(seq),
'总接收金额': total_received,
'出账金额': out_amount,
'对方卡号': out_row['对方卡号'],
'金额差异百分比': f"{abs(out_amount - total_received) / max(total_received, out_amount) * 100:.2f}%",
'模式': '连续进账后出账',
'连续交易时间范围': f"{seq['交易时间'].min()} 至 {seq['交易时间'].max()}",
'出账时间': out_row['交易时间'],
'时间差(分钟)': f"{time_diff:.2f}",
'交易对手标签': self.get_sequence_tags(seq)
}
results.append(result)
self.results = pd.DataFrame(results)
self.update_progress("分析完成", 90)
# 更新结果显示
self.root.after(0, self.update_results_display)
self.update_progress("就绪", 100)
except Exception as e:
error_msg = f"分析过程中出现错误: {str(e)}\n\n详细错误信息:\n{traceback.format_exc()}"
self.root.after(0, lambda: messagebox.showerror("错误", error_msg))
self.update_progress("分析出错", 0)
def check_opponent_tag_conditions(self, sequence):
"""检查交易对手标签条件"""
if not self.opponent_tags:
return True # 没有设置条件,直接通过
# 统计序列中每个标签的出现次数
tag_counts = {}
for _, row in sequence.iterrows():
opponent = str(row['对方卡号'])
for tag in self.opponent_tags.keys():
# 这里可以根据实际业务逻辑判断对手是否属于某个标签
# 示例:简单判断对手卡号是否包含标签名
if tag in opponent:
tag_counts[tag] = tag_counts.get(tag, 0) + 1
# 检查所有条件是否满足
for tag, condition in self.opponent_tags.items():
count = tag_counts.get(tag, 0)
if count < condition["min"] or count > condition["max"]:
return False
return True
def get_sequence_tags(self, sequence):
"""获取序列的交易对手标签统计"""
tag_counts = {}
for _, row in sequence.iterrows():
opponent = str(row['对方卡号'])
for tag in self.opponent_tags.keys():
if tag in opponent:
tag_counts[tag] = tag_counts.get(tag, 0) + 1
return "; ".join([f"{tag}:{count}" for tag, count in tag_counts.items()])
def find_continuous_sequences(self, receives, min_continuous, max_gap_minutes, time_window_minutes):
"""查找连续的交易序列(使用分钟)"""
receives = receives.sort_values('交易时间')
sequences = []
current_sequence = []
for _, row in receives.iterrows():
if not current_sequence:
current_sequence.append(row)
else:
last_time = current_sequence[-1]['交易时间']
current_time = row['交易时间']
# 检查时间间隔(分钟)
time_diff = (current_time - last_time).total_seconds() / 60
if time_diff <= max_gap_minutes:
current_sequence.append(row)
else:
# 检查时间窗口
if len(current_sequence) >= min_continuous:
seq_df = pd.DataFrame(current_sequence)
time_span = (seq_df['交易时间'].max() - seq_df['交易时间'].min()).total_seconds() / 60
if time_span <= time_window_minutes:
sequences.append(seq_df)
current_sequence = [row]
# 处理最后一个序列
if len(current_sequence) >= min_continuous:
seq_df = pd.DataFrame(current_sequence)
time_span = (seq_df['交易时间'].max() - seq_df['交易时间'].min()).total_seconds() / 60
if time_span <= time_window_minutes:
sequences.append(seq_df)
return sequences
def update_results_display(self):
"""更新结果显示"""
self.result_text.delete(1.0, tk.END)
if not self.results.empty:
self.result_text.insert(tk.END, f"找到 {len(self.results)} 条符合条件的交易模式:\n\n")
self.result_text.insert(tk.END, self.results.to_string(index=False))
# 分析共同收款方
common_recipients = self.results['对方卡号'].value_counts()
if len(common_recipients) > 0:
self.result_text.insert(tk.END, "\n\n共同收款方分析:\n")
for recipient, count in common_recipients.items():
if count > 1:
recipient_data = self.results[self.results['对方卡号'] == recipient]
total_amount = recipient_data['出账金额'].sum()
sources = len(recipient_data['交易卡号'].unique())
self.result_text.insert(tk.END,
f"\n收款方 {recipient}: {count} 笔交易, {sources} 个来源账户, 总金额 {total_amount:.2f}")
else:
self.result_text.insert(tk.END, "未找到符合条件的交易记录\n")
self.result_text.insert(tk.END, "建议使用'调试模式'检查数据格式和字段映射")
def export_results(self):
"""导出分析结果"""
if self.results is None or self.results.empty:
messagebox.showerror("错误", "没有结果可导出")
return
file_path = filedialog.asksaveasfilename(
title="保存结果",
defaultextension=".csv",
filetypes=[("CSV文件", "*.csv"), ("Excel文件", "*.xlsx")]
)
if file_path:
try:
if file_path.endswith('.csv'):
self.results.to_csv(file_path, index=False, encoding='utf-8-sig')
else:
self.results.to_excel(file_path, index=False)
messagebox.showinfo("成功", f"结果已保存到: {file_path}")
except Exception as e:
messagebox.showerror("错误", f"保存失败: {str(e)}")
def main():
root = tk.Tk()
app = TransactionAnalyzerApp(root)
root.mainloop()
if __name__ == "__main__":
main()对手标签是对手标签,对手卡号是交易对手账卡号