pg table export shape file command

pgsql2shp -h localhost -u postgres -P password -p 5432 -f d:\export_out.shp smartwatergdb export_output2

import pandas as pd import numpy as np from datetime import datetime, timedelta import os import tkinter as tk from tkinter import ttk, filedialog, messagebox, scrolledtext import matplotlib.pyplot as plt from matplotlib.backends.backend_tkagg import FigureCanvasTkAgg from matplotlib.figure import Figure import seaborn as sns import re import chardet import threading import time import traceback from tkinter import simpledialog class TransactionAnalyzerApp: def __init__(self, root): self.root = root self.root.title("交易数据分析工具 - 增强版") self.root.geometry("1400x800") self.root.configure(bg='#f0f0f0') self.root.minsize(1200, 700) self.df = None self.results = None self.analysis_thread = None self.stop_analysis = False self.db_connection = None self.schemas = [] self.total_rows = 0 self.loaded_rows = 0 self.opponent_tags = {} # 存储交易对手标签 {标签名: 条件} self.setup_ui() def setup_ui(self): # 创建主框架和滚动条 main_frame = ttk.Frame(self.root, padding="10") main_frame.grid(row=0, column=0, sticky=(tk.W, tk.E, tk.N, tk.S)) # 创建画布和滚动条 canvas = tk.Canvas(main_frame) scrollbar = ttk.Scrollbar(main_frame, orient="vertical", command=canvas.yview) scrollable_frame = ttk.Frame(canvas) scrollable_frame.bind( "<Configure>", lambda e: canvas.configure(scrollregion=canvas.bbox("all")) ) canvas.create_window((0, 0), window=scrollable_frame, anchor="nw") canvas.configure(yscrollcommand=scrollbar.set) canvas.grid(row=0, column=0, sticky=(tk.W, tk.E, tk.N, tk.S)) scrollbar.grid(row=0, column=1, sticky=(tk.N, tk.S)) # 配置网格权重 self.root.columnconfigure(0, weight=1) self.root.rowconfigure(0, weight=1) main_frame.columnconfigure(0, weight=1) main_frame.rowconfigure(0, weight=1) scrollable_frame.columnconfigure(1, weight=1) # 标题 title_label = ttk.Label(scrollable_frame, text="交易数据分析工具 - 增强版(解决连续进账分析问题)", font=("Arial", 16, "bold")) title_label.grid(row=0, column=0, columnspan=4, pady=(0, 20)) # 数据库连接区域 db_frame = ttk.LabelFrame(scrollable_frame, text="数据库连接", padding="10") db_frame.grid(row=1, column=0, columnspan=4, sticky=(tk.W, tk.E), pady=(0, 10)) db_frame.columnconfigure(1, weight=1) # 数据库类型选择 ttk.Label(db_frame, text="数据库类型:").grid(row=0, column=0, sticky=tk.W, padx=(0, 5)) self.db_type_var = tk.StringVar(value="postgresql") db_combo = ttk.Combobox(db_frame, textvariable=self.db_type_var, values=["mysql", "postgresql"], width=10) db_combo.grid(row=0, column=1, padx=(0, 10)) db_combo.bind("<<ComboboxSelected>>", self.on_db_type_change) # 连接参数 ttk.Label(db_frame, text="主机:").grid(row=0, column=2, sticky=tk.W, padx=(0, 5)) self.db_host_var = tk.StringVar(value="localhost") ttk.Entry(db_frame, textvariable=self.db_host_var, width=15).grid(row=0, column=3, padx=(0, 10)) ttk.Label(db_frame, text="端口:").grid(row=0, column=4, sticky=tk.W, padx=(0, 5)) self.db_port_var = tk.StringVar(value="5433") ttk.Entry(db_frame, textvariable=self.db_port_var, width=10).grid(row=0, column=5, padx=(0, 10)) ttk.Label(db_frame, text="数据库:").grid(row=1, column=0, sticky=tk.W, padx=(0, 5)) self.db_name_var = tk.StringVar(value="postgres") ttk.Entry(db_frame, textvariable=self.db_name_var, width=15).grid(row=1, column=1, padx=(0, 10)) ttk.Label(db_frame, text="模式:").grid(row=1, column=2, sticky=tk.W, padx=(0, 5)) self.db_schema_var = tk.StringVar(value="xinpujing") self.schema_combo = ttk.Combobox(db_frame, textvariable=self.db_schema_var, width=15) self.schema_combo.grid(row=1, column=3, padx=(0, 10)) ttk.Button(db_frame, text="刷新模式", command=self.refresh_schemas).grid(row=1, column=4, padx=(5, 5)) ttk.Label(db_frame, text="用户名:").grid(row=2, column=0, sticky=tk.W, padx=(0, 5)) self.db_user_var = tk.StringVar(value="postgres") ttk.Entry(db_frame, textvariable=self.db_user_var, width=15).grid(row=2, column=1, padx=(0, 10)) ttk.Label(db_frame, text="密码:").grid(row=2, column=2, sticky=tk.W, padx=(0, 5)) self.db_password_var = tk.StringVar(value="") ttk.Entry(db_frame, textvariable=self.db_password_var, width=15, show="*").grid(row=2, column=3, padx=(0, 10)) ttk.Label(db_frame, text="表名:").grid(row=2, column=4, sticky=tk.W, padx=(0, 5)) self.db_table_var = tk.StringVar(value="secsflc_JYMX") ttk.Entry(db_frame, textvariable=self.db_table_var, width=15).grid(row=2, column=5, padx=(0, 10)) # 数据加载选项 ttk.Label(db_frame, text="加载行数:").grid(row=2, column=6, sticky=tk.W, padx=(0, 5)) self.load_limit_var = tk.StringVar(value="50000") ttk.Entry(db_frame, textvariable=self.load_limit_var, width=10).grid(row=2, column=7, padx=(0, 10)) ttk.Button(db_frame, text="测试连接", command=self.test_db_connection).grid(row=2, column=8, padx=(10, 5)) ttk.Button(db_frame, text="从数据库加载", command=self.load_from_database).grid(row=2, column=9, padx=(5, 0)) # 文件选择区域 file_frame = ttk.LabelFrame(scrollable_frame, text="文件导入", padding="10") file_frame.grid(row=2, column=0, columnspan=4, sticky=(tk.W, tk.E), pady=(0, 10)) file_frame.columnconfigure(1, weight=1) ttk.Label(file_frame, text="文件路径:").grid(row=0, column=0, sticky=tk.W, padx=(0, 5)) self.file_path_var = tk.StringVar() file_entry = ttk.Entry(file_frame, textvariable=self.file_path_var, width=50) file_entry.grid(row=0, column=1, sticky=(tk.W, tk.E), padx=(0, 5)) ttk.Button(file_frame, text="浏览", command=self.browse_file).grid(row=0, column=2, padx=(0, 5)) ttk.Button(file_frame, text="加载数据", command=self.load_data).grid(row=0, column=3) # 字段映射区域 mapping_frame = ttk.LabelFrame(scrollable_frame, text="字段映射设置", padding="10") mapping_frame.grid(row=3, column=0, columnspan=4, sticky=(tk.W, tk.E), pady=(0, 10)) # 创建字段映射的网格 ttk.Label(mapping_frame, text="原始字段", font=("Arial", 10, "bold")).grid(row=0, column=0, padx=5) ttk.Label(mapping_frame, text="→", font=("Arial", 10, "bold")).grid(row=0, column=1, padx=5) ttk.Label(mapping_frame, text="目标字段", font=("Arial", 10, "bold")).grid(row=0, column=2, padx=5) # 必需的字段映射 required_fields = ['交易卡号', '交易时间', '交易金额', '收付标志', '对方卡号'] self.mapping_vars = {} for i, field in enumerate(required_fields, 1): ttk.Label(mapping_frame, text=field).grid(row=i, column=2, sticky=tk.W, padx=5) self.mapping_vars[field] = tk.StringVar() mapping_combo = ttk.Combobox(mapping_frame, textvariable=self.mapping_vars[field], width=20) mapping_combo.grid(row=i, column=0, padx=5) ttk.Label(mapping_frame, text="→").grid(row=i, column=1, padx=5) # 参数设置区域 param_frame = ttk.LabelFrame(scrollable_frame, text="分析参数", padding="10") param_frame.grid(row=4, column=0, columnspan=4, sticky=(tk.W, tk.E), pady=(0, 10)) ttk.Label(param_frame, text="最小连续次数:").grid(row=0, column=0, sticky=tk.W, padx=(0, 5)) self.min_continuous_var = tk.StringVar(value="3") ttk.Entry(param_frame, textvariable=self.min_continuous_var, width=10).grid(row=0, column=1, padx=(0, 20)) ttk.Label(param_frame, text="最大交易时间间隔(分钟):").grid(row=0, column=2, sticky=tk.W, padx=(0, 5)) self.max_gap_var = tk.StringVar(value="10") ttk.Entry(param_frame, textvariable=self.max_gap_var, width=10).grid(row=0, column=3, padx=(0, 20)) ttk.Label(param_frame, text="金额匹配阈值(%):").grid(row=0, column=4, sticky=tk.W, padx=(0, 5)) self.amount_threshold_var = tk.StringVar(value="20") ttk.Entry(param_frame, textvariable=self.amount_threshold_var, width=10).grid(row=0, column=5, padx=(0, 20)) ttk.Label(param_frame, text="时间窗口(分钟):").grid(row=0, column=6, sticky=tk.W, padx=(0, 5)) self.time_window_var = tk.StringVar(value="1440") # 24小时 = 1440分钟 ttk.Entry(param_frame, textvariable=self.time_window_var, width=10).grid(row=0, column=7, padx=(0, 20)) # 新增参数:最大出账时间差 ttk.Label(param_frame, text="最大出账时间差(分钟):").grid(row=1, column=0, sticky=tk.W, padx=(0, 5)) self.max_out_gap_var = tk.StringVar(value="1440") # 24小时 = 1440分钟 ttk.Entry(param_frame, textvariable=self.max_out_gap_var, width=10).grid(row=1, column=1, padx=(0, 20)) # 交易对手标签设置 ttk.Label(param_frame, text="交易对手标签条件:").grid(row=1, column=2, sticky=tk.W, padx=(0, 5)) self.opponent_tag_var = tk.StringVar(value="") ttk.Entry(param_frame, textvariable=self.opponent_tag_var, width=15, state='disabled').grid(row=1, column=3, padx=(0, 5)) ttk.Button(param_frame, text="设置标签条件", command=self.set_opponent_tag_conditions).grid(row=1, column=4, padx=(5, 5)) ttk.Button(param_frame, text="管理标签", command=self.manage_opponent_tags).grid(row=1, column=5, padx=(5, 5)) ttk.Button(param_frame, text="开始分析", command=self.start_analysis).grid(row=2, column=0, padx=(20, 5), pady=(10, 0)) ttk.Button(param_frame, text="停止分析", command=self.stop_analysis_process).grid(row=2, column=1, padx=(5, 5), pady=(10, 0)) ttk.Button(param_frame, text="导出结果", command=self.export_results).grid(row=2, column=2, padx=(5, 0), pady=(10, 0)) ttk.Button(param_frame, text="调试模式", command=self.debug_analysis).grid(row=2, column=3, padx=(20, 5), pady=(10, 0)) # 进度条区域 progress_frame = ttk.Frame(scrollable_frame) progress_frame.grid(row=5, column=0, columnspan=4, sticky=(tk.W, tk.E), pady=(0, 10)) self.progress_var = tk.DoubleVar() self.progress_bar = ttk.Progressbar(progress_frame, variable=self.progress_var, maximum=100) self.progress_bar.grid(row=0, column=0, sticky=(tk.W, tk.E), padx=(0, 10)) progress_frame.columnconfigure(0, weight=1) self.progress_label = ttk.Label(progress_frame, text="就绪") self.progress_label.grid(row=0, column=1) # 数据显示区域 data_frame = ttk.LabelFrame(scrollable_frame, text="数据预览", padding="10") data_frame.grid(row=6, column=0, columnspan=2, sticky=(tk.W, tk.E, tk.N, tk.S), pady=(0, 10), padx=(0, 10)) data_frame.columnconfigure(0, weight=1) data_frame.rowconfigure(0, weight=1) self.data_text = scrolledtext.ScrolledText(data_frame, height=12, width=70) self.data_text.grid(row=0, column=0, sticky=(tk.W, tk.E, tk.N, tk.S)) # 结果显示区域 result_frame = ttk.LabelFrame(scrollable_frame, text="分析结果", padding="10") result_frame.grid(row=6, column=2, columnspan=2, sticky=(tk.W, tk.E, tk.N, tk.S), pady=(0, 10)) result_frame.columnconfigure(0, weight=1) result_frame.rowconfigure(0, weight=1) self.result_text = scrolledtext.ScrolledText(result_frame, height=12, width=60) self.result_text.grid(row=0, column=0, sticky=(tk.W, tk.E, tk.N, tk.S)) # 配置权重 scrollable_frame.rowconfigure(6, weight=1) data_frame.rowconfigure(0, weight=1) data_frame.columnconfigure(0, weight=1) result_frame.rowconfigure(0, weight=1) result_frame.columnconfigure(0, weight=1) # 绑定鼠标滚轮事件 canvas.bind("<Enter>", lambda e: canvas.bind_all("<MouseWheel>", lambda event: canvas.yview_scroll( int(-1 * (event.delta / 120)), "units"))) canvas.bind("<Leave>", lambda e: canvas.unbind_all("<MouseWheel>")) # 初始化模式列表 self.refresh_schemas() def set_opponent_tag_conditions(self): """设置交易对手标签条件""" tag_window = tk.Toplevel(self.root) tag_window.title("设置交易对手标签条件") tag_window.geometry("500x400") # 标签列表 ttk.Label(tag_window, text="可用标签:").pack(pady=(10, 5)) listbox = tk.Listbox(tag_window, selectmode=tk.MULTIPLE) for tag in self.opponent_tags.keys(): listbox.insert(tk.END, tag) listbox.pack(fill=tk.BOTH, expand=True, padx=10, pady=5) # 条件设置区域 condition_frame = ttk.Frame(tag_window) condition_frame.pack(fill=tk.X, padx=10, pady=5) ttk.Label(condition_frame, text="最小出现次数:").grid(row=0, column=0, padx=(0, 5)) self.min_count_var = tk.StringVar(value="1") ttk.Entry(condition_frame, textvariable=self.min_count_var, width=5).grid(row=0, column=1, padx=(0, 10)) ttk.Label(condition_frame, text="最大出现次数:").grid(row=0, column=2, padx=(0, 5)) self.max_count_var = tk.StringVar(value="999") ttk.Entry(condition_frame, textvariable=self.max_count_var, width=5).grid(row=0, column=3) # 操作按钮 button_frame = ttk.Frame(tag_window) button_frame.pack(pady=10) ttk.Button(button_frame, text="添加条件", command=lambda: self.add_tag_condition(listbox)).pack(side=tk.LEFT,padx=5) ttk.Button(button_frame, text="清除条件", command=self.clear_tag_conditions).pack(side=tk.LEFT, padx=5) ttk.Button(button_frame, text="完成", command=tag_window.destroy).pack(side=tk.LEFT, padx=5) # 显示当前条件 ttk.Label(tag_window, text="当前条件:").pack(pady=(10, 5)) self.condition_text = scrolledtext.ScrolledText(tag_window, height=5) self.condition_text.pack(fill=tk.BOTH, expand=True, padx=10, pady=5) self.update_condition_display() def add_tag_condition(self, listbox): """添加标签条件""" selection = listbox.curselection() if not selection: messagebox.showwarning("警告", "请选择至少一个标签") return min_count = int(self.min_count_var.get()) max_count = int(self.max_count_var.get()) for idx in selection: tag = listbox.get(idx) self.opponent_tags[tag] = {"min": min_count, "max": max_count} self.update_condition_display() messagebox.showinfo("成功", "条件已添加") def clear_tag_conditions(self): """清除所有标签条件""" for tag in self.opponent_tags.keys(): self.opponent_tags[tag] = {"min": 0, "max": 999} self.update_condition_display() def update_condition_display(self): """更新条件显示""" self.condition_text.delete(1.0, tk.END) conditions = [] for tag, condition in self.opponent_tags.items(): if condition["min"] > 0 or condition["max"] < 999: conditions.append(f"{tag}: {condition['min']}-{condition['max']}次") if conditions: self.condition_text.insert(tk.END, "\n".join(conditions)) self.opponent_tag_var.set("; ".join(conditions)) else: self.condition_text.insert(tk.END, "无条件") self.opponent_tag_var.set("") def manage_opponent_tags(self): """管理交易对手标签""" tag_window = tk.Toplevel(self.root) tag_window.title("管理交易对手标签") tag_window.geometry("400x300") # 标签列表 ttk.Label(tag_window, text="当前标签:").pack(pady=(10, 5)) listbox = tk.Listbox(tag_window) for tag in self.opponent_tags.keys(): listbox.insert(tk.END, tag) listbox.pack(fill=tk.BOTH, expand=True, padx=10, pady=5) # 添加新标签 add_frame = ttk.Frame(tag_window) add_frame.pack(fill=tk.X, padx=10, pady=5) ttk.Label(add_frame, text="新标签:").pack(side=tk.LEFT, padx=(0, 5)) new_tag_var = tk.StringVar() ttk.Entry(add_frame, textvariable=new_tag_var, width=15).pack(side=tk.LEFT, padx=(0, 5)) ttk.Button(add_frame, text="添加", command=lambda: self.add_new_tag(new_tag_var, listbox)).pack(side=tk.LEFT) # 操作按钮 button_frame = ttk.Frame(tag_window) button_frame.pack(pady=10) ttk.Button(button_frame, text="删除选中", command=lambda: self.delete_tag(listbox)).pack(side=tk.LEFT, padx=5) ttk.Button(button_frame, text="关闭", command=tag_window.destroy).pack(side=tk.LEFT, padx=5) def add_new_tag(self, new_tag_var, listbox): """添加新标签""" tag = new_tag_var.get().strip() if not tag: messagebox.showwarning("警告", "请输入标签名称") return if tag not in self.opponent_tags: self.opponent_tags[tag] = {"min": 0, "max": 999} listbox.insert(tk.END, tag) new_tag_var.set("") else: messagebox.showwarning("警告", "标签已存在") def delete_tag(self, listbox): """删除选中的标签""" selection = listbox.curselection() if not selection: return tag = listbox.get(selection[0]) if tag in self.opponent_tags: del self.opponent_tags[tag] listbox.delete(selection[0]) def on_db_type_change(self, event): """数据库类型改变时的处理""" db_type = self.db_type_var.get() if db_type == "mysql": self.db_port_var.set("3306") self.db_schema_var.set("") # MySQL不使用schema概念 self.schema_combo.config(state="disabled") elif db_type == "postgresql": self.db_port_var.set("5432") self.db_schema_var.set("public") self.schema_combo.config(state="readonly") self.refresh_schemas() def refresh_schemas(self): """刷新数据库模式列表""" try: db_type = self.db_type_var.get() host = self.db_host_var.get() port = self.db_port_var.get() database = self.db_name_var.get() username = self.db_user_var.get() password = self.db_password_var.get() if not all([host, port, database, username]): return if db_type == "mysql": import mysql.connector conn = mysql.connector.connect( host=host, port=int(port), user=username, password=password, database=database ) cursor = conn.cursor() cursor.execute("SHOW DATABASES") self.schemas = [db[0] for db in cursor.fetchall()] cursor.close() conn.close() elif db_type == "postgresql": import psycopg2 try: conn = psycopg2.connect( host=host, port=port, user=username, password=password, database=database, connect_timeout=5 ) conn.autocommit = True cursor = conn.cursor() cursor.execute(""" SELECT schema_name FROM information_schema.schemata WHERE schema_name NOT LIKE 'pg_%' AND schema_name != 'information_schema' """) self.schemas = [schema[0] for schema in cursor.fetchall()] cursor.close() conn.close() except Exception as e: messagebox.showwarning("警告", f"获取模式列表失败: {str(e)}") self.schemas = ['public'] self.schema_combo['values'] = self.schemas if self.schemas: if self.db_schema_var.get() in self.schemas: pass elif 'public' in self.schemas: self.db_schema_var.set('public') else: self.db_schema_var.set(self.schemas[0]) except ImportError: messagebox.showerror("错误", "请安装相应的数据库驱动:\nMySQL: pip install mysql-connector-python\nPostgreSQL: pip install psycopg2-binary") except Exception as e: print(f"刷新模式列表失败: {e}") def test_db_connection(self): """测试数据库连接""" try: db_type = self.db_type_var.get() host = self.db_host_var.get() port = self.db_port_var.get() database = self.db_name_var.get() username = self.db_user_var.get() password = self.db_password_var.get() if db_type == "mysql": import mysql.connector conn = mysql.connector.connect( host=host, port=int(port), user=username, password=password, database=database ) conn.close() messagebox.showinfo("成功", "MySQL数据库连接测试成功!") elif db_type == "postgresql": import psycopg2 conn = psycopg2.connect( host=host, port=port, user=username, password=password, database=database, connect_timeout=5 ) conn.autocommit = True conn.close() messagebox.showinfo("成功", "PostgreSQL数据库连接测试成功!") except ImportError: messagebox.showerror("错误", "请安装相应的数据库驱动:\nMySQL: pip install mysql-connector-python\nPostgreSQL: pip install psycopg2-binary") except Exception as e: messagebox.showerror("错误", f"数据库连接失败: {str(e)}") def load_from_database(self): """从数据库加载数据""" try: self.update_progress("正在连接数据库...", 0) db_type = self.db_type_var.get() host = self.db_host_var.get() port = self.db_port_var.get() database = self.db_name_var.get() username = self.db_user_var.get() password = self.db_password_var.get() table_name = self.db_table_var.get() schema = self.db_schema_var.get() load_limit = int(self.load_limit_var.get()) if not table_name: messagebox.showerror("错误", "请输入表名") return if db_type == "mysql": import mysql.connector conn = mysql.connector.connect( host=host, port=int(port), user=username, password=password, database=database ) full_table_name = f"`{database}`.`{table_name}`" if database else f"`{table_name}`" query = f"SELECT * FROM {full_table_name} LIMIT {load_limit}" self.df = self.read_mysql_data(conn, query) conn.close() elif db_type == "postgresql": import psycopg2 conn = psycopg2.connect( host=host, port=port, user=username, password=password, database=database, connect_timeout=10 ) conn.autocommit = True try: if schema: full_table_name = f'"{schema}"."{table_name}"' else: full_table_name = f'"{table_name}"' query = f"SELECT * FROM {full_table_name} LIMIT {load_limit}" self.df = self.read_postgresql_data(conn, query) except Exception as e: try: cursor = conn.cursor() cursor.execute(""" SELECT table_name FROM information_schema.tables WHERE table_schema = %s AND LOWER(table_name) = LOWER(%s) """, (schema, table_name)) result = cursor.fetchone() if result: actual_table_name = result[0] full_table_name = f'"{schema}"."{actual_table_name}"' query = f"SELECT * FROM {full_table_name} LIMIT {load_limit}" self.df = self.read_postgresql_data(conn, query) else: if schema: full_table_name = f"{schema}.{table_name}" else: full_table_name = table_name query = f"SELECT * FROM {full_table_name} LIMIT {load_limit}" self.df = self.read_postgresql_data(conn, query) except Exception as inner_e: raise Exception(f"无法加载表数据: {str(inner_e)}") finally: try: conn.close() except: pass else: messagebox.showerror("错误", "不支持的数据库类型") return self.update_progress("数据处理中...", 80) # 更新字段映射下拉框 self.update_mapping_comboboxes() # 显示数据预览 self.data_text.delete(1.0, tk.END) self.data_text.insert(tk.END, f"从数据库表 {table_name} 加载数据成功!\n\n") self.data_text.insert(tk.END, f"数据形状: {self.df.shape}\n\n") self.data_text.insert(tk.END, f"所有字段: {list(self.df.columns)}\n\n") self.data_text.insert(tk.END, self.df.head(20).to_string()) self.update_progress("数据加载完成", 100) except ImportError: messagebox.showerror("错误", "请安装相应的数据库驱动:\nMySQL: pip install mysql-connector-python\nPostgreSQL: pip install psycopg2-binary") except Exception as e: error_msg = f"从数据库加载数据失败: {str(e)}\n\n详细错误信息:\n{traceback.format_exc()}" messagebox.showerror("错误", error_msg) self.update_progress("加载失败", 0) def read_mysql_data(self, conn, query): """从MySQL读取数据""" cursor = conn.cursor() cursor.execute(query) columns = [desc[0] for desc in cursor.description] data = [] for row in cursor.fetchall(): data.append(row) cursor.close() return pd.DataFrame(data, columns=columns) def read_postgresql_data(self, conn, query): """从PostgreSQL读取数据""" cursor = conn.cursor() try: cursor.execute(query) columns = [desc[0] for desc in cursor.description] data = [] for row in cursor.fetchall(): data.append(row) return pd.DataFrame(data, columns=columns) finally: try: cursor.close() except: pass def browse_file(self): file_path = filedialog.askopenfilename( title="选择数据文件", filetypes=[("CSV文件", "*.csv"), ("Excel文件", "*.xlsx;*.xls"), ("文本文件", "*.txt"), ("所有文件", "*.*")] ) if file_path: self.file_path_var.set(file_path) def detect_encoding(self, file_path): """检测文件编码""" try: with open(file_path, 'rb') as f: raw_data = f.read(10000) # 读取前10000字节来检测编码 result = chardet.detect(raw_data) return result['encoding'] except: return 'utf-8' def auto_detect_fields(self, df): """自动检测字段类型并设置映射""" column_types = {} for col in df.columns: col_lower = col.lower() # 检查是否是金额字段 if any(x in col_lower for x in ['金额', 'amount', 'money', 'amt', '交易金额']): column_types[col] = '交易金额' # 检查是否是时间字段 elif any(x in col_lower for x in ['时间', 'time', 'date', '日期', '交易时间']): column_types[col] = '交易时间' # 检查是否是卡号字段 elif any(x in col_lower for x in ['卡号', 'account', 'card', '账号', '交易卡号']): column_types[col] = '交易卡号' # 检查是否是对方卡号字段 elif any(x in col_lower for x in ['对方', 'counter', 'target', '对手', '对方卡号']): column_types[col] = '对方卡号' # 检查是否是收付标志字段 elif any(x in col_lower for x in ['标志', 'type', 'direction', '收付', '收付标志']): column_types[col] = '收付标志' return column_types def load_data(self): """加载文件数据""" file_path = self.file_path_var.get() if not file_path: messagebox.showerror("错误", "请先选择数据文件") return try: self.update_progress("正在加载数据...", 0) if file_path.endswith('.csv'): # 检测文件编码 encoding = self.detect_encoding(file_path) chunk_size = 10000 chunks = [] total_rows = 0 with open(file_path, 'r', encoding=encoding, errors='ignore') as f: total_rows = sum(1 for _ in f) - 1 self.total_rows = total_rows self.loaded_rows = 0 for chunk in pd.read_csv(file_path, chunksize=chunk_size, low_memory=False, encoding=encoding): chunks.append(chunk) self.loaded_rows += len(chunk) progress = min(80, (self.loaded_rows / self.total_rows) * 80) self.update_progress(f"正在加载数据: {self.loaded_rows}/{self.total_rows} 行", progress) if self.loaded_rows >= int(self.load_limit_var.get()): break self.df = pd.concat(chunks, ignore_index=True) elif file_path.endswith(('.xlsx', '.xls')): load_limit = int(self.load_limit_var.get()) self.df = pd.read_excel(file_path, nrows=load_limit) else: encoding = self.detect_encoding(file_path) with open(file_path, 'r', encoding=encoding, errors='ignore') as f: lines = f.readlines() self.df = self.parse_text_data(lines) self.update_progress("数据加载完成", 30) # 自动检测字段并设置映射 auto_mapping = self.auto_detect_fields(self.df) for field, var in self.mapping_vars.items(): for col, col_type in auto_mapping.items(): if col_type == field: var.set(col) break self.update_mapping_comboboxes() self.data_text.delete(1.0, tk.END) self.data_text.insert(tk.END, f"数据形状: {self.df.shape}\n\n") self.data_text.insert(tk.END, f"所有字段: {list(self.df.columns)}\n\n") self.data_text.insert(tk.END, self.df.head(20).to_string()) self.update_progress("就绪", 100) except Exception as e: messagebox.showerror("错误", f"读取文件失败: {str(e)}") print(f"错误详情: {str(e)}") self.update_progress("加载失败", 0) def update_mapping_comboboxes(self): """更新字段映射下拉框""" if self.df is None: return all_columns = list(self.df.columns) for field, var in self.mapping_vars.items(): for widget in self.root.winfo_children(): if isinstance(widget, ttk.Combobox) and widget.cget('textvariable') == var._name: widget['values'] = all_columns # 如果当前没有设置值,尝试自动匹配 if not var.get(): for col in all_columns: col_lower = col.lower() if field == '交易卡号' and any(x in col_lower for x in ['交易卡号', 'account', 'card']): var.set(col) break elif field == '交易时间' and any(x in col_lower for x in ['交易时间', 'time', 'date']): var.set(col) break elif field == '交易金额' and any(x in col_lower for x in ['交易金额', 'amount', 'money']): var.set(col) break elif field == '收付标志' and any(x in col_lower for x in ['标志', 'type', 'direction']): var.set(col) break elif field == '对方卡号' and any(x in col_lower for x in ['对方', 'counter', 'target']): var.set(col) break def parse_text_data(self, lines): """解析文本数据""" transactions = [] load_limit = int(self.load_limit_var.get()) for i, line in enumerate(lines[:load_limit + 1]): if i == 0: headers = line.strip().split(',') continue line = line.strip() if not line: continue parts = re.split(r'\s+', line) if len(parts) >= 4: transaction = { '原始字段1': parts[0] if len(parts) > 0 else '', '原始字段2': parts[1] if len(parts) > 1 else '', '原始字段3': parts[2] if len(parts) > 2 else '', '原始字段4': parts[3] if len(parts) > 3 else '', '原始字段5': parts[4] if len(parts) > 4 else '' } transactions.append(transaction) return pd.DataFrame(transactions) def update_progress(self, message, value): """更新进度条和标签""" self.progress_var.set(value) self.progress_label.config(text=message) self.root.update_idletasks() def start_analysis(self): """开始分析""" if self.df is None or self.df.empty: messagebox.showerror("错误", "请先加载数据") return for field, var in self.mapping_vars.items(): if not var.get(): messagebox.showerror("错误", f"请设置'{field}'的字段映射") return self.stop_analysis = False self.analysis_thread = threading.Thread(target=self.analyze_data_thread) self.analysis_thread.daemon = True self.analysis_thread.start() def stop_analysis_process(self): """停止分析过程""" self.stop_analysis = True self.update_progress("正在停止分析...", 0) def debug_analysis(self): """调试模式:显示详细的分析过程""" if self.df is None or self.df.empty: messagebox.showerror("错误", "请先加载数据") return # 检查字段映射 for field, var in self.mapping_vars.items(): if not var.get(): messagebox.showerror("错误", f"请设置'{field}'的字段映射") return try: # 创建字段映射 column_mapping = {var.get(): field for field, var in self.mapping_vars.items()} # 选择需要的字段 selected_columns = list(self.mapping_vars.values()) selected_columns = [var.get() for var in selected_columns] analysis_df = self.df[selected_columns].copy() analysis_df.columns = list(self.mapping_vars.keys()) # 显示数据信息 debug_info = f"数据总行数: {len(analysis_df)}\n" debug_info += f"唯一卡号数量: {analysis_df['交易卡号'].nunique()}\n" debug_info += f"收付标志分布:\n{analysis_df['收付标志'].value_counts()}\n\n" # 检查前几个账户的交易模式 sample_accounts = analysis_df['交易卡号'].unique()[:5] for account in sample_accounts: account_data = analysis_df[analysis_df['交易卡号'] == account].copy() debug_info += f"\n账户 {account} 的交易记录:\n" debug_info += f"总交易数: {len(account_data)}\n" debug_info += f"收款交易数: {len(account_data[account_data['收付标志'].astype(str).str.contains('收|1|收入|收款', case=False, na=False)])}\n" debug_info += f"付款交易数: {len(account_data[account_data['收付标志'].astype(str).str.contains('出|付|0|支出|付款', case=False, na=False)])}\n" # 显示前几条交易记录 debug_info += "前5条交易记录:\n" debug_info += account_data.head().to_string() + "\n" # 显示调试信息 self.result_text.delete(1.0, tk.END) self.result_text.insert(tk.END, "=== 调试信息 ===\n\n") self.result_text.insert(tk.END, debug_info) except Exception as e: messagebox.showerror("错误", f"调试过程中出现错误: {str(e)}") def clean_and_transform_data(self, df): """数据清洗和转换""" # 转换交易金额为数值类型 - 修复语法错误 df['交易金额'] = pd.to_numeric(df['交易金额'], errors='coerce') # 转换交易时间为datetime df['交易时间'] = pd.to_datetime(df['交易时间'], errors='coerce') # 清理收付标志 df['收付标志'] = df['收付标志'].astype(str).str.strip().str.upper() # 移除无效数据 df = df.dropna(subset=['交易卡号', '交易时间', '交易金额', '收付标志']) return df def analyze_data_thread(self): """在新线程中执行分析""" try: self.update_progress("开始分析数据...", 0) # 创建字段映射 column_mapping = {var.get(): field for field, var in self.mapping_vars.items()} # 选择需要的字段 selected_columns = list(self.mapping_vars.values()) selected_columns = [var.get() for var in selected_columns] analysis_df = self.df[selected_columns].copy() analysis_df.columns = list(self.mapping_vars.keys()) # 数据清洗和转换 analysis_df = self.clean_and_transform_data(analysis_df) min_continuous = int(self.min_continuous_var.get()) max_gap_minutes = float(self.max_gap_var.get()) # 允许小数分钟 amount_threshold = float(self.amount_threshold_var.get()) / 100 time_window_minutes = float(self.time_window_var.get()) # 使用分钟 max_out_gap_minutes = float(self.max_out_gap_var.get()) # 使用分钟 results = [] all_traders = analysis_df['交易卡号'].unique() total_traders = len(all_traders) for idx, trader in enumerate(all_traders): if self.stop_analysis: self.update_progress("分析已停止", 0) return progress = (idx / total_traders) * 80 self.update_progress(f"分析账户 {idx + 1}/{total_traders}: {trader}", progress) if pd.isna(trader): continue trader_data = analysis_df[analysis_df['交易卡号'] == trader].copy() trader_data = trader_data.sort_values('交易时间') if len(trader_data) < min_continuous + 1: continue # 找出所有收款交易 receives = trader_data[ trader_data['收付标志'].isin(['收', '进', '1', '收入', '收款', 'IN', 'INCOME', 'RECEIVE'])].copy() if len(receives) < min_continuous: continue # 按时间窗口分组查找连续收款序列 sequences = self.find_continuous_sequences(receives, min_continuous, max_gap_minutes, time_window_minutes) # 分析每个序列 for seq in sequences: if self.stop_analysis: break # 检查交易对手标签条件 if not self.check_opponent_tag_conditions(seq): continue seq_end_time = seq['交易时间'].max() total_received = seq['交易金额'].sum() # 查找时间窗口内的后续付款交易 time_window_end = seq_end_time + timedelta(minutes=max_out_gap_minutes) out_transactions = trader_data[ (trader_data['收付标志'].isin(['出', '付', '0', '支出', '付款', 'OUT', 'PAY', 'EXPENSE'])) & (trader_data['交易时间'] > seq_end_time) & (trader_data['交易时间'] <= time_window_end) ] for _, out_row in out_transactions.iterrows(): out_amount = out_row['交易金额'] # 检查金额匹配(允许一定误差) if abs(out_amount - total_received) / max(total_received, out_amount) <= amount_threshold: # 计算时间差(分钟) time_diff = (out_row['交易时间'] - seq_end_time).total_seconds() / 60 result = { '交易卡号': trader, '连续接收次数': len(seq), '总接收金额': total_received, '出账金额': out_amount, '对方卡号': out_row['对方卡号'], '金额差异百分比': f"{abs(out_amount - total_received) / max(total_received, out_amount) * 100:.2f}%", '模式': '连续进账后出账', '连续交易时间范围': f"{seq['交易时间'].min()} 至 {seq['交易时间'].max()}", '出账时间': out_row['交易时间'], '时间差(分钟)': f"{time_diff:.2f}", '交易对手标签': self.get_sequence_tags(seq) } results.append(result) self.results = pd.DataFrame(results) self.update_progress("分析完成", 90) # 更新结果显示 self.root.after(0, self.update_results_display) self.update_progress("就绪", 100) except Exception as e: error_msg = f"分析过程中出现错误: {str(e)}\n\n详细错误信息:\n{traceback.format_exc()}" self.root.after(0, lambda: messagebox.showerror("错误", error_msg)) self.update_progress("分析出错", 0) def check_opponent_tag_conditions(self, sequence): """检查交易对手标签条件""" if not self.opponent_tags: return True # 没有设置条件,直接通过 # 统计序列中每个标签的出现次数 tag_counts = {} for _, row in sequence.iterrows(): opponent = str(row['对方卡号']) for tag in self.opponent_tags.keys(): # 这里可以根据实际业务逻辑判断对手是否属于某个标签 # 示例:简单判断对手卡号是否包含标签名 if tag in opponent: tag_counts[tag] = tag_counts.get(tag, 0) + 1 # 检查所有条件是否满足 for tag, condition in self.opponent_tags.items(): count = tag_counts.get(tag, 0) if count < condition["min"] or count > condition["max"]: return False return True def get_sequence_tags(self, sequence): """获取序列的交易对手标签统计""" tag_counts = {} for _, row in sequence.iterrows(): opponent = str(row['对方卡号']) for tag in self.opponent_tags.keys(): if tag in opponent: tag_counts[tag] = tag_counts.get(tag, 0) + 1 return "; ".join([f"{tag}:{count}" for tag, count in tag_counts.items()]) def find_continuous_sequences(self, receives, min_continuous, max_gap_minutes, time_window_minutes): """查找连续的交易序列(使用分钟)""" receives = receives.sort_values('交易时间') sequences = [] current_sequence = [] for _, row in receives.iterrows(): if not current_sequence: current_sequence.append(row) else: last_time = current_sequence[-1]['交易时间'] current_time = row['交易时间'] # 检查时间间隔(分钟) time_diff = (current_time - last_time).total_seconds() / 60 if time_diff <= max_gap_minutes: current_sequence.append(row) else: # 检查时间窗口 if len(current_sequence) >= min_continuous: seq_df = pd.DataFrame(current_sequence) time_span = (seq_df['交易时间'].max() - seq_df['交易时间'].min()).total_seconds() / 60 if time_span <= time_window_minutes: sequences.append(seq_df) current_sequence = [row] # 处理最后一个序列 if len(current_sequence) >= min_continuous: seq_df = pd.DataFrame(current_sequence) time_span = (seq_df['交易时间'].max() - seq_df['交易时间'].min()).total_seconds() / 60 if time_span <= time_window_minutes: sequences.append(seq_df) return sequences def update_results_display(self): """更新结果显示""" self.result_text.delete(1.0, tk.END) if not self.results.empty: self.result_text.insert(tk.END, f"找到 {len(self.results)} 条符合条件的交易模式:\n\n") self.result_text.insert(tk.END, self.results.to_string(index=False)) # 分析共同收款方 common_recipients = self.results['对方卡号'].value_counts() if len(common_recipients) > 0: self.result_text.insert(tk.END, "\n\n共同收款方分析:\n") for recipient, count in common_recipients.items(): if count > 1: recipient_data = self.results[self.results['对方卡号'] == recipient] total_amount = recipient_data['出账金额'].sum() sources = len(recipient_data['交易卡号'].unique()) self.result_text.insert(tk.END, f"\n收款方 {recipient}: {count} 笔交易, {sources} 个来源账户, 总金额 {total_amount:.2f}") else: self.result_text.insert(tk.END, "未找到符合条件的交易记录\n") self.result_text.insert(tk.END, "建议使用'调试模式'检查数据格式和字段映射") def export_results(self): """导出分析结果""" if self.results is None or self.results.empty: messagebox.showerror("错误", "没有结果可导出") return file_path = filedialog.asksaveasfilename( title="保存结果", defaultextension=".csv", filetypes=[("CSV文件", "*.csv"), ("Excel文件", "*.xlsx")] ) if file_path: try: if file_path.endswith('.csv'): self.results.to_csv(file_path, index=False, encoding='utf-8-sig') else: self.results.to_excel(file_path, index=False) messagebox.showinfo("成功", f"结果已保存到: {file_path}") except Exception as e: messagebox.showerror("错误", f"保存失败: {str(e)}") def main(): root = tk.Tk() app = TransactionAnalyzerApp(root) root.mainloop() if __name__ == "__main__": main()对手标签是对手标签,对手卡号是交易对手账卡号
09-09
基于数据驱动的 Koopman 算子的递归神经网络模型线性化,用于纳米定位系统的预测控制研究(Matlab代码实现)内容概要:本文围绕“基于数据驱动的Koopman算子的递归神经网络模型线性化”展开,旨在研究纳米定位系统的预测控制问题,并提供完整的Matlab代码实现。文章结合数据驱动方法与Koopman算子理论,利用递归神经网络(RNN)对非线性系统进行建模与线性化处理,从而提升纳米级定位系统的精度与动态响应性能。该方法通过提取系统隐含动态特征,构建近似线性模型,便于后续模型预测控制(MPC)的设计与优化,适用于高精度自动化控制场景。文中还展示了相关实验验证与仿真结果,证明了该方法的有效性和先进性。; 适合人群:具备一定控制理论基础和Matlab编程能力,从事精密控制、智能制造、自动化或相关领域研究的研究生、科研人员及工程技术人员。; 使用场景及目标:①应用于纳米级精密定位系统(如原子力显微镜、半导体制造设备)中的高性能控制设计;②为非线性系统建模与线性化提供一种结合深度学习与现代控制理论的新思路;③帮助读者掌握Koopman算子、RNN建模与模型预测控制的综合应用。; 阅读建议:建议读者结合提供的Matlab代码逐段理解算法实现流程,重点关注数据预处理、RNN结构设计、Koopman观测矩阵构建及MPC控制器集成等关键环节,并可通过更换实际系统数据进行迁移验证,深化对方法泛化能力的理解。
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值