Ext之comboBox 本地数据字典

本文介绍了一个关于车辆状态选择框的前端配置示例,该选择框使用ExtJS库实现,并预设了两种状态:启用和停用。通过本地存储方式展示选项,适用于简单的状态切换场景。

摘要生成于 C知道 ,由 DeepSeek-R1 满血版支持, 前往体验 >

//车辆状态(值固定,写在前台页面,不用后台数据字典)	
var carState = new Ext.form.ComboBox({
hiddenName:'car.status',
allowBlank:false,
width : 165,
editable:false,
fieldLabel:'<font color="red">*</font>车辆状态',
triggerAction: 'all',
lazyRender:true,
mode: 'local',
store: new Ext.data.ArrayStore({
fields: [
'idValue',
'displayText'
],
data: [['0', '停用'], ['1','启用']]
}),
valueField: 'idValue',
displayField: 'displayText'
});



--
def analyze_table(self): """执行表格分析(在后台线程中运行)""" try: # 重置分析状态 self.reset_analysis() self.header_row = self.header_row_var.get() file_ext = os.path.splitext(self.file_path)[1].lower() # 更新进度 self.progress = 10 time.sleep(0.1) if self.stop_analysis: return # 确保有原始数据 if self.raw_data is None: self.load_and_preview() if file_ext in ['.xlsx', '.xls']: # 读取Excel文件,指定表头行 self.df = pd.read_excel(self.file_path, header=self.header_row) self.workbook = load_workbook(self.file_path) self.sheet = self.workbook.active elif file_ext == '.csv': # 读取CSV文件,指定表头行 with open(self.file_path, 'rb') as f: result = chardet.detect(f.read()) encoding = result['encoding'] or 'utf-8' self.df = pd.read_csv( self.file_path, header=self.header_row, encoding=encoding, engine='c' ) elif file_ext == '.parquet': # 读取Parquet文件 self.df = pd.read_parquet(self.file_path) else: messagebox.showerror("错误", "不支持的文件类型") return # 确保DataFrame不为空 if self.df is None or self.df.empty: raise ValueError("加载的数据为空") # 清理列名 self.df.columns = [str(col).strip() for col in self.df.columns] # 初始化列底色信息 self.column_has_color = {col: False for col in self.df.columns} # 更新进度 self.progress = 30 time.sleep(0.1) if self.stop_analysis: return # 分析表头 self.analyze_headers() # 更新进度 self.progress = 50 time.sleep(0.1) if self.stop_analysis: return # 分析列格式 if file_ext in ['.xlsx', '.xls']: self.analyze_format() # 更新进度 self.progress = 70 time.sleep(0.1) if self.stop_analysis: return # 分析数据质量 self.analyze_data_quality() # 更新列选择框 self.col_combobox['values'] = list(self.df.columns) if self.df.columns.size > 0: self.col_combobox.current(0) self.update_stats() # 更新筛选控件(添加底色标记) display_columns = [] for col in self.df.columns: has_color = self.column_has_color.get(col, False) display_columns.append(f"{col} (有底色)" if has_color else f"{col} (无底色)") self.filter_col_combobox['values'] = display_columns # 更新联动统计控件(添加底色标记) display_columns = [] for col in self.df.columns: has_color = self.column_has_color.get(col, False) display_columns.append(f"{col} (有底色)" if has_color else f"{col} (无底色)") self.linkage_col_combobox['values'] = display_columns self.status_var.set("分析完成") # 更新进度 self.progress = 100 except Exception as e: error_msg = f"分析表格时出错: {str(e)}" logging.error(error_msg) logging.error(traceback.format_exc()) messagebox.showerror("分析错误", error_msg) self.status_var.set(f"错误: {error_msg}") def analyze_headers(self): # 清空表头表格 for item in self.headers_tree.get_children(): self.headers_tree.delete(item) # 添加表头信息 for i, col in enumerate(self.df.columns): if self.stop_analysis: return # 检查列是否有底色(只针对数据区域) has_color = self.column_has_color.get(col, False) col_display = f"{col} (有底色)" if has_color else f"{col} (无底色)" sample = "" # 获取前3个非空值作为示例 non_empty = self.df[col].dropna() if len(non_empty) > 0: sample = ", ".join(str(x) for x in non_empty.head(3).tolist()) # 推断数据类型 dtype = str(self.df[col].dtype) dtype_map = { 'object': '文本', 'int64': '整数', 'float64': '小数', 'bool': '布尔值', 'datetime64': '日期时间', 'category': '分类数据' } for k, v in dtype_map.items(): if k in dtype: dtype = v break # 唯一值统计 unique_count = self.df[col].nunique() # 空值率 null_count = self.df[col].isnull().sum() null_percent = f"{null_count / len(self.df):.1%}" if len(self.df) > 0 else "0%" self.headers_tree.insert("", "end", values=( i+1, col_display, # 使用带底色标记的列名 dtype, sample, unique_count, null_percent )) def analyze_format(self): """分析每个单元格的底色状态""" if not self.workbook or not self.sheet: return # 初始化底色状态DataFrame(与数据表相同形状) self.cell_bg_status = pd.DataFrame( "无底色", index=self.df.index, columns=self.df.columns ) # 分析每个单元格的底色 for row_idx in range(len(self.df)): for col_idx, col_name in enumerate(self.df.columns): # 转换为Excel坐标(行号从1开始) excel_row = self.header_row + 2 + row_idx col_letter = self.get_column_letter(col_idx + 1) cell = self.sheet[f"{col_letter}{excel_row}"] # 检测底色 has_bg = False if cell.fill and isinstance(cell.fill, PatternFill): if cell.fill.fgColor and cell.fill.fgColor.rgb: if cell.fill.fgColor.rgb != "00000000": # 排除透明 has_bg = True # 存储底色状态 self.cell_bg_status.iloc[row_idx, col_idx] = "有底色" if has_bg else "无底色" # 标记列是否有底色 if has_bg: self.column_has_color[col_name] = True def analyze_data_quality(self): """分析数据质量问题""" # 清空质量表格 for item in self.quality_tree.get_children(): self.quality_tree.delete(item) issues = [] total_rows = len(self.df) # 1. 检查空值 null_counts = self.df.isnull().sum() for col, count in null_counts.items(): if count > 0: severity = "高" if count / total_rows > 0.3 else "中" if count / total_rows > 0.1 else "低" issues.append({ "issue": "空值", "count": count, "severity": severity, "columns": col, "suggestion": f"考虑使用平均值/中位数填充或删除空值行" }) # 2. 检查重复行 duplicate_rows = self.df.duplicated().sum() if duplicate_rows > 0: severity = "高" if duplicate_rows / total_rows > 0.1 else "中" issues.append({ "issue": "重复行", "count": duplicate_rows, "severity": severity, "columns": "所有列", "suggestion": "删除重复行或分析重复原因" }) # 3. 检查数据类型不一致 for col in self.df.columns: if self.df[col].dtype == 'object': # 检查混合数据类型 type_counts = self.df[col].apply(type).value_counts() if len(type_counts) > 1: issues.append({ "issue": "数据类型不一致", "count": len(self.df[col]), "severity": "中", "columns": col, "suggestion": "统一数据类型或转换格式" }) # 4. 检查异常值(仅数值列) numeric_cols = self.df.select_dtypes(include=np.number).columns for col in numeric_cols: q1 = self.df[col].quantile(0.25) q3 = self.df[col].quantile(0.75) iqr = q3 - q1 lower_bound = q1 - 1.5 * iqr upper_bound = q3 + 1.5 * iqr outliers = self.df[(self.df[col] < lower_bound) | (self.df[col] > upper_bound)] outlier_count = len(outliers) if outlier_count > 0: severity = "高" if outlier_count / total_rows > 0.05 else "中" issues.append({ "issue": "异常值", "count": outlier_count, "severity": severity, "columns": col, "suggestion": "检查数据准确性或进行转换" }) # 添加到表格 for issue in issues: self.quality_tree.insert("", "end", values=( issue["issue"], issue["count"], issue["severity"], issue["columns"], issue["suggestion"] )) def get_column_letter(self, col_idx): """将列索引转换为Excel列字母""" letters = [] while col_idx: col_idx, remainder = divmod(col_idx - 1, 26) letters.append(chr(65 + remainder)) return ''.join(reversed(letters)) def update_filter_values(self, event=None): """更新筛选值列表(带底色标记)""" selected_col_display = self.filter_col_var.get() if not selected_col_display: return # 从显示列名中提取原始列名(移除底色标记) if " (有底色)" in selected_col_display: selected_col = selected_col_display.replace(" (有底色)", "") elif " (无底色)" in selected_col_display: selected_col = selected_col_display.replace(" (无底色)", "") else: selected_col = selected_col_display if not selected_col or selected_col not in self.df.columns: return # 获取列的值和对应的底色状态 values = self.df[selected_col].astype(str) # 创建带底色标记的值列表 unique_values = set() if not self.cell_bg_status.empty and selected_col in self.cell_bg_status: bg_status = self.cell_bg_status[selected_col] for val, bg in zip(values, bg_status): unique_values.add(f"{val} ({bg})") else: unique_values = set(values) # 如果唯一值太多,只显示前50个 if len(unique_values) > 50: unique_values = sorted(unique_values)[:50] messagebox.showinfo("提示", f"该列有超过50个唯一值,只显示前50个") # 更新值选择框 self.filter_value_combobox['values'] = sorted(unique_values) self.filter_value_combobox.set('') def apply_filter(self): """应用筛选条件(考虑底色状态)""" if self.df is None: messagebox.showwarning("警告", "请先分析表格") return selected_col_display = self.filter_col_var.get() selected_value_with_bg = self.filter_value_var.get() # 检查输入有效性 if not selected_col_display or not selected_value_with_bg: messagebox.showwarning("警告", "请选择列和值") return try: # 从显示列名中提取原始列名(移除底色标记) if " (有底色)" in selected_col_display: selected_col = selected_col_display.replace(" (有底色)", "") elif " (无底色)" in selected_col_display: selected_col = selected_col_display.replace(" (无底色)", "") else: selected_col = selected_col_display # 从带底色标记的值中提取原始值和底色状态 if " (有底色)" in selected_value_with_bg: selected_value = selected_value_with_bg.replace(" (有底色)", "") selected_bg = "有底色" elif " (无底色)" in selected_value_with_bg: selected_value = selected_value_with_bg.replace(" (无底色)", "") selected_bg = "无底色" else: selected_value = selected_value_with_bg selected_bg = None # 应用值筛选 if pd.api.types.is_numeric_dtype(self.df[selected_col]): try: selected_value = float(selected_value) value_condition = (self.df[selected_col] == selected_value) except: value_condition = (self.df[selected_col].astype(str) == selected_value) else: value_condition = (self.df[selected_col].astype(str) == selected_value) # 应用底色状态筛选(如果有) if selected_bg and not self.cell_bg_status.empty and selected_col in self.cell_bg_status: bg_condition = (self.cell_bg_status[selected_col] == selected_bg) condition = value_condition & bg_condition else: condition = value_condition # 应用组合筛选条件 self.filtered_df = self.df[condition] # 更新预览 self.update_preview(self.filtered_df) # 更新筛选信息 match_count = len(self.filtered_df) total_count = len(self.df) self.filter_info_var.set( f"筛选结果: {match_count} 行 (共 {total_count} 行, {match_count/total_count:.1%})" ) # 更新统计信息 if self.col_var.get(): self.update_stats() except Exception as e: messagebox.showerror("错误", f"筛选数据时出错: {str(e)}") def clear_filter(self): """清除筛选条件""" self.filtered_df = None # 重置筛选控件 self.filter_col_var.set('') self.filter_value_var.set('') self.filter_value_combobox['values'] = [] # 恢复原始数据预览 if self.df is not None: self.update_preview(self.df) # 更新筛选信息 self.filter_info_var.set("筛选已清除") # 更新统计信息 if self.col_var.get(): self.update_stats() # 清除联动统计 for item in self.linkage_tree.get_children(): self.linkage_tree.delete(item) self.linkage_info_var.set("未统计") def update_preview(self, df): """更新数据预览 - 增强健壮性""" try: # 确保有数据 if df is None or df.empty: logging.warning("尝试更新预览时数据为空") return # 创建带底色状态的预览副本 preview_df = df.copy().head(1000) # 添加底色状态列(如果存在) if not self.cell_bg_status.empty: for col in df.columns: if col in self.cell_bg_status: preview_df[f"{col}_底色状态"] = self.cell_bg_status[col].loc[preview_df.index] # 更新或创建表格 if self.pt is not None and self.pt_frame is not None: try: # 更新现有表格 self.pt.model = TableModel(dataframe=preview_df) self.pt.redraw() except Exception as e: logging.error(f"更新预览时出错: {str(e)}") # 尝试重新创建表格 self.safe_destroy_preview() self.create_preview_table(preview_df) else: # 创建新表格 self.create_preview_table(preview_df) except Exception as e: logging.error(f"更新预览失败: {str(e)}") messagebox.showerror("预览错误", f"无法更新预览: {str(e)}") def create_preview_table(self, df): """创建新的预览表格""" try: # 安全销毁旧组件 self.safe_destroy_preview() # 创建新框架 self.pt_frame = ttk.Frame(self.preview_table) self.pt_frame.pack(fill=tk.BOTH, expand=True) # 创建新表格 self.pt = Table( self.pt_frame, showtoolbar=False, showstatusbar=True, width=400, height=300 ) self.pt.model = TableModel(dataframe=df) self.pt.show() except Exception as e: logging.error(f"创建预览表格失败: {str(e)}") messagebox.showerror("预览错误", f"无法创建预览表格: {str(e)}") def update_linkage_stats(self, event=None): """更新联动统计结果(考虑底色状态)""" if not hasattr(self, 'df') or self.df.empty: messagebox.showwarning("警告", "请先分析表格") return target_col_display = self.linkage_col_var.get() if not target_col_display: return # 从显示列名中提取原始列名(移除底色标记) if " (有底色)" in target_col_display: target_col = target_col_display.replace(" (有底色)", "") elif " (无底色)" in target_col_display: target_col = target_col_display.replace(" (无底色)", "") else: target_col = target_col_display # 清空现有结果 for item in self.linkage_tree.get_children(): self.linkage_tree.delete(item) # 确定使用筛选数据还是完整数据 data_source = self.filtered_df if self.filtered_df is not None else self.df # 添加底色状态列(如果存在) if not self.cell_bg_status.empty and target_col in self.cell_bg_status: # 合并底色状态到数据中 temp_df = data_source.copy() temp_df['底色状态'] = self.cell_bg_status[target_col].loc[temp_df.index] # 统计值分布(包含底色状态) value_counts = temp_df.groupby([target_col, '底色状态']).size().reset_index(name='计数') total_count = len(data_source) # 添加到树状视图 for _, row in value_counts.iterrows(): percent = f"{row['计数'] / total_count:.1%}" self.linkage_tree.insert("", "end", values=( row[target_col], row['底色状态'], row['计数'], percent )) # 更新统计信息 self.linkage_info_var.set( f"共 {len(value_counts)} 个组合 (总计 {total_count} 行)" ) else: # 没有底色信息时的处理 value_counts = data_source[target_col].value_counts() total_count = len(data_source) for value, count in value_counts.items(): percent = f"{count / total_count:.1%}" self.linkage_tree.insert("", "end", values=(value, "", count, percent)) self.linkage_info_var.set( f"共 {len(value_counts)} 个唯一值 (总计 {total_count} 行)" ) def update_stats(self, event=None): """更新列统计信息""" # 清空统计表格 for item in self.stats_tree.get_children(): self.stats_tree.delete(item) selected_col_display = self.col_var.get() if not selected_col_display: return # 从显示列名中提取原始列名(移除底色标记) if " (有底色)" in selected_col_display: selected_col = selected_col_display.replace(" (有底色)", "") elif " (无底色)" in selected_col_display: selected_col = selected_col_display.replace(" (无底色)", "") else: selected_col = selected_col_display if not selected_col or selected_col not in self.df.columns: return col_data = self.df[selected_col] # 基本统计信息 self.stats_tree.insert("", "end", values=("列名", selected_col)) self.stats_tree.insert("", "end", values=("数据类型", str(col_data.dtype))) self.stats_tree.insert("", "end", values=("总行数", len(col_data))) non_null_count = col_data.count() null_count = len(col_data) - non_null_count self.stats_tree.insert("", "end", values=("非空值数量", non_null_count)) self.stats_tree.insert("", "end", values=("空值数量", null_count)) self.stats_tree.insert("", "end", values=("空值比例", f"{null_count/len(col_data):.2%}" if len(col_data) > 0 else "0%")) # 值统计 if pd.api.types.is_numeric_dtype(col_data): try: self.stats_tree.insert("", "end", values=("平均值", f"{col_data.mean():.4f}")) self.stats_tree.insert("", "end", values=("中位数", f"{col_data.median():.4f}")) self.stats_tree.insert("", "end", values=("最小值", f"{col_data.min():.4f}")) self.stats_tree.insert("", "end", values=("最大值", f"{col_data.max():.4f}")) self.stats_tree.insert("", "end", values=("标准差", f"{col_data.std():.4f}")) self.stats_tree.insert("", "end", values=("偏度", f"{col_data.skew():.4f}")) self.stats_tree.insert("", "end", values=("峰度", f"{col_data.kurtosis():.4f}")) self.stats_tree.insert("", "end", values=("总和", f"{col_data.sum():.4f}")) except: pass # 唯一值统计 unique_count = col_data.nunique() self.stats_tree.insert("", "end", values=("唯一值数量", unique_count)) unique_ratio = unique_count/non_null_count if non_null_count > 0 else 0 self.stats_tree.insert("", "end", values=("唯一值比例", f"{unique_ratio:.2%}")) # 最常出现的值 if non_null_count > 0: try: top_values = col_data.value_counts().head(3) top_str = ", ".join([f"{val} ({count})" for val, count in top_values.items()]) self.stats_tree.insert("", "end", values=("最常见值", top_str)) except: pass def perform_advanced_analysis(self): """执行高级分析""" if self.df is None or self.df.empty: messagebox.showwarning("警告", "请先分析表格") return self.status_var.set("执行高级分析...") self.progress_var.set(0) # 备份原始底色状态 original_bg_status = self.cell_bg_status.copy() if not self.cell_bg_status.empty else None try: # 缺失值处理 missing_strategy = self.missing_var.get() if missing_strategy != "不处理": numeric_cols = self.df.select_dtypes(include=np.number).columns categorical_cols = self.df.select_dtypes(include='object').columns if missing_strategy == "删除行": self.df = self.df.dropna() else: if missing_strategy == "平均值填充": strategy = 'mean' elif missing_strategy == "中位数填充": strategy = 'median' else: # 众数填充 strategy = 'most_frequent' # 数值列填充 if len(numeric_cols) > 0: num_imputer = SimpleImputer(strategy=strategy) self.df[numeric_cols] = num_imputer.fit_transform(self.df[numeric_cols]) # 分类列填充 if len(categorical_cols) > 0: cat_imputer = SimpleImputer(strategy='most_frequent') self.df[categorical_cols] = cat_imputer.fit_transform(self.df[categorical_cols]) # 异常值检测 if self.outlier_var.get(): numeric_cols = self.df.select_dtypes(include=np.number).columns if len(numeric_cols) > 0: clf = IsolationForest(contamination=0.05, random_state=42) outliers = clf.fit_predict(self.df[numeric_cols]) self.df['is_outlier'] = outliers == -1 # 在界面上显示异常值数量 outlier_count = self.df['is_outlier'].sum() messagebox.showinfo("异常值检测", f"检测到 {outlier_count} 个异常值\n已添加 'is_outlier' 列标记") # 数据标准化 if self.scale_var.get(): numeric_cols = self.df.select_dtypes(include=np.number).columns if len(numeric_cols) > 0: for col in numeric_cols: min_val = self.df[col].min() max_val = self.df[col].max() if max_val > min_val: self.df[col] = (self.df[col] - min_val) / (max_val - min_val) # 更新分析结果 self.analyze_headers() self.analyze_data_quality() # 更新列选择框 self.col_combobox['values'] = list(self.df.columns) if self.df.columns.size > 0: self.col_combobox.current(0) self.update_stats() # 高级分析后恢复底色状态 if original_bg_status is not None: # 只保留现有行的底色状态 self.cell_bg_status = original_bg_status.loc[self.df.index] # 处理新增列(如果没有底色状态,设为"无底色") for col in self.df.columns: if col not in self.cell_bg_status: self.cell_bg_status[col] = "无底色" self.status_var.set("高级分析完成") self.progress_var.set(100) except Exception as e: if original_bg_status is not None: self.cell_bg_status = original_bg_status messagebox.showerror("错误", f"高级分析时出错: {str(e)}") self.status_var.set(f"错误: {str(e)}") if __name__ == "__main__": # 设置未捕获异常的全局处理 def handle_exception(exc_type, exc_value, exc_traceback): """处理未捕获的异常""" error_msg = f"未捕获异常:\n{exc_type.__name__}: {exc_value}" logging.critical(error_msg) logging.critical(traceback.format_exc()) # 在GUI中显示错误 root = tk.Tk() root.withdraw() # 隐藏主窗口 messagebox.showerror("未捕获异常", error_msg) root.destroy() sys.exit(1) sys.excepthook = handle_exception try: root = tk.Tk() app = TableAnalyzer(root) root.mainloop() except Exception as e: error_msg = f"初始化应用失败: {str(e)}" logging.critical(error_msg) logging.critical(traceback.format_exc()) messagebox.showerror("致命错误", error_msg) sys.exit(1) 后面要怎么改
最新发布
08-06
import locale locale.setlocale(locale.LC_ALL, 'chinese') import tkinter as tk from tkinter import filedialog, messagebox, ttk, scrolledtext import csv from datetime import datetime import logging import os from collections import defaultdict class CSVProcessorApp: def __init__(self, root): self.root = root self.root.title("CSV_ProcessPro") self.root.geometry("800x600") # 调整窗口大小 self.root.resizable(False, False) # 禁止调整窗口大小 # 初始化变量 self.file_path = tk.StringVar() self.csv_data = [] self.headers = [] self.setup_variables() # 初始化日志 self.setup_logging() # 创建UI self.create_widgets() # 设置全局样式 self.setup_styles() def setup_styles(self): """设置全局样式""" self.style = ttk.Style() self.style.configure("TFrame", background="#f0f0f0") self.style.configure("TLabel", background="#f0f0f0", font=('Arial', 9)) self.style.configure("TButton", font=('Arial', 9, 'bold')) self.style.configure("Accent.TButton", foreground="black", font=('Arial', 9, 'bold'), borderwidth=2, relief="raised") self.style.map("Accent.TButton", background=[("active", "#4a90e2"), ("!active", "#d4e6ff")], bordercolor=[("active", "#4a90e2"), ("!active", "#ffcc00")]) self.style.configure("Remove.TButton", foreground="black", font=('Arial', 8), background="#ffcccc", borderwidth=1, relief="solid") self.style.map("Remove.TButton", background=[("active", "#ff9999"), ("!active", "#ffcccc")]) def setup_variables(self): """初始化所有动态变量""" # 排序相关 self.sort_header = tk.StringVar() self.sort_order = tk.StringVar(value="升序") # 去重相关 self.dedupe_header = tk.StringVar() # 删除行相关 self.delete_keyword = tk.StringVar() self.delete_column = tk.StringVar() self.delete_case_sensitive = tk.BooleanVar() # 合并文件相关 self.merge_file_paths = [] self.merge_column = tk.StringVar() # 状态变量 self.enable_sort = tk.BooleanVar() self.enable_dedupe = tk.BooleanVar() self.enable_custom_letter_sort = tk.BooleanVar() self.letter_range_start = tk.StringVar(value="A") self.letter_range_end = tk.StringVar(value="Z") def setup_logging(self): """配置日志记录""" logging.basicConfig( level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s', handlers=[ logging.FileHandler('csv_processor.log', encoding='utf-8'), logging.StreamHandler() ] ) self.logger = logging.getLogger(__name__) self.logger.info("===== 程序启动 =====") def create_widgets(self): """创建所有界面组件""" # 主容器 main_container = ttk.Frame(self.root, padding=5) main_container.pack(fill=tk.BOTH, expand=True) # 使用notebook分页组织功能 self.notebook = ttk.Notebook(main_container) self.notebook.pack(fill=tk.BOTH, expand=True) # 创建各个标签页 self.create_file_tab() self.create_process_tab() self.create_delete_tab() self.create_merge_tab() self.create_log_tab() def create_file_tab(self): """创建文件操作标签页""" tab = ttk.Frame(self.notebook) self.notebook.add(tab, text="文件操作") # 文件选择部分 frame = ttk.LabelFrame(tab, text="CSV文件选择", padding=10) frame.pack(fill=tk.X, padx=5, pady=5) ttk.Label(frame, text="文件路径:").grid(row=0, column=0, sticky=tk.W) ttk.Entry(frame, textvariable=self.file_path, width=40).grid(row=0, column=1, sticky=tk.EW) ttk.Button(frame, text="浏览", command=self.select_file).grid(row=0, column=2, padx=5) # 文件信息显示 self.file_info = scrolledtext.ScrolledText(tab, height=8, width=80) self.file_info.pack(fill=tk.X, padx=5, pady=5) def create_process_tab(self): """创建数据处理标签页""" tab = ttk.Frame(self.notebook) self.notebook.add(tab, text="排序/去重") # 排序选项部分 frame = ttk.LabelFrame(tab, text="排序选项", padding=10) frame.pack(fill=tk.X, padx=5, pady=5) ttk.Checkbutton(frame, text="启用排序", variable=self.enable_sort, command=self.toggle_sort).grid(row=0, column=0, sticky=tk.W) ttk.Label(frame, text="排序表头:").grid(row=1, column=0, sticky=tk.W) self.sort_header_combobox = ttk.Combobox(frame, textvariable=self.sort_header, state="readonly") self.sort_header_combobox.grid(row=1, column=1, sticky=tk.EW) ttk.Label(frame, text="排序方式:").grid(row=2, column=0, sticky=tk.W) self.sort_order_combobox = ttk.Combobox(frame, textvariable=self.sort_order, values=["升序", "降序", "自定义字母排序"]) self.sort_order_combobox.grid(row=2, column=1, sticky=tk.W) # 自定义字母排序范围 ttk.Checkbutton(frame, text="启用字母范围过滤", variable=self.enable_custom_letter_sort, command=self.toggle_letter_sort).grid(row=3, column=0, sticky=tk.W) ttk.Label(frame, text="字母范围:").grid(row=4, column=0, sticky=tk.W) ttk.Entry(frame, textvariable=self.letter_range_start, width=5).grid(row=4, column=1, sticky=tk.W) ttk.Label(frame, text="到").grid(row=4, column=2) ttk.Entry(frame, textvariable=self.letter_range_end, width=5).grid(row=4, column=3, sticky=tk.W) # 去重选项部分 frame = ttk.LabelFrame(tab, text="去重选项", padding=10) frame.pack(fill=tk.X, padx=5, pady=5) ttk.Checkbutton(frame, text="启用去重", variable=self.enable_dedupe, command=self.toggle_dedupe).grid(row=0, column=0, sticky=tk.W) ttk.Label(frame, text="去重表头:").grid(row=1, column=0, sticky=tk.W) self.dedupe_header_combobox = ttk.Combobox(frame, textvariable=self.dedupe_header, state="readonly") self.dedupe_header_combobox.grid(row=1, column=1, sticky=tk.EW) # 处理按钮 btn_frame = ttk.Frame(tab) btn_frame.pack(pady=10) ttk.Button(btn_frame, text="处理并保存到桌面", command=self.process_csv, style="Accent.TButton").pack() def create_delete_tab(self): """创建删除行标签页""" tab = ttk.Frame(self.notebook) self.notebook.add(tab, text="删除行") frame = ttk.LabelFrame(tab, text="删除包含指定字符的行", padding=10) frame.pack(fill=tk.X, padx=5, pady=5) # 删除条件设置 ttk.Label(frame, text="搜索列:").grid(row=0, column=0, sticky=tk.W) self.delete_column_combobox = ttk.Combobox(frame, textvariable=self.delete_column, state="readonly") self.delete_column_combobox.grid(row=0, column=1, sticky=tk.EW) ttk.Label(frame, text="关键字:").grid(row=1, column=0, sticky=tk.W) ttk.Entry(frame, textvariable=self.delete_keyword).grid(row=1, column=1, sticky=tk.EW) ttk.Checkbutton(frame, text="区分大小写", variable=self.delete_case_sensitive).grid(row=2, column=0, sticky=tk.W) # 执行按钮 btn_frame = ttk.Frame(tab) btn_frame.pack(pady=10) ttk.Button(btn_frame, text="执行删除并保存到桌面", command=self.delete_rows_with_keyword, style="Accent.TButton").pack() def create_merge_tab(self): """创建文件合并标签页""" tab = ttk.Frame(self.notebook) self.notebook.add(tab, text="文件合并") # 合并文件部分 frame = ttk.LabelFrame(tab, text="合并CSV文件", padding=10) frame.pack(fill=tk.BOTH, expand=True, padx=5, pady=5) # 文件列表容器 list_frame = ttk.Frame(frame) list_frame.pack(fill=tk.BOTH, expand=True) ttk.Label(list_frame, text="已选择文件:").grid(row=0, column=0, sticky=tk.W) # 文件列表和滚动条 self.merge_file_canvas = tk.Canvas(list_frame, height=150) self.merge_file_canvas.grid(row=1, column=0, sticky=tk.EW) scrollbar = ttk.Scrollbar(list_frame, orient="vertical", command=self.merge_file_canvas.yview) scrollbar.grid(row=1, column=1, sticky=tk.NS) self.merge_file_canvas.configure(yscrollcommand=scrollbar.set) self.merge_file_frame = ttk.Frame(self.merge_file_canvas) self.merge_file_canvas.create_window((0, 0), window=self.merge_file_frame, anchor="nw") # 按钮区域 btn_frame = ttk.Frame(frame) btn_frame.pack(fill=tk.X, pady=5) ttk.Button(btn_frame, text="添加文件", command=self.add_merge_file).pack(side=tk.LEFT, padx=5) ttk.Button(btn_frame, text="清空列表", command=self.clear_merge_list).pack(side=tk.LEFT, padx=5) # 合并选项 opt_frame = ttk.Frame(frame) opt_frame.pack(fill=tk.X, pady=5) ttk.Label(opt_frame, text="合并依据列(可选):").grid(row=0, column=0, sticky=tk.W) self.merge_column_combo = ttk.Combobox(opt_frame, textvariable=self.merge_column, state="readonly") self.merge_column_combo.grid(row=0, column=1, sticky=tk.EW) # 合并按钮 btn_frame = ttk.Frame(tab) btn_frame.pack(pady=10) ttk.Button(btn_frame, text="执行合并并保存到桌面", command=self.merge_csv_files, style="Accent.TButton").pack() def create_log_tab(self): """创建日志标签页""" tab = ttk.Frame(self.notebook) self.notebook.add(tab, text="运行日志") self.log_text = scrolledtext.ScrolledText(tab, height=15, width=80) self.log_text.pack(fill=tk.BOTH, expand=True, padx=5, pady=5) def log_message(self, message, level="info"): """记录日志并显示在GUI中""" log_methods = { "info": self.logger.info, "error": self.logger.error, "warning": self.logger.warning } # 记录到日志文件 log_methods.get(level, self.logger.info)(message) # 显示在GUI日志标签页 timestamp = datetime.now().strftime("%H:%M:%S") tagged_msg = f"[{timestamp}] {message}" self.log_text.insert(tk.END, tagged_msg + "\n") self.log_text.see(tk.END) # 同时在文件信息标签页显示重要信息 if level in ["error", "warning"]: self.file_info.config(state=tk.NORMAL) self.file_info.insert(tk.END, tagged_msg + "\n") self.file_info.config(state=tk.DISABLED) self.file_info.see(tk.END) def select_file(self): """选择CSV文件""" file_path = filedialog.askopenfilename( title="选择CSV文件", filetypes=[("CSV文件", "*.csv"), ("文本文件", "*.txt"), ("所有文件", "*.*")] ) if file_path: self.file_path.set(file_path) self.log_message(f"已选择文件: {file_path}") self.load_csv(file_path) def load_csv(self, file_path): """加载CSV文件内容""" try: with open(file_path, 'r', encoding='utf-8-sig') as file: reader = csv.reader(file) self.csv_data = list(reader) if len(self.csv_data) > 0: self.headers = self.csv_data[0] self.update_ui_with_headers() # 显示文件信息 self.show_file_info(file_path) self.log_message(f"文件加载成功,共 {len(self.csv_data)} 行") else: raise ValueError("文件为空或格式不正确") except Exception as e: error_msg = f"读取CSV文件失败: {str(e)}" self.log_message(error_msg, "error") messagebox.showerror("错误", error_msg) def show_file_info(self, file_path): """显示文件信息""" self.file_info.config(state=tk.NORMAL) self.file_info.delete(1.0, tk.END) info = [ f"文件路径: {file_path}", f"总行数: {len(self.csv_data)}", f"列数: {len(self.headers)}", f"表头: {', '.join(self.headers)}", "="*40, "前5行数据预览:" ] self.file_info.insert(tk.END, "\n".join(info) + "\n") # 显示前5行数据 for i, row in enumerate(self.csv_data[:5], 1): self.file_info.insert(tk.END, f"{i}. {', '.join(row)}\n") self.file_info.config(state=tk.DISABLED) def update_ui_with_headers(self): """根据加载的CSV更新UI元素""" # 更新所有下拉框 for combo in [ self.sort_header_combobox, self.dedupe_header_combobox, self.delete_column_combobox, self.merge_column_combo ]: combo['values'] = self.headers # 设置默认值 if self.headers: self.sort_header.set(self.headers[0]) self.dedupe_header.set(self.headers[0]) self.delete_column.set(self.headers[0]) self.merge_column.set("") def toggle_sort(self): """切换排序功能的启用状态""" state = "normal" if self.enable_sort.get() else "disabled" self.sort_header_combobox['state'] = state self.sort_order_combobox['state'] = state self.toggle_letter_sort() self.log_message(f"排序功能 {'启用' if self.enable_sort.get() else '禁用'}") def toggle_dedupe(self): """切换去重功能的启用状态""" state = "normal" if self.enable_dedupe.get() else "disabled" self.dedupe_header_combobox['state'] = state self.log_message(f"去重功能 {'启用' if self.enable_dedupe.get() else '禁用'}") def toggle_letter_sort(self): """控制字母范围输入框的启用状态""" if not self.enable_sort.get(): return state = "normal" if self.enable_custom_letter_sort.get() else "disabled" self.letter_range_start_entry['state'] = state self.letter_range_end_entry['state'] = state self.log_message(f"字母范围过滤 {'启用' if self.enable_custom_letter_sort.get() else '禁用'}") def add_merge_file(self): """添加要合并的文件""" file_paths = filedialog.askopenfilenames( title="选择要合并的CSV文件", filetypes=[("CSV文件", "*.csv"), ("文本文件", "*.txt"), ("所有文件", "*.*")] ) if file_paths: for path in file_paths: if path not in self.merge_file_paths: self.merge_file_paths.append(path) self.update_merge_file_list() def clear_merge_list(self): """清空合并文件列表""" if self.merge_file_paths: self.merge_file_paths = [] self.update_merge_file_list() self.log_message("已清空合并文件列表") def update_merge_file_list(self): """更新合并文件列表显示""" # 清除现有内容 for widget in self.merge_file_frame.winfo_children(): widget.destroy() if not self.merge_file_paths: ttk.Label(self.merge_file_frame, text="尚未选择任何文件").pack() self.merge_file_canvas.configure(scrollregion=self.merge_file_canvas.bbox("all")) return # 添加文件列表 for i, path in enumerate(self.merge_file_paths): row_frame = ttk.Frame(self.merge_file_frame) row_frame.pack(fill=tk.X, pady=2) ttk.Label(row_frame, text=f"{i+1}. {os.path.basename(path)}", width=40, anchor="w").pack(side=tk.LEFT) ttk.Button(row_frame, text="移除", command=lambda p=path: self.remove_merge_file(p), style="Remove.TButton").pack(side=tk.LEFT, padx=2) # 更新滚动区域 self.merge_file_frame.update_idletasks() self.merge_file_canvas.configure(scrollregion=self.merge_file_canvas.bbox("all")) def remove_merge_file(self, file_path): """移除指定的合并文件""" if file_path in self.merge_file_paths: self.merge_file_paths.remove(file_path) self.update_merge_file_list() self.log_message(f"已移除文件: {file_path}") def delete_rows_with_keyword(self): """删除包含关键字的行并保存到桌面""" if not self.file_path.get(): messagebox.showwarning("警告", "请先选择CSV文件") return column = self.delete_column.get() keyword = self.delete_keyword.get() if not column: messagebox.showwarning("警告", "请选择要搜索的列") return if not keyword: messagebox.showwarning("警告", "请输入要搜索的关键字") return try: col_index = self.headers.index(column) case_sensitive = self.delete_case_sensitive.get() if not case_sensitive: keyword = keyword.lower() new_data = [self.headers] deleted_count = 0 for row in self.csv_data[1:]: if len(row) > col_index: value = row[col_index] compare_value = value if case_sensitive else value.lower() if keyword not in compare_value: new_data.append(row) else: deleted_count += 1 if deleted_count == 0: messagebox.showinfo("信息", "没有找到包含关键字的行") return # 生成保存路径 operation = f"deleted_{keyword}" save_path = self.generate_filename(self.file_path.get(), operation) # 保存文件 self.save_csv_file(new_data, save_path) # 更新当前数据 self.csv_data = new_data self.log_message(f"已删除 {deleted_count} 行包含关键字 '{keyword}' 的数据") messagebox.showinfo("成功", f"已删除 {deleted_count} 行,结果已保存到桌面:\n{os.path.basename(save_path)}") except Exception as e: error_msg = f"删除行时出错: {str(e)}" self.log_message(error_msg, "error") messagebox.showerror("错误", error_msg) def get_desktop_path(self): """获取桌面路径""" try: desktop = os.path.join(os.path.join(os.environ['USERPROFILE']), 'Desktop') if os.path.exists(desktop): return desktop except KeyError: pass # 如果上面的方法失败,尝试其他方法 desktop = os.path.join(os.path.expanduser('~'), 'Desktop') if os.path.exists(desktop): return desktop # 如果还是失败,返回当前目录 return os.getcwd() def generate_filename(self, original_name, operation): """生成新的文件名""" if not original_name: original_name = "processed" base = os.path.basename(original_name) name, ext = os.path.splitext(base) # 清理操作名称中的特殊字符 clean_op = "".join(c if c.isalnum() else "_" for c in operation) timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") new_name = f"{name}_{clean_op}_{timestamp}{ext}" return os.path.join(self.get_desktop_path(), new_name) def save_csv_file(self, data, save_path): """保存CSV文件到指定路径""" try: with open(save_path, 'w', encoding='utf-8-sig', newline='') as file: writer = csv.writer(file) writer.writerows(data) # 更新文件信息显示 self.show_file_info(save_path) self.log_message(f"文件已保存到: {save_path}") return True except Exception as e: error_msg = f"保存文件时出错: {str(e)}" self.log_message(error_msg, "error") messagebox.showerror("错误", error_msg) return False def process_csv(self): """处理CSV文件(排序、去重等)并保存到桌面""" if not self.file_path.get(): messagebox.showwarning("警告", "请先选择CSV文件") return if not self.csv_data: messagebox.showwarning("警告", "CSV文件没有数据") return self.log_message("开始处理CSV文件...") new_data = [self.headers.copy()] data_rows = self.csv_data[1:] # 去重处理 if self.enable_dedupe.get(): try: dedupe_index = self.headers.index(self.dedupe_header.get()) seen = set() unique_rows = [] for row in data_rows: if len(row) > dedupe_index: key = row[dedupe_index] if key not in seen: seen.add(key) unique_rows.append(row) data_rows = unique_rows self.log_message(f"去重完成,根据表头 '{self.dedupe_header.get()}' 删除重复项") except Exception as e: self.log_message(f"去重时出错: {str(e)}", "error") messagebox.showerror("错误", f"去重时出错: {str(e)}") return # 排序处理 if self.enable_sort.get(): try: sort_index = self.headers.index(self.sort_header.get()) order = self.sort_order.get() reverse = (order == "降序") # 字母范围过滤 if self.enable_custom_letter_sort.get(): try: letter_start = self.letter_range_start.get().upper() letter_end = self.letter_range_end.get().upper() if not (len(letter_start) == 1 and len(letter_end) == 1 and letter_start.isalpha() and letter_end.isalpha()): raise ValueError("字母范围必须是单个字母(如A-Z)") filtered_rows = [] for row in data_rows: if len(row) > sort_index: value = str(row[sort_index]).strip().upper() if value and letter_start <= value[0] <= letter_end: filtered_rows.append(row) data_rows = filtered_rows self.log_message(f"字母范围过滤完成:{letter_start} 到 {letter_end}") except Exception as e: self.log_message(f"字母范围过滤失败: {str(e)}", "error") messagebox.showerror("错误", f"字母范围过滤失败: {str(e)}") return # 排序逻辑 def sort_key(row): if len(row) > sort_index: value = row[sort_index] # 尝试解析为日期 for fmt in ("%Y-%m-%d %H:%M:%S", "%Y-%m-%d", "%m/%d/%Y", "%Y.%m.%d"): try: return datetime.strptime(value, fmt) except ValueError: continue # 尝试解析为数字 try: return float(value) except ValueError: pass return value.lower() # 默认按字符串排序 return "" try: if order == "自定义字母排序": data_rows.sort(key=lambda x: str(sort_key(x)).lower() if len(x) > sort_index else "", reverse=False) else: data_rows.sort(key=sort_key, reverse=reverse) self.log_message(f"排序完成,表头 '{self.sort_header.get()}',顺序: {order}") except Exception as e: self.log_message(f"排序时出错: {str(e)}", "error") messagebox.showerror("错误", f"排序时出错: {str(e)}") return except Exception as e: self.log_message(f"排序处理时出错: {str(e)}", "error") messagebox.showerror("错误", f"排序处理时出错: {str(e)}") return # 组合最终数据 new_data.extend(data_rows) # 生成操作描述 operations = [] if self.enable_sort.get(): operations.append(f"sorted_{self.sort_header.get()}_{self.sort_order.get()}") if self.enable_dedupe.get(): operations.append(f"deduped_{self.dedupe_header.get()}") operation = "_".join(operations) if operations else "processed" # 生成保存路径 save_path = self.generate_filename(self.file_path.get(), operation) # 保存文件 if self.save_csv_file(new_data, save_path): # 更新当前数据 self.csv_data = new_data messagebox.showinfo("成功", f"文件处理完成,已保存到桌面:\n{os.path.basename(save_path)}") def merge_csv_files(self): """合并多个CSV文件并保存到桌面""" if not self.merge_file_paths: messagebox.showwarning("警告", "请先添加要合并的文件") return try: # 检查所有文件是否存在 missing_files = [f for f in self.merge_file_paths if not os.path.exists(f)] if missing_files: raise FileNotFoundError(f"以下文件不存在: {', '.join(missing_files)}") merge_column = self.merge_column.get() common_headers = None all_data = [] # 收集所有文件的表头和数据 header_sets = [] for file_path in self.merge_file_paths: with open(file_path, 'r', encoding='utf-8-sig') as file: reader = csv.reader(file) data = list(reader) if data: header_sets.append(set(data[0])) all_data.append(data) # 找出共同表头 if header_sets: common_headers = set(header_sets[0]) for headers in header_sets[1:]: common_headers.intersection_update(headers) common_headers = sorted(common_headers) if not common_headers: raise ValueError("选中的文件没有共同的列,无法合并") # 如果没有指定合并依据列,使用所有共同列 merge_indices = None if merge_column: if merge_column not in common_headers: raise ValueError(f"合并依据列 '{merge_column}' 不在共同列中") merge_indices = [i for i, h in enumerate(common_headers) if h == merge_column] # 合并数据 merged_data = [common_headers.copy()] key_counter = defaultdict(int) for data in all_data: if not data: continue headers = data[0] header_map = {h: i for i, h in enumerate(headers)} for row in data[1:]: # 如果指定了合并列,检查是否已存在相同键 if merge_indices: merge_values = [row[header_map[h]] for h in common_headers if h == merge_column] if merge_values: key = tuple(merge_values) key_counter[key] += 1 if key_counter[key] > 1: continue # 跳过重复键的行 # 构建新行,只保留共同列 new_row = [] for col in common_headers: if col in header_map and len(row) > header_map[col]: new_row.append(row[header_map[col]]) else: new_row.append("") merged_data.append(new_row) # 生成操作描述 operation = "merged" if merge_column: operation += f"_by_{merge_column}" # 生成保存路径 first_file = os.path.basename(self.merge_file_paths[0]) save_path = self.generate_filename(first_file, operation) # 保存文件 if self.save_csv_file(merged_data, save_path): messagebox.showinfo("成功", f"文件合并完成,已保存到桌面:\n{os.path.basename(save_path)}") except Exception as e: error_msg = f"合并文件时出错: {str(e)}" self.log_message(error_msg, "error") messagebox.showerror("错误", error_msg) if __name__ == "__main__": root = tk.Tk() app = CSVProcessorApp(root) root.mainloop() 优化这个代码,并新增以下功能 1.排序和去重功能中增加自定义指定某行为表头的功能 2.优化功能处理顺序,新增一个可以依次执行删除行,再排序,在去重的功能,比如处理一个csv数据,设置数据第二行为表头,然后删除某个关键字的所在行,在对这个数据进行指定列排序,比如升序,即整份数据都按照升序进行排列,最后最后在执行删除指定表头的重复项的功能,最后在桌面上输出一份处理后的新的csv数据。 3.并保留现在这个代码也可以单一执行某种功能功能
07-10
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值