使用vectorizer.fit_transform时出现AttributeError: 'file' object has no attribute 'lower'

本文详细介绍了在使用Python进行机器学习项目开发时,遇到的文件操作错误及其解决方法,并讨论了如何正确使用CountVectorizer进行文本特征提取。特别指出了解决空词汇表错误的方法,以及在Ipythonnotebook环境下处理此类问题的注意事项。

问题

最近在读书《Building Machine Learning Systems with Python》1第一版,发现其中的一个代码错误,

AttributeError: ‘file’ object has no attribute ‘lower’

产生该错误的代码为:

import os
os.listdir('./data/toy/')
posts = [open(os.path.join('./data/toy/',f)) for f in os.listdir('./data/toy/')]

from sklearn.feature_extraction.text import CountVectorizer
vectorizer = CountVectorizer(min_df=1)

X_train= vectorizer.fit_transform(posts)
num_samples, num_features = X_train.shape
print("#samples, %d, #features, %d" %(num_samples, num_features))

解决方法

感谢网站提供的解决方法,即将vectorizer = CountVectorizer(min_df=1)改为

vectorizer = CountVectorizer(min_df=1,input="file")

即可解决上面的错误。

由于我使用的是Ipython notebook运行环境,在同一个cell里面将代码改变了以后,重新运行,则出现了新的错误:

ValueError: empty vocabulary; perhaps the documents only contain stop words

尝试了半天也没有找到合适的解决办法。最后,我找到了解决办法:删除所有含有之前代码的cell,新建一个cell,在里面写入更新的代码,即可解决 “empty vocabularty”错误。这个错误与代码本身无关,而与使用的Ipython notebook环境有关。希望大家在以后使用Ipython notebook时,注意这类的问题。

运行成功的界面为:
这里写图片描述


  1. Building Machine Learning Systems with Python. 2013. Willi Ricchert, Luis Pedro Coelho. Packt publishing.
import os import pandas as pd import numpy as np import tkinter as tk from tkinter import ttk, filedialog, messagebox from openpyxl import load_workbook from openpyxl.styles import PatternFill, Font, colors import chardet import threading import time from pandastable import Table, TableModel from sklearn.impute import SimpleImputer from sklearn.ensemble import IsolationForest class TableAnalyzer: def __init__(self, root): self.root = root self.root.title("智能表格分析工具") self.root.geometry("1200x800") self.root.configure(bg='#f5f7fa') # 设置窗口图标 try: self.root.iconbitmap('table_icon.ico') except: pass # 创建主框架 self.main_frame = ttk.Frame(root, padding=15) self.main_frame.pack(fill=tk.BOTH, expand=True) # 创建标题 title_frame = ttk.Frame(self.main_frame) title_frame.pack(fill=tk.X, pady=(0, 15)) title_label = ttk.Label( title_frame, text="智能表格分析工具", font=("Arial", 18, "bold"), foreground="#2c3e50" ) title_label.pack(side=tk.LEFT) # 状态指示器 self.status_var = tk.StringVar(value="就绪") status_label = ttk.Label( title_frame, textvariable=self.status_var, font=("Arial", 10), foreground="#7f8c8d" ) status_label.pack(side=tk.RIGHT, padx=10) # 创建左右分栏 paned_window = ttk.PanedWindow(self.main_frame, orient=tk.HORIZONTAL) paned_window.pack(fill=tk.BOTH, expand=True) # 左侧面板 (控制和预览) left_panel = ttk.Frame(paned_window, padding=10) paned_window.add(left_panel, weight=1) # 右侧面板 (分析和筛选) right_panel = ttk.Frame(paned_window, padding=10) paned_window.add(right_panel, weight=1) # 左侧面板内容 self.create_file_selection_section(left_panel) self.create_header_detection_section(left_panel) self.create_preview_section(left_panel) # 右侧面板内容 self.create_filter_section(right_panel) self.create_linkage_section(right_panel) self.create_analysis_section(right_panel) self.create_advanced_analysis_section(right_panel) # 初始化变量 self.file_path = "" self.df = None self.workbook = None self.sheet = None self.header_row = 0 self.raw_data = None self.analysis_thread = None self.stop_analysis = False self.progress = 0 #self.column_has_color = {} # 存储列是否有底色 # 设置样式 self.configure_styles() self.cell_bg_status = None # 存储单元格底色状态的DataFrame def configure_styles(self): style = ttk.Style() style.theme_use('clam') # 主框架样式 style.configure('TFrame', background='#f5f7fa') # 标签框架样式 style.configure('TLabelframe', background='#ffffff', borderwidth=2) style.configure('TLabelframe.Label', background='#ffffff', foreground='#2c3e50', font=('Arial', 10, 'bold')) # 按钮样式 style.configure('TButton', background='#3498db', foreground='white', font=('Arial', 10), borderwidth=1, padding=5) style.map('TButton', background=[('active', '#2980b9')]) # 强调按钮样式 style.configure('Accent.TButton', background='#2ecc71', foreground='white') style.map('Accent.TButton', background=[('active', '#27ae60')]) # 危险按钮样式 style.configure('Danger.TButton', background='#e74c3c', foreground='white') style.map('Danger.TButton', background=[('active', '#c0392b')]) # 树状视图样式 style.configure('Treeview', background='#ffffff', fieldbackground='#ffffff', foreground='#2c3e50', rowheight=25, font=('Arial', 9)) style.configure('Treeview.Heading', background='#3498db', foreground='white', font=('Arial', 10, 'bold')) style.map('Treeview', background=[('selected', '#3498db')]) # 进度条样式 style.configure('Custom.Horizontal.TProgressbar', background='#3498db', thickness=20) # 标签样式 style.configure('TLabel', background='#f5f7fa', font=('Arial', 10)) style.configure('Title.TLabel', font=('Arial', 12, 'bold'), foreground='#2c3e50') # 组合框样式 style.configure('TCombobox', fieldbackground='white', background='white') def create_file_selection_section(self, parent): # 文件选择框架 file_frame = ttk.LabelFrame(parent, text="文件选择") file_frame.pack(fill=tk.X, padx=5, pady=5) # 文件路径输入框 path_frame = ttk.Frame(file_frame) path_frame.pack(fill=tk.X, padx=10, pady=10) self.path_var = tk.StringVar() path_entry = ttk.Entry(path_frame, textvariable=self.path_var, state='readonly') path_entry.pack(side=tk.LEFT, fill=tk.X, expand=True, padx=(0, 10)) # 浏览按钮 browse_btn = ttk.Button( path_frame, text="浏览文件", command=self.browse_file, width=10 ) browse_btn.pack(side=tk.RIGHT) # 进度条 self.progress_var = tk.DoubleVar() progress_bar = ttk.Progressbar( file_frame, variable=self.progress_var, maximum=100, style='Custom.Horizontal.TProgressbar' ) progress_bar.pack(fill=tk.X, padx=10, pady=(0, 10)) # 文件类型提示 type_label = ttk.Label( file_frame, text="支持文件类型: Excel (.xlsx, .xls), CSV (.csv), Parquet (.parquet)", foreground="#7f8c8d", font=("Arial", 9) ) type_label.pack(side=tk.LEFT, padx=10, pady=(0, 5)) def create_header_detection_section(self, parent): # 表头检测框架 header_frame = ttk.LabelFrame(parent, text="表头检测与设置") header_frame.pack(fill=tk.X, padx=5, pady=5) # 表头行设置 row_frame = ttk.Frame(header_frame) row_frame.pack(fill=tk.X, padx=10, pady=10) ttk.Label(row_frame, text="表头所在行:").pack(side=tk.LEFT, padx=(0, 5)) self.header_row_var = tk.IntVar(value=0) self.header_row_spin = ttk.Spinbox( row_frame, from_=0, to=100, width=5, textvariable=self.header_row_var ) self.header_row_spin.pack(side=tk.LEFT, padx=(0, 10)) # 自动检测按钮 detect_btn = ttk.Button( row_frame, text="自动检测表头", command=self.auto_detect_header ) detect_btn.pack(side=tk.LEFT, padx=(0, 10)) # 分析按钮 analyze_btn = ttk.Button( row_frame, text="分析表格", command=self.start_analysis_thread, style="Accent.TButton", width=10 ) analyze_btn.pack(side=tk.RIGHT) # 停止按钮 stop_btn = ttk.Button( row_frame, text="停止分析", command=self.stop_analysis_process, style="Danger.TButton", width=10 ) stop_btn.pack(side=tk.RIGHT, padx=(0, 10)) def create_preview_section(self, parent): # 数据预览框架 preview_frame = ttk.LabelFrame(parent, text="数据预览") preview_frame.pack(fill=tk.BOTH, expand=True, padx=5, pady=5) # 创建表格预览 self.preview_table = ttk.Frame(preview_frame) self.preview_table.pack(fill=tk.BOTH, expand=True, padx=10, pady=10) # 初始占位符 self.preview_placeholder = ttk.Label( preview_frame, text="选择文件后显示数据预览", foreground="#7f8c8d", font=("Arial", 11) ) self.preview_placeholder.pack(expand=True) def create_filter_section(self, parent): """创建数据筛选区域""" # 筛选框架 filter_frame = ttk.LabelFrame(parent, text="数据筛选") filter_frame.pack(fill=tk.X, padx=5, pady=5) # 筛选控制区域 control_frame = ttk.Frame(filter_frame) control_frame.pack(fill=tk.X, padx=10, pady=10) # 列选择 ttk.Label(control_frame, text="选择列:").pack(side=tk.LEFT, padx=(0, 5)) self.filter_col_var = tk.StringVar() self.filter_col_combobox = ttk.Combobox( control_frame, textvariable=self.filter_col_var, state="readonly", width=20 ) self.filter_col_combobox.pack(side=tk.LEFT, padx=(0, 10)) self.filter_col_combobox.bind("<<ComboboxSelected>>", self.update_filter_values) # 值选择 ttk.Label(control_frame, text="选择值:").pack(side=tk.LEFT, padx=(0, 5)) self.filter_value_var = tk.StringVar() self.filter_value_combobox = ttk.Combobox( control_frame, textvariable=self.filter_value_var, state="readonly", width=20 ) self.filter_value_combobox.pack(side=tk.LEFT, padx=(0, 10)) # 筛选按钮 filter_btn = ttk.Button( control_frame, text="应用筛选", command=self.apply_filter, style="Accent.TButton" ) filter_btn.pack(side=tk.LEFT, padx=(0, 10)) # 清除筛选按钮 clear_btn = ttk.Button( control_frame, text="清除筛选", command=self.clear_filter, style="Danger.TButton" ) clear_btn.pack(side=tk.LEFT) # 筛选结果信息 self.filter_info_var = tk.StringVar(value="未应用筛选") filter_info_label = ttk.Label( filter_frame, textvariable=self.filter_info_var, foreground="#3498db", font=("Arial", 10, "bold") ) filter_info_label.pack(side=tk.RIGHT, padx=10, pady=(0, 5)) def create_linkage_section(self, parent): """创建联动统计区域""" # 联动统计框架 linkage_frame = ttk.LabelFrame(parent, text="联动统计") linkage_frame.pack(fill=tk.X, padx=5, pady=5) # 控制区域 control_frame = ttk.Frame(linkage_frame) control_frame.pack(fill=tk.X, padx=10, pady=10) # 目标列选择 ttk.Label(control_frame, text="统计列:").pack(side=tk.LEFT, padx=(0, 5)) self.linkage_col_var = tk.StringVar() self.linkage_col_combobox = ttk.Combobox( control_frame, textvariable=self.linkage_col_var, state="readonly", width=20 ) self.linkage_col_combobox.pack(side=tk.LEFT, padx=(0, 10)) # 统计按钮 stats_btn = ttk.Button( control_frame, text="统计分布", command=self.update_linkage_stats, style="Accent.TButton" ) stats_btn.pack(side=tk.LEFT, padx=(0, 10)) # 统计结果框架 stats_frame = ttk.Frame(linkage_frame) stats_frame.pack(fill=tk.BOTH, expand=True, padx=10, pady=10) # 创建树状视图显示统计结果 columns = ("value", "count", "percent") self.linkage_tree = ttk.Treeview( stats_frame, columns=columns, show="headings", height=8 ) # 设置列 col_widths = [200, 100, 100] col_headings = ["值", "出现次数", "占比"] for col, width, heading in zip(columns, col_widths, col_headings): self.linkage_tree.column(col, width=width, anchor=tk.CENTER) self.linkage_tree.heading(col, text=heading) # 添加滚动条 scrollbar = ttk.Scrollbar( stats_frame, orient="vertical", command=self.linkage_tree.yview ) scrollbar.pack(side=tk.RIGHT, fill=tk.Y) self.linkage_tree.configure(yscrollcommand=scrollbar.set) self.linkage_tree.pack(fill=tk.BOTH, expand=True) # 统计信息 self.linkage_info_var = tk.StringVar(value="未统计") linkage_info_label = ttk.Label( linkage_frame, textvariable=self.linkage_info_var, foreground="#3498db", font=("Arial", 10, "bold") ) linkage_info_label.pack(side=tk.RIGHT, padx=10, pady=(0, 5)) def create_analysis_section(self, parent): # 分析结果框架 analysis_frame = ttk.LabelFrame(parent, text="数据分析结果") analysis_frame.pack(fill=tk.BOTH, expand=True, padx=5, pady=5) # 创建分页控件 self.notebook = ttk.Notebook(analysis_frame) self.notebook.pack(fill=tk.BOTH, expand=True, padx=5, pady=5) # 表头识别标签页 self.headers_tab = ttk.Frame(self.notebook) self.notebook.add(self.headers_tab, text="表头识别") # 列统计标签页 self.stats_tab = ttk.Frame(self.notebook) self.notebook.add(self.stats_tab, text="列统计") # 格式分析标签页 self.format_tab = ttk.Frame(self.notebook) self.notebook.add(self.format_tab, text="格式分析") # 数据质量标签页 self.quality_tab = ttk.Frame(self.notebook) self.notebook.add(self.quality_tab, text="数据质量") # 初始化标签页内容 self.initialize_tabs() def initialize_tabs(self): # 表头识别标签页内容 headers_frame = ttk.Frame(self.headers_tab) headers_frame.pack(fill=tk.BOTH, expand=True, padx=10, pady=10) # 表头表格 columns = ("index", "header", "data_type", "sample", "unique", "null_percent") self.headers_tree = ttk.Treeview( headers_frame, columns=columns, show="headings", height=8 ) # 设置列 col_widths = [50, 150, 100, 200, 80, 80] col_anchors = [tk.CENTER, tk.W, tk.W, tk.W, tk.CENTER, tk.CENTER] col_headings = ["序号", "表头名称", "数据类型", "示例值", "唯一值", "空值率"] for col, width, anchor, heading in zip(columns, col_widths, col_anchors, col_headings): self.headers_tree.column(col, width=width, anchor=anchor) self.headers_tree.heading(col, text=heading) # 添加滚动条 scrollbar = ttk.Scrollbar( headers_frame, orient="vertical", command=self.headers_tree.yview ) scrollbar.pack(side=tk.RIGHT, fill=tk.Y) self.headers_tree.configure(yscrollcommand=scrollbar.set) self.headers_tree.pack(fill=tk.BOTH, expand=True) # 列统计标签页内容 stats_frame = ttk.Frame(self.stats_tab) stats_frame.pack(fill=tk.BOTH, expand=True, padx=10, pady=10) # 列选择框 col_frame = ttk.Frame(stats_frame) col_frame.pack(fill=tk.X, pady=(0, 10)) ttk.Label(col_frame, text="选择列:").pack(side=tk.LEFT, padx=(0, 5)) self.col_var = tk.StringVar() self.col_combobox = ttk.Combobox( col_frame, textvariable=self.col_var, state="readonly", width=30 ) self.col_combobox.pack(side=tk.LEFT, fill=tk.X, expand=True, padx=(0, 10)) self.col_combobox.bind("<<ComboboxSelected>>", self.update_stats) # 统计信息表格 self.stats_tree = ttk.Treeview( stats_frame, columns=("metric", "value"), show="headings", height=10 ) # 设置列 self.stats_tree.column("metric", width=200, anchor=tk.W) self.stats_tree.column("value", width=300, anchor=tk.W) # 设置列标题 self.stats_tree.heading("metric", text="统计指标") self.stats_tree.heading("value", text="值") # 添加滚动条 scrollbar2 = ttk.Scrollbar( stats_frame, orient="vertical", command=self.stats_tree.yview ) scrollbar2.pack(side=tk.RIGHT, fill=tk.Y) self.stats_tree.configure(yscrollcommand=scrollbar2.set) self.stats_tree.pack(fill=tk.BOTH, expand=True) # 格式分析标签页内容 format_frame = ttk.Frame(self.format_tab) format_frame.pack(fill=tk.BOTH, expand=True, padx=10, pady=10) # 格式信息表格 columns = ("column", "has_bg", "bg_status") self.format_tree = ttk.Treeview( format_frame, columns=columns, show="headings", height=10 ) # 设置列 col_widths = [150, 120, 120] col_anchors = [tk.W, tk.CENTER, tk.CENTER] col_headings = ["列名", "有底色", "底色状态"] for col, width, anchor, heading in zip(columns, col_widths, col_anchors, col_headings): self.format_tree.column(col, width=width, anchor=anchor) self.format_tree.heading(col, text=heading) # 添加滚动条 scrollbar3 = ttk.Scrollbar( format_frame, orient="vertical", command=self.format_tree.yview ) scrollbar3.pack(side=tk.RIGHT, fill=tk.Y) self.format_tree.configure(yscrollcommand=scrollbar3.set) self.format_tree.pack(fill=tk.BOTH, expand=True) # 数据质量标签页内容 quality_frame = ttk.Frame(self.quality_tab) quality_frame.pack(fill=tk.BOTH, expand=True, padx=10, pady=10) # 数据质量表格 columns = ("issue", "count", "severity", "columns", "suggestion") self.quality_tree = ttk.Treeview( quality_frame, columns=columns, show="headings", height=8 ) # 设置列 col_widths = [150, 80, 100, 150, 250] col_anchors = [tk.W, tk.CENTER, tk.CENTER, tk.W, tk.W] col_headings = ["问题类型", "数量", "严重程度", "相关列", "解决建议"] for col, width, anchor, heading in zip(columns, col_widths, col_anchors, col_headings): self.quality_tree.column(col, width=width, anchor=anchor) self.quality_tree.heading(col, text=heading) # 添加滚动条 scrollbar4 = ttk.Scrollbar( quality_frame, orient="vertical", command=self.quality_tree.yview ) scrollbar4.pack(side=tk.RIGHT, fill=tk.Y) self.quality_tree.configure(yscrollcommand=scrollbar4.set) self.quality_tree.pack(fill=tk.BOTH, expand=True) # 添加缺失的 update_stats 方法 def update_stats(self, event=None): """更新列统计信息""" # 清空统计表格 for item in self.stats_tree.get_children(): self.stats_tree.delete(item) selected_col_display = self.col_var.get() if not selected_col_display: return # 从显示列名中提取原始列名(移除底色标记) if " (有底色)" in selected_col_display: selected_col = selected_col_display.replace(" (有底色)", "") elif " (无底色)" in selected_col_display: selected_col = selected_col_display.replace(" (无底色)", "") else: selected_col = selected_col_display if not selected_col or selected_col not in self.df.columns: return col_data = self.df[selected_col] # 基本统计信息 self.stats_tree.insert("", "end", values=("列名", selected_col)) self.stats_tree.insert("", "end", values=("数据类型", str(col_data.dtype))) self.stats_tree.insert("", "end", values=("总行数", len(col_data))) non_null_count = col_data.count() null_count = len(col_data) - non_null_count self.stats_tree.insert("", "end", values=("非空值数量", non_null_count)) self.stats_tree.insert("", "end", values=("空值数量", null_count)) self.stats_tree.insert("", "end", values=("空值比例", f"{null_count/len(col_data):.2%}" if len(col_data) > 0 else "0%")) # 值统计 if pd.api.types.is_numeric_dtype(col_data): try: self.stats_tree.insert("", "end", values=("平均值", f"{col_data.mean():.4f}")) self.stats_tree.insert("", "end", values=("中位数", f"{col_data.median():.4f}")) self.stats_tree.insert("", "end", values=("最小值", f"{col_data.min():.4f}")) self.stats_tree.insert("", "end", values=("最大值", f"{col_data.max():.4f}")) self.stats_tree.insert("", "end", values=("标准差", f"{col_data.std():.4f}")) self.stats_tree.insert("", "end", values=("偏度", f"{col_data.skew():.4f}")) self.stats_tree.insert("", "end", values=("峰度", f"{col_data.kurtosis():.4f}")) self.stats_tree.insert("", "end", values=("总和", f"{col_data.sum():.4f}")) except: pass # 唯一值统计 unique_count = col_data.nunique() self.stats_tree.insert("", "end", values=("唯一值数量", unique_count)) unique_ratio = unique_count/non_null_count if non_null_count > 0 else 0 self.stats_tree.insert("", "end", values=("唯一值比例", f"{unique_ratio:.2%}")) # 最常出现的值 if non_null_count > 0: try: top_values = col_data.value_counts().head(3) top_str = ", ".join([f"{val} ({count})" for val, count in top_values.items()]) self.stats_tree.insert("", "end", values=("最常见值", top_str)) except: pass def create_advanced_analysis_section(self, parent): # 高级分析框架 analysis_frame = ttk.LabelFrame(parent, text="高级分析") analysis_frame.pack(fill=tk.X, padx=5, pady=5) # 分析选项 options_frame = ttk.Frame(analysis_frame) options_frame.pack(fill=tk.X, padx=10, pady=10) # 异常值检测 self.outlier_var = tk.BooleanVar(value=True) outlier_cb = ttk.Checkbutton( options_frame, text="检测异常值", variable=self.outlier_var ) outlier_cb.pack(side=tk.LEFT, padx=(0, 20)) # 数据标准化 self.scale_var = tk.BooleanVar(value=False) scale_cb = ttk.Checkbutton( options_frame, text="数据标准化", variable=self.scale_var ) scale_cb.pack(side=tk.LEFT, padx=(0, 20)) # 缺失值处理 ttk.Label(options_frame, text="缺失值处理:").pack(side=tk.LEFT, padx=(0, 5)) self.missing_var = tk.StringVar(value="mean") missing_options = ["不处理", "平均值填充", "中位数填充", "众数填充", "删除行"] missing_cb = ttk.Combobox( options_frame, textvariable=self.missing_var, values=missing_options, state="readonly", width=15 ) missing_cb.pack(side=tk.LEFT, padx=(0, 20)) # 执行高级分析按钮 adv_analyze_btn = ttk.Button( options_frame, text="执行高级分析", command=self.perform_advanced_analysis, style="Accent.TButton" ) adv_analyze_btn.pack(side=tk.RIGHT) def browse_file(self): filetypes = [ ("Excel 文件", "*.xlsx *.xls"), ("CSV 文件", "*.csv"), ("Parquet 文件", "*.parquet"), ("所有文件", "*.*") ] file_path = filedialog.askopenfilename( title="选择表格文件", filetypes=filetypes ) if file_path: self.file_path = file_path self.path_var.set(file_path) self.reset_analysis() self.load_and_preview() def load_and_preview(self): """加载文件并显示预览""" try: self.status_var.set("加载文件中...") self.progress_var.set(0) self.root.update() file_ext = os.path.splitext(self.file_path)[1].lower() # 移除预览占位符 self.preview_placeholder.pack_forget() # 创建预览表格 if hasattr(self, 'pt'): self.pt.destroy() self.pt_frame = ttk.Frame(self.preview_table) self.pt_frame.pack(fill=tk.BOTH, expand=True) # 使用pandastable进行高效预览 self.pt = Table( self.pt_frame, showtoolbar=False, showstatusbar=True, width=400, height=300 ) self.pt.show() # 加载数据 if file_ext in ['.xlsx', '.xls']: # 分块读取Excel文件 self.load_excel_preview() elif file_ext == '.csv': # 分块读取CSV文件 self.load_csv_preview() elif file_ext == '.parquet': # 读取Parquet文件 self.load_parquet_preview() else: messagebox.showerror("错误", "不支持的文件类型") return # 尝试自动检测表头 self.auto_detect_header() self.status_var.set("文件加载完成") except Exception as e: messagebox.showerror("错误", f"加载文件出错: {str(e)}") self.status_var.set(f"错误: {str(e)}") def load_excel_preview(self): """分块读取Excel文件预览""" # 仅读取前1000行进行预览 self.raw_data = pd.read_excel(self.file_path, header=None, nrows=1000) self.pt.model = TableModel(dataframe=self.raw_data) self.pt.redraw() def load_csv_preview(self): """分块读取CSV文件预览""" # 检测编码 with open(self.file_path, 'rb') as f: result = chardet.detect(f.read(10000)) encoding = result['encoding'] or 'utf-8' # 仅读取前1000行进行预览 self.raw_data = pd.read_csv( self.file_path, header=None, encoding=encoding, engine='c', nrows=1000 ) self.pt.model = TableModel(dataframe=self.raw_data) self.pt.redraw() def load_parquet_preview(self): """读取Parquet文件预览""" # 仅读取前1000行进行预览 self.raw_data = pd.read_parquet(self.file_path) self.pt.model = TableModel(dataframe=self.raw_data.head(1000)) self.pt.redraw() def auto_detect_header(self): """自动检测表头所在行""" if self.raw_data is None or self.raw_data.empty: return # 使用更高效的检测算法 best_row = 0 best_score = -1 # 只检查前20行 for i in range(min(20, len(self.raw_data))): row = self.raw_data.iloc[i] # 计算得分 score = 0 # 1. 非空值比例 non_null_ratio = row.notnull().mean() score += non_null_ratio * 20 # 2. 字符串比例 str_count = sum(isinstance(x, str) for x in row) str_ratio = str_count / len(row) if len(row) > 0 else 0 score += str_ratio * 30 # 3. 唯一值比例 unique_count = len(set(row)) unique_ratio = unique_count / len(row) if len(row) > 0 else 0 score += unique_ratio * 50 if score > best_score: best_score = score best_row = i # 设置检测到的表头行 self.header_row_var.set(best_row) def reset_analysis(self): # 清除之前的分析结果 for tree in [self.headers_tree, self.stats_tree, self.format_tree, self.quality_tree, self.linkage_tree]: for item in tree.get_children(): tree.delete(item) self.col_combobox.set('') self.col_combobox['values'] = [] self.filter_col_var.set('') self.filter_value_var.set('') self.filter_value_combobox['values'] = [] self.linkage_col_var.set('') self.filter_info_var.set("未应用筛选") self.linkage_info_var.set("未统计") self.column_has_color = {} def start_analysis_thread(self): """启动分析线程""" if not self.file_path: messagebox.showwarning("警告", "请先选择表格文件") return # 禁用分析按钮 for widget in self.root.winfo_children(): if isinstance(widget, ttk.Button) and widget.cget("text") == "分析表格": widget.config(state=tk.DISABLED) self.status_var.set("分析中...") self.progress_var.set(0) self.stop_analysis = False # 创建并启动分析线程 self.analysis_thread = threading.Thread(target=self.analyze_table) self.analysis_thread.daemon = True self.analysis_thread.start() # 启动进度更新 self.root.after(100, self.update_progress) def stop_analysis_process(self): """停止分析过程""" self.stop_analysis = True self.status_var.set("分析已停止") def update_progress(self): """更新进度条""" if self.analysis_thread and self.analysis_thread.is_alive(): self.progress_var.set(self.progress) self.root.after(100, self.update_progress) else: # 重新启用分析按钮 for widget in self.root.winfo_children(): if isinstance(widget, ttk.Button) and widget.cget("text") == "分析表格": widget.config(state=tk.NORMAL) self.progress_var.set(100) def analyze_table(self): """执行表格分析(在后台线程中运行)""" try: self.header_row = self.header_row_var.get() file_ext = os.path.splitext(self.file_path)[1].lower() # 更新进度 self.progress = 10 time.sleep(0.1) if self.stop_analysis: return if file_ext in ['.xlsx', '.xls']: # 读取Excel文件,指定表头行 self.df = pd.read_excel(self.file_path, header=self.header_row) self.workbook = load_workbook(self.file_path) self.sheet = self.workbook.active elif file_ext == '.csv': # 读取CSV文件,指定表头行 with open(self.file_path, 'rb') as f: result = chardet.detect(f.read()) encoding = result['encoding'] or 'utf-8' self.df = pd.read_csv( self.file_path, header=self.header_row, encoding=encoding, engine='c' ) elif file_ext == '.parquet': # 读取Parquet文件 self.df = pd.read_parquet(self.file_path) else: messagebox.showerror("错误", "不支持的文件类型") return # 清理列名 self.df.columns = [str(col).strip() for col in self.df.columns] # 初始化列底色信息 self.column_has_color = {col: False for col in self.df.columns} # 更新进度 self.progress = 30 time.sleep(0.1) if self.stop_analysis: return # 分析表头 self.analyze_headers() # 更新进度 self.progress = 50 time.sleep(0.1) if self.stop_analysis: return # 分析列格式 if file_ext in ['.xlsx', '.xls']: self.analyze_format() # 更新进度 self.progress = 70 time.sleep(0.1) if self.stop_analysis: return # 分析数据质量 self.analyze_data_quality() # 更新列选择框 self.col_combobox['values'] = list(self.df.columns) if self.df.columns.size > 0: self.col_combobox.current(0) self.update_stats() # 更新筛选控件(添加底色标记) display_columns = [] for col in self.df.columns: has_color = self.column_has_color.get(col, False) display_columns.append(f"{col} (有底色)" if has_color else f"{col} (无底色)") self.filter_col_combobox['values'] = display_columns # 更新联动统计控件(添加底色标记) display_columns = [] for col in self.df.columns: has_color = self.column_has_color.get(col, False) display_columns.append(f"{col} (有底色)" if has_color else f"{col} (无底色)") self.linkage_col_combobox['values'] = display_columns self.status_var.set("分析完成") # 更新进度 self.progress = 100 except Exception as e: messagebox.showerror("错误", f"分析表格出错: {str(e)}") self.status_var.set(f"错误: {str(e)}") def analyze_headers(self): # 清空表头表格 for item in self.headers_tree.get_children(): self.headers_tree.delete(item) # 添加表头信息 for i, col in enumerate(self.df.columns): if self.stop_analysis: return # 检查列是否有底色(只针对数据区域) has_color = self.column_has_color.get(col, False) col_display = f"{col} (有底色)" if has_color else f"{col} (无底色)" sample = "" # 获取前3个非空值作为示例 non_empty = self.df[col].dropna() if len(non_empty) > 0: sample = ", ".join(str(x) for x in non_empty.head(3).tolist()) # 推断数据类型 dtype = str(self.df[col].dtype) dtype_map = { 'object': '文本', 'int64': '整数', 'float64': '小数', 'bool': '布尔值', 'datetime64': '日期间', 'category': '分类数据' } for k, v in dtype_map.items(): if k in dtype: dtype = v break # 唯一值统计 unique_count = self.df[col].nunique() # 空值率 null_count = self.df[col].isnull().sum() null_percent = f"{null_count / len(self.df):.1%}" if len(self.df) > 0 else "0%" self.headers_tree.insert("", "end", values=( i+1, col_display, # 使用带底色标记的列名 dtype, sample, unique_count, null_percent )) def analyze_format(self): """分析每个单元格的底色状态""" if not self.workbook or not self.sheet: return # 初始化底色状态DataFrame(与数据表相同形状) self.cell_bg_status = pd.DataFrame( "无底色", index=self.df.index, columns=self.df.columns ) # 分析每个单元格的底色 for row_idx in range(len(self.df)): for col_idx, col_name in enumerate(self.df.columns): # 转换为Excel坐标(行号从1开始) excel_row = self.header_row + 2 + row_idx col_letter = self.get_column_letter(col_idx + 1) cell = self.sheet[f"{col_letter}{excel_row}"] # 检测底色 has_bg = False if cell.fill and isinstance(cell.fill, PatternFill): if cell.fill.fgColor and cell.fill.fgColor.rgb: if cell.fill.fgColor.rgb != "00000000": # 排除透明 has_bg = True # 存储底色状态 self.cell_bg_status.iloc[row_idx, col_idx] = "有底色" if has_bg else "无底色" def analyze_data_quality(self): """分析数据质量问题""" # 清空质量表格 for item in self.quality_tree.get_children(): self.quality_tree.delete(item) issues = [] # 1. 检查空值 null_counts = self.df.isnull().sum() total_rows = len(self.df) for col, count in null_counts.items(): if count > 0: severity = "高" if count / total_rows > 0.3 else "中" if count / total_rows > 0.1 else "低" issues.append({ "issue": "空值", "count": count, "severity": severity, "columns": col, "suggestion": f"考虑使用平均值/中位数填充或删除空值行" }) # 2. 检查重复行 duplicate_rows = self.df.duplicated().sum() if duplicate_rows > 0: severity = "高" if duplicate_rows / total_rows > 0.1 else "中" issues.append({ "issue": "重复行", "count": duplicate_rows, "severity": severity, "columns": "所有列", "suggestion": "删除重复行或分析重复原因" }) # 3. 检查数据类型不一致 for col in self.df.columns: if self.df[col].dtype == 'object': # 检查混合数据类型 type_counts = self.df[col].apply(type).value_counts() if len(type_counts) > 1: issues.append({ "issue": "数据类型不一致", "count": len(self.df[col]), "severity": "中", "columns": col, "suggestion": "统一数据类型或转换格式" }) # 4. 检查异常值(仅数值列) numeric_cols = self.df.select_dtypes(include=np.number).columns for col in numeric_cols: q1 = self.df[col].quantile(0.25) q3 = self.df[col].quantile(0.75) iqr = q3 - q1 lower_bound = q1 - 1.5 * iqr upper_bound = q3 + 1.5 * iqr outliers = self.df[(self.df[col] < lower_bound) | (self.df[col] > upper_bound)] outlier_count = len(outliers) if outlier_count > 0: severity = "高" if outlier_count / total_rows > 0.05 else "中" issues.append({ "issue": "异常值", "count": outlier_count, "severity": severity, "columns": col, "suggestion": "检查数据准确性或进行转换" }) # 添加到表格 for issue in issues: self.quality_tree.insert("", "end", values=( issue["issue"], issue["count"], issue["severity"], issue["columns"], issue["suggestion"] )) def get_column_letter(self, col_idx): """将列索引转换为Excel列字母""" letters = [] while col_idx: col_idx, remainder = divmod(col_idx - 1, 26) letters.append(chr(65 + remainder)) return ''.join(reversed(letters)) def update_filter_values(self, event=None): """更新筛选值列表(带底色标记)""" selected_col = self.filter_col_var.get() if not selected_col or selected_col not in self.df.columns: return # 获取列的值和对应的底色状态 values = self.df[selected_col].astype(str) bg_status = self.cell_bg_status[selected_col] # 创建带底色标记的值列表 unique_values = set() for val, bg in zip(values, bg_status): unique_values.add(f"{val} ({bg})") # 如果唯一值太多,只显示前50个 if len(unique_values) > 50: unique_values = sorted(unique_values)[:50] messagebox.showinfo("提示", f"该列有超过50个唯一值,只显示前50个") # 更新值选择框 self.filter_value_combobox['values'] = sorted(unique_values) self.filter_value_combobox.set('') def apply_filter(self): """应用筛选条件(考虑底色状态)""" if self.df is None: messagebox.showwarning("警告", "请先分析表格") return selected_col = self.filter_col_var.get() selected_value_with_bg = self.filter_value_var.get() # 检查输入有效性 if not selected_col or not selected_value_with_bg: messagebox.showwarning("警告", "请选择列和值") return try: # 从带底色标记的值中提取原始值和底色状态 if " (有底色)" in selected_value_with_bg: selected_value = selected_value_with_bg.replace(" (有底色)", "") selected_bg = "有底色" elif " (无底色)" in selected_value_with_bg: selected_value = selected_value_with_bg.replace(" (无底色)", "") selected_bg = "无底色" else: selected_value = selected_value_with_bg selected_bg = None # 应用值筛选 if pd.api.types.is_numeric_dtype(self.df[selected_col]): try: selected_value = float(selected_value) value_condition = (self.df[selected_col] == selected_value) except: value_condition = (self.df[selected_col].astype(str) == selected_value) else: value_condition = (self.df[selected_col].astype(str) == selected_value) # 应用底色状态筛选(如果有) if selected_bg and hasattr(self, 'cell_bg_status') and selected_col in self.cell_bg_status: bg_condition = (self.cell_bg_status[selected_col] == selected_bg) condition = value_condition & bg_condition else: condition = value_condition # 应用组合筛选条件 self.filtered_df = self.df[condition] # 更新预览 self.update_preview(self.filtered_df) # 更新筛选信息 match_count = len(self.filtered_df) total_count = len(self.df) self.filter_info_var.set( f"筛选结果: {match_count} 行 (共 {total_count} 行, {match_count/total_count:.1%})" ) # 更新统计信息 self.update_stats(self.col_var.get()) # 自动更新联动统计 if self.linkage_col_var.get() and self.linkage_col_var.get() in self.df.columns: self.update_linkage_stats() except Exception as e: messagebox.showerror("错误", f"筛选数据出错: {str(e)}") def clear_filter(self): """清除筛选条件""" if hasattr(self, 'filtered_df'): del self.filtered_df # 重置筛选控件 self.filter_col_var.set('') self.filter_value_var.set('') self.filter_value_combobox['values'] = [] # 恢复原始数据预览 if self.df is not None: self.update_preview(self.df) # 更新筛选信息 self.filter_info_var.set("筛选已清除") # 更新统计信息 if self.col_var.get(): self.update_stats(self.col_var.get()) # 清除联动统计 for item in self.linkage_tree.get_children(): self.linkage_tree.delete(item) self.linkage_info_var.set("未统计") def update_preview(self, df): """更新数据预览(可选:添加底色状态)""" if hasattr(self, 'pt'): # 创建带底色状态的预览副本 preview_df = df.copy().head(1000) # 添加底色状态列(如果存在) if hasattr(self, 'cell_bg_status') and not self.cell_bg_status.empty: for col in df.columns: if col in self.cell_bg_status: preview_df[f"{col}_底色状态"] = self.cell_bg_status[col].loc[preview_df.index] # 更新现有表格 self.pt.model = TableModel(dataframe=preview_df) self.pt.redraw() else: # 创建新表格 self.pt_frame = ttk.Frame(self.preview_table) self.pt_frame.pack(fill=tk.BOTH, expand=True) self.pt = Table( self.pt_frame, showtoolbar=False, showstatusbar=True, width=400, height=300 ) # 创建带底色状态的预览 preview_df = df.head(1000).copy() if hasattr(self, 'cell_bg_status') and not self.cell_bg_status.empty: for col in df.columns: if col in self.cell_bg_status: preview_df[f"{col}_底色状态"] = self.cell_bg_status[col].loc[preview_df.index] self.pt.model = TableModel(dataframe=preview_df) self.pt.show() def update_linkage_stats(self): """更新联动统计结果(考虑底色状态)""" if not hasattr(self, 'filtered_df') or self.filtered_df.empty: messagebox.showwarning("警告", "请先应用筛选") return target_col = self.linkage_col_var.get() if not target_col: return # 清空现有结果 for item in self.linkage_tree.get_children(): self.linkage_tree.delete(item) # 获取筛选后的数据 filtered_data = self.filtered_df # 添加底色状态列(如果存在) if hasattr(self, 'cell_bg_status') and target_col in self.cell_bg_status: # 合并底色状态到数据中 filtered_data = filtered_data.copy() filtered_data['底色状态'] = self.cell_bg_status[target_col].loc[filtered_data.index] # 统计值分布(包含底色状态) value_counts = filtered_data.groupby([target_col, '底色状态']).size().reset_index(name='计数') total_count = len(filtered_data) # 添加到树状视图 for _, row in value_counts.iterrows(): percent = f"{row['计数'] / total_count:.1%}" self.linkage_tree.insert("", "end", values=( row[target_col], row['底色状态'], row['计数'], percent )) # 更新统计信息 self.linkage_info_var.set( f"共 {len(value_counts)} 个组合 (总计 {total_count} 行)" ) else: # 没有底色信息的处理 value_counts = filtered_data[target_col].value_counts() total_count = len(filtered_data) for value, count in value_counts.items(): percent = f"{count / total_count:.1%}" self.linkage_tree.insert("", "end", values=(value, "", count, percent)) self.linkage_info_var.set( f"共 {len(value_counts)} 个唯一值 (总计 {total_count} 行)" ) def perform_advanced_analysis(self): """执行高级分析""" if self.df is None or self.df.empty: messagebox.showwarning("警告", "请先分析表格") return self.status_var.set("执行高级分析...") self.progress_var.set(0) # 备份原始底色状态 original_bg_status = self.cell_bg_status.copy() if hasattr(self, 'cell_bg_status') else None try: # 缺失值处理 missing_strategy = self.missing_var.get() if missing_strategy != "不处理": numeric_cols = self.df.select_dtypes(include=np.number).columns categorical_cols = self.df.select_dtypes(include='object').columns if missing_strategy == "删除行": self.df = self.df.dropna() else: if missing_strategy == "平均值填充": strategy = 'mean' elif missing_strategy == "中位数填充": strategy = 'median' else: # 众数填充 strategy = 'most_frequent' # 数值列填充 if len(numeric_cols) > 0: num_imputer = SimpleImputer(strategy=strategy) self.df[numeric_cols] = num_imputer.fit_transform(self.df[numeric_cols]) # 分类列填充 if len(categorical_cols) > 0: cat_imputer = SimpleImputer(strategy='most_frequent') self.df[categorical_cols] = cat_imputer.fit_transform(self.df[categorical_cols]) # 异常值检测 if self.outlier_var.get(): numeric_cols = self.df.select_dtypes(include=np.number).columns if len(numeric_cols) > 0: clf = IsolationForest(contamination=0.05, random_state=42) outliers = clf.fit_predict(self.df[numeric_cols]) self.df['is_outlier'] = outliers == -1 # 在界面上显示异常值数量 outlier_count = self.df['is_outlier'].sum() messagebox.showinfo("异常值检测", f"检测到 {outlier_count} 个异常值\n已添加 'is_outlier' 列标记") # 数据标准化 if self.scale_var.get(): numeric_cols = self.df.select_dtypes(include=np.number).columns if len(numeric_cols) > 0: for col in numeric_cols: min_val = self.df[col].min() max_val = self.df[col].max() if max_val > min_val: self.df[col] = (self.df[col] - min_val) / (max_val - min_val) # 更新分析结果 self.analyze_headers() self.analyze_data_quality() # 更新列选择框 self.col_combobox['values'] = list(self.df.columns) if self.df.columns.size > 0: self.col_combobox.current(0) self.update_stats() # 高级分析后恢复底色状态 if original_bg_status is not None: # 只保留现有行的底色状态 self.cell_bg_status = original_bg_status.loc[self.df.index] # 处理新增列(如果没有底色状态,设为"无底色") for col in self.df.columns: if col not in self.cell_bg_status: self.cell_bg_status[col] = "无底色" self.status_var.set("高级分析完成") self.progress_var.set(100) except Exception as e: if original_bg_status is not None: self.cell_bg_status = original_bg_status messagebox.showerror("错误", f"高级分析出错: {str(e)}") self.status_var.set(f"错误: {str(e)}") if __name__ == "__main__": root = tk.Tk() app = TableAnalyzer(root) root.mainloop() TableAnalyzer’ object has no attribute ‘apply_filter’ File “C:\Users\siyuanll2102\Desktop\classify\classify.py”, line 281, in create_filter_section command=self.apply_filter, File “C:\Users\siyuanll2102\Desktop\classify\classify.py”, line 72, in init self.create_filter_section(right_panel) File “C:\Users\siyuanll2102\Desktop\classify\classify.py”, line 1445, in app = TableAnalyzer(root) AttributeError: ‘TableAnalyzer’ object has no attribute ‘apply_filter’
最新发布
08-06
我运行完app.py然后在algorithmselection中上传报出以下错误:127.0.0.1 - - [21/Jun/2025 19:31:25] "POST /upload HTTP/1.1" 500 - Traceback (most recent call last): File "D:\python3.13.2\Lib\site-packages\flask\app.py", line 1536, in __call__ return self.wsgi_app(environ, start_response) ~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^ File "D:\python3.13.2\Lib\site-packages\flask\app.py", line 1514, in wsgi_app response = self.handle_exception(e) File "D:\python3.13.2\Lib\site-packages\flask_cors\extension.py", line 176, in wrapped_function return cors_after_request(app.make_response(f(*args, **kwargs))) ~^^^^^^^^^^^^^^^^^ File "D:\python3.13.2\Lib\site-packages\flask\app.py", line 1511, in wsgi_app response = self.full_dispatch_request() File "D:\python3.13.2\Lib\site-packages\flask\app.py", line 919, in full_dispatch_request rv = self.handle_user_exception(e) File "D:\python3.13.2\Lib\site-packages\flask_cors\extension.py", line 176, in wrapped_function return cors_after_request(app.make_response(f(*args, **kwargs))) ~^^^^^^^^^^^^^^^^^ File "D:\python3.13.2\Lib\site-packages\flask\app.py", line 917, in full_dispatch_request rv = self.dispatch_request() File "D:\python3.13.2\Lib\site-packages\flask\app.py", line 902, in dispatch_request return self.ensure_sync(self.view_functions[rule.endpoint])(**view_args) # type: ignore[no-any-return] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~^^^^^^^^^^^^^ File "d:\Users\Administrator\Desktop\project\gas_sensor_app\app.py", line 130, in upload_files if not preprocess_data(): ~~~~~~~~~~~~~~~^^ File "d:\Users\Administrator\Desktop\project\gas_sensor_app\app.py", line 87, in preprocess_data sensor_columns = [col for col in dataset.columns if col.startswith('sensor')] ^^^^^^^^^^^^^^ AttributeError: 'int' object has no attribute 'startswith'
06-22
Traceback (most recent call last): File "D:\facerec\audiocluster.py", line 243, in <module> main() File "D:\facerec\audiocluster.py", line 219, in main kmeans, result_df, scaled_features = perform_clustering(merged_data, n_clusters=n_clusters) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ File "D:\facerec\audiocluster.py", line 78, in perform_clustering cluster_labels = kmeans.fit_predict(scaled_features) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ File "D:\anaconda\Lib\site-packages\sklearn\cluster\_kmeans.py", line 1068, in fit_predict return self.fit(X, sample_weight=sample_weight).labels_ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ File "D:\anaconda\Lib\site-packages\sklearn\base.py", line 1151, in wrapper return fit_method(estimator, *args, **kwargs) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ File "D:\anaconda\Lib\site-packages\sklearn\cluster\_kmeans.py", line 1509, in fit self._check_mkl_vcomp(X, X.shape[0]) File "D:\anaconda\Lib\site-packages\sklearn\cluster\_kmeans.py", line 927, in _check_mkl_vcomp modules = threadpool_info() ^^^^^^^^^^^^^^^^^ File "D:\anaconda\Lib\site-packages\sklearn\utils\fixes.py", line 83, in threadpool_info return threadpoolctl.threadpool_info() ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ File "D:\anaconda\Lib\site-packages\threadpoolctl.py", line 124, in threadpool_info return _ThreadpoolInfo(user_api=_ALL_USER_APIS).todicts() ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ File "D:\anaconda\Lib\site-packages\threadpoolctl.py", line 340, in __init__ self._load_modules() File "D:\anaconda\Lib\site-packages\threadpoolctl.py", line 373, in _load_modules self._find_modules_with_enum_process_module_ex() File "D:\anaconda\Lib\site-packages\threadpoolctl.py", line 485, in _find_modules_with_enum_process_module_ex self._make_module_from_path(filepath) File "D:\anaconda\Lib\site-packages\threadpoolctl.py", line 515, in _make_module_from_path module = module_class(filepath, prefix, user_api, internal_api) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ File "D:\anaconda\Lib\site-packages\threadpoolctl.py", line 606, in __init__ self.version = self.get_version() ^^^^^^^^^^^^^^^^^^ File "D:\anaconda\Lib\site-packages\threadpoolctl.py", line 646, in get_version config = get_config().split() ^^^^^^^^^^^^^^^^^^ AttributeError: 'NoneType' object has no attribute 'split'
07-12
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值