laravel中请求request的only方法和获取模型中表的所有列名,route路由get和post和any和match

laravel中请求request的only方法和获取模型中表的所有列名,route路由get和post和any和match

开始

Route::get('index', function() {
return 'get ';
});
//可以访问 any匹配任何请求
Route::any('index', function() {
return 'any :get,post,delete,put';
});
//同时匹配get,post
Route::match(['get', 'post'], 'index', function () {
return 'match::get,post';
});

获取request请求的only数据

    function index(Request $request, InsurancecardService $service, InsurancecardModel $InsurancecardModel)
    {
        //获取模型中表的所有列名
        $columns        = Schema::getColumnListing($InsurancecardModel->getTable());
        $columns[]      = 'size';
        //该函数只返回:$columns中的字段参数值
        $params         = $request->only($columns);
        $validator = Validator::make($params, [
            'size' => 'sometimes|int|max:100',
            'email' =>'required',
        ], [
            'required'=>':attribute 为必填项',
            'max'=>':attribute 长度不符合要求',
            'date'=>':attribute 日期不正确',
            'email.required'=>'邮箱为必填项',
        ]);
        if ($validator->fails()) return $this->fail($validator->errors()->first(),400);

        try{
            $resp = $service->getList($params);
            return $this->success($resp);
        } catch (\Exception $e){
            return $this->fail($e->getMessage(),400);
        }
    }

生成模型命令(感觉没什么用

//感觉没什么用。。这个生成模型只是个空类,不如手动创建。而且只会生成在app\Models\目录下
php artisan make:model /Modules/Insurancecard/Models/InsurancecardModel

结束

在我的后端中还有两个文件,一个是new_algorithm.py:import pandas as pd import numpy as np from sklearn.model_selection import train_test_split from sklearn.neighbors import KNeighborsClassifier from sklearn.svm import SVC from sklearn.ensemble import RandomForestClassifier from sklearn.tree import DecisionTreeClassifier from sklearn.neural_network import MLPClassifier from sklearn.metrics import accuracy_score, classification_report from sklearn.preprocessing import StandardScaler from sklearn.pipeline import make_pipeline from sklearn.base import clone import matplotlib.pyplot as plt from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay import os import re from sklearn.pipeline import Pipeline from sklearn.impute import SimpleImputer import seaborn as sns # 添加数据插补器 from sklearn.impute import SimpleImputer def check_chinese_font_support(): """检查系统是否支持中文字体""" chinese_fonts = ['SimHei', 'WenQuanYi Micro Hei', 'Heiti TC', 'Microsoft YaHei', 'SimSun'] for font in chinese_fonts: try: plt.rcParams["font.family"] = font # 测试字体是否可用 fig, ax = plt.subplots(figsize=(1, 1)) ax.text(0.5, 0.5, '测试', fontsize=12) plt.close(fig) print(f"系统支持中文字体: {font}") return True except: continue print("系统不支持中文字体,将使用英文标签") plt.rcParams["font.family"] = ['Arial', 'sans-serif'] return False class GasSensorDataAnalyzer: """有害气体分类数据加载与预处理类""" def __init__(self): # 基础气体标签 self.gas_labels = { 'acetone': 0, 'toluene': 1, 'methanol': 2, 'formaldehyde': 3, 'ethanol': 4 } # 混合气体标签生成(每个混合气体用唯一数字标识) self.mixture_labels = self._generate_mixture_labels() # 合并所有气体标签 self.all_gas_labels = {**self.gas_labels, **self.mixture_labels} # 中英文气体名称映射 self.gas_names = { 'acetone': {'cn': '丙酮', 'en': 'Acetone'}, 'toluene': {'cn': '甲苯', 'en': 'Toluene'}, 'methanol': {'cn': '甲醇', 'en': 'Methanol'}, 'formaldehyde': {'cn': '甲醛', 'en': 'Formaldehyde'}, 'ethanol': {'cn': '乙醇', 'en': 'Ethanol'}, 'toluene+formaldehyde': {'cn': '甲苯+甲醛', 'en': 'Toluene+Formaldehyde'}, 'methanol+toluene+formaldehyde': {'cn': '甲醇+甲苯+甲醛', 'en': 'Methanol+Toluene+Formaldehyde'} # 可以根据需要添加更多混合气体的名称映射 } # 传感器类型映射 self.sensor_types = { 'MP2': 0, 'MP3B': 1, 'MP503': 2, 'MP801': 3, 'MQ2': 4, 'MQ7B': 5 } # 初始化多维度类别映射 self.multi_dimension_labels = {} self.next_label_id = 0 # 传感器中英文名称映射 self.sensor_names = { 'MP2': {'cn': 'MP2', 'en': 'MP2'}, 'MP3B': {'cn': 'MP3B', 'en': 'MP3B'}, 'MP503': {'cn': 'MP503', 'en': 'MP503'}, 'MP801': {'cn': 'MP801', 'en': 'MP801'}, 'MQ2': {'cn': 'MQ2', 'en': 'MQ2'}, 'MQ7B': {'cn': 'MQ7B', 'en': 'MQ7B'} } def _generate_mixture_labels(self): """生成混合气体的标签映射""" # 定义可能的混合气体组合 mixtures = [ 'toluene+formaldehyde', 'methanol+toluene+formaldehyde' # 可以根据需要添加更多混合气体组合 ] # 为每个混合气体分配唯一标签(从基础气体标签之后开始) next_label = max(self.gas_labels.values()) + 1 return {mixture: next_label + i for i, mixture in enumerate(mixtures)} def get_or_create_multi_dimension_label(self, sensor_type, gas_type, concentration): """ 获取或创建多维度类别标签 参数: - sensor_type: 传感器类型 - gas_type: 气体类型 - concentration: 浓度值 返回: - 标签ID标签名称 """ # 创建唯一键 key = f"{sensor_type}_{gas_type}_{concentration}ppm" # 如果键不存在,创建新标签 if key not in self.multi_dimension_labels: self.multi_dimension_labels[key] = self.next_label_id self.next_label_id += 1 # 返回标签ID标签名称 label_id = self.multi_dimension_labels[key] # 创建中英文标签名称 sensor_name_cn = self.sensor_names.get(sensor_type, {}).get('cn', sensor_type) sensor_name_en = self.sensor_names.get(sensor_type, {}).get('en', sensor_type) gas_name_cn = self.gas_names.get(gas_type, {}).get('cn', gas_type) gas_name_en = self.gas_names.get(gas_type, {}).get('en', gas_type) label_name_cn = f"{sensor_name_cn}_{gas_name_cn}_{concentration}ppm" label_name_en = f"{sensor_name_en}_{gas_name_en}_{concentration}ppm" return label_id, { 'cn': label_name_cn, 'en': label_name_en } def load_single_gas_data(self, file_path, gas_type, concentration, sensor_type): """ 加载单一气体数据 参数: - file_path: 文件路径 - gas_type: 气体类型 (如 'acetone', 'toluene' 等) - concentration: 浓度值 (如 20, 30, 50 等) - sensor_type: 传感器类型 (如 'MP2', 'MP801' 等) """ try: if not os.path.exists(file_path): raise FileNotFoundError(f"文件不存在: {file_path}") df = pd.read_excel(file_path, sheet_name='Sheet1', index_col=0) X = df.values # 尝试将数据转换为 float 类型 try: X = X.astype(float) except ValueError: print("警告: 数据中包含非数值类型,将过滤掉非数值类型的数据") numeric_mask = np.vectorize(np.isreal)(X) X = X[numeric_mask].reshape(-1, df.shape[1]) # 检查并报告NaN值 nan_count = np.isnan(X).sum() if nan_count > 0: print(f"警告: 数据中包含 {nan_count} 个NaN值") # 可选:替换NaN值为0 # X = np.nan_to_num(X, nan=0.0) # 创建多维度标签 label_id, label_name = self.get_or_create_multi_dimension_label( sensor_type, gas_type, concentration ) # 为所有样本分配相同的标签 y = np.full(len(X), label_id, dtype=int) print(f"已加载 {label_name['cn']} 数据: {len(X)} 样本, 特征维度: {X.shape[1]}") return X, y except Exception as e: print(f"加载数据时出错: {e}") return None, None def load_multiple_gas_data(self, file_paths, gas_types, concentrations, sensor_types): """ 加载多个气体数据并合并 参数: - file_paths: 文件路径列表 - gas_types: 气体类型列表 (如 ['acetone', 'toluene'] 等) - concentrations: 浓度值列表 (如 [20, 30] 等) - sensor_types: 传感器类型列表 (如 ['MP2', 'MP801'] 等) """ X_all = [] y_all = [] feature_dimensions = [] # 用于记录每个数据集的特征维度 for file_path, gas_type, concentration, sensor_type in zip( file_paths, gas_types, concentrations, sensor_types ): X, y = self.load_single_gas_data(file_path, gas_type, concentration, sensor_type) if X is not None and len(X) > 0: X_all.append(X) y_all.append(y) feature_dimensions.append(X.shape[1]) if not X_all: print("没有加载到有效数据") return None, None # 检查所有数据集的特征维度是否一致 unique_dimensions = np.unique(feature_dimensions) if len(unique_dimensions) > 1: print(f"警告: 检测到不同的特征维度: {unique_dimensions}") print("这可能导致合并数据时出错。请检查您的Excel文件是否具有相同的列数。") # 找出最常见的维度 from collections import Counter dimension_counts = Counter(feature_dimensions) most_common_dimension = dimension_counts.most_common(1)[0][0] print(f"最常见的特征维度是: {most_common_dimension}") # 过滤掉特征维度不匹配的数据 filtered_X_all = [] filtered_y_all = [] for i, X in enumerate(X_all): if X.shape[1] == most_common_dimension: filtered_X_all.append(X) filtered_y_all.append(y_all[i]) else: print(f"忽略特征维度不匹配的数据集: {file_paths[i]} (维度: {X.shape[1]})") if not filtered_X_all: print("没有找到特征维度匹配的数据集") return None, None X_all = filtered_X_all y_all = filtered_y_all # 合并所有数据 X_combined = np.vstack(X_all) y_combined = np.concatenate(y_all) # 检查合并后的数据中是否存在NaN值 total_nan = np.isnan(X_combined).sum() if total_nan > 0: print(f"警告: 合并后的数据中包含 {total_nan} 个NaN值,占比: {total_nan/(X_combined.size):.4f}") print(f"NaN值在样本中的分布: {np.isnan(X_combined).any(axis=1).sum()} 个样本包含NaN值") print(f"NaN值在特征中的分布: {np.isnan(X_combined).any(axis=0).sum()} 个特征包含NaN值") print(f"合并后的数据: {len(X_combined)} 样本,{len(np.unique(y_combined))} 个类别,特征维度: {X_combined.shape[1]}") return X_combined, y_combined def load_dataset(self, file_path, gas_type, concentration, sensor_type): """加载单一数据集并返回""" return self.load_single_gas_data(file_path, gas_type, concentration, sensor_type) class AlgorithmSelector: """多算法选择与训练类""" def __init__(self, use_chinese=True): # 算法名称映射 self.algorithm_names = { 'knn': {'cn': 'K-近邻算法', 'en': 'K-Nearest Neighbors'}, 'svm': {'cn': '支持向量机', 'en': 'Support Vector Machine'}, 'random_forest': {'cn': '随机森林', 'en': 'Random Forest'}, 'decision_tree': {'cn': '决策树', 'en': 'Decision Tree'}, 'neural_network': {'cn': '神经网络', 'en': 'Neural Network'} } # 算法配置 self.algorithms = { 'knn': { 'model': KNeighborsClassifier(), 'params': {'n_neighbors': 5, 'metric': 'euclidean'} }, 'svm': { 'model': SVC(), 'params': {'kernel': 'rbf', 'C': 1.0, 'probability': True} }, 'random_forest': { 'model': RandomForestClassifier(), 'params': {'n_estimators': 100, 'random_state': 42} }, 'decision_tree': { 'model': DecisionTreeClassifier(), 'params': {'max_depth': None, 'random_state': 42} }, 'neural_network': { 'model': MLPClassifier(), 'params': { 'neural_network__hidden_layer_sizes': (100, 50), # 注意前缀 'neural_network__max_iter': 500, 'neural_network__random_state': 42} } } # 算法是否需要标准化 self.needs_scaling = { 'knn': True, 'svm': True, 'random_forest': False, 'decision_tree': False, 'neural_network': True } # 是否使用中文 self.use_chinese = use_chinese def set_algorithm_params(self, algorithm_name, params): """设置算法参数""" if algorithm_name in self.algorithms: # 为Pipeline正确格式化参数名称 formatted_params = {f"{algorithm_name}__{k}": v for k, v in params.items()} self.algorithms[algorithm_name]['params'] = formatted_params else: raise ValueError(f"不支持的算法: {algorithm_name}") def train_models(self, X, y, test_size=0.2, random_state=42): """ 训练所有算法并返回结果 返回: - 包含训练好的模型及其性能的字典 """ # 检查类别数量 unique_classes = np.unique(y) num_classes = len(unique_classes) if num_classes < 2: print(f"警告: 数据集中只有 {num_classes} 个类别,某些算法可能无法训练") print(f"单一类别值: {unique_classes[0]}") # 跳过SVM算法,因为它需要至少两个类别 algorithms_to_train = [name for name in self.algorithms if name != 'svm'] print(f"由于单类别数据,将跳过 SVM 算法,仅训练: {', '.join([self.algorithm_names[name]['cn'] for name in algorithms_to_train])}") # 在单一数据集上划分训练集测试集 X_train, X_test, y_train, y_test = train_test_split( X, y, test_size=test_size, random_state=random_state ) # 标记这是单类别数据 is_single_class_data = True else: # 在多类别数据集上划分训练集测试集 X_train, X_test, y_train, y_test = train_test_split( X, y, test_size=test_size, random_state=random_state, stratify=y ) algorithms_to_train = list(self.algorithms.keys()) is_single_class_data = False # 检查数据类型并确保可以安全转换为数值类型 try: # 尝试将数据转换为float类型 X_train_numeric = X_train.astype(float) X_test_numeric = X_test.astype(float) # 检查训练数据中是否存在NaN值 train_nan = np.isnan(X_train_numeric).sum() if train_nan > 0: print(f"警告: 训练数据中包含 {train_nan} 个NaN值,占比: {train_nan/(X_train_numeric.size):.4f}") print(f"NaN值在训练样本中的分布: {np.isnan(X_train_numeric).any(axis=1).sum()} 个样本包含NaN值") print(f"NaN值在训练特征中的分布: {np.isnan(X_train_numeric).any(axis=0).sum()} 个特征包含NaN值") # 检查测试数据中是否存在NaN值 test_nan = np.isnan(X_test_numeric).sum() if test_nan > 0: print(f"警告: 测试数据中包含 {test_nan} 个NaN值,占比: {test_nan/(X_test_numeric.size):.4f}") print(f"NaN值在测试样本中的分布: {np.isnan(X_test_numeric).any(axis=1).sum()} 个样本包含NaN值") print(f"NaN值在测试特征中的分布: {np.isnan(X_test_numeric).any(axis=0).sum()} 个特征包含NaN值") except ValueError as e: print(f"警告: 无法将数据转换为数值类型,跳过NaN值检查: {e}") results = {} for name in algorithms_to_train: algo = self.algorithms[name] # 获取算法名称(根据是否支持中文选择) algo_name = self.algorithm_names[name]['cn'] if self.use_chinese else self.algorithm_names[name]['en'] try: print(f"\n训练 {algo_name}...") # 创建模型管道 if self.needs_scaling[name]: # 为需要标准化的算法创建包含三个步骤的Pipeline model = Pipeline([ ('imputer', SimpleImputer(strategy='mean')), # 使用均值填充缺失值 ('scaler', StandardScaler()), (name, clone(algo['model'])) ]) else: # 为不需要标准化的算法创建包含两个步骤的Pipeline model = Pipeline([ ('imputer', SimpleImputer(strategy='mean')), # 使用均值填充缺失值 (name, clone(algo['model'])) ]) # 为决策树随机森林直接设置参数,不使用Pipeline参数设置方式 if name in ['decision_tree', 'random_forest']: # 获取算法实例 algo_instance = model.named_steps[name] # 直接设置参数 for param, value in algo['params'].items(): setattr(algo_instance, param, value) else: # 为其他算法使用Pipeline参数设置方式 model.set_params(**algo['params']) # 训练模型 model.fit(X_train, y_train) # 评估模型 train_accuracy = model.score(X_train, y_train) test_accuracy = model.score(X_test, y_test) y_pred = model.predict(X_test) print(f"训练集准确率: {train_accuracy:.4f}") print(f"测试集准确率: {test_accuracy:.4f}") print("分类报告:") print(classification_report(y_test, y_pred)) results[name] = { 'name': algo_name, 'model': model, 'train_accuracy': train_accuracy, 'test_accuracy': test_accuracy, 'y_pred': y_pred, 'X_test': X_test, 'y_test': y_test, 'unique_labels': np.unique(y_test), 'is_single_class': is_single_class_data } except Exception as e: print(f"训练 {algo_name} 时发生错误: {e}") results[name] = { 'name': algo_name, 'error': str(e), 'is_single_class': is_single_class_data } # 为跳过的SVM算法添加结果记录 if 'svm' not in algorithms_to_train: svm_name = self.algorithm_names['svm']['cn'] if self.use_chinese else self.algorithm_names['svm']['en'] results['svm'] = { 'name': svm_name, 'error': "由于单类别数据,跳过SVM算法", 'is_single_class': is_single_class_data } return results def compare_algorithms(self, results): """比较不同算法的性能""" # 过滤掉训练失败的算法 valid_results = {name: result for name, result in results.items() if 'test_accuracy' in result} if not valid_results: print("没有算法成功训练,无法生成比较图。") return None names = [valid_results[name]['name'] for name in valid_results] accuracies = [valid_results[name]['test_accuracy'] for name in valid_results] plt.figure(figsize=(12, 6)) bars = plt.bar(names, accuracies, color='skyblue') # 根据是否支持中文选择标题 title = "不同算法的测试集准确率比较" if self.use_chinese else "Comparison of Test Set Accuracies for Different Algorithms" x_label = "算法" if self.use_chinese else "Algorithm" y_label = "准确率" if self.use_chinese else "Accuracy" plt.ylim(0, 1.05) plt.title(title) plt.xlabel(x_label) plt.ylabel(y_label) # 添加数值标签 for bar in bars: height = bar.get_height() plt.text(bar.get_x() + bar.get_width()/2., height + 0.01, f'{height:.4f}', ha='center', va='bottom') plt.xticks(rotation=45, ha='right') plt.tight_layout() return plt def plot_confusion_matrix(self, results, gas_data_loader, use_chinese=True, rotate_labels=45, fig_width=12, fig_height=10, font_size=10): """ 绘制混淆矩阵 参数: - results: 包含算法结果的字典 - gas_data_loader: 气体数据加载器实例 - use_chinese: 是否使用中文 - rotate_labels: 标签旋转角度,默认为45度 - fig_width: 图形的宽度,默认为12 - fig_height: 图形的高度,默认为10 - font_size: 字体大小,默认为10 """ # 过滤掉训练失败的算法 valid_results = {name: result for name, result in results.items() if 'test_accuracy' in result} if not valid_results: print("没有算法成功训练,无法生成混淆矩阵。") return None # 获取所有算法中出现的唯一标签 all_unique_labels = set() for name, result in valid_results.items(): all_unique_labels.update(result['unique_labels']) all_unique_labels = sorted(list(all_unique_labels)) # 创建标签名称映射 label_names = [] for label in all_unique_labels: # 尝试查找对应的多维度标签名称 label_name = None for key, label_id in gas_data_loader.multi_dimension_labels.items(): if label_id == label: # 获取标签名称而不是标签ID label_name = gas_data_loader.get_or_create_multi_dimension_label( key.split('_')[0], # 传感器类型 key.split('_')[1], # 气体类型 int(key.split('_')[2].replace('ppm', '')) # 浓度值 )[1] # 获取第二个返回值,即标签名称字典 break # 如果找到,使用对应的标签名称 if label_name and isinstance(label_name, dict): if use_chinese: label_names.append(label_name.get('cn', f"类别 {label}")) else: label_names.append(label_name.get('en', f"Class {label}")) else: # 如果没有找到,使用默认标签名称 label_names.append(f"类别 {label}" if use_chinese else f"Class {label}") for name, result in valid_results.items(): plt.figure(figsize=(fig_width, fig_height)) cm = confusion_matrix(result['y_test'], result['y_pred'], labels=all_unique_labels) disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=label_names) disp.plot(cmap=plt.cm.Blues) # 根据是否支持中文选择标题 title = f"{result['name']} 混淆矩阵" if use_chinese else f"{result['name']} Confusion Matrix" # 如果是单类别数据,添加说明 if result.get('is_single_class', False): title += " (单类别数据)" plt.title(title) # 旋转x轴标签 plt.xticks(rotation=rotate_labels, ha='right', rotation_mode='anchor', fontsize=font_size) plt.yticks(fontsize=font_size) plt.tight_layout() return plt def detect_dataset_type(dataset_path): """ 自动检测数据集类型:单一数据集或多数据集文件夹 参数: - dataset_path: 数据集路径 返回: - dataset_type: 'single' 或 'multiple' - file_paths: 文件路径列表 - gas_types: 气体类型列表 - concentrations: 浓度值列表 - sensor_types: 传感器类型列表 """ # 检查路径是否存在 if not os.path.exists(dataset_path): raise FileNotFoundError(f"路径不存在: {dataset_path}") # 检查是文件还是文件夹 if os.path.isfile(dataset_path): # 单一文件处理 file_paths = [dataset_path] # 从文件名提取传感器类型、气体类型浓度 file_name = os.path.basename(dataset_path) sensor_type = extract_sensor_type(file_name) gas_type = extract_gas_type(file_name) concentration = extract_concentration(file_name) gas_types = [gas_type] concentrations = [concentration] sensor_types = [sensor_type] print(f"检测到单一数据集: {file_name}") print(f"传感器类型: {sensor_type}, 气体类型: {gas_type}, 浓度: {concentration}ppm") return 'single', file_paths, gas_types, concentrations, sensor_types elif os.path.isdir(dataset_path): # 文件夹处理 - 查找所有Excel文件 excel_files = [f for f in os.listdir(dataset_path) if f.endswith(('.xlsx', '.xls'))] if not excel_files: raise ValueError(f"文件夹中没有找到Excel文件: {dataset_path}") file_paths = [] gas_types = [] concentrations = [] sensor_types = [] for file in excel_files: file_path = os.path.join(dataset_path, file) file_paths.append(file_path) # 从文件名提取传感器类型、气体类型浓度 sensor_type = extract_sensor_type(file) gas_type = extract_gas_type(file) concentration = extract_concentration(file) gas_types.append(gas_type) concentrations.append(concentration) sensor_types.append(sensor_type) print(f"找到数据集文件: {file}") print(f"传感器类型: {sensor_type}, 气体类型: {gas_type}, 浓度: {concentration}ppm") print(f"总共找到 {len(file_paths)} 个数据集文件") return 'multiple', file_paths, gas_types, concentrations, sensor_types else: raise ValueError(f"无法识别的路径: {dataset_path}") def extract_sensor_type(file_name): """从文件名提取传感器类型""" # 定义传感器类型的正则表达式模式 sensor_patterns = { 'MP2': r'(^MP2[^a-zA-Z0-9]|MP2$)', 'MP3B': r'(^MP3B[^a-zA-Z0-9]|MP3B$)', 'MP503': r'(^MP503[^a-zA-Z0-9]|MP503$)', 'MP801': r'(^MP801[^a-zA-Z0-9]|MP801$)', 'MQ2': r'(^MQ2[^a-zA-Z0-9]|MQ2$)', 'MQ7B': r'(^MQ7B[^a-zA-Z0-9]|MQ7B$)' } # 转换为大写以提高匹配率 file_name_upper = file_name.upper() # 尝试匹配传感器类型 for sensor_type, pattern in sensor_patterns.items(): if re.search(pattern, file_name_upper): return sensor_type # 如果没有匹配到,返回默认值 print(f"警告: 无法从文件名 '{file_name}' 中提取传感器类型,使用默认值 'MP2'") return 'MP2' def extract_gas_type(file_name): """从文件名提取气体类型""" # 定义基础气体类型的中英文名称映射 gas_name_mapping = { 'bingtong': 'acetone', '丙酮': 'acetone', 'jiaben': 'toluene', '甲苯': 'toluene', 'jiachun': 'methanol', '甲醇': 'methanol', 'jiaquan': 'formaldehyde', '甲醛': 'formaldehyde', 'yichun': 'ethanol', '乙醇': 'ethanol' } # 去除文件扩展名 file_name_without_ext = os.path.splitext(file_name)[0] # 按照固定格式"传感器_气体名称_浓度"分割文件名 parts = file_name_without_ext.split('_') # 确保有足够的部分 if len(parts) < 3: print(f"警告: 文件名格式不符合预期: {file_name}") return 'acetone' # 获取气体名称部分 gas_name_part = parts[1] # 检查是否为混合气体 if '+' in gas_name_part or '+' in gas_name_part: # 处理混合气体 # 统一分隔符 gas_name_part = gas_name_part.replace('+', '+') gas_components = gas_name_part.split('+') # 转换为标准气体名称 standard_gas_names = [] for component in gas_components: # 先尝试中文名称映射 standard_name = gas_name_mapping.get(component, None) if standard_name: standard_gas_names.append(standard_name) else: # 如果是英文名称,直接添加 if component.lower() in ['acetone', 'toluene', 'methanol', 'formaldehyde', 'ethanol']: standard_gas_names.append(component.lower()) else: print(f"警告: 无法识别的气体成分: {component}") # 按字母顺序排序以确保一致性 standard_gas_names.sort() # 组合成混合气体名称 if len(standard_gas_names) > 1: return '+'.join(standard_gas_names) elif len(standard_gas_names) == 1: return standard_gas_names[0] # 处理单一气体 # 先尝试中文名称映射 standard_name = gas_name_mapping.get(gas_name_part, None) if standard_name: return standard_name # 如果是英文名称,直接返回小写形式 if gas_name_part.lower() in ['acetone', 'toluene', 'methanol', 'formaldehyde', 'ethanol']: return gas_name_part.lower() # 如果没有匹配到,返回默认值 print(f"警告: 无法从文件名 '{file_name}' 中提取气体类型,使用默认值 'acetone'") return 'acetone' def extract_concentration(file_name): """从文件名提取浓度值""" # 去除文件扩展名 file_name_without_ext = os.path.splitext(file_name)[0] # 按照固定格式"传感器_气体名称_浓度"分割文件名 parts = file_name_without_ext.split('_') # 确保有足够的部分 if len(parts) < 3: print(f"警告: 文件名格式不符合预期: {file_name}") return 20 # 获取浓度部分 concentration_part = parts[2] # 提取数字部分 match = re.search(r'(\d+)', concentration_part) if match: return int(match.group(1)) # 如果没有匹配到,返回默认值 print(f"警告: 无法从文件名 '{file_name}' 中提取浓度值,使用默认值 20ppm") return 20 def main(): """主函数""" # 检查中文字体支持 chinese_supported = check_chinese_font_support() # 创建数据加载器 data_loader = GasSensorDataAnalyzer() # 定义数据集路径 dataset_path = r"C:\Users\Cong\Desktop\作业\项目\六通道2混合\2_MP2" try: # 自动检测数据集类型 dataset_type, file_paths, gas_types, concentrations, sensor_types = detect_dataset_type(dataset_path) # 根据检测结果加载数据 if dataset_type == 'single': # 加载单一数据集 X, y = data_loader.load_dataset(file_paths[0], gas_types[0], concentrations[0], sensor_types[0]) else: # 加载多个数据集并合并 X, y = data_loader.load_multiple_gas_data(file_paths, gas_types, concentrations, sensor_types) if X is None or len(X) == 0: print("No valid data available for training. Please check file paths and formats.") return print(f"加载的数据集总样本数: {len(X)}") print(f"数据集中的类别数量: {len(np.unique(y))}") # 创建算法选择器,根据中文字体支持情况决定是否使用中文 selector = AlgorithmSelector(use_chinese=chinese_supported) # 自定义参数配置示例 selector.set_algorithm_params('knn', {'n_neighbors': 3, 'metric': 'manhattan'}) selector.set_algorithm_params('svm', {'C': 0.8, 'kernel': 'linear'}) selector.set_algorithm_params('neural_network', {'hidden_layer_sizes': (150, 75)}) # 训练所有算法 results = selector.train_models(X, y) # 比较算法性能 plt1 = selector.compare_algorithms(results) if plt1: plt1.savefig('algorithm_comparison.png') plt1.close() # 绘制混淆矩阵 plt2 = selector.plot_confusion_matrix(results, data_loader, use_chinese=chinese_supported, rotate_labels=45,fig_width=20, fig_height=20, font_size=8) if plt2: plt2.savefig('confusion_matrix.png') plt2.close() print("\n算法比较结果已保存为 'algorithm_comparison.png'") print("混淆矩阵已保存为 'confusion_matrix.png'") except Exception as e: print(f"程序执行过程中发生错误: {e}") if __name__ == "__main__": main()还有一个是tempcoderunnerfile.py文件:@app.route('/upload', methods=['POST']) def upload_file(): """处理文件上传""" if 'files' not in request.files: return jsonify({'error': 'No file part'}), 400 files = request.files.getlist('files') gas_type = request.form.get('gas_type', 'acetone') concentration = int(request.form.get('concentration', 20)) if not files or files[0].filename == '': return jsonify({'error': 'No selected file'}), 400 datasets = [] for file in files: if file and allowed_file(file.filename): # 保存临时文件 file_path = os.path.join(app.config['UPLOAD_FOLDER'], file.filename) file.save(file_path) # 加载数据 data = data_loader.load_single_gas_data(file_path, gas_type, concentration) datasets.append(data) # 删除临时文件 os.remove(file_path) # 合并数据集 X, y = data_loader.combine_datasets(datasets) if X is None or len(X) == 0: return jsonify({'error': 'No valid data loaded'}), 400 # 保存合并后的数据 df = pd.DataFrame(X) df['label'] = y file_path = os.path.join(app.config['UPLOAD_FOLDER'], 'temp_data.xlsx') df.to_excel(file_path, index=False) return jsonify({ 'status': 'success', 'sample_count': len(X), 'feature_count': X.shape[1], 'gas_type': gas_type, 'concentration': concentration })请根据这两个文件重修修改app.py文件,确保他在algorithmselection呈现的页面中当我选择两个文件进入的时候能够分析数据
最新发布
06-22
<think>根据提供的两个文件(`new_algorithm.py``tempcoderunnerfile.py`),我们需要将`new_algorithm.py`中的功能整合到`app.py`中,特别是`GasSensorDataAnalyzer`类`AlgorithmSelector`类。同时,我们需要修改`/upload``/analyze`路由,以支持从两个文件中加载数据并进行多维度分析。以下是修改后的`app.py`文件,主要改动包括:1.导入`new_algorithm.py`中的`GasSensorDataAnalyzer``AlgorithmSelector`类。2.在文件上传路由`/upload`中,使用`GasSensorDataAnalyzer`加载两个文件的数据,并合并。3.在数据分析路由`/analyze`中,使用`AlgorithmSelector`训练模型并返回结果。4.修改全局变量以存储`GasSensorDataAnalyzer`实例加载的数据集。注意:由于原`app.py`中的全局变量存储方式与`new_algorithm.py`中的类有冲突,我们将调整全局变量的使用,改为存储`GasSensorDataAnalyzer`的实例合并后的数据集。另外,我们还需要注意以下几点:-文件上传时,前端需要提供两个文件,以及每个文件对应的气体类型、浓度传感器类型(因为`new_algorithm.py`中的`load_multiple_gas_data`需要这些信息)。-在`/upload`路由中,我们不再使用之前的`load_and_merge_files``preprocess_data`函数,而是使用`GasSensorDataAnalyzer`的方法。-由于`new_algorithm.py`中的`GasSensorDataAnalyzer`类已经包含了数据加载预处理的功能,因此我们不再需要之前的预处理步骤。由于代码较长,我将逐步整合并给出完整的`app.py`代码。###修改后的完整`app.py`代码```pythonfromflaskimportFlask,request,jsonify,send_from_directoryfromflask_corsimportCORSimportosimportpandasaspdimportnumpyasnpfromwerkzeug.utilsimportsecure_filenameimportloggingimporttime#导入new_algorithm.py中的类fromnew_algorithmimportGasSensorDataAnalyzer,AlgorithmSelector#配置日志logging.basicConfig(level=logging.DEBUG,format='%(asctime)s-%(name)s-%(levelname)s-%(message)s',handlers=[logging.StreamHandler(),#输出到控制台logging.FileHandler('app.log',encoding='utf-8')#输出到文件])logger=logging.getLogger(__name__)app=Flask(__name__)app.config['MAX_CONTENT_LENGTH']=100*1024*1024#100MB文件大小限制#配置CORS允许Vue前端访问CORS(app,resources={r"/*":{"origins":"http://localhost:5177",#指定Vue前端地址"methods":["GET","POST","PUT","DELETE","OPTIONS"],"allow_headers":["Content-Type","Authorization"],"supports_credentials":True}})#配置上传文件夹UPLOAD_FOLDER='uploads'ifnotos.path.exists(UPLOAD_FOLDER):os.makedirs(UPLOAD_FOLDER)app.config['UPLOAD_FOLDER']=UPLOAD_FOLDER#允许的文件扩展名ALLOWED_EXTENSIONS={'csv','xlsx','xls'}#全局变量存储data_analyzer=GasSensorDataAnalyzer()#创建数据加载器实例X_combined=None#合并后的特征数据y_combined=None#合并后的标签数据gas_types=[]#存储气体类型concentrations=[]#存储浓度sensor_types=[]#存储传感器类型last_activity=time.time()#最后活动时间戳defallowed_file(filename):"""检查文件扩展名是否合法"""return'.'infilenameand\filename.rsplit('.',1)[1].lower()inALLOWED_EXTENSIONS@app.route('/')defindex():"""健康检查端点"""globallast_activitystatus={'status':'running','version':'1.0.0','last_activity':time.ctime(last_activity),'endpoints':{'/upload':'POST-Uploaddatafiles','/analyze':'POST-Analyzedata','/reset':'POST-Resetdata','/columns':'GET-Getdatasetcolumns','/status':'GET-Servicestatus'}}logger.info(f"Statusrequest:{status}")returnjsonify(status)@app.route('/status')defstatus():"""服务状态检查"""globalX_combined,last_activityreturnjsonify({'status':'active','timestamp':time.time(),'dataset_loaded':X_combinedisnotNone,'dataset_shape':X_combined.shapeifX_combinedisnotNoneelseNone,'last_activity':time.ctime(last_activity)})@app.route('/upload',methods=['POST'])defupload_files():"""处理文件上传"""globalX_combined,y_combined,gas_types,concentrations,sensor_types,last_activitylogger.info("Receiveduploadrequest")#检查是否有文件if'files'notinrequest.files:logger.error("Nofilepartinrequest")returnjsonify({'error':'Nofilepart'}),400files=request.files.getlist('files')iflen(files)==0orfiles[0].filename=='':logger.error("Noselectedfiles")returnjsonify({'error':'Noselectedfiles'}),400#过滤合法文件valid_files=[fforfinfilesifallowed_file(f.filename)]ifnotvalid_files:logger.error("Novalidfilesfound")returnjsonify({'error':'Novalidfiles.OnlyCSV,XLSX,XLSareallowed.'}),400#从表单获取传感器气体信息try:#获取两个文件对应的传感器类型、气体类型浓度sensor_type1=request.form.get('sensor_type1','MP2')gas_type1=request.form.get('gas_type1','acetone')concentration1=float(request.form.get('concentration1',20))sensor_type2=request.form.get('sensor_type2','MP2')gas_type2=request.form.get('gas_type2','acetone')concentration2=float(request.form.get('concentration2',20))exceptExceptionase:logger.error(f"Errorparsingformdata:{str(e)}",exc_info=True)returnjsonify({'error':'Invalidformdata.Pleasecheckconcentrationvalues.'}),400#保存文件并获取文件路径列表file_paths=[]forfileinvalid_files:try:filename=secure_filename(file.filename)file_path=os.path.join(app.config['UPLOAD_FOLDER'],filename)file.save(file_path)logger.info(f"Savedfile:{file_path}")file_paths.append(file_path)exceptExceptionase:logger.error(f"Errorsavingfile{file.filename}:{str(e)}",exc_info=True)continueiflen(file_paths)<2:logger.error("Needatleasttwofilesforanalysis")returnjsonify({'error':'Needatleasttwofilesforanalysis'}),400#设置气体类型、浓度传感器类型列表gas_types=[gas_type1,gas_type2]concentrations=[concentration1,concentration2]sensor_types=[sensor_type1,sensor_type2]#加载数据try:#使用GasSensorDataAnalyzer加载多个气体数据X_combined,y_combined=data_analyzer.load_multiple_gas_data(file_paths,gas_types,concentrations,sensor_types)ifX_combinedisNoneorlen(X_combined)==0:logger.error("Failedtoloaddatafromfiles")returnjsonify({'error':'Failedtoloaddata.Checkfilecontent.'}),500logger.info(f"Loadedcombineddata:{len(X_combined)}samples,{X_combined.shape[1]}features")#更新最后活动时间last_activity=time.time()#返回成功响应response={'message':f'Successfullyuploadedandmerged{len(file_paths)}files','sample_count':len(X_combined),'gas_types':gas_types,'concentrations':concentrations,'sensor_types':sensor_types}returnjsonify(response),200exceptExceptionase:logger.error(f"Errorloadingdata:{str(e)}",exc_info=True)returnjsonify({'error':f'Errorloadingdata:{str(e)}'}),500@app.route('/analyze',methods=['POST'])defanalyze_data():"""执行数据分析"""globalX_combined,y_combined,last_activitylogger.info("Receivedanalyzerequest")#检查数据是否已加载ifX_combinedisNoneory_combinedisNone:logger.error("Nodatasetavailable")returnjsonify({'error':'Nodataavailable.Pleaseuploadfilesfirst.'}),400#获取前端传递的算法参数try:data=request.get_json()ifnotdata:logger.error("Invalidrequestparameters")returnjsonify({'error':'Invalidrequestparameters'}),400#获取算法参数,提供默认值params=data.get('params',{})knn_params=params.get('knn',{'n_neighbors':5,'metric':'euclidean'})svm_params=params.get('svm',{'C':1.0,'kernel':'rbf'})rf_params=params.get('random_forest',{'n_estimators':100,'max_depth':None})exceptExceptionase:logger.error(f"ErrorparsingJSONdata:{str(e)}",exc_info=True)returnjsonify({'error':'InvalidJSONdata'}),400#创建算法选择器(默认使用中文)selector=AlgorithmSelector(use_chinese=True)#设置算法参数try:selector.set_algorithm_params('knn',knn_params)selector.set_algorithm_params('svm',svm_params)selector.set_algorithm_params('random_forest',rf_params)exceptExceptionase:logger.error(f"Errorsettingalgorithmparameters:{str(e)}")returnjsonify({'error':f'Errorsettingalgorithmparameters:{str(e)}'}),400#训练模型try:results=selector.train_models(X_combined,y_combined)logger.info("Algorithmtrainingcompleted")exceptExceptionase:logger.error(f"Errortrainingmodels:{str(e)}",exc_info=True)returnjsonify({'error':f'Errortrainingmodels:{str(e)}'}),500#提取需要返回的结果response_results={}foralgo_name,resultinresults.items():#如果训练出错,记录错误信息if'error'inresult:response_results[algo_name]={'name':result['name'],'error':result['error']}else:#分类报告转为字符串report_str=result.get('classification_report','Noreport')#如果是字典,转换为字符串ifisinstance(report_str,dict):report_str=classification_report_dict_to_str(report_str)response_results[algo_name]={'name':result['name'],'train_accuracy':result['train_accuracy'],'test_accuracy':result['test_accuracy'],'classification_report':report_str,#特征重要性只有随机森林有'feature_importances':result.get('feature_importances',None)}#更新最后活动时间last_activity=time.time()returnjsonify({'message':'Analysiscompletedsuccessfully','results':response_results}),200defclassification_report_dict_to_str(report_dict):"""将分类报告字典转换为字符串"""report_str=""#添加每个类别的精度、召回、F1值forlabel,metricsinreport_dict.items():ifisinstance(metrics,dict):#确保是类别的指标report_str+=f"Class{label}:precision={metrics['precision']:.4f},recall={metrics['recall']:.4f},f1-score={metrics['f1-score']:.4f}\n"#添加总体指标report_str+=f"Overallaccuracy:{report_dict['accuracy']:.4f}\n"report_str+=f"Macroavg:precision={report_dict['macroavg']['precision']:.4f},recall={report_dict['macroavg']['recall']:.4f},f1-score={report_dict['macroavg']['f1-score']:.4f}\n"report_str+=f"Weightedavg:precision={report_dict['weightedavg']['precision']:.4f},recall={report_dict['weightedavg']['recall']:.4f},f1-score={report_dict['weightedavg']['f1-score']:.4f}"returnreport_str@app.route('/reset',methods=['POST'])defreset_data():"""重置数据集"""globalX_combined,y_combined,gas_types,concentrations,sensor_types,last_activityX_combined=Noney_combined=Nonegas_types=[]concentrations=[]sensor_types=[]last_activity=time.time()logger.info("Datareset")returnjsonify({'message':'Dataresetsuccessfully'}),200@app.route('/uploads/<filename>',methods=['GET'])defuploaded_file(filename):"""访问上传的文件"""returnsend_from_directory(app.config['UPLOAD_FOLDER'],filename)if__name__=='__main__':#开发环境-允许Vue前端访问app.run(host='0.0.0.0',port=5000,debug=True)```###主要修改说明1.**导入`new_algorithm.py`中的类**:```pythonfromnew_algorithmimportGasSensorDataAnalyzer,AlgorithmSelector```2.**全局变量调整**:-创建`GasSensorDataAnalyzer`实例`data_analyzer`。-使用`X_combined``y_combined`存储合并后的数据集。-保留`gas_types`,`concentrations`,`sensor_types`存储气体传感器信息。3.**`/upload`路由修改**:-保存上传的两个文件到服务器。-使用`data_analyzer.load_multiple_gas_data`加载两个文件的数据。-将加载的数据集存储在全局变量`X_combined``y_combined`中。4.**`/analyze`路由修改**:-创建`AlgorithmSelector`实例。-设置算法参数(从前端获取)。-调用`selector.train_models(X_combined,y_combined)`训练模型。-将训练结果转换为适合JSON返回的格式(注意分类报告的处理)。5.**分类报告处理**:-由于`classification_report`返回的可能是字典,我们将其转换为字符串以便在JSON响应中返回。我们添加了`classification_report_dict_to_str`函数进行转换。6.**错误处理**:-在关键步骤添加了异常捕获日志记录。###注意事项1.**文件数量**:此版本要求至少上传两个文件。如果只有一个文件,将返回错误。2.**气体传感器信息**:前端需要为每个文件提供对应的气体类型、浓度传感器类型。这些信息通过表单字段`sensor_type1`,`gas_type1`,`concentration1``sensor_type2`,`gas_type2`,`concentration2`传递。3.**算法参数**:前端传递的算法参数格式应与之前一致,但请注意`new_algorithm.py`中的`AlgorithmSelector`类支持的算法参数。4.**分类报告**:由于`sklearn.metrics.classification_report`返回的字典结构复杂,我们只提取了主要指标转换为字符串。如果需要更详细的信息,可以调整转换函数。5.**特征重要性**:只有随机森林算法会返回特征重要性,其他算法没有。###待优化点1.**内存管理**:加载的数据集存储在全局变量中,可能占用较多内存。在重置或长时间不活动时,考虑释放内存。2.**多用户支持**:当前全局变量设计不支持多用户并发。如果多个用户同时使用,数据会互相覆盖。生产环境需要改为每个会话独立存储。3.**错误处理**:在数据加载模型训练过程中,可能需要更详细的错误信息返回给前端。此修改后的`app.py`文件应该能够满足您的需求:上传两个文件,并分析数据。请根据实际测试情况调整细节。§§相关问题§§:1.如何优化Flask应用以支持多用户并发访问?2.在机器学习API中,如何处理大文件上传长时间训练任务?3.如何将训练好的模型保存并复用,避免重复训练?4.在Vue前端中,如何优雅地展示混淆矩阵特征重要性?</think>根据您提供的 `new_algorithm.py` `tempcoderunnerfile.py` 文件,我将重新修改 `app.py` 文件,确保它能与 `GasSensorDataAnalyzer` `AlgorithmSelector` 类无缝集成,并支持从两个文件加载数据进行分析。 以下是完整的修改后的 `app.py` 文件: ```python from flask import Flask, request, jsonify, send_from_directory from flask_cors import CORS import os import numpy as np import pandas as pd import logging import time from werkzeug.utils import secure_filename from new_algorithm import GasSensorDataAnalyzer, AlgorithmSelector, detect_dataset_type, extract_gas_type, extract_concentration, extract_sensor_type # 配置日志 logging.basicConfig( level=logging.DEBUG, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s', handlers=[ logging.StreamHandler(), # 输出到控制台 logging.FileHandler('app.log', encoding='utf-8') # 输出到文件 ] ) logger = logging.getLogger(__name__) app = Flask(__name__) app.config['MAX_CONTENT_LENGTH'] = 100 * 1024 * 1024 # 100MB 文件大小限制 # 配置CORS允许Vue前端访问 CORS(app, resources={ r"/*": { "origins": "http://localhost:5177", # 指定Vue前端地址 "methods": ["GET", "POST", "PUT", "DELETE", "OPTIONS"], "allow_headers": ["Content-Type", "Authorization"], "supports_credentials": True } }) # 配置上传文件夹 UPLOAD_FOLDER = 'uploads' if not os.path.exists(UPLOAD_FOLDER): os.makedirs(UPLOAD_FOLDER) app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER # 允许的文件扩展名 ALLOWED_EXTENSIONS = {'csv', 'xlsx', 'xls'} # 全局数据集存储 data_analyzer = GasSensorDataAnalyzer() X_combined = None y_combined = None gas_types = [] concentrations = [] sensor_types = [] last_activity = time.time() algorithm_selector = AlgorithmSelector(use_chinese=True) def allowed_file(filename): """检查文件扩展名是否合法""" return '.' in filename and \ filename.rsplit('.', 1)[1].lower() in ALLOWED_EXTENSIONS def save_and_extract_file_info(file): """保存文件并提取气体信息""" try: filename = secure_filename(file.filename) file_path = os.path.join(app.config['UPLOAD_FOLDER'], filename) file.save(file_path) logger.info(f"Saved file: {file_path}") # 从文件名提取信息 sensor_type = extract_sensor_type(filename) gas_type = extract_gas_type(filename) concentration = extract_concentration(filename) return file_path, sensor_type, gas_type, concentration except Exception as e: logger.error(f"Error processing file {file.filename}: {str(e)}", exc_info=True) return None, None, None, None @app.route('/') def index(): """健康检查端点""" global last_activity status = { 'status': 'running', 'version': '1.0.0', 'last_activity': time.ctime(last_activity), 'endpoints': { '/upload': 'POST - Upload data files', '/analyze': 'POST - Analyze data', '/reset': 'POST - Reset data', '/columns': 'GET - Get dataset columns', '/status': 'GET - Service status' } } logger.info(f"Status request: {status}") return jsonify(status) @app.route('/status') def status(): """服务状态检查""" global X_combined, last_activity return jsonify({ 'status': 'active', 'timestamp': time.time(), 'dataset_loaded': X_combined is not None, 'dataset_shape': X_combined.shape if X_combined is not None else None }) @app.route('/upload', methods=['POST']) def upload_files(): """处理文件上传""" global X_combined, y_combined, gas_types, concentrations, sensor_types, last_activity logger.info("Received upload request") # 检查是否有文件 if 'files' not in request.files: logger.error("No file part in request") return jsonify({'error': 'No file part'}), 400 files = request.files.getlist('files') if len(files) == 0 or files[0].filename == '': logger.error("No selected files") return jsonify({'error': 'No selected files'}), 400 # 过滤合法文件 valid_files = [f for f in files if allowed_file(f.filename)] if len(valid_files) < 2: logger.error("Need at least two files for analysis") return jsonify({'error': 'Need at least two files for analysis'}), 400 # 保存文件并提取信息 file_paths = [] extracted_info = [] for file in valid_files: file_path, sensor_type, gas_type, concentration = save_and_extract_file_info(file) if file_path: file_paths.append(file_path) extracted_info.append({ 'sensor_type': sensor_type, 'gas_type': gas_type, 'concentration': concentration }) if len(file_paths) < 2: logger.error("Failed to process enough files") return jsonify({'error': 'Failed to process files. Need at least two valid files.'}), 500 # 准备数据加载参数 sensor_types = [info['sensor_type'] for info in extracted_info] gas_types = [info['gas_type'] for info in extracted_info] concentrations = [info['concentration'] for info in extracted_info] # 加载数据 try: X_combined, y_combined = data_analyzer.load_multiple_gas_data( file_paths, gas_types, concentrations, sensor_types ) if X_combined is None or len(X_combined) == 0: logger.error("Failed to load data from files") return jsonify({'error': 'Failed to load data. Check file content.'}), 500 logger.info(f"Loaded combined data: {len(X_combined)} samples, {X_combined.shape[1]} features") # 获取多维度标签信息 label_info = [] for label in np.unique(y_combined): for key, label_id in data_analyzer.multi_dimension_labels.items(): if label_id == label: parts = key.split('_') sensor = parts[0] gas = parts[1] conc = parts[2].replace('ppm', '') label_info.append({ 'id': int(label), 'sensor': sensor, 'gas': gas, 'concentration': conc, 'name_cn': data_analyzer.get_or_create_multi_dimension_label(sensor, gas, int(conc))[1]['cn'], 'name_en': data_analyzer.get_or_create_multi_dimension_label(sensor, gas, int(conc))[1]['en'] }) # 更新最后活动时间 last_activity = time.time() # 返回成功响应 response = { 'message': f'Successfully uploaded and merged {len(file_paths)} files', 'sample_count': len(X_combined), 'gas_types': gas_types, 'concentrations': concentrations, 'sensor_types': sensor_types, 'label_info': label_info, 'num_classes': len(np.unique(y_combined)) } return jsonify(response), 200 except Exception as e: logger.error(f"Error loading data: {str(e)}", exc_info=True) return jsonify({'error': f'Error loading data: {str(e)}'}), 500 @app.route('/analyze', methods=['POST']) def analyze_data(): """执行数据分析""" global X_combined, y_combined, algorithm_selector, last_activity logger.info("Received analyze request") # 检查数据是否已加载 if X_combined is None or y_combined is None: logger.error("No dataset available") return jsonify({'error': 'No data available. Please upload files first.'}), 400 # 获取前端传递的算法参数 try: data = request.get_json() if not data or 'params' not in data: logger.error("Invalid request parameters") return jsonify({'error': 'Invalid request parameters'}), 400 params = data.get('params', {}) # 设置算法参数 for algo_name, algo_params in params.items(): if algo_name in algorithm_selector.algorithms: algorithm_selector.set_algorithm_params(algo_name, algo_params) logger.info(f"Set parameters for {algo_name}: {algo_params}") except Exception as e: logger.error(f"Error parsing JSON data: {str(e)}", exc_info=True) return jsonify({'error': 'Invalid JSON data'}), 400 # 训练模型 try: results = algorithm_selector.train_models(X_combined, y_combined) logger.info("Algorithm training completed") except Exception as e: logger.error(f"Error training models: {str(e)}", exc_info=True) return jsonify({'error': f'Error training models: {str(e)}'}), 500 # 提取需要返回的结果 response_results = {} for algo_name, result in results.items(): # 如果训练出错,记录错误信息 if 'error' in result: response_results[algo_name] = { 'name': result['name'], 'error': result['error'] } else: response_results[algo_name] = { 'name': result['name'], 'train_accuracy': result['train_accuracy'], 'test_accuracy': result['test_accuracy'], 'classification_report': result.get('classification_report', 'No report'), 'feature_importances': result.get('feature_importances', None) } # 更新最后活动时间 last_activity = time.time() return jsonify({ 'message': 'Analysis completed successfully', 'results': response_results }), 200 @app.route('/reset', methods=['POST']) def reset_data(): """重置数据集""" global X_combined, y_combined, gas_types, concentrations, sensor_types, last_activity X_combined = None y_combined = None gas_types = [] concentrations = [] sensor_types = [] last_activity = time.time() logger.info("Data reset") return jsonify({'message': 'Data reset successfully'}), 200 @app.route('/uploads/<filename>', methods=['GET']) def uploaded_file(filename): """访问上传的文件""" return send_from_directory(app.config['UPLOAD_FOLDER'], filename) if __name__ == '__main__': # 开发环境 - 允许Vue前端访问 app.run(host='0.0.0.0', port=5000, debug=True) ``` ### 主要修改集成说明 1. **导入 `new_algorithm.py` 的功能**: ```python from new_algorithm import GasSensorDataAnalyzer, AlgorithmSelector, detect_dataset_type, extract_gas_type, extract_concentration, extract_sensor_type ``` 2. **全局变量初始化**: ```python data_analyzer = GasSensorDataAnalyzer() X_combined = None y_combined = None algorithm_selector = AlgorithmSelector(use_chinese=True) ``` 3. **文件处理元数据提取**: ```python def save_and_extract_file_info(file): """保存文件并提取气体信息""" try: filename = secure_filename(file.filename) file_path = os.path.join(app.config['UPLOAD_FOLDER'], filename) file.save(file_path) # 从文件名提取信息 sensor_type = extract_sensor_type(filename) gas_type = extract_gas_type(filename) concentration = extract_concentration(filename) return file_path, sensor_type, gas_type, concentration except Exception as e: logger.error(f"Error processing file {file.filename}: {str(e)}", exc_info=True) return None, None, None, None ``` 4. **多文件数据加载**: ```python # 保存文件并提取信息 file_paths = [] extracted_info = [] for file in valid_files: file_path, sensor_type, gas_type, concentration = save_and_extract_file_info(file) if file_path: file_paths.append(file_path) extracted_info.append({ 'sensor_type': sensor_type, 'gas_type': gas_type, 'concentration': concentration }) # 准备数据加载参数 sensor_types = [info['sensor_type'] for info in extracted_info] gas_types = [info['gas_type'] for info in extracted_info] concentrations = [info['concentration'] for info in extracted_info] # 加载数据 X_combined, y_combined = data_analyzer.load_multiple_gas_data( file_paths, gas_types, concentrations, sensor_types ) ``` 5. **标签信息提取**: ```python # 获取多维度标签信息 label_info = [] for label in np.unique(y_combined): for key, label_id in data_analyzer.multi_dimension_labels.items(): if label_id == label: parts = key.split('_') sensor = parts[0] gas = parts[1] conc = parts[2].replace('ppm', '') label_info.append({ 'id': int(label), 'sensor': sensor, 'gas': gas, 'concentration': conc, 'name_cn': data_analyzer.get_or_create_multi_dimension_label(sensor, gas, int(conc))[1]['cn'], 'name_en': data_analyzer.get_or_create_multi_dimension_label(sensor, gas, int(conc))[1]['en'] }) ``` 6. **算法参数设置训练**: ```python # 设置算法参数 for algo_name, algo_params in params.items(): if algo_name in algorithm_selector.algorithms: algorithm_selector.set_algorithm_params(algo_name, algo_params) # 训练模型 results = algorithm_selector.train_models(X_combined, y_combined) ``` ### 前端调用示例 (Vue) ```vue <template> <div class="container"> <h1>气体传感器数据分析</h1> <!-- 文件上传 --> <div class="section"> <h2>上传数据文件</h2> <input type="file" multiple @change="handleFileChange" ref="fileInput"> <button @click="uploadFiles">上传文件</button> <p v-if="uploadStatus">{{ uploadStatus }}</p> <div v-if="labelInfo.length > 0"> <h3>数据集标签信息</h3> <ul> <li v-for="label in labelInfo" :key="label.id"> {{ label.name_cn }} ({{ label.sensor }}, {{ label.gas }}, {{ label.concentration }}ppm) </li> </ul> </div> </div> <!-- 数据分析 --> <div class="section" v-if="labelInfo.length > 0"> <h2>数据分析</h2> <div class="algorithm-params"> <div v-for="(algo, key) in algorithms" :key="key"> <h3>{{ algo.name.cn }}</h3> <div v-for="param in algo.params" :key="param.name"> <label> {{ param.label }}: <input :type="param.type" v-model="params[key][param.name]"> </label> </div> </div> </div> <button @click="analyzeData">执行分析</button> <!-- 显示结果 --> <div v-if="results" class="results"> <div v-for="(result, algo) in results" :key="algo" class="result-card"> <h3>{{ result.name }}</h3> <p>训练准确率: {{ result.train_accuracy.toFixed(4) }}</p> <p>测试准确率: {{ result.test_accuracy.toFixed(4) }}</p> <pre v-if="result.classification_report">{{ result.classification_report }}</pre> <div v-if="result.feature_importances"> <h4>特征重要性:</h4> <ul> <li v-for="(importance, feature) in result.feature_importances" :key="feature"> {{ feature }}: {{ importance.toFixed(4) }} </li> </ul> </div> </div> </div> </div> </div> </template> <script> import axios from 'axios'; export default { data() { return { files: [], uploadStatus: '', labelInfo: [], results: null, // 算法配置 algorithms: { knn: { name: { cn: 'K-近邻算法', en: 'K-Nearest Neighbors' }, params: [ { name: 'n_neighbors', label: '邻居数量', type: 'number', default: 5 }, { name: 'metric', label: '距离度量', type: 'text', default: 'euclidean' } ] }, svm: { name: { cn: '支持向量机', en: 'Support Vector Machine' }, params: [ { name: 'C', label: '正则化参数', type: 'number', default: 1.0 }, { name: 'kernel', label: '核函数', type: 'text', default: 'rbf' } ] }, random_forest: { name: { cn: '随机森林', en: 'Random Forest' }, params: [ { name: 'n_estimators', label: '树的数量', type: 'number', default: 100 }, { name: 'max_depth', label: '最大深度', type: 'number', default: null } ] } }, params: { knn: { n_neighbors: 5, metric: 'euclidean' }, svm: { C: 1.0, kernel: 'rbf' }, random_forest: { n_estimators: 100, max_depth: null } } }; }, methods: { handleFileChange(event) { this.files = Array.from(event.target.files); if (this.files.length < 2) { this.uploadStatus = '需要至少两个文件进行分析'; } else { this.uploadStatus = `已选择 ${this.files.length} 个文件`; } }, async uploadFiles() { if (this.files.length < 2) { this.uploadStatus = '需要至少两个文件进行分析'; return; } const formData = new FormData(); this.files.forEach(file => { formData.append('files', file); }); try { const response = await axios.post('http://localhost:5000/upload', formData, { headers: { 'Content-Type': 'multipart/form-data' } }); this.uploadStatus = `上传成功! 样本数: ${response.data.sample_count}`; this.labelInfo = response.data.label_info; } catch (error) { console.error('上传失败:', error); this.uploadStatus = `上传失败: ${error.response?.data?.error || error.message}`; } }, async analyzeData() { try { const response = await axios.post('http://localhost:5000/analyze', { params: this.params }); this.results = response.data.results; console.log('分析结果:', this.results); } catch (error) { console.error('分析失败:', error); alert(`分析失败: ${error.response?.data?.error || error.message}`); } } } }; </script> <style scoped> /* 样式保持不变 */ </style> ``` ### 关键功能说明 1. **自动元数据提取**: - 系统自动从文件名中提取传感器类型、气体类型浓度 - 不需要用户手动输入这些信息 2. **多维度标签系统**: - 使用 `GasSensorDataAnalyzer` 的多维度标签系统 - 返回详细的标签信息给前端,包括传感器、气体浓度的组合 3. **灵活的算法配置**: - `AlgorithmSelector` 类支持多种算法参数配置 - 前端可以动态调整算法参数 4. **错误处理日志**: - 全面的错误处理日志记录 - 详细的错误信息返回给前端 5. **数据集重置功能**: - `/reset` 端点用于清除当前加载的数据集 ### 使用流程 1. **上传文件**: - 选择至少两个 Excel 文件(`.xlsx` 或 `.xls`) - 文件名应包含传感器类型、气体类型浓度信息(例如:`MP2_acetone_20ppm.xlsx`) 2. **数据加载**: - 后端自动提取文件中的元数据 - 加载并合并数据集 - 返回标签信息给前端 3. **数据分析**: - 调整算法参数(可选) - 点击"执行分析"按钮 - 查看分析结果(准确率、分类报告、特征重要性) ### 文件命名规范 为了确保系统能正确提取元数据,文件名应遵循以下格式之一: 1. **基本格式**: ``` [传感器类型]_[气体类型]_[浓度]ppm.xlsx ``` 示例:`MP2_acetone_20ppm.xlsx` 2. **混合气体**: ``` [传感器类型]_[气体1]+[气体2]_[浓度]ppm.xlsx ``` 示例:`MP801_toluene+formaldehyde_50ppm.xlsx` 3. **中文支持**: ``` MP2_丙酮_20ppm.xlsx MP801_甲苯+甲醛_50ppm.xlsx ``` 这个修改后的 `app.py` 文件完全集成了 `new_algorithm.py` 的功能,并优化了文件处理流程,确保当选择两个文件时能够正确分析数据。
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值