Algorithms—189.Rotate Array

思路:题目要求进行k次操作,把数组最后一个数放在开头。那么显然要先对k进行取于,重复nums.length次等于不变。

public class Solution {
    public void rotate(int[] nums, int k) {
    	k=k%nums.length;
        List<Integer> list=new ArrayList<Integer>();
        for (int i = nums.length-k; i < nums.length; i++) {
			list.add(nums[i]);
		}
        for (int i = nums.length-1; i >=k; i--) {
			nums[i]=nums[i-k];
		}
        for (int i = 0; i < k; i++) {
			nums[i]=list.get(i);
		}
    }
}


耗时:388ms,下游水准。搞笑的是末尾发现一位C语言600ms通关的。。。。


在我的后端中还有两个文件,一个是new_algorithm.py:import pandas as pd import numpy as np from sklearn.model_selection import train_test_split from sklearn.neighbors import KNeighborsClassifier from sklearn.svm import SVC from sklearn.ensemble import RandomForestClassifier from sklearn.tree import DecisionTreeClassifier from sklearn.neural_network import MLPClassifier from sklearn.metrics import accuracy_score, classification_report from sklearn.preprocessing import StandardScaler from sklearn.pipeline import make_pipeline from sklearn.base import clone import matplotlib.pyplot as plt from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay import os import re from sklearn.pipeline import Pipeline from sklearn.impute import SimpleImputer import seaborn as sns # 添加数据插补器 from sklearn.impute import SimpleImputer def check_chinese_font_support(): """检查系统是否支持中文字体""" chinese_fonts = ['SimHei', 'WenQuanYi Micro Hei', 'Heiti TC', 'Microsoft YaHei', 'SimSun'] for font in chinese_fonts: try: plt.rcParams["font.family"] = font # 测试字体是否可用 fig, ax = plt.subplots(figsize=(1, 1)) ax.text(0.5, 0.5, '测试', fontsize=12) plt.close(fig) print(f"系统支持中文字体: {font}") return True except: continue print("系统不支持中文字体,将使用英文标签") plt.rcParams["font.family"] = ['Arial', 'sans-serif'] return False class GasSensorDataAnalyzer: """有害气体分类数据加载与预处理类""" def __init__(self): # 基础气体标签 self.gas_labels = { 'acetone': 0, 'toluene': 1, 'methanol': 2, 'formaldehyde': 3, 'ethanol': 4 } # 混合气体标签生成(每个混合气体用唯一数字标识) self.mixture_labels = self._generate_mixture_labels() # 合并所有气体标签 self.all_gas_labels = {**self.gas_labels, **self.mixture_labels} # 中英文气体名称映射 self.gas_names = { 'acetone': {'cn': '丙酮', 'en': 'Acetone'}, 'toluene': {'cn': '甲苯', 'en': 'Toluene'}, 'methanol': {'cn': '甲醇', 'en': 'Methanol'}, 'formaldehyde': {'cn': '甲醛', 'en': 'Formaldehyde'}, 'ethanol': {'cn': '乙醇', 'en': 'Ethanol'}, 'toluene+formaldehyde': {'cn': '甲苯+甲醛', 'en': 'Toluene+Formaldehyde'}, 'methanol+toluene+formaldehyde': {'cn': '甲醇+甲苯+甲醛', 'en': 'Methanol+Toluene+Formaldehyde'} # 可以根据需要添加更多混合气体的名称映射 } # 传感器类型映射 self.sensor_types = { 'MP2': 0, 'MP3B': 1, 'MP503': 2, 'MP801': 3, 'MQ2': 4, 'MQ7B': 5 } # 初始化多维度类别映射 self.multi_dimension_labels = {} self.next_label_id = 0 # 传感器中英文名称映射 self.sensor_names = { 'MP2': {'cn': 'MP2', 'en': 'MP2'}, 'MP3B': {'cn': 'MP3B', 'en': 'MP3B'}, 'MP503': {'cn': 'MP503', 'en': 'MP503'}, 'MP801': {'cn': 'MP801', 'en': 'MP801'}, 'MQ2': {'cn': 'MQ2', 'en': 'MQ2'}, 'MQ7B': {'cn': 'MQ7B', 'en': 'MQ7B'} } def _generate_mixture_labels(self): """生成混合气体的标签映射""" # 定义可能的混合气体组合 mixtures = [ 'toluene+formaldehyde', 'methanol+toluene+formaldehyde' # 可以根据需要添加更多混合气体组合 ] # 为每个混合气体分配唯一标签(从基础气体标签之后开始) next_label = max(self.gas_labels.values()) + 1 return {mixture: next_label + i for i, mixture in enumerate(mixtures)} def get_or_create_multi_dimension_label(self, sensor_type, gas_type, concentration): """ 获取或创建多维度类别标签 参数: - sensor_type: 传感器类型 - gas_type: 气体类型 - concentration: 浓度值 返回: - 标签ID和标签名称 """ # 创建唯一键 key = f"{sensor_type}_{gas_type}_{concentration}ppm" # 如果键不存在,创建新标签 if key not in self.multi_dimension_labels: self.multi_dimension_labels[key] = self.next_label_id self.next_label_id += 1 # 返回标签ID和标签名称 label_id = self.multi_dimension_labels[key] # 创建中英文标签名称 sensor_name_cn = self.sensor_names.get(sensor_type, {}).get('cn', sensor_type) sensor_name_en = self.sensor_names.get(sensor_type, {}).get('en', sensor_type) gas_name_cn = self.gas_names.get(gas_type, {}).get('cn', gas_type) gas_name_en = self.gas_names.get(gas_type, {}).get('en', gas_type) label_name_cn = f"{sensor_name_cn}_{gas_name_cn}_{concentration}ppm" label_name_en = f"{sensor_name_en}_{gas_name_en}_{concentration}ppm" return label_id, { 'cn': label_name_cn, 'en': label_name_en } def load_single_gas_data(self, file_path, gas_type, concentration, sensor_type): """ 加载单一气体数据 参数: - file_path: 文件路径 - gas_type: 气体类型 (如 'acetone', 'toluene' 等) - concentration: 浓度值 (如 20, 30, 50 等) - sensor_type: 传感器类型 (如 'MP2', 'MP801' 等) """ try: if not os.path.exists(file_path): raise FileNotFoundError(f"文件不存在: {file_path}") df = pd.read_excel(file_path, sheet_name='Sheet1', index_col=0) X = df.values # 尝试将数据转换为 float 类型 try: X = X.astype(float) except ValueError: print("警告: 数据中包含非数值类型,将过滤掉非数值类型的数据") numeric_mask = np.vectorize(np.isreal)(X) X = X[numeric_mask].reshape(-1, df.shape[1]) # 检查并报告NaN值 nan_count = np.isnan(X).sum() if nan_count > 0: print(f"警告: 数据中包含 {nan_count} 个NaN值") # 可选:替换NaN值为0 # X = np.nan_to_num(X, nan=0.0) # 创建多维度标签 label_id, label_name = self.get_or_create_multi_dimension_label( sensor_type, gas_type, concentration ) # 为所有样本分配相同的标签 y = np.full(len(X), label_id, dtype=int) print(f"已加载 {label_name['cn']} 数据: {len(X)} 样本, 特征维度: {X.shape[1]}") return X, y except Exception as e: print(f"加载数据时出错: {e}") return None, None def load_multiple_gas_data(self, file_paths, gas_types, concentrations, sensor_types): """ 加载多个气体数据并合并 参数: - file_paths: 文件路径列表 - gas_types: 气体类型列表 (如 ['acetone', 'toluene'] 等) - concentrations: 浓度值列表 (如 [20, 30] 等) - sensor_types: 传感器类型列表 (如 ['MP2', 'MP801'] 等) """ X_all = [] y_all = [] feature_dimensions = [] # 用于记录每个数据集的特征维度 for file_path, gas_type, concentration, sensor_type in zip( file_paths, gas_types, concentrations, sensor_types ): X, y = self.load_single_gas_data(file_path, gas_type, concentration, sensor_type) if X is not None and len(X) > 0: X_all.append(X) y_all.append(y) feature_dimensions.append(X.shape[1]) if not X_all: print("没有加载到有效数据") return None, None # 检查所有数据集的特征维度是否一致 unique_dimensions = np.unique(feature_dimensions) if len(unique_dimensions) > 1: print(f"警告: 检测到不同的特征维度: {unique_dimensions}") print("这可能导致合并数据时出错。请检查您的Excel文件是否具有相同的列数。") # 找出最常见的维度 from collections import Counter dimension_counts = Counter(feature_dimensions) most_common_dimension = dimension_counts.most_common(1)[0][0] print(f"最常见的特征维度是: {most_common_dimension}") # 过滤掉特征维度不匹配的数据 filtered_X_all = [] filtered_y_all = [] for i, X in enumerate(X_all): if X.shape[1] == most_common_dimension: filtered_X_all.append(X) filtered_y_all.append(y_all[i]) else: print(f"忽略特征维度不匹配的数据集: {file_paths[i]} (维度: {X.shape[1]})") if not filtered_X_all: print("没有找到特征维度匹配的数据集") return None, None X_all = filtered_X_all y_all = filtered_y_all # 合并所有数据 X_combined = np.vstack(X_all) y_combined = np.concatenate(y_all) # 检查合并后的数据中是否存在NaN值 total_nan = np.isnan(X_combined).sum() if total_nan > 0: print(f"警告: 合并后的数据中包含 {total_nan} 个NaN值,占比: {total_nan/(X_combined.size):.4f}") print(f"NaN值在样本中的分布: {np.isnan(X_combined).any(axis=1).sum()} 个样本包含NaN值") print(f"NaN值在特征中的分布: {np.isnan(X_combined).any(axis=0).sum()} 个特征包含NaN值") print(f"合并后的数据: {len(X_combined)} 样本,{len(np.unique(y_combined))} 个类别,特征维度: {X_combined.shape[1]}") return X_combined, y_combined def load_dataset(self, file_path, gas_type, concentration, sensor_type): """加载单一数据集并返回""" return self.load_single_gas_data(file_path, gas_type, concentration, sensor_type) class AlgorithmSelector: """多算法选择与训练类""" def __init__(self, use_chinese=True): # 算法名称映射 self.algorithm_names = { 'knn': {'cn': 'K-近邻算法', 'en': 'K-Nearest Neighbors'}, 'svm': {'cn': '支持向量机', 'en': 'Support Vector Machine'}, 'random_forest': {'cn': '随机森林', 'en': 'Random Forest'}, 'decision_tree': {'cn': '决策树', 'en': 'Decision Tree'}, 'neural_network': {'cn': '神经网络', 'en': 'Neural Network'} } # 算法配置 self.algorithms = { 'knn': { 'model': KNeighborsClassifier(), 'params': {'n_neighbors': 5, 'metric': 'euclidean'} }, 'svm': { 'model': SVC(), 'params': {'kernel': 'rbf', 'C': 1.0, 'probability': True} }, 'random_forest': { 'model': RandomForestClassifier(), 'params': {'n_estimators': 100, 'random_state': 42} }, 'decision_tree': { 'model': DecisionTreeClassifier(), 'params': {'max_depth': None, 'random_state': 42} }, 'neural_network': { 'model': MLPClassifier(), 'params': { 'neural_network__hidden_layer_sizes': (100, 50), # 注意前缀 'neural_network__max_iter': 500, 'neural_network__random_state': 42} } } # 算法是否需要标准化 self.needs_scaling = { 'knn': True, 'svm': True, 'random_forest': False, 'decision_tree': False, 'neural_network': True } # 是否使用中文 self.use_chinese = use_chinese def set_algorithm_params(self, algorithm_name, params): """设置算法参数""" if algorithm_name in self.algorithms: # 为Pipeline正确格式化参数名称 formatted_params = {f"{algorithm_name}__{k}": v for k, v in params.items()} self.algorithms[algorithm_name]['params'] = formatted_params else: raise ValueError(f"不支持的算法: {algorithm_name}") def train_models(self, X, y, test_size=0.2, random_state=42): """ 训练所有算法并返回结果 返回: - 包含训练好的模型及其性能的字典 """ # 检查类别数量 unique_classes = np.unique(y) num_classes = len(unique_classes) if num_classes < 2: print(f"警告: 数据集中只有 {num_classes} 个类别,某些算法可能无法训练") print(f"单一类别值: {unique_classes[0]}") # 跳过SVM算法,因为它需要至少两个类别 algorithms_to_train = [name for name in self.algorithms if name != 'svm'] print(f"由于单类别数据,将跳过 SVM 算法,仅训练: {', '.join([self.algorithm_names[name]['cn'] for name in algorithms_to_train])}") # 在单一数据集上划分训练集和测试集 X_train, X_test, y_train, y_test = train_test_split( X, y, test_size=test_size, random_state=random_state ) # 标记这是单类别数据 is_single_class_data = True else: # 在多类别数据集上划分训练集和测试集 X_train, X_test, y_train, y_test = train_test_split( X, y, test_size=test_size, random_state=random_state, stratify=y ) algorithms_to_train = list(self.algorithms.keys()) is_single_class_data = False # 检查数据类型并确保可以安全转换为数值类型 try: # 尝试将数据转换为float类型 X_train_numeric = X_train.astype(float) X_test_numeric = X_test.astype(float) # 检查训练数据中是否存在NaN值 train_nan = np.isnan(X_train_numeric).sum() if train_nan > 0: print(f"警告: 训练数据中包含 {train_nan} 个NaN值,占比: {train_nan/(X_train_numeric.size):.4f}") print(f"NaN值在训练样本中的分布: {np.isnan(X_train_numeric).any(axis=1).sum()} 个样本包含NaN值") print(f"NaN值在训练特征中的分布: {np.isnan(X_train_numeric).any(axis=0).sum()} 个特征包含NaN值") # 检查测试数据中是否存在NaN值 test_nan = np.isnan(X_test_numeric).sum() if test_nan > 0: print(f"警告: 测试数据中包含 {test_nan} 个NaN值,占比: {test_nan/(X_test_numeric.size):.4f}") print(f"NaN值在测试样本中的分布: {np.isnan(X_test_numeric).any(axis=1).sum()} 个样本包含NaN值") print(f"NaN值在测试特征中的分布: {np.isnan(X_test_numeric).any(axis=0).sum()} 个特征包含NaN值") except ValueError as e: print(f"警告: 无法将数据转换为数值类型,跳过NaN值检查: {e}") results = {} for name in algorithms_to_train: algo = self.algorithms[name] # 获取算法名称(根据是否支持中文选择) algo_name = self.algorithm_names[name]['cn'] if self.use_chinese else self.algorithm_names[name]['en'] try: print(f"\n训练 {algo_name}...") # 创建模型管道 if self.needs_scaling[name]: # 为需要标准化的算法创建包含三个步骤的Pipeline model = Pipeline([ ('imputer', SimpleImputer(strategy='mean')), # 使用均值填充缺失值 ('scaler', StandardScaler()), (name, clone(algo['model'])) ]) else: # 为不需要标准化的算法创建包含两个步骤的Pipeline model = Pipeline([ ('imputer', SimpleImputer(strategy='mean')), # 使用均值填充缺失值 (name, clone(algo['model'])) ]) # 为决策树和随机森林直接设置参数,不使用Pipeline参数设置方式 if name in ['decision_tree', 'random_forest']: # 获取算法实例 algo_instance = model.named_steps[name] # 直接设置参数 for param, value in algo['params'].items(): setattr(algo_instance, param, value) else: # 为其他算法使用Pipeline参数设置方式 model.set_params(**algo['params']) # 训练模型 model.fit(X_train, y_train) # 评估模型 train_accuracy = model.score(X_train, y_train) test_accuracy = model.score(X_test, y_test) y_pred = model.predict(X_test) print(f"训练集准确率: {train_accuracy:.4f}") print(f"测试集准确率: {test_accuracy:.4f}") print("分类报告:") print(classification_report(y_test, y_pred)) results[name] = { 'name': algo_name, 'model': model, 'train_accuracy': train_accuracy, 'test_accuracy': test_accuracy, 'y_pred': y_pred, 'X_test': X_test, 'y_test': y_test, 'unique_labels': np.unique(y_test), 'is_single_class': is_single_class_data } except Exception as e: print(f"训练 {algo_name} 时发生错误: {e}") results[name] = { 'name': algo_name, 'error': str(e), 'is_single_class': is_single_class_data } # 为跳过的SVM算法添加结果记录 if 'svm' not in algorithms_to_train: svm_name = self.algorithm_names['svm']['cn'] if self.use_chinese else self.algorithm_names['svm']['en'] results['svm'] = { 'name': svm_name, 'error': "由于单类别数据,跳过SVM算法", 'is_single_class': is_single_class_data } return results def compare_algorithms(self, results): """比较不同算法的性能""" # 过滤掉训练失败的算法 valid_results = {name: result for name, result in results.items() if 'test_accuracy' in result} if not valid_results: print("没有算法成功训练,无法生成比较图。") return None names = [valid_results[name]['name'] for name in valid_results] accuracies = [valid_results[name]['test_accuracy'] for name in valid_results] plt.figure(figsize=(12, 6)) bars = plt.bar(names, accuracies, color='skyblue') # 根据是否支持中文选择标题 title = "不同算法的测试集准确率比较" if self.use_chinese else "Comparison of Test Set Accuracies for Different Algorithms" x_label = "算法" if self.use_chinese else "Algorithm" y_label = "准确率" if self.use_chinese else "Accuracy" plt.ylim(0, 1.05) plt.title(title) plt.xlabel(x_label) plt.ylabel(y_label) # 添加数值标签 for bar in bars: height = bar.get_height() plt.text(bar.get_x() + bar.get_width()/2., height + 0.01, f'{height:.4f}', ha='center', va='bottom') plt.xticks(rotation=45, ha='right') plt.tight_layout() return plt def plot_confusion_matrix(self, results, gas_data_loader, use_chinese=True, rotate_labels=45, fig_width=12, fig_height=10, font_size=10): """ 绘制混淆矩阵 参数: - results: 包含算法结果的字典 - gas_data_loader: 气体数据加载器实例 - use_chinese: 是否使用中文 - rotate_labels: 标签旋转角度,默认为45度 - fig_width: 图形的宽度,默认为12 - fig_height: 图形的高度,默认为10 - font_size: 字体大小,默认为10 """ # 过滤掉训练失败的算法 valid_results = {name: result for name, result in results.items() if 'test_accuracy' in result} if not valid_results: print("没有算法成功训练,无法生成混淆矩阵。") return None # 获取所有算法中出现的唯一标签 all_unique_labels = set() for name, result in valid_results.items(): all_unique_labels.update(result['unique_labels']) all_unique_labels = sorted(list(all_unique_labels)) # 创建标签名称映射 label_names = [] for label in all_unique_labels: # 尝试查找对应的多维度标签名称 label_name = None for key, label_id in gas_data_loader.multi_dimension_labels.items(): if label_id == label: # 获取标签名称而不是标签ID label_name = gas_data_loader.get_or_create_multi_dimension_label( key.split('_')[0], # 传感器类型 key.split('_')[1], # 气体类型 int(key.split('_')[2].replace('ppm', '')) # 浓度值 )[1] # 获取第二个返回值,即标签名称字典 break # 如果找到,使用对应的标签名称 if label_name and isinstance(label_name, dict): if use_chinese: label_names.append(label_name.get('cn', f"类别 {label}")) else: label_names.append(label_name.get('en', f"Class {label}")) else: # 如果没有找到,使用默认标签名称 label_names.append(f"类别 {label}" if use_chinese else f"Class {label}") for name, result in valid_results.items(): plt.figure(figsize=(fig_width, fig_height)) cm = confusion_matrix(result['y_test'], result['y_pred'], labels=all_unique_labels) disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=label_names) disp.plot(cmap=plt.cm.Blues) # 根据是否支持中文选择标题 title = f"{result['name']} 混淆矩阵" if use_chinese else f"{result['name']} Confusion Matrix" # 如果是单类别数据,添加说明 if result.get('is_single_class', False): title += " (单类别数据)" plt.title(title) # 旋转x轴标签 plt.xticks(rotation=rotate_labels, ha='right', rotation_mode='anchor', fontsize=font_size) plt.yticks(fontsize=font_size) plt.tight_layout() return plt def detect_dataset_type(dataset_path): """ 自动检测数据集类型:单一数据集或多数据集文件夹 参数: - dataset_path: 数据集路径 返回: - dataset_type: 'single' 或 'multiple' - file_paths: 文件路径列表 - gas_types: 气体类型列表 - concentrations: 浓度值列表 - sensor_types: 传感器类型列表 """ # 检查路径是否存在 if not os.path.exists(dataset_path): raise FileNotFoundError(f"路径不存在: {dataset_path}") # 检查是文件还是文件夹 if os.path.isfile(dataset_path): # 单一文件处理 file_paths = [dataset_path] # 从文件名提取传感器类型、气体类型和浓度 file_name = os.path.basename(dataset_path) sensor_type = extract_sensor_type(file_name) gas_type = extract_gas_type(file_name) concentration = extract_concentration(file_name) gas_types = [gas_type] concentrations = [concentration] sensor_types = [sensor_type] print(f"检测到单一数据集: {file_name}") print(f"传感器类型: {sensor_type}, 气体类型: {gas_type}, 浓度: {concentration}ppm") return 'single', file_paths, gas_types, concentrations, sensor_types elif os.path.isdir(dataset_path): # 文件夹处理 - 查找所有Excel文件 excel_files = [f for f in os.listdir(dataset_path) if f.endswith(('.xlsx', '.xls'))] if not excel_files: raise ValueError(f"文件夹中没有找到Excel文件: {dataset_path}") file_paths = [] gas_types = [] concentrations = [] sensor_types = [] for file in excel_files: file_path = os.path.join(dataset_path, file) file_paths.append(file_path) # 从文件名提取传感器类型、气体类型和浓度 sensor_type = extract_sensor_type(file) gas_type = extract_gas_type(file) concentration = extract_concentration(file) gas_types.append(gas_type) concentrations.append(concentration) sensor_types.append(sensor_type) print(f"找到数据集文件: {file}") print(f"传感器类型: {sensor_type}, 气体类型: {gas_type}, 浓度: {concentration}ppm") print(f"总共找到 {len(file_paths)} 个数据集文件") return 'multiple', file_paths, gas_types, concentrations, sensor_types else: raise ValueError(f"无法识别的路径: {dataset_path}") def extract_sensor_type(file_name): """从文件名提取传感器类型""" # 定义传感器类型的正则表达式模式 sensor_patterns = { 'MP2': r'(^MP2[^a-zA-Z0-9]|MP2$)', 'MP3B': r'(^MP3B[^a-zA-Z0-9]|MP3B$)', 'MP503': r'(^MP503[^a-zA-Z0-9]|MP503$)', 'MP801': r'(^MP801[^a-zA-Z0-9]|MP801$)', 'MQ2': r'(^MQ2[^a-zA-Z0-9]|MQ2$)', 'MQ7B': r'(^MQ7B[^a-zA-Z0-9]|MQ7B$)' } # 转换为大写以提高匹配率 file_name_upper = file_name.upper() # 尝试匹配传感器类型 for sensor_type, pattern in sensor_patterns.items(): if re.search(pattern, file_name_upper): return sensor_type # 如果没有匹配到,返回默认值 print(f"警告: 无法从文件名 '{file_name}' 中提取传感器类型,使用默认值 'MP2'") return 'MP2' def extract_gas_type(file_name): """从文件名提取气体类型""" # 定义基础气体类型的中英文名称映射 gas_name_mapping = { 'bingtong': 'acetone', '丙酮': 'acetone', 'jiaben': 'toluene', '甲苯': 'toluene', 'jiachun': 'methanol', '甲醇': 'methanol', 'jiaquan': 'formaldehyde', '甲醛': 'formaldehyde', 'yichun': 'ethanol', '乙醇': 'ethanol' } # 去除文件扩展名 file_name_without_ext = os.path.splitext(file_name)[0] # 按照固定格式"传感器_气体名称_浓度"分割文件名 parts = file_name_without_ext.split('_') # 确保有足够的部分 if len(parts) < 3: print(f"警告: 文件名格式不符合预期: {file_name}") return 'acetone' # 获取气体名称部分 gas_name_part = parts[1] # 检查是否为混合气体 if '+' in gas_name_part or '+' in gas_name_part: # 处理混合气体 # 统一分隔符 gas_name_part = gas_name_part.replace('+', '+') gas_components = gas_name_part.split('+') # 转换为标准气体名称 standard_gas_names = [] for component in gas_components: # 先尝试中文名称映射 standard_name = gas_name_mapping.get(component, None) if standard_name: standard_gas_names.append(standard_name) else: # 如果是英文名称,直接添加 if component.lower() in ['acetone', 'toluene', 'methanol', 'formaldehyde', 'ethanol']: standard_gas_names.append(component.lower()) else: print(f"警告: 无法识别的气体成分: {component}") # 按字母顺序排序以确保一致性 standard_gas_names.sort() # 组合成混合气体名称 if len(standard_gas_names) > 1: return '+'.join(standard_gas_names) elif len(standard_gas_names) == 1: return standard_gas_names[0] # 处理单一气体 # 先尝试中文名称映射 standard_name = gas_name_mapping.get(gas_name_part, None) if standard_name: return standard_name # 如果是英文名称,直接返回小写形式 if gas_name_part.lower() in ['acetone', 'toluene', 'methanol', 'formaldehyde', 'ethanol']: return gas_name_part.lower() # 如果没有匹配到,返回默认值 print(f"警告: 无法从文件名 '{file_name}' 中提取气体类型,使用默认值 'acetone'") return 'acetone' def extract_concentration(file_name): """从文件名提取浓度值""" # 去除文件扩展名 file_name_without_ext = os.path.splitext(file_name)[0] # 按照固定格式"传感器_气体名称_浓度"分割文件名 parts = file_name_without_ext.split('_') # 确保有足够的部分 if len(parts) < 3: print(f"警告: 文件名格式不符合预期: {file_name}") return 20 # 获取浓度部分 concentration_part = parts[2] # 提取数字部分 match = re.search(r'(\d+)', concentration_part) if match: return int(match.group(1)) # 如果没有匹配到,返回默认值 print(f"警告: 无法从文件名 '{file_name}' 中提取浓度值,使用默认值 20ppm") return 20 def main(): """主函数""" # 检查中文字体支持 chinese_supported = check_chinese_font_support() # 创建数据加载器 data_loader = GasSensorDataAnalyzer() # 定义数据集路径 dataset_path = r"C:\Users\Cong\Desktop\作业\项目\六通道2混合\2_MP2" try: # 自动检测数据集类型 dataset_type, file_paths, gas_types, concentrations, sensor_types = detect_dataset_type(dataset_path) # 根据检测结果加载数据 if dataset_type == 'single': # 加载单一数据集 X, y = data_loader.load_dataset(file_paths[0], gas_types[0], concentrations[0], sensor_types[0]) else: # 加载多个数据集并合并 X, y = data_loader.load_multiple_gas_data(file_paths, gas_types, concentrations, sensor_types) if X is None or len(X) == 0: print("No valid data available for training. Please check file paths and formats.") return print(f"加载的数据集总样本数: {len(X)}") print(f"数据集中的类别数量: {len(np.unique(y))}") # 创建算法选择器,根据中文字体支持情况决定是否使用中文 selector = AlgorithmSelector(use_chinese=chinese_supported) # 自定义参数配置示例 selector.set_algorithm_params('knn', {'n_neighbors': 3, 'metric': 'manhattan'}) selector.set_algorithm_params('svm', {'C': 0.8, 'kernel': 'linear'}) selector.set_algorithm_params('neural_network', {'hidden_layer_sizes': (150, 75)}) # 训练所有算法 results = selector.train_models(X, y) # 比较算法性能 plt1 = selector.compare_algorithms(results) if plt1: plt1.savefig('algorithm_comparison.png') plt1.close() # 绘制混淆矩阵 plt2 = selector.plot_confusion_matrix(results, data_loader, use_chinese=chinese_supported, rotate_labels=45,fig_width=20, fig_height=20, font_size=8) if plt2: plt2.savefig('confusion_matrix.png') plt2.close() print("\n算法比较结果已保存为 'algorithm_comparison.png'") print("混淆矩阵已保存为 'confusion_matrix.png'") except Exception as e: print(f"程序执行过程中发生错误: {e}") if __name__ == "__main__": main()还有一个是tempcoderunnerfile.py文件:@app.route('/upload', methods=['POST']) def upload_file(): """处理文件上传""" if 'files' not in request.files: return jsonify({'error': 'No file part'}), 400 files = request.files.getlist('files') gas_type = request.form.get('gas_type', 'acetone') concentration = int(request.form.get('concentration', 20)) if not files or files[0].filename == '': return jsonify({'error': 'No selected file'}), 400 datasets = [] for file in files: if file and allowed_file(file.filename): # 保存临时文件 file_path = os.path.join(app.config['UPLOAD_FOLDER'], file.filename) file.save(file_path) # 加载数据 data = data_loader.load_single_gas_data(file_path, gas_type, concentration) datasets.append(data) # 删除临时文件 os.remove(file_path) # 合并数据集 X, y = data_loader.combine_datasets(datasets) if X is None or len(X) == 0: return jsonify({'error': 'No valid data loaded'}), 400 # 保存合并后的数据 df = pd.DataFrame(X) df['label'] = y file_path = os.path.join(app.config['UPLOAD_FOLDER'], 'temp_data.xlsx') df.to_excel(file_path, index=False) return jsonify({ 'status': 'success', 'sample_count': len(X), 'feature_count': X.shape[1], 'gas_type': gas_type, 'concentration': concentration })请根据这两个文件重修修改app.py文件,确保他在algorithmselection呈现的页面中当我选择两个文件进入的时候能够分析数据
06-22
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值