loader.get_template(template_name) # You need to create a 500.html template

This article explains how to display a graceful error page for a mod_python application on an Apache server by providing a custom 500.html template when a 500 Internal Server Error occurs.


When running Django under Apache + mod_python, you may have added 404.html and 500.html to /var/www and set DEBUG = False in settings.py, yet as soon as a view raises an exception, template loading still fails with:
loader.get_template(template_name) # You need to create a 500.html template

The simplest fix: add a 500.html file to the root of your template directory, and the error page will be served.
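Why this works: with DEBUG = False, Django's default 500 handler renders a template named exactly 500.html from your configured template root; files dropped under /var/www are Apache's own error pages and are never seen by Django's template loader. A minimal sketch of the relevant mod_python-era settings (the project path below is a hypothetical example):

```python
# settings.py -- minimal sketch; the template path is a hypothetical example
DEBUG = False
TEMPLATE_DIRS = (
    '/var/www/myproject/templates',  # 500.html (and 404.html) go directly in this root
)
```

The template itself can be as small as a single line of HTML such as `<h1>Server Error (500)</h1>`.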

My backend also contains two other files. One is new_algorithm.py:

```python
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score, classification_report
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline
from sklearn.base import clone
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
import os
import re
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer  # data imputer
import seaborn as sns


def check_chinese_font_support():
    """Check whether the system has a usable Chinese font."""
    chinese_fonts = ['SimHei', 'WenQuanYi Micro Hei', 'Heiti TC', 'Microsoft YaHei', 'SimSun']
    for font in chinese_fonts:
        try:
            plt.rcParams["font.family"] = font
            # Render a test glyph to verify the font is available
            fig, ax = plt.subplots(figsize=(1, 1))
            ax.text(0.5, 0.5, '测试', fontsize=12)  # CJK test string
            plt.close(fig)
            print(f"Chinese font supported: {font}")
            return True
        except:
            continue
    print("No Chinese font available; falling back to English labels")
    plt.rcParams["font.family"] = ['Arial', 'sans-serif']
    return False


class GasSensorDataAnalyzer:
    """Loading and preprocessing of harmful-gas classification data."""

    def __init__(self):
        # Base gas labels
        self.gas_labels = {
            'acetone': 0,
            'toluene': 1,
            'methanol': 2,
            'formaldehyde': 3,
            'ethanol': 4
        }
        # Mixture labels (each mixture gets a unique integer id)
        self.mixture_labels = self._generate_mixture_labels()
        # Merge all gas labels
        self.all_gas_labels = {**self.gas_labels, **self.mixture_labels}
        # Chinese/English gas name mapping
        self.gas_names = {
            'acetone': {'cn': '丙酮', 'en': 'Acetone'},
            'toluene': {'cn': '甲苯', 'en': 'Toluene'},
            'methanol': {'cn': '甲醇', 'en': 'Methanol'},
            'formaldehyde': {'cn': '甲醛', 'en': 'Formaldehyde'},
            'ethanol': {'cn': '乙醇', 'en': 'Ethanol'},
            'toluene+formaldehyde': {'cn': '甲苯+甲醛', 'en': 'Toluene+Formaldehyde'},
            'methanol+toluene+formaldehyde': {'cn': '甲醇+甲苯+甲醛',
                                              'en': 'Methanol+Toluene+Formaldehyde'}
            # Add more mixture name mappings here as needed
        }
        # Sensor type mapping
        self.sensor_types = {
            'MP2': 0, 'MP3B': 1, 'MP503': 2, 'MP801': 3, 'MQ2': 4, 'MQ7B': 5
        }
        # Multi-dimension class mapping
        self.multi_dimension_labels = {}
        self.next_label_id = 0
        # Sensor name mapping (identical in both languages)
        self.sensor_names = {
            'MP2': {'cn': 'MP2', 'en': 'MP2'},
            'MP3B': {'cn': 'MP3B', 'en': 'MP3B'},
            'MP503': {'cn': 'MP503', 'en': 'MP503'},
            'MP801': {'cn': 'MP801', 'en': 'MP801'},
            'MQ2': {'cn': 'MQ2', 'en': 'MQ2'},
            'MQ7B': {'cn': 'MQ7B', 'en': 'MQ7B'}
        }

    def _generate_mixture_labels(self):
        """Generate the label mapping for gas mixtures."""
        # Possible mixture combinations
        mixtures = [
            'toluene+formaldehyde',
            'methanol+toluene+formaldehyde'
            # Add more mixtures here as needed
        ]
        # Assign each mixture a unique label, starting after the base gases
        next_label = max(self.gas_labels.values()) + 1
        return {mixture: next_label + i for i, mixture in enumerate(mixtures)}

    def get_or_create_multi_dimension_label(self, sensor_type, gas_type, concentration):
        """
        Get or create a multi-dimension class label.

        Parameters:
        - sensor_type: sensor type
        - gas_type: gas type
        - concentration: concentration value

        Returns:
        - label id and label name
        """
        # Build a unique key
        key = f"{sensor_type}_{gas_type}_{concentration}ppm"
        # Create a new label if the key is unseen
        if key not in self.multi_dimension_labels:
            self.multi_dimension_labels[key] = self.next_label_id
            self.next_label_id += 1
        # Return the label id and label name
        label_id = self.multi_dimension_labels[key]
        # Build Chinese and English label names
        sensor_name_cn = self.sensor_names.get(sensor_type, {}).get('cn', sensor_type)
        sensor_name_en = self.sensor_names.get(sensor_type, {}).get('en', sensor_type)
        gas_name_cn = self.gas_names.get(gas_type, {}).get('cn', gas_type)
        gas_name_en = self.gas_names.get(gas_type, {}).get('en', gas_type)
        label_name_cn = f"{sensor_name_cn}_{gas_name_cn}_{concentration}ppm"
        label_name_en = f"{sensor_name_en}_{gas_name_en}_{concentration}ppm"
        return label_id, {'cn': label_name_cn, 'en': label_name_en}

    def load_single_gas_data(self, file_path, gas_type, concentration, sensor_type):
        """
        Load data for a single gas.

        Parameters:
        - file_path: file path
        - gas_type: gas type (e.g. 'acetone', 'toluene')
        - concentration: concentration value (e.g. 20, 30, 50)
        - sensor_type: sensor type (e.g. 'MP2', 'MP801')
        """
        try:
            if not os.path.exists(file_path):
                raise FileNotFoundError(f"File not found: {file_path}")
            df = pd.read_excel(file_path, sheet_name='Sheet1', index_col=0)
            X = df.values
            # Try casting to float
            try:
                X = X.astype(float)
            except ValueError:
                print("Warning: non-numeric values found; filtering them out")
                numeric_mask = np.vectorize(np.isreal)(X)
                X = X[numeric_mask].reshape(-1, df.shape[1])
            # Check for and report NaN values
            nan_count = np.isnan(X).sum()
            if nan_count > 0:
                print(f"Warning: data contains {nan_count} NaN values")
                # Optional: replace NaN with 0
                # X = np.nan_to_num(X, nan=0.0)
            # Create the multi-dimension label
            label_id, label_name = self.get_or_create_multi_dimension_label(
                sensor_type, gas_type, concentration
            )
            # Assign the same label to every sample
            y = np.full(len(X), label_id, dtype=int)
            print(f"Loaded {label_name['cn']} data: {len(X)} samples, feature dim: {X.shape[1]}")
            return X, y
        except Exception as e:
            print(f"Error while loading data: {e}")
            return None, None

    def load_multiple_gas_data(self, file_paths, gas_types, concentrations, sensor_types):
        """
        Load multiple gas datasets and merge them.

        Parameters:
        - file_paths: list of file paths
        - gas_types: list of gas types (e.g. ['acetone', 'toluene'])
        - concentrations: list of concentrations (e.g. [20, 30])
        - sensor_types: list of sensor types (e.g. ['MP2', 'MP801'])
        """
        X_all = []
        y_all = []
        feature_dimensions = []  # feature dimension of each dataset
        for file_path, gas_type, concentration, sensor_type in zip(
            file_paths, gas_types, concentrations, sensor_types
        ):
            X, y = self.load_single_gas_data(file_path, gas_type, concentration, sensor_type)
            if X is not None and len(X) > 0:
                X_all.append(X)
                y_all.append(y)
                feature_dimensions.append(X.shape[1])
        if not X_all:
            print("No valid data loaded")
            return None, None
        # Check that all datasets share the same feature dimension
        unique_dimensions = np.unique(feature_dimensions)
        if len(unique_dimensions) > 1:
            print(f"Warning: differing feature dimensions detected: {unique_dimensions}")
            print("Merging may fail. Check that your Excel files have the same number of columns.")
            # Find the most common dimension
            from collections import Counter
            dimension_counts = Counter(feature_dimensions)
            most_common_dimension = dimension_counts.most_common(1)[0][0]
            print(f"Most common feature dimension: {most_common_dimension}")
            # Drop datasets whose dimension does not match
            filtered_X_all = []
            filtered_y_all = []
            for i, X in enumerate(X_all):
                if X.shape[1] == most_common_dimension:
                    filtered_X_all.append(X)
                    filtered_y_all.append(y_all[i])
                else:
                    print(f"Ignoring dataset with mismatched dimension: {file_paths[i]} (dim: {X.shape[1]})")
            if not filtered_X_all:
                print("No datasets with matching feature dimensions found")
                return None, None
            X_all = filtered_X_all
            y_all = filtered_y_all
        # Merge all data
        X_combined = np.vstack(X_all)
        y_combined = np.concatenate(y_all)
        # Check the merged data for NaN values
        total_nan = np.isnan(X_combined).sum()
        if total_nan > 0:
            print(f"Warning: merged data contains {total_nan} NaN values, ratio: {total_nan/(X_combined.size):.4f}")
            print(f"Samples containing NaN: {np.isnan(X_combined).any(axis=1).sum()}")
            print(f"Features containing NaN: {np.isnan(X_combined).any(axis=0).sum()}")
        print(f"Merged data: {len(X_combined)} samples, {len(np.unique(y_combined))} classes, feature dim: {X_combined.shape[1]}")
        return X_combined, y_combined

    def load_dataset(self, file_path, gas_type, concentration, sensor_type):
        """Load and return a single dataset."""
        return self.load_single_gas_data(file_path, gas_type, concentration, sensor_type)


class AlgorithmSelector:
    """Multi-algorithm selection and training."""

    def __init__(self, use_chinese=True):
        # Algorithm name mapping
        self.algorithm_names = {
            'knn': {'cn': 'K-近邻算法', 'en': 'K-Nearest Neighbors'},
            'svm': {'cn': '支持向量机', 'en': 'Support Vector Machine'},
            'random_forest': {'cn': '随机森林', 'en': 'Random Forest'},
            'decision_tree': {'cn': '决策树', 'en': 'Decision Tree'},
            'neural_network': {'cn': '神经网络', 'en': 'Neural Network'}
        }
        # Algorithm configuration
        self.algorithms = {
            'knn': {
                'model': KNeighborsClassifier(),
                'params': {'n_neighbors': 5, 'metric': 'euclidean'}
            },
            'svm': {
                'model': SVC(),
                'params': {'kernel': 'rbf', 'C': 1.0, 'probability': True}
            },
            'random_forest': {
                'model': RandomForestClassifier(),
                'params': {'n_estimators': 100, 'random_state': 42}
            },
            'decision_tree': {
                'model': DecisionTreeClassifier(),
                'params': {'max_depth': None, 'random_state': 42}
            },
            'neural_network': {
                'model': MLPClassifier(),
                'params': {
                    'neural_network__hidden_layer_sizes': (100, 50),  # note the Pipeline prefix
                    'neural_network__max_iter': 500,
                    'neural_network__random_state': 42
                }
            }
        }
        # Whether each algorithm needs feature scaling
        self.needs_scaling = {
            'knn': True,
            'svm': True,
            'random_forest': False,
            'decision_tree': False,
            'neural_network': True
        }
        # Whether to use Chinese labels
        self.use_chinese = use_chinese

    def set_algorithm_params(self, algorithm_name, params):
        """Set the parameters of an algorithm."""
        if algorithm_name in self.algorithms:
            # Format parameter names correctly for the Pipeline
            formatted_params = {f"{algorithm_name}__{k}": v for k, v in params.items()}
            self.algorithms[algorithm_name]['params'] = formatted_params
        else:
            raise ValueError(f"Unsupported algorithm: {algorithm_name}")

    def train_models(self, X, y, test_size=0.2, random_state=42):
        """
        Train every algorithm and return the results.

        Returns:
        - dict of trained models and their performance
        """
        # Check the number of classes
        unique_classes = np.unique(y)
        num_classes = len(unique_classes)
        if num_classes < 2:
            print(f"Warning: dataset has only {num_classes} class(es); some algorithms cannot train")
            print(f"Single class value: {unique_classes[0]}")
            # Skip SVM, which needs at least two classes
            algorithms_to_train = [name for name in self.algorithms if name != 'svm']
            print(f"Single-class data: skipping SVM, training only: "
                  f"{', '.join([self.algorithm_names[name]['cn'] for name in algorithms_to_train])}")
            # Train/test split on the single-class dataset
            X_train, X_test, y_train, y_test = train_test_split(
                X, y, test_size=test_size, random_state=random_state
            )
            # Flag this as single-class data
            is_single_class_data = True
        else:
            # Stratified train/test split for multi-class data
            X_train, X_test, y_train, y_test = train_test_split(
                X, y, test_size=test_size, random_state=random_state, stratify=y
            )
            algorithms_to_train = list(self.algorithms.keys())
            is_single_class_data = False
        # Make sure the data can safely be cast to a numeric type
        try:
            # Try casting to float
            X_train_numeric = X_train.astype(float)
            X_test_numeric = X_test.astype(float)
            # Check the training data for NaN values
            train_nan = np.isnan(X_train_numeric).sum()
            if train_nan > 0:
                print(f"Warning: training data contains {train_nan} NaN values, ratio: {train_nan/(X_train_numeric.size):.4f}")
                print(f"Training samples containing NaN: {np.isnan(X_train_numeric).any(axis=1).sum()}")
                print(f"Training features containing NaN: {np.isnan(X_train_numeric).any(axis=0).sum()}")
            # Check the test data for NaN values
            test_nan = np.isnan(X_test_numeric).sum()
            if test_nan > 0:
                print(f"Warning: test data contains {test_nan} NaN values, ratio: {test_nan/(X_test_numeric.size):.4f}")
                print(f"Test samples containing NaN: {np.isnan(X_test_numeric).any(axis=1).sum()}")
                print(f"Test features containing NaN: {np.isnan(X_test_numeric).any(axis=0).sum()}")
        except ValueError as e:
            print(f"Warning: cannot cast data to numeric type, skipping NaN check: {e}")
        results = {}
        for name in algorithms_to_train:
            algo = self.algorithms[name]
            # Pick the algorithm name in the requested language
            algo_name = self.algorithm_names[name]['cn'] if self.use_chinese else self.algorithm_names[name]['en']
            try:
                print(f"\nTraining {algo_name}...")
                # Build the model pipeline
                if self.needs_scaling[name]:
                    # Three-step pipeline for algorithms that need scaling
                    model = Pipeline([
                        ('imputer', SimpleImputer(strategy='mean')),  # mean-impute missing values
                        ('scaler', StandardScaler()),
                        (name, clone(algo['model']))
                    ])
                else:
                    # Two-step pipeline otherwise
                    model = Pipeline([
                        ('imputer', SimpleImputer(strategy='mean')),  # mean-impute missing values
                        (name, clone(algo['model']))
                    ])
                # Set decision-tree and random-forest params directly, bypassing Pipeline-style setting
                if name in ['decision_tree', 'random_forest']:
                    # Get the estimator instance
                    algo_instance = model.named_steps[name]
                    # Set parameters directly
                    for param, value in algo['params'].items():
                        setattr(algo_instance, param, value)
                else:
                    # Use Pipeline-style parameter setting for the rest
                    model.set_params(**algo['params'])
                # Train
                model.fit(X_train, y_train)
                # Evaluate
                train_accuracy = model.score(X_train, y_train)
                test_accuracy = model.score(X_test, y_test)
                y_pred = model.predict(X_test)
                print(f"Train accuracy: {train_accuracy:.4f}")
                print(f"Test accuracy: {test_accuracy:.4f}")
                print("Classification report:")
                print(classification_report(y_test, y_pred))
                results[name] = {
                    'name': algo_name,
                    'model': model,
                    'train_accuracy': train_accuracy,
                    'test_accuracy': test_accuracy,
                    'y_pred': y_pred,
                    'X_test': X_test,
                    'y_test': y_test,
                    'unique_labels': np.unique(y_test),
                    'is_single_class': is_single_class_data
                }
            except Exception as e:
                print(f"Error while training {algo_name}: {e}")
                results[name] = {
                    'name': algo_name,
                    'error': str(e),
                    'is_single_class': is_single_class_data
                }
        # Record a result entry for the skipped SVM
        if 'svm' not in algorithms_to_train:
            svm_name = self.algorithm_names['svm']['cn'] if self.use_chinese else self.algorithm_names['svm']['en']
            results['svm'] = {
                'name': svm_name,
                'error': "Skipped SVM because of single-class data",
                'is_single_class': is_single_class_data
            }
        return results

    def compare_algorithms(self, results):
        """Compare the performance of the different algorithms."""
        # Drop algorithms that failed to train
        valid_results = {name: result for name, result in results.items() if 'test_accuracy' in result}
        if not valid_results:
            print("No algorithm trained successfully; cannot plot comparison.")
            return None
        names = [valid_results[name]['name'] for name in valid_results]
        accuracies = [valid_results[name]['test_accuracy'] for name in valid_results]
        plt.figure(figsize=(12, 6))
        bars = plt.bar(names, accuracies, color='skyblue')
        # Pick labels depending on Chinese support
        title = "不同算法的测试集准确率比较" if self.use_chinese else "Comparison of Test Set Accuracies for Different Algorithms"
        x_label = "算法" if self.use_chinese else "Algorithm"
        y_label = "准确率" if self.use_chinese else "Accuracy"
        plt.ylim(0, 1.05)
        plt.title(title)
        plt.xlabel(x_label)
        plt.ylabel(y_label)
        # Annotate each bar with its value
        for bar in bars:
            height = bar.get_height()
            plt.text(bar.get_x() + bar.get_width()/2., height + 0.01,
                     f'{height:.4f}', ha='center', va='bottom')
        plt.xticks(rotation=45, ha='right')
        plt.tight_layout()
        return plt

    def plot_confusion_matrix(self, results, gas_data_loader, use_chinese=True, rotate_labels=45,
                              fig_width=12, fig_height=10, font_size=10):
        """
        Plot confusion matrices.

        Parameters:
        - results: dict of algorithm results
        - gas_data_loader: gas data loader instance
        - use_chinese: whether to use Chinese labels
        - rotate_labels: label rotation angle, default 45 degrees
        - fig_width: figure width, default 12
        - fig_height: figure height, default 10
        - font_size: font size, default 10
        """
        # Drop algorithms that failed to train
        valid_results = {name: result for name, result in results.items() if 'test_accuracy' in result}
        if not valid_results:
            print("No algorithm trained successfully; cannot plot confusion matrices.")
            return None
        # Collect the unique labels seen across all algorithms
        all_unique_labels = set()
        for name, result in valid_results.items():
            all_unique_labels.update(result['unique_labels'])
        all_unique_labels = sorted(list(all_unique_labels))
        # Build the label-name mapping
        label_names = []
        for label in all_unique_labels:
            # Look up the matching multi-dimension label name
            label_name = None
            for key, label_id in gas_data_loader.multi_dimension_labels.items():
                if label_id == label:
                    # Fetch the label name rather than the label id
                    label_name = gas_data_loader.get_or_create_multi_dimension_label(
                        key.split('_')[0],                          # sensor type
                        key.split('_')[1],                          # gas type
                        int(key.split('_')[2].replace('ppm', ''))   # concentration
                    )[1]  # the second return value, the name dict
                    break
            # Use the label name when found
            if label_name and isinstance(label_name, dict):
                if use_chinese:
                    label_names.append(label_name.get('cn', f"类别 {label}"))
                else:
                    label_names.append(label_name.get('en', f"Class {label}"))
            else:
                # Fall back to a default name
                label_names.append(f"类别 {label}" if use_chinese else f"Class {label}")
        for name, result in valid_results.items():
            plt.figure(figsize=(fig_width, fig_height))
            cm = confusion_matrix(result['y_test'], result['y_pred'], labels=all_unique_labels)
            disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=label_names)
            disp.plot(cmap=plt.cm.Blues)
            # Pick the title depending on Chinese support
            title = f"{result['name']} 混淆矩阵" if use_chinese else f"{result['name']} Confusion Matrix"
            # Note single-class data in the title
            if result.get('is_single_class', False):
                title += " (单类别数据)"
            plt.title(title)
            # Rotate the x-axis labels
            plt.xticks(rotation=rotate_labels, ha='right', rotation_mode='anchor', fontsize=font_size)
            plt.yticks(fontsize=font_size)
            plt.tight_layout()
        return plt


def detect_dataset_type(dataset_path):
    """
    Auto-detect the dataset type: a single dataset or a folder of datasets.

    Parameters:
    - dataset_path: dataset path

    Returns:
    - dataset_type: 'single' or 'multiple'
    - file_paths: list of file paths
    - gas_types: list of gas types
    - concentrations: list of concentrations
    - sensor_types: list of sensor types
    """
    # Check the path exists
    if not os.path.exists(dataset_path):
        raise FileNotFoundError(f"Path does not exist: {dataset_path}")
    # File or directory?
    if os.path.isfile(dataset_path):
        # Single file
        file_paths = [dataset_path]
        # Extract sensor type, gas type and concentration from the file name
        file_name = os.path.basename(dataset_path)
        sensor_type = extract_sensor_type(file_name)
        gas_type = extract_gas_type(file_name)
        concentration = extract_concentration(file_name)
        gas_types = [gas_type]
        concentrations = [concentration]
        sensor_types = [sensor_type]
        print(f"Detected single dataset: {file_name}")
        print(f"Sensor: {sensor_type}, gas: {gas_type}, concentration: {concentration}ppm")
        return 'single', file_paths, gas_types, concentrations, sensor_types
    elif os.path.isdir(dataset_path):
        # Directory: find all Excel files
        excel_files = [f for f in os.listdir(dataset_path) if f.endswith(('.xlsx', '.xls'))]
        if not excel_files:
            raise ValueError(f"No Excel files found in folder: {dataset_path}")
        file_paths = []
        gas_types = []
        concentrations = []
        sensor_types = []
        for file in excel_files:
            file_path = os.path.join(dataset_path, file)
            file_paths.append(file_path)
            # Extract sensor type, gas type and concentration from the file name
            sensor_type = extract_sensor_type(file)
            gas_type = extract_gas_type(file)
            concentration = extract_concentration(file)
            gas_types.append(gas_type)
            concentrations.append(concentration)
            sensor_types.append(sensor_type)
            print(f"Found dataset file: {file}")
            print(f"Sensor: {sensor_type}, gas: {gas_type}, concentration: {concentration}ppm")
        print(f"Found {len(file_paths)} dataset files in total")
        return 'multiple', file_paths, gas_types, concentrations, sensor_types
    else:
        raise ValueError(f"Unrecognized path: {dataset_path}")


def extract_sensor_type(file_name):
    """Extract the sensor type from the file name."""
    # Regex patterns for the sensor types
    sensor_patterns = {
        'MP2': r'(^MP2[^a-zA-Z0-9]|MP2$)',
        'MP3B': r'(^MP3B[^a-zA-Z0-9]|MP3B$)',
        'MP503': r'(^MP503[^a-zA-Z0-9]|MP503$)',
        'MP801': r'(^MP801[^a-zA-Z0-9]|MP801$)',
        'MQ2': r'(^MQ2[^a-zA-Z0-9]|MQ2$)',
        'MQ7B': r'(^MQ7B[^a-zA-Z0-9]|MQ7B$)'
    }
    # Upper-case to improve the match rate
    file_name_upper = file_name.upper()
    # Try each pattern
    for sensor_type, pattern in sensor_patterns.items():
        if re.search(pattern, file_name_upper):
            return sensor_type
    # Fall back to a default
    print(f"Warning: cannot extract sensor type from '{file_name}', using default 'MP2'")
    return 'MP2'


def extract_gas_type(file_name):
    """Extract the gas type from the file name."""
    # Mapping from pinyin/Chinese names to the base gas types
    gas_name_mapping = {
        'bingtong': 'acetone', '丙酮': 'acetone',
        'jiaben': 'toluene', '甲苯': 'toluene',
        'jiachun': 'methanol', '甲醇': 'methanol',
        'jiaquan': 'formaldehyde', '甲醛': 'formaldehyde',
        'yichun': 'ethanol', '乙醇': 'ethanol'
    }
    # Strip the extension
    file_name_without_ext = os.path.splitext(file_name)[0]
    # Split on the fixed "sensor_gas_concentration" format
    parts = file_name_without_ext.split('_')
    # Make sure there are enough parts
    if len(parts) < 3:
        print(f"Warning: unexpected file name format: {file_name}")
        return 'acetone'
    # The gas-name part
    gas_name_part = parts[1]
    # Mixture?
    if '+' in gas_name_part or '＋' in gas_name_part:
        # Handle mixtures: normalize full-width ＋ to ASCII +
        gas_name_part = gas_name_part.replace('＋', '+')
        gas_components = gas_name_part.split('+')
        # Map to standard gas names
        standard_gas_names = []
        for component in gas_components:
            # Try the Chinese name mapping first
            standard_name = gas_name_mapping.get(component, None)
            if standard_name:
                standard_gas_names.append(standard_name)
            else:
                # Accept English names directly
                if component.lower() in ['acetone', 'toluene', 'methanol', 'formaldehyde', 'ethanol']:
                    standard_gas_names.append(component.lower())
                else:
                    print(f"Warning: unrecognized gas component: {component}")
        # Sort alphabetically for consistency
        standard_gas_names.sort()
        # Join into a mixture name
        if len(standard_gas_names) > 1:
            return '+'.join(standard_gas_names)
        elif len(standard_gas_names) == 1:
            return standard_gas_names[0]
    # Single gas: try the Chinese name mapping first
    standard_name = gas_name_mapping.get(gas_name_part, None)
    if standard_name:
        return standard_name
    # Accept English names directly (lower-cased)
    if gas_name_part.lower() in ['acetone', 'toluene', 'methanol', 'formaldehyde', 'ethanol']:
        return gas_name_part.lower()
    # Fall back to a default
    print(f"Warning: cannot extract gas type from '{file_name}', using default 'acetone'")
    return 'acetone'


def extract_concentration(file_name):
    """Extract the concentration value from the file name."""
    # Strip the extension
    file_name_without_ext = os.path.splitext(file_name)[0]
    # Split on the fixed "sensor_gas_concentration" format
    parts = file_name_without_ext.split('_')
    # Make sure there are enough parts
    if len(parts) < 3:
        print(f"Warning: unexpected file name format: {file_name}")
        return 20
    # The concentration part
    concentration_part = parts[2]
    # Extract the digits
    match = re.search(r'(\d+)', concentration_part)
    if match:
        return int(match.group(1))
    # Fall back to a default
    print(f"Warning: cannot extract concentration from '{file_name}', using default 20ppm")
    return 20


def main():
    """Entry point."""
    # Check Chinese font support
    chinese_supported = check_chinese_font_support()
    # Create the data loader
    data_loader = GasSensorDataAnalyzer()
    # Dataset path
    dataset_path = r"C:\Users\Cong\Desktop\作业\项目\六通道2混合\2_MP2"
    try:
        # Auto-detect the dataset type
        dataset_type, file_paths, gas_types, concentrations, sensor_types = detect_dataset_type(dataset_path)
        # Load accordingly
        if dataset_type == 'single':
            # Load a single dataset
            X, y = data_loader.load_dataset(file_paths[0], gas_types[0], concentrations[0], sensor_types[0])
        else:
            # Load and merge multiple datasets
            X, y = data_loader.load_multiple_gas_data(file_paths, gas_types, concentrations, sensor_types)
        if X is None or len(X) == 0:
            print("No valid data available for training. Please check file paths and formats.")
            return
        print(f"Total samples loaded: {len(X)}")
        print(f"Number of classes in the dataset: {len(np.unique(y))}")
        # Create the algorithm selector; use Chinese labels only if the fonts support them
        selector = AlgorithmSelector(use_chinese=chinese_supported)
        # Example custom parameter configuration
        selector.set_algorithm_params('knn', {'n_neighbors': 3, 'metric': 'manhattan'})
        selector.set_algorithm_params('svm', {'C': 0.8, 'kernel': 'linear'})
        selector.set_algorithm_params('neural_network', {'hidden_layer_sizes': (150, 75)})
        # Train all algorithms
        results = selector.train_models(X, y)
        # Compare algorithm performance
        plt1 = selector.compare_algorithms(results)
        if plt1:
            plt1.savefig('algorithm_comparison.png')
            plt1.close()
        # Plot confusion matrices
        plt2 = selector.plot_confusion_matrix(results, data_loader, use_chinese=chinese_supported,
                                              rotate_labels=45, fig_width=20, fig_height=20, font_size=8)
        if plt2:
            plt2.savefig('confusion_matrix.png')
            plt2.close()
        print("\nAlgorithm comparison saved as 'algorithm_comparison.png'")
        print("Confusion matrix saved as 'confusion_matrix.png'")
    except Exception as e:
        print(f"Error during execution: {e}")


if __name__ == "__main__":
    main()
```

The other is tempcoderunnerfile.py:

```python
# Excerpt: assumes the surrounding Flask app, data_loader and allowed_file
# are defined elsewhere in the file.
@app.route('/upload', methods=['POST'])
def upload_file():
    """Handle file upload."""
    if 'files' not in request.files:
        return jsonify({'error': 'No file part'}), 400
    files = request.files.getlist('files')
    gas_type = request.form.get('gas_type', 'acetone')
    concentration = int(request.form.get('concentration', 20))
    if not files or files[0].filename == '':
        return jsonify({'error': 'No selected file'}), 400
    datasets = []
    for file in files:
        if file and allowed_file(file.filename):
            # Save a temporary copy
            file_path = os.path.join(app.config['UPLOAD_FOLDER'], file.filename)
            file.save(file_path)
            # Load the data
            data = data_loader.load_single_gas_data(file_path, gas_type, concentration)
            datasets.append(data)
            # Remove the temporary file
            os.remove(file_path)
    # Merge the datasets
    X, y = data_loader.combine_datasets(datasets)
    if X is None or len(X) == 0:
        return jsonify({'error': 'No valid data loaded'}), 400
    # Save the merged data
    df = pd.DataFrame(X)
    df['label'] = y
    file_path = os.path.join(app.config['UPLOAD_FOLDER'], 'temp_data.xlsx')
    df.to_excel(file_path, index=False)
    return jsonify({
        'status': 'success',
        'sample_count': len(X),
        'feature_count': X.shape[1],
        'gas_type': gas_type,
        'concentration': concentration
    })
```

Please revise app.py based on these two files, so that on the page rendered by algorithmselection the data can be analyzed when I select two files.
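For what it's worth, tempcoderunnerfile.py calls load_single_gas_data with three arguments and a combine_datasets method, neither of which exists in new_algorithm.py: there, load_single_gas_data takes sensor_type as a fourth argument, and merging is done by load_multiple_gas_data. A minimal sketch of how app.py's upload route could reconcile the two, assuming the same Flask `app` object, `allowed_file()` helper, and `UPLOAD_FOLDER` config as in tempcoderunnerfile.py:

```python
# Sketch only: `app` and `allowed_file` are assumed from tempcoderunnerfile.py;
# GasSensorDataAnalyzer and the extract_* helpers come from new_algorithm.py.
import os
from flask import request, jsonify
from new_algorithm import (GasSensorDataAnalyzer, extract_sensor_type,
                           extract_gas_type, extract_concentration)

data_loader = GasSensorDataAnalyzer()

@app.route('/upload', methods=['POST'])
def upload_file():
    """Accept one or more Excel files and merge them for analysis."""
    files = request.files.getlist('files')
    if not files or files[0].filename == '':
        return jsonify({'error': 'No selected file'}), 400

    file_paths, gas_types, concentrations, sensor_types = [], [], [], []
    for file in files:
        if file and allowed_file(file.filename):
            path = os.path.join(app.config['UPLOAD_FOLDER'], file.filename)
            file.save(path)
            file_paths.append(path)
            # Derive per-file metadata from the "sensor_gas_concentration" file name
            sensor_types.append(extract_sensor_type(file.filename))
            gas_types.append(extract_gas_type(file.filename))
            concentrations.append(extract_concentration(file.filename))

    # load_multiple_gas_data does the per-file loading, dimension checks and merging
    X, y = data_loader.load_multiple_gas_data(file_paths, gas_types,
                                              concentrations, sensor_types)
    for path in file_paths:
        os.remove(path)  # clean up the temporary copies

    if X is None or len(X) == 0:
        return jsonify({'error': 'No valid data loaded'}), 400
    return jsonify({'status': 'success',
                    'sample_count': len(X),
                    'feature_count': int(X.shape[1]),
                    'class_count': int(len(set(y)))})
```

From there, the route backing the algorithmselection page can hand the merged (X, y) to AlgorithmSelector.train_models exactly as main() in new_algorithm.py does.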