A summary of the process from images to x_train and y_train

1. This covers the path from .npz to x_train and y_train; the path from .jpg to .npz will be summarized later:

import keras
from keras import backend as K
from keras.datasets import mnist

img_rows, img_cols = 28, 28   # MNIST image dimensions
num_classes = 10

(x_train, y_train), (x_test, y_test) = mnist.load_data('mnist.npz')
if K.image_data_format() == 'channels_first':
	x_train = x_train.reshape(x_train.shape[0], 1, img_rows, img_cols)  # step 1
	x_test = x_test.reshape(x_test.shape[0], 1, img_rows, img_cols)
	input_shape = (1, img_rows, img_cols)
else:
	x_train = x_train.reshape(x_train.shape[0], img_rows, img_cols, 1)
	x_test = x_test.reshape(x_test.shape[0], img_rows, img_cols, 1)
	input_shape = (img_rows, img_cols, 1)
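# sanity check: with the stock MNIST split, the channels_last branch yields
# x_train: (60000, 28, 28, 1) and x_test: (10000, 28, 28, 1)
print(x_train.shape, x_test.shape)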

x_train = x_train.astype('float32')   # step 2: GPUs have better throughput in single precision
x_test = x_test.astype('float32')     # correction: the first draft had steps 2 and 3 in the wrong order
                                      # (astype must come before the division, or the uint8 values would be truncated)

x_train /= 255   # step 3: scale pixel values from [0, 255] to [0, 1]
x_test /= 255

y_train = keras.utils.to_categorical(y_train, num_classes)   # one-hot encode the integer labels
y_test = keras.utils.to_categorical(y_test, num_classes)
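As a quick check of the one-hot step (the label value 5 below is just an illustrative example):

print(y_train[0])
# if the original integer label was 5, this now prints
# [0. 0. 0. 0. 0. 1. 0. 0. 0. 0.]   (length == num_classes)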

Once you have x_train and y_train, they can be passed directly to model.fit(x_train, y_train).
The logic is model-first: start from the input the model expects. Here, Keras's model.fit() method needs x_train and y_train, where x_train has shape [total_samples, img_rows, img_cols, channels]. Note that the first dimension is the total number of samples (in practice x_train.shape[0]), not the batch size.
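To make the model-first logic concrete, here is a minimal sketch of a model that consumes input_shape, x_train, and y_train as prepared above. The architecture and hyperparameters (a single Conv2D layer, batch_size=128, epochs=10) are illustrative assumptions, not part of the original post:

from keras.models import Sequential
from keras.layers import Conv2D, Flatten, Dense

batch_size = 128   # assumed value; batch_size belongs in fit(), not in reshape()

model = Sequential()
model.add(Conv2D(32, kernel_size=(3, 3), activation='relu',
                 input_shape=input_shape))   # input_shape from the channels branch above
model.add(Flatten())
model.add(Dense(num_classes, activation='softmax'))   # one output per class, matching to_categorical

model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# x_train has shape [total_samples, img_rows, img_cols, channels]; the batch size
# is a fit() argument, which is why reshape() above uses x_train.shape[0] instead
model.fit(x_train, y_train, batch_size=batch_size, epochs=10,
          validation_data=(x_test, y_test))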
