```
from pathlib import Path

import numpy as np
import pandas as pd
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.svm import SVR
# Train and evaluate a grid-searched SVR regressor on one Excel workbook:
# load -> split -> min-max scale -> grid search -> predict -> score.
DATA_FILE = '集胞藻-Cd.xlsx'
OUTPUT_DIR = Path('支持向量机结果')  # directory the metrics workbook is exported into

df = pd.read_excel(DATA_FILE)
# Strip stray whitespace from the headers so the column names below match.
df.columns = df.columns.str.strip()

# Feature and target column names as they appear in the workbook.
features = ['T', 'Ph', 'Biomass', 'Time', 'Initial']
target_column = 'Removel'

X = df[features]
y = df[target_column]

# Hold out 30% of the rows for testing; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

# Min-max normalise the features. The scaler is fitted on the training rows
# only and then applied unchanged to the test rows.
scaler_X = MinMaxScaler()
X_train_scaled = scaler_X.fit_transform(X_train)
X_test_scaled = scaler_X.transform(X_test)

# Normalise the target the same way. MinMaxScaler expects 2-D input, hence the
# reshape to a single column and the flatten back to 1-D afterwards.
scaler_y = MinMaxScaler()
y_train_scaled = scaler_y.fit_transform(y_train.values.reshape(-1, 1)).flatten()
# Not consumed below (metrics are computed on the original scale); kept so the
# scaled test target stays available alongside the scaled training target.
y_test_scaled = scaler_y.transform(y_test.values.reshape(-1, 1)).flatten()

# Hyper-parameters shared by both kernels.
_shared_grid = {
    'C': np.logspace(-2, 3, 6),      # [0.01, 0.1, 1, 10, 100, 1000]
    'gamma': np.logspace(-4, 0, 5),  # [0.0001, 0.001, 0.01, 0.1, 1]
    'epsilon': [0.05, 0.1, 0.2],
}
# One grid per kernel: SVR ignores `degree` for every kernel except 'poly', so
# crossing it with 'rbf' would only fit each rbf candidate twice with identical
# results. GridSearchCV explores each dict in the list as a separate grid.
param_grid = [
    {'kernel': ['rbf'], **_shared_grid},
    {'kernel': ['poly'], 'degree': [2, 3], **_shared_grid},
]

# max_iter caps the solver's iterations so no single candidate can run
# unbounded; tol is the solver's stopping tolerance.
svr = SVR(max_iter=10000, tol=1e-4)

# 5-fold cross-validated grid search, scored by negative MSE on the scaled
# target (higher is better).
grid_search = GridSearchCV(
    svr, param_grid, cv=5, scoring='neg_mean_squared_error', verbose=1
)
grid_search.fit(X_train_scaled, y_train_scaled)

# Best hyper-parameter combination and the estimator refitted with it.
best_params = grid_search.best_params_
best_svr = grid_search.best_estimator_

# Predictions are produced on the scaled target ...
y_train_pred = best_svr.predict(X_train_scaled)
y_test_pred = best_svr.predict(X_test_scaled)

# ... and mapped back to the original target units before scoring.
y_train_pred_original = scaler_y.inverse_transform(y_train_pred.reshape(-1, 1)).flatten()
y_test_pred_original = scaler_y.inverse_transform(y_test_pred.reshape(-1, 1)).flatten()

# Performance metrics on the original scale for both splits.
mse_train = mean_squared_error(y_train, y_train_pred_original)
rmse_train = np.sqrt(mse_train)
r2_train = r2_score(y_train, y_train_pred_original)
mse_test = mean_squared_error(y_test, y_test_pred_original)
rmse_test = np.sqrt(mse_test)
r2_test = r2_score(y_test, y_test_pred_original)

print(f"最佳参数: {best_params}")
print(f"训练集 MSE: {mse_train}, RMSE: {rmse_train}, R^2: {r2_train}")
print(f"测试集 MSE: {mse_test}, RMSE: {rmse_test}, R^2: {r2_test}")

# Collect the metrics into a two-row table (train / test) for export.
results_df = pd.DataFrame({
    '数据集': ['训练集', '测试集'],
    'MSE': [mse_train, mse_test],
    'RMSE': [rmse_train, rmse_test],
    'R²': [r2_train, r2_test],
})

# The metrics table is exported into OUTPUT_DIR. to_excel does not create
# missing directories, so create it here rather than fail after the whole
# grid search has already run.
OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
results_df.to_excel('支持向量机结果/集胞藻-Cd模型评估结果.xlsx', index=False)
```

根据上述代码生成一个遍历数据文件的代码,分别输出每个文件的结果,并在最后输出一个汇总结果的文件