# 在Extra Trees回归模型中引入L1正则化:将Lasso回归的预测结果作为新的特征
import pandas as pd
from sklearn.model_selection import train_test_split
import joblib
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_error,mean_absolute_error,r2_score
from sklearn.ensemble import ExtraTreesRegressor
from sklearn.preprocessing import MinMaxScaler
from sklearn.linear_model import Lasso
# Training script: fit a Lasso model, append its prediction to the scaled
# features as one extra column, and train an Extra Trees regressor on the
# result.  All fitted objects are persisted with joblib so that a separate
# script can reproduce exactly the same preprocessing at inference time.
data = pd.read_csv('data.csv')
print(data.shape)
# NOTE(review): looks like a placeholder column list -- confirm the real
# feature column names before running.
features = ['features']
target = data['target']
X_train, X_test, y_train, y_test = train_test_split(
    data[features], target, test_size=0.2, random_state=300
)

# Fit the feature scaler on the training split only, then reuse it for the
# test split so no test-set statistics leak into training.
scaler = MinMaxScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Scale the target as well.  The scaler is saved below as
# 'target_scaler.pkl' so that predictions of the tree model can be mapped
# back to the original units.
target_scaler = MinMaxScaler()
y_train_scaled = target_scaler.fit_transform(
    np.asarray(y_train).reshape(-1, 1)
).ravel()

# The L1-regularised linear model is trained on the *scaled* features, i.e.
# the same representation the saved `scaler` produces at inference time.
# NOTE(review): alpha=0.5 was chosen for unscaled inputs; it may need to be
# re-tuned now that the features lie in [0, 1].
lasso = Lasso(alpha=0.5)
lasso.fit(X_train_scaled, y_train)
x_train_lasso = lasso.predict(X_train_scaled).reshape(-1, 1)
x_test_lasso = lasso.predict(X_test_scaled).reshape(-1, 1)

# Stack the Lasso prediction as an additional last column.
x_train_combined = np.hstack((X_train_scaled, x_train_lasso))
x_test_combined = np.hstack((X_test_scaled, x_test_lasso))

model = ExtraTreesRegressor(
    max_depth=9, min_samples_leaf=2, min_samples_split=4, n_estimators=100
)
model.fit(x_train_combined, y_train_scaled)

# The model predicts the scaled target; convert back to original units so
# the metrics and plots are comparable with y_train / y_test.
y_train_pred = target_scaler.inverse_transform(
    model.predict(x_train_combined).reshape(-1, 1)
).ravel()
y_test_pred = target_scaler.inverse_transform(
    model.predict(x_test_combined).reshape(-1, 1)
).ravel()

mse = mean_squared_error(y_test, y_test_pred)
print("均方误差:", mse)
r2_train = r2_score(y_train, y_train_pred)
r2_test = r2_score(y_test, y_test_pred)
print("训练集R^2分数:", r2_train)
print("测试集R^2分数:", r2_test)
joblib.dump(model, 'model.pkl')

# Side-by-side plots of predicted vs. true values for both splits.
fig = plt.figure(figsize=(15, 8), dpi=80)
plt.rcParams['font.sans-serif'] = ['SimSun']
plt.rcParams['axes.unicode_minus'] = False
plt.subplot(121)
plt.plot(y_train_pred, 'ro-', label='训练集预测值')
plt.plot(np.asarray(y_train), 'bo-.', label='真实值')
plt.title("训练集")
plt.xlabel('样本序号')
plt.ylabel('血糖值')
plt.legend(loc='best')
plt.subplot(122)
plt.plot(y_test_pred, 'ro-', label='测试集预测值')
plt.plot(np.asarray(y_test), 'bo-.', label='真实值')
plt.title("测试集")
plt.xlabel('样本序号')
plt.ylabel('血糖值')
plt.legend(loc='best')
plt.show()

# Persist every fitted object required for inference.  The tree model is
# also written under the file name 'lssvr.pkl'.
joblib.dump(model, 'lssvr.pkl')
joblib.dump(lasso, 'lasso.pkl')
joblib.dump(scaler, 'scaler.pkl')
joblib.dump(target_scaler, 'target_scaler.pkl')
# 使用已保存的模型对新数据进行预测
import joblib
import numpy as np
from sklearn.metrics import mean_absolute_error
from matplotlib import pyplot as plt
from sklearn.metrics import r2_score
import pandas as pd
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import MinMaxScaler
from sklearn.linear_model import Lasso
# Inference script: load the persisted models and scalers, rebuild the
# "scaled features + Lasso prediction" input matrix for new data, predict,
# and compare against the true target column contained in the same file.
#
# SECURITY: joblib.load unpickles arbitrary objects -- only load artifact
# files that come from a trusted source.
lssvr_model = joblib.load('lssvr.pkl')
lasso_model = joblib.load('lasso.pkl')
new_data = pd.read_csv('test.csv')
# Column names must match the ones used when the scaler was fitted
# (the previous value carried a stray trailing space).
features = ['features']
original_features = new_data[features]

# Apply the same feature scaling that was fitted at training time.
scaler = joblib.load('scaler.pkl')
new_data_scaled = scaler.transform(original_features)

# The Lasso prediction is appended as one extra (last) feature column.
new_data_lasso_pred = lasso_model.predict(new_data_scaled)
new_features_with_lasso = np.column_stack((new_data_scaled, new_data_lasso_pred))

# The model output is on the scaled target axis; map it back to the
# original units with the saved target scaler.
new_predictions_scaled = lssvr_model.predict(new_features_with_lasso)
target_scaler = joblib.load('target_scaler.pkl')
new_predictions = target_scaler.inverse_transform(
    new_predictions_scaled.reshape(-1, 1)
)
print(new_predictions)

# Evaluate against the ground truth; both arrays have shape (n_samples, 1).
test_y = new_data[['target']]
test_y = np.array(test_y)
mse = mean_squared_error(test_y, new_predictions)
print("mse=", mse)  # the value is the mean squared error
r2 = r2_score(test_y, new_predictions)
print(r2)

# Plot predicted vs. true values by sample index.
fig = plt.figure(figsize=(15, 8), dpi=80)
plt.rcParams['font.sans-serif'] = ['SimSun']
plt.rcParams['axes.unicode_minus'] = False
plt.plot(new_predictions, 'ro-', label='预测值')
plt.plot(np.asarray(test_y), 'bo-.', label='真实值')
plt.title("LSSVR")
plt.xlabel('样本序号')
plt.ylabel('血糖值')
plt.legend(loc='best')
plt.show()