Python训练营打卡 Day55

原创于 2025-06-19 21:10:11 发布 · 98 阅读

0 ·

CC 4.0 BY-SA版权

文章标签：

#python #开发语言

Python入门（坚持）专栏收录该内容

52 篇文章

订阅专栏

作业：手动构造类似的数据集（如 cos(x) 数据），观察不同机器学习模型之间的差异。

import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
import lightgbm as lgb
 
# Seed NumPy's global RNG so the noise term below is reproducible.
np.random.seed(42)

# Synthetic time series: cosine oscillation + linear upward trend + Gaussian noise.
n_points = 1000
x = np.linspace(0, 100, num=n_points)
noise = np.random.normal(loc=0, scale=0.5, size=n_points)
y = np.cos(x) + 0.1 * x + noise
 
# Rescale the series into [0, 1]; the fitted scaler is kept so that values can
# be mapped back to the original units afterwards.
# NOTE(review): the scaler is fitted on the whole series, i.e. including the
# tail that is later held out as the test set.
scaler = MinMaxScaler(feature_range=(0, 1))
y_column = y.reshape(-1, 1)  # fit_transform is given a single-column 2-D array
scaled_y = scaler.fit_transform(y_column).flatten()
 
# Build sliding-window samples for supervised learning.
def create_sequences(data, seq_length):
    """Slice a series into fixed-length windows and their next-step targets.

    Window ``i`` holds ``data[i:i + seq_length]`` and its target is
    ``data[i + seq_length]``.

    Args:
        data: Sequence or array indexed along its first axis.
        seq_length: Number of consecutive steps in each window; must be
            non-negative.

    Returns:
        A tuple ``(X, y)`` of NumPy arrays. ``X`` has shape
        ``(n_windows, seq_length, ...)`` and ``y`` has shape
        ``(n_windows, ...)``, where ``n_windows = len(data) - seq_length``.
        When the series is too short to yield a single window, both arrays
        are empty but keep those shapes (``n_windows == 0``).

    Raises:
        ValueError: If ``seq_length`` is negative.
    """
    if seq_length < 0:
        raise ValueError(f"seq_length must be non-negative, got {seq_length}")

    series = np.asarray(data)
    n_windows = max(len(series) - seq_length, 0)

    # Row i of the index grid is [i, i + 1, ..., i + seq_length - 1]; indexing
    # with it gathers every window at once and always yields a
    # (n_windows, seq_length, ...) array, even when n_windows is 0.
    window_idx = np.arange(n_windows)[:, None] + np.arange(seq_length)
    windows = series[window_idx]
    # Copy so the targets do not alias the caller's array.
    targets = series[seq_length:seq_length + n_windows].copy()
    return windows, targets
 
# Each sample uses the previous `seq_length` points to predict the next one.
seq_length = 30
# Keep the windowed targets under their own name: rebinding `y` here would
# replace the raw series that the plotting code reads later, which breaks the
# reference curve and the length of the prediction overlays.
X, y_seq = create_sequences(scaled_y, seq_length)

# Chronological 80/20 split: the first 80% of windows train, the rest test.
train_size = int(len(X) * 0.8)
X_train, X_test = X[:train_size], X[train_size:]
y_train, y_test = y_seq[:train_size], y_seq[train_size:]

# Lay each window out as one flat feature row, (n_samples, n_features).
n_samples_train = X_train.shape[0]
n_samples_test = X_test.shape[0]
X_train_rf = X_train.reshape(n_samples_train, -1)
X_test_rf = X_test.reshape(n_samples_test, -1)
 
# Random forest: fit on the training windows, then predict both splits.
rf_model = RandomForestRegressor(n_estimators=100, random_state=42).fit(X_train_rf, y_train)
train_predict_rf, test_predict_rf = (
    rf_model.predict(features) for features in (X_train_rf, X_test_rf)
)

# LightGBM: same feature matrices and targets, library-default hyperparameters.
lgb_model = lgb.LGBMRegressor(random_state=42).fit(X_train_rf, y_train)
train_predict_lgb, test_predict_lgb = (
    lgb_model.predict(features) for features in (X_train_rf, X_test_rf)
)
 
def _to_original_scale(values):
    """Undo the MinMax scaling; returns a single-column 2-D array."""
    return scaler.inverse_transform(values.reshape(-1, 1))


def _rmse(actual, predicted):
    """Root mean squared error between two equally shaped arrays."""
    return np.sqrt(mean_squared_error(actual, predicted))


# Bring predictions and targets back to the units of the raw series.
train_predict_rf = _to_original_scale(train_predict_rf)
test_predict_rf = _to_original_scale(test_predict_rf)
train_predict_lgb = _to_original_scale(train_predict_lgb)
test_predict_lgb = _to_original_scale(test_predict_lgb)

y_train_orig = _to_original_scale(y_train)
y_test_orig = _to_original_scale(y_test)

# RMSE per model and per split, measured in original units.
rf_train_rmse = _rmse(y_train_orig, train_predict_rf)
rf_test_rmse = _rmse(y_test_orig, test_predict_rf)
lgb_train_rmse = _rmse(y_train_orig, train_predict_lgb)
lgb_test_rmse = _rmse(y_test_orig, test_predict_lgb)
 
# Plot the raw series together with both models' train/test predictions.
# NOTE(review): the labels below are Chinese; rendering them presumably needs
# a CJK-capable font configured in matplotlib -- confirm on the target machine.
plt.figure(figsize=(15, 7))

# Rebuild the full raw series from its scaled copy instead of reading `y`, so
# the reference curve and the overlay length do not depend on what `y` is
# bound to at this point in the script.
original_series = scaler.inverse_transform(scaled_y.reshape(-1, 1)).flatten()
plt.plot(original_series, label='原始数据', color='gray', alpha=0.5)


def _aligned_overlay(predictions, start):
    """Return a NaN-filled array as long as the series with `predictions` at `start`."""
    overlay = np.full(len(original_series), np.nan)
    flat = np.asarray(predictions).flatten()
    overlay[start:start + len(flat)] = flat
    return overlay


# The first prediction belongs to time step `seq_length` (the step right after
# the first window); test predictions continue where the training ones end.
train_start = seq_length

# Random forest overlays
test_start_rf = train_start + len(train_predict_rf)
train_predict_plot_rf = _aligned_overlay(train_predict_rf, train_start)
test_predict_plot_rf = _aligned_overlay(test_predict_rf, test_start_rf)

# LightGBM overlays
test_start_lgb = train_start + len(train_predict_lgb)
train_predict_plot_lgb = _aligned_overlay(train_predict_lgb, train_start)
test_predict_plot_lgb = _aligned_overlay(test_predict_lgb, test_start_lgb)

plt.plot(train_predict_plot_rf, label='随机森林训练集预测值', color='blue', linestyle='--')
plt.plot(test_predict_plot_rf, label='随机森林测试集预测值', color='red', linestyle='--')
plt.plot(train_predict_plot_lgb, label='LightGBM 训练集预测值', color='green', linestyle=':')
plt.plot(test_predict_plot_lgb, label='LightGBM 测试集预测值', color='orange', linestyle=':')
plt.title('时间序列预测结果对比')
plt.xlabel('时间步')
plt.ylabel('值')
plt.legend()
plt.grid(True)
plt.show()
 
# Report train/test RMSE for both models, one line per figure.
rmse_report = [
    f"随机森林训练集 RMSE: {rf_train_rmse:.4f}",
    f"随机森林测试集 RMSE: {rf_test_rmse:.4f}",
    f"LightGBM 训练集 RMSE: {lgb_train_rmse:.4f}",
    f"LightGBM 测试集 RMSE: {lgb_test_rmse:.4f}",
]
print("\n".join(rmse_report))

@浙大疏锦行