Python训练营打卡 Day55

作业:手动构造类似的数据集(如 cos(x) 数据),观察不同机器学习模型的表现差异

import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
import lightgbm as lgb
 
# Seed NumPy's global RNG so the noise drawn below is reproducible.
np.random.seed(42)

# Synthetic series: 1000 evenly spaced points on [0, 100]; a cosine wave
# riding on a linear upward trend, with Gaussian noise (std 0.5) on top.
x = np.linspace(0, 100, 1000)
noise = np.random.normal(loc=0, scale=0.5, size=1000)
y = np.cos(x) + 0.1 * x + noise

# Rescale the series into [0, 1]. The scaler works on 2-D input, so the
# series is turned into a single column first and flattened back afterwards.
# NOTE(review): the scaler is fitted on the complete series; if part of it is
# later held out for testing, that part's range is already known here.
scaler = MinMaxScaler(feature_range=(0, 1))
y_column = y.reshape(-1, 1)
scaled_y = scaler.fit_transform(y_column).flatten()
 
# 创建序列数据
def create_sequences(data, seq_length):
    """Slice a series into sliding windows and their next-step targets.

    Window ``i`` is ``data[i:i + seq_length]`` and its target is the element
    immediately after it, ``data[i + seq_length]``.

    Args:
        data: Indexable, sliceable sequence of values (list or 1-D array).
        seq_length: Number of consecutive values in each window.

    Returns:
        A ``(windows, targets)`` pair of NumPy arrays holding
        ``len(data) - seq_length`` entries each. Both are empty when the
        series is not longer than ``seq_length``.
    """
    n_windows = len(data) - seq_length
    starts = range(n_windows)  # empty range when n_windows <= 0
    windows = [data[start:start + seq_length] for start in starts]
    targets = [data[start + seq_length] for start in starts]
    return np.array(windows), np.array(targets)
 
# Turn the scaled series into supervised samples: each sample is a window of
# `seq_length` consecutive values and its target is the value that follows.
seq_length = 30
# The targets get their own name on purpose. Rebinding `y` here would replace
# the raw, full-length series with the shorter scaled target vector; the
# plotting code further down uses `y` both as the "original data" curve and
# as the template for its NaN-padded buffers, so the test predictions would
# no longer fit into those buffers (ValueError on assignment).
X, y_seq = create_sequences(scaled_y, seq_length)

# Chronological split: the first 80% of the windows train the models and the
# remaining 20% are held out. No shuffling, so the test set lies strictly
# after the training set in time.
train_size = int(len(X) * 0.8)
X_train, X_test = X[:train_size], X[train_size:]
y_train, y_test = y_seq[:train_size], y_seq[train_size:]
 
# Both estimators take a 2-D (n_samples, n_features) matrix, so every axis
# after the first is collapsed into one flat feature row per sample.
n_samples_train = len(X_train)
n_samples_test = len(X_test)
X_train_rf = X_train.reshape((n_samples_train, -1))
X_test_rf = X_test.reshape((n_samples_test, -1))

# Random forest: fit on the training windows, then predict both splits.
# `fit` returns the estimator itself, which allows the chained construction.
rf_model = RandomForestRegressor(n_estimators=100, random_state=42).fit(
    X_train_rf, y_train
)
train_predict_rf = rf_model.predict(X_train_rf)
test_predict_rf = rf_model.predict(X_test_rf)

# LightGBM with library-default hyperparameters (only the seed is fixed),
# trained and evaluated on exactly the same matrices as the random forest.
lgb_model = lgb.LGBMRegressor(random_state=42).fit(X_train_rf, y_train)
train_predict_lgb = lgb_model.predict(X_train_rf)
test_predict_lgb = lgb_model.predict(X_test_rf)
 
def _to_original_units(values):
    """Return *values* as an (n, 1) column with the scaler's transform undone."""
    return scaler.inverse_transform(values.reshape(-1, 1))


def _rmse(actual, predicted):
    """Return the root-mean-squared error between *actual* and *predicted*."""
    return np.sqrt(mean_squared_error(actual, predicted))


# Undo the scaling so that errors are reported in the units of the raw
# series. Each prediction name is rebound to its (n, 1) unscaled column.
train_predict_rf = _to_original_units(train_predict_rf)
test_predict_rf = _to_original_units(test_predict_rf)
train_predict_lgb = _to_original_units(train_predict_lgb)
test_predict_lgb = _to_original_units(test_predict_lgb)

# The targets go through the same inverse transform for a like-for-like
# comparison with the predictions.
y_train_orig = _to_original_units(y_train)
y_test_orig = _to_original_units(y_test)

# RMSE per model and per split.
rf_train_rmse = _rmse(y_train_orig, train_predict_rf)
rf_test_rmse = _rmse(y_test_orig, test_predict_rf)
lgb_train_rmse = _rmse(y_train_orig, train_predict_lgb)
lgb_test_rmse = _rmse(y_test_orig, test_predict_lgb)
 
# Draw the reference series first, then overlay each model's predictions.
plt.figure(figsize=(15, 7))
plt.plot(y, label='原始数据', color='gray', alpha=0.5)

# Every prediction curve lives in a NaN-filled buffer shaped like `y`, so it
# is drawn only over the time steps it covers. Training predictions start at
# index `seq_length` (the first window has no prediction of its own); test
# predictions start right after the last training prediction.

# Random forest buffers.
rf_test_start = seq_length + len(train_predict_rf)
train_predict_plot_rf = np.full_like(y, np.nan)
train_predict_plot_rf[seq_length:rf_test_start] = train_predict_rf.ravel()
test_predict_plot_rf = np.full_like(y, np.nan)
test_predict_plot_rf[rf_test_start:] = test_predict_rf.ravel()

# LightGBM buffers.
lgb_test_start = seq_length + len(train_predict_lgb)
train_predict_plot_lgb = np.full_like(y, np.nan)
train_predict_plot_lgb[seq_length:lgb_test_start] = train_predict_lgb.ravel()
test_predict_plot_lgb = np.full_like(y, np.nan)
test_predict_plot_lgb[lgb_test_start:] = test_predict_lgb.ravel()

# One row per overlay curve: (buffer, legend label, colour, line style).
for curve, curve_label, curve_color, curve_style in (
    (train_predict_plot_rf, '随机森林训练集预测值', 'blue', '--'),
    (test_predict_plot_rf, '随机森林测试集预测值', 'red', '--'),
    (train_predict_plot_lgb, 'LightGBM 训练集预测值', 'green', ':'),
    (test_predict_plot_lgb, 'LightGBM 测试集预测值', 'orange', ':'),
):
    plt.plot(curve, label=curve_label, color=curve_color, linestyle=curve_style)

plt.title('时间序列预测结果对比')
plt.xlabel('时间步')
plt.ylabel('值')
plt.legend()
plt.grid(True)
plt.show()

# Report train/test RMSE for both models, four decimal places each.
print(f"随机森林训练集 RMSE: {rf_train_rmse:.4f}")
print(f"随机森林测试集 RMSE: {rf_test_rmse:.4f}")
print(f"LightGBM 训练集 RMSE: {lgb_train_rmse:.4f}")
print(f"LightGBM 测试集 RMSE: {lgb_test_rmse:.4f}")

@浙大疏锦行

评论
成就一亿技术人!
拼手气红包6.0元
还能输入1000个字符
 
红包 添加红包
表情包 插入表情
 条评论被折叠 查看
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值