视频解释:
结果:
数据


代码:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib
import seaborn as sns
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.optimizers import Adam
from sklearn.metrics import mean_squared_error
# -------------------------------
# Matplotlib setup: render Chinese text in figures.
# -------------------------------
plt.rcParams['font.sans-serif'] = ['SimHei']  # SimHei provides the CJK glyphs
plt.rcParams['axes.unicode_minus'] = False    # keep the minus sign renderable with a CJK font

# -------------------------------
# 1. Load the raw data
# -------------------------------
data_file = '新能源汽车因素数据_sorted.xlsx'
df = pd.read_excel(data_file)

# The whole script forecasts the 'sales_num' column; fail fast if it is absent
# (the original `raise` had lost its indentation under the `if`).
if 'sales_num' not in df.columns:
    raise ValueError("数据中未找到'sales_num'列,请确认数据列名!")
# -------------------------------
# 2. Preprocessing: normalize the target series
# -------------------------------
# Only 'sales_num' is used as the forecasting target; the rows are assumed
# to already be in chronological order.
sales = df[['sales_num']].to_numpy()  # column vector, shape (n, 1)

# Squash the series into [0, 1]; LSTMs train more stably on scaled inputs.
scaler = MinMaxScaler(feature_range=(0, 1))
sales_scaled = scaler.fit_transform(sales)
def create_sequences(data, seq_length=10):
    """Build supervised-learning windows from a time-ordered series.

    Each sample ``X[i]`` holds ``seq_length`` consecutive observations
    starting at index ``i``; ``y[i]`` is the observation immediately after
    that window (one-step-ahead target).

    Parameters
    ----------
    data : array-like, shape (n, 1) or (n,)
        Time-ordered (scaled) observations.
    seq_length : int, default 10
        Number of past time steps used to predict the next one.

    Returns
    -------
    tuple of np.ndarray
        ``X`` with shape ``(n - seq_length, seq_length, ...)`` and
        ``y`` with shape ``(n - seq_length, ...)``.
    """
    # The paste had stripped this body's indentation; logic is unchanged.
    X, y = [], []
    for i in range(len(data) - seq_length):
        X.append(data[i: i + seq_length])  # the input window
        y.append(data[i + seq_length])     # value right after the window
    return np.array(X), np.array(y)
sequence_length = 10
X, y = create_sequences(sales_scaled, sequence_length)

# Sanity-check the tensor shapes that will be fed to the LSTM.
print("输入序列形状:", X.shape)  # (num_samples, 10, 1)
print("目标值形状:", y.shape)   # (num_samples, 1)

# -------------------------------
# 3. Chronological train/test split
# -------------------------------
# Time-series data must be split in order (no shuffling): first 80% for
# training, the remaining 20% for testing.
split_idx = int(len(X) * 0.8)
X_train, X_test = X[:split_idx], X[split_idx:]
y_train, y_test = y[:split_idx], y[split_idx:]
print("训练集样本数:", X_train.shape[0])
print("测试集样本数:", X_test.shape[0])
# -------------------------------
# 4. Build the LSTM model and configure the optimizer
# -------------------------------
# Adam learning rate. NOTE: the code uses 0.01 (the original comment said
# "e.g. 0.001", which did not match the value actually set here).
learning_rate = 0.01
optimizer = Adam(learning_rate=learning_rate)
model = Sequential()
# One LSTM layer (50 units) consuming windows of shape (sequence_length, 1);
# return_sequences=False emits only the final hidden state, which the dense
# head maps to the single one-step-ahead prediction.
model.add(LSTM(50, activation='tanh', input_shape=(sequence_length, 1), return_sequences=False))
model.add(Dense(1))
model.compile(optimizer=optimizer, loss='mse')
model.summary()
# -------------------------------
# 5. Train the model
# -------------------------------
# Early stopping guards against overfitting. Because validation data is
# passed to fit(), monitor the *validation* loss: the original monitored
# the training loss, which keeps decreasing and thus rarely triggers the
# stop, defeating the overfitting protection.
early_stop = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
history = model.fit(X_train, y_train, epochs=200, batch_size=256, verbose=1,
                    callbacks=[early_stop], validation_data=(X_test, y_test))
# -------------------------------
# 6. Plot the training/validation loss curves
# -------------------------------
loss_plot_file = 'lstm_loss_curve.png'
plt.figure(figsize=(10, 6))
for series_key, series_label in (('loss', '训练 Loss'), ('val_loss', '验证 Loss')):
    plt.plot(history.history[series_key], label=series_label)
plt.title("LSTM模型训练过程中的Loss曲线")
plt.xlabel("Epoch")
plt.ylabel("Loss")
plt.legend()
plt.tight_layout()
plt.savefig(loss_plot_file)  # save before show(), which clears the figure
plt.show()
print(f"Loss曲线已保存至:{loss_plot_file}")
# -------------------------------
# 7. Predict on the test set and evaluate (in original units)
# -------------------------------
from sklearn.metrics import mean_squared_error, mean_absolute_error

# One-step-ahead predictions on the held-out windows.
y_pred = model.predict(X_test)

# Undo the MinMax scaling so both series are in real sales counts.
y_pred_inv = scaler.inverse_transform(y_pred)
y_test_inv = scaler.inverse_transform(y_test)

# Report error metrics in the original units: numbers computed in the
# normalized [0, 1] space (as the original did) are not interpretable as
# sales-count errors.
mse = mean_squared_error(y_test_inv, y_pred_inv)
mae = mean_absolute_error(y_test_inv, y_pred_inv)
rmse = np.sqrt(mse)
print("测试集均方误差 (MSE):", mse)
print("测试集平均绝对误差 (MAE):", mae)
print("测试集均方根误差 (RMSE):", rmse)
# -------------------------------
# 8. Plot actual vs. predicted values
# -------------------------------
fit_plot_file = 'true_vs_predicted.png'
plt.figure(figsize=(10, 6))
plt.plot(y_test_inv, marker='o', linestyle='-', label='真实值')
plt.plot(y_pred_inv, marker='x', linestyle='--', label='预测值')
plt.title('真实值与预测值拟合图')
plt.xlabel('样本索引')
plt.ylabel('销售数量')
plt.legend()
plt.tight_layout()
plt.savefig(fit_plot_file)  # save before show() so the image is not blank
plt.show()
print(f"真实值预测值拟合图已保存至:{fit_plot_file}")
完整代码数据:

https://download.youkuaiyun.com/download/qq_38735017/90564988
1169

被折叠的评论
为什么被折叠?



