目录
🌟 前言:当神经网络遇见时间维度
"快递分拣系统如何记住包裹的运输路径?天气预报怎样关联历史气象数据?这一切都源自对序列的深刻理解!"
本文将用快递分拣中心的类比带你理解循环神经网络(RNN),以手写宋词生成器实战揭示LSTM的遗忘艺术,展示序列建模的魔法。文末还附赠RNN/LSTM/GRU对比选型指南,帮你轻松选择时序模型!
一、序列数据的三维透视(代码+可视化)
1.1 序列数据的四大特征
import numpy as np
import matplotlib.pyplot as plt

# Build a synthetic series by summing four canonical components:
# periodicity, trend, random noise, and seasonality.
time = np.arange(0, 100, 0.1)
sine_wave = np.sin(time)                       # periodic component
trend = 0.01 * time                            # linear trend
noise = 0.1 * np.random.randn(len(time))       # random noise
seasonal = 0.5 * np.sin(time / 5)              # slow seasonal cycle
complex_series = sine_wave + trend + noise + seasonal

# Visualize the combined signal.
plt.figure(figsize=(12, 4))
plt.plot(time, complex_series)
plt.title("复杂时间序列:包含周期、趋势、噪声和季节性")
plt.xlabel("时间步")
plt.show()
二、循环神经网络(RNN)原理解析
2.1 RNN细胞手工实现(NumPy版)
class SimpleRNNCell:
    """A minimal Elman-style RNN cell in plain NumPy.

    Implements h_t = tanh(Wxh @ x_t + Whh @ h_{t-1} + bh).
    """

    def __init__(self, input_size, hidden_size):
        # Small random init keeps tanh near its linear regime at the start.
        self.Wxh = np.random.randn(hidden_size, input_size) * 0.01   # input-to-hidden weights
        self.Whh = np.random.randn(hidden_size, hidden_size) * 0.01  # hidden-to-hidden weights
        self.bh = np.zeros((hidden_size, 1))                         # hidden bias

    def __call__(self, x, h_prev):
        """Advance one time step.

        x: current input, shape (input_size, 1)
        h_prev: previous hidden state, shape (hidden_size, 1)
        Returns the next hidden state, shape (hidden_size, 1).
        """
        pre_activation = self.Wxh @ x + self.Whh @ h_prev + self.bh
        return np.tanh(pre_activation)
# Exercise the RNN cell: feed the same input repeatedly and watch the
# hidden state evolve away from its all-zero start.
rnn_cell = SimpleRNNCell(input_size=3, hidden_size=5)
x = np.random.randn(3, 1)
h = np.zeros((5, 1))
print("初始隐藏状态:", h.flatten())
for t in range(3):
    # Each call folds the previous state back into the computation.
    h = rnn_cell(x, h)
    print(f"第{t}步隐藏状态:", h.flatten())
2.2 梯度消失问题可视化
# Simulate vanishing gradients: assume each backward step multiplies the
# gradient by a constant factor < 1 and plot the resulting decay.
T = 20                 # number of time steps to propagate through
decay_factor = 0.9     # assumed per-step attenuation
grads = [decay_factor ** t for t in range(T)]

plt.plot(grads, 'ro-')
plt.title("梯度随时间步衰减曲线")
plt.xlabel("时间步")
plt.ylabel("梯度幅值")
plt.yscale('log')  # log scale makes the exponential decay a straight line
plt.show()
三、LSTM与GRU:记忆管理大师
3.1 LSTM细胞架构解析(带注释实现)
class LSTMCell:
    """A minimal NumPy LSTM cell with the four gates fused into one matmul.

    The combined weight matrix W maps the concatenated [x; h_prev] vector to
    the stacked pre-activations of the input, forget, and output gates plus
    the candidate memory.
    """

    def __init__(self, input_size, hidden_size):
        # BUG FIX: hidden_size must be stored — __call__ needs it to slice
        # the fused pre-activation into the four gates (the original
        # referenced an undefined bare `hidden_size` there).
        self.hidden_size = hidden_size
        # Fused weights over the concatenated input and hidden state.
        self.W = np.random.randn(4 * hidden_size, input_size + hidden_size) * 0.01
        self.b = np.zeros((4 * hidden_size, 1))

    @staticmethod
    def _sigmoid(z):
        # BUG FIX: the original called an undefined `sigmoid`; define it here.
        return 1.0 / (1.0 + np.exp(-z))

    def __call__(self, x, h_prev, c_prev):
        """Advance one step.

        x: input, shape (input_size, 1)
        h_prev: previous hidden state, shape (hidden_size, 1)
        c_prev: previous cell (memory) state, shape (hidden_size, 1)
        Returns (h_next, c_next), each shape (hidden_size, 1).
        """
        H = self.hidden_size
        a = np.dot(self.W, np.vstack((x, h_prev))) + self.b
        # Slice the fused pre-activation into the four gates.
        i = self._sigmoid(a[:H])          # input gate
        f = self._sigmoid(a[H:2 * H])     # forget gate
        o = self._sigmoid(a[2 * H:3 * H])  # output gate
        g = np.tanh(a[3 * H:])            # candidate memory
        c_next = f * c_prev + i * g       # keep old memory + admit new
        h_next = o * np.tanh(c_next)      # expose a gated view of the memory
        return h_next, c_next
# Drive the LSTM cell for three steps with a constant input and observe
# how the memory cell fills up from its all-zero start.
lstm_cell = LSTMCell(input_size=3, hidden_size=5)
h = np.zeros((5, 1))
c = np.zeros((5, 1))
x = np.random.randn(3, 1)
print("初始记忆单元:", c.flatten())
for t in range(3):
    h, c = lstm_cell(x, h, c)
    print(f"第{t}步记忆值:", c.flatten())
3.2 GRU简化版实现(PyTorch接口)
import torch
import torch.nn as nn
class GRUNet(nn.Module):
    """Thin wrapper around a single-layer, batch-first nn.GRU."""

    def __init__(self, input_size, hidden_size):
        super().__init__()
        self.gru = nn.GRU(input_size, hidden_size, batch_first=True)

    def forward(self, x):
        """Run the GRU over a batch of sequences.

        x: tensor of shape (batch_size, seq_len, input_size)
        Returns the per-step outputs, shape (batch_size, seq_len, hidden_size);
        the final hidden state is discarded.
        """
        outputs, _final_hidden = self.gru(x)
        return outputs
# Example: push one batch of random sequences through the GRU wrapper.
batch_size, seq_len = 4, 10
input_size, hidden_size = 8, 16

model = GRUNet(input_size, hidden_size)
inputs = torch.randn(batch_size, seq_len, input_size)
outputs = model(inputs)
# Expect (batch, seq_len, hidden) = [4, 10, 16].
print("GRU输出形状:", outputs.shape)
四、实战:宋词生成器(LSTM+注意力)
4.1 数据预处理
import tensorflow as tf

# Load the Song Ci corpus as one big string.
with open('songci.txt', 'r', encoding='utf-8') as f:
    text = f.read()

# Build the character vocabulary and both lookup directions.
vocab = sorted(set(text))
char2idx = {char: i for i, char in enumerate(vocab)}
idx2char = np.array(vocab)

# Cut the corpus into windows of seq_length + 1 characters
# (seq_length inputs plus one shifted target character).
seq_length = 100
examples_per_epoch = len(text) // (seq_length + 1)
# BUG FIX: the original iterated range(examples_per_epoch) with stride 1,
# yielding heavily overlapping windows that only covered the first
# examples_per_epoch characters of the corpus. Stride by seq_length + 1 so
# the examples tile the whole text without overlap.
char_dataset = tf.data.Dataset.from_tensor_slices(
    [text[i:i + seq_length + 1]
     for i in range(0, examples_per_epoch * (seq_length + 1), seq_length + 1)])
# NOTE(review): these elements are still raw strings; they must be mapped to
# integer ids (via char2idx) and batched before model.fit — confirm downstream.
4.2 模型构建与训练
# Character-level language model: embed ids, run an LSTM over the sequence,
# and emit per-step logits over the vocabulary.
# BUG FIX: the original inserted tf.keras.layers.Attention() as a plain layer
# in the Sequential stack. Attention expects a [query, value] list of inputs
# and cannot be wired into a single-input Sequential model — it fails at
# build time. Removed here; adding attention would require the functional API.
model = tf.keras.Sequential([
    tf.keras.layers.Embedding(len(vocab), 256),          # char id -> 256-dim vector
    tf.keras.layers.LSTM(1024, return_sequences=True),   # hidden state per step
    tf.keras.layers.Dense(len(vocab))                    # per-step logits
])
# from_logits=True because the final Dense layer has no softmax.
model.compile(optimizer='adam',
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True))
# Training callback: sample a short continuation after every epoch.
class TextGenerator(tf.keras.callbacks.Callback):
    """Generates 100 characters from a fixed prompt at each epoch end."""

    def on_epoch_end(self, epoch, logs=None):
        start_string = "明月几时有"
        generated = []
        input_eval = [char2idx[c] for c in start_string]
        for _ in range(100):
            # self.model is attached by Keras; output shape is
            # (1, len(input_eval), vocab_size) since the LSTM returns sequences.
            predictions = self.model(tf.expand_dims(input_eval, 0))
            # BUG FIX: tf.random.categorical requires 2-D logits; the original
            # passed the full 3-D tensor (and indexed the sample wrongly).
            # Sample from the logits of the last time step only.
            last_logits = predictions[:, -1, :]
            predicted_id = int(
                tf.random.categorical(last_logits, num_samples=1)[0, 0].numpy())
            # Feed the sampled character back in for the next step.
            input_eval.append(predicted_id)
            generated.append(idx2char[predicted_id])
        print(f"\n生成文本:{start_string + ''.join(generated)}\n")
# BUG FIX: the original passed an undefined name `dataset`; the pipeline
# built above is called `char_dataset`.
# NOTE(review): char_dataset still holds raw strings — it needs id-encoding
# and batching before fit will succeed; confirm the preprocessing step.
history = model.fit(char_dataset, epochs=50, callbacks=[TextGenerator()])
🔥 序列模型选型指南
模型 | 参数量 | 训练速度 | 长程依赖 | 适用场景 |
---|---|---|---|---|
RNN | 少 | 快 | 差 | 短序列分类 |
LSTM | 多 | 慢 | 优 | 文本生成 |
GRU | 中 | 中 | 良 | 实时预测 |
Transformer | 多 | 慢 | 极优 | 机器翻译 |