# =====================
# 1. Pull data from MySQL
# =====================
# Free cached GPU memory before training; no-op / guarded on CPU-only hosts.
if torch.cuda.is_available():
    torch.cuda.empty_cache()

# NOTE(review): credentials are hard-coded — move them to env vars or a config file.
conn = pymysql.connect(
    host='127.0.0.1', user='root', passwd='123456',
    port=3306, db='weibo', charset='utf8'
)
try:
    cur = conn.cursor()
    cur.execute("SELECT * FROM Data")
    rows = cur.fetchall()
    # Column names come from the cursor description so the DataFrame
    # mirrors the table schema regardless of column order.
    columns = [desc[0] for desc in cur.description]
    cur.close()
finally:
    # Always release the connection, even if the query fails.
    conn.close()
df = pd.DataFrame(rows, columns=columns)

# Coerce engagement columns to numeric; unparsable values become 0.
for col in ['Likes', 'Shares', 'Comments']:
    df[col] = pd.to_numeric(df[col], errors='coerce').fillna(0)
# ==================================
# 2. 生成随机 Heat,并四舍五入到 1 位
# ==================================
def calculate_heat(row):
a, b, c = row['Likes'], row['Shares'], row['Comments']
coeff_a = np.random.uniform(0.2, 0.4)
coeff_b = np.random.uniform(0.4, 0.6)
coeff_c = np.random.uniform(0.5, 0.6)
return round(a*coeff_a + b*coeff_b + c*coeff_c, 1)
# Generate the synthetic regression target for every row.
df['Heat'] = df.apply(calculate_heat, axis=1)
# ==========================
# 3. Split, standardize, build DataLoaders
# ==========================
# Hyperparameters
INPUT_DIM, HIDDEN_DIM, N_BLOCKS = 3, 128, 3
DROPOUT, BATCH_SIZE = 0.3, 64
LR, WD = 1e-3, 1e-5
EPOCHS, PATIENCE, VALID_FRAC = 200, 10, 0.2
# Features and target (float32 to match the model's default dtype)
X = df[['Likes','Shares','Comments']].values.astype(np.float32)
y = df['Heat'].values.reshape(-1,1).astype(np.float32)
# Split indices BEFORE fitting the scalers so validation rows do not leak
# into the normalization statistics (the original fitted on all rows).
n_samples = len(X)
n_val = int(n_samples * VALID_FRAC)
n_train = n_samples - n_val
train_split, val_split = random_split(range(n_samples), [n_train, n_val])
train_idx, val_idx = list(train_split), list(val_split)
# Fit scalers on training rows only, then transform the full arrays so
# downstream full-dataset prediction (X_scaled) still works unchanged.
scaler_X = StandardScaler().fit(X[train_idx])
scaler_y = StandardScaler().fit(y[train_idx])
X_scaled = scaler_X.transform(X)
y_scaled = scaler_y.transform(y)
# Dataset & DataLoader built from the pre-computed index split
train_ds = TensorDataset(torch.from_numpy(X_scaled[train_idx]),
                         torch.from_numpy(y_scaled[train_idx]))
val_ds = TensorDataset(torch.from_numpy(X_scaled[val_idx]),
                       torch.from_numpy(y_scaled[val_idx]))
train_loader = DataLoader(train_ds, batch_size=BATCH_SIZE, shuffle=True)
val_loader = DataLoader(val_ds, batch_size=BATCH_SIZE)
# 模型定义
class ResidualBlock(nn.Module):
def __init__(self, dim):
super().__init__()
self.net = nn.Sequential(
nn.Linear(dim, dim),
nn.BatchNorm1d(dim),
nn.ReLU(),
nn.Dropout(0.2),
nn.Linear(dim, dim),
nn.BatchNorm1d(dim),
)
self.relu = nn.ReLU()
def forward(self, x):
return self.relu(self.net(x) + x)
class ComplexHeatModel(nn.Module):
    """MLP regressor: input projection -> residual block stack -> scalar head.

    Predicts one unnormalized value per sample; the caller is expected to
    invert the target scaling after inference.
    """

    def __init__(self, input_dim, hidden_dim, n_blocks, dropout):
        super().__init__()
        # Attribute names are preserved so checkpoints keep loading.
        # Project raw features up to the hidden width.
        self.input_layer = nn.Sequential(
            nn.Linear(input_dim, hidden_dim),
            nn.BatchNorm1d(hidden_dim),
            nn.ReLU(),
            nn.Dropout(dropout),
        )
        blocks = [ResidualBlock(hidden_dim) for _ in range(n_blocks)]
        self.res_blocks = nn.Sequential(*blocks)
        # Regression head producing a single output per sample.
        self.output_layer = nn.Sequential(
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(hidden_dim, 1),
        )

    def forward(self, x):
        hidden = self.res_blocks(self.input_layer(x))
        return self.output_layer(hidden)
# ---- Training setup ----
# Train on GPU when available, otherwise fall back to CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = ComplexHeatModel(INPUT_DIM, HIDDEN_DIM, N_BLOCKS, DROPOUT).to(device)
criterion = nn.MSELoss()
optimizer = optim.AdamW(model.parameters(), lr=LR, weight_decay=WD)
best_val_loss = float('inf')  # lowest validation loss observed so far
patience_cnt = 0  # consecutive epochs without improvement (early stopping)
history = {'train_loss': [], 'val_loss': [], 'lr': []}  # per-epoch curves for plotting
for epoch in range(1, EPOCHS+1):
    # — Training pass —
    model.train()
    train_losses = []
    for xb, yb in train_loader:
        xb, yb = xb.to(device), yb.to(device)
        optimizer.zero_grad()
        pred = model(xb)
        loss = criterion(pred, yb)
        loss.backward()
        # Clip the global gradient norm to 1.0 to stabilize training.
        nn.utils.clip_grad_norm_(model.parameters(), 1.0)
        optimizer.step()
        train_losses.append(loss.item())
    # — Validation pass (no gradients; eval mode for BatchNorm/Dropout) —
    model.eval()
    val_losses = []
    with torch.no_grad():
        for xb, yb in val_loader:
            xb, yb = xb.to(device), yb.to(device)
            val_losses.append(criterion(model(xb), yb).item())
    train_loss = np.mean(train_losses)
    val_loss = np.mean(val_losses)
    # NOTE: no LR scheduler is used, so this value stays constant at LR.
    lr_now = optimizer.param_groups[0]['lr']
    history['train_loss'].append(train_loss)
    history['val_loss'].append(val_loss)
    history['lr'].append(lr_now)
    print(f"Epoch {epoch:03d}: train {train_loss:.4f}, val {val_loss:.4f}, lr {lr_now:.6f}")
    # Checkpointing & early stopping
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        patience_cnt = 0
        # Persist only the weights (state_dict), not the whole module object.
        torch.save(model.state_dict(), 'best_heat_weights.pth')
        # Save both scalers alongside the checkpoint so inference can reproduce
        # the exact normalization. NOTE(review): the scalers never change after
        # fitting, so re-dumping on every improvement is redundant — they could
        # be saved once, right after fitting.
        joblib.dump(scaler_X, 'scaler_X.pkl')
        joblib.dump(scaler_y, 'scaler_y.pkl')
    else:
        patience_cnt += 1
        if patience_cnt >= PATIENCE:
            print(f"Early stopping at epoch {epoch}")
            break
print(f"Best val loss: {best_val_loss:.4f}")
# ============================
# 4. Reload weights & scalers for inference
# ============================
# Fresh model instance; load the best checkpoint kept by early stopping.
model = ComplexHeatModel(INPUT_DIM, HIDDEN_DIM, N_BLOCKS, DROPOUT).to(device)
state = torch.load('best_heat_weights.pth', map_location=device)
model.load_state_dict(state)
model.eval()
# Restore the fitted scalers that match the checkpoint.
scaler_X = joblib.load('scaler_X.pkl')
scaler_y = joblib.load('scaler_y.pkl')
# Predict heat for the full dataset, then undo the target scaling.
features = torch.from_numpy(X_scaled).to(device)
with torch.no_grad():
    preds_scaled = model(features).cpu().numpy()
preds_rescaled = scaler_y.inverse_transform(preds_scaled)
# ======================
# 5. Evaluation & visualization
# ======================
y_true = y.flatten()
y_pred = preds_rescaled.flatten()
# Metrics over the full dataset
mse = mean_squared_error(y, preds_rescaled)
mae = mean_absolute_error(y, preds_rescaled)
# MAPE is undefined for zero targets, so mask those rows out.
nonzero = y_true != 0
mape = np.mean(np.abs((y_pred[nonzero] - y_true[nonzero]) / y_true[nonzero])) * 100
print(f"\nMetrics on all data:\n MSE={mse:.4f}, MAE={mae:.4f}, MAPE={mape:.2f}%")
# Side-by-side comparison table (first 10 rows)
df_cmp = pd.DataFrame({
    'Actual_Heat': y_true,
    'Predicted_Heat': y_pred,
})
print("\nFirst 10 rows:\n", df_cmp.head(10))
# Plot 1: training / validation loss curves
plt.figure()
plt.plot(history['train_loss'], label='Train Loss')
plt.plot(history['val_loss'], label='Val Loss')
plt.title('Loss Curve')
plt.xlabel('Epoch')
plt.ylabel('MSE Loss')
plt.legend()
plt.grid(True)
plt.show()
# Plot 2: learning rate per epoch (constant — no scheduler is used)
plt.figure()
plt.plot(history['lr'])
plt.title('Learning Rate')
plt.xlabel('Epoch')
plt.ylabel('LR')
plt.grid(True)
plt.show()
# Plot 3: actual vs. predicted heat over the sample index
plt.figure()
plt.plot(y_true, label='Actual')
plt.plot(y_pred, label='Predicted', linestyle='--')
plt.title('Actual vs Predicted Heat')
plt.xlabel('Index')
plt.ylabel('Heat')
plt.legend()
plt.grid(True)
plt.show()
# Question (translated from a stray pasted prompt; commented out because the
# bare text was not valid Python and broke the script):
# "What techniques does this code use? Please explain the code."