import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from dataclasses import dataclass
from typing import List, Tuple, Optional
np.random.seed(123)
# 1) 生成合成非线性季节性时间序列
n = 360 # 12个月 * 30天作为示例长度
t = np.arange(n)
# 真实过程:趋势 + 月度季节性 + 慢速季节性机制 + 阈值非线性
trend = 0.03 * t
monthly = 8 * np.sin(2 * np.pi * t / 30) # 30天周期
season90 = 3 * np.sign(np.sin(2 * np.pi * t / 90)) # 大约每45天机制翻转
threshold_boost = np.where(monthly > 6, 4, 0) # 阈值非线性
y_true = 20 + trend + monthly + season90 + threshold_boost
y = y_true + np.random.normal(scale=1.2, size=n)
# 2) 框架化为监督学习
def make_lag_features(series: np.ndarray, max_lag: int) -> pd.DataFrame:
s = pd.Series(series)
data = {f"lag_{k}": s.shift(k) for k in range(1, max_lag + 1)}
return pd.DataFrame(data)
max_lag = 10
df = make_lag_features(y, max_lag)
# 日历/季节性特征
df["sin_30"] = np.sin(2 * np.pi * np.arange(len(df)) / 30)
df["cos_30"] = np.cos(2 * np.pi * np.arange(len(df)) / 30)
df["sin_90"] = np.sin(2 * np.pi * np.arange(len(df)) / 90)
df["cos_90"] = np.cos(2 * np.pi * np.arange(len(df)) / 90)
df["time_idx"] = np.arange(len(df))
df["target"] = y
df = df.dropna().reset_index(drop=True)
# 按时间顺序分割
train_frac = 0.8
split_idx = int(len(df) * train_frac)
X_train = df.drop(columns=["target"]).iloc[:split_idx].to_numpy()
y_train = df["target"].iloc[:split_idx].to_numpy()
X_test = df.drop(columns=["target"]).iloc[split_idx:].to_numpy()
y_test = df["target"].iloc[split_idx:].to_numpy()
feature_names = df.drop(columns=["target"]).columns.tolist()
# 3) 轻量级MARS类模型(铰链基 + 线性项),前向选择 + 后向剪枝
def hinge(vec: np.ndarray, c: float, direction: int) -> np.ndarray:
# direction: +1 => max(0, x - c), -1 => max(0, c - x)
if direction == 1:
return np.maximum(0.0, vec - c)
else:
return np.maximum(0.0, c - vec)
@dataclass
class BasisSpec:
kind: str # "hinge" 或 "linear"
feat_idx: int
knot: Optional[float] # 线性为None
direction: Optional[int] # 铰链为+1/-1,线性为None
@dataclass
class MarsLiteTS:
basis: List[BasisSpec]
beta: np.ndarray
def design_matrix(X: np.ndarray, basis_list: List[BasisSpec]) -> np.ndarray:
parts = [np.ones((X.shape[0], 1))] # 截距
for b in basis_list:
col = X[:, b.feat_idx]
if b.kind == "linear":
vec = col
else:
vec = hinge(col, b.knot, b.direction)
parts.append(vec.reshape(-1, 1))
return np.hstack(parts)
def gcv(y_true: np.ndarray, y_hat: np.ndarray, m_terms: int, penalty: float = 2.0) -> float:
n = len(y_true)
rss = np.sum((y_true - y_hat) ** 2)
# 有效参数数量:1(截距)+ m_terms,按惩罚缩放
c_eff = 1 + penalty * m_terms
denom = max(1e-3, (1 - c_eff / n))
return rss / (n * denom ** 2)
def fit_mars_lite_ts(X: np.ndarray, y: np.ndarray, max_terms: int = 20, q_knots: int = 15,
allow_linear: bool = True, penalty: float = 2.0, min_improve: float = 1e-3) -> MarsLiteTS:
n, p = X.shape
# 从每个特征的分位数获得候选节点
quantiles = np.linspace(0.05, 0.95, q_knots)
cand_knots = [np.quantile(X[:, j], quantiles) for j in range(p)]
basis_list: List[BasisSpec] = []
# 仅截距基线
D = design_matrix(X, basis_list)
beta, *_ = np.linalg.lstsq(D, y, rcond=None)
yhat = D @ beta
best_score = gcv(y, yhat, m_terms=0, penalty=penalty)
# 前向选择
for _ in range(max_terms):
best_add = None
best_add_score = best_score
# 尝试线性项
if allow_linear:
for j in range(p):
spec = BasisSpec(kind="linear", feat_idx=j, knot=None, direction=None)
D_try = design_matrix(X, basis_list + [spec])
beta_try, *_ = np.linalg.lstsq(D_try, y, rcond=None)
yhat_try = D_try @ beta_try
score = gcv(y, yhat_try, m_terms=len(basis_list) + 1, penalty=penalty)
if score < best_add_score - 1e-12:
best_add_score = score
best_add = (spec, beta_try, yhat_try)
# 尝试铰链项
for j in range(p):
for c in cand_knots[j]:
for d in (1, -1):
spec = BasisSpec(kind="hinge", feat_idx=j, knot=float(c), direction=d)
D_try = design_matrix(X, basis_list + [spec])
beta_try, *_ = np.linalg.lstsq(D_try, y, rcond=None)
yhat_try = D_try @ beta_try
score = gcv(y, yhat_try, m_terms=len(basis_list) + 1, penalty=penalty)
if score < best_add_score - 1e-12:
best_add_score = score
best_add = (spec, beta_try, yhat_try)
if best_add is None or (best_score - best_add_score) < min_improve:
break
# 接受添加
spec, beta, yhat = best_add
basis_list.append(spec)
best_score = best_add_score
# 后向剪枝(贪心)
improved = True
while improved and len(basis_list) > 0:
improved = False
current_best = best_score
drop_idx = None
for j in range(len(basis_list)):
trial = [b for i, b in enumerate(basis_list) if i != j]
D_try = design_matrix(X, trial)
beta_try, *_ = np.linalg.lstsq(D_try, y, rcond=None)
yhat_try = D_try @ beta_try
score = gcv(y, yhat_try, m_terms=len(trial), penalty=penalty)
if score < current_best - 1e-12:
current_best = score
drop_idx = j
best_beta_try = beta_try
best_yhat_try = yhat_try
if drop_idx is not None:
basis_list.pop(drop_idx)
beta = best_beta_try
yhat = best_yhat_try
best_score = current_best
improved = True
# 最终重新拟合
D_final = design_matrix(X, basis_list)
beta_final, *_ = np.linalg.lstsq(D_final, y, rcond=None)
return MarsLiteTS(basis=basis_list, beta=beta_final)
def predict(model: MarsLiteTS, X: np.ndarray) -> np.ndarray:
D = design_matrix(X, model.basis)
return D @ model.beta
# 训练模型
model = fit_mars_lite_ts(X_train, y_train, max_terms=25, q_knots=12, allow_linear=True, penalty=2.0, min_improve=1e-3)
# 评估
y_pred = predict(model, X_test)
rmse = float(np.sqrt(np.mean((y_test - y_pred) ** 2)))
mape = float(np.mean(np.abs((y_test - y_pred) / np.maximum(1e-6, np.abs(y_test)))) * 100)
# 4) 绘图
plt.figure(figsize=(12, 6))
plt.plot(y, label="Actual")
plt.axvline(split_idx + max_lag, linestyle="--", label="Train/Test split")
# 将预测对齐到原始索引
start = split_idx + max_lag
x_axis = np.arange(start, start + len(y_pred))
plt.plot(x_axis, y_pred, label="MARS-like forecast")
plt.title(f"MARS-like Time Series Forecast | RMSE: {rmse:.2f}, MAPE: {mape:.1f}%")
plt.xlabel("Time")
plt.ylabel("Value")
plt.legend()
plt.tight_layout()
plt.show()
- 1.
- 2.
- 3.
- 4.
- 5.
- 6.
- 7.
- 8.
- 9.
- 10.
- 11.
- 12.
- 13.
- 14.
- 15.
- 16.
- 17.
- 18.
- 19.
- 20.
- 21.
- 22.
- 23.
- 24.
- 25.
- 26.
- 27.
- 28.
- 29.
- 30.
- 31.
- 32.
- 33.
- 34.
- 35.
- 36.
- 37.
- 38.
- 39.
- 40.
- 41.
- 42.
- 43.
- 44.
- 45.
- 46.
- 47.
- 48.
- 49.
- 50.
- 51.
- 52.
- 53.
- 54.
- 55.
- 56.
- 57.
- 58.
- 59.
- 60.
- 61.
- 62.
- 63.
- 64.
- 65.
- 66.
- 67.
- 68.
- 69.
- 70.
- 71.
- 72.
- 73.
- 74.
- 75.
- 76.
- 77.
- 78.
- 79.
- 80.
- 81.
- 82.
- 83.
- 84.
- 85.
- 86.
- 87.
- 88.
- 89.
- 90.
- 91.
- 92.
- 93.
- 94.
- 95.
- 96.
- 97.
- 98.
- 99.
- 100.
- 101.
- 102.
- 103.
- 104.
- 105.
- 106.
- 107.
- 108.
- 109.
- 110.
- 111.
- 112.
- 113.
- 114.
- 115.
- 116.
- 117.
- 118.
- 119.
- 120.
- 121.
- 122.
- 123.
- 124.
- 125.
- 126.
- 127.
- 128.
- 129.
- 130.
- 131.
- 132.
- 133.
- 134.
- 135.
- 136.
- 137.
- 138.
- 139.
- 140.
- 141.
- 142.
- 143.
- 144.
- 145.
- 146.
- 147.
- 148.
- 149.
- 150.
- 151.
- 152.
- 153.
- 154.
- 155.
- 156.
- 157.
- 158.
- 159.
- 160.
- 161.
- 162.
- 163.
- 164.
- 165.
- 166.
- 167.
- 168.
- 169.
- 170.
- 171.
- 172.
- 173.
- 174.
- 175.
- 176.
- 177.
- 178.
- 179.
- 180.
- 181.
- 182.
- 183.
- 184.
- 185.
- 186.
- 187.
- 188.
- 189.