The previous post covered getting TimesFM up and running.
This post analyzes the data that run actually used.
According to the code, the data comes from exp, and exp is built from the imported .utils module (utils.py), which is forked from Nixtla, the time-series forecasting project.
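As a rough sketch of how exp is likely constructed (hypothetical; the dataset name here is illustrative, not taken from the previous post):

from utils import ExperimentHandler

exp = ExperimentHandler(dataset="m4_hourly")  # hypothetical dataset name
print(exp.horizon, exp.freq, exp.seasonality)

The full code of utils.py follows.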
"""Forked from https://github.com/Nixtla/nixtla/blob/main/experiments/amazon-chronos/src/utils.py."""
# Copyright 2024 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Forked from https://github.com/Nixtla/nixtla/blob/main/experiments/amazon-chronos/src/utils.py."""
from functools import partial
from itertools import repeat
import multiprocessing
import os
from pathlib import Path
from typing import List
from gluonts.dataset import Dataset
from gluonts.dataset.repository.datasets import (
    dataset_names as gluonts_datasets,
    get_dataset,
)
from gluonts.time_feature.seasonality import get_seasonality
import numpy as np
import pandas as pd
from utilsforecast.evaluation import evaluate
from utilsforecast.losses import mae, mase, smape
def parallel_transform(inp):
    ts, last_n = inp[0], inp[1]
    return ExperimentHandler._transform_gluonts_instance_to_df(ts, last_n=last_n)
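
# Note: parallel_transform is a module-level function so that it can be pickled
# by multiprocessing workers. A hedged sketch of the intended call pattern (the
# helper that does this lives further down in the original file, beyond this
# excerpt), which is presumably what the `repeat` import above is for:
#
#   with multiprocessing.Pool(os.cpu_count()) as pool:
#       dfs = pool.map(parallel_transform, zip(gluonts_dataset, repeat(last_n)))
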
def quantile_loss(
    df: pd.DataFrame,
    models: list,
    q: float = 0.5,
    id_col: str = "unique_id",
    target_col: str = "y",
) -> pd.DataFrame:
    delta_y = df[models].sub(df[target_col], axis=0)
    res = (
        np.maximum(q * delta_y, (q - 1) * delta_y)
        .groupby(df[id_col], observed=True)
        .mean()
    )
    res.index.name = id_col
    res = res.reset_index()
    return res
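
# Quick numeric check of quantile_loss (illustrative values):
#
#   df = pd.DataFrame({"unique_id": ["a", "a"],
#                      "y": [10.0, 20.0],
#                      "model1": [12.0, 15.0]})
#   quantile_loss(df, models=["model1"], q=0.9)
#
# delta_y = model1 - y = [2.0, -5.0]; per-row pinball loss
# max(0.9 * d, -0.1 * d) = [1.8, 0.5]; grouped mean for series "a" = 1.15.
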
class ExperimentHandler:
    def __init__(
        self,
        dataset: str,
        quantiles: List[float] = list(np.arange(1, 10) / 10.0),
        results_dir: str = "./results",
        models_dir: str = "./models",
    ):
        if dataset not in gluonts_datasets:
            raise Exception(
                f"dataset {dataset} not found in gluonts "
                f"available datasets: {', '.join(gluonts_datasets)}"
            )
        self.dataset = dataset
        self.quantiles = quantiles
        self.level = self._transform_quantiles_to_levels(quantiles)
        self.results_dir = results_dir
        self.models_dir = models_dir
        # defining datasets
        self._maybe_download_m3_or_m5_file(self.dataset)
        gluonts_dataset = get_dataset(self.dataset)
        self.horizon = gluonts_dataset.metadata.prediction_length
        if self.horizon is None:
            raise Exception(
                f"horizon not found for dataset {self.dataset} "
                "experiment cannot be run"
            )
        self.freq = gluonts_dataset.metadata.freq
        # get_seasonality() returns 1 for freq='D', override this to 7. This significantly improves the
        # accuracy of statistical models on datasets like m5/nn5_daily. Models like AutoARIMA/AutoETS can
        # still set seasonality=1 internally on datasets like weather by choosing non-seasonal models
        # during model selection.
        if self.freq == "D":
            self.seasonality = 7
        else:
            self.seasonality = get_seasonality(self.freq)
        self.gluonts_train_dataset = gluonts_dataset.train
        self.gluonts_test_dataset = gluonts_dataset.test
        self._create_dir_if_not_exists(self.results_dir)
        try:
            multiprocessing.set_start_method("spawn")
        except RuntimeError:
            print("Multiprocessing context has already been set.")
    @staticmethod
    def _maybe_download_m3_or_m5_file(dataset: str):
        if dataset[:2] == "m3":
            m3_file = Path.home() / ".gluonts" / "datasets" / "M3C.xls"
            if not m3_file.exists():
                from datasetsforecast.m3 import M3
                from datasetsforecast.utils import download_file

                download_file(m3_file.parent, M3.source_url)
        elif dataset == "m5":
            m5_raw_dir = Path.home() / ".gluonts" / "m5"
            if not m5_raw_dir.exists():
                import zipfile

                from datasetsforecast.m5 import M5
                from datasetsforecast.utils import download_file

                download_file(m5_raw_dir, M5.source_url)
                with zipfile.ZipFile(m5_raw_dir / "m5.zip", "r") as zip_ref:
                    zip_ref.extractall(m5_raw_dir)
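
    # The default quantiles [0.1, ..., 0.9] pair into central prediction
    # intervals: (0.1, 0.9) brackets an 80% interval, (0.2, 0.8) a 60% one,
    # and so on, with q = 0.5 being the median. The method below (truncated
    # in this excerpt) presumably maps each q < 0.5 to the level 100 - 200*q,
    # i.e. [20, 40, 60, 80] for the defaults.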
    @staticmethod
    def _transform_quantiles_to_levels(quantiles: List[