The previous post covered getting TimesFM up and running.
This post analyzes the data that run actually used.
According to the code, the data comes from exp, and exp is built from the imported .utils module (utils.py), which is forked from Nixtla, the time-series forecasting project.
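As a rough sketch of how exp is likely constructed (hypothetical; the dataset name here is illustrative, not taken from the previous post):

from utils import ExperimentHandler

exp = ExperimentHandler(dataset="m4_hourly")  # hypothetical dataset name
print(exp.horizon, exp.freq, exp.seasonality)

The full code of utils.py follows.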
"""Forked from https://github.com/Nixtla/nixtla/blob/main/experiments/amazon-chronos/src/utils.py."""
# Copyright 2024 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Forked from https://github.com/Nixtla/nixtla/blob/main/experiments/amazon-chronos/src/utils.py."""
from functools import partial
from itertools import repeat
import multiprocessing
import os
from pathlib import Path
from typing import List
from gluonts.dataset import Dataset
from gluonts.dataset.repository.datasets import (
    dataset_names as gluonts_datasets,
    get_dataset,
)
from gluonts.time_feature.seasonality import get_seasonality
import numpy as np
import pandas as pd
from utilsforecast.evaluation import evaluate
from utilsforecast.losses import mae, mase, smape
def parallel_transform(inp):
    ts, last_n = inp[0], inp[1]
    return ExperimentHandler._transform_gluonts_instance_to_df(ts, last_n=last_n)
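
# Note: parallel_transform is a module-level function so that it can be pickled
# by multiprocessing workers. A hedged sketch of the intended call pattern (the
# helper that does this lives further down in the original file, beyond this
# excerpt), which is presumably what the `repeat` import above is for:
#
#   with multiprocessing.Pool(os.cpu_count()) as pool:
#       dfs = pool.map(parallel_transform, zip(gluonts_dataset, repeat(last_n)))
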
def quantile_loss(
    df: pd.DataFrame,
    models: list,
    q: float = 0.5,
    id_col: str = "unique_id",
    target_col: str = "y",
) -> pd.DataFrame:
    delta_y = df[models].sub(df[target_col], axis=0)
    res = (
        np.maximum(q * delta_y, (q - 1) * delta_y)
        .groupby(df[id_col], observed=True)
        .mean()
    )
    res.index.name = id_col
    res = res.reset_index()
    return res
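
# Quick numeric check of quantile_loss (illustrative values):
#
#   df = pd.DataFrame({"unique_id": ["a", "a"],
#                      "y": [10.0, 20.0],
#                      "model1": [12.0, 15.0]})
#   quantile_loss(df, models=["model1"], q=0.9)
#
# delta_y = model1 - y = [2.0, -5.0]; per-row pinball loss
# max(0.9 * d, -0.1 * d) = [1.8, 0.5]; grouped mean for series "a" = 1.15.
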
class ExperimentHandler:
    def __init__(
        self,
        dataset: str,
        quantiles: List[float] = list(np.arange(1, 10) / 10.0),
        results_dir: str = "./results",
        models_dir: str = "./models",
    ):
        if dataset not in gluonts_datasets:
            raise Exception(
                f"dataset {dataset} not found in gluonts "
                f"available datasets: {', '.join(gluonts_datasets)}"
            )
        self.dataset = dataset
        self.quantiles = quantiles
        self.level = self._transform_quantiles_to_levels(quantiles)
        self.results_dir = results_dir
        self.models_dir = models_dir
        # defining datasets
        self._maybe_download_m3_or_m5_file(self.dataset)
        gluonts_dataset = get_dataset(self.dataset)
        self.horizon = gluonts_dataset.metadata.prediction_length
        if self.horizon is None:
            raise Exception(
                f"horizon not found for dataset {self.dataset} "
                "experiment cannot be run"
            )
        self.freq = gluonts_dataset.metadata.freq
        # get_seasonality() returns 1 for freq='D', override this to 7. This significantly improves the
        # accuracy of statistical models on datasets like m5/nn5_daily. Models like AutoARIMA/AutoETS can
        # still set seasonality=1 internally on datasets like weather by choosing non-seasonal models
        # during model selection.
        if self.freq == "D":
            self.seasonality = 7
        else:
            self.seasonality = get_seasonality(self.freq)
        self.gluonts_train_dataset = gluonts_dataset.train
        self.gluonts_test_dataset = gluonts_dataset.test
        self._create_dir_if_not_exists(self.results_dir)
        try:
            multiprocessing.set_start_method("spawn")
        except RuntimeError:
            print("Multiprocessing context has already been set.")
    @staticmethod
    def _maybe_download_m3_or_m5_file(dataset: str):
        if dataset[:2] == "m3":
            m3_file = Path.home() / ".gluonts" / "datasets" / "M3C.xls"
            if not m3_file.exists():
                from datasetsforecast.m3 import M3
                from datasetsforecast.utils import download_file

                download_file(m3_file.parent, M3.source_url)
        elif dataset == "m5":
            m5_raw_dir = Path.home() / ".gluonts" / "m5"
            if not m5_raw_dir.exists():
                import zipfile

                from datasetsforecast.m5 import M5
                from datasetsforecast.utils import download_file

                download_file(m5_raw_dir, M5.source_url)
                with zipfile.ZipFile(m5_raw_dir / "m5.zip", "r") as zip_ref:
                    zip_ref.extractall(m5_raw_dir)
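
    # The default quantiles [0.1, ..., 0.9] pair into central prediction
    # intervals: (0.1, 0.9) brackets an 80% interval, (0.2, 0.8) a 60% one,
    # and so on, with q = 0.5 being the median. The method below (truncated
    # in this excerpt) presumably maps each q < 0.5 to the level 100 - 200*q,
    # i.e. [20, 40, 60, 80] for the defaults.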
    @staticmethod
    def _transform_quantiles_to_levels(quantiles: List[