pq / v2 /test_predictor_statsforecast.py
Daniel Varga
hangs
64b066d
raw
history blame
No virus
4.67 kB
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from statsforecast import StatsForecast
from statsforecast.models import (
AutoARIMA,
AutoETS,
AutoCES,
DynamicOptimizedTheta,
SeasonalNaive,
)
os.environ["NIXTLA_NUMBA_RELEASE_GIL"] = "1"
os.environ["NIXTLA_NUMBA_CACHE"] = "1"
data = pd.read_csv("terheles_fixed.tsv", sep="\t")
data['ds'] = pd.to_datetime(data['Korrigált időpont'])
data['y'] = data['Hatásos teljesítmény']
data = data[['ds', 'y']]
data['unique_id'] = 1
data = data[data['ds'] < '2019-09-01']
Y_df = data
train_df = Y_df[Y_df['ds'] < '2019-08-01']
horizon = 4 * 24 * 7 # 7 days
def ensemble_forecasts(
fcsts_df,
quantiles,
name_models,
model_name,
) -> pd.DataFrame:
fcsts_df[model_name] = fcsts_df[name_models].mean(axis=1).values # type: ignore
# compute quantiles based on the mean of the forecasts
sigma_models = []
for model in name_models:
fcsts_df[f"sigma_{model}"] = fcsts_df[f"{model}-hi-68.27"] - fcsts_df[model]
sigma_models.append(f"sigma_{model}")
fcsts_df[f"std_{model_name}"] = (
fcsts_df[sigma_models].pow(2).sum(axis=1).div(len(sigma_models) ** 2).pow(0.5)
)
z = norm.ppf(quantiles)
q_cols = []
for q, zq in zip(quantiles, z):
q_col = f"{model_name}-q-{q}"
fcsts_df[q_col] = fcsts_df[model_name] + zq * fcsts_df[f"std_{model_name}"]
q_cols.append(q_col)
fcsts_df = fcsts_df[["unique_id", "ds"] + [model_name] + q_cols]
return fcsts_df
def run_statistical_ensemble(
train_df: pd.DataFrame,
horizon: int,
freq: str,
seasonality: int,
quantiles,
):
os.environ["NIXTLA_ID_AS_COL"] = "true"
models = [
AutoARIMA(season_length=seasonality),
AutoETS(season_length=seasonality),
AutoCES(season_length=seasonality),
DynamicOptimizedTheta(season_length=seasonality),
]
init_time = time()
series_per_core = 15
n_series = train_df["unique_id"].nunique()
n_jobs = min(n_series // series_per_core, os.cpu_count())
sf = StatsForecast(
models=models,
freq=freq,
n_jobs=n_jobs,
)
fcsts_df = sf.forecast(df=train_df, h=horizon, level=[68.27])
name_models = [repr(model) for model in models]
model_name = "StatisticalEnsemble"
fcsts_df = ensemble_forecasts(
fcsts_df,
quantiles,
name_models,
model_name,
)
total_time = time() - init_time
return fcsts_df, total_time, model_name
seasonality = 4 * 24 * 7 # 1 week
models = [
AutoARIMA(season_length=seasonality),
AutoETS(season_length=seasonality),
AutoCES(season_length=seasonality),
DynamicOptimizedTheta(season_length=seasonality),
]
freq = '15min'
sf = StatsForecast(
models=models[:1],
freq=freq,
n_jobs=1,
)
print("starting forecast, dataset size", len(train_df))
Y_hat_df = sf.forecast(df=train_df, h=horizon, level=[68.27])
print(Y_hat_df)
Y_hat_df = Y_hat_df.reset_index()
fig, ax = plt.subplots(1, 1, figsize = (20, 7))
# plot_df = pd.concat([Y_df, Y_hat_df]).set_index('ds') # Concatenate the train and forecast dataframes
# plot_df[['y', 'LSTM', 'NHITS']].plot(ax=ax, linewidth=2)
plot_Y_df = Y_df[Y_df['ds'] > '2019-07-01']
plot_Y_df = plot_Y_df.set_index('ds')[['y']]
plot_Y_df.plot(ax=ax, linewidth=1)
Y_hat_df.set_index('ds')[['PatchTST', 'NHITS']].plot(ax=ax, linewidth=1)
ax.set_title('AirPassengers Forecast', fontsize=22)
ax.set_ylabel('Monthly Passengers', fontsize=20)
ax.set_xlabel('Timestamp [t]', fontsize=20)
ax.legend(prop={'size': 15})
ax.grid()
plt.savefig("neuralforecast.pdf")
exit()
quantiles = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]
fcst_df, total_time, model_name = run_statistical_ensemble(
train_df,
horizon=horizon,
freq='15m',
seasonality=4 * 24 * 7,
quantiles=quantiles
)
nf.fit(df=train_df)
Y_hat_df = nf.predict()
print(Y_df)
Y_hat_df = Y_hat_df.reset_index()
fig, ax = plt.subplots(1, 1, figsize = (20, 7))
# plot_df = pd.concat([Y_df, Y_hat_df]).set_index('ds') # Concatenate the train and forecast dataframes
# plot_df[['y', 'LSTM', 'NHITS']].plot(ax=ax, linewidth=2)
plot_Y_df = Y_df[Y_df['ds'] > '2019-07-01']
plot_Y_df = plot_Y_df.set_index('ds')[['y']]
plot_Y_df.plot(ax=ax, linewidth=1)
Y_hat_df.set_index('ds')[['PatchTST', 'NHITS']].plot(ax=ax, linewidth=1)
ax.set_title('AirPassengers Forecast', fontsize=22)
ax.set_ylabel('Monthly Passengers', fontsize=20)
ax.set_xlabel('Timestamp [t]', fontsize=20)
ax.legend(prop={'size': 15})
ax.grid()
plt.savefig("neuralforecast.pdf")