# Hugging Face Spaces page header (Space status: Sleeping)
import os
from time import time

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from scipy.stats import norm
from statsforecast import StatsForecast
from statsforecast.models import (
    AutoARIMA,
    AutoCES,
    AutoETS,
    DynamicOptimizedTheta,
    SeasonalNaive,
)
# NOTE(review): these variables are set after statsforecast has already been
# imported above. If the library reads them at import time they have no effect
# here -- confirm, and move them above the import if so.
os.environ["NIXTLA_NUMBA_RELEASE_GIL"] = "1"
os.environ["NIXTLA_NUMBA_CACHE"] = "1"

# Load the tab-separated measurements and reshape them into the long
# (ds, y, unique_id) layout that is passed to StatsForecast below.
data = pd.read_csv("terheles_fixed.tsv", sep="\t")
# Build the frame in one step instead of slicing columns and then assigning
# onto the slice, which raises SettingWithCopyWarning on pandas < 3.
data = pd.DataFrame(
    {
        "ds": pd.to_datetime(data["Korrigált időpont"]),
        "y": data["Hatásos teljesítmény"],
        "unique_id": 1,  # single series, constant id
    }
)
# Keep only observations before September 2019.
data = data[data["ds"] < "2019-09-01"]
Y_df = data
# Everything before August 2019 is used for fitting.
train_df = Y_df[Y_df["ds"] < "2019-08-01"]
horizon = 4 * 24 * 7  # 7 days of 15-minute steps
def ensemble_forecasts(
    fcsts_df: pd.DataFrame,
    quantiles,
    name_models,
    model_name: str,
) -> pd.DataFrame:
    """Combine per-model forecasts into an equal-weight ensemble with quantiles.

    The ensemble point forecast is the row-wise mean of the ``name_models``
    columns. For each model, ``<model>-hi-68.27`` minus the point forecast is
    used as that model's sigma; the sigmas are combined as
    ``sqrt(sum(sigma_i ** 2)) / n_models``. Each requested quantile is then
    ``mean + z_q * std`` with ``z_q`` taken from the standard normal
    distribution.

    Args:
        fcsts_df: Frame with ``unique_id``, ``ds``, one column per model in
            ``name_models`` and a matching ``<model>-hi-68.27`` column.
            It is not modified.
        quantiles: Probabilities in (0, 1) for which to emit quantile columns.
        name_models: Names of the model columns to average.
        model_name: Name of the output ensemble column.

    Returns:
        A new frame with columns ``unique_id``, ``ds``, ``model_name`` and one
        ``<model_name>-q-<q>`` column per requested quantile.
    """
    # Work on a copy so the caller's frame does not silently gain the
    # intermediate sigma_/std_/quantile columns.
    fcsts_df = fcsts_df.copy()
    name_models = list(name_models)
    std_col = f"std_{model_name}"

    fcsts_df[model_name] = fcsts_df[name_models].mean(axis=1).values  # type: ignore

    # Per-model sigma: distance from the point forecast to the upper bound of
    # the 68.27% interval.
    sigma_models = []
    for model in name_models:
        sigma_col = f"sigma_{model}"
        fcsts_df[sigma_col] = fcsts_df[f"{model}-hi-68.27"] - fcsts_df[model]
        sigma_models.append(sigma_col)

    # sqrt(sum(sigma_i^2) / n^2) == sqrt(sum(sigma_i^2)) / n
    fcsts_df[std_col] = (
        fcsts_df[sigma_models].pow(2).sum(axis=1).div(len(sigma_models) ** 2).pow(0.5)
    )

    # Normal-approximation quantiles around the ensemble mean.
    z = norm.ppf(quantiles)
    q_cols = []
    for q, zq in zip(quantiles, z):
        q_col = f"{model_name}-q-{q}"
        fcsts_df[q_col] = fcsts_df[model_name] + zq * fcsts_df[std_col]
        q_cols.append(q_col)

    return fcsts_df[["unique_id", "ds", model_name] + q_cols]
def run_statistical_ensemble(
    train_df: pd.DataFrame,
    horizon: int,
    freq: str,
    seasonality: int,
    quantiles,
):
    """Fit four statistical models and return their equal-weight ensemble.

    AutoARIMA, AutoETS, AutoCES and DynamicOptimizedTheta are fitted with
    ``season_length=seasonality``, forecast ``horizon`` steps ahead with a
    68.27% interval, and combined via ``ensemble_forecasts``.

    Args:
        train_df: Training data; must contain a ``unique_id`` column (used
            here to count series) plus whatever ``StatsForecast.forecast``
            requires.
        horizon: Number of steps to forecast.
        freq: Frequency string passed to ``StatsForecast``.
        seasonality: Season length passed to every model.
        quantiles: Probabilities forwarded to ``ensemble_forecasts``.

    Returns:
        Tuple ``(fcsts_df, total_time, model_name)``: the ensemble forecast
        frame, elapsed wall-clock seconds for fitting and ensembling, and the
        ensemble column name (``"StatisticalEnsemble"``).
    """
    # NOTE(review): presumably makes statsforecast return ``unique_id`` as a
    # column (ensemble_forecasts selects it by name) -- confirm against the
    # installed statsforecast version.
    os.environ["NIXTLA_ID_AS_COL"] = "true"
    models = [
        AutoARIMA(season_length=seasonality),
        AutoETS(season_length=seasonality),
        AutoCES(season_length=seasonality),
        DynamicOptimizedTheta(season_length=seasonality),
    ]
    init_time = time()

    # Roughly one worker per 15 series, capped at the CPU count. Clamp to at
    # least one worker: with fewer than 15 series the integer division yields
    # 0, and os.cpu_count() may return None.
    series_per_core = 15
    n_series = train_df["unique_id"].nunique()
    n_jobs = max(1, min(n_series // series_per_core, os.cpu_count() or 1))

    sf = StatsForecast(
        models=models,
        freq=freq,
        n_jobs=n_jobs,
    )
    fcsts_df = sf.forecast(df=train_df, h=horizon, level=[68.27])

    # Forecast columns are looked up by each model's repr.
    name_models = [repr(model) for model in models]
    model_name = "StatisticalEnsemble"
    fcsts_df = ensemble_forecasts(
        fcsts_df,
        quantiles,
        name_models,
        model_name,
    )
    total_time = time() - init_time
    return fcsts_df, total_time, model_name
# --- Single-model forecast and plot -----------------------------------------
seasonality = 4 * 24 * 7  # 1 week of 15-minute steps
models = [
    AutoARIMA(season_length=seasonality),
    AutoETS(season_length=seasonality),
    AutoCES(season_length=seasonality),
    DynamicOptimizedTheta(season_length=seasonality),
]
freq = '15min'

# Only the first model in the list is fitted in this section.
active_models = models[:1]
sf = StatsForecast(
    models=active_models,
    freq=freq,
    n_jobs=1,
)
print("starting forecast, dataset size", len(train_df))
Y_hat_df = sf.forecast(df=train_df, h=horizon, level=[68.27])
print(Y_hat_df)
Y_hat_df = Y_hat_df.reset_index()

# Plot the columns this run actually produced. The forecast columns are looked
# up by model repr, as run_statistical_ensemble does; the previous hard-coded
# 'PatchTST'/'NHITS' columns do not exist in a StatsForecast result.
forecast_cols = [repr(model) for model in active_models]

fig, ax = plt.subplots(1, 1, figsize=(20, 7))
# Actual values from July 2019 onwards, for context.
plot_Y_df = Y_df[Y_df['ds'] > '2019-07-01']
plot_Y_df = plot_Y_df.set_index('ds')[['y']]
plot_Y_df.plot(ax=ax, linewidth=1)
Y_hat_df.set_index('ds')[forecast_cols].plot(ax=ax, linewidth=1)

# Labels describe this dataset rather than the AirPassengers tutorial the
# plotting code was copied from; the y label is the source column name.
ax.set_title('Forecast vs. actuals', fontsize=22)
ax.set_ylabel('Hatásos teljesítmény', fontsize=20)
ax.set_xlabel('Timestamp [t]', fontsize=20)
ax.legend(prop={'size': 15})
ax.grid()
plt.savefig("neuralforecast.pdf")

# Stop here: the ensemble section below is intentionally not run. SystemExit
# is raised directly because the `exit()` helper is only injected by `site`.
raise SystemExit
# --- Statistical ensemble forecast and plot ---------------------------------
# NOTE: this section is unreachable while the preceding section ends by
# exiting the interpreter.
quantiles = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]
fcst_df, total_time, model_name = run_statistical_ensemble(
    train_df,
    horizon=horizon,
    freq='15min',  # was '15m'; the rest of the script uses '15min'
    seasonality=4 * 24 * 7,
    quantiles=quantiles,
)
# NOTE(review): the original called fit/predict on an undefined `nf` object
# and plotted 'PatchTST'/'NHITS' columns. The ensemble result computed above
# is plotted instead -- confirm this is the intended output.
Y_hat_df = fcst_df
print(Y_df)

fig, ax = plt.subplots(1, 1, figsize=(20, 7))
# Actual values from July 2019 onwards, for context.
plot_Y_df = Y_df[Y_df['ds'] > '2019-07-01']
plot_Y_df = plot_Y_df.set_index('ds')[['y']]
plot_Y_df.plot(ax=ax, linewidth=1)

# Ensemble mean plus a band between the lowest and highest requested quantile.
# Quantile columns are named "<model_name>-q-<q>" by ensemble_forecasts.
ensemble = Y_hat_df.set_index('ds')
ensemble[[model_name]].plot(ax=ax, linewidth=1)
lo_col = f"{model_name}-q-{quantiles[0]}"
hi_col = f"{model_name}-q-{quantiles[-1]}"
ax.fill_between(
    ensemble.index,
    ensemble[lo_col],
    ensemble[hi_col],
    alpha=0.2,
    label=f"{quantiles[0]:.0%}-{quantiles[-1]:.0%} quantile band",
)

# Labels describe this dataset rather than the AirPassengers tutorial the
# plotting code was copied from; the y label is the source column name.
ax.set_title('Forecast vs. actuals', fontsize=22)
ax.set_ylabel('Hatásos teljesítmény', fontsize=20)
ax.set_xlabel('Timestamp [t]', fontsize=20)
ax.legend(prop={'size': 15})
ax.grid()
plt.savefig("neuralforecast.pdf")