"""Forecast a 15-minute load series with StatsForecast models.

The script reads ``terheles_fixed.tsv``, trains on the data before
``TRAIN_END``, forecasts one week ahead with AutoARIMA and saves a plot of the
recent history together with the forecast.

An optional second stage (off by default, see ``RUN_ENSEMBLE``) fits four
statistical models and combines them into a simple mean ensemble with
Gaussian quantiles.
"""
import os
from time import time
from typing import List, Sequence, Tuple

# These flags are set *before* importing statsforecast: the library picks up
# its numba settings at import time, so setting them afterwards has no effect.
os.environ["NIXTLA_NUMBA_RELEASE_GIL"] = "1"
os.environ["NIXTLA_NUMBA_CACHE"] = "1"

import matplotlib.pyplot as plt  # noqa: E402
import numpy as np  # noqa: E402,F401
import pandas as pd  # noqa: E402
from scipy.stats import norm  # noqa: E402
from statsforecast import StatsForecast  # noqa: E402
from statsforecast.models import (  # noqa: E402,F401
    AutoARIMA,
    AutoCES,
    AutoETS,
    DynamicOptimizedTheta,
    SeasonalNaive,
)

DATA_PATH = "terheles_fixed.tsv"
TIMESTAMP_COL = "Korrigált időpont"
VALUE_COL = "Hatásos teljesítmény"

DATA_END = "2019-09-01"    # rows on/after this date are discarded
TRAIN_END = "2019-08-01"   # rows before this date form the training set
PLOT_START = "2019-07-01"  # history shown in the plot starts after this date

FREQ = "15min"
HORIZON = 4 * 24 * 7       # 7 days of 15-minute steps
SEASONALITY = 4 * 24 * 7   # 1 week
LEVEL = [68.27]            # ~1 sigma interval; ensemble_forecasts relies on it
QUANTILES = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]

PLOT_PATH = "neuralforecast.pdf"

# The original script called exit() right after the first plot, so the
# ensemble stage never ran. Keep that default; flip to True to run it.
RUN_ENSEMBLE = False


def _build_models(seasonality: int) -> list:
    """Return the four statistical models used by the ensemble."""
    return [
        AutoARIMA(season_length=seasonality),
        AutoETS(season_length=seasonality),
        AutoCES(season_length=seasonality),
        DynamicOptimizedTheta(season_length=seasonality),
    ]


def ensemble_forecasts(
    fcsts_df,
    quantiles,
    name_models,
    model_name,
) -> pd.DataFrame:
    """Combine per-model forecasts into a mean ensemble with quantiles.

    Args:
        fcsts_df: Frame with ``unique_id``, ``ds``, one point-forecast column
            per entry of ``name_models`` and a matching ``<model>-hi-68.27``
            column. The input frame is not modified.
        quantiles: Probabilities in (0, 1) for which quantile columns are
            produced.
        name_models: Names of the model columns to average.
        model_name: Name of the resulting ensemble column.

    Returns:
        A frame with ``unique_id``, ``ds``, ``model_name`` and one
        ``<model_name>-q-<q>`` column per requested quantile.

    Raises:
        ValueError: If ``name_models`` is empty.
    """
    name_models = list(name_models)
    if not name_models:
        raise ValueError("name_models must contain at least one model")

    # Work on a copy so the caller's frame does not grow helper columns.
    fcsts_df = fcsts_df.copy()
    fcsts_df[model_name] = fcsts_df[name_models].mean(axis=1).values

    # The half-width of the 68.27% interval is used as each model's sigma.
    sigma_models = []
    for model in name_models:
        sigma_col = f"sigma_{model}"
        fcsts_df[sigma_col] = fcsts_df[f"{model}-hi-68.27"] - fcsts_df[model]
        sigma_models.append(sigma_col)

    # Std of the mean of the forecasts: sqrt(sum(sigma_i^2)) / n.
    std_col = f"std_{model_name}"
    fcsts_df[std_col] = (
        fcsts_df[sigma_models]
        .pow(2)
        .sum(axis=1)
        .div(len(sigma_models) ** 2)
        .pow(0.5)
    )

    q_cols = []
    for q, zq in zip(quantiles, norm.ppf(quantiles)):
        q_col = f"{model_name}-q-{q}"
        fcsts_df[q_col] = fcsts_df[model_name] + zq * fcsts_df[std_col]
        q_cols.append(q_col)

    return fcsts_df[["unique_id", "ds", model_name] + q_cols]


def run_statistical_ensemble(
    train_df: pd.DataFrame,
    horizon: int,
    freq: str,
    seasonality: int,
    quantiles,
):
    """Fit the four statistical models and return their mean ensemble.

    Args:
        train_df: Training frame with ``unique_id``, ``ds`` and ``y``.
        horizon: Number of steps to forecast.
        freq: Pandas frequency string of the series.
        seasonality: Season length passed to every model.
        quantiles: Probabilities forwarded to ``ensemble_forecasts``.

    Returns:
        Tuple ``(fcsts_df, total_time, model_name)``: the ensemble frame,
        the elapsed wall-clock seconds and the ensemble column name.
    """
    os.environ["NIXTLA_ID_AS_COL"] = "true"
    models = _build_models(seasonality)
    init_time = time()

    series_per_core = 15
    n_series = train_df["unique_id"].nunique()
    # At least one worker: fewer than `series_per_core` series used to yield
    # n_jobs == 0, and os.cpu_count() may return None.
    n_jobs = max(1, min(n_series // series_per_core, os.cpu_count() or 1))

    sf = StatsForecast(
        models=models,
        freq=freq,
        n_jobs=n_jobs,
    )
    fcsts_df = sf.forecast(df=train_df, h=horizon, level=[68.27])
    if "unique_id" not in fcsts_df.columns:
        # Library versions that ignore NIXTLA_ID_AS_COL keep the id as index.
        fcsts_df = fcsts_df.reset_index()

    name_models = [repr(model) for model in models]
    model_name = "StatisticalEnsemble"
    fcsts_df = ensemble_forecasts(
        fcsts_df,
        quantiles,
        name_models,
        model_name,
    )
    total_time = time() - init_time
    return fcsts_df, total_time, model_name


def load_series(path: str = DATA_PATH) -> pd.DataFrame:
    """Read the TSV file and return it in ``ds`` / ``y`` / ``unique_id`` form.

    Only rows with ``ds`` before ``DATA_END`` are kept.
    """
    raw = pd.read_csv(path, sep="\t")
    # Build a fresh frame instead of assigning into a column slice, which
    # can trigger pandas' SettingWithCopyWarning.
    data = pd.DataFrame(
        {
            "ds": pd.to_datetime(raw[TIMESTAMP_COL]),
            "y": raw[VALUE_COL],
            "unique_id": 1,
        }
    )
    return data[data["ds"] < DATA_END]


def plot_forecast(
    history_df: pd.DataFrame,
    forecast_df: pd.DataFrame,
    model_cols: Sequence[str],
    out_path: str,
) -> None:
    """Plot recent history and the given forecast columns, then save to disk.

    Args:
        history_df: Frame with ``ds`` and ``y``; rows after ``PLOT_START``
            are drawn.
        forecast_df: Forecast frame with ``ds`` and the ``model_cols``.
        model_cols: Forecast columns to draw.
        out_path: Destination file for the figure.
    """
    fig, ax = plt.subplots(1, 1, figsize=(20, 7))

    recent = history_df[history_df["ds"] > PLOT_START]
    recent.set_index("ds")[["y"]].plot(ax=ax, linewidth=1)
    forecast_df.set_index("ds")[list(model_cols)].plot(ax=ax, linewidth=1)

    ax.set_title("Active Power Forecast", fontsize=22)
    ax.set_ylabel("Active power", fontsize=20)
    ax.set_xlabel("Timestamp [t]", fontsize=20)
    ax.legend(prop={"size": 15})
    ax.grid()

    fig.savefig(out_path)
    plt.close(fig)


def main() -> None:
    """Run the AutoARIMA forecast, plot it, and optionally the ensemble."""
    Y_df = load_series()
    train_df = Y_df[Y_df["ds"] < TRAIN_END]

    # Only the first model (AutoARIMA) is fitted in the default run.
    models = _build_models(SEASONALITY)[:1]
    sf = StatsForecast(
        models=models,
        freq=FREQ,
        n_jobs=1,
    )

    print("starting forecast, dataset size", len(train_df))
    Y_hat_df = sf.forecast(df=train_df, h=HORIZON, level=LEVEL)
    print(Y_hat_df)
    if "unique_id" not in Y_hat_df.columns:
        Y_hat_df = Y_hat_df.reset_index()

    # Plot the columns the fitted models actually produce.
    model_cols: List[str] = [repr(model) for model in models]
    plot_forecast(Y_df, Y_hat_df, model_cols, PLOT_PATH)

    if not RUN_ENSEMBLE:
        return

    result: Tuple[pd.DataFrame, float, str] = run_statistical_ensemble(
        train_df,
        horizon=HORIZON,
        freq=FREQ,
        seasonality=SEASONALITY,
        quantiles=QUANTILES,
    )
    fcst_df, total_time, model_name = result
    print(fcst_df)
    print(f"{model_name} finished in {total_time:.1f} s")


if __name__ == "__main__":
    main()