Spaces:

danielvarga
/

pq

Sleeping

File size: 4,674 Bytes

import os
from time import time

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from scipy.stats import norm
from statsforecast import StatsForecast
from statsforecast.models import (
    AutoARIMA,
    AutoETS,
    AutoCES,
    DynamicOptimizedTheta,
    SeasonalNaive,
)


# NOTE(review): statsforecast is already imported by the time these variables
# are set; if the library reads them at import time they have no effect here.
# Confirm, and move the assignment above the import if so.
os.environ.update(
    {
        "NIXTLA_NUMBA_RELEASE_GIL": "1",
        "NIXTLA_NUMBA_CACHE": "1",
    }
)


# Load the tab-separated measurements and keep only the timestamp ('ds') and
# target ('y') columns, plus a constant series id, since the whole file is
# treated as a single time series.
data = pd.read_csv("terheles_fixed.tsv", sep="\t")
data = data.assign(
    ds=pd.to_datetime(data['Korrigált időpont']),
    y=data['Hatásos teljesítmény'],
)[['ds', 'y']]
data['unique_id'] = 1

# Everything strictly before 2019-09-01 is the working set; the part strictly
# before 2019-08-01 is used for training.
Y_df = data = data[data['ds'] < '2019-09-01']
train_df = Y_df[Y_df['ds'] < '2019-08-01']


# Forecast horizon in steps: 4 steps per hour * 24 hours * 7 days.
horizon = 4 * 24 * 7 # 7 days



def ensemble_forecasts(
    fcsts_df,
    quantiles,
    name_models,
    model_name,
) -> pd.DataFrame:
    """Combine per-model forecasts into a mean ensemble with normal quantiles.

    The ensemble point forecast is the row-wise mean of the model columns.
    Each model's sigma is taken as the distance between its ``-hi-68.27``
    bound and its point forecast. The ensemble standard deviation is
    ``sqrt(sum(sigma_i ** 2)) / n_models``, and each requested quantile is
    ``mean + z_q * std`` with ``z_q`` the standard-normal quantile.

    Args:
        fcsts_df: Frame with ``unique_id`` and ``ds`` columns, one column per
            entry of ``name_models`` and a matching ``"<model>-hi-68.27"``
            column for each. The frame is not modified.
        quantiles: Probabilities in (0, 1) for which quantile columns are
            produced.
        name_models: Names of the model columns to average.
        model_name: Name of the resulting ensemble column; also used as the
            prefix of the ``"<model_name>-q-<q>"`` quantile columns.

    Returns:
        A new frame with columns ``unique_id``, ``ds``, ``model_name`` and
        one quantile column per entry of ``quantiles``.
    """
    # Work on a copy so the helper columns below never leak into the caller's
    # frame.
    fcsts_df = fcsts_df.copy()
    fcsts_df[model_name] = fcsts_df[name_models].mean(axis=1).values  # type: ignore

    # Per-model sigma: half-width of the 68.27% interval.
    sigma_models = []
    for model in name_models:
        sigma_col = f"sigma_{model}"
        fcsts_df[sigma_col] = fcsts_df[f"{model}-hi-68.27"] - fcsts_df[model]
        sigma_models.append(sigma_col)

    std_col = f"std_{model_name}"
    fcsts_df[std_col] = (
        fcsts_df[sigma_models].pow(2).sum(axis=1).div(len(sigma_models) ** 2).pow(0.5)
    )

    # Translate each probability into a z-score and shift the mean accordingly.
    q_cols = []
    for q, zq in zip(quantiles, norm.ppf(quantiles)):
        q_col = f"{model_name}-q-{q}"
        fcsts_df[q_col] = fcsts_df[model_name] + zq * fcsts_df[std_col]
        q_cols.append(q_col)

    return fcsts_df[["unique_id", "ds", model_name] + q_cols]


def run_statistical_ensemble(
    train_df: pd.DataFrame,
    horizon: int,
    freq: str,
    seasonality: int,
    quantiles,
):
    """Fit four statistical models and return their mean-ensemble forecast.

    Fits AutoARIMA, AutoETS, AutoCES and DynamicOptimizedTheta with
    StatsForecast, requests the 68.27% prediction interval and combines the
    results with ``ensemble_forecasts``.

    Args:
        train_df: Training frame; must contain a ``unique_id`` column (used
            here to count the series) and is passed to
            ``StatsForecast.forecast`` unchanged.
        horizon: Number of steps to forecast.
        freq: Frequency string passed to ``StatsForecast``.
        seasonality: Season length passed to every model.
        quantiles: Probabilities forwarded to ``ensemble_forecasts``.

    Returns:
        Tuple ``(fcsts_df, total_time, model_name)``: the ensemble frame, the
        elapsed wall-clock seconds, and the ensemble column name
        (``"StatisticalEnsemble"``).
    """
    # Side effect kept from the original: this mutates the process
    # environment for the rest of the run.
    os.environ["NIXTLA_ID_AS_COL"] = "true"
    models = [
        AutoARIMA(season_length=seasonality),
        AutoETS(season_length=seasonality),
        AutoCES(season_length=seasonality),
        DynamicOptimizedTheta(season_length=seasonality),
    ]
    init_time = time()
    series_per_core = 15
    n_series = train_df["unique_id"].nunique()
    # os.cpu_count() may return None, and fewer than `series_per_core` series
    # would otherwise yield n_jobs == 0; always use at least one worker.
    available_cores = os.cpu_count() or 1
    n_jobs = max(1, min(n_series // series_per_core, available_cores))
    sf = StatsForecast(
        models=models,
        freq=freq,
        n_jobs=n_jobs,
    )
    fcsts_df = sf.forecast(df=train_df, h=horizon, level=[68.27])
    # The forecast columns are looked up by each model's repr().
    name_models = [repr(model) for model in models]
    model_name = "StatisticalEnsemble"
    fcsts_df = ensemble_forecasts(
        fcsts_df,
        quantiles,
        name_models,
        model_name,
    )
    total_time = time() - init_time
    return fcsts_df, total_time, model_name


# Weekly season length expressed in steps of `freq` (4 per hour * 24 h * 7 d).
seasonality = 4 * 24 * 7 # 1 week
models = [
        AutoARIMA(season_length=seasonality),
        AutoETS(season_length=seasonality),
        AutoCES(season_length=seasonality),
        DynamicOptimizedTheta(season_length=seasonality),
]
freq = '15min'
# Only the first model (AutoARIMA) is actually fitted in this run.
fitted_models = models[:1]
sf = StatsForecast(
        models=fitted_models,
        freq=freq,
        n_jobs=1,
)

print("starting forecast, dataset size", len(train_df))
Y_hat_df = sf.forecast(df=train_df, h=horizon, level=[68.27])

print(Y_hat_df)

Y_hat_df = Y_hat_df.reset_index()

fig, ax = plt.subplots(1, 1, figsize=(20, 7))

# Observed values from July 2019 onwards, for visual context.
plot_Y_df = Y_df[Y_df['ds'] > '2019-07-01']
plot_Y_df = plot_Y_df.set_index('ds')[['y']]
plot_Y_df.plot(ax=ax, linewidth=1)

# Plot the point forecast of every fitted model. The columns are named after
# each model's repr(); the previous hard-coded 'PatchTST'/'NHITS' columns are
# never produced by this StatsForecast run and raised a KeyError.
forecast_cols = [repr(model) for model in fitted_models]
Y_hat_df.set_index('ds')[forecast_cols].plot(ax=ax, linewidth=1)


# Labels describe the plotted data ('Hatásos teljesítmény' = active power)
# instead of the AirPassengers tutorial text they were copied from.
ax.set_title('Load Forecast', fontsize=22)
ax.set_ylabel('Active power', fontsize=20)
ax.set_xlabel('Timestamp [t]', fontsize=20)
ax.legend(prop={'size': 15})
ax.grid()

# NOTE(review): file name kept for compatibility although no neural model is
# involved any more.
plt.savefig("neuralforecast.pdf")



# Stop here: everything after this point is legacy code that never runs.
# `raise SystemExit` does not depend on the interactive-only `exit` helper.
raise SystemExit






















# NOTE(review): the script terminates unconditionally before reaching this
# point, so nothing below is ever executed. It is kept only as a record of
# earlier experiments.

# Deciles requested from the statistical ensemble.
quantiles = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]
# NOTE(review): freq is '15m' here but '15min' in the live part of the
# script - presumably the same 15-minute frequency was intended; confirm
# before reviving this code.
fcst_df, total_time, model_name = run_statistical_ensemble(
    train_df,
    horizon=horizon,
    freq='15m',
    seasonality=4 * 24 * 7,
    quantiles=quantiles
)


# NOTE(review): `nf` is not defined anywhere in the visible file; this looks
# like a leftover from a different forecasting setup - TODO confirm or remove.
nf.fit(df=train_df)

Y_hat_df = nf.predict()

print(Y_df)

Y_hat_df = Y_hat_df.reset_index()

fig, ax = plt.subplots(1, 1, figsize = (20, 7))

# plot_df = pd.concat([Y_df, Y_hat_df]).set_index('ds') # Concatenate the train and forecast dataframes
# plot_df[['y', 'LSTM', 'NHITS']].plot(ax=ax, linewidth=2)

# Observed values after 2019-07-01 are drawn first, then the forecast columns.
plot_Y_df = Y_df[Y_df['ds'] > '2019-07-01']
plot_Y_df = plot_Y_df.set_index('ds')[['y']]
plot_Y_df.plot(ax=ax, linewidth=1)
# NOTE(review): expects 'PatchTST' and 'NHITS' columns in the `nf` output;
# nothing in the visible file produces them.
Y_hat_df.set_index('ds')[['PatchTST', 'NHITS']].plot(ax=ax, linewidth=1)


# NOTE(review): title and axis label mention AirPassengers / monthly
# passengers, which do not match the 'terheles_fixed.tsv' data loaded above.
ax.set_title('AirPassengers Forecast', fontsize=22)
ax.set_ylabel('Monthly Passengers', fontsize=20)
ax.set_xlabel('Timestamp [t]', fontsize=20)
ax.legend(prop={'size': 15})
ax.grid()

plt.savefig("neuralforecast.pdf")