Spaces:

danielvarga
/

pq

Sleeping

File size: 5,171 Bytes

41bce18

# https://github.com/Nixtla/statsforecast/tree/main/experiments/mstl
# https://github.com/Nixtla/statsforecast/blob/main/experiments/mstl/src/main.py

from time import time

import pandas as pd
import numpy as np
from datasetsforecast.losses import (
    mae, mape, mase, rmse, smape
)
from fire import Fire
from neuralprophet import NeuralProphet
from prophet import Prophet
from prophet.diagnostics import cross_validation
from statsforecast import StatsForecast
from statsforecast.models import MSTL, AutoARIMA, SeasonalNaive

def evaluate_performace(y_hist, y_true, models):
    cutoffs = y_true['cutoff'].unique()
    eval_ = []
    for cutoff in cutoffs:
        evaluation = {}
        for model in models:
            evaluation[model] = {}
            for metric in [mase, mae, mape, rmse, smape]:
                metric_name = metric.__name__
                if metric_name == 'mase':
                    evaluation[model][metric_name] = metric(
                        y_true.query('cutoff == @cutoff')['y'].values, 
                        y_true.query('cutoff == @cutoff')[model].values, 
                        y_hist.query('ds <= @cutoff')['y'].values, 
                        seasonality=24
                    )
                else:
                    evaluation[model][metric_name] = metric(
                        y_true.query('cutoff == @cutoff')['y'].values, 
                        y_true.query('cutoff == @cutoff')[model].values
                    )
        eval_cutoff = pd.DataFrame(evaluation).T
        eval_cutoff.insert(0, 'cutoff', cutoff)
        eval_cutoff.index = eval_cutoff.index.rename('model')
        eval_.append(eval_cutoff)
    return pd.concat(eval_)

def experiment():
    filename = 'https://github.com/archd3sai/Hourly-Energy-Consumption-Prediction/raw/master/PJME_hourly.csv'
    # 404: filename = 'https://raw.githubusercontent.com/jnagura/Energy-consumption-prediction-analysis/master/PJM_Load_hourly.csv'
    df = pd.read_csv(filename)
    df.columns = ['ds', 'y']
    df.insert(0, 'unique_id', 'PJM_Load_hourly')
    df['ds'] = pd.to_datetime(df['ds'])
    df = df.sort_values(['unique_id', 'ds']).reset_index(drop=True)

    # MSTL model
    mstl = MSTL(
	season_length=[24, 24 * 7], # seasonalities of the time series 
	trend_forecaster=AutoARIMA() # model used to forecast trend
    )
    sf = StatsForecast(
        df=df,
	models=[mstl], 
	freq='H'
    )
    init = time()
    forecasts_cv = sf.cross_validation(h=24, n_windows=7, step_size=24)
    end = time()
    time_mstl = (end - init) / 60
    print(f'MSTL Time: {time_mstl:.2f} minutes')

    # SeasonalNaive model
    sf = StatsForecast(
        df=df,
	models=[SeasonalNaive(season_length=24)], 
	freq='H'
    )
    init = time()
    forecasts_cv_seas = sf.cross_validation(h=24, n_windows=7, step_size=24)
    end = time()
    time_seas = (end - init) / 60
    print(f'SeasonalNaive Time: {time_seas:.2f} minutes')
    forecasts_cv = forecasts_cv.merge(forecasts_cv_seas.drop(columns='y'), how='left', on=['unique_id', 'ds', 'cutoff'])

    cutoffs = forecasts_cv['cutoff'].unique()
    # Prophet model
    forecasts_cv['Prophet'] = None
    time_prophet = 0
    for cutoff in cutoffs:
        df_train = df.query('ds <= @cutoff')
        prophet = Prophet()
        # produce forecasts
        init = time()
        prophet.fit(df_train)
        # produce forecasts
        future = prophet.make_future_dataframe(periods=24, freq='H', include_history=False)
        forecast_prophet = prophet.predict(future)
        end = time()
        assert (forecast_prophet['ds'].values == forecasts_cv.query('cutoff == @cutoff')['ds']).all()
        forecasts_cv.loc[forecasts_cv['cutoff'] == cutoff, 'Prophet'] = forecast_prophet['yhat'].values
        # data wrangling
        time_prophet += (end - init) / 60
    print(f'Prophet Time: {time_prophet:.2f} minutes')
    times = pd.DataFrame({
        'model': ['MSTL', 'SeasonalNaive', 'Prophet'], 
        'time (mins)': [time_mstl, time_seas, time_prophet]
    })

    # NeuralProphet
    forecasts_cv['NeuralProphet'] = None
    time_np = 0
    for cutoff in cutoffs:
        df_train = df.query('ds <= @cutoff')
        neuralprophet = NeuralProphet()
        init = time()
        neuralprophet.fit(df_train.drop(columns='unique_id'))
        future = neuralprophet.make_future_dataframe(df=df_train.drop(columns='unique_id'), periods=24)
        forecast_np = neuralprophet.predict(future)
        end = time()
        assert (forecast_np['ds'].values == forecasts_cv.query('cutoff == @cutoff')['ds']).all()
        forecasts_cv.loc[forecasts_cv['cutoff'] == cutoff, 'NeuralProphet'] = forecast_np['yhat1'].values
        time_np += (end - init) / 60
    print(f'NeuralProphet Time: {time_np:.2f} minutes')
    times = times.append({'model': 'NeuralProphet', 'time (mins)': time_np}, ignore_index=True)
    # Final evalaution
    evaluation = evaluate_performace(df_train, forecasts_cv, models=['MSTL', 'NeuralProphet', 'Prophet', 'SeasonalNaive'])
    print(times)
    print(evaluation)
    print(evaluation.groupby('model').mean(numeric_only=True))


if __name__=="__main__":
    Fire(experiment)