# pq/v2/test_sf_vs_prophet.py
# Author: Daniel Varga
# Commit 41bce18: "test_predictor_statsforecast.py has MSTL and SeasonalNaive correctly set up."
# https://github.com/Nixtla/statsforecast/tree/main/experiments/mstl
# https://github.com/Nixtla/statsforecast/blob/main/experiments/mstl/src/main.py
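#
# Benchmarks StatsForecast's MSTL (with an AutoARIMA trend forecaster) and SeasonalNaive
# against Prophet and NeuralProphet on the PJM hourly electricity load series, using
# 7 rolling 24-hour cross-validation windows, and reports per-model runtimes plus
# mase/mae/mape/rmse/smape for each cutoff.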
from time import time
import pandas as pd
import numpy as np
from datasetsforecast.losses import mae, mape, mase, rmse, smape
from fire import Fire
from neuralprophet import NeuralProphet
from prophet import Prophet
from prophet.diagnostics import cross_validation
from statsforecast import StatsForecast
from statsforecast.models import MSTL, AutoARIMA, SeasonalNaive


def evaluate_performance(y_hist, y_true, models):
    """Compute mase/mae/mape/rmse/smape for each model at each CV cutoff."""
    cutoffs = y_true['cutoff'].unique()
    eval_ = []
    for cutoff in cutoffs:
        evaluation = {}
        for model in models:
            evaluation[model] = {}
            for metric in [mase, mae, mape, rmse, smape]:
                metric_name = metric.__name__
                if metric_name == 'mase':
                    # MASE additionally needs the training history and the seasonality.
                    evaluation[model][metric_name] = metric(
                        y_true.query('cutoff == @cutoff')['y'].values,
                        y_true.query('cutoff == @cutoff')[model].values,
                        y_hist.query('ds <= @cutoff')['y'].values,
                        seasonality=24
                    )
                else:
                    evaluation[model][metric_name] = metric(
                        y_true.query('cutoff == @cutoff')['y'].values,
                        y_true.query('cutoff == @cutoff')[model].values
                    )
        eval_cutoff = pd.DataFrame(evaluation).T
        eval_cutoff.insert(0, 'cutoff', cutoff)
        eval_cutoff.index = eval_cutoff.index.rename('model')
        eval_.append(eval_cutoff)
    return pd.concat(eval_)


def experiment():
    filename = 'https://github.com/archd3sai/Hourly-Energy-Consumption-Prediction/raw/master/PJME_hourly.csv'
    # 404: filename = 'https://raw.githubusercontent.com/jnagura/Energy-consumption-prediction-analysis/master/PJM_Load_hourly.csv'
    df = pd.read_csv(filename)
    df.columns = ['ds', 'y']
    df.insert(0, 'unique_id', 'PJM_Load_hourly')
    df['ds'] = pd.to_datetime(df['ds'])
    df = df.sort_values(['unique_id', 'ds']).reset_index(drop=True)

    # MSTL model
    mstl = MSTL(
        season_length=[24, 24 * 7],   # seasonalities of the time series
        trend_forecaster=AutoARIMA()  # model used to forecast trend
    )
    sf = StatsForecast(
        df=df,
        models=[mstl],
        freq='H'
    )
    init = time()
    forecasts_cv = sf.cross_validation(h=24, n_windows=7, step_size=24)
    end = time()
    time_mstl = (end - init) / 60
    print(f'MSTL Time: {time_mstl:.2f} minutes')

    # SeasonalNaive model
    sf = StatsForecast(
        df=df,
        models=[SeasonalNaive(season_length=24)],
        freq='H'
    )
    init = time()
    forecasts_cv_seas = sf.cross_validation(h=24, n_windows=7, step_size=24)
    end = time()
    time_seas = (end - init) / 60
    print(f'SeasonalNaive Time: {time_seas:.2f} minutes')

    forecasts_cv = forecasts_cv.merge(forecasts_cv_seas.drop(columns='y'), how='left', on=['unique_id', 'ds', 'cutoff'])
    cutoffs = forecasts_cv['cutoff'].unique()
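
    # Prophet and NeuralProphet are refit from scratch at each cutoff below so that
    # their 24-hour forecasts line up with the StatsForecast cross-validation windows.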
    # Prophet model
    forecasts_cv['Prophet'] = None
    time_prophet = 0
    for cutoff in cutoffs:
        df_train = df.query('ds <= @cutoff')
        prophet = Prophet()
        init = time()
        # fit on history up to the cutoff
        prophet.fit(df_train)
        # produce forecasts
        future = prophet.make_future_dataframe(periods=24, freq='H', include_history=False)
        forecast_prophet = prophet.predict(future)
        end = time()
        assert (forecast_prophet['ds'].values == forecasts_cv.query('cutoff == @cutoff')['ds']).all()
        forecasts_cv.loc[forecasts_cv['cutoff'] == cutoff, 'Prophet'] = forecast_prophet['yhat'].values
        time_prophet += (end - init) / 60
    print(f'Prophet Time: {time_prophet:.2f} minutes')

    times = pd.DataFrame({
        'model': ['MSTL', 'SeasonalNaive', 'Prophet'],
        'time (mins)': [time_mstl, time_seas, time_prophet]
    })

    # NeuralProphet model
    forecasts_cv['NeuralProphet'] = None
    time_np = 0
    for cutoff in cutoffs:
        df_train = df.query('ds <= @cutoff')
        neuralprophet = NeuralProphet()
        init = time()
        neuralprophet.fit(df_train.drop(columns='unique_id'))
        future = neuralprophet.make_future_dataframe(df=df_train.drop(columns='unique_id'), periods=24)
        forecast_np = neuralprophet.predict(future)
        end = time()
        assert (forecast_np['ds'].values == forecasts_cv.query('cutoff == @cutoff')['ds']).all()
        forecasts_cv.loc[forecasts_cv['cutoff'] == cutoff, 'NeuralProphet'] = forecast_np['yhat1'].values
        time_np += (end - init) / 60
    print(f'NeuralProphet Time: {time_np:.2f} minutes')
    # pd.DataFrame.append was removed in pandas 2.0, so use concat instead.
    times = pd.concat(
        [times, pd.DataFrame({'model': ['NeuralProphet'], 'time (mins)': [time_np]})],
        ignore_index=True
    )

    # Final evaluation
    evaluation = evaluate_performance(df_train, forecasts_cv, models=['MSTL', 'NeuralProphet', 'Prophet', 'SeasonalNaive'])
    print(times)
    print(evaluation)
    print(evaluation.groupby('model').mean(numeric_only=True))
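

# Run as `python test_sf_vs_prophet.py`; Fire exposes experiment() as the command-line entry point.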
if __name__ == "__main__":
    Fire(experiment)