Spaces:

danielvarga
/

pq

Sleeping

App Files Files Community

pq / v2 /test_predictor_statsforecast.py

Daniel Varga

docs

1fee7d7 5 months ago

raw

history blame contribute delete

No virus

5.99 kB

	import os

	import numpy as np
	import pandas as pd
	import matplotlib.pyplot as plt

	from statsforecast import StatsForecast
	from statsforecast.models import (
	ARIMA,
	AutoARIMA,
	AutoETS,
	AutoCES,
	DynamicOptimizedTheta,
	MSTL,
	SeasonalNaive,
	)
	from datasetsforecast.losses import rmse, mae


	os.environ["NIXTLA_NUMBA_RELEASE_GIL"] = "1"
	os.environ["NIXTLA_NUMBA_CACHE"] = "1"


	data = pd.read_csv("terheles_fixed.tsv", sep="\t")
	data['ds'] = pd.to_datetime(data['Korrigált időpont'])
	data['y'] = data['Hatásos teljesítmény']

	data = data[['ds', 'y']]
	data['unique_id'] = 1

	data = data[data['ds'] < '2019-03-01']
	Y_df = data

	train_df = Y_df[Y_df['ds'] < '2019-02-01']


	horizon = 4 * 24 * 7 # 7 days



	def ensemble_forecasts(
	fcsts_df,
	quantiles,
	name_models,
	model_name,
	) -> pd.DataFrame:
	fcsts_df[model_name] = fcsts_df[name_models].mean(axis=1).values # type: ignore
	# compute quantiles based on the mean of the forecasts
	sigma_models = []
	for model in name_models:
	fcsts_df[f"sigma_{model}"] = fcsts_df[f"{model}-hi-68.27"] - fcsts_df[model]
	sigma_models.append(f"sigma_{model}")
	fcsts_df[f"std_{model_name}"] = (
	fcsts_df[sigma_models].pow(2).sum(axis=1).div(len(sigma_models) ** 2).pow(0.5)
	)
	z = norm.ppf(quantiles)
	q_cols = []
	for q, zq in zip(quantiles, z):
	q_col = f"{model_name}-q-{q}"
	fcsts_df[q_col] = fcsts_df[model_name] + zq * fcsts_df[f"std_{model_name}"]
	q_cols.append(q_col)
	fcsts_df = fcsts_df[["unique_id", "ds"] + [model_name] + q_cols]
	return fcsts_df


	def run_statistical_ensemble(
	train_df: pd.DataFrame,
	horizon: int,
	freq: str,
	seasonality: int,
	quantiles,
	):
	os.environ["NIXTLA_ID_AS_COL"] = "true"
	models = [
	AutoARIMA(season_length=seasonality),
	AutoETS(season_length=seasonality),
	AutoCES(season_length=seasonality),
	DynamicOptimizedTheta(season_length=seasonality),
	]
	init_time = time()
	series_per_core = 15
	n_series = train_df["unique_id"].nunique()
	n_jobs = min(n_series // series_per_core, os.cpu_count())
	sf = StatsForecast(
	models=models,
	freq=freq,
	n_jobs=n_jobs,
	)
	fcsts_df = sf.forecast(df=train_df, h=horizon, level=[68.27])
	name_models = [repr(model) for model in models]
	model_name = "StatisticalEnsemble"
	fcsts_df = ensemble_forecasts(
	fcsts_df,
	quantiles,
	name_models,
	model_name,
	)
	total_time = time() - init_time
	return fcsts_df, total_time, model_name


	# unlike MSTL, the others only allow a single season_length:
	seasonality = 4 * 24 * 1 # 1 day

	models = [
	MSTL(
	season_length=[4 * 24, 4 * 24 * 7], # seasonalities of the time series
	trend_forecaster=AutoARIMA() # model used to forecast trend
	),
	SeasonalNaive(season_length=seasonality)
	]

	EXTENDED_TEST = True
	if EXTENDED_TEST:
	models += [
	# AutoARIMA(season_length=4 * 24) is just too slow, never even finishes,
	# spends all its time in scipy bfgs.
	# which is weird, because it's works okay as trend-detector of MSTL.
	AutoARIMA(),
	AutoETS(season_length=seasonality),
	AutoCES(season_length=seasonality),
	DynamicOptimizedTheta(season_length=seasonality),
	]


	freq = '15min'
	sf = StatsForecast(
	models=models,
	freq=freq,
	n_jobs=-1,
	)


	model_names = [repr(model) for model in models]


	n_windows = len(train_df) // horizon - 1
	print("crossvalidation with", n_windows, "windows")
	print("models:", ", ".join(model_names))
	crossvalidation_df = sf.cross_validation(df=train_df, h=horizon, step_size=horizon, n_windows=n_windows)

	for model_name in model_names:
	rmse_crossval = rmse(crossvalidation_df['y'], crossvalidation_df[model_name])
	mae_crossval = mae(crossvalidation_df['y'], crossvalidation_df[model_name])
	print(model_name, "RMSE", rmse_crossval, "MAE", mae_crossval)


	exit()

	sf.fit(train_df)

	sf.fitted_[0, 0].model_.tail(4 * 24 * 7 * 2).plot(subplots=True, grid=True)
	plt.tight_layout()
	plt.show()


	print("starting forecast, dataset size", len(train_df))
	# Y_hat_df = sf.forecast(df=train_df, h=horizon, level=[68.27])
	Y_hat_df = sf.predict(h=horizon, level=[68.27])


	print(Y_hat_df)

	Y_hat_df = Y_hat_df.reset_index()

	fig, ax = plt.subplots(1, 1, figsize = (20, 7))

	# plot_df = pd.concat([Y_df, Y_hat_df]).set_index('ds') # Concatenate the train and forecast dataframes
	# plot_df[['y', 'LSTM', 'NHITS']].plot(ax=ax, linewidth=2)

	plot_Y_df = Y_df # [Y_df['ds'] > '2019-07-01']
	plot_Y_df = plot_Y_df.set_index('ds')[['y']]
	plot_Y_df.plot(ax=ax, linewidth=1)
	Y_hat_df.set_index('ds').plot(ax=ax, linewidth=1)


	ax.set_title('AirPassengers Forecast', fontsize=22)
	ax.set_ylabel('Monthly Passengers', fontsize=20)
	ax.set_xlabel('Timestamp [t]', fontsize=20)
	ax.legend(prop={'size': 15})
	ax.grid()

	plt.savefig("neuralforecast.pdf")



	exit()






















	quantiles = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]
	fcst_df, total_time, model_name = run_statistical_ensemble(
	train_df,
	horizon=horizon,
	freq='15m',
	seasonality=4 * 24 * 7,
	quantiles=quantiles
	)


	nf.fit(df=train_df)

	Y_hat_df = nf.predict()

	print(Y_df)

	Y_hat_df = Y_hat_df.reset_index()

	fig, ax = plt.subplots(1, 1, figsize = (20, 7))

	# plot_df = pd.concat([Y_df, Y_hat_df]).set_index('ds') # Concatenate the train and forecast dataframes
	# plot_df[['y', 'LSTM', 'NHITS']].plot(ax=ax, linewidth=2)

	plot_Y_df = Y_df[Y_df['ds'] > '2019-07-01']
	plot_Y_df = plot_Y_df.set_index('ds')[['y']]
	plot_Y_df.plot(ax=ax, linewidth=1)
	Y_hat_df.set_index('ds')[['PatchTST', 'NHITS']].plot(ax=ax, linewidth=1)


	ax.set_title('AirPassengers Forecast', fontsize=22)
	ax.set_ylabel('Monthly Passengers', fontsize=20)
	ax.set_xlabel('Timestamp [t]', fontsize=20)
	ax.legend(prop={'size': 15})
	ax.grid()

	plt.savefig("neuralforecast.pdf")