Spaces:

danielvarga
/

pq

Sleeping

App Files Files Community

pq / v2 /test_predictor_statsforecast.py

Daniel Varga

hangs

64b066d 5 months ago

raw

history blame

No virus

4.67 kB

	import os

	import numpy as np
	import pandas as pd
	import matplotlib.pyplot as plt

	from statsforecast import StatsForecast
	from statsforecast.models import (
	AutoARIMA,
	AutoETS,
	AutoCES,
	DynamicOptimizedTheta,
	SeasonalNaive,
	)


	os.environ["NIXTLA_NUMBA_RELEASE_GIL"] = "1"
	os.environ["NIXTLA_NUMBA_CACHE"] = "1"



	data = pd.read_csv("terheles_fixed.tsv", sep="\t")
	data['ds'] = pd.to_datetime(data['Korrigált időpont'])
	data['y'] = data['Hatásos teljesítmény']

	data = data[['ds', 'y']]
	data['unique_id'] = 1

	data = data[data['ds'] < '2019-09-01']
	Y_df = data

	train_df = Y_df[Y_df['ds'] < '2019-08-01']


	horizon = 4 * 24 * 7 # 7 days



	def ensemble_forecasts(
	fcsts_df,
	quantiles,
	name_models,
	model_name,
	) -> pd.DataFrame:
	fcsts_df[model_name] = fcsts_df[name_models].mean(axis=1).values # type: ignore
	# compute quantiles based on the mean of the forecasts
	sigma_models = []
	for model in name_models:
	fcsts_df[f"sigma_{model}"] = fcsts_df[f"{model}-hi-68.27"] - fcsts_df[model]
	sigma_models.append(f"sigma_{model}")
	fcsts_df[f"std_{model_name}"] = (
	fcsts_df[sigma_models].pow(2).sum(axis=1).div(len(sigma_models) ** 2).pow(0.5)
	)
	z = norm.ppf(quantiles)
	q_cols = []
	for q, zq in zip(quantiles, z):
	q_col = f"{model_name}-q-{q}"
	fcsts_df[q_col] = fcsts_df[model_name] + zq * fcsts_df[f"std_{model_name}"]
	q_cols.append(q_col)
	fcsts_df = fcsts_df[["unique_id", "ds"] + [model_name] + q_cols]
	return fcsts_df


	def run_statistical_ensemble(
	train_df: pd.DataFrame,
	horizon: int,
	freq: str,
	seasonality: int,
	quantiles,
	):
	os.environ["NIXTLA_ID_AS_COL"] = "true"
	models = [
	AutoARIMA(season_length=seasonality),
	AutoETS(season_length=seasonality),
	AutoCES(season_length=seasonality),
	DynamicOptimizedTheta(season_length=seasonality),
	]
	init_time = time()
	series_per_core = 15
	n_series = train_df["unique_id"].nunique()
	n_jobs = min(n_series // series_per_core, os.cpu_count())
	sf = StatsForecast(
	models=models,
	freq=freq,
	n_jobs=n_jobs,
	)
	fcsts_df = sf.forecast(df=train_df, h=horizon, level=[68.27])
	name_models = [repr(model) for model in models]
	model_name = "StatisticalEnsemble"
	fcsts_df = ensemble_forecasts(
	fcsts_df,
	quantiles,
	name_models,
	model_name,
	)
	total_time = time() - init_time
	return fcsts_df, total_time, model_name


	seasonality = 4 * 24 * 7 # 1 week
	models = [
	AutoARIMA(season_length=seasonality),
	AutoETS(season_length=seasonality),
	AutoCES(season_length=seasonality),
	DynamicOptimizedTheta(season_length=seasonality),
	]
	freq = '15min'
	sf = StatsForecast(
	models=models[:1],
	freq=freq,
	n_jobs=1,
	)

	print("starting forecast, dataset size", len(train_df))
	Y_hat_df = sf.forecast(df=train_df, h=horizon, level=[68.27])

	print(Y_hat_df)

	Y_hat_df = Y_hat_df.reset_index()

	fig, ax = plt.subplots(1, 1, figsize = (20, 7))

	# plot_df = pd.concat([Y_df, Y_hat_df]).set_index('ds') # Concatenate the train and forecast dataframes
	# plot_df[['y', 'LSTM', 'NHITS']].plot(ax=ax, linewidth=2)

	plot_Y_df = Y_df[Y_df['ds'] > '2019-07-01']
	plot_Y_df = plot_Y_df.set_index('ds')[['y']]
	plot_Y_df.plot(ax=ax, linewidth=1)
	Y_hat_df.set_index('ds')[['PatchTST', 'NHITS']].plot(ax=ax, linewidth=1)


	ax.set_title('AirPassengers Forecast', fontsize=22)
	ax.set_ylabel('Monthly Passengers', fontsize=20)
	ax.set_xlabel('Timestamp [t]', fontsize=20)
	ax.legend(prop={'size': 15})
	ax.grid()

	plt.savefig("neuralforecast.pdf")



	exit()






















	quantiles = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]
	fcst_df, total_time, model_name = run_statistical_ensemble(
	train_df,
	horizon=horizon,
	freq='15m',
	seasonality=4 * 24 * 7,
	quantiles=quantiles
	)


	nf.fit(df=train_df)

	Y_hat_df = nf.predict()

	print(Y_df)

	Y_hat_df = Y_hat_df.reset_index()

	fig, ax = plt.subplots(1, 1, figsize = (20, 7))

	# plot_df = pd.concat([Y_df, Y_hat_df]).set_index('ds') # Concatenate the train and forecast dataframes
	# plot_df[['y', 'LSTM', 'NHITS']].plot(ax=ax, linewidth=2)

	plot_Y_df = Y_df[Y_df['ds'] > '2019-07-01']
	plot_Y_df = plot_Y_df.set_index('ds')[['y']]
	plot_Y_df.plot(ax=ax, linewidth=1)
	Y_hat_df.set_index('ds')[['PatchTST', 'NHITS']].plot(ax=ax, linewidth=1)


	ax.set_title('AirPassengers Forecast', fontsize=22)
	ax.set_ylabel('Monthly Passengers', fontsize=20)
	ax.set_xlabel('Timestamp [t]', fontsize=20)
	ax.legend(prop={'size': 15})
	ax.grid()

	plt.savefig("neuralforecast.pdf")