Spaces:

danielvarga
/

pq

Sleeping

App Files Files Community

pq / v2 /predictor.py

Daniel Varga

in progress: making predictor usable from architecture.py

6f8bbb1 9 months ago

raw

history blame

No virus

6.49 kB

	import numpy as np
	import pandas as pd
	import matplotlib.pyplot as plt
	from prophet import Prophet
	import holidays
	import logging
	from sklearn.metrics import mean_absolute_error


	PREDICTION_LOWER_BOUND = 0 # 15 [kW]
	print("do not forget about hardwired prediction lower bound", PREDICTION_LOWER_BOUND, "kW")


	def get_holidays():
	hungarian_holidays = holidays.Hungary(years=range(2019, 2031))
	holiday_df = pd.DataFrame(list(hungarian_holidays.items()), columns=['ds', 'holiday'])
	return holiday_df


	def prophet_backend(train_data, forecast_horizon):
	# Initialize and train the Prophet model using the training data
	model = Prophet(seasonality_mode='multiplicative', growth='flat',
	yearly_seasonality=False, weekly_seasonality=True, daily_seasonality=True,
	holidays=get_holidays())

	# we can also play with setting daily_seasonality=False above, and then manually adding
	# model.add_seasonality("daily", 1, fourier_order=10, prior_scale=100, mode="multiplicative")
	# ...it didn't really work though. bumping the fourier_order helps, but makes the model slow.
	# the rest didn't have much effect.

	model.fit(train_data)

	# Create a DataFrame with future timestamps for the evaluation period
	future = model.make_future_dataframe(periods=forecast_horizon, freq='15T', include_history=False)

	# Make predictions for the evaluation period
	forecast = model.predict(future)
	assert len(forecast) == forecast_horizon

	# we never predict below zero, although prophet happily does.
	for key in ('yhat', 'yhat_lower', 'yhat_upper'):
	forecast[key] = np.maximum(forecast[key], 0)

	return forecast, model


	def prediction_task(backend, df, split_date, forecast_horizon):
	# Split the data into training (past) and evaluation (future) sets
	train_data = df[df['ds'] <= split_date]
	eval_data = df[df['ds'] > split_date]
	eval_data = eval_data.head(forecast_horizon)

	forecast, model = backend(train_data, forecast_horizon)

	mae = mean_absolute_error(eval_data['y'], forecast['yhat'])

	do_vis = False
	if do_vis:
	future = model.make_future_dataframe(periods=forecast_horizon, freq='15T', include_history=True)
	forecast = model.predict(future)

	plt.figure(figsize=(12, 6))
	plt.plot(eval_data['ds'], eval_data['y'], label='Actual', color='blue')
	plt.plot(forecast['ds'], forecast['yhat'], label='Predicted', color='red')
	plt.fill_between(forecast['ds'], forecast['yhat_lower'], forecast['yhat_upper'], color='pink', alpha=0.5, label='Uncertainty')
	plt.xlabel('Timestamp')
	plt.ylabel('Value')
	plt.title('Actual vs. Predicted Values')
	plt.legend()
	plt.grid(True)
	plt.show()

	fig1 = model.plot(forecast)
	plt.plot(eval_data['ds'], eval_data['y'], c='r')
	plt.show()

	fig2 = model.plot_components(forecast)
	plt.show()
	exit()

	return mae, eval_data['y'].mean()


	def quiet_logging():
	logger = logging.getLogger('cmdstanpy')
	logger.addHandler(logging.NullHandler())
	logger.propagate = False
	logger.setLevel(logging.CRITICAL)


	def build_predictor(training_data: pd.Series):
	quiet_logging()
	training_data_frame = pd.DataFrame({'ds': training_data.index, 'y': training_data})
	model = Prophet(seasonality_mode='multiplicative', growth='flat',
	yearly_seasonality=False, weekly_seasonality=True, daily_seasonality=True,
	holidays=get_holidays())

	# we can also play with setting daily_seasonality=False above, and then manually adding
	# model.add_seasonality("daily", 1, fourier_order=10, prior_scale=100, mode="multiplicative")
	# ...it didn't really work though. bumping the fourier_order helps, but makes the model slow.
	# the rest didn't have much effect.

	model.fit(training_data_frame)
	return model


	def make_prediction(prophet_model: Prophet, test_data: pd.Series, batch_size_in_days: int):
	date_range = pd.date_range(start=test_data.index[0], end=test_data.index[-1], freq=f'{batch_size_in_days}d')
	for split_date in date_range:
	future = prophet_model.make_future_dataframe(periods=forecast_horizon, freq='15T', include_history=False)

	# Make predictions for the evaluation period
	forecast = prophet_model.predict(future)
	assert len(forecast) == forecast_horizon

	# we never predict below zero, although prophet happily does.
	for key in ('yhat', 'yhat_lower', 'yhat_upper'):
	forecast[key] = np.maximum(forecast[key], 0)

	return forecast


	def main():
	quiet_logging()

	cons_filename = 'pq_terheles_2021_adatok.tsv'

	df = pd.read_csv(cons_filename, sep='\t', skipinitialspace=True, na_values='n/a', decimal=',')
	df['Time'] = pd.to_datetime(df['Korrigált időpont'], format='%m/%d/%y %H:%M')
	df = df.set_index('Time')
	df['Consumption'] = df['Hatásos teljesítmény [kW]']

	df['ds'] = df.index
	df['y'] = df['Consumption']

	# we slightly alter both the train and the test
	# because we have an almost constant shift, and the model is multiplicative.
	# we add it back in the end.
	print("values below PREDICTION_LOWER_BOUND", PREDICTION_LOWER_BOUND, ":",
	(df['y'] <= PREDICTION_LOWER_BOUND).sum(), "/", len(df['y']))
	df['y'] = (df['y'] - PREDICTION_LOWER_BOUND).clip(0.0)

	# TODO 15 minutes timestep hardwired!
	forecast_horizon = 7 * 24 * 4
	print("forecast horizon", forecast_horizon // 4, "hours")


	start_date = '2021-06-01'
	end_date = '2021-10-24'

	weekly_date_range = pd.date_range(start=start_date, end=end_date, freq='8d')


	maes = []
	mean_values = []
	for split_date in weekly_date_range:
	# prophet_backend is the only backend currently
	mae, mean_value = prediction_task(prophet_backend, df, split_date, forecast_horizon)
	mean_value += PREDICTION_LOWER_BOUND
	maes.append(mae)
	mean_values.append(mean_value)
	print(split_date, "Mean Absolute Error", mae, "MAE/true mean", mae / mean_value)

	maes = np.array(maes)
	mean_values = np.array(mean_values)
	aggregate_mae = maes.mean()
	print("Mean Absolute Error over whole date range", weekly_date_range[0], "-", weekly_date_range[-1], ":", aggregate_mae)
	print("Mean Absolute Error / true mean over whole date range", aggregate_mae / mean_values.mean())


	if __name__ == '__main__':
	main()