pq / demo_prophet.py
Daniel Varga
mean absolute error
0ea2c50
raw
history blame
No virus
3.56 kB
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from prophet import Prophet
import logging
# kW
PREDICTION_LOWER_BOUND = 0 # 15
print("do not forget about hardwired prediction lower bound", PREDICTION_LOWER_BOUND, "kW")
def prediction_task(df, split_date, forecast_horizon):
# Split the data into training (past) and evaluation (future) sets
train_data = df[df['ds'] <= split_date]
eval_data = df[df['ds'] > split_date]
# Initialize and train the Prophet model using the training data
model = Prophet(seasonality_mode='multiplicative', growth='flat',
yearly_seasonality=False, weekly_seasonality=True, daily_seasonality=True)
model.fit(train_data)
# Create a DataFrame with future timestamps for the evaluation period
future = model.make_future_dataframe(periods=forecast_horizon, freq='15T', include_history=False)
# Make predictions for the evaluation period
forecast = model.predict(future)
# Calculate evaluation metrics (e.g., MAE, MSE, RMSE) by comparing eval_predictions with eval_data['y']
# For example, you can calculate MAE as follows:
from sklearn.metrics import mean_absolute_error
eval_data = eval_data[eval_data['ds'] <= forecast['ds'].max()]
for key in ('yhat', 'yhat_lower', 'yhat_upper'):
forecast[key] = np.maximum(forecast[key], PREDICTION_LOWER_BOUND)
mae = mean_absolute_error(eval_data['y'], forecast['yhat'])
# Print or store the evaluation metrics
do_vis = False
if do_vis:
plt.figure(figsize=(12, 6))
plt.plot(eval_data['ds'], eval_data['y'], label='Actual', color='blue')
plt.plot(forecast['ds'], forecast['yhat'], label='Predicted', color='red')
plt.fill_between(forecast['ds'], forecast['yhat_lower'], forecast['yhat_upper'], color='pink', alpha=0.5, label='Uncertainty')
plt.xlabel('Timestamp')
plt.ylabel('Value')
plt.title('Actual vs. Predicted Values')
plt.legend()
plt.grid(True)
plt.show()
'''
fig1 = model.plot(forecast)
plt.plot(eval_data['ds'], eval_data['y'], c='r')
plt.show()
'''
fig2 = model.plot_components(forecast)
plt.show()
return mae, eval_data['y'].mean()
logger = logging.getLogger('cmdstanpy')
logger.addHandler(logging.NullHandler())
logger.propagate = False
logger.setLevel(logging.CRITICAL)
cons_filename = 'pq_terheles_2021_adatok.tsv'
df = pd.read_csv(cons_filename, sep='\t', skipinitialspace=True, na_values='n/a', decimal=',')
df['Time'] = pd.to_datetime(df['Korrigált időpont'], format='%m/%d/%y %H:%M')
df = df.set_index('Time')
df['Consumption'] = df['Hatásos teljesítmény [kW]']
df['ds'] = df.index
df['y'] = df['Consumption']
# TODO 15 minutes timestep hardwired!
forecast_horizon = 7 * 24 * 4
start_date = '2021-06-01'
end_date = '2021-10-24'
weekly_date_range = pd.date_range(start=start_date, end=end_date, freq='8d')
maes = []
mean_values = []
for split_date in weekly_date_range:
mae, mean_value = prediction_task(df, split_date, forecast_horizon)
maes.append(mae)
mean_values.append(mean_value)
print(split_date, "Mean Absolute Error", mae, "MAE/true mean", mae / mean_value)
maes = np.array(maes)
mean_values = np.array(mean_values)
aggregate_mae = maes.mean()
print("Mean Absolute Error over whole date range", weekly_date_range[0], "-", weekly_date_range[-1], ":", aggregate_mae)
print("Mean Absolute Error / true mean over whole date range", aggregate_mae / mean_values.mean())