Spaces:
Sleeping
Sleeping
File size: 4,312 Bytes
f1abbf3 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 |
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from prophet import Prophet
import holidays
import logging
from sklearn.metrics import mean_absolute_error
# kW
PREDICTION_LOWER_BOUND = 0 # 15
print("do not forget about hardwired prediction lower bound", PREDICTION_LOWER_BOUND, "kW")
hungarian_holidays = holidays.Hungary(years=range(2019, 2031))
HOLIDAY_DF = pd.DataFrame(list(hungarian_holidays.items()), columns=['ds', 'holiday'])
def prophet_backend(train_data, forecast_horizon):
# Initialize and train the Prophet model using the training data
model = Prophet(seasonality_mode='multiplicative', growth='flat',
yearly_seasonality=False, weekly_seasonality=True, daily_seasonality=True,
holidays=HOLIDAY_DF)
# we can also play with setting daily_seasonality=False above, and then manually adding
# model.add_seasonality("daily", 1, fourier_order=10, prior_scale=100, mode="multiplicative")
# ...it didn't really work though. bumping the fourier_order helps, but makes the model slow.
# the rest didn't have much effect.
model.fit(train_data)
# Create a DataFrame with future timestamps for the evaluation period
future = model.make_future_dataframe(periods=forecast_horizon, freq='15T', include_history=False)
# Make predictions for the evaluation period
forecast = model.predict(future)
assert len(forecast) == forecast_horizon
for key in ('yhat', 'yhat_lower', 'yhat_upper'):
forecast[key] = np.maximum(forecast[key], PREDICTION_LOWER_BOUND)
return forecast, model
def prediction_task(backend, df, split_date, forecast_horizon):
# Split the data into training (past) and evaluation (future) sets
train_data = df[df['ds'] <= split_date]
eval_data = df[df['ds'] > split_date]
eval_data = eval_data.head(forecast_horizon)
forecast, model = backend(train_data, forecast_horizon)
mae = mean_absolute_error(eval_data['y'], forecast['yhat'])
do_vis = False
if do_vis:
future = model.make_future_dataframe(periods=forecast_horizon, freq='15T', include_history=True)
forecast = model.predict(future)
plt.figure(figsize=(12, 6))
plt.plot(eval_data['ds'], eval_data['y'], label='Actual', color='blue')
plt.plot(forecast['ds'], forecast['yhat'], label='Predicted', color='red')
plt.fill_between(forecast['ds'], forecast['yhat_lower'], forecast['yhat_upper'], color='pink', alpha=0.5, label='Uncertainty')
plt.xlabel('Timestamp')
plt.ylabel('Value')
plt.title('Actual vs. Predicted Values')
plt.legend()
plt.grid(True)
plt.show()
fig1 = model.plot(forecast)
plt.plot(eval_data['ds'], eval_data['y'], c='r')
plt.show()
fig2 = model.plot_components(forecast)
plt.show()
exit()
return mae, eval_data['y'].mean()
logger = logging.getLogger('cmdstanpy')
logger.addHandler(logging.NullHandler())
logger.propagate = False
logger.setLevel(logging.CRITICAL)
cons_filename = 'pq_terheles_2021_adatok.tsv'
df = pd.read_csv(cons_filename, sep='\t', skipinitialspace=True, na_values='n/a', decimal=',')
df['Time'] = pd.to_datetime(df['Korrigált időpont'], format='%m/%d/%y %H:%M')
df = df.set_index('Time')
df['Consumption'] = df['Hatásos teljesítmény [kW]']
df['ds'] = df.index
df['y'] = df['Consumption']
# TODO 15 minutes timestep hardwired!
forecast_horizon = 24 * 4
print("forecast horizon", forecast_horizon // 4, "hours")
start_date = '2021-06-01'
end_date = '2021-10-24'
weekly_date_range = pd.date_range(start=start_date, end=end_date, freq='8d')
maes = []
mean_values = []
for split_date in weekly_date_range:
# prophet_backend is the only backend currently
mae, mean_value = prediction_task(prophet_backend, df, split_date, forecast_horizon)
maes.append(mae)
mean_values.append(mean_value)
print(split_date, "Mean Absolute Error", mae, "MAE/true mean", mae / mean_value)
maes = np.array(maes)
mean_values = np.array(mean_values)
aggregate_mae = maes.mean()
print("Mean Absolute Error over whole date range", weekly_date_range[0], "-", weekly_date_range[-1], ":", aggregate_mae)
print("Mean Absolute Error / true mean over whole date range", aggregate_mae / mean_values.mean())
|