Spaces:
Sleeping
Sleeping
File size: 6,488 Bytes
f1abbf3 6f8bbb1 f1abbf3 6f8bbb1 f1abbf3 6f8bbb1 f1abbf3 6f8bbb1 f1abbf3 6f8bbb1 f1abbf3 6f8bbb1 f1abbf3 6f8bbb1 f1abbf3 6f8bbb1 f1abbf3 6f8bbb1 f1abbf3 6f8bbb1 f1abbf3 6f8bbb1 f1abbf3 6f8bbb1 f1abbf3 6f8bbb1 f1abbf3 6f8bbb1 f1abbf3 6f8bbb1 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 |
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from prophet import Prophet
import holidays
import logging
from sklearn.metrics import mean_absolute_error
PREDICTION_LOWER_BOUND = 0 # 15 [kW]
print("do not forget about hardwired prediction lower bound", PREDICTION_LOWER_BOUND, "kW")
def get_holidays():
hungarian_holidays = holidays.Hungary(years=range(2019, 2031))
holiday_df = pd.DataFrame(list(hungarian_holidays.items()), columns=['ds', 'holiday'])
return holiday_df
def prophet_backend(train_data, forecast_horizon):
# Initialize and train the Prophet model using the training data
model = Prophet(seasonality_mode='multiplicative', growth='flat',
yearly_seasonality=False, weekly_seasonality=True, daily_seasonality=True,
holidays=get_holidays())
# we can also play with setting daily_seasonality=False above, and then manually adding
# model.add_seasonality("daily", 1, fourier_order=10, prior_scale=100, mode="multiplicative")
# ...it didn't really work though. bumping the fourier_order helps, but makes the model slow.
# the rest didn't have much effect.
model.fit(train_data)
# Create a DataFrame with future timestamps for the evaluation period
future = model.make_future_dataframe(periods=forecast_horizon, freq='15T', include_history=False)
# Make predictions for the evaluation period
forecast = model.predict(future)
assert len(forecast) == forecast_horizon
# we never predict below zero, although prophet happily does.
for key in ('yhat', 'yhat_lower', 'yhat_upper'):
forecast[key] = np.maximum(forecast[key], 0)
return forecast, model
def prediction_task(backend, df, split_date, forecast_horizon):
# Split the data into training (past) and evaluation (future) sets
train_data = df[df['ds'] <= split_date]
eval_data = df[df['ds'] > split_date]
eval_data = eval_data.head(forecast_horizon)
forecast, model = backend(train_data, forecast_horizon)
mae = mean_absolute_error(eval_data['y'], forecast['yhat'])
do_vis = False
if do_vis:
future = model.make_future_dataframe(periods=forecast_horizon, freq='15T', include_history=True)
forecast = model.predict(future)
plt.figure(figsize=(12, 6))
plt.plot(eval_data['ds'], eval_data['y'], label='Actual', color='blue')
plt.plot(forecast['ds'], forecast['yhat'], label='Predicted', color='red')
plt.fill_between(forecast['ds'], forecast['yhat_lower'], forecast['yhat_upper'], color='pink', alpha=0.5, label='Uncertainty')
plt.xlabel('Timestamp')
plt.ylabel('Value')
plt.title('Actual vs. Predicted Values')
plt.legend()
plt.grid(True)
plt.show()
fig1 = model.plot(forecast)
plt.plot(eval_data['ds'], eval_data['y'], c='r')
plt.show()
fig2 = model.plot_components(forecast)
plt.show()
exit()
return mae, eval_data['y'].mean()
def quiet_logging():
logger = logging.getLogger('cmdstanpy')
logger.addHandler(logging.NullHandler())
logger.propagate = False
logger.setLevel(logging.CRITICAL)
def build_predictor(training_data: pd.Series):
quiet_logging()
training_data_frame = pd.DataFrame({'ds': training_data.index, 'y': training_data})
model = Prophet(seasonality_mode='multiplicative', growth='flat',
yearly_seasonality=False, weekly_seasonality=True, daily_seasonality=True,
holidays=get_holidays())
# we can also play with setting daily_seasonality=False above, and then manually adding
# model.add_seasonality("daily", 1, fourier_order=10, prior_scale=100, mode="multiplicative")
# ...it didn't really work though. bumping the fourier_order helps, but makes the model slow.
# the rest didn't have much effect.
model.fit(training_data_frame)
return model
def make_prediction(prophet_model: Prophet, test_data: pd.Series, batch_size_in_days: int):
date_range = pd.date_range(start=test_data.index[0], end=test_data.index[-1], freq=f'{batch_size_in_days}d')
for split_date in date_range:
future = prophet_model.make_future_dataframe(periods=forecast_horizon, freq='15T', include_history=False)
# Make predictions for the evaluation period
forecast = prophet_model.predict(future)
assert len(forecast) == forecast_horizon
# we never predict below zero, although prophet happily does.
for key in ('yhat', 'yhat_lower', 'yhat_upper'):
forecast[key] = np.maximum(forecast[key], 0)
return forecast
def main():
quiet_logging()
cons_filename = 'pq_terheles_2021_adatok.tsv'
df = pd.read_csv(cons_filename, sep='\t', skipinitialspace=True, na_values='n/a', decimal=',')
df['Time'] = pd.to_datetime(df['Korrigált időpont'], format='%m/%d/%y %H:%M')
df = df.set_index('Time')
df['Consumption'] = df['Hatásos teljesítmény [kW]']
df['ds'] = df.index
df['y'] = df['Consumption']
# we slightly alter both the train and the test
# because we have an almost constant shift, and the model is multiplicative.
# we add it back in the end.
print("values below PREDICTION_LOWER_BOUND", PREDICTION_LOWER_BOUND, ":",
(df['y'] <= PREDICTION_LOWER_BOUND).sum(), "/", len(df['y']))
df['y'] = (df['y'] - PREDICTION_LOWER_BOUND).clip(0.0)
# TODO 15 minutes timestep hardwired!
forecast_horizon = 7 * 24 * 4
print("forecast horizon", forecast_horizon // 4, "hours")
start_date = '2021-06-01'
end_date = '2021-10-24'
weekly_date_range = pd.date_range(start=start_date, end=end_date, freq='8d')
maes = []
mean_values = []
for split_date in weekly_date_range:
# prophet_backend is the only backend currently
mae, mean_value = prediction_task(prophet_backend, df, split_date, forecast_horizon)
mean_value += PREDICTION_LOWER_BOUND
maes.append(mae)
mean_values.append(mean_value)
print(split_date, "Mean Absolute Error", mae, "MAE/true mean", mae / mean_value)
maes = np.array(maes)
mean_values = np.array(mean_values)
aggregate_mae = maes.mean()
print("Mean Absolute Error over whole date range", weekly_date_range[0], "-", weekly_date_range[-1], ":", aggregate_mae)
print("Mean Absolute Error / true mean over whole date range", aggregate_mae / mean_values.mean())
if __name__ == '__main__':
main()
|