File size: 4,312 Bytes
f1abbf3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from prophet import Prophet
import holidays
import logging
from sklearn.metrics import mean_absolute_error


# kW
PREDICTION_LOWER_BOUND = 0 # 15
print("do not forget about hardwired prediction lower bound", PREDICTION_LOWER_BOUND, "kW")

hungarian_holidays = holidays.Hungary(years=range(2019, 2031))
HOLIDAY_DF = pd.DataFrame(list(hungarian_holidays.items()), columns=['ds', 'holiday'])



def prophet_backend(train_data, forecast_horizon):
    # Initialize and train the Prophet model using the training data
    model = Prophet(seasonality_mode='multiplicative', growth='flat',
        yearly_seasonality=False, weekly_seasonality=True, daily_seasonality=True,
        holidays=HOLIDAY_DF)

    # we can also play with setting daily_seasonality=False above, and then manually adding
    # model.add_seasonality("daily", 1, fourier_order=10, prior_scale=100, mode="multiplicative")
    # ...it didn't really work though. bumping the fourier_order helps, but makes the model slow.
    # the rest didn't have much effect.

    model.fit(train_data)

    # Create a DataFrame with future timestamps for the evaluation period
    future = model.make_future_dataframe(periods=forecast_horizon, freq='15T', include_history=False)

    # Make predictions for the evaluation period
    forecast = model.predict(future)
    assert len(forecast) == forecast_horizon

    for key in ('yhat', 'yhat_lower', 'yhat_upper'):
        forecast[key] = np.maximum(forecast[key], PREDICTION_LOWER_BOUND)

    return forecast, model


def prediction_task(backend, df, split_date, forecast_horizon):
    # Split the data into training (past) and evaluation (future) sets
    train_data = df[df['ds'] <= split_date]
    eval_data = df[df['ds'] > split_date]
    eval_data = eval_data.head(forecast_horizon)

    forecast, model = backend(train_data, forecast_horizon)

    mae = mean_absolute_error(eval_data['y'], forecast['yhat'])

    do_vis = False
    if do_vis:
        future = model.make_future_dataframe(periods=forecast_horizon, freq='15T', include_history=True)
        forecast = model.predict(future)

        plt.figure(figsize=(12, 6))
        plt.plot(eval_data['ds'], eval_data['y'], label='Actual', color='blue')
        plt.plot(forecast['ds'], forecast['yhat'], label='Predicted', color='red')
        plt.fill_between(forecast['ds'], forecast['yhat_lower'], forecast['yhat_upper'], color='pink', alpha=0.5, label='Uncertainty')
        plt.xlabel('Timestamp')
        plt.ylabel('Value')
        plt.title('Actual vs. Predicted Values')
        plt.legend()
        plt.grid(True)
        plt.show()

        fig1 = model.plot(forecast)
        plt.plot(eval_data['ds'], eval_data['y'], c='r')
        plt.show()

        fig2 = model.plot_components(forecast)
        plt.show()
        exit()

    return mae, eval_data['y'].mean()


logger = logging.getLogger('cmdstanpy')
logger.addHandler(logging.NullHandler())
logger.propagate = False
logger.setLevel(logging.CRITICAL)



cons_filename = 'pq_terheles_2021_adatok.tsv'

df = pd.read_csv(cons_filename, sep='\t', skipinitialspace=True, na_values='n/a', decimal=',')
df['Time'] = pd.to_datetime(df['Korrigált időpont'], format='%m/%d/%y %H:%M')
df = df.set_index('Time')
df['Consumption'] = df['Hatásos teljesítmény [kW]']

df['ds'] = df.index
df['y'] = df['Consumption']


# TODO 15 minutes timestep hardwired!
forecast_horizon = 24 * 4
print("forecast horizon", forecast_horizon // 4, "hours")


start_date = '2021-06-01'
end_date = '2021-10-24'

weekly_date_range = pd.date_range(start=start_date, end=end_date, freq='8d')


maes = []
mean_values = []
for split_date in weekly_date_range:
    # prophet_backend is the only backend currently
    mae, mean_value = prediction_task(prophet_backend, df, split_date, forecast_horizon)
    maes.append(mae)
    mean_values.append(mean_value)
    print(split_date, "Mean Absolute Error", mae, "MAE/true mean", mae / mean_value)

maes = np.array(maes)
mean_values = np.array(mean_values)
aggregate_mae = maes.mean()
print("Mean Absolute Error over whole date range", weekly_date_range[0], "-", weekly_date_range[-1], ":", aggregate_mae)
print("Mean Absolute Error / true mean over whole date range", aggregate_mae / mean_values.mean())