File size: 4,102 Bytes
0ea2c50
e9cac80
 
 
7c98739
0ea2c50
8ec3059
e9cac80
 
0ea2c50
 
 
e9cac80
7c98739
 
 
e9cac80
 
8ec3059
0ea2c50
 
7c98739
 
e9cac80
0ea2c50
e9cac80
0ea2c50
 
 
 
 
8ec3059
0ea2c50
 
 
 
8ec3059
 
 
 
 
 
 
 
 
 
 
e9cac80
8ec3059
 
 
 
 
 
 
 
 
 
e9cac80
0ea2c50
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e9cac80
0ea2c50
 
8ec3059
0ea2c50
e9cac80
 
0ea2c50
 
 
 
e9cac80
 
 
0ea2c50
 
 
 
 
 
 
 
 
 
 
 
 
e9cac80
 
0ea2c50
 
e9cac80
0ea2c50
e9cac80
 
0ea2c50
 
 
8ec3059
0ea2c50
 
 
e9cac80
0ea2c50
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from prophet import Prophet
import holidays
import logging
from sklearn.metrics import mean_absolute_error


# kW
PREDICTION_LOWER_BOUND = 0 # 15
print("do not forget about hardwired prediction lower bound", PREDICTION_LOWER_BOUND, "kW")

hungarian_holidays = holidays.Hungary(years=range(2019, 2031))
HOLIDAY_DF = pd.DataFrame(list(hungarian_holidays.items()), columns=['ds', 'holiday'])



def prophet_backend(train_data, forecast_horizon):
    # Initialize and train the Prophet model using the training data
    model = Prophet(seasonality_mode='multiplicative', growth='flat',
        yearly_seasonality=False, weekly_seasonality=True, daily_seasonality=True,
        holidays=HOLIDAY_DF)

    model.fit(train_data)

    # Create a DataFrame with future timestamps for the evaluation period
    future = model.make_future_dataframe(periods=forecast_horizon, freq='15T', include_history=False)

    # Make predictions for the evaluation period
    forecast = model.predict(future)
    assert len(forecast) == forecast_horizon

    for key in ('yhat', 'yhat_lower', 'yhat_upper'):
        forecast[key] = np.maximum(forecast[key], PREDICTION_LOWER_BOUND)

    return forecast


def sklearn_backend(train_data, forecast_horizon):
    dc = train_data[['y']]
    # inserting new column with yesterday's consumption values
    for i in range(1, 4 * 24 + 1):
        dc.loc[:, 'd%02d' % i] = dc.loc[:,'y'].shift(4 * 24 + i) # t-2days to t-1day
        dc.loc[:, 'w%02d' % i] = dc.loc[:,'y'].shift(7 * 4 * 24 + i) # t-7days to t-8days
    dc.info()
    exit()


def prediction_task(backend, df, split_date, forecast_horizon):
    # Split the data into training (past) and evaluation (future) sets
    train_data = df[df['ds'] <= split_date]
    eval_data = df[df['ds'] > split_date]
    eval_data = eval_data.head(forecast_horizon)

    forecast = backend(train_data, forecast_horizon)

    mae = mean_absolute_error(eval_data['y'], forecast['yhat'])

    do_vis = False
    if do_vis:
        plt.figure(figsize=(12, 6))
        plt.plot(eval_data['ds'], eval_data['y'], label='Actual', color='blue')
        plt.plot(forecast['ds'], forecast['yhat'], label='Predicted', color='red')
        plt.fill_between(forecast['ds'], forecast['yhat_lower'], forecast['yhat_upper'], color='pink', alpha=0.5, label='Uncertainty')
        plt.xlabel('Timestamp')
        plt.ylabel('Value')
        plt.title('Actual vs. Predicted Values')
        plt.legend()
        plt.grid(True)
        plt.show()
        '''
        fig1 = model.plot(forecast)
        plt.plot(eval_data['ds'], eval_data['y'], c='r')
        plt.show()
        '''

        fig2 = model.plot_components(forecast)
        plt.show()

    return mae, eval_data['y'].mean()


logger = logging.getLogger('cmdstanpy')
logger.addHandler(logging.NullHandler())
logger.propagate = False
logger.setLevel(logging.CRITICAL)



cons_filename = 'pq_terheles_2021_adatok.tsv'

df = pd.read_csv(cons_filename, sep='\t', skipinitialspace=True, na_values='n/a', decimal=',')
df['Time'] = pd.to_datetime(df['Korrigált időpont'], format='%m/%d/%y %H:%M')
df = df.set_index('Time')
df['Consumption'] = df['Hatásos teljesítmény [kW]']

df['ds'] = df.index
df['y'] = df['Consumption']


# TODO 15 minutes timestep hardwired!
forecast_horizon = 7 * 24 * 4


start_date = '2021-06-01'
end_date = '2021-10-24'

weekly_date_range = pd.date_range(start=start_date, end=end_date, freq='8d')


maes = []
mean_values = []
for split_date in weekly_date_range:
    mae, mean_value = prediction_task(prophet_backend, df, split_date, forecast_horizon)
    maes.append(mae)
    mean_values.append(mean_value)
    print(split_date, "Mean Absolute Error", mae, "MAE/true mean", mae / mean_value)

maes = np.array(maes)
mean_values = np.array(mean_values)
aggregate_mae = maes.mean()
print("Mean Absolute Error over whole date range", weekly_date_range[0], "-", weekly_date_range[-1], ":", aggregate_mae)
print("Mean Absolute Error / true mean over whole date range", aggregate_mae / mean_values.mean())