Daniel Varga commited on
Commit
6f8bbb1
1 Parent(s): 5711d94

in progress: making predictor usable from architecture.py

Browse files
Files changed (1) hide show
  1. v2/predictor.py +84 -36
v2/predictor.py CHANGED
@@ -7,20 +7,21 @@ import logging
7
  from sklearn.metrics import mean_absolute_error
8
 
9
 
10
- # kW
11
- PREDICTION_LOWER_BOUND = 0 # 15
12
  print("do not forget about hardwired prediction lower bound", PREDICTION_LOWER_BOUND, "kW")
13
 
14
- hungarian_holidays = holidays.Hungary(years=range(2019, 2031))
15
- HOLIDAY_DF = pd.DataFrame(list(hungarian_holidays.items()), columns=['ds', 'holiday'])
16
 
 
 
 
 
17
 
18
 
19
  def prophet_backend(train_data, forecast_horizon):
20
  # Initialize and train the Prophet model using the training data
21
  model = Prophet(seasonality_mode='multiplicative', growth='flat',
22
  yearly_seasonality=False, weekly_seasonality=True, daily_seasonality=True,
23
- holidays=HOLIDAY_DF)
24
 
25
  # we can also play with setting daily_seasonality=False above, and then manually adding
26
  # model.add_seasonality("daily", 1, fourier_order=10, prior_scale=100, mode="multiplicative")
@@ -36,8 +37,9 @@ def prophet_backend(train_data, forecast_horizon):
36
  forecast = model.predict(future)
37
  assert len(forecast) == forecast_horizon
38
 
 
39
  for key in ('yhat', 'yhat_lower', 'yhat_upper'):
40
- forecast[key] = np.maximum(forecast[key], PREDICTION_LOWER_BOUND)
41
 
42
  return forecast, model
43
 
@@ -79,46 +81,92 @@ def prediction_task(backend, df, split_date, forecast_horizon):
79
  return mae, eval_data['y'].mean()
80
 
81
 
82
- logger = logging.getLogger('cmdstanpy')
83
- logger.addHandler(logging.NullHandler())
84
- logger.propagate = False
85
- logger.setLevel(logging.CRITICAL)
 
86
 
87
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
88
 
89
- cons_filename = 'pq_terheles_2021_adatok.tsv'
 
90
 
91
- df = pd.read_csv(cons_filename, sep='\t', skipinitialspace=True, na_values='n/a', decimal=',')
92
- df['Time'] = pd.to_datetime(df['Korrigált időpont'], format='%m/%d/%y %H:%M')
93
- df = df.set_index('Time')
94
- df['Consumption'] = df['Hatásos teljesítmény [kW]']
 
 
95
 
96
- df['ds'] = df.index
97
- df['y'] = df['Consumption']
 
98
 
99
 
100
- # TODO 15 minutes timestep hardwired!
101
- forecast_horizon = 24 * 4
102
- print("forecast horizon", forecast_horizon // 4, "hours")
103
 
 
104
 
105
- start_date = '2021-06-01'
106
- end_date = '2021-10-24'
107
 
108
- weekly_date_range = pd.date_range(start=start_date, end=end_date, freq='8d')
 
 
 
 
 
 
 
 
109
 
 
 
 
 
 
110
 
111
- maes = []
112
- mean_values = []
113
- for split_date in weekly_date_range:
114
- # prophet_backend is the only backend currently
115
- mae, mean_value = prediction_task(prophet_backend, df, split_date, forecast_horizon)
116
- maes.append(mae)
117
- mean_values.append(mean_value)
118
- print(split_date, "Mean Absolute Error", mae, "MAE/true mean", mae / mean_value)
119
 
120
- maes = np.array(maes)
121
- mean_values = np.array(mean_values)
122
- aggregate_mae = maes.mean()
123
- print("Mean Absolute Error over whole date range", weekly_date_range[0], "-", weekly_date_range[-1], ":", aggregate_mae)
124
- print("Mean Absolute Error / true mean over whole date range", aggregate_mae / mean_values.mean())
 
7
  from sklearn.metrics import mean_absolute_error
8
 
9
 
10
+ PREDICTION_LOWER_BOUND = 0 # 15 [kW]
 
11
  print("do not forget about hardwired prediction lower bound", PREDICTION_LOWER_BOUND, "kW")
12
 
 
 
13
 
14
+ def get_holidays():
15
+ hungarian_holidays = holidays.Hungary(years=range(2019, 2031))
16
+ holiday_df = pd.DataFrame(list(hungarian_holidays.items()), columns=['ds', 'holiday'])
17
+ return holiday_df
18
 
19
 
20
  def prophet_backend(train_data, forecast_horizon):
21
  # Initialize and train the Prophet model using the training data
22
  model = Prophet(seasonality_mode='multiplicative', growth='flat',
23
  yearly_seasonality=False, weekly_seasonality=True, daily_seasonality=True,
24
+ holidays=get_holidays())
25
 
26
  # we can also play with setting daily_seasonality=False above, and then manually adding
27
  # model.add_seasonality("daily", 1, fourier_order=10, prior_scale=100, mode="multiplicative")
 
37
  forecast = model.predict(future)
38
  assert len(forecast) == forecast_horizon
39
 
40
+ # we never predict below zero, although prophet happily does.
41
  for key in ('yhat', 'yhat_lower', 'yhat_upper'):
42
+ forecast[key] = np.maximum(forecast[key], 0)
43
 
44
  return forecast, model
45
 
 
81
  return mae, eval_data['y'].mean()
82
 
83
 
84
+ def quiet_logging():
85
+ logger = logging.getLogger('cmdstanpy')
86
+ logger.addHandler(logging.NullHandler())
87
+ logger.propagate = False
88
+ logger.setLevel(logging.CRITICAL)
89
 
90
 
91
+ def build_predictor(training_data: pd.Series):
92
+ quiet_logging()
93
+ training_data_frame = pd.DataFrame({'ds': training_data.index, 'y': training_data})
94
+ model = Prophet(seasonality_mode='multiplicative', growth='flat',
95
+ yearly_seasonality=False, weekly_seasonality=True, daily_seasonality=True,
96
+ holidays=get_holidays())
97
+
98
+ # we can also play with setting daily_seasonality=False above, and then manually adding
99
+ # model.add_seasonality("daily", 1, fourier_order=10, prior_scale=100, mode="multiplicative")
100
+ # ...it didn't really work though. bumping the fourier_order helps, but makes the model slow.
101
+ # the rest didn't have much effect.
102
+
103
+ model.fit(training_data_frame)
104
+ return model
105
+
106
+
107
+ def make_prediction(prophet_model: Prophet, test_data: pd.Series, batch_size_in_days: int):
108
+ date_range = pd.date_range(start=test_data.index[0], end=test_data.index[-1], freq=f'{batch_size_in_days}d')
109
+ for split_date in date_range:
110
+ future = prophet_model.make_future_dataframe(periods=forecast_horizon, freq='15T', include_history=False)
111
+
112
+ # Make predictions for the evaluation period
113
+ forecast = prophet_model.predict(future)
114
+ assert len(forecast) == forecast_horizon
115
+
116
+ # we never predict below zero, although prophet happily does.
117
+ for key in ('yhat', 'yhat_lower', 'yhat_upper'):
118
+ forecast[key] = np.maximum(forecast[key], 0)
119
+
120
+ return forecast
121
+
122
+
123
+ def main():
124
+ quiet_logging()
125
+
126
+ cons_filename = 'pq_terheles_2021_adatok.tsv'
127
+
128
+ df = pd.read_csv(cons_filename, sep='\t', skipinitialspace=True, na_values='n/a', decimal=',')
129
+ df['Time'] = pd.to_datetime(df['Korrigált időpont'], format='%m/%d/%y %H:%M')
130
+ df = df.set_index('Time')
131
+ df['Consumption'] = df['Hatásos teljesítmény [kW]']
132
 
133
+ df['ds'] = df.index
134
+ df['y'] = df['Consumption']
135
 
136
+ # we slightly alter both the train and the test
137
+ # because we have an almost constant shift, and the model is multiplicative.
138
+ # we add it back in the end.
139
+ print("values below PREDICTION_LOWER_BOUND", PREDICTION_LOWER_BOUND, ":",
140
+ (df['y'] <= PREDICTION_LOWER_BOUND).sum(), "/", len(df['y']))
141
+ df['y'] = (df['y'] - PREDICTION_LOWER_BOUND).clip(0.0)
142
 
143
+ # TODO 15 minutes timestep hardwired!
144
+ forecast_horizon = 7 * 24 * 4
145
+ print("forecast horizon", forecast_horizon // 4, "hours")
146
 
147
 
148
+ start_date = '2021-06-01'
149
+ end_date = '2021-10-24'
 
150
 
151
+ weekly_date_range = pd.date_range(start=start_date, end=end_date, freq='8d')
152
 
 
 
153
 
154
+ maes = []
155
+ mean_values = []
156
+ for split_date in weekly_date_range:
157
+ # prophet_backend is the only backend currently
158
+ mae, mean_value = prediction_task(prophet_backend, df, split_date, forecast_horizon)
159
+ mean_value += PREDICTION_LOWER_BOUND
160
+ maes.append(mae)
161
+ mean_values.append(mean_value)
162
+ print(split_date, "Mean Absolute Error", mae, "MAE/true mean", mae / mean_value)
163
 
164
+ maes = np.array(maes)
165
+ mean_values = np.array(mean_values)
166
+ aggregate_mae = maes.mean()
167
+ print("Mean Absolute Error over whole date range", weekly_date_range[0], "-", weekly_date_range[-1], ":", aggregate_mae)
168
+ print("Mean Absolute Error / true mean over whole date range", aggregate_mae / mean_values.mean())
169
 
 
 
 
 
 
 
 
 
170
 
171
+ if __name__ == '__main__':
172
+ main()