Spaces:

danielvarga
/

pq

Sleeping

App Files Files Community

Daniel Varga committed on Dec 21, 2023

Commit

6f8bbb1

•

1 Parent(s): 5711d94

in progress: making predictor usable from architecture.py

Browse files

Files changed (1) hide show

v2/predictor.py +84 -36

v2/predictor.py CHANGED Viewed

@@ -7,20 +7,21 @@ import logging
 from sklearn.metrics import mean_absolute_error
-# kW
-PREDICTION_LOWER_BOUND = 0 # 15
 print("do not forget about hardwired prediction lower bound", PREDICTION_LOWER_BOUND, "kW")
-hungarian_holidays = holidays.Hungary(years=range(2019, 2031))
-HOLIDAY_DF = pd.DataFrame(list(hungarian_holidays.items()), columns=['ds', 'holiday'])
 def prophet_backend(train_data, forecast_horizon):
     # Initialize and train the Prophet model using the training data
     model = Prophet(seasonality_mode='multiplicative', growth='flat',
         yearly_seasonality=False, weekly_seasonality=True, daily_seasonality=True,
-        holidays=HOLIDAY_DF)
     # we can also play with setting daily_seasonality=False above, and then manually adding
     # model.add_seasonality("daily", 1, fourier_order=10, prior_scale=100, mode="multiplicative")
@@ -36,8 +37,9 @@ def prophet_backend(train_data, forecast_horizon):
     forecast = model.predict(future)
     assert len(forecast) == forecast_horizon
     for key in ('yhat', 'yhat_lower', 'yhat_upper'):
-        forecast[key] = np.maximum(forecast[key], PREDICTION_LOWER_BOUND)
     return forecast, model
@@ -79,46 +81,92 @@ def prediction_task(backend, df, split_date, forecast_horizon):
     return mae, eval_data['y'].mean()
-logger = logging.getLogger('cmdstanpy')
-logger.addHandler(logging.NullHandler())
-logger.propagate = False
-logger.setLevel(logging.CRITICAL)
-cons_filename = 'pq_terheles_2021_adatok.tsv'
-df = pd.read_csv(cons_filename, sep='\t', skipinitialspace=True, na_values='n/a', decimal=',')
-df['Time'] = pd.to_datetime(df['Korrigált időpont'], format='%m/%d/%y %H:%M')
-df = df.set_index('Time')
-df['Consumption'] = df['Hatásos teljesítmény [kW]']
-df['ds'] = df.index
-df['y'] = df['Consumption']
-# TODO 15 minutes timestep hardwired!
-forecast_horizon = 24 * 4
-print("forecast horizon", forecast_horizon // 4, "hours")
-start_date = '2021-06-01'
-end_date = '2021-10-24'
-weekly_date_range = pd.date_range(start=start_date, end=end_date, freq='8d')
-maes = []
-mean_values = []
-for split_date in weekly_date_range:
-    # prophet_backend is the only backend currently
-    mae, mean_value = prediction_task(prophet_backend, df, split_date, forecast_horizon)
-    maes.append(mae)
-    mean_values.append(mean_value)
-    print(split_date, "Mean Absolute Error", mae, "MAE/true mean", mae / mean_value)
-maes = np.array(maes)
-mean_values = np.array(mean_values)
-aggregate_mae = maes.mean()
-print("Mean Absolute Error over whole date range", weekly_date_range[0], "-", weekly_date_range[-1], ":", aggregate_mae)
-print("Mean Absolute Error / true mean over whole date range", aggregate_mae / mean_values.mean())

 from sklearn.metrics import mean_absolute_error
+PREDICTION_LOWER_BOUND = 0 # in kW; 15 is presumably an alternative value that was tried - confirm
 print("do not forget about hardwired prediction lower bound", PREDICTION_LOWER_BOUND, "kW")  # NOTE(review): module-level statement, so it also prints whenever this module is imported
+def get_holidays():  # builds the holiday table that is passed to Prophet(holidays=...)
+    hungarian_holidays = holidays.Hungary(years=range(2019, 2031))  # range end is exclusive: years 2019..2030
+    holiday_df = pd.DataFrame(list(hungarian_holidays.items()), columns=['ds', 'holiday'])  # one row per item, columns 'ds' and 'holiday'
+    return holiday_df  # a new DataFrame is built on every call
 def prophet_backend(train_data, forecast_horizon):
     # Initialize and train the Prophet model using the training data
     model = Prophet(seasonality_mode='multiplicative', growth='flat',
         yearly_seasonality=False, weekly_seasonality=True, daily_seasonality=True,
+        holidays=get_holidays())
     # we can also play with setting daily_seasonality=False above, and then manually adding
     # model.add_seasonality("daily", 1, fourier_order=10, prior_scale=100, mode="multiplicative")
     forecast = model.predict(future)
     assert len(forecast) == forecast_horizon
+    # we never predict below zero, although prophet happily does.
     for key in ('yhat', 'yhat_lower', 'yhat_upper'):
+        forecast[key] = np.maximum(forecast[key], 0)
     return forecast, model
     return mae, eval_data['y'].mean()
+def quiet_logging():  # silences the 'cmdstanpy' logger; called from build_predictor() and main()
+    logger = logging.getLogger('cmdstanpy')
+    logger.addHandler(logging.NullHandler())  # NOTE(review): each call adds one more NullHandler to the same logger
+    logger.propagate = False  # records are not handed on to ancestor loggers' handlers
+    logger.setLevel(logging.CRITICAL)  # only records at CRITICAL level or above are processed
+def build_predictor(training_data: pd.Series):  # fits a Prophet model on training_data and returns the fitted model
+    quiet_logging()
+    training_data_frame = pd.DataFrame({'ds': training_data.index, 'y': training_data})  # 'ds' is taken from the series index, 'y' from its values; presumably a datetime index - confirm with caller
+    model = Prophet(seasonality_mode='multiplicative', growth='flat',  # NOTE(review): same constructor arguments as in prophet_backend()
+        yearly_seasonality=False, weekly_seasonality=True, daily_seasonality=True,
+        holidays=get_holidays())
+    # We could also set daily_seasonality=False above and then add the daily seasonality manually:
+    # model.add_seasonality("daily", 1, fourier_order=10, prior_scale=100, mode="multiplicative")
+    # ...but that did not really work. Raising fourier_order helps, but makes the model slow;
+    # the other settings did not have much effect.
+    model.fit(training_data_frame)
+    return model
+def make_prediction(prophet_model: Prophet, test_data: pd.Series, batch_size_in_days: int):  # NOTE(review): work in progress - see the review notes on the lines below
+    date_range = pd.date_range(start=test_data.index[0], end=test_data.index[-1], freq=f'{batch_size_in_days}d')  # split dates spaced batch_size_in_days days apart, from the first to the last index entry of test_data
+    for split_date in date_range:  # NOTE(review): split_date is never used inside the loop body
+        future = prophet_model.make_future_dataframe(periods=forecast_horizon, freq='15T', include_history=False)  # NOTE(review): forecast_horizon is neither a parameter nor a local here (it is a local of main()); this raises NameError unless a module-level global exists outside this view
+        # Predict for the future timestamps generated above.
+        forecast = prophet_model.predict(future)
+        assert len(forecast) == forecast_horizon
+        # Prophet can predict negative values; clamp the point forecast and both interval bounds at zero.
+        for key in ('yhat', 'yhat_lower', 'yhat_upper'):
+            forecast[key] = np.maximum(forecast[key], 0)
+        return forecast  # NOTE(review): returns inside the loop, so only the first iteration ever runs; an empty date_range falls through and returns None
+def main():  # loads the consumption TSV, runs prediction_task on a series of split dates and prints per-split and aggregate MAE
+    quiet_logging()
+    cons_filename = 'pq_terheles_2021_adatok.tsv'  # relative path, resolved against the current working directory
+    df = pd.read_csv(cons_filename, sep='\t', skipinitialspace=True, na_values='n/a', decimal=',')  # tab-separated, decimal comma, 'n/a' read as NaN
+    df['Time'] = pd.to_datetime(df['Korrigált időpont'], format='%m/%d/%y %H:%M')
+    df = df.set_index('Time')
+    df['Consumption'] = df['Hatásos teljesítmény [kW]']
+    df['ds'] = df.index  # 'ds' / 'y' are the column names also used for the Prophet training frame in build_predictor()
+    df['y'] = df['Consumption']
+    # Shift the whole series (train and test alike) down by PREDICTION_LOWER_BOUND,
+    # because the data has an almost constant offset and the model is multiplicative.
+    # The offset is added back to the reported mean value in the loop below.
+    print("values below PREDICTION_LOWER_BOUND", PREDICTION_LOWER_BOUND, ":",
+        (df['y'] <= PREDICTION_LOWER_BOUND).sum(), "/", len(df['y']))  # counts values <= the bound, although the label says "below"
+    df['y'] = (df['y'] - PREDICTION_LOWER_BOUND).clip(0.0)  # values that end up negative after the shift are raised to 0.0
+    # TODO: the 15-minute timestep is hardwired (4 steps per hour).
+    forecast_horizon = 7 * 24 * 4  # 672 steps = 7 days at 4 steps per hour
+    print("forecast horizon", forecast_horizon // 4, "hours")
+    start_date = '2021-06-01'
+    end_date = '2021-10-24'
+    weekly_date_range = pd.date_range(start=start_date, end=end_date, freq='8d')  # NOTE(review): despite the name, split dates are 8 days apart
+    maes = []
+    mean_values = []
+    for split_date in weekly_date_range:
+        # prophet_backend is currently the only backend
+        mae, mean_value = prediction_task(prophet_backend, df, split_date, forecast_horizon)
+        mean_value += PREDICTION_LOWER_BOUND  # undo the shift applied to 'y' above so the ratio uses the unshifted mean
+        maes.append(mae)
+        mean_values.append(mean_value)
+        print(split_date, "Mean Absolute Error", mae, "MAE/true mean", mae / mean_value)
+    maes = np.array(maes)
+    mean_values = np.array(mean_values)
+    aggregate_mae = maes.mean()  # unweighted mean of the per-split MAEs
+    print("Mean Absolute Error over whole date range", weekly_date_range[0], "-", weekly_date_range[-1], ":", aggregate_mae)
+    print("Mean Absolute Error / true mean over whole date range", aggregate_mae / mean_values.mean())
+if __name__ == '__main__':  # run the evaluation only when executed as a script, not when imported
+    main()