Daniel Varga committed on
Commit
0ea2c50
1 Parent(s): e9cac80

mean absolute error

Browse files
Files changed (1) hide show
  1. demo_prophet.py +82 -33
demo_prophet.py CHANGED
@@ -1,58 +1,107 @@
 
1
  import pandas as pd
2
  import matplotlib.pyplot as plt
3
  from prophet import Prophet
 
4
 
5
- # df = pd.read_csv('https://raw.githubusercontent.com/facebook/prophet/main/examples/example_wp_log_peyton_manning.csv')
6
 
7
- cons_filename = 'pq_terheles_2021_adatok.tsv'
 
 
8
 
9
- df = pd.read_csv(cons_filename, sep='\t', skipinitialspace=True, na_values='n/a', decimal=',')
10
- df['Time'] = pd.to_datetime(df['Korrigált időpont'], format='%m/%d/%y %H:%M')
11
- df = df.set_index('Time')
12
- df['Consumption'] = df['Hatásos teljesítmény [kW]']
13
 
14
- df['ds'] = df.index
15
- df['y'] = df['Consumption']
 
 
16
 
17
 
18
- split_date = '2021-07-01'
 
 
19
 
 
20
 
21
- # TODO 15 minutes hardwired!
22
- forecast_horizon = 7 * 24 * 4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
23
 
24
- # Split the data into training (past) and evaluation (future) sets
25
- train_data = df[df['ds'] <= split_date]
26
- eval_data = df[df['ds'] > split_date]
27
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28
 
29
- # Initialize and train the Prophet model using the training data
30
- model = Prophet(seasonality_mode='multiplicative', growth='flat',
31
- yearly_seasonality=True, weekly_seasonality=True, daily_seasonality=True)
32
 
33
- model.fit(train_data)
34
 
35
- # Create a DataFrame with future timestamps for the evaluation period
36
- future = model.make_future_dataframe(periods=forecast_horizon, freq='15T', include_history=False)
 
 
37
 
38
- # Make predictions for the evaluation period
39
- forecast = model.predict(future)
40
 
41
- # Calculate evaluation metrics (e.g., MAE, MSE, RMSE) by comparing eval_predictions with eval_data['y']
42
 
43
- # For example, you can calculate MAE as follows:
44
- from sklearn.metrics import mean_absolute_error
 
 
 
 
 
 
 
 
 
 
 
45
 
46
- eval_data = eval_data[eval_data['ds'] <= forecast['ds'].max()]
47
 
48
- mae = mean_absolute_error(eval_data['y'], forecast['yhat'])
 
49
 
50
- # Print or store the evaluation metrics
51
- print(f"Mean Absolute Error (MAE): {mae}")
52
 
53
 
54
- fig1 = model.plot(forecast)
55
- plt.show()
 
 
 
 
 
56
 
57
- fig2 = model.plot_components(forecast)
58
- plt.show()
 
 
 
 
1
+ import numpy as np
2
  import pandas as pd
3
  import matplotlib.pyplot as plt
4
  from prophet import Prophet
5
+ import logging
6
 
 
7
 
8
# Floor, in kW, applied to the forecast columns in prediction_task().
# NOTE(review): the original line carried a trailing "# 15", presumably an
# alternative value that was tried — confirm before changing.
PREDICTION_LOWER_BOUND = 0
print(
    "do not forget about hardwired prediction lower bound",
    PREDICTION_LOWER_BOUND,
    "kW",
)
11
 
 
 
 
 
12
 
13
def prediction_task(df, split_date, forecast_horizon, *, freq='15min', do_vis=False):
    """Fit Prophet on the data up to ``split_date`` and score the forecast after it.

    Args:
        df: Frame with a datetime column ``ds`` and a numeric column ``y``.
        split_date: Rows with ``ds <= split_date`` are used for training,
            later rows for evaluation.
        forecast_horizon: Number of ``freq``-sized steps to forecast.
        freq: Spacing of the forecast grid as a pandas offset alias.
            ``'15min'`` is the non-deprecated spelling of ``'15T'``.
        do_vis: When true, plot actual vs. predicted values and Prophet's
            component plots.

    Returns:
        Tuple ``(mae, mean_actual)``: the mean absolute error of the clipped
        ``yhat`` against the actual values, and the mean of those actual
        values, both taken over the timestamps present in the forecast and
        in the evaluation data.

    Raises:
        ValueError: If no non-missing actual value falls on the forecast grid.
    """
    # Split the data into training (past) and evaluation (future) sets.
    train_data = df[df['ds'] <= split_date]
    eval_data = df[df['ds'] > split_date]

    model = Prophet(seasonality_mode='multiplicative', growth='flat',
                    yearly_seasonality=False, weekly_seasonality=True,
                    daily_seasonality=True)
    model.fit(train_data)

    # Forecast only the steps that follow the training data.
    future = model.make_future_dataframe(periods=forecast_horizon, freq=freq,
                                         include_history=False)
    forecast = model.predict(future)

    # Clamp the point forecast and its uncertainty band at the lower bound.
    for key in ('yhat', 'yhat_lower', 'yhat_upper'):
        forecast[key] = forecast[key].clip(lower=PREDICTION_LOWER_BOUND)

    # Pair predictions with actuals by timestamp rather than by position, so
    # that gaps or missing readings cannot shift values against each other
    # or make the two series differ in length.
    actuals = eval_data[['ds', 'y']].reset_index(drop=True)
    scored = forecast[['ds', 'yhat']].merge(actuals, on='ds', how='inner')
    scored = scored.dropna(subset=['y'])
    if scored.empty:
        raise ValueError(
            f"no actual values available to evaluate the forecast after {split_date}"
        )

    mae = (scored['y'] - scored['yhat']).abs().mean()

    if do_vis:
        plt.figure(figsize=(12, 6))
        plt.plot(scored['ds'], scored['y'], label='Actual', color='blue')
        plt.plot(forecast['ds'], forecast['yhat'], label='Predicted', color='red')
        plt.fill_between(forecast['ds'], forecast['yhat_lower'], forecast['yhat_upper'],
                         color='pink', alpha=0.5, label='Uncertainty')
        plt.xlabel('Timestamp')
        plt.ylabel('Value')
        plt.title('Actual vs. Predicted Values')
        plt.legend()
        plt.grid(True)
        plt.show()

        model.plot_components(forecast)
        plt.show()

    return mae, scored['y'].mean()
65
 
 
66
 
67
# Silence the 'cmdstanpy' logger: nothing below CRITICAL, no propagation to
# parent loggers, and a no-op handler attached.
logger = logging.getLogger('cmdstanpy')
logger.setLevel(logging.CRITICAL)
logger.propagate = False
logger.addHandler(logging.NullHandler())


# Load the tab-separated consumption export; decimals use a comma and
# missing readings are written as 'n/a'.
cons_filename = 'pq_terheles_2021_adatok.tsv'
df = pd.read_csv(
    cons_filename,
    sep='\t',
    skipinitialspace=True,
    na_values='n/a',
    decimal=',',
)

# Index the frame by the parsed timestamp column.
df = df.set_index(
    pd.to_datetime(df['Korrigált időpont'], format='%m/%d/%y %H:%M').rename('Time')
)
df['Consumption'] = df['Hatásos teljesítmény [kW]']

# Columns named the way Prophet expects them.
df['ds'] = df.index
df['y'] = df['Consumption']


# TODO 15 minutes timestep hardwired!
# One week of quarter-hour steps: 7 days * 24 hours * 4 steps per hour.
forecast_horizon = 7 * 24 * 4


start_date = '2021-06-01'
end_date = '2021-10-24'

# NOTE(review): the step is 8 days although the name says "weekly" — confirm
# this offset is intended.
weekly_date_range = pd.date_range(start=start_date, end=end_date, freq='8d')


# Evaluate one forecast per split date and collect the per-split scores.
maes, mean_values = [], []
for split_date in weekly_date_range:
    mae, mean_value = prediction_task(df, split_date, forecast_horizon)
    maes.append(mae)
    mean_values.append(mean_value)
    print(split_date, "Mean Absolute Error", mae, "MAE/true mean", mae / mean_value)

maes = np.array(maes)
mean_values = np.array(mean_values)
aggregate_mae = maes.mean()
print(
    "Mean Absolute Error over whole date range",
    weekly_date_range[0],
    "-",
    weekly_date_range[-1],
    ":",
    aggregate_mae,
)
print(
    "Mean Absolute Error / true mean over whole date range",
    aggregate_mae / mean_values.mean(),
)