Spaces:
Sleeping
Sleeping
Daniel Varga
committed on
Commit
•
0ea2c50
1
Parent(s):
e9cac80
mean absolute error
Browse files- demo_prophet.py +82 -33
demo_prophet.py
CHANGED
@@ -1,58 +1,107 @@
|
|
|
|
1 |
import pandas as pd
|
2 |
import matplotlib.pyplot as plt
|
3 |
from prophet import Prophet
|
|
|
4 |
|
5 |
-
# df = pd.read_csv('https://raw.githubusercontent.com/facebook/prophet/main/examples/example_wp_log_peyton_manning.csv')
|
6 |
|
7 |
-
|
|
|
|
|
8 |
|
9 |
-
df = pd.read_csv(cons_filename, sep='\t', skipinitialspace=True, na_values='n/a', decimal=',')
|
10 |
-
df['Time'] = pd.to_datetime(df['Korrigált időpont'], format='%m/%d/%y %H:%M')
|
11 |
-
df = df.set_index('Time')
|
12 |
-
df['Consumption'] = df['Hatásos teljesítmény [kW]']
|
13 |
|
14 |
-
df
|
15 |
-
|
|
|
|
|
16 |
|
17 |
|
18 |
-
|
|
|
|
|
19 |
|
|
|
20 |
|
21 |
-
#
|
22 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
23 |
|
24 |
-
#
|
25 |
-
train_data = df[df['ds'] <= split_date]
|
26 |
-
eval_data = df[df['ds'] > split_date]
|
27 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
28 |
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
|
33 |
-
model.fit(train_data)
|
34 |
|
35 |
-
|
36 |
-
|
|
|
|
|
37 |
|
38 |
-
# Make predictions for the evaluation period
|
39 |
-
forecast = model.predict(future)
|
40 |
|
41 |
-
# Calculate evaluation metrics (e.g., MAE, MSE, RMSE) by comparing eval_predictions with eval_data['y']
|
42 |
|
43 |
-
|
44 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
45 |
|
46 |
-
eval_data = eval_data[eval_data['ds'] <= forecast['ds'].max()]
|
47 |
|
48 |
-
|
|
|
49 |
|
50 |
-
|
51 |
-
print(f"Mean Absolute Error (MAE): {mae}")
|
52 |
|
53 |
|
54 |
-
|
55 |
-
|
|
|
|
|
|
|
|
|
|
|
56 |
|
57 |
-
|
58 |
-
|
|
|
|
|
|
|
|
1 |
+
import numpy as np
|
2 |
import pandas as pd
|
3 |
import matplotlib.pyplot as plt
|
4 |
from prophet import Prophet
|
5 |
+
import logging
|
6 |
|
|
|
7 |
|
8 |
+
# Lower bound, in kW, that predictions are clamped to.
# (The author's original note lists 15 as an alternative value.)
PREDICTION_LOWER_BOUND = 0
# Reminder printed at start-up so the hardwired bound is not overlooked.
print(
    "do not forget about hardwired prediction lower bound",
    PREDICTION_LOWER_BOUND,
    "kW",
)
|
11 |
|
|
|
|
|
|
|
|
|
12 |
|
13 |
+
def prediction_task(df, split_date, forecast_horizon, *, freq='15min', do_vis=False):
    """Fit Prophet on the data up to ``split_date`` and score its forecast.

    Args:
        df: DataFrame with a ``ds`` (timestamp) column and a ``y`` (observed
            value) column.
        split_date: Rows with ``ds <= split_date`` are used for training,
            later rows for evaluation.
        forecast_horizon: Number of ``freq``-spaced periods requested from
            ``make_future_dataframe``.
        freq: pandas offset alias of the time step. Defaults to 15 minutes.
        do_vis: When true, plot actual vs. predicted values and the fitted
            model components.

    Returns:
        A tuple ``(mae, mean_y)``: the mean absolute error of the clamped
        forecast over the evaluation window, and the mean observed ``y`` over
        the same rows.

    Raises:
        ValueError: If no evaluation row with a non-missing ``y`` shares a
            timestamp with the forecast.
    """
    # Kept as a local import, as in the original, so that importing this
    # module does not require scikit-learn.
    from sklearn.metrics import mean_absolute_error

    # Split the data into training (past) and evaluation (future) sets.
    train_data = df[df['ds'] <= split_date]
    eval_data = df[df['ds'] > split_date]

    # Initialize and train the Prophet model using the training data.
    model = Prophet(
        seasonality_mode='multiplicative',
        growth='flat',
        yearly_seasonality=False,
        weekly_seasonality=True,
        daily_seasonality=True,
    )
    model.fit(train_data)

    # Build the future timestamps only (no history) and predict them.
    future = model.make_future_dataframe(
        periods=forecast_horizon, freq=freq, include_history=False
    )
    forecast = model.predict(future)

    # Restrict the evaluation data to the forecast window.
    eval_data = eval_data[eval_data['ds'] <= forecast['ds'].max()]

    # Clamp the point forecast and its uncertainty band from below.
    for key in ('yhat', 'yhat_lower', 'yhat_upper'):
        forecast[key] = np.maximum(forecast[key], PREDICTION_LOWER_BOUND)

    # Pair observations and predictions by timestamp instead of by position:
    # a missing measurement, a NaN value or a window that runs past the end of
    # the data would otherwise misalign the two series or make the metric fail.
    scored = (
        eval_data[['ds', 'y']]
        .dropna(subset=['y'])
        .reset_index(drop=True)
        .merge(forecast[['ds', 'yhat']], on='ds', how='inner')
    )
    if scored.empty:
        raise ValueError(
            f"no evaluation data overlaps the forecast window after {split_date}"
        )

    mae = mean_absolute_error(scored['y'], scored['yhat'])

    if do_vis:
        plt.figure(figsize=(12, 6))
        plt.plot(eval_data['ds'], eval_data['y'], label='Actual', color='blue')
        plt.plot(forecast['ds'], forecast['yhat'], label='Predicted', color='red')
        plt.fill_between(forecast['ds'], forecast['yhat_lower'], forecast['yhat_upper'], color='pink', alpha=0.5, label='Uncertainty')
        plt.xlabel('Timestamp')
        plt.ylabel('Value')
        plt.title('Actual vs. Predicted Values')
        plt.legend()
        plt.grid(True)
        plt.show()

        # NOTE(review): assumed to belong to the visualisation branch; the
        # original indentation was not recoverable - confirm.
        model.plot_components(forecast)
        plt.show()

    return mae, scored['y'].mean()
|
65 |
|
|
|
66 |
|
67 |
+
# Silence the 'cmdstanpy' logger: only CRITICAL records pass, nothing is
# propagated to ancestor loggers, and a no-op handler is attached.
logger = logging.getLogger('cmdstanpy')
logger.setLevel(logging.CRITICAL)
logger.propagate = False
logger.addHandler(logging.NullHandler())
|
71 |
|
|
|
|
|
72 |
|
|
|
73 |
|
74 |
+
# --- Load the consumption measurements -------------------------------------
cons_filename = 'pq_terheles_2021_adatok.tsv'

df = pd.read_csv(
    cons_filename,
    sep='\t',
    skipinitialspace=True,
    na_values='n/a',
    decimal=',',
)
df['Time'] = pd.to_datetime(df['Korrigált időpont'], format='%m/%d/%y %H:%M')
df = df.set_index('Time')
df['Consumption'] = df['Hatásos teljesítmény [kW]']

# Prophet expects the timestamp in 'ds' and the target in 'y'.
df['ds'] = df.index
df['y'] = df['Consumption']

# TODO 15 minutes timestep hardwired!
# One week of 15-minute steps.
forecast_horizon = 7 * 24 * 4

# --- Rolling evaluation over successive split dates ------------------------
start_date = '2021-06-01'
end_date = '2021-10-24'

weekly_date_range = pd.date_range(start=start_date, end=end_date, freq='8d')

results = []
for split_date in weekly_date_range:
    mae, mean_value = prediction_task(df, split_date, forecast_horizon)
    results.append((mae, mean_value))
    print(split_date, "Mean Absolute Error", mae, "MAE/true mean", mae / mean_value)

# --- Aggregate the per-split scores ----------------------------------------
maes = np.array([score for score, _ in results])
mean_values = np.array([level for _, level in results])
aggregate_mae = maes.mean()
print("Mean Absolute Error over whole date range", weekly_date_range[0], "-", weekly_date_range[-1], ":", aggregate_mae)
print("Mean Absolute Error / true mean over whole date range", aggregate_mae / mean_values.mean())
|