Daniel Varga commited on
Commit
64b066d
1 Parent(s): 3fd575d
Files changed (1) hide show
  1. v2/test_predictor_statsforecast.py +156 -30
v2/test_predictor_statsforecast.py CHANGED
@@ -1,8 +1,22 @@
 
 
 
1
  import pandas as pd
2
  import matplotlib.pyplot as plt
3
 
4
- from neuralforecast import NeuralForecast
5
- from neuralforecast.models import LSTM, NHITS, RNN, PatchTST
 
 
 
 
 
 
 
 
 
 
 
6
 
7
 
8
  data = pd.read_csv("terheles_fixed.tsv", sep="\t")
@@ -14,39 +28,151 @@ data['unique_id'] = 1
14
 
15
  data = data[data['ds'] < '2019-09-01']
16
  Y_df = data
17
- print(Y_df)
 
18
 
19
 
20
  horizon = 4 * 24 * 7 # 7 days
21
 
22
- # Try different hyperparmeters to improve accuracy.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
23
  models = [
24
- PatchTST(h=horizon, # Forecast horizon
25
- input_size=2 * horizon, # Length of input sequence
26
- max_steps=20, # Number of steps to train
27
- scaler_type='standard'), # Type of scaler to normalize data
28
-
29
- NHITS(h=horizon, # Forecast horizon
30
- input_size=2 * horizon, # Length of input sequence
31
- max_steps=100, # Number of steps to train
32
- n_freq_downsample=[2, 1, 1]) # Downsampling factors for each stack output
33
- ]
34
- '''
35
- LSTM(h=horizon, # Forecast horizon
36
- max_steps=100, # Number of steps to train
37
- scaler_type='standard', # Type of scaler to normalize data
38
- encoder_hidden_size=64, # Defines the size of the hidden state of the LSTM
39
- decoder_hidden_size=64,), # Defines the number of hidden units of each layer of the MLP decoder
40
- '''
41
-
42
-
43
- nf = NeuralForecast(models=models, freq='15min')
44
-
45
- shorter_Y_df = Y_df[Y_df['ds'] < '2019-08-01']
46
- print("-=======-")
47
- print(len(shorter_Y_df))
48
- print(shorter_Y_df)
49
- nf.fit(df=shorter_Y_df)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
50
 
51
  Y_hat_df = nf.predict()
52
 
 
1
+ import os
2
+
3
+ import numpy as np
4
  import pandas as pd
5
  import matplotlib.pyplot as plt
6
 
7
+ from statsforecast import StatsForecast
8
+ from statsforecast.models import (
9
+ AutoARIMA,
10
+ AutoETS,
11
+ AutoCES,
12
+ DynamicOptimizedTheta,
13
+ SeasonalNaive,
14
+ )
15
+
16
+
17
+ os.environ["NIXTLA_NUMBA_RELEASE_GIL"] = "1"
18
+ os.environ["NIXTLA_NUMBA_CACHE"] = "1"
19
+
20
 
21
 
22
  data = pd.read_csv("terheles_fixed.tsv", sep="\t")
 
28
 
29
  data = data[data['ds'] < '2019-09-01']
30
  Y_df = data
31
+
32
+ train_df = Y_df[Y_df['ds'] < '2019-08-01']
33
 
34
 
35
  horizon = 4 * 24 * 7 # 7 days
36
 
37
+
38
+
39
+ def ensemble_forecasts(
40
+ fcsts_df,
41
+ quantiles,
42
+ name_models,
43
+ model_name,
44
+ ) -> pd.DataFrame:
45
+ fcsts_df[model_name] = fcsts_df[name_models].mean(axis=1).values # type: ignore
46
+ # compute quantiles based on the mean of the forecasts
47
+ sigma_models = []
48
+ for model in name_models:
49
+ fcsts_df[f"sigma_{model}"] = fcsts_df[f"{model}-hi-68.27"] - fcsts_df[model]
50
+ sigma_models.append(f"sigma_{model}")
51
+ fcsts_df[f"std_{model_name}"] = (
52
+ fcsts_df[sigma_models].pow(2).sum(axis=1).div(len(sigma_models) ** 2).pow(0.5)
53
+ )
54
+ z = norm.ppf(quantiles)
55
+ q_cols = []
56
+ for q, zq in zip(quantiles, z):
57
+ q_col = f"{model_name}-q-{q}"
58
+ fcsts_df[q_col] = fcsts_df[model_name] + zq * fcsts_df[f"std_{model_name}"]
59
+ q_cols.append(q_col)
60
+ fcsts_df = fcsts_df[["unique_id", "ds"] + [model_name] + q_cols]
61
+ return fcsts_df
62
+
63
+
64
+ def run_statistical_ensemble(
65
+ train_df: pd.DataFrame,
66
+ horizon: int,
67
+ freq: str,
68
+ seasonality: int,
69
+ quantiles,
70
+ ):
71
+ os.environ["NIXTLA_ID_AS_COL"] = "true"
72
+ models = [
73
+ AutoARIMA(season_length=seasonality),
74
+ AutoETS(season_length=seasonality),
75
+ AutoCES(season_length=seasonality),
76
+ DynamicOptimizedTheta(season_length=seasonality),
77
+ ]
78
+ init_time = time()
79
+ series_per_core = 15
80
+ n_series = train_df["unique_id"].nunique()
81
+ n_jobs = min(n_series // series_per_core, os.cpu_count())
82
+ sf = StatsForecast(
83
+ models=models,
84
+ freq=freq,
85
+ n_jobs=n_jobs,
86
+ )
87
+ fcsts_df = sf.forecast(df=train_df, h=horizon, level=[68.27])
88
+ name_models = [repr(model) for model in models]
89
+ model_name = "StatisticalEnsemble"
90
+ fcsts_df = ensemble_forecasts(
91
+ fcsts_df,
92
+ quantiles,
93
+ name_models,
94
+ model_name,
95
+ )
96
+ total_time = time() - init_time
97
+ return fcsts_df, total_time, model_name
98
+
99
+
100
+ seasonality = 4 * 24 * 7 # 1 week
101
  models = [
102
+ AutoARIMA(season_length=seasonality),
103
+ AutoETS(season_length=seasonality),
104
+ AutoCES(season_length=seasonality),
105
+ DynamicOptimizedTheta(season_length=seasonality),
106
+ ]
107
+ freq = '15min'
108
+ sf = StatsForecast(
109
+ models=models[:1],
110
+ freq=freq,
111
+ n_jobs=1,
112
+ )
113
+
114
+ print("starting forecast, dataset size", len(train_df))
115
+ Y_hat_df = sf.forecast(df=train_df, h=horizon, level=[68.27])
116
+
117
+ print(Y_hat_df)
118
+
119
+ Y_hat_df = Y_hat_df.reset_index()
120
+
121
+ fig, ax = plt.subplots(1, 1, figsize = (20, 7))
122
+
123
+ # plot_df = pd.concat([Y_df, Y_hat_df]).set_index('ds') # Concatenate the train and forecast dataframes
124
+ # plot_df[['y', 'LSTM', 'NHITS']].plot(ax=ax, linewidth=2)
125
+
126
+ plot_Y_df = Y_df[Y_df['ds'] > '2019-07-01']
127
+ plot_Y_df = plot_Y_df.set_index('ds')[['y']]
128
+ plot_Y_df.plot(ax=ax, linewidth=1)
129
+ Y_hat_df.set_index('ds')[['PatchTST', 'NHITS']].plot(ax=ax, linewidth=1)
130
+
131
+
132
+ ax.set_title('AirPassengers Forecast', fontsize=22)
133
+ ax.set_ylabel('Monthly Passengers', fontsize=20)
134
+ ax.set_xlabel('Timestamp [t]', fontsize=20)
135
+ ax.legend(prop={'size': 15})
136
+ ax.grid()
137
+
138
+ plt.savefig("neuralforecast.pdf")
139
+
140
+
141
+
142
+ exit()
143
+
144
+
145
+
146
+
147
+
148
+
149
+
150
+
151
+
152
+
153
+
154
+
155
+
156
+
157
+
158
+
159
+
160
+
161
+
162
+
163
+
164
+
165
+ quantiles = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]
166
+ fcst_df, total_time, model_name = run_statistical_ensemble(
167
+ train_df,
168
+ horizon=horizon,
169
+ freq='15m',
170
+ seasonality=4 * 24 * 7,
171
+ quantiles=quantiles
172
+ )
173
+
174
+
175
+ nf.fit(df=train_df)
176
 
177
  Y_hat_df = nf.predict()
178