"""Backtest a seasonal-naive forecaster on the 2021 consumption series.

Running this file as a script loads the data through ``data_processing``,
evaluates a ``NaiveForecaster`` with an expanding-window cross-validation,
prints the per-fold predictions and plots the first few folds against the
true series.

A simple hold-out evaluation (train on the first half, score the second half
with MAPE) is available through ``run_holdout`` / ``main(holdout=True)``.
It is not run by default.
"""
import pandas as pd
import matplotlib.pyplot as plt
from sktime.datasets import load_airline
from sktime.forecasting.base import ForecastingHorizon
from sktime.forecasting.naive import NaiveForecaster
from sktime.forecasting.arima import AutoARIMA
from sktime.forecasting.ets import AutoETS
from sktime.performance_metrics.forecasting import MeanAbsolutePercentageError
from sktime.split import temporal_train_test_split
from sktime.split import ExpandingWindowSplitter
from sktime.forecasting.model_evaluation import evaluate
from sktime.utils.plotting import plot_series

from data_processing import (
    read_datasets,
    add_production_field,
    interpolate_and_join,
    SolarParameters,
)

# The data uses a 5-minute timestep, so one day is 12 * 24 samples.
PERIOD = 12 * 24

# Only observations whose index is at or before this value are kept.
CUTOFF = "2021-01-20"

# Upper bound on how many backtest folds are drawn on the plot.
MAX_PLOTTED_FOLDS = 6


def load_consumption(cutoff: str = CUTOFF) -> pd.DataFrame:
    """Load the joined dataset and return the consumption series as ``y``.

    Args:
        cutoff: Rows whose index compares greater than this value are
            dropped (the comparison is ``index <= cutoff``).

    Returns:
        A single-column selection (column ``"y"``, copied from
        ``"Consumption"``) of the joined data, truncated at ``cutoff``.
        NOTE(review): presumably a DataFrame with a datetime index; this
        depends on what ``interpolate_and_join`` returns - confirm.
    """
    parameters = SolarParameters()
    met_2021_data, cons_2021_data = read_datasets()
    # The return value is not used here; presumably this updates
    # met_2021_data in place - verify against data_processing.
    add_production_field(met_2021_data, parameters)
    all_data = interpolate_and_join(met_2021_data, cons_2021_data)

    all_data["y"] = all_data["Consumption"]
    y = all_data[["y"]]
    y = y[y.index <= cutoff]
    print(len(y["y"]), "data points read")
    return y


def make_forecaster(period: int = PERIOD) -> NaiveForecaster:
    """Build the forecaster under test.

    Args:
        period: Seasonal periodicity passed to the forecaster as ``sp``.

    Returns:
        A ``NaiveForecaster`` using the ``"last"`` strategy.
    """
    # Alternatives to swap in when experimenting:
    #   AutoETS(auto=True, sp=period, n_jobs=-1)
    #   AutoARIMA(sp=period, suppress_warnings=True)
    return NaiveForecaster(strategy="last", sp=period)


def run_backtest(
    y,
    forecaster,
    period: int = PERIOD,
    max_plotted: int = MAX_PLOTTED_FOLDS,
) -> pd.DataFrame:
    """Evaluate ``forecaster`` with an expanding window and plot the folds.

    The splitter starts with ``period * 10`` observations, grows by
    ``period`` observations per fold and forecasts steps 1..12 ahead.

    Args:
        y: Target series to backtest on.
        forecaster: sktime forecaster to evaluate.
        period: Number of observations per step of the expanding window.
        max_plotted: Maximum number of folds whose predictions are plotted.

    Returns:
        The results table produced by ``evaluate(..., return_data=True)``.
    """
    cv = ExpandingWindowSplitter(
        step_length=period,
        fh=list(range(1, 13)),
        initial_window=period * 10,
        # Smaller configuration for quick runs:
        # step_length=period, fh=[1, 2], initial_window=period*2
    )
    df = evaluate(
        forecaster=forecaster,
        y=y,
        cv=cv,
        strategy="no-update_params",
        return_data=True,
    )
    print(df["y_pred"])

    # Plot only as many folds as the backtest produced, so a short series
    # with fewer than `max_plotted` folds does not raise an IndexError.
    n_folds = min(max_plotted, len(df))
    fold_predictions = [df["y_pred"].iloc[i] for i in range(n_folds)]
    _fig, ax = plot_series(
        y,
        *fold_predictions,
        markers=["o"] + [""] * n_folds,
        labels=["y_true"]
        + [f"y_pred (Backtest {i})" for i in range(n_folds)],
    )
    ax.legend()
    plt.show()
    return df


def run_holdout(y, forecaster):
    """Fit on the first half of ``y`` and score the forecast of the rest.

    Args:
        y: Target series; the last ``len(y.index) // 2`` observations are
            held out as the test set.
        forecaster: sktime forecaster; it is fitted by this call.

    Returns:
        The non-symmetric MAPE of the forecast on the test set (this value
        is also printed).
    """
    y_train, y_test = temporal_train_test_split(
        y, test_size=len(y.index) // 2
    )

    # Step 2: run the basic forecasting workflow on absolute test indices.
    fh = ForecastingHorizon(y_test.index, is_relative=False)
    forecaster.fit(y_train)
    y_pred = forecaster.predict(fh)
    plot_series(y_train, y_test, y_pred, labels=["y_train", "y_test", "y_pred"])
    plt.show()

    # Step 3: specify the evaluation metric.
    mape = MeanAbsolutePercentageError(symmetric=False)

    # Step 4: compute the forecast performance.
    score = mape(y_test, y_pred)
    print(score)
    return score


def main(holdout: bool = False) -> None:
    """Run the backtest, optionally followed by the hold-out evaluation.

    Args:
        holdout: When true, also run ``run_holdout`` after the backtest.
            Defaults to false, so by default the script stops after the
            backtest plot.
    """
    y = load_consumption()
    forecaster = make_forecaster()
    run_backtest(y, forecaster)
    if holdout:
        run_holdout(y, forecaster)


if __name__ == "__main__":
    main()