LSTM Example

This example forecasts the monthly AirPassengers series with scalecast's lstm estimator, iterating on the network's hyperparameters across five attempts, then benchmarks the results against a differenced multiple linear regression (MLR) model.

[1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from scalecast.Forecaster import Forecaster

sns.set(rc={'figure.figsize':(15,8)})

Data Preprocessing

[2]:
data = pd.read_csv('AirPassengers.csv',parse_dates=['Month'])
[3]:
data.head()
[3]:
Month #Passengers
0 1949-01-01 112
1 1949-02-01 118
2 1949-03-01 132
3 1949-04-01 129
4 1949-05-01 121
[4]:
data.shape
[4]:
(144, 2)
[5]:
data['Month'].min()
[5]:
Timestamp('1949-01-01 00:00:00')
[6]:
data['Month'].max()
[6]:
Timestamp('1960-12-01 00:00:00')
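
The 144 monthly observations run from January 1949 through December 1960. As a quick check (not in the original notebook), pandas can confirm the month-start frequency that the Forecaster object infers below (Freq=MS):

[ ]:
# optional check: infer the sampling frequency directly from the date column
pd.infer_freq(data['Month'])  # expected to return 'MS' (month start)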

EDA

[7]:
f = Forecaster(y=data['#Passengers'],current_dates=data['Month'])
f.plot()
../../_images/Forecaster_examples_LSTM_9_0.png
[8]:
f
[8]:
Forecaster(
    DateStartActuals=1949-01-01T00:00:00.000000000
    DateEndActuals=1960-12-01T00:00:00.000000000
    Freq=MS
    ForecastLength=0
    Xvars=[]
    Differenced=0
    TestLength=1
    ValidationLength=1
    ValidationMetric=rmse
    CILevel=0.95
    BootstrapSamples=100
)
[9]:
f.plot_acf(lags=26)
plt.show()
../../_images/Forecaster_examples_LSTM_11_0.png
[10]:
f.plot_pacf(lags=26)
plt.show()
../../_images/Forecaster_examples_LSTM_12_0.png
[11]:
f.seasonal_decompose().plot()
plt.show()
../../_images/Forecaster_examples_LSTM_13_0.png
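
The decomposition above uses statsmodels' additive model by default. Because the seasonal swings in this series grow with the trend, a multiplicative decomposition is often a better description; a minimal sketch, assuming seasonal_decompose() passes extra keyword arguments through to statsmodels:

[ ]:
# hypothetical variant: multiplicative decomposition of the same series
f.seasonal_decompose(model='multiplicative').plot()
plt.show()
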
[12]:
stat, pval, _, _, _, _ = f.adf_test(full_res=True)
print(stat)
print(pval)
0.8153688792060569
0.9918802434376411
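
With a test statistic of 0.82 and a p-value near 0.99, the ADF test cannot reject the unit-root null: the series is non-stationary in levels, which is why the MLR section below differences it with f.diff(). As a quick check (not part of the original notebook), the same test can be run on the first-differenced series with statsmodels directly:

[ ]:
# re-run the ADF test on the first difference of the series
from statsmodels.tsa.stattools import adfuller

stat_d, pval_d = adfuller(data['#Passengers'].diff().dropna())[:2]
print(stat_d, pval_d)  # a much lower p-value here supports first differencing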

LSTM Model

[13]:
f.set_test_length(12)
f.generate_future_dates(12)
f.set_estimator('lstm')
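
Each attempt below follows the same pattern: call manual_forecast() with a new set of hyperparameters and a distinct call_me nickname, then inspect the fit with plot_test_set(). Each attempt also prints two training runs, because the network is fit once to evaluate the test set and once more on the full series to produce the forecast. Purely as an illustration (not part of the original notebook), the repeated pattern could be wrapped in a small helper; the cells that follow call the methods directly instead:

[ ]:
# illustrative helper for the repeated fit-and-plot pattern used below
def run_lstm(fcster, name, **kwargs):
    """Fit one LSTM configuration and plot its test-set performance."""
    fcster.manual_forecast(call_me=name, **kwargs)
    fcster.plot_test_set(ci=True)
    plt.show()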

Attempt 1

Run the lstm estimator with all of its default hyperparameters.

[14]:
f.manual_forecast(call_me='lstm_default')
f.plot_test_set(ci=True)
4/4 [==============================] - 1s 3ms/step - loss: 0.3460
5/5 [==============================] - 1s 2ms/step - loss: 0.3293
../../_images/Forecaster_examples_LSTM_18_1.png

Attempt 2

Feed the network 24 lags of the series.

[15]:
f.manual_forecast(call_me='lstm_24lags',lags=24)
f.plot_test_set(ci=True)
3/3 [==============================] - 1s 6ms/step - loss: 0.4245
4/4 [==============================] - 1s 9ms/step - loss: 0.3630
../../_images/Forecaster_examples_LSTM_20_1.png

Attempt 3

Keep 24 lags, but train for 5 epochs with a 20% validation split and shuffled training batches.

[16]:
f.manual_forecast(call_me='lstm_24lags_5epochs',lags=24,epochs=5,validation_split=.2,shuffle=True)
f.plot_test_set(ci=True)
Epoch 1/5
3/3 [==============================] - 2s 191ms/step - loss: 0.4728 - val_loss: 0.1959
Epoch 2/5
3/3 [==============================] - 0s 16ms/step - loss: 0.4631 - val_loss: 0.1895
Epoch 3/5
3/3 [==============================] - 0s 15ms/step - loss: 0.4533 - val_loss: 0.1830
Epoch 4/5
3/3 [==============================] - 0s 14ms/step - loss: 0.4432 - val_loss: 0.1765
Epoch 5/5
3/3 [==============================] - 0s 17ms/step - loss: 0.4331 - val_loss: 0.1698
Epoch 1/5
3/3 [==============================] - 2s 174ms/step - loss: 0.4601 - val_loss: 0.1779
Epoch 2/5
3/3 [==============================] - 0s 15ms/step - loss: 0.4520 - val_loss: 0.1725
Epoch 3/5
3/3 [==============================] - 0s 17ms/step - loss: 0.4442 - val_loss: 0.1672
Epoch 4/5
3/3 [==============================] - 0s 17ms/step - loss: 0.4362 - val_loss: 0.1619
Epoch 5/5
3/3 [==============================] - 0s 20ms/step - loss: 0.4285 - val_loss: 0.1566
../../_images/Forecaster_examples_LSTM_22_1.png

Attempt 4

Train for up to 25 epochs with early stopping on validation loss (patience of 5), stacking three 16-unit LSTM layers with no dropout.

[17]:
from tensorflow.keras.callbacks import EarlyStopping
f.manual_forecast(call_me='lstm_24lags_earlystop_3layers',
                  lags=24,
                  epochs=25,
                  validation_split=.2,
                  shuffle=True,
                  callbacks=EarlyStopping(monitor='val_loss',
                                          patience=5),
                  lstm_layer_sizes=(16,16,16),
                  dropout=(0,0,0))
f.plot_test_set(ci=True)
Epoch 1/25
3/3 [==============================] - 5s 429ms/step - loss: 0.4558 - val_loss: 0.1840
Epoch 2/25
3/3 [==============================] - 0s 32ms/step - loss: 0.4332 - val_loss: 0.1693
Epoch 3/25
3/3 [==============================] - 0s 30ms/step - loss: 0.4066 - val_loss: 0.1506
Epoch 4/25
3/3 [==============================] - 0s 36ms/step - loss: 0.3715 - val_loss: 0.1265
Epoch 5/25
3/3 [==============================] - 0s 34ms/step - loss: 0.3267 - val_loss: 0.1022
Epoch 6/25
3/3 [==============================] - 0s 36ms/step - loss: 0.2773 - val_loss: 0.0927
Epoch 7/25
3/3 [==============================] - 0s 36ms/step - loss: 0.2318 - val_loss: 0.1061
Epoch 8/25
3/3 [==============================] - 0s 36ms/step - loss: 0.2014 - val_loss: 0.1344
Epoch 9/25
3/3 [==============================] - 0s 34ms/step - loss: 0.1833 - val_loss: 0.1470
Epoch 10/25
3/3 [==============================] - 0s 38ms/step - loss: 0.1687 - val_loss: 0.1388
Epoch 11/25
3/3 [==============================] - 0s 38ms/step - loss: 0.1551 - val_loss: 0.1213
Epoch 1/25
3/3 [==============================] - 4s 463ms/step - loss: 0.4363 - val_loss: 0.1611
Epoch 2/25
3/3 [==============================] - 0s 38ms/step - loss: 0.4122 - val_loss: 0.1417
Epoch 3/25
3/3 [==============================] - 0s 31ms/step - loss: 0.3800 - val_loss: 0.1166
Epoch 4/25
3/3 [==============================] - 0s 30ms/step - loss: 0.3379 - val_loss: 0.0927
Epoch 5/25
3/3 [==============================] - 0s 33ms/step - loss: 0.2842 - val_loss: 0.0887
Epoch 6/25
3/3 [==============================] - 0s 37ms/step - loss: 0.2350 - val_loss: 0.1134
Epoch 7/25
3/3 [==============================] - 0s 39ms/step - loss: 0.2065 - val_loss: 0.1444
Epoch 8/25
3/3 [==============================] - 0s 35ms/step - loss: 0.1936 - val_loss: 0.1461
Epoch 9/25
3/3 [==============================] - 0s 36ms/step - loss: 0.1735 - val_loss: 0.1193
Epoch 10/25
3/3 [==============================] - 0s 35ms/step - loss: 0.1508 - val_loss: 0.0893
../../_images/Forecaster_examples_LSTM_24_1.png

Attempt 5

The most heavily tuned configuration (nicknamed lstm_best): 36 lags, four 72-unit LSTM layers with no dropout, the tanh activation, the Adam optimizer with a learning rate of 0.001, a batch size of 32, 15 epochs, and plot_loss=True to chart the loss history.

[22]:
f.manual_forecast(call_me='lstm_best',
                  lags=36,
                  batch_size=32,
                  epochs=15,
                  validation_split=.2,
                  shuffle=True,
                  activation='tanh',
                  optimizer='Adam',
                  learning_rate=0.001,
                  lstm_layer_sizes=(72,)*4,
                  dropout=(0,)*4,
                  plot_loss=True)
f.plot_test_set(order_by='LevelTestSetMAPE',models='top_2',ci=True)
Epoch 1/15
3/3 [==============================] - 7s 729ms/step - loss: 0.4853 - val_loss: 0.2078
Epoch 2/15
3/3 [==============================] - 0s 98ms/step - loss: 0.3940 - val_loss: 0.0994
Epoch 3/15
3/3 [==============================] - 0s 117ms/step - loss: 0.2115 - val_loss: 0.1568
Epoch 4/15
3/3 [==============================] - 0s 117ms/step - loss: 0.1480 - val_loss: 0.0611
Epoch 5/15
3/3 [==============================] - 0s 118ms/step - loss: 0.1177 - val_loss: 0.0646
Epoch 6/15
3/3 [==============================] - 0s 114ms/step - loss: 0.1161 - val_loss: 0.0677
Epoch 7/15
3/3 [==============================] - 0s 114ms/step - loss: 0.1049 - val_loss: 0.0551
Epoch 8/15
3/3 [==============================] - 0s 112ms/step - loss: 0.0993 - val_loss: 0.0548
Epoch 9/15
3/3 [==============================] - 0s 115ms/step - loss: 0.0957 - val_loss: 0.0562
Epoch 10/15
3/3 [==============================] - 0s 111ms/step - loss: 0.0967 - val_loss: 0.0561
Epoch 11/15
3/3 [==============================] - 0s 109ms/step - loss: 0.0925 - val_loss: 0.0531
Epoch 12/15
3/3 [==============================] - 0s 105ms/step - loss: 0.0987 - val_loss: 0.0592
Epoch 13/15
3/3 [==============================] - 0s 115ms/step - loss: 0.0948 - val_loss: 0.0720
Epoch 14/15
3/3 [==============================] - 0s 114ms/step - loss: 0.0939 - val_loss: 0.0515
Epoch 15/15
3/3 [==============================] - 0s 109ms/step - loss: 0.0935 - val_loss: 0.0532
Epoch 1/15
3/3 [==============================] - 6s 674ms/step - loss: 0.4620 - val_loss: 0.1708
Epoch 2/15
3/3 [==============================] - 0s 97ms/step - loss: 0.3402 - val_loss: 0.1124
Epoch 3/15
3/3 [==============================] - 0s 104ms/step - loss: 0.2069 - val_loss: 0.1296
Epoch 4/15
3/3 [==============================] - 0s 113ms/step - loss: 0.1372 - val_loss: 0.0567
Epoch 5/15
3/3 [==============================] - 0s 117ms/step - loss: 0.1333 - val_loss: 0.0756
Epoch 6/15
3/3 [==============================] - 0s 118ms/step - loss: 0.1093 - val_loss: 0.1006
Epoch 7/15
3/3 [==============================] - 0s 126ms/step - loss: 0.1034 - val_loss: 0.0534
Epoch 8/15
3/3 [==============================] - 0s 124ms/step - loss: 0.0989 - val_loss: 0.0519
Epoch 9/15
3/3 [==============================] - 0s 119ms/step - loss: 0.0895 - val_loss: 0.0615
Epoch 10/15
3/3 [==============================] - 0s 119ms/step - loss: 0.0905 - val_loss: 0.0473
Epoch 11/15
3/3 [==============================] - 0s 122ms/step - loss: 0.0881 - val_loss: 0.0469
Epoch 12/15
3/3 [==============================] - 0s 122ms/step - loss: 0.0847 - val_loss: 0.0476
Epoch 13/15
3/3 [==============================] - 0s 120ms/step - loss: 0.0853 - val_loss: 0.0524
Epoch 14/15
3/3 [==============================] - 0s 125ms/step - loss: 0.0845 - val_loss: 0.0501
Epoch 15/15
3/3 [==============================] - 0s 120ms/step - loss: 0.0833 - val_loss: 0.0521
../../_images/Forecaster_examples_LSTM_26_1.png
../../_images/Forecaster_examples_LSTM_26_2.png
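
Rather than tuning one configuration per cell, several candidates can also be fit in a loop with the same API (scalecast additionally offers grid search through ingest_grid(), tune(), and auto_forecast(), though support and runtime for the lstm estimator depend on your version). A purely illustrative sketch; the model nicknames and settings here are hypothetical:

[ ]:
# loop over a few candidate configurations instead of separate cells
configs = {
    'lstm_36lags_2layers': dict(lags=36, epochs=15, lstm_layer_sizes=(72, 72), dropout=(0, 0)),
    'lstm_36lags_3layers': dict(lags=36, epochs=15, lstm_layer_sizes=(72, 72, 72), dropout=(0, 0, 0)),
}
for name, kwargs in configs.items():
    f.manual_forecast(call_me=name, validation_split=.2, shuffle=True, **kwargs)
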

MLR Modeling

[23]:
f.set_estimator('mlr')
f.add_ar_terms(24)
f.add_seasonal_regressors('month','quarter',dummy=True)
f.add_seasonal_regressors('year')
f.add_time_trend()
f.diff()
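
This cell adds 24 autoregressive terms, month and quarter dummies, the year, and a linear time trend as regressors, then takes a first difference of the series (motivated by the ADF result above). As an optional check (not in the original notebook), the Forecaster repr now lists these regressors and reflects the differencing:

[ ]:
# sanity check: Xvars should include AR1-AR24, month_*/quarter_* dummies, year, and t,
# and Differenced should read 1
print(f)
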
[24]:
f.manual_forecast()
f.plot_test_set(order_by='LevelTestSetMAPE',models='top_2')
../../_images/Forecaster_examples_LSTM_29_0.png
[32]:
f.plot_test_set(models='mlr',ci=True)
../../_images/Forecaster_examples_LSTM_30_0.png
[45]:
f.plot(order_by='LevelTestSetMAPE',models='top_2')
../../_images/Forecaster_examples_LSTM_31_0.png

Benchmarking

[25]:
f.export('model_summaries',determine_best_by='LevelTestSetMAPE')[
    ['ModelNickname','LevelTestSetMAPE','LevelTestSetRMSE','LevelTestSetR2','best_model']
]
[25]:
ModelNickname LevelTestSetMAPE LevelTestSetRMSE LevelTestSetR2 best_model
0 mlr 0.023420 13.932124 0.964960 True
1 lstm_best 0.113758 83.110945 -0.246947 False
2 lstm_24lags_earlystop_3layers 0.313254 176.288432 -4.610212 False
3 lstm_24lags_5epochs 0.719614 360.578667 -22.471004 False
4 lstm_default 0.766014 377.468538 -24.721313 False
5 lstm_24lags 0.789948 395.179590 -27.191658 False
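
The differenced MLR wins decisively on the level test set (MAPE of about 2.3%), while the best LSTM configuration lands near 11% and the untuned LSTMs fare far worse. The same summary can be re-ranked by another stored metric; a small sketch, assuming determine_best_by accepts any of the exported level test-set columns:

[ ]:
# rank the same models by level test-set RMSE instead of MAPE
f.export('model_summaries', determine_best_by='LevelTestSetRMSE')[
    ['ModelNickname', 'LevelTestSetRMSE', 'best_model']
]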

Export Results

[35]:
f.export_forecasts_with_cis('mlr')
[35]:
DATE UpperForecast Forecast LowerForecast ModelNickname
0 1961-01-01 48.200665 30.246319 12.291972 mlr
1 1961-02-01 -26.568385 -44.522731 -62.477077 mlr
2 1961-03-01 61.632285 43.677939 25.723593 mlr
3 1961-04-01 47.017771 29.063425 11.109079 mlr
4 1961-05-01 30.351736 12.397390 -5.556956 mlr
5 1961-06-01 91.214331 73.259985 55.305639 mlr
6 1961-07-01 123.495172 105.540825 87.586479 mlr
7 1961-08-01 -15.632798 -33.587144 -51.541490 mlr
8 1961-09-01 -72.577979 -90.532325 -108.486671 mlr
9 1961-10-01 -41.418451 -59.372797 -77.327144 mlr
10 1961-11-01 -54.486169 -72.440515 -90.394862 mlr
11 1961-12-01 66.525041 48.570694 30.616348 mlr
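
Because the series was differenced with f.diff(), the values exported above (and in the test-set export below) are month-over-month differences rather than passenger counts, which is why several are negative. A minimal sketch (not in the original notebook) of recovering level forecasts by cumulatively summing the differences onto the last observed value:

[ ]:
# undo the first difference manually to put the point forecasts back on the original scale
fcst = f.export_forecasts_with_cis('mlr')
last_level = data['#Passengers'].iloc[-1]            # the December 1960 observation
fcst['LevelForecast'] = fcst['Forecast'].cumsum() + last_level
fcst[['DATE', 'LevelForecast']]
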
[36]:
f.export_test_set_preds_with_cis('mlr')
[36]:
DATE UpperPreds Preds Actuals LowerPreds ModelNickname
0 1960-01-01 28.556057 10.601711 12.0 -7.352636 mlr
1 1960-02-01 1.231249 -16.723097 -26.0 -34.677443 mlr
2 1960-03-01 66.376317 48.421971 28.0 30.467624 mlr
3 1960-04-01 3.388136 -14.566210 42.0 -32.520557 mlr
4 1960-05-01 50.838291 32.883945 11.0 14.929599 mlr
5 1960-06-01 78.852376 60.898029 63.0 42.943683 mlr
6 1960-07-01 104.158996 86.204650 87.0 68.250303 mlr
7 1960-08-01 23.959542 6.005196 -16.0 -11.949150 mlr
8 1960-09-01 -86.291126 -104.245472 -98.0 -122.199818 mlr
9 1960-10-01 -50.666725 -68.621071 -47.0 -86.575417 mlr
10 1960-11-01 -43.483259 -61.437606 -71.0 -79.391952 mlr
11 1960-12-01 67.033891 49.079545 42.0 31.125199 mlr

Export Feature Info

[37]:
f.save_feature_importance()
f.export_feature_importance('mlr')
[37]:
weight std
feature
AR1 0.353654 0.040011
AR12 0.212440 0.035555
AR4 0.195865 0.031963
AR10 0.172288 0.016409
AR21 0.144722 0.014716
AR13 0.110973 0.008805
AR2 0.103469 0.011157
AR18 0.095767 0.006091
AR20 0.080367 0.014563
AR11 0.078889 0.015673
AR3 0.072955 0.005806
AR7 0.064859 0.010104
AR8 0.061835 0.005144
AR22 0.059363 0.004871
month_9 0.047820 0.004102
AR9 0.043480 0.009598
AR14 0.032727 0.003600
AR24 0.026527 0.006712
AR17 0.024978 0.004891
AR23 0.020637 0.005097
AR19 0.018921 0.002676
month_7 0.015187 0.002255
month_2 0.010931 0.001847
AR5 0.009453 0.004327
month_10 0.006637 0.001670
month_8 0.006208 0.003219
AR16 0.005731 0.001594
month_11 0.003629 0.001834
month_3 0.003575 0.001255
AR6 0.003097 0.000892
year 0.001463 0.001607
month_1 0.001226 0.000950
month 0.000897 0.000571
AR15 0.000765 0.000396
month_12 0.000628 0.000638
t 0.000578 0.000714
month_6 0.000424 0.000652
quarter_2 0.000368 0.000750
quarter_1 0.000236 0.000311
quarter_4 0.000116 0.000379
month_4 0.000106 0.000160
quarter_3 0.000030 0.000179
quarter -0.000020 0.000130
month_5 -0.000115 0.000365
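
The weight and std columns summarize each regressor's estimated importance to the MLR model (in scalecast these are typically derived from permutation importance via the eli5 package). The recent AR terms and the 12-month lag dominate, and month_9 is the strongest calendar dummy. The exported frame is an ordinary DataFrame, so it can be sliced to the strongest features (not in the original notebook):

[ ]:
# optional: view only the ten most important regressors
fi = f.export_feature_importance('mlr')
fi.sort_values('weight', ascending=False).head(10)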