Coverage for /home/martinb/.local/share/virtualenvs/camcops/lib/python3.6/site-packages/statsmodels/tsa/statespace/exponential_smoothing.py : 13%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1"""
2Linear exponential smoothing models
4Author: Chad Fulton
5License: BSD-3
6"""
8import numpy as np
9import pandas as pd
10from statsmodels.base.data import PandasData
12from statsmodels.genmod.generalized_linear_model import GLM
13from statsmodels.tools.validation import (array_like, bool_like, float_like,
14 string_like, int_like)
16from statsmodels.tsa.exponential_smoothing import initialization as es_init
17from statsmodels.tsa.statespace import initialization as ss_init
18from statsmodels.tsa.statespace.kalman_filter import (
19 MEMORY_CONSERVE, MEMORY_NO_FORECAST)
21from statsmodels.compat.pandas import Appender
22import statsmodels.base.wrapper as wrap
24from statsmodels.iolib.summary import forg
25from statsmodels.iolib.table import SimpleTable
26from statsmodels.iolib.tableformatting import fmt_params
28from .mlemodel import MLEModel, MLEResults, MLEResultsWrapper
31class ExponentialSmoothing(MLEModel):
32 """
33 Linear exponential smoothing models
35 Parameters
36 ----------
37 endog : array_like
38 The observed time-series process :math:`y`
39 trend : bool, optional
40 Whether or not to include a trend component. Default is False.
41 damped_trend : bool, optional
42 Whether or not an included trend component is damped. Default is False.
43 seasonal : int, optional
44 The number of periods in a complete seasonal cycle for seasonal
45 (Holt-Winters) models. For example, 4 for quarterly data with an
46 annual cycle or 7 for daily data with a weekly cycle. Default is
47 no seasonal effects.
48 initialization_method : str, optional
49 Method for initialize the recursions. One of:
51 * 'estimated'
52 * 'concentrated'
53 * 'heuristic'
54 * 'known'
56 If 'known' initialization is used, then `initial_level` must be
57 passed, as well as `initial_slope` and `initial_seasonal` if
58 applicable. Default is 'estimated'.
59 initial_level : float, optional
60 The initial level component. Only used if initialization is 'known'.
61 initial_trend : float, optional
62 The initial trend component. Only used if initialization is 'known'.
63 initial_seasonal : array_like, optional
64 The initial seasonal component. An array of length `seasonal`
65 or length `seasonal - 1` (in which case the last initial value
66 is computed to make the average effect zero). Only used if
67 initialization is 'known'.
68 bounds : iterable[tuple], optional
69 An iterable containing bounds for the parameters. Must contain four
70 elements, where each element is a tuple of the form (lower, upper).
71 Default is (0.0001, 0.9999) for the level, trend, and seasonal
72 smoothing parameters and (0.8, 0.98) for the trend damping parameter.
73 concentrate_scale : bool, optional
74 Whether or not to concentrate the scale (variance of the error term)
75 out of the likelihood.
77 Notes
78 -----
79 The parameters and states of this model are estimated by setting up the
80 exponential smoothing equations as a special case of a linear Gaussian
81 state space model and applying the Kalman filter. As such, it has slightly
82 worse performance than the dedicated exponential smoothing model,
83 `sm.tsa.ExponentialSmoothing`, and it does not support multiplicative
84 (nonlinear) exponential smoothing models.
86 However, as a subclass of the state space models, this model class shares
87 a consistent set of functionality with those models, which can make it
88 easier to work with. In addition, it supports computing confidence
89 intervals for forecasts and it supports concentrating the initial
90 state out of the likelihood function.
92 References
93 ----------
94 [1] Hyndman, Rob, Anne B. Koehler, J. Keith Ord, and Ralph D. Snyder.
95 Forecasting with exponential smoothing: the state space approach.
96 Springer Science & Business Media, 2008.
97 """
98 def __init__(self, endog, trend=False, damped_trend=False, seasonal=None,
99 initialization_method='estimated', initial_level=None,
100 initial_trend=None, initial_seasonal=None, bounds=None,
101 concentrate_scale=True, dates=None, freq=None,
102 missing='none'):
103 # Model definition
104 self.trend = bool_like(trend, 'trend')
105 self.damped_trend = bool_like(damped_trend, 'damped_trend')
106 self.seasonal_periods = int_like(seasonal, 'seasonal', optional=True)
107 self.seasonal = self.seasonal_periods is not None
108 self.initialization_method = string_like(
109 initialization_method, 'initialization_method').lower()
110 self.concentrate_scale = bool_like(concentrate_scale,
111 'concentrate_scale')
113 # TODO: add validation for bounds (e.g. have all bounds, upper > lower)
114 # TODO: add `bounds_method` argument to choose between "usual" and
115 # "admissible" as in Hyndman et al. (2008)
116 self.bounds = bounds
117 if self.bounds is None:
118 self.bounds = [(1e-4, 1-1e-4)] * 3 + [(0.8, 0.98)]
120 # Validation
121 if self.seasonal_periods == 1:
122 raise ValueError('Cannot have a seasonal period of 1.')
124 if self.seasonal and self.seasonal_periods is None:
125 raise NotImplementedError('Unable to detect season automatically;'
126 ' please specify `seasonal_periods`.')
128 if self.initialization_method not in ['concentrated', 'estimated',
129 'simple', 'heuristic', 'known']:
130 raise ValueError('Invalid initialization method "%s".'
131 % initialization_method)
133 if self.initialization_method == 'known':
134 if initial_level is None:
135 raise ValueError('`initial_level` argument must be provided'
136 ' when initialization method is set to'
137 ' "known".')
138 if initial_trend is None and self.trend:
139 raise ValueError('`initial_trend` argument must be provided'
140 ' for models with a trend component when'
141 ' initialization method is set to "known".')
142 if initial_seasonal is None and self.seasonal:
143 raise ValueError('`initial_seasonal` argument must be provided'
144 ' for models with a seasonal component when'
145 ' initialization method is set to "known".')
147 # Initialize the state space model
148 if not self.seasonal or self.seasonal_periods is None:
149 self._seasonal_periods = 0
150 else:
151 self._seasonal_periods = self.seasonal_periods
153 k_states = 2 + int(self.trend) + self._seasonal_periods
154 k_posdef = 1
156 init = ss_init.Initialization(k_states, 'known',
157 constant=[0] * k_states)
158 super(ExponentialSmoothing, self).__init__(
159 endog, k_states=k_states, k_posdef=k_posdef,
160 initialization=init, dates=dates, freq=freq, missing=missing)
162 # Concentrate the scale out of the likelihood function
163 if self.concentrate_scale:
164 self.ssm.filter_concentrated = True
166 # Setup fixed elements of the system matrices
167 # Observation error
168 self.ssm['design', 0, 0] = 1.
169 self.ssm['selection', 0, 0] = 1.
170 self.ssm['state_cov', 0, 0] = 1.
172 # Level
173 self.ssm['design', 0, 1] = 1.
174 self.ssm['transition', 1, 1] = 1.
176 # Trend
177 if self.trend:
178 self.ssm['transition', 1:3, 2] = 1.
180 # Seasonal
181 if self.seasonal:
182 k = 2 + int(self.trend)
183 self.ssm['design', 0, k] = 1.
184 self.ssm['transition', k, -1] = 1.
185 self.ssm['transition', k + 1:k_states, k:k_states - 1] = (
186 np.eye(self.seasonal_periods - 1))
188 # Initialization of the states
189 if self.initialization_method != 'known':
190 msg = ('Cannot give `%%s` argument when initialization is "%s"'
191 % initialization_method)
192 if initial_level is not None:
193 raise ValueError(msg % 'initial_level')
194 if initial_trend is not None:
195 raise ValueError(msg % 'initial_trend')
196 if initial_seasonal is not None:
197 raise ValueError(msg % 'initial_seasonal')
199 if self.initialization_method == 'simple':
200 initial_level, initial_trend, initial_seasonal = (
201 es_init._initialization_simple(
202 self.endog[:, 0], trend='add' if self.trend else None,
203 seasonal='add' if self.seasonal else None,
204 seasonal_periods=self.seasonal_periods))
205 elif self.initialization_method == 'heuristic':
206 initial_level, initial_trend, initial_seasonal = (
207 es_init._initialization_heuristic(
208 self.endog[:, 0], trend='add' if self.trend else None,
209 seasonal='add' if self.seasonal else None,
210 seasonal_periods=self.seasonal_periods))
211 elif self.initialization_method == 'known':
212 initial_level = float_like(initial_level, 'initial_level')
213 if self.trend:
214 initial_trend = float_like(initial_trend, 'initial_trend')
215 if self.seasonal:
216 initial_seasonal = array_like(initial_seasonal,
217 'initial_seasonal')
219 if len(initial_seasonal) == self.seasonal_periods - 1:
220 initial_seasonal = np.r_[initial_seasonal,
221 0 - np.sum(initial_seasonal)]
223 if len(initial_seasonal) != self.seasonal_periods:
224 raise ValueError(
225 'Invalid length of initial seasonal values. Must be'
226 ' one of s or s-1, where s is the number of seasonal'
227 ' periods.')
229 self._initial_level = initial_level
230 self._initial_trend = initial_trend
231 self._initial_seasonal = initial_seasonal
232 self._initial_state = None
234 # Initialize now if possible (if we have a damped trend, then
235 # initialization will depend on the phi parameter, and so has to be
236 # done at each `update`)
237 methods = ['simple', 'heuristic', 'known']
238 if not self.damped_trend and self.initialization_method in methods:
239 self._initialize_constant_statespace(initial_level, initial_trend,
240 initial_seasonal)
242 # Save keys for kwarg initialization
243 self._init_keys += ['trend', 'damped_trend', 'seasonal',
244 'initialization_method', 'initial_level',
245 'initial_trend', 'initial_seasonal', 'bounds',
246 'concentrate_scale', 'dates', 'freq', 'missing']
248 def _get_init_kwds(self):
249 kwds = super()._get_init_kwds()
250 kwds['seasonal'] = self.seasonal_periods
251 return kwds
253 @property
254 def _res_classes(self):
255 return {'fit': (ExponentialSmoothingResults,
256 ExponentialSmoothingResultsWrapper)}
258 def clone(self, endog, exog=None, **kwargs):
259 if exog is not None:
260 raise NotImplementedError(
261 'ExponentialSmoothing does not support `exog`.')
262 return self._clone_from_init_kwds(endog, **kwargs)
264 @property
265 def state_names(self):
266 state_names = ['error', 'level']
267 if self.trend:
268 state_names += ['trend']
269 if self.seasonal:
270 state_names += ['seasonal.%d' % i
271 for i in range(self.seasonal_periods)]
273 return state_names
275 @property
276 def param_names(self):
277 param_names = ['smoothing_level']
278 if self.trend:
279 param_names += ['smoothing_trend']
280 if self.seasonal:
281 param_names += ['smoothing_seasonal']
282 if self.damped_trend:
283 param_names += ['damping_trend']
284 if not self.concentrate_scale:
285 param_names += ['sigma2']
287 # Initialization
288 if self.initialization_method == 'estimated':
289 param_names += ['initial_level']
290 if self.trend:
291 param_names += ['initial_trend']
292 if self.seasonal:
293 param_names += ['initial_seasonal.%d' % i
294 for i in range(self.seasonal_periods - 1)]
296 return param_names
298 @property
299 def start_params(self):
300 # Make sure starting parameters aren't beyond or right on the bounds
301 bounds = [(x[0] + 1e-3, x[1] - 1e-3) for x in self.bounds]
303 # See Hyndman p.24
304 start_params = [np.clip(0.1, *bounds[0])]
305 if self.trend:
306 start_params += [np.clip(0.01, *bounds[1])]
307 if self.seasonal:
308 start_params += [np.clip(0.01, *bounds[2])]
309 if self.damped_trend:
310 start_params += [np.clip(0.98, *bounds[3])]
311 if not self.concentrate_scale:
312 start_params += [np.var(self.endog)]
314 # Initialization
315 if self.initialization_method == 'estimated':
316 initial_level, initial_trend, initial_seasonal = (
317 es_init._initialization_simple(
318 self.endog[:, 0],
319 trend='add' if self.trend else None,
320 seasonal='add' if self.seasonal else None,
321 seasonal_periods=self.seasonal_periods))
322 start_params += [initial_level]
323 if self.trend:
324 start_params += [initial_trend]
325 if self.seasonal:
326 start_params += initial_seasonal.tolist()[:-1]
328 return np.array(start_params)
330 @property
331 def k_params(self):
332 k_params = (
333 1 + int(self.trend) + int(self.seasonal) +
334 int(not self.concentrate_scale) + int(self.damped_trend))
335 if self.initialization_method == 'estimated':
336 k_params += (
337 1 + int(self.trend) +
338 int(self.seasonal) * (self._seasonal_periods - 1))
339 return k_params
341 def transform_params(self, unconstrained):
342 unconstrained = np.array(unconstrained, ndmin=1)
343 constrained = np.zeros_like(unconstrained)
345 # Alpha in (0, 1)
346 low, high = self.bounds[0]
347 constrained[0] = (
348 1 / (1 + np.exp(-unconstrained[0])) * (high - low) + low)
349 i = 1
351 # Beta in (0, alpha)
352 if self.trend:
353 low, high = self.bounds[1]
354 high = min(high, constrained[0])
355 constrained[i] = (
356 1 / (1 + np.exp(-unconstrained[i])) * (high - low) + low)
357 i += 1
359 # Gamma in (0, 1 - alpha)
360 if self.seasonal:
361 low, high = self.bounds[2]
362 high = min(high, 1 - constrained[0])
363 constrained[i] = (
364 1 / (1 + np.exp(-unconstrained[i])) * (high - low) + low)
365 i += 1
367 # Phi in bounds (e.g. default is [0.8, 0.98])
368 if self.damped_trend:
369 low, high = self.bounds[3]
370 constrained[i] = (
371 1 / (1 + np.exp(-unconstrained[i])) * (high - low) + low)
372 i += 1
374 # sigma^2 positive
375 if not self.concentrate_scale:
376 constrained[i] = unconstrained[i]**2
377 i += 1
379 # Initial parameters are as-is
380 if self.initialization_method == 'estimated':
381 constrained[i:] = unconstrained[i:]
383 return constrained
385 def untransform_params(self, constrained):
386 constrained = np.array(constrained, ndmin=1)
387 unconstrained = np.zeros_like(constrained)
389 # Alpha in (0, 1)
390 low, high = self.bounds[0]
391 tmp = (constrained[0] - low) / (high - low)
392 unconstrained[0] = np.log(tmp / (1 - tmp))
393 i = 1
395 # Beta in (0, alpha)
396 if self.trend:
397 low, high = self.bounds[1]
398 high = min(high, constrained[0])
399 tmp = (constrained[i] - low) / (high - low)
400 unconstrained[i] = np.log(tmp / (1 - tmp))
401 i += 1
403 # Gamma in (0, 1 - alpha)
404 if self.seasonal:
405 low, high = self.bounds[2]
406 high = min(high, 1 - constrained[0])
407 tmp = (constrained[i] - low) / (high - low)
408 unconstrained[i] = np.log(tmp / (1 - tmp))
409 i += 1
411 # Phi in bounds (e.g. default is [0.8, 0.98])
412 if self.damped_trend:
413 low, high = self.bounds[3]
414 tmp = (constrained[i] - low) / (high - low)
415 unconstrained[i] = np.log(tmp / (1 - tmp))
416 i += 1
418 # sigma^2 positive
419 if not self.concentrate_scale:
420 unconstrained[i] = constrained[i]**0.5
421 i += 1
423 # Initial parameters are as-is
424 if self.initialization_method == 'estimated':
425 unconstrained[i:] = constrained[i:]
427 return unconstrained
429 def _initialize_constant_statespace(self, initial_level,
430 initial_trend=None,
431 initial_seasonal=None):
432 # Note: this should be run after `update` has already put any new
433 # parameters into the transition matrix, since it uses the transition
434 # matrix explicitly.
436 # Due to timing differences, the state space representation integrates
437 # the trend into the level in the "predicted_state" (only the
438 # "filtered_state" corresponds to the timing of the exponential
439 # smoothing models)
441 # Initial values are interpreted as "filtered" values
442 constant = np.array([0., initial_level])
443 if self.trend and initial_trend is not None:
444 constant = np.r_[constant, initial_trend]
445 if self.seasonal and initial_seasonal is not None:
446 constant = np.r_[constant, initial_seasonal]
447 self._initial_state = constant[1:]
449 # Apply the prediction step to get to what we need for our Kalman
450 # filter implementation
451 constant = np.dot(self.ssm['transition'], constant)
453 self.initialization.constant = constant
455 def _initialize_stationary_cov_statespace(self):
456 R = self.ssm['selection']
457 Q = self.ssm['state_cov']
458 self.initialization.stationary_cov = R.dot(Q).dot(R.T)
460 def update(self, params, transformed=True, includes_fixed=False,
461 complex_step=False):
462 params = self.handle_params(params, transformed=transformed,
463 includes_fixed=includes_fixed)
465 # State space system matrices
466 self.ssm['selection', 0, 0] = 1 - params[0]
467 self.ssm['selection', 1, 0] = params[0]
468 i = 1
469 if self.trend:
470 self.ssm['selection', 2, 0] = params[i]
471 i += 1
472 if self.seasonal:
473 self.ssm['selection', 0, 0] -= params[i]
474 self.ssm['selection', i + 1, 0] = params[i]
475 i += 1
476 if self.damped_trend:
477 self.ssm['transition', 1:3, 2] = params[i]
478 i += 1
479 if not self.concentrate_scale:
480 self.ssm['state_cov', 0, 0] = params[i]
481 i += 1
483 # State initialization
484 if self.initialization_method == 'estimated':
485 initial_level = params[i]
486 i += 1
487 initial_trend = None
488 initial_seasonal = None
490 if self.trend:
491 initial_trend = params[i]
492 i += 1
493 if self.seasonal:
494 initial_seasonal = params[i: i + self.seasonal_periods - 1]
495 initial_seasonal = np.r_[initial_seasonal,
496 0 - np.sum(initial_seasonal)]
497 self._initialize_constant_statespace(initial_level, initial_trend,
498 initial_seasonal)
500 methods = ['simple', 'heuristic', 'known']
501 if self.damped_trend and self.initialization_method in methods:
502 self._initialize_constant_statespace(
503 self._initial_level, self._initial_trend,
504 self._initial_seasonal)
506 self._initialize_stationary_cov_statespace()
508 def _compute_concentrated_states(self, params, *args, **kwargs):
509 # Apply the usual filter, but keep forecasts
510 kwargs['conserve_memory'] = MEMORY_CONSERVE & ~MEMORY_NO_FORECAST
511 super().loglike(params, *args, **kwargs)
513 # Compute the initial state vector
514 y_tilde = np.array(self.ssm._kalman_filter.forecast_error[0],
515 copy=True)
517 # Need to modify our state space system matrices slightly to get them
518 # back into the form of the innovations framework of
519 # De Livera et al. (2011)
520 T = self['transition', 1:, 1:]
521 R = self['selection', 1:]
522 Z = self['design', :, 1:].copy()
523 i = 1
524 if self.trend:
525 Z[0, i] = 1.
526 i += 1
527 if self.seasonal:
528 Z[0, i] = 0.
529 Z[0, -1] = 1.
531 # Now compute the regression components as described in
532 # De Livera et al. (2011), equation (10).
533 D = T - R.dot(Z)
534 w = np.zeros((self.nobs, self.k_states - 1), dtype=D.dtype)
535 w[0] = Z
536 for i in range(self.nobs - 1):
537 w[i + 1] = w[i].dot(D)
538 mod_ols = GLM(y_tilde, w)
540 # If we have seasonal parameters, constrain them to sum to zero
541 # (otherwise the initial level gets confounded with the sum of the
542 # seasonals).
543 if self.seasonal:
544 R = np.zeros_like(Z)
545 R[0, -self.seasonal_periods:] = 1.
546 q = np.zeros((1, 1))
547 res_ols = mod_ols.fit_constrained((R, q))
548 else:
549 res_ols = mod_ols.fit()
551 # Separate into individual components
552 initial_level = res_ols.params[0]
553 initial_trend = res_ols.params[1] if self.trend else None
554 initial_seasonal = (
555 res_ols.params[-self.seasonal_periods:] if self.seasonal else None)
557 return initial_level, initial_trend, initial_seasonal
559 @Appender(MLEModel.loglike.__doc__)
560 def loglike(self, params, *args, **kwargs):
561 if self.initialization_method == 'concentrated':
562 self._initialize_constant_statespace(
563 *self._compute_concentrated_states(params, *args, **kwargs))
564 llf = self.ssm.loglike()
565 self.ssm.initialization.constant = np.zeros(self.k_states)
566 else:
567 llf = super().loglike(params, *args, **kwargs)
568 return llf
570 @Appender(MLEModel.filter.__doc__)
571 def filter(self, params, cov_type=None, cov_kwds=None,
572 return_ssm=False, results_class=None,
573 results_wrapper_class=None, *args, **kwargs):
574 if self.initialization_method == 'concentrated':
575 self._initialize_constant_statespace(
576 *self._compute_concentrated_states(params, *args, **kwargs))
578 results = super().filter(
579 params, cov_type=cov_type, cov_kwds=cov_kwds,
580 return_ssm=return_ssm, results_class=results_class,
581 results_wrapper_class=results_wrapper_class, *args, **kwargs)
583 if self.initialization_method == 'concentrated':
584 self.ssm.initialization.constant = np.zeros(self.k_states)
585 return results
587 @Appender(MLEModel.smooth.__doc__)
588 def smooth(self, params, cov_type=None, cov_kwds=None,
589 return_ssm=False, results_class=None,
590 results_wrapper_class=None, *args, **kwargs):
591 if self.initialization_method == 'concentrated':
592 self._initialize_constant_statespace(
593 *self._compute_concentrated_states(params, *args, **kwargs))
595 results = super().smooth(
596 params, cov_type=cov_type, cov_kwds=cov_kwds,
597 return_ssm=return_ssm, results_class=results_class,
598 results_wrapper_class=results_wrapper_class, *args, **kwargs)
600 if self.initialization_method == 'concentrated':
601 self.ssm.initialization.constant = np.zeros(self.k_states)
602 return results
605class ExponentialSmoothingResults(MLEResults):
606 def __init__(self, model, params, filter_results, cov_type=None,
607 **kwargs):
608 super().__init__(model, params, filter_results, cov_type, **kwargs)
610 # Save the states
611 self.initial_state = model._initial_state
612 if isinstance(self.data, PandasData):
613 index = self.data.row_labels
614 self.initial_state = pd.DataFrame(
615 [model._initial_state], columns=model.state_names[1:])
616 if model._index_dates and model._index_freq is not None:
617 self.initial_state.index = index.shift(-1)[:1]
619 @Appender(MLEResults.summary.__doc__)
620 def summary(self, alpha=.05, start=None):
621 specification = ['A']
622 if self.model.trend and self.model.damped_trend:
623 specification.append('Ad')
624 elif self.model.trend:
625 specification.append('A')
626 else:
627 specification.append('N')
628 if self.model.seasonal:
629 specification.append('A')
630 else:
631 specification.append('N')
633 model_name = 'ETS(' + ', '.join(specification) + ')'
635 summary = super(ExponentialSmoothingResults, self).summary(
636 alpha=alpha, start=start, title='Exponential Smoothing Results',
637 model_name=model_name)
639 if self.model.initialization_method != 'estimated':
640 params = np.array(self.initial_state)
641 if params.ndim > 1:
642 params = params[0]
643 names = self.model.state_names
644 param_header = ['initialization method: %s'
645 % self.model.initialization_method]
646 params_stubs = names
647 params_data = [[forg(params[i], prec=4)]
648 for i in range(len(params))]
650 initial_state_table = SimpleTable(params_data,
651 param_header,
652 params_stubs,
653 txt_fmt=fmt_params)
654 summary.tables.insert(-1, initial_state_table)
656 return summary
659class ExponentialSmoothingResultsWrapper(MLEResultsWrapper):
660 _attrs = {}
661 _wrap_attrs = wrap.union_dicts(MLEResultsWrapper._wrap_attrs,
662 _attrs)
663 _methods = {}
664 _wrap_methods = wrap.union_dicts(MLEResultsWrapper._wrap_methods,
665 _methods)
666wrap.populate_wrapper(ExponentialSmoothingResultsWrapper, # noqa:E305
667 ExponentialSmoothingResults)