Coverage for /home/martinb/.local/share/virtualenvs/camcops/lib/python3.6/site-packages/statsmodels/tsa/statespace/mlemodel.py : 12%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# -*- coding: utf-8 -*-
2"""
3State Space Model
5Author: Chad Fulton
6License: Simplified-BSD
7"""
8import contextlib
9import warnings
11from collections import OrderedDict
12from types import SimpleNamespace
13import numpy as np
14import pandas as pd
15from scipy.stats import norm
17from statsmodels.tools.tools import pinv_extended, Bunch
18from statsmodels.tools.sm_exceptions import PrecisionWarning, ValueWarning
19from statsmodels.tools.numdiff import (_get_epsilon, approx_hess_cs,
20 approx_fprime_cs, approx_fprime)
21from statsmodels.tools.decorators import cache_readonly
22from statsmodels.tools.eval_measures import aic, aicc, bic, hqic
24import statsmodels.base.wrapper as wrap
26import statsmodels.genmod._prediction as pred
27from statsmodels.genmod.families.links import identity
29from statsmodels.base.data import PandasData
30import statsmodels.tsa.base.tsa_model as tsbase
32from .simulation_smoother import SimulationSmoother
33from .kalman_smoother import SmootherResults
34from .kalman_filter import INVERT_UNIVARIATE, SOLVE_LU, MEMORY_CONSERVE
35from .initialization import Initialization
36from .tools import prepare_exog, concat
39def _handle_args(names, defaults, *args, **kwargs):
40 output_args = []
41 # We need to handle positional arguments in two ways, in case this was
42 # called by a Scipy optimization routine
43 if len(args) > 0:
44 # the fit() method will pass a dictionary
45 if isinstance(args[0], dict):
46 flags = args[0]
47 # otherwise, a user may have just used positional arguments...
48 else:
49 flags = dict(zip(names, args))
50 for i in range(len(names)):
51 output_args.append(flags.get(names[i], defaults[i]))
53 for name, value in flags.items():
54 if name in kwargs:
55 raise TypeError("loglike() got multiple values for keyword"
56 " argument '%s'" % name)
57 else:
58 for i in range(len(names)):
59 output_args.append(kwargs.pop(names[i], defaults[i]))
61 return tuple(output_args) + (kwargs,)
64def _check_index(desired_index, dta, title='data'):
65 given_index = None
66 if isinstance(dta, (pd.Series, pd.DataFrame)):
67 given_index = dta.index
68 if given_index is not None and not desired_index.equals(given_index):
69 desired_freq = getattr(desired_index, 'freq', None)
70 given_freq = getattr(given_index, 'freq', None)
71 if ((desired_freq is not None or given_freq is not None) and
72 desired_freq != given_freq):
73 raise ValueError('Given %s does not have an index'
74 ' that extends the index of the'
75 ' model. Expected index frequency is'
76 ' "%s", but got "%s".'
77 % (title, desired_freq, given_freq))
78 else:
79 raise ValueError('Given %s does not have an index'
80 ' that extends the index of the'
81 ' model.' % title)
84class MLEModel(tsbase.TimeSeriesModel):
85 r"""
86 State space model for maximum likelihood estimation
88 Parameters
89 ----------
90 endog : array_like
91 The observed time-series process :math:`y`
92 k_states : int
93 The dimension of the unobserved state process.
94 exog : array_like, optional
95 Array of exogenous regressors, shaped nobs x k. Default is no
96 exogenous regressors.
97 dates : array_like of datetime, optional
98 An array-like object of datetime objects. If a Pandas object is given
99 for endog, it is assumed to have a DateIndex.
100 freq : str, optional
101 The frequency of the time-series. A Pandas offset or 'B', 'D', 'W',
102 'M', 'A', or 'Q'. This is optional if dates are given.
103 **kwargs
104 Keyword arguments may be used to provide default values for state space
105 matrices or for Kalman filtering options. See `Representation`, and
106 `KalmanFilter` for more details.
108 Attributes
109 ----------
110 ssm : statsmodels.tsa.statespace.kalman_filter.KalmanFilter
111 Underlying state space representation.
113 Notes
114 -----
115 This class wraps the state space model with Kalman filtering to add in
116 functionality for maximum likelihood estimation. In particular, it adds
117 the concept of updating the state space representation based on a defined
118 set of parameters, through the `update` method or `updater` attribute (see
119 below for more details on which to use when), and it adds a `fit` method
120 which uses a numerical optimizer to select the parameters that maximize
121 the likelihood of the model.
123 The `start_params` `update` method must be overridden in the
124 child class (and the `transform` and `untransform` methods, if needed).
126 See Also
127 --------
128 statsmodels.tsa.statespace.mlemodel.MLEResults
129 statsmodels.tsa.statespace.kalman_filter.KalmanFilter
130 statsmodels.tsa.statespace.representation.Representation
131 """
133 def __init__(self, endog, k_states, exog=None, dates=None, freq=None,
134 **kwargs):
135 # Initialize the model base
136 super(MLEModel, self).__init__(endog=endog, exog=exog,
137 dates=dates, freq=freq,
138 missing='none')
140 # Store kwargs to recreate model
141 self._init_kwargs = kwargs
143 # Prepared the endog array: C-ordered, shape=(nobs x k_endog)
144 self.endog, self.exog = self.prepare_data()
146 # Dimensions
147 self.nobs = self.endog.shape[0]
148 self.k_states = k_states
150 # Initialize the state-space representation
151 self.initialize_statespace(**kwargs)
153 # Setup holder for fixed parameters
154 self._has_fixed_params = False
155 self._fixed_params = None
156 self._params_index = None
157 self._fixed_params_index = None
158 self._free_params_index = None
160 def prepare_data(self):
161 """
162 Prepare data for use in the state space representation
163 """
164 endog = np.array(self.data.orig_endog, order='C')
165 exog = self.data.orig_exog
166 if exog is not None:
167 exog = np.array(exog)
169 # Base class may allow 1-dim data, whereas we need 2-dim
170 if endog.ndim == 1:
171 endog.shape = (endog.shape[0], 1) # this will be C-contiguous
173 return endog, exog
175 def initialize_statespace(self, **kwargs):
176 """
177 Initialize the state space representation
179 Parameters
180 ----------
181 **kwargs
182 Additional keyword arguments to pass to the state space class
183 constructor.
184 """
185 # (Now self.endog is C-ordered and in long format (nobs x k_endog). To
186 # get F-ordered and in wide format just need to transpose)
187 endog = self.endog.T
189 # Instantiate the state space object
190 self.ssm = SimulationSmoother(endog.shape[0], self.k_states,
191 nobs=endog.shape[1], **kwargs)
192 # Bind the data to the model
193 self.ssm.bind(endog)
195 # Other dimensions, now that `ssm` is available
196 self.k_endog = self.ssm.k_endog
198 def _get_index_with_initial_state(self):
199 # The index we inherit from `TimeSeriesModel` will only cover the
200 # data sample itself, but we will also need an index value for the
201 # initial state which is the previous time step to the first datapoint.
202 # This method figures out an appropriate value for the three types of
203 # supported indexes: date-based, Int64Index, or RangeIndex
204 if self._index_dates:
205 # value = self._index.shift(-1)[0]
206 if isinstance(self._index, pd.DatetimeIndex):
207 index = pd.date_range(
208 end=self._index[-1], periods=len(self._index) + 1,
209 freq=self._index.freq)
210 elif isinstance(self._index, pd.PeriodIndex):
211 index = pd.period_range(
212 end=self._index[-1], periods=len(self._index) + 1,
213 freq=self._index.freq)
214 else:
215 raise NotImplementedError
216 elif isinstance(self._index, pd.Int64Index):
217 # The only valid Int64Index is a full, incrementing index, so this
218 # is general
219 value = self._index[0] - 1
220 index = pd.Int64Index([value] + self._index.tolist())
221 elif isinstance(self._index, pd.RangeIndex):
222 index = pd.RangeIndex(self._index.start - self._index.step,
223 self._index.end, self._index.step)
224 else:
225 raise NotImplementedError
226 return index
228 def __setitem__(self, key, value):
229 return self.ssm.__setitem__(key, value)
231 def __getitem__(self, key):
232 return self.ssm.__getitem__(key)
234 def _get_init_kwds(self):
235 # Get keywords based on model attributes
236 kwds = super(MLEModel, self)._get_init_kwds()
238 for key, value in kwds.items():
239 if value is None and hasattr(self.ssm, key):
240 kwds[key] = getattr(self.ssm, key)
242 return kwds
244 def clone(self, endog, exog=None, **kwargs):
245 raise NotImplementedError
247 def _clone_from_init_kwds(self, endog, **kwargs):
248 # Cannot make this the default, because there is extra work required
249 # for subclasses to make _get_init_kwds useful.
250 use_kwargs = self._get_init_kwds()
251 use_kwargs.update(kwargs)
253 # Check for `exog`
254 if getattr(self, 'k_exog', 0) > 0 and kwargs.get('exog', None) is None:
255 raise ValueError('Cloning a model with an exogenous component'
256 ' requires specifying a new exogenous array using'
257 ' the `exog` argument.')
259 return self.__class__(endog, **use_kwargs)
261 def set_filter_method(self, filter_method=None, **kwargs):
262 """
263 Set the filtering method
265 The filtering method controls aspects of which Kalman filtering
266 approach will be used.
268 Parameters
269 ----------
270 filter_method : int, optional
271 Bitmask value to set the filter method to. See notes for details.
272 **kwargs
273 Keyword arguments may be used to influence the filter method by
274 setting individual boolean flags. See notes for details.
276 Notes
277 -----
278 This method is rarely used. See the corresponding function in the
279 `KalmanFilter` class for details.
280 """
281 self.ssm.set_filter_method(filter_method, **kwargs)
283 def set_inversion_method(self, inversion_method=None, **kwargs):
284 """
285 Set the inversion method
287 The Kalman filter may contain one matrix inversion: that of the
288 forecast error covariance matrix. The inversion method controls how and
289 if that inverse is performed.
291 Parameters
292 ----------
293 inversion_method : int, optional
294 Bitmask value to set the inversion method to. See notes for
295 details.
296 **kwargs
297 Keyword arguments may be used to influence the inversion method by
298 setting individual boolean flags. See notes for details.
300 Notes
301 -----
302 This method is rarely used. See the corresponding function in the
303 `KalmanFilter` class for details.
304 """
305 self.ssm.set_inversion_method(inversion_method, **kwargs)
307 def set_stability_method(self, stability_method=None, **kwargs):
308 """
309 Set the numerical stability method
311 The Kalman filter is a recursive algorithm that may in some cases
312 suffer issues with numerical stability. The stability method controls
313 what, if any, measures are taken to promote stability.
315 Parameters
316 ----------
317 stability_method : int, optional
318 Bitmask value to set the stability method to. See notes for
319 details.
320 **kwargs
321 Keyword arguments may be used to influence the stability method by
322 setting individual boolean flags. See notes for details.
324 Notes
325 -----
326 This method is rarely used. See the corresponding function in the
327 `KalmanFilter` class for details.
328 """
329 self.ssm.set_stability_method(stability_method, **kwargs)
331 def set_conserve_memory(self, conserve_memory=None, **kwargs):
332 """
333 Set the memory conservation method
335 By default, the Kalman filter computes a number of intermediate
336 matrices at each iteration. The memory conservation options control
337 which of those matrices are stored.
339 Parameters
340 ----------
341 conserve_memory : int, optional
342 Bitmask value to set the memory conservation method to. See notes
343 for details.
344 **kwargs
345 Keyword arguments may be used to influence the memory conservation
346 method by setting individual boolean flags.
348 Notes
349 -----
350 This method is rarely used. See the corresponding function in the
351 `KalmanFilter` class for details.
352 """
353 self.ssm.set_conserve_memory(conserve_memory, **kwargs)
355 def set_smoother_output(self, smoother_output=None, **kwargs):
356 """
357 Set the smoother output
359 The smoother can produce several types of results. The smoother output
360 variable controls which are calculated and returned.
362 Parameters
363 ----------
364 smoother_output : int, optional
365 Bitmask value to set the smoother output to. See notes for details.
366 **kwargs
367 Keyword arguments may be used to influence the smoother output by
368 setting individual boolean flags.
370 Notes
371 -----
372 This method is rarely used. See the corresponding function in the
373 `KalmanSmoother` class for details.
374 """
375 self.ssm.set_smoother_output(smoother_output, **kwargs)
377 def initialize_known(self, initial_state, initial_state_cov):
378 """Initialize known"""
379 self.ssm.initialize_known(initial_state, initial_state_cov)
381 def initialize_approximate_diffuse(self, variance=None):
382 """Initialize approximate diffuse"""
383 self.ssm.initialize_approximate_diffuse(variance)
385 def initialize_stationary(self):
386 """Initialize stationary"""
387 self.ssm.initialize_stationary()
389 @property
390 def initialization(self):
391 return self.ssm.initialization
393 @initialization.setter
394 def initialization(self, value):
395 self.ssm.initialization = value
397 @property
398 def initial_variance(self):
399 return self.ssm.initial_variance
401 @initial_variance.setter
402 def initial_variance(self, value):
403 self.ssm.initial_variance = value
405 @property
406 def loglikelihood_burn(self):
407 return self.ssm.loglikelihood_burn
409 @loglikelihood_burn.setter
410 def loglikelihood_burn(self, value):
411 self.ssm.loglikelihood_burn = value
413 @property
414 def tolerance(self):
415 return self.ssm.tolerance
417 @tolerance.setter
418 def tolerance(self, value):
419 self.ssm.tolerance = value
421 def _validate_can_fix_params(self, param_names):
422 for param_name in param_names:
423 if param_name not in self.param_names:
424 raise ValueError('Invalid parameter name passed: "%s".'
425 % param_name)
427 @contextlib.contextmanager
428 def fix_params(self, params):
429 """
430 Fix parameters to specific values (context manager)
432 Parameters
433 ----------
434 params : dict
435 Dictionary describing the fixed parameter values, of the form
436 `param_name: fixed_value`. See the `param_names` property for valid
437 parameter names.
439 Examples
440 --------
441 >>> mod = sm.tsa.SARIMAX(endog, order=(1, 0, 1))
442 >>> with mod.fix_params({'ar.L1': 0.5}):
443 res = mod.fit()
444 """
445 k_params = len(self.param_names)
446 # Initialization (this is done here rather than in the constructor
447 # because param_names may not be available at that point)
448 if self._fixed_params is None:
449 self._fixed_params = {}
450 self._params_index = OrderedDict(
451 zip(self.param_names, np.arange(k_params)))
453 # Cache the current fixed parameters
454 cache_fixed_params = self._fixed_params.copy()
455 cache_has_fixed_params = self._has_fixed_params
456 cache_fixed_params_index = self._fixed_params_index
457 cache_free_params_index = self._free_params_index
459 # Validate parameter names and values
460 self._validate_can_fix_params(set(params.keys()))
462 # Set the new fixed parameters, keeping the order as given by
463 # param_names
464 self._fixed_params.update(params)
465 self._fixed_params = OrderedDict([
466 (name, self._fixed_params[name]) for name in self.param_names
467 if name in self._fixed_params])
469 # Update associated values
470 self._has_fixed_params = True
471 self._fixed_params_index = [self._params_index[key]
472 for key in self._fixed_params.keys()]
473 self._free_params_index = list(
474 set(np.arange(k_params)).difference(self._fixed_params_index))
476 try:
477 yield
478 finally:
479 # Reset the fixed parameters
480 self._has_fixed_params = cache_has_fixed_params
481 self._fixed_params = cache_fixed_params
482 self._fixed_params_index = cache_fixed_params_index
483 self._free_params_index = cache_free_params_index
485 def fit(self, start_params=None, transformed=True, includes_fixed=False,
486 cov_type=None, cov_kwds=None, method='lbfgs', maxiter=50,
487 full_output=1, disp=5, callback=None, return_params=False,
488 optim_score=None, optim_complex_step=None, optim_hessian=None,
489 flags=None, low_memory=False, **kwargs):
490 """
491 Fits the model by maximum likelihood via Kalman filter.
493 Parameters
494 ----------
495 start_params : array_like, optional
496 Initial guess of the solution for the loglikelihood maximization.
497 If None, the default is given by Model.start_params.
498 transformed : bool, optional
499 Whether or not `start_params` is already transformed. Default is
500 True.
501 includes_fixed : bool, optional
502 If parameters were previously fixed with the `fix_params` method,
503 this argument describes whether or not `start_params` also includes
504 the fixed parameters, in addition to the free parameters. Default
505 is False.
506 cov_type : str, optional
507 The `cov_type` keyword governs the method for calculating the
508 covariance matrix of parameter estimates. Can be one of:
510 - 'opg' for the outer product of gradient estimator
511 - 'oim' for the observed information matrix estimator, calculated
512 using the method of Harvey (1989)
513 - 'approx' for the observed information matrix estimator,
514 calculated using a numerical approximation of the Hessian matrix.
515 - 'robust' for an approximate (quasi-maximum likelihood) covariance
516 matrix that may be valid even in the presence of some
517 misspecifications. Intermediate calculations use the 'oim'
518 method.
519 - 'robust_approx' is the same as 'robust' except that the
520 intermediate calculations use the 'approx' method.
521 - 'none' for no covariance matrix calculation.
523 Default is 'opg' unless memory conservation is used to avoid
524 computing the loglikelihood values for each observation, in which
525 case the default is 'approx'.
526 cov_kwds : dict or None, optional
527 A dictionary of arguments affecting covariance matrix computation.
529 **opg, oim, approx, robust, robust_approx**
531 - 'approx_complex_step' : bool, optional - If True, numerical
532 approximations are computed using complex-step methods. If False,
533 numerical approximations are computed using finite difference
534 methods. Default is True.
535 - 'approx_centered' : bool, optional - If True, numerical
536 approximations computed using finite difference methods use a
537 centered approximation. Default is False.
538 method : str, optional
539 The `method` determines which solver from `scipy.optimize`
540 is used, and it can be chosen from among the following strings:
542 - 'newton' for Newton-Raphson, 'nm' for Nelder-Mead
543 - 'bfgs' for Broyden-Fletcher-Goldfarb-Shanno (BFGS)
544 - 'lbfgs' for limited-memory BFGS with optional box constraints
545 - 'powell' for modified Powell's method
546 - 'cg' for conjugate gradient
547 - 'ncg' for Newton-conjugate gradient
548 - 'basinhopping' for global basin-hopping solver
550 The explicit arguments in `fit` are passed to the solver,
551 with the exception of the basin-hopping solver. Each
552 solver has several optional arguments that are not the same across
553 solvers. See the notes section below (or scipy.optimize) for the
554 available arguments and for the list of explicit arguments that the
555 basin-hopping solver supports.
556 maxiter : int, optional
557 The maximum number of iterations to perform.
558 full_output : bool, optional
559 Set to True to have all available output in the Results object's
560 mle_retvals attribute. The output is dependent on the solver.
561 See LikelihoodModelResults notes section for more information.
562 disp : bool, optional
563 Set to True to print convergence messages.
564 callback : callable callback(xk), optional
565 Called after each iteration, as callback(xk), where xk is the
566 current parameter vector.
567 return_params : bool, optional
568 Whether or not to return only the array of maximizing parameters.
569 Default is False.
570 optim_score : {'harvey', 'approx'} or None, optional
571 The method by which the score vector is calculated. 'harvey' uses
572 the method from Harvey (1989), 'approx' uses either finite
573 difference or complex step differentiation depending upon the
574 value of `optim_complex_step`, and None uses the built-in gradient
575 approximation of the optimizer. Default is None. This keyword is
576 only relevant if the optimization method uses the score.
577 optim_complex_step : bool, optional
578 Whether or not to use complex step differentiation when
579 approximating the score; if False, finite difference approximation
580 is used. Default is True. This keyword is only relevant if
581 `optim_score` is set to 'harvey' or 'approx'.
582 optim_hessian : {'opg','oim','approx'}, optional
583 The method by which the Hessian is numerically approximated. 'opg'
584 uses outer product of gradients, 'oim' uses the information
585 matrix formula from Harvey (1989), and 'approx' uses numerical
586 approximation. This keyword is only relevant if the
587 optimization method uses the Hessian matrix.
588 low_memory : bool, optional
589 If set to True, techniques are applied to substantially reduce
590 memory usage. If used, some features of the results object will
591 not be available (including smoothed results and in-sample
592 prediction), although out-of-sample forecasting is possible.
593 Default is False.
594 **kwargs
595 Additional keyword arguments to pass to the optimizer.
597 Returns
598 -------
599 MLEResults
601 See Also
602 --------
603 statsmodels.base.model.LikelihoodModel.fit
604 statsmodels.tsa.statespace.mlemodel.MLEResults
605 """
606 if start_params is None:
607 start_params = self.start_params
608 transformed = True
609 includes_fixed = True
611 # Update the score method
612 if optim_score is None and method == 'lbfgs':
613 kwargs.setdefault('approx_grad', True)
614 kwargs.setdefault('epsilon', 1e-5)
615 elif optim_score is None:
616 optim_score = 'approx'
618 # Check for complex step differentiation
619 if optim_complex_step is None:
620 optim_complex_step = not self.ssm._complex_endog
621 elif optim_complex_step and self.ssm._complex_endog:
622 raise ValueError('Cannot use complex step derivatives when data'
623 ' or parameters are complex.')
625 # Standardize starting parameters
626 start_params = self.handle_params(start_params, transformed=True,
627 includes_fixed=includes_fixed)
629 # Unconstrain the starting parameters
630 if transformed:
631 start_params = self.untransform_params(start_params)
633 # Remove any fixed parameters
634 if self._has_fixed_params:
635 start_params = start_params[self._free_params_index]
637 # If all parameters are fixed, we are done
638 if self._has_fixed_params and len(start_params) == 0:
639 mlefit = Bunch(params=[], mle_retvals=None,
640 mle_settings=None)
641 else:
642 # Maximum likelihood estimation
643 if flags is None:
644 flags = {}
645 flags.update({
646 'transformed': False,
647 'includes_fixed': False,
648 'score_method': optim_score,
649 'approx_complex_step': optim_complex_step
650 })
651 if optim_hessian is not None:
652 flags['hessian_method'] = optim_hessian
653 fargs = (flags,)
654 mlefit = super(MLEModel, self).fit(start_params, method=method,
655 fargs=fargs,
656 maxiter=maxiter,
657 full_output=full_output,
658 disp=disp, callback=callback,
659 skip_hessian=True, **kwargs)
661 # Just return the fitted parameters if requested
662 if return_params:
663 return self.handle_params(mlefit.params, transformed=False,
664 includes_fixed=False)
665 # Otherwise construct the results class if desired
666 else:
667 # Handle memory conservation option
668 if low_memory:
669 conserve_memory = self.ssm.conserve_memory
670 self.ssm.set_conserve_memory(MEMORY_CONSERVE)
672 # Perform filtering / smoothing
673 if (self.ssm.memory_no_predicted or self.ssm.memory_no_gain
674 or self.ssm.memory_no_smoothing):
675 func = self.filter
676 else:
677 func = self.smooth
678 res = func(mlefit.params, transformed=False, includes_fixed=False,
679 cov_type=cov_type, cov_kwds=cov_kwds)
681 res.mlefit = mlefit
682 res.mle_retvals = mlefit.mle_retvals
683 res.mle_settings = mlefit.mle_settings
685 # Reset memory conservation
686 if low_memory:
687 self.ssm.set_conserve_memory(conserve_memory)
689 return res
691 def fit_constrained(self, constraints, start_params=None, **fit_kwds):
692 """
693 Fit the model with some parameters subject to equality constraints.
695 Parameters
696 ----------
697 constraints : dict
698 Dictionary of constraints, of the form `param_name: fixed_value`.
699 See the `param_names` property for valid parameter names.
700 start_params : array_like, optional
701 Initial guess of the solution for the loglikelihood maximization.
702 If None, the default is given by Model.start_params.
703 **fit_kwds : keyword arguments
704 fit_kwds are used in the optimization of the remaining parameters.
706 Returns
707 -------
708 results : Results instance
710 Examples
711 --------
712 >>> mod = sm.tsa.SARIMAX(endog, order=(1, 0, 1))
713 >>> res = mod.fit_constrained({'ar.L1': 0.5})
714 """
715 with self.fix_params(constraints):
716 res = self.fit(start_params, **fit_kwds)
717 return res
719 @property
720 def _res_classes(self):
721 return {'fit': (MLEResults, MLEResultsWrapper)}
723 def _wrap_results(self, params, result, return_raw, cov_type=None,
724 cov_kwds=None, results_class=None, wrapper_class=None):
725 if not return_raw:
726 # Wrap in a results object
727 result_kwargs = {}
728 if cov_type is not None:
729 result_kwargs['cov_type'] = cov_type
730 if cov_kwds is not None:
731 result_kwargs['cov_kwds'] = cov_kwds
733 if results_class is None:
734 results_class = self._res_classes['fit'][0]
735 if wrapper_class is None:
736 wrapper_class = self._res_classes['fit'][1]
738 res = results_class(self, params, result, **result_kwargs)
739 result = wrapper_class(res)
740 return result
742 def filter(self, params, transformed=True, includes_fixed=False,
743 complex_step=False, cov_type=None, cov_kwds=None,
744 return_ssm=False, results_class=None,
745 results_wrapper_class=None, low_memory=False, **kwargs):
746 """
747 Kalman filtering
749 Parameters
750 ----------
751 params : array_like
752 Array of parameters at which to evaluate the loglikelihood
753 function.
754 transformed : bool, optional
755 Whether or not `params` is already transformed. Default is True.
756 return_ssm : bool,optional
757 Whether or not to return only the state space output or a full
758 results object. Default is to return a full results object.
759 cov_type : str, optional
760 See `MLEResults.fit` for a description of covariance matrix types
761 for results object.
762 cov_kwds : dict or None, optional
763 See `MLEResults.get_robustcov_results` for a description required
764 keywords for alternative covariance estimators
765 low_memory : bool, optional
766 If set to True, techniques are applied to substantially reduce
767 memory usage. If used, some features of the results object will
768 not be available (including in-sample prediction), although
769 out-of-sample forecasting is possible. Default is False.
770 **kwargs
771 Additional keyword arguments to pass to the Kalman filter. See
772 `KalmanFilter.filter` for more details.
773 """
774 params = self.handle_params(params, transformed=transformed,
775 includes_fixed=includes_fixed)
776 self.update(params, transformed=True, includes_fixed=True,
777 complex_step=complex_step)
779 # Save the parameter names
780 self.data.param_names = self.param_names
782 if complex_step:
783 kwargs['inversion_method'] = INVERT_UNIVARIATE | SOLVE_LU
785 # Handle memory conservation
786 if low_memory:
787 kwargs['conserve_memory'] = MEMORY_CONSERVE
789 # Get the state space output
790 result = self.ssm.filter(complex_step=complex_step, **kwargs)
792 # Wrap in a results object
793 return self._wrap_results(params, result, return_ssm, cov_type,
794 cov_kwds, results_class,
795 results_wrapper_class)
797 def smooth(self, params, transformed=True, includes_fixed=False,
798 complex_step=False, cov_type=None, cov_kwds=None,
799 return_ssm=False, results_class=None,
800 results_wrapper_class=None, **kwargs):
801 """
802 Kalman smoothing
804 Parameters
805 ----------
806 params : array_like
807 Array of parameters at which to evaluate the loglikelihood
808 function.
809 transformed : bool, optional
810 Whether or not `params` is already transformed. Default is True.
811 return_ssm : bool,optional
812 Whether or not to return only the state space output or a full
813 results object. Default is to return a full results object.
814 cov_type : str, optional
815 See `MLEResults.fit` for a description of covariance matrix types
816 for results object.
817 cov_kwds : dict or None, optional
818 See `MLEResults.get_robustcov_results` for a description required
819 keywords for alternative covariance estimators
820 **kwargs
821 Additional keyword arguments to pass to the Kalman filter. See
822 `KalmanFilter.filter` for more details.
823 """
824 params = self.handle_params(params, transformed=transformed,
825 includes_fixed=includes_fixed)
826 self.update(params, transformed=True, includes_fixed=True,
827 complex_step=complex_step)
829 # Save the parameter names
830 self.data.param_names = self.param_names
832 if complex_step:
833 kwargs['inversion_method'] = INVERT_UNIVARIATE | SOLVE_LU
835 # Get the state space output
836 result = self.ssm.smooth(complex_step=complex_step, **kwargs)
838 # Wrap in a results object
839 return self._wrap_results(params, result, return_ssm, cov_type,
840 cov_kwds, results_class,
841 results_wrapper_class)
843 _loglike_param_names = ['transformed', 'includes_fixed', 'complex_step']
844 _loglike_param_defaults = [True, False, False]
846 def loglike(self, params, *args, **kwargs):
847 """
848 Loglikelihood evaluation
850 Parameters
851 ----------
852 params : array_like
853 Array of parameters at which to evaluate the loglikelihood
854 function.
855 transformed : bool, optional
856 Whether or not `params` is already transformed. Default is True.
857 **kwargs
858 Additional keyword arguments to pass to the Kalman filter. See
859 `KalmanFilter.filter` for more details.
861 Notes
862 -----
863 [1]_ recommend maximizing the average likelihood to avoid scale issues;
864 this is done automatically by the base Model fit method.
866 References
867 ----------
868 .. [1] Koopman, Siem Jan, Neil Shephard, and Jurgen A. Doornik. 1999.
869 Statistical Algorithms for Models in State Space Using SsfPack 2.2.
870 Econometrics Journal 2 (1): 107-60. doi:10.1111/1368-423X.00023.
872 See Also
873 --------
874 update : modifies the internal state of the state space model to
875 reflect new params
876 """
877 transformed, includes_fixed, complex_step, kwargs = _handle_args(
878 MLEModel._loglike_param_names, MLEModel._loglike_param_defaults,
879 *args, **kwargs)
881 params = self.handle_params(params, transformed=transformed,
882 includes_fixed=includes_fixed)
883 self.update(params, transformed=True, includes_fixed=True,
884 complex_step=complex_step)
886 if complex_step:
887 kwargs['inversion_method'] = INVERT_UNIVARIATE | SOLVE_LU
889 loglike = self.ssm.loglike(complex_step=complex_step, **kwargs)
891 # Koopman, Shephard, and Doornik recommend maximizing the average
892 # likelihood to avoid scale issues, but the averaging is done
893 # automatically in the base model `fit` method
894 return loglike
896 def loglikeobs(self, params, transformed=True, includes_fixed=False,
897 complex_step=False, **kwargs):
898 """
899 Loglikelihood evaluation
901 Parameters
902 ----------
903 params : array_like
904 Array of parameters at which to evaluate the loglikelihood
905 function.
906 transformed : bool, optional
907 Whether or not `params` is already transformed. Default is True.
908 **kwargs
909 Additional keyword arguments to pass to the Kalman filter. See
910 `KalmanFilter.filter` for more details.
912 Notes
913 -----
914 [1]_ recommend maximizing the average likelihood to avoid scale issues;
915 this is done automatically by the base Model fit method.
917 References
918 ----------
919 .. [1] Koopman, Siem Jan, Neil Shephard, and Jurgen A. Doornik. 1999.
920 Statistical Algorithms for Models in State Space Using SsfPack 2.2.
921 Econometrics Journal 2 (1): 107-60. doi:10.1111/1368-423X.00023.
923 See Also
924 --------
925 update : modifies the internal state of the Model to reflect new params
926 """
927 params = self.handle_params(params, transformed=transformed,
928 includes_fixed=includes_fixed)
930 # If we're using complex-step differentiation, then we cannot use
931 # Cholesky factorization
932 if complex_step:
933 kwargs['inversion_method'] = INVERT_UNIVARIATE | SOLVE_LU
935 self.update(params, transformed=True, includes_fixed=True,
936 complex_step=complex_step)
938 return self.ssm.loglikeobs(complex_step=complex_step, **kwargs)
940 def simulation_smoother(self, simulation_output=None, **kwargs):
941 r"""
942 Retrieve a simulation smoother for the state space model.
944 Parameters
945 ----------
946 simulation_output : int, optional
947 Determines which simulation smoother output is calculated.
948 Default is all (including state and disturbances).
949 **kwargs
950 Additional keyword arguments, used to set the simulation output.
951 See `set_simulation_output` for more details.
953 Returns
954 -------
955 SimulationSmoothResults
956 """
957 return self.ssm.simulation_smoother(
958 simulation_output=simulation_output, **kwargs)
960 def _forecasts_error_partial_derivatives(self, params, transformed=True,
961 includes_fixed=False,
962 approx_complex_step=None,
963 approx_centered=False,
964 res=None, **kwargs):
965 params = np.array(params, ndmin=1)
967 # We cannot use complex-step differentiation with non-transformed
968 # parameters
969 if approx_complex_step is None:
970 approx_complex_step = transformed
971 if not transformed and approx_complex_step:
972 raise ValueError("Cannot use complex-step approximations to"
973 " calculate the observed_information_matrix"
974 " with untransformed parameters.")
976 # If we're using complex-step differentiation, then we cannot use
977 # Cholesky factorization
978 if approx_complex_step:
979 kwargs['inversion_method'] = INVERT_UNIVARIATE | SOLVE_LU
981 # Get values at the params themselves
982 if res is None:
983 self.update(params, transformed=transformed,
984 includes_fixed=includes_fixed,
985 complex_step=approx_complex_step)
986 res = self.ssm.filter(complex_step=approx_complex_step, **kwargs)
988 # Setup
989 n = len(params)
991 # Compute partial derivatives w.r.t. forecast error and forecast
992 # error covariance
993 partials_forecasts_error = (
994 np.zeros((self.k_endog, self.nobs, n))
995 )
996 partials_forecasts_error_cov = (
997 np.zeros((self.k_endog, self.k_endog, self.nobs, n))
998 )
999 if approx_complex_step:
1000 epsilon = _get_epsilon(params, 2, None, n)
1001 increments = np.identity(n) * 1j * epsilon
1003 for i, ih in enumerate(increments):
1004 self.update(params + ih, transformed=transformed,
1005 includes_fixed=includes_fixed,
1006 complex_step=True)
1007 _res = self.ssm.filter(complex_step=True, **kwargs)
1009 partials_forecasts_error[:, :, i] = (
1010 _res.forecasts_error.imag / epsilon[i]
1011 )
1013 partials_forecasts_error_cov[:, :, :, i] = (
1014 _res.forecasts_error_cov.imag / epsilon[i]
1015 )
1016 elif not approx_centered:
1017 epsilon = _get_epsilon(params, 2, None, n)
1018 ei = np.zeros((n,), float)
1019 for i in range(n):
1020 ei[i] = epsilon[i]
1021 self.update(params + ei, transformed=transformed,
1022 includes_fixed=includes_fixed, complex_step=False)
1023 _res = self.ssm.filter(complex_step=False, **kwargs)
1025 partials_forecasts_error[:, :, i] = (
1026 _res.forecasts_error - res.forecasts_error) / epsilon[i]
1028 partials_forecasts_error_cov[:, :, :, i] = (
1029 _res.forecasts_error_cov -
1030 res.forecasts_error_cov) / epsilon[i]
1031 ei[i] = 0.0
1032 else:
1033 epsilon = _get_epsilon(params, 3, None, n) / 2.
1034 ei = np.zeros((n,), float)
1035 for i in range(n):
1036 ei[i] = epsilon[i]
1038 self.update(params + ei, transformed=transformed,
1039 includes_fixed=includes_fixed, complex_step=False)
1040 _res1 = self.ssm.filter(complex_step=False, **kwargs)
1042 self.update(params - ei, transformed=transformed,
1043 includes_fixed=includes_fixed, complex_step=False)
1044 _res2 = self.ssm.filter(complex_step=False, **kwargs)
1046 partials_forecasts_error[:, :, i] = (
1047 (_res1.forecasts_error - _res2.forecasts_error) /
1048 (2 * epsilon[i]))
1050 partials_forecasts_error_cov[:, :, :, i] = (
1051 (_res1.forecasts_error_cov - _res2.forecasts_error_cov) /
1052 (2 * epsilon[i]))
1054 ei[i] = 0.0
1056 return partials_forecasts_error, partials_forecasts_error_cov
1058 def observed_information_matrix(self, params, transformed=True,
1059 includes_fixed=False,
1060 approx_complex_step=None,
1061 approx_centered=False, **kwargs):
1062 """
1063 Observed information matrix
1065 Parameters
1066 ----------
1067 params : array_like, optional
1068 Array of parameters at which to evaluate the loglikelihood
1069 function.
1070 **kwargs
1071 Additional keyword arguments to pass to the Kalman filter. See
1072 `KalmanFilter.filter` for more details.
1074 Notes
1075 -----
1076 This method is from Harvey (1989), which shows that the information
1077 matrix only depends on terms from the gradient. This implementation is
1078 partially analytic and partially numeric approximation, therefore,
1079 because it uses the analytic formula for the information matrix, with
1080 numerically computed elements of the gradient.
1082 References
1083 ----------
1084 Harvey, Andrew C. 1990.
1085 Forecasting, Structural Time Series Models and the Kalman Filter.
1086 Cambridge University Press.
1087 """
1088 params = np.array(params, ndmin=1)
1090 # Setup
1091 n = len(params)
1093 # We cannot use complex-step differentiation with non-transformed
1094 # parameters
1095 if approx_complex_step is None:
1096 approx_complex_step = transformed
1097 if not transformed and approx_complex_step:
1098 raise ValueError("Cannot use complex-step approximations to"
1099 " calculate the observed_information_matrix"
1100 " with untransformed parameters.")
1102 # Get values at the params themselves
1103 params = self.handle_params(params, transformed=transformed,
1104 includes_fixed=includes_fixed)
1105 self.update(params, transformed=True, includes_fixed=True,
1106 complex_step=approx_complex_step)
1107 # If we're using complex-step differentiation, then we cannot use
1108 # Cholesky factorization
1109 if approx_complex_step:
1110 kwargs['inversion_method'] = INVERT_UNIVARIATE | SOLVE_LU
1111 res = self.ssm.filter(complex_step=approx_complex_step, **kwargs)
1112 dtype = self.ssm.dtype
1114 # Save this for inversion later
1115 inv_forecasts_error_cov = res.forecasts_error_cov.copy()
1117 partials_forecasts_error, partials_forecasts_error_cov = (
1118 self._forecasts_error_partial_derivatives(
1119 params, transformed=transformed, includes_fixed=includes_fixed,
1120 approx_complex_step=approx_complex_step,
1121 approx_centered=approx_centered, res=res, **kwargs))
1123 # Compute the information matrix
1124 tmp = np.zeros((self.k_endog, self.k_endog, self.nobs, n), dtype=dtype)
1126 information_matrix = np.zeros((n, n), dtype=dtype)
1127 d = np.maximum(self.ssm.loglikelihood_burn, res.nobs_diffuse)
1128 for t in range(d, self.nobs):
1129 inv_forecasts_error_cov[:, :, t] = (
1130 np.linalg.inv(res.forecasts_error_cov[:, :, t])
1131 )
1132 for i in range(n):
1133 tmp[:, :, t, i] = np.dot(
1134 inv_forecasts_error_cov[:, :, t],
1135 partials_forecasts_error_cov[:, :, t, i]
1136 )
1137 for i in range(n):
1138 for j in range(n):
1139 information_matrix[i, j] += (
1140 0.5 * np.trace(np.dot(tmp[:, :, t, i],
1141 tmp[:, :, t, j]))
1142 )
1143 information_matrix[i, j] += np.inner(
1144 partials_forecasts_error[:, t, i],
1145 np.dot(inv_forecasts_error_cov[:, :, t],
1146 partials_forecasts_error[:, t, j])
1147 )
1148 return information_matrix / (self.nobs - self.ssm.loglikelihood_burn)
1150 def opg_information_matrix(self, params, transformed=True,
1151 includes_fixed=False, approx_complex_step=None,
1152 **kwargs):
1153 """
1154 Outer product of gradients information matrix
1156 Parameters
1157 ----------
1158 params : array_like, optional
1159 Array of parameters at which to evaluate the loglikelihood
1160 function.
1161 **kwargs
1162 Additional arguments to the `loglikeobs` method.
1164 References
1165 ----------
1166 Berndt, Ernst R., Bronwyn Hall, Robert Hall, and Jerry Hausman. 1974.
1167 Estimation and Inference in Nonlinear Structural Models.
1168 NBER Chapters. National Bureau of Economic Research, Inc.
1169 """
1170 # We cannot use complex-step differentiation with non-transformed
1171 # parameters
1172 if approx_complex_step is None:
1173 approx_complex_step = transformed
1174 if not transformed and approx_complex_step:
1175 raise ValueError("Cannot use complex-step approximations to"
1176 " calculate the observed_information_matrix"
1177 " with untransformed parameters.")
1179 score_obs = self.score_obs(params, transformed=transformed,
1180 includes_fixed=includes_fixed,
1181 approx_complex_step=approx_complex_step,
1182 **kwargs).transpose()
1183 return (
1184 np.inner(score_obs, score_obs) /
1185 (self.nobs - self.ssm.loglikelihood_burn)
1186 )
1188 def _score_complex_step(self, params, **kwargs):
1189 # the default epsilon can be too small
1190 # inversion_method = INVERT_UNIVARIATE | SOLVE_LU
1191 epsilon = _get_epsilon(params, 2., None, len(params))
1192 kwargs['transformed'] = True
1193 kwargs['complex_step'] = True
1194 return approx_fprime_cs(params, self.loglike, epsilon=epsilon,
1195 kwargs=kwargs)
1197 def _score_finite_difference(self, params, approx_centered=False,
1198 **kwargs):
1199 kwargs['transformed'] = True
1200 return approx_fprime(params, self.loglike, kwargs=kwargs,
1201 centered=approx_centered)
1203 def _score_harvey(self, params, approx_complex_step=True, **kwargs):
1204 score_obs = self._score_obs_harvey(
1205 params, approx_complex_step=approx_complex_step, **kwargs)
1206 return np.sum(score_obs, axis=0)
1208 def _score_obs_harvey(self, params, approx_complex_step=True,
1209 approx_centered=False, includes_fixed=False,
1210 **kwargs):
1211 """
1212 Score
1214 Parameters
1215 ----------
1216 params : array_like, optional
1217 Array of parameters at which to evaluate the loglikelihood
1218 function.
1219 **kwargs
1220 Additional keyword arguments to pass to the Kalman filter. See
1221 `KalmanFilter.filter` for more details.
1223 Notes
1224 -----
1225 This method is from Harvey (1989), section 3.4.5
1227 References
1228 ----------
1229 Harvey, Andrew C. 1990.
1230 Forecasting, Structural Time Series Models and the Kalman Filter.
1231 Cambridge University Press.
1232 """
1233 params = np.array(params, ndmin=1)
1234 n = len(params)
1236 # Get values at the params themselves
1237 self.update(params, transformed=True, includes_fixed=includes_fixed,
1238 complex_step=approx_complex_step)
1239 if approx_complex_step:
1240 kwargs['inversion_method'] = INVERT_UNIVARIATE | SOLVE_LU
1241 res = self.ssm.filter(complex_step=approx_complex_step, **kwargs)
1243 # Get forecasts error partials
1244 partials_forecasts_error, partials_forecasts_error_cov = (
1245 self._forecasts_error_partial_derivatives(
1246 params, transformed=True, includes_fixed=includes_fixed,
1247 approx_complex_step=approx_complex_step,
1248 approx_centered=approx_centered, res=res, **kwargs))
1250 # Compute partial derivatives w.r.t. likelihood function
1251 partials = np.zeros((self.nobs, n))
1252 k_endog = self.k_endog
1253 for t in range(self.nobs):
1254 inv_forecasts_error_cov = np.linalg.inv(
1255 res.forecasts_error_cov[:, :, t])
1257 for i in range(n):
1258 partials[t, i] += np.trace(np.dot(
1259 np.dot(inv_forecasts_error_cov,
1260 partials_forecasts_error_cov[:, :, t, i]),
1261 (np.eye(k_endog) -
1262 np.dot(inv_forecasts_error_cov,
1263 np.outer(res.forecasts_error[:, t],
1264 res.forecasts_error[:, t])))))
1265 # 2 * dv / di * F^{-1} v_t
1266 # where x = F^{-1} v_t or F x = v
1267 partials[t, i] += 2 * np.dot(
1268 partials_forecasts_error[:, t, i],
1269 np.dot(inv_forecasts_error_cov, res.forecasts_error[:, t]))
1271 return -partials / 2.
1273 _score_param_names = ['transformed', 'includes_fixed', 'score_method',
1274 'approx_complex_step', 'approx_centered']
1275 _score_param_defaults = [True, False, 'approx', None, False]
1277 def score(self, params, *args, **kwargs):
1278 """
1279 Compute the score function at params.
1281 Parameters
1282 ----------
1283 params : array_like
1284 Array of parameters at which to evaluate the score.
1285 *args
1286 Additional positional arguments to the `loglike` method.
1287 **kwargs
1288 Additional keyword arguments to the `loglike` method.
1290 Returns
1291 -------
1292 score : ndarray
1293 Score, evaluated at `params`.
1295 Notes
1296 -----
1297 This is a numerical approximation, calculated using first-order complex
1298 step differentiation on the `loglike` method.
1300 Both args and kwargs are necessary because the optimizer from
1301 `fit` must call this function and only supports passing arguments via
1302 args (for example `scipy.optimize.fmin_l_bfgs`).
1303 """
1304 (transformed, includes_fixed, method, approx_complex_step,
1305 approx_centered, kwargs) = (
1306 _handle_args(MLEModel._score_param_names,
1307 MLEModel._score_param_defaults, *args, **kwargs))
1308 # For fit() calls, the method is called 'score_method' (to distinguish
1309 # it from the method used for fit) but generally in kwargs the method
1310 # will just be called 'method'
1311 if 'method' in kwargs:
1312 method = kwargs.pop('method')
1314 if approx_complex_step is None:
1315 approx_complex_step = not self.ssm._complex_endog
1316 if approx_complex_step and self.ssm._complex_endog:
1317 raise ValueError('Cannot use complex step derivatives when data'
1318 ' or parameters are complex.')
1320 out = self.handle_params(
1321 params, transformed=transformed, includes_fixed=includes_fixed,
1322 return_jacobian=not transformed)
1323 if transformed:
1324 params = out
1325 else:
1326 params, transform_score = out
1328 if method == 'harvey':
1329 kwargs['includes_fixed'] = True
1330 score = self._score_harvey(
1331 params, approx_complex_step=approx_complex_step, **kwargs)
1332 elif method == 'approx' and approx_complex_step:
1333 kwargs['includes_fixed'] = True
1334 score = self._score_complex_step(params, **kwargs)
1335 elif method == 'approx':
1336 kwargs['includes_fixed'] = True
1337 score = self._score_finite_difference(
1338 params, approx_centered=approx_centered, **kwargs)
1339 else:
1340 raise NotImplementedError('Invalid score method.')
1342 if not transformed:
1343 score = np.dot(transform_score, score)
1345 if self._has_fixed_params and not includes_fixed:
1346 score = score[self._free_params_index]
1348 return score
1350 def score_obs(self, params, method='approx', transformed=True,
1351 includes_fixed=False, approx_complex_step=None,
1352 approx_centered=False, **kwargs):
1353 """
1354 Compute the score per observation, evaluated at params
1356 Parameters
1357 ----------
1358 params : array_like
1359 Array of parameters at which to evaluate the score.
1360 **kwargs
1361 Additional arguments to the `loglike` method.
1363 Returns
1364 -------
1365 score : ndarray
1366 Score per observation, evaluated at `params`.
1368 Notes
1369 -----
1370 This is a numerical approximation, calculated using first-order complex
1371 step differentiation on the `loglikeobs` method.
1372 """
1373 if not transformed and approx_complex_step:
1374 raise ValueError("Cannot use complex-step approximations to"
1375 " calculate the score at each observation"
1376 " with untransformed parameters.")
1378 if approx_complex_step is None:
1379 approx_complex_step = not self.ssm._complex_endog
1380 if approx_complex_step and self.ssm._complex_endog:
1381 raise ValueError('Cannot use complex step derivatives when data'
1382 ' or parameters are complex.')
1384 params = self.handle_params(params, transformed=True,
1385 includes_fixed=includes_fixed)
1386 kwargs['transformed'] = transformed
1387 kwargs['includes_fixed'] = True
1389 if method == 'harvey':
1390 score = self._score_obs_harvey(
1391 params, approx_complex_step=approx_complex_step, **kwargs)
1392 elif method == 'approx' and approx_complex_step:
1393 # the default epsilon can be too small
1394 epsilon = _get_epsilon(params, 2., None, len(params))
1395 kwargs['complex_step'] = True
1396 score = approx_fprime_cs(params, self.loglikeobs, epsilon=epsilon,
1397 kwargs=kwargs)
1398 elif method == 'approx':
1399 score = approx_fprime(params, self.loglikeobs, kwargs=kwargs,
1400 centered=approx_centered)
1401 else:
1402 raise NotImplementedError('Invalid scoreobs method.')
1404 return score
1406 _hessian_param_names = ['transformed', 'hessian_method',
1407 'approx_complex_step', 'approx_centered']
1408 _hessian_param_defaults = [True, 'approx', None, False]
1410 def hessian(self, params, *args, **kwargs):
1411 r"""
1412 Hessian matrix of the likelihood function, evaluated at the given
1413 parameters
1415 Parameters
1416 ----------
1417 params : array_like
1418 Array of parameters at which to evaluate the hessian.
1419 *args
1420 Additional positional arguments to the `loglike` method.
1421 **kwargs
1422 Additional keyword arguments to the `loglike` method.
1424 Returns
1425 -------
1426 hessian : ndarray
1427 Hessian matrix evaluated at `params`
1429 Notes
1430 -----
1431 This is a numerical approximation.
1433 Both args and kwargs are necessary because the optimizer from
1434 `fit` must call this function and only supports passing arguments via
1435 args (for example `scipy.optimize.fmin_l_bfgs`).
1436 """
1437 transformed, method, approx_complex_step, approx_centered, kwargs = (
1438 _handle_args(MLEModel._hessian_param_names,
1439 MLEModel._hessian_param_defaults,
1440 *args, **kwargs))
1441 # For fit() calls, the method is called 'hessian_method' (to
1442 # distinguish it from the method used for fit) but generally in kwargs
1443 # the method will just be called 'method'
1444 if 'method' in kwargs:
1445 method = kwargs.pop('method')
1447 if not transformed and approx_complex_step:
1448 raise ValueError("Cannot use complex-step approximations to"
1449 " calculate the hessian with untransformed"
1450 " parameters.")
1452 if approx_complex_step is None:
1453 approx_complex_step = not self.ssm._complex_endog
1454 if approx_complex_step and self.ssm._complex_endog:
1455 raise ValueError('Cannot use complex step derivatives when data'
1456 ' or parameters are complex.')
1458 if method == 'oim':
1459 hessian = self._hessian_oim(
1460 params, transformed=transformed,
1461 approx_complex_step=approx_complex_step,
1462 approx_centered=approx_centered, **kwargs)
1463 elif method == 'opg':
1464 hessian = self._hessian_opg(
1465 params, transformed=transformed,
1466 approx_complex_step=approx_complex_step,
1467 approx_centered=approx_centered, **kwargs)
1468 elif method == 'approx' and approx_complex_step:
1469 hessian = self._hessian_complex_step(
1470 params, transformed=transformed, **kwargs)
1471 elif method == 'approx':
1472 hessian = self._hessian_finite_difference(
1473 params, transformed=transformed,
1474 approx_centered=approx_centered, **kwargs)
1475 else:
1476 raise NotImplementedError('Invalid Hessian calculation method.')
1477 return hessian
1479 def _hessian_oim(self, params, **kwargs):
1480 """
1481 Hessian matrix computed using the Harvey (1989) information matrix
1482 """
1483 return -self.observed_information_matrix(params, **kwargs)
1485 def _hessian_opg(self, params, **kwargs):
1486 """
1487 Hessian matrix computed using the outer product of gradients
1488 information matrix
1489 """
1490 return -self.opg_information_matrix(params, **kwargs)
1492 def _hessian_finite_difference(self, params, approx_centered=False,
1493 **kwargs):
1494 params = np.array(params, ndmin=1)
1496 warnings.warn('Calculation of the Hessian using finite differences'
1497 ' is usually subject to substantial approximation'
1498 ' errors.', PrecisionWarning)
1500 if not approx_centered:
1501 epsilon = _get_epsilon(params, 3, None, len(params))
1502 else:
1503 epsilon = _get_epsilon(params, 4, None, len(params)) / 2
1504 hessian = approx_fprime(params, self._score_finite_difference,
1505 epsilon=epsilon, kwargs=kwargs,
1506 centered=approx_centered)
1508 return hessian / (self.nobs - self.ssm.loglikelihood_burn)
1510 def _hessian_complex_step(self, params, **kwargs):
1511 """
1512 Hessian matrix computed by second-order complex-step differentiation
1513 on the `loglike` function.
1514 """
1515 # the default epsilon can be too small
1516 epsilon = _get_epsilon(params, 3., None, len(params))
1517 kwargs['transformed'] = True
1518 kwargs['complex_step'] = True
1519 hessian = approx_hess_cs(
1520 params, self.loglike, epsilon=epsilon, kwargs=kwargs)
1522 return hessian / (self.nobs - self.ssm.loglikelihood_burn)
1524 @property
1525 def start_params(self):
1526 """
1527 (array) Starting parameters for maximum likelihood estimation.
1528 """
1529 if hasattr(self, '_start_params'):
1530 return self._start_params
1531 else:
1532 raise NotImplementedError
1534 @property
1535 def param_names(self):
1536 """
1537 (list of str) List of human readable parameter names (for parameters
1538 actually included in the model).
1539 """
1540 if hasattr(self, '_param_names'):
1541 return self._param_names
1542 else:
1543 try:
1544 names = ['param.%d' % i for i in range(len(self.start_params))]
1545 except NotImplementedError:
1546 names = []
1547 return names
1549 @property
1550 def state_names(self):
1551 """
1552 (list of str) List of human readable names for unobserved states.
1553 """
1554 if hasattr(self, '_state_names'):
1555 return self._state_names
1556 else:
1557 names = ['state.%d' % i for i in range(self.k_states)]
1558 return names
1560 def transform_jacobian(self, unconstrained, approx_centered=False):
1561 """
1562 Jacobian matrix for the parameter transformation function
1564 Parameters
1565 ----------
1566 unconstrained : array_like
1567 Array of unconstrained parameters used by the optimizer.
1569 Returns
1570 -------
1571 jacobian : ndarray
1572 Jacobian matrix of the transformation, evaluated at `unconstrained`
1574 Notes
1575 -----
1576 This is a numerical approximation using finite differences. Note that
1577 in general complex step methods cannot be used because it is not
1578 guaranteed that the `transform_params` method is a real function (e.g.
1579 if Cholesky decomposition is used).
1581 See Also
1582 --------
1583 transform_params
1584 """
1585 return approx_fprime(unconstrained, self.transform_params,
1586 centered=approx_centered)
1588 def transform_params(self, unconstrained):
1589 """
1590 Transform unconstrained parameters used by the optimizer to constrained
1591 parameters used in likelihood evaluation
1593 Parameters
1594 ----------
1595 unconstrained : array_like
1596 Array of unconstrained parameters used by the optimizer, to be
1597 transformed.
1599 Returns
1600 -------
1601 constrained : array_like
1602 Array of constrained parameters which may be used in likelihood
1603 evaluation.
1605 Notes
1606 -----
1607 This is a noop in the base class, subclasses should override where
1608 appropriate.
1609 """
1610 return np.array(unconstrained, ndmin=1)
1612 def untransform_params(self, constrained):
1613 """
1614 Transform constrained parameters used in likelihood evaluation
1615 to unconstrained parameters used by the optimizer
1617 Parameters
1618 ----------
1619 constrained : array_like
1620 Array of constrained parameters used in likelihood evaluation, to
1621 be transformed.
1623 Returns
1624 -------
1625 unconstrained : array_like
1626 Array of unconstrained parameters used by the optimizer.
1628 Notes
1629 -----
1630 This is a noop in the base class, subclasses should override where
1631 appropriate.
1632 """
1633 return np.array(constrained, ndmin=1)
1635 def handle_params(self, params, transformed=True, includes_fixed=False,
1636 return_jacobian=False):
1637 params = np.array(params, ndmin=1)
1639 # Never want integer dtype, so convert to floats
1640 if np.issubdtype(params.dtype, np.integer):
1641 params = params.astype(np.float64)
1643 if not includes_fixed and self._has_fixed_params:
1644 k_params = len(self.param_names)
1645 new_params = np.zeros(k_params, dtype=params.dtype) * np.nan
1646 new_params[self._free_params_index] = params
1647 params = new_params
1649 if not transformed:
1650 # It may be the case that the transformation relies on having
1651 # "some" (non-NaN) values for the fixed parameters, even if we will
1652 # not actually be transforming the fixed parameters (as they will)
1653 # be set below regardless
1654 if not includes_fixed and self._has_fixed_params:
1655 params[self._fixed_params_index] = (
1656 list(self._fixed_params.values()))
1658 if return_jacobian:
1659 transform_score = self.transform_jacobian(params)
1660 params = self.transform_params(params)
1662 if not includes_fixed and self._has_fixed_params:
1663 params[self._fixed_params_index] = (
1664 list(self._fixed_params.values()))
1666 return (params, transform_score) if return_jacobian else params
1668 def update(self, params, transformed=True, includes_fixed=False,
1669 complex_step=False):
1670 """
1671 Update the parameters of the model
1673 Parameters
1674 ----------
1675 params : array_like
1676 Array of new parameters.
1677 transformed : bool, optional
1678 Whether or not `params` is already transformed. If set to False,
1679 `transform_params` is called. Default is True.
1681 Returns
1682 -------
1683 params : array_like
1684 Array of parameters.
1686 Notes
1687 -----
1688 Since Model is a base class, this method should be overridden by
1689 subclasses to perform actual updating steps.
1690 """
1691 return self.handle_params(params=params, transformed=transformed,
1692 includes_fixed=includes_fixed)
1694 def _validate_out_of_sample_exog(self, exog, out_of_sample):
1695 """
1696 Validate given `exog` as satisfactory for out-of-sample operations
1698 Parameters
1699 ----------
1700 exog : array_like or None
1701 New observations of exogenous regressors, if applicable.
1702 out_of_sample : int
1703 Number of new observations required.
1705 Returns
1706 -------
1707 exog : array or None
1708 A numpy array of shape (out_of_sample, k_exog) if the model
1709 contains an `exog` component, or None if it doesn't.
1710 """
1711 if out_of_sample and self.k_exog > 0:
1712 if exog is None:
1713 raise ValueError('Out-of-sample operations in a model'
1714 ' with a regression component require'
1715 ' additional exogenous values via the'
1716 ' `exog` argument.')
1717 exog = np.array(exog)
1718 required_exog_shape = (out_of_sample, self.k_exog)
1719 try:
1720 exog = exog.reshape(required_exog_shape)
1721 except ValueError:
1722 raise ValueError('Provided exogenous values are not of the'
1723 ' appropriate shape. Required %s, got %s.'
1724 % (str(required_exog_shape),
1725 str(exog.shape)))
1726 elif self.k_exog > 0:
1727 exog = None
1728 warnings.warn('Exogenous array provided, but additional data'
1729 ' is not required. `exog` argument ignored.',
1730 ValueWarning)
1732 return exog
1734 def _get_extension_time_varying_matrices(
1735 self, params, exog, out_of_sample, extend_kwargs=None,
1736 transformed=True, includes_fixed=False, **kwargs):
1737 """
1738 Get updated time-varying state space system matrices
1740 Parameters
1741 ----------
1742 params : array_like
1743 Array of parameters used to construct the time-varying system
1744 matrices.
1745 exog : array_like or None
1746 New observations of exogenous regressors, if applicable.
1747 out_of_sample : int
1748 Number of new observations required.
1749 extend_kwargs : dict, optional
1750 Dictionary of keyword arguments to pass to the state space model
1751 constructor. For example, for an SARIMAX state space model, this
1752 could be used to pass the `concentrate_scale=True` keyword
1753 argument. Any arguments that are not explicitly set in this
1754 dictionary will be copied from the current model instance.
1755 transformed : bool, optional
1756 Whether or not `start_params` is already transformed. Default is
1757 True.
1758 includes_fixed : bool, optional
1759 If parameters were previously fixed with the `fix_params` method,
1760 this argument describes whether or not `start_params` also includes
1761 the fixed parameters, in addition to the free parameters. Default
1762 is False.
1763 """
1764 # Get the appropriate exog for the extended sample
1765 exog = self._validate_out_of_sample_exog(exog, out_of_sample)
1767 # Create extended model
1768 if extend_kwargs is None:
1769 extend_kwargs = {}
1771 # Handle trend offset for extended model
1772 if getattr(self, 'k_trend', 0) > 0 and hasattr(self, 'trend_offset'):
1773 extend_kwargs.setdefault(
1774 'trend_offset', self.trend_offset + self.nobs)
1776 mod_extend = self.clone(
1777 endog=np.zeros((out_of_sample, self.k_endog)), exog=exog,
1778 **extend_kwargs)
1779 mod_extend.update(params, transformed=transformed,
1780 includes_fixed=includes_fixed)
1782 # Retrieve the extensions to the time-varying system matrices and
1783 # put them in kwargs
1784 for name in self.ssm.shapes.keys():
1785 if name == 'obs' or name in kwargs:
1786 continue
1787 if getattr(self.ssm, name).shape[-1] > 1:
1788 mat = getattr(mod_extend.ssm, name)
1789 kwargs[name] = mat[..., -out_of_sample:]
1791 return kwargs
1793 def simulate(self, params, nsimulations, measurement_shocks=None,
1794 state_shocks=None, initial_state=None, anchor=None,
1795 repetitions=None, exog=None, extend_model=None,
1796 extend_kwargs=None, transformed=True, includes_fixed=False,
1797 **kwargs):
1798 r"""
1799 Simulate a new time series following the state space model
1801 Parameters
1802 ----------
1803 params : array_like
1804 Array of parameters to use in constructing the state space
1805 representation to use when simulating.
1806 nsimulations : int
1807 The number of observations to simulate. If the model is
1808 time-invariant this can be any number. If the model is
1809 time-varying, then this number must be less than or equal to the
1810 number of observations.
1811 measurement_shocks : array_like, optional
1812 If specified, these are the shocks to the measurement equation,
1813 :math:`\varepsilon_t`. If unspecified, these are automatically
1814 generated using a pseudo-random number generator. If specified,
1815 must be shaped `nsimulations` x `k_endog`, where `k_endog` is the
1816 same as in the state space model.
1817 state_shocks : array_like, optional
1818 If specified, these are the shocks to the state equation,
1819 :math:`\eta_t`. If unspecified, these are automatically
1820 generated using a pseudo-random number generator. If specified,
1821 must be shaped `nsimulations` x `k_posdef` where `k_posdef` is the
1822 same as in the state space model.
1823 initial_state : array_like, optional
1824 If specified, this is the initial state vector to use in
1825 simulation, which should be shaped (`k_states` x 1), where
1826 `k_states` is the same as in the state space model. If unspecified,
1827 but the model has been initialized, then that initialization is
1828 used. This must be specified if `anchor` is anything other than
1829 "start" or 0 (or else you can use the `simulate` method on a
1830 results object rather than on the model object).
1831 anchor : int, str, or datetime, optional
1832 First period for simulation. The simulation will be conditional on
1833 all existing datapoints prior to the `anchor`. Type depends on the
1834 index of the given `endog` in the model. Two special cases are the
1835 strings 'start' and 'end'. `start` refers to beginning the
1836 simulation at the first period of the sample, and `end` refers to
1837 beginning the simulation at the first period after the sample.
1838 Integer values can run from 0 to `nobs`, or can be negative to
1839 apply negative indexing. Finally, if a date/time index was provided
1840 to the model, then this argument can be a date string to parse or a
1841 datetime type. Default is 'start'.
1842 repetitions : int, optional
1843 Number of simulated paths to generate. Default is 1 simulated path.
1844 exog : array_like, optional
1845 New observations of exogenous regressors, if applicable.
1846 transformed : bool, optional
1847 Whether or not `params` is already transformed. Default is
1848 True.
1849 includes_fixed : bool, optional
1850 If parameters were previously fixed with the `fix_params` method,
1851 this argument describes whether or not `params` also includes
1852 the fixed parameters, in addition to the free parameters. Default
1853 is False.
1855 Returns
1856 -------
1857 simulated_obs : ndarray
1858 An array of simulated observations. If `repetitions=None`, then it
1859 will be shaped (nsimulations x k_endog) or (nsimulations,) if
1860 `k_endog=1`. Otherwise it will be shaped
1861 (nsimulations x k_endog x repetitions). If the model was given
1862 Pandas input then the output will be a Pandas object. If
1863 `k_endog > 1` and `repetitions` is not None, then the output will
1864 be a Pandas DataFrame that has a MultiIndex for the columns, with
1865 the first level containing the names of the `endog` variables and
1866 the second level containing the repetition number.
1867 """
1868 # Make sure the model class has the current parameters
1869 self.update(params, transformed=transformed,
1870 includes_fixed=includes_fixed)
1872 # Get the starting location
1873 if anchor is None or anchor == 'start':
1874 iloc = 0
1875 elif anchor == 'end':
1876 iloc = self.nobs
1877 else:
1878 iloc, _, _ = self._get_index_loc(anchor)
1879 if isinstance(iloc, slice):
1880 iloc = iloc.start
1882 if iloc < 0:
1883 iloc = self.nobs + iloc
1884 if iloc > self.nobs:
1885 raise ValueError('Cannot anchor simulation outside of the sample.')
1887 if iloc > 0 and initial_state is None:
1888 raise ValueError('If `anchor` is after the start of the sample,'
1889 ' must provide a value for `initial_state`.')
1891 # Get updated time-varying system matrices in **kwargs, if necessary
1892 out_of_sample = max(iloc + nsimulations - self.nobs, 0)
1893 if extend_model is None:
1894 extend_model = self.exog is not None or not self.ssm.time_invariant
1895 if out_of_sample and extend_model:
1896 kwargs = self._get_extension_time_varying_matrices(
1897 params, exog, out_of_sample, extend_kwargs,
1898 transformed=transformed, includes_fixed=includes_fixed,
1899 **kwargs)
1901 # Standardize the dimensions of the initial state
1902 if initial_state is not None:
1903 initial_state = np.array(initial_state)
1904 if initial_state.ndim < 2:
1905 initial_state = np.atleast_2d(initial_state).T
1907 # Construct a model that represents the simulation period
1908 end = min(self.nobs, iloc + nsimulations)
1909 nextend = iloc + nsimulations - end
1910 sim_model = self.ssm.extend(np.empty((nextend, self.k_endog)),
1911 start=iloc, end=end, **kwargs)
1913 # Simulate the data
1914 _repetitions = 1 if repetitions is None else repetitions
1915 sim = np.zeros((nsimulations, self.k_endog, _repetitions))
1917 for i in range(_repetitions):
1918 initial_state_variates = None
1919 if initial_state is not None:
1920 if initial_state.shape[1] == 1:
1921 initial_state_variates = initial_state[:, 0]
1922 else:
1923 initial_state_variates = initial_state[:, i]
1925 # TODO: allow specifying measurement / state shocks for each
1926 # repetition?
1928 out, _ = sim_model.simulate(
1929 nsimulations, measurement_shocks, state_shocks,
1930 initial_state_variates)
1932 sim[:, :, i] = out
1934 # Wrap data / squeeze where appropriate
1935 use_pandas = isinstance(self.data, PandasData)
1936 index = None
1937 if use_pandas:
1938 _, _, _, index = self._get_prediction_index(
1939 iloc, iloc + nsimulations - 1)
1940 # If `repetitions` isn't set, we squeeze the last dimension(s)
1941 if repetitions is None:
1942 if self.k_endog == 1:
1943 sim = sim[:, 0, 0]
1944 if use_pandas:
1945 sim = pd.Series(sim, index=index, name=self.endog_names)
1946 else:
1947 sim = sim[:, :, 0]
1948 if use_pandas:
1949 sim = pd.DataFrame(sim, index=index,
1950 columns=self.endog_names)
1951 elif use_pandas:
1952 shape = sim.shape
1953 endog_names = self.endog_names
1954 if not isinstance(endog_names, list):
1955 endog_names = [endog_names]
1956 columns = pd.MultiIndex.from_product([endog_names,
1957 np.arange(shape[2])])
1958 sim = pd.DataFrame(sim.reshape(shape[0], shape[1] * shape[2]),
1959 index=index, columns=columns)
1961 return sim
1963 def impulse_responses(self, params, steps=1, impulse=0,
1964 orthogonalized=False, cumulative=False, anchor=None,
1965 exog=None, extend_model=None, extend_kwargs=None,
1966 transformed=True, includes_fixed=False, **kwargs):
1967 """
1968 Impulse response function
1970 Parameters
1971 ----------
1972 params : array_like
1973 Array of model parameters.
1974 steps : int, optional
1975 The number of steps for which impulse responses are calculated.
1976 Default is 1. Note that for time-invariant models, the initial
1977 impulse is not counted as a step, so if `steps=1`, the output will
1978 have 2 entries.
1979 impulse : int or array_like
1980 If an integer, the state innovation to pulse; must be between 0
1981 and `k_posdef-1`. Alternatively, a custom impulse vector may be
1982 provided; must be shaped `k_posdef x 1`.
1983 orthogonalized : bool, optional
1984 Whether or not to perform impulse using orthogonalized innovations.
1985 Note that this will also affect custum `impulse` vectors. Default
1986 is False.
1987 cumulative : bool, optional
1988 Whether or not to return cumulative impulse responses. Default is
1989 False.
1990 anchor : int, str, or datetime, optional
1991 Time point within the sample for the state innovation impulse. Type
1992 depends on the index of the given `endog` in the model. Two special
1993 cases are the strings 'start' and 'end', which refer to setting the
1994 impulse at the first and last points of the sample, respectively.
1995 Integer values can run from 0 to `nobs - 1`, or can be negative to
1996 apply negative indexing. Finally, if a date/time index was provided
1997 to the model, then this argument can be a date string to parse or a
1998 datetime type. Default is 'start'.
1999 exog : array_like, optional
2000 New observations of exogenous regressors for our-of-sample periods,
2001 if applicable.
2002 transformed : bool, optional
2003 Whether or not `params` is already transformed. Default is
2004 True.
2005 includes_fixed : bool, optional
2006 If parameters were previously fixed with the `fix_params` method,
2007 this argument describes whether or not `params` also includes
2008 the fixed parameters, in addition to the free parameters. Default
2009 is False.
2010 **kwargs
2011 If the model has time-varying design or transition matrices and the
2012 combination of `anchor` and `steps` implies creating impulse
2013 responses for the out-of-sample period, then these matrices must
2014 have updated values provided for the out-of-sample steps. For
2015 example, if `design` is a time-varying component, `nobs` is 10,
2016 `anchor=1`, and `steps` is 15, a (`k_endog` x `k_states` x 7)
2017 matrix must be provided with the new design matrix values.
2019 Returns
2020 -------
2021 impulse_responses : ndarray
2022 Responses for each endogenous variable due to the impulse
2023 given by the `impulse` argument. For a time-invariant model, the
2024 impulse responses are given for `steps + 1` elements (this gives
2025 the "initial impulse" followed by `steps` responses for the
2026 important cases of VAR and SARIMAX models), while for time-varying
2027 models the impulse responses are only given for `steps` elements
2028 (to avoid having to unexpectedly provide updated time-varying
2029 matrices).
2031 Notes
2032 -----
2033 Intercepts in the measurement and state equation are ignored when
2034 calculating impulse responses.
2036 TODO: add an option to allow changing the ordering for the
2037 orthogonalized option. Will require permuting matrices when
2038 constructing the extended model.
2039 """
2040 # Make sure the model class has the current parameters
2041 self.update(params, transformed=transformed,
2042 includes_fixed=includes_fixed)
2044 # For time-invariant models, add an additional `step`. This is the
2045 # default for time-invariant models based on the expected behavior for
2046 # ARIMA and VAR models: we want to record the initial impulse and also
2047 # `steps` values of the responses afterwards.
2048 # Note: we don't modify `steps` itself, because
2049 # `KalmanFilter.impulse_responses` also adds an additional step in this
2050 # case (this is so that there isn't different behavior when calling
2051 # this method versus that method). We just need to also keep track of
2052 # this here because we need to generate the correct extended model.
2053 additional_steps = 0
2054 if (self.ssm._design.shape[2] == 1 and
2055 self.ssm._transition.shape[2] == 1 and
2056 self.ssm._selection.shape[2] == 1):
2057 additional_steps = 1
2059 # Get the starting location
2060 if anchor is None or anchor == 'start':
2061 iloc = 0
2062 elif anchor == 'end':
2063 iloc = self.nobs - 1
2064 else:
2065 iloc, _, _ = self._get_index_loc(anchor)
2066 if isinstance(iloc, slice):
2067 iloc = iloc.start
2069 if iloc < 0:
2070 iloc = self.nobs + iloc
2071 if iloc >= self.nobs:
2072 raise ValueError('Cannot anchor impulse responses outside of the'
2073 ' sample.')
2075 time_invariant = (
2076 self.ssm._design.shape[2] == self.ssm._obs_cov.shape[2] ==
2077 self.ssm._transition.shape[2] == self.ssm._selection.shape[2] ==
2078 self.ssm._state_cov.shape[2] == 1)
2080 # Get updated time-varying system matrices in **kwargs, if necessary
2081 # (Note: KalmanFilter adds 1 to steps to account for the first impulse)
2082 out_of_sample = max(
2083 iloc + (steps + additional_steps + 1) - self.nobs, 0)
2084 if extend_model is None:
2085 extend_model = self.exog is not None and not time_invariant
2086 if out_of_sample and extend_model:
2087 kwargs = self._get_extension_time_varying_matrices(
2088 params, exog, out_of_sample, extend_kwargs,
2089 transformed=transformed, includes_fixed=includes_fixed,
2090 **kwargs)
2092 # Special handling for matrix terms that are time-varying but
2093 # irrelevant for impulse response functions. Must be set since
2094 # ssm.extend() requires that we pass new matrices for these, but they
2095 # are ignored for IRF purposes.
2096 end = min(self.nobs, iloc + steps + additional_steps)
2097 nextend = iloc + (steps + additional_steps + 1) - end
2098 if ('obs_intercept' not in kwargs and
2099 self.ssm._obs_intercept.shape[1] > 1):
2100 kwargs['obs_intercept'] = np.zeros((self.k_endog, nextend))
2101 if ('state_intercept' not in kwargs and
2102 self.ssm._state_intercept.shape[1] > 1):
2103 kwargs['state_intercept'] = np.zeros((self.k_states, nextend))
2104 if 'obs_cov' not in kwargs and self.ssm._obs_cov.shape[2] > 1:
2105 kwargs['obs_cov'] = np.zeros((self.k_endog, self.k_endog, nextend))
2106 # Special handling for matrix terms that are time-varying but
2107 # only the value at the anchor matters for IRF purposes.
2108 if 'state_cov' not in kwargs and self.ssm._state_cov.shape[2] > 1:
2109 tmp = np.zeros((self.ssm.k_posdef, self.ssm.k_posdef, nextend))
2110 tmp[:] = self['state_cov', :, :, iloc:iloc + 1]
2111 kwargs['state_cov'] = tmp
2112 if 'selection' not in kwargs and self.ssm._selection.shape[2] > 1:
2113 tmp = np.zeros((self.k_states, self.ssm.k_posdef, nextend))
2114 tmp[:] = self['selection', :, :, iloc:iloc + 1]
2115 kwargs['selection'] = tmp
2117 # Construct a model that represents the simulation period
2118 sim_model = self.ssm.extend(np.empty((nextend, self.k_endog)),
2119 start=iloc, end=end, **kwargs)
2121 # Compute the impulse responses
2122 irfs = sim_model.impulse_responses(
2123 steps, impulse, orthogonalized, cumulative)
2125 # IRF is (nobs x k_endog); do not want to squeeze in case of steps = 1
2126 if irfs.shape[1] == 1:
2127 irfs = irfs[:, 0]
2129 return irfs
2131 @classmethod
2132 def from_formula(cls, formula, data, subset=None):
2133 """
2134 Not implemented for state space models
2135 """
2136 raise NotImplementedError
2139class MLEResults(tsbase.TimeSeriesModelResults):
2140 r"""
2141 Class to hold results from fitting a state space model.
2143 Parameters
2144 ----------
2145 model : MLEModel instance
2146 The fitted model instance
2147 params : ndarray
2148 Fitted parameters
2149 filter_results : KalmanFilter instance
2150 The underlying state space model and Kalman filter output
2152 Attributes
2153 ----------
2154 model : Model instance
2155 A reference to the model that was fit.
2156 filter_results : KalmanFilter instance
2157 The underlying state space model and Kalman filter output
2158 nobs : float
2159 The number of observations used to fit the model.
2160 params : ndarray
2161 The parameters of the model.
2162 scale : float
2163 This is currently set to 1.0 unless the model uses concentrated
2164 filtering.
2166 See Also
2167 --------
2168 MLEModel
2169 statsmodels.tsa.statespace.kalman_filter.FilterResults
2170 statsmodels.tsa.statespace.representation.FrozenRepresentation
2171 """
2172 def __init__(self, model, params, results, cov_type=None, cov_kwds=None,
2173 **kwargs):
2174 self.data = model.data
2175 scale = results.scale
2177 tsbase.TimeSeriesModelResults.__init__(self, model, params,
2178 normalized_cov_params=None,
2179 scale=scale)
2181 # Save the fixed parameters
2182 self._has_fixed_params = self.model._has_fixed_params
2183 self._fixed_params_index = self.model._fixed_params_index
2184 self._free_params_index = self.model._free_params_index
2185 # TODO: seems like maybe self.fixed_params should be the dictionary
2186 # itself, not just the keys?
2187 if self._has_fixed_params:
2188 self._fixed_params = self.model._fixed_params.copy()
2189 self.fixed_params = list(self._fixed_params.keys())
2190 else:
2191 self._fixed_params = None
2192 self.fixed_params = []
2193 self.param_names = [
2194 '%s (fixed)' % name if name in self.fixed_params else name
2195 for name in (self.data.param_names or [])]
2197 # Save the state space representation output
2198 self.filter_results = results
2199 if isinstance(results, SmootherResults):
2200 self.smoother_results = results
2201 else:
2202 self.smoother_results = None
2204 # Dimensions
2205 self.nobs = self.filter_results.nobs
2206 self.nobs_diffuse = self.filter_results.nobs_diffuse
2207 if self.nobs_diffuse > 0 and self.loglikelihood_burn > 0:
2208 warnings.warn('Care should be used when applying a loglikelihood'
2209 ' burn to a model with exact diffuse initialization.'
2210 ' Some results objects, e.g. degrees of freedom,'
2211 ' expect only one of the two to be set.')
2212 # This only excludes explicitly burned (usually approximate diffuse)
2213 # periods but does not exclude approximate diffuse periods. This is
2214 # because the loglikelihood remains valid for the initial periods in
2215 # the exact diffuse case (see DK, 2012, section 7.2) and so also do
2216 # e.g. information criteria (see DK, 2012, section 7.4) and the score
2217 # vector (see DK, 2012, section 7.3.3, equation 7.15).
2218 # However, other objects should be excluded in the diffuse periods
2219 # (e.g. the diffuse forecast errors, so in some cases a different
2220 # nobs_effective will have to be computed and used)
2221 self.nobs_effective = self.nobs - self.loglikelihood_burn
2223 P = self.filter_results.initial_diffuse_state_cov
2224 self.k_diffuse_states = 0 if P is None else np.sum(np.diagonal(P) == 1)
2226 # Degrees of freedom (see DK 2012, section 7.4)
2227 k_free_params = self.params.size - len(self.fixed_params)
2228 self.df_model = (k_free_params + self.k_diffuse_states
2229 + self.filter_results.filter_concentrated)
2230 self.df_resid = self.nobs_effective - self.df_model
2232 # Setup covariance matrix notes dictionary
2233 if not hasattr(self, 'cov_kwds'):
2234 self.cov_kwds = {}
2235 if cov_type is None:
2236 cov_type = 'approx' if results.memory_no_likelihood else 'opg'
2237 self.cov_type = cov_type
2239 # Setup the cache
2240 self._cache = {}
2242 # Handle covariance matrix calculation
2243 if cov_kwds is None:
2244 cov_kwds = {}
2245 self._cov_approx_complex_step = (
2246 cov_kwds.pop('approx_complex_step', True))
2247 self._cov_approx_centered = cov_kwds.pop('approx_centered', False)
2248 try:
2249 self._rank = None
2250 self._get_robustcov_results(cov_type=cov_type, use_self=True,
2251 **cov_kwds)
2252 except np.linalg.LinAlgError:
2253 self._rank = 0
2254 k_params = len(self.params)
2255 self.cov_params_default = np.zeros((k_params, k_params)) * np.nan
2256 self.cov_kwds['cov_type'] = (
2257 'Covariance matrix could not be calculated: singular.'
2258 ' information matrix.')
2259 self.model.update(self.params, transformed=True, includes_fixed=True)
2261 # References of filter and smoother output
2262 extra_arrays = [
2263 'filtered_state', 'filtered_state_cov', 'predicted_state',
2264 'predicted_state_cov', 'forecasts', 'forecasts_error',
2265 'forecasts_error_cov', 'standardized_forecasts_error',
2266 'forecasts_error_diffuse_cov', 'predicted_diffuse_state_cov',
2267 'scaled_smoothed_estimator',
2268 'scaled_smoothed_estimator_cov', 'smoothing_error',
2269 'smoothed_state',
2270 'smoothed_state_cov', 'smoothed_state_autocov',
2271 'smoothed_measurement_disturbance',
2272 'smoothed_state_disturbance',
2273 'smoothed_measurement_disturbance_cov',
2274 'smoothed_state_disturbance_cov']
2275 for name in extra_arrays:
2276 setattr(self, name, getattr(self.filter_results, name, None))
2278 # Remove too-short results when memory conservation was used
2279 if self.filter_results.memory_no_forecast_mean:
2280 self.forecasts = None
2281 self.forecasts_error = None
2282 if self.filter_results.memory_no_forecast_cov:
2283 self.forecasts_error_cov = None
2284 if self.filter_results.memory_no_predicted_mean:
2285 self.predicted_state = None
2286 if self.filter_results.memory_no_predicted_cov:
2287 self.predicted_state_cov = None
2288 if self.filter_results.memory_no_filtered_mean:
2289 self.filtered_state = None
2290 if self.filter_results.memory_no_filtered_cov:
2291 self.filtered_state_cov = None
2292 if self.filter_results.memory_no_gain:
2293 pass
2294 if self.filter_results.memory_no_smoothing:
2295 pass
2296 if self.filter_results.memory_no_std_forecast:
2297 self.standardized_forecasts_error = None
2299 # Save more convenient access to states
2300 # (will create a private attribute _states here and provide actual
2301 # access via a getter, so that we can e.g. issue a warning in the case
2302 # that a useless Pandas index was given in the model specification)
2303 self._states = SimpleNamespace()
2305 use_pandas = isinstance(self.data, PandasData)
2306 index = self.model._index
2307 columns = self.model.state_names
2309 # Predicted states
2310 # Note: a complication here is that we also include the initial values
2311 # here, so that we need an extended index in the Pandas case
2312 if (self.predicted_state is None or
2313 self.filter_results.memory_no_predicted_mean):
2314 self._states.predicted = None
2315 elif use_pandas:
2316 extended_index = self.model._get_index_with_initial_state()
2317 self._states.predicted = pd.DataFrame(
2318 self.predicted_state.T, index=extended_index, columns=columns)
2319 else:
2320 self._states.predicted = self.predicted_state.T
2321 if (self.predicted_state_cov is None or
2322 self.filter_results.memory_no_predicted_cov):
2323 self._states.predicted_cov = None
2324 elif use_pandas:
2325 extended_index = self.model._get_index_with_initial_state()
2326 tmp = np.transpose(self.predicted_state_cov, (2, 0, 1))
2327 self._states.predicted_cov = pd.DataFrame(
2328 np.reshape(tmp, (tmp.shape[0] * tmp.shape[1], tmp.shape[2])),
2329 index=pd.MultiIndex.from_product(
2330 [extended_index, columns]).swaplevel(),
2331 columns=columns)
2332 else:
2333 self._states.predicted_cov = np.transpose(
2334 self.predicted_state_cov, (2, 0, 1))
2336 # Filtered states
2337 if (self.filtered_state is None or
2338 self.filter_results.memory_no_filtered_mean):
2339 self._states.filtered = None
2340 elif use_pandas:
2341 self._states.filtered = pd.DataFrame(
2342 self.filtered_state.T, index=index, columns=columns)
2343 else:
2344 self._states.filtered = self.filtered_state.T
2345 if (self.filtered_state_cov is None or
2346 self.filter_results.memory_no_filtered_cov):
2347 self._states.filtered_cov = None
2348 elif use_pandas:
2349 tmp = np.transpose(self.filtered_state_cov, (2, 0, 1))
2350 self._states.filtered_cov = pd.DataFrame(
2351 np.reshape(tmp, (tmp.shape[0] * tmp.shape[1], tmp.shape[2])),
2352 index=pd.MultiIndex.from_product([index, columns]).swaplevel(),
2353 columns=columns)
2354 else:
2355 self._states.filtered_cov = np.transpose(
2356 self.filtered_state_cov, (2, 0, 1))
2358 # Smoothed states
2359 if self.smoothed_state is None:
2360 self._states.smoothed = None
2361 elif use_pandas:
2362 self._states.smoothed = pd.DataFrame(
2363 self.smoothed_state.T, index=index, columns=columns)
2364 else:
2365 self._states.smoothed = self.smoothed_state.T
2366 if self.smoothed_state_cov is None:
2367 self._states.smoothed_cov = None
2368 elif use_pandas:
2369 tmp = np.transpose(self.smoothed_state_cov, (2, 0, 1))
2370 self._states.smoothed_cov = pd.DataFrame(
2371 np.reshape(tmp, (tmp.shape[0] * tmp.shape[1], tmp.shape[2])),
2372 index=pd.MultiIndex.from_product([index, columns]).swaplevel(),
2373 columns=columns)
2374 else:
2375 self._states.smoothed_cov = np.transpose(
2376 self.smoothed_state_cov, (2, 0, 1))
2378 # Handle removing data
2379 self._data_attr_model = getattr(self, '_data_attr_model', [])
2380 self._data_attr_model.extend(['ssm'])
2381 self._data_attr.extend(extra_arrays)
2382 self._data_attr.extend(['filter_results', 'smoother_results'])
2383 self.data_in_cache = getattr(self, 'data_in_cache', [])
2384 self.data_in_cache.extend([])
2386 def _get_robustcov_results(self, cov_type='opg', **kwargs):
2387 """
2388 Create new results instance with specified covariance estimator as
2389 default
2391 Note: creating new results instance currently not supported.
2393 Parameters
2394 ----------
2395 cov_type : str
2396 the type of covariance matrix estimator to use. See Notes below
2397 kwargs : depends on cov_type
2398 Required or optional arguments for covariance calculation.
2399 See Notes below.
2401 Returns
2402 -------
2403 results : results instance
2404 This method creates a new results instance with the requested
2405 covariance as the default covariance of the parameters.
2406 Inferential statistics like p-values and hypothesis tests will be
2407 based on this covariance matrix.
2409 Notes
2410 -----
2411 The following covariance types and required or optional arguments are
2412 currently available:
2414 - 'opg' for the outer product of gradient estimator
2415 - 'oim' for the observed information matrix estimator, calculated
2416 using the method of Harvey (1989)
2417 - 'approx' for the observed information matrix estimator,
2418 calculated using a numerical approximation of the Hessian matrix.
2419 Uses complex step approximation by default, or uses finite
2420 differences if `approx_complex_step=False` in the `cov_kwds`
2421 dictionary.
2422 - 'robust' for an approximate (quasi-maximum likelihood) covariance
2423 matrix that may be valid even in the presence of some
2424 misspecifications. Intermediate calculations use the 'oim'
2425 method.
2426 - 'robust_approx' is the same as 'robust' except that the
2427 intermediate calculations use the 'approx' method.
2428 - 'none' for no covariance matrix calculation.
2429 """
2430 from statsmodels.base.covtype import descriptions
2432 use_self = kwargs.pop('use_self', False)
2433 if use_self:
2434 res = self
2435 else:
2436 raise NotImplementedError
2437 res = self.__class__(
2438 self.model, self.params,
2439 normalized_cov_params=self.normalized_cov_params,
2440 scale=self.scale)
2442 # Set the new covariance type
2443 res.cov_type = cov_type
2444 res.cov_kwds = {}
2446 # Calculate the new covariance matrix
2447 approx_complex_step = self._cov_approx_complex_step
2448 if approx_complex_step:
2449 approx_type_str = 'complex-step'
2450 elif self._cov_approx_centered:
2451 approx_type_str = 'centered finite differences'
2452 else:
2453 approx_type_str = 'finite differences'
2455 k_params = len(self.params)
2456 if k_params == 0:
2457 res.cov_params_default = np.zeros((0, 0))
2458 res._rank = 0
2459 res.cov_kwds['description'] = 'No parameters estimated.'
2460 elif cov_type == 'custom':
2461 res.cov_type = kwargs['custom_cov_type']
2462 res.cov_params_default = kwargs['custom_cov_params']
2463 res.cov_kwds['description'] = kwargs['custom_description']
2464 if len(self.fixed_params) > 0:
2465 mask = np.ix_(self._free_params_index, self._free_params_index)
2466 else:
2467 mask = np.s_[...]
2468 res._rank = np.linalg.matrix_rank(res.cov_params_default[mask])
2469 elif cov_type == 'none':
2470 res.cov_params_default = np.zeros((k_params, k_params)) * np.nan
2471 res._rank = np.nan
2472 res.cov_kwds['description'] = descriptions['none']
2473 elif self.cov_type == 'approx':
2474 res.cov_params_default = res.cov_params_approx
2475 res.cov_kwds['description'] = descriptions['approx'].format(
2476 approx_type=approx_type_str)
2477 elif self.cov_type == 'oim':
2478 res.cov_params_default = res.cov_params_oim
2479 res.cov_kwds['description'] = descriptions['OIM'].format(
2480 approx_type=approx_type_str)
2481 elif self.cov_type == 'opg':
2482 res.cov_params_default = res.cov_params_opg
2483 res.cov_kwds['description'] = descriptions['OPG'].format(
2484 approx_type=approx_type_str)
2485 elif self.cov_type == 'robust' or self.cov_type == 'robust_oim':
2486 res.cov_params_default = res.cov_params_robust_oim
2487 res.cov_kwds['description'] = descriptions['robust-OIM'].format(
2488 approx_type=approx_type_str)
2489 elif self.cov_type == 'robust_approx':
2490 res.cov_params_default = res.cov_params_robust_approx
2491 res.cov_kwds['description'] = descriptions['robust-approx'].format(
2492 approx_type=approx_type_str)
2493 else:
2494 raise NotImplementedError('Invalid covariance matrix type.')
2496 return res
2498 @cache_readonly
2499 def aic(self):
2500 """
2501 (float) Akaike Information Criterion
2502 """
2503 return aic(self.llf, self.nobs_effective, self.df_model)
2505 @cache_readonly
2506 def aicc(self):
2507 """
2508 (float) Akaike Information Criterion with small sample correction
2509 """
2510 return aicc(self.llf, self.nobs_effective, self.df_model)
2512 @cache_readonly
2513 def bic(self):
2514 """
2515 (float) Bayes Information Criterion
2516 """
2517 return bic(self.llf, self.nobs_effective, self.df_model)
2519 def _cov_params_approx(self, approx_complex_step=True,
2520 approx_centered=False):
2521 evaluated_hessian = self.nobs_effective * self.model.hessian(
2522 params=self.params, transformed=True, includes_fixed=True,
2523 method='approx', approx_complex_step=approx_complex_step,
2524 approx_centered=approx_centered)
2525 # TODO: Case with "not approx_complex_step" is not hit in
2526 # tests as of 2017-05-19
2528 if len(self.fixed_params) > 0:
2529 mask = np.ix_(self._free_params_index, self._free_params_index)
2530 (tmp, singular_values) = pinv_extended(evaluated_hessian[mask])
2531 neg_cov = np.zeros_like(evaluated_hessian) * np.nan
2532 neg_cov[mask] = tmp
2533 else:
2534 (neg_cov, singular_values) = pinv_extended(evaluated_hessian)
2536 self.model.update(self.params, transformed=True, includes_fixed=True)
2537 if self._rank is None:
2538 self._rank = np.linalg.matrix_rank(np.diag(singular_values))
2539 return -neg_cov
2541 @cache_readonly
2542 def cov_params_approx(self):
2543 """
2544 (array) The variance / covariance matrix. Computed using the numerical
2545 Hessian approximated by complex step or finite differences methods.
2546 """
2547 return self._cov_params_approx(self._cov_approx_complex_step,
2548 self._cov_approx_centered)
2550 def _cov_params_oim(self, approx_complex_step=True, approx_centered=False):
2551 evaluated_hessian = self.nobs_effective * self.model.hessian(
2552 self.params, hessian_method='oim', transformed=True,
2553 includes_fixed=True, approx_complex_step=approx_complex_step,
2554 approx_centered=approx_centered)
2556 if len(self.fixed_params) > 0:
2557 mask = np.ix_(self._free_params_index, self._free_params_index)
2558 (tmp, singular_values) = pinv_extended(evaluated_hessian[mask])
2559 neg_cov = np.zeros_like(evaluated_hessian) * np.nan
2560 neg_cov[mask] = tmp
2561 else:
2562 (neg_cov, singular_values) = pinv_extended(evaluated_hessian)
2564 self.model.update(self.params, transformed=True, includes_fixed=True)
2565 if self._rank is None:
2566 self._rank = np.linalg.matrix_rank(np.diag(singular_values))
2567 return -neg_cov
2569 @cache_readonly
2570 def cov_params_oim(self):
2571 """
2572 (array) The variance / covariance matrix. Computed using the method
2573 from Harvey (1989).
2574 """
2575 return self._cov_params_oim(self._cov_approx_complex_step,
2576 self._cov_approx_centered)
2578 def _cov_params_opg(self, approx_complex_step=True, approx_centered=False):
2579 evaluated_hessian = self.nobs_effective * self.model._hessian_opg(
2580 self.params, transformed=True, includes_fixed=True,
2581 approx_complex_step=approx_complex_step,
2582 approx_centered=approx_centered)
2584 if len(self.fixed_params) > 0:
2585 mask = np.ix_(self._free_params_index, self._free_params_index)
2586 (tmp, singular_values) = pinv_extended(evaluated_hessian[mask])
2587 neg_cov = np.zeros_like(evaluated_hessian) * np.nan
2588 neg_cov[mask] = tmp
2589 else:
2590 (neg_cov, singular_values) = pinv_extended(evaluated_hessian)
2592 self.model.update(self.params, transformed=True, includes_fixed=True)
2593 if self._rank is None:
2594 self._rank = np.linalg.matrix_rank(np.diag(singular_values))
2595 return -neg_cov
2597 @cache_readonly
2598 def cov_params_opg(self):
2599 """
2600 (array) The variance / covariance matrix. Computed using the outer
2601 product of gradients method.
2602 """
2603 return self._cov_params_opg(self._cov_approx_complex_step,
2604 self._cov_approx_centered)
2606 @cache_readonly
2607 def cov_params_robust(self):
2608 """
2609 (array) The QMLE variance / covariance matrix. Alias for
2610 `cov_params_robust_oim`
2611 """
2612 return self.cov_params_robust_oim
2614 def _cov_params_robust_oim(self, approx_complex_step=True,
2615 approx_centered=False):
2616 cov_opg = self._cov_params_opg(approx_complex_step=approx_complex_step,
2617 approx_centered=approx_centered)
2619 evaluated_hessian = self.nobs_effective * self.model.hessian(
2620 self.params, hessian_method='oim', transformed=True,
2621 includes_fixed=True, approx_complex_step=approx_complex_step,
2622 approx_centered=approx_centered)
2624 if len(self.fixed_params) > 0:
2625 mask = np.ix_(self._free_params_index, self._free_params_index)
2626 cov_params = np.zeros_like(evaluated_hessian) * np.nan
2628 cov_opg = cov_opg[mask]
2629 evaluated_hessian = evaluated_hessian[mask]
2631 tmp, singular_values = pinv_extended(
2632 np.dot(np.dot(evaluated_hessian, cov_opg), evaluated_hessian))
2634 cov_params[mask] = tmp
2635 else:
2636 (cov_params, singular_values) = pinv_extended(
2637 np.dot(np.dot(evaluated_hessian, cov_opg), evaluated_hessian))
2639 self.model.update(self.params, transformed=True, includes_fixed=True)
2640 if self._rank is None:
2641 self._rank = np.linalg.matrix_rank(np.diag(singular_values))
2642 return cov_params
2644 @cache_readonly
2645 def cov_params_robust_oim(self):
2646 """
2647 (array) The QMLE variance / covariance matrix. Computed using the
2648 method from Harvey (1989) as the evaluated hessian.
2649 """
2650 return self._cov_params_robust_oim(self._cov_approx_complex_step,
2651 self._cov_approx_centered)
2653 def _cov_params_robust_approx(self, approx_complex_step=True,
2654 approx_centered=False):
2655 cov_opg = self._cov_params_opg(approx_complex_step=approx_complex_step,
2656 approx_centered=approx_centered)
2658 evaluated_hessian = self.nobs_effective * self.model.hessian(
2659 self.params, transformed=True, includes_fixed=True,
2660 method='approx', approx_complex_step=approx_complex_step)
2661 # TODO: Case with "not approx_complex_step" is not
2662 # hit in tests as of 2017-05-19
2664 if len(self.fixed_params) > 0:
2665 mask = np.ix_(self._free_params_index, self._free_params_index)
2666 cov_params = np.zeros_like(evaluated_hessian) * np.nan
2668 cov_opg = cov_opg[mask]
2669 evaluated_hessian = evaluated_hessian[mask]
2671 tmp, singular_values = pinv_extended(
2672 np.dot(np.dot(evaluated_hessian, cov_opg), evaluated_hessian))
2674 cov_params[mask] = tmp
2675 else:
2676 (cov_params, singular_values) = pinv_extended(
2677 np.dot(np.dot(evaluated_hessian, cov_opg), evaluated_hessian))
2679 self.model.update(self.params, transformed=True, includes_fixed=True)
2680 if self._rank is None:
2681 self._rank = np.linalg.matrix_rank(np.diag(singular_values))
2682 return cov_params
2684 @cache_readonly
2685 def cov_params_robust_approx(self):
2686 """
2687 (array) The QMLE variance / covariance matrix. Computed using the
2688 numerical Hessian as the evaluated hessian.
2689 """
2690 return self._cov_params_robust_approx(self._cov_approx_complex_step,
2691 self._cov_approx_centered)
2693 def info_criteria(self, criteria, method='standard'):
2694 r"""
2695 Information criteria
2697 Parameters
2698 ----------
2699 criteria : {'aic', 'bic', 'hqic'}
2700 The information criteria to compute.
2701 method : {'standard', 'lutkepohl'}
2702 The method for information criteria computation. Default is
2703 'standard' method; 'lutkepohl' computes the information criteria
2704 as in Lütkepohl (2007). See Notes for formulas.
2706 Notes
2707 -----
2708 The `'standard'` formulas are:
2710 .. math::
2712 AIC & = -2 \log L(Y_n | \hat \psi) + 2 k \\
2713 BIC & = -2 \log L(Y_n | \hat \psi) + k \log n \\
2714 HQIC & = -2 \log L(Y_n | \hat \psi) + 2 k \log \log n \\
2716 where :math:`\hat \psi` are the maximum likelihood estimates of the
2717 parameters, :math:`n` is the number of observations, and `k` is the
2718 number of estimated parameters.
2720 Note that the `'standard'` formulas are returned from the `aic`, `bic`,
2721 and `hqic` results attributes.
2723 The `'lutkepohl'` formulas are (Lütkepohl, 2010):
2725 .. math::
2727 AIC_L & = \log | Q | + \frac{2 k}{n} \\
2728 BIC_L & = \log | Q | + \frac{k \log n}{n} \\
2729 HQIC_L & = \log | Q | + \frac{2 k \log \log n}{n} \\
2731 where :math:`Q` is the state covariance matrix. Note that the Lütkepohl
2732 definitions do not apply to all state space models, and should be used
2733 with care outside of SARIMAX and VARMAX models.
2735 References
2736 ----------
2737 .. [*] Lütkepohl, Helmut. 2007. *New Introduction to Multiple Time*
2738 *Series Analysis.* Berlin: Springer.
2739 """
2740 criteria = criteria.lower()
2741 method = method.lower()
2743 if method == 'standard':
2744 out = getattr(self, criteria)
2745 elif method == 'lutkepohl':
2746 if self.filter_results.state_cov.shape[-1] > 1:
2747 raise ValueError('Cannot compute Lütkepohl statistics for'
2748 ' models with time-varying state covariance'
2749 ' matrix.')
2751 cov = self.filter_results.state_cov[:, :, 0]
2752 if criteria == 'aic':
2753 out = np.squeeze(np.linalg.slogdet(cov)[1] +
2754 2 * self.df_model / self.nobs_effective)
2755 elif criteria == 'bic':
2756 out = np.squeeze(np.linalg.slogdet(cov)[1] +
2757 self.df_model * np.log(self.nobs_effective) /
2758 self.nobs_effective)
2759 elif criteria == 'hqic':
2760 out = np.squeeze(np.linalg.slogdet(cov)[1] +
2761 2 * self.df_model *
2762 np.log(np.log(self.nobs_effective)) /
2763 self.nobs_effective)
2764 else:
2765 raise ValueError('Invalid information criteria')
2767 else:
2768 raise ValueError('Invalid information criteria computation method')
2770 return out
2772 @cache_readonly
2773 def fittedvalues(self):
2774 """
2775 (array) The predicted values of the model. An (nobs x k_endog) array.
2776 """
2777 # This is a (k_endog x nobs array; do not want to squeeze in case of
2778 # the corner case where nobs = 1 (mostly a concern in the predict or
2779 # forecast functions, but here also to maintain consistency)
2780 fittedvalues = self.forecasts
2781 if fittedvalues is None:
2782 pass
2783 elif fittedvalues.shape[0] == 1:
2784 fittedvalues = fittedvalues[0, :]
2785 else:
2786 fittedvalues = fittedvalues.T
2787 return fittedvalues
2789 @cache_readonly
2790 def hqic(self):
2791 """
2792 (float) Hannan-Quinn Information Criterion
2793 """
2794 # return (-2 * self.llf +
2795 # 2 * np.log(np.log(self.nobs_effective)) * self.df_model)
2796 return hqic(self.llf, self.nobs_effective, self.df_model)
2798 @cache_readonly
2799 def llf_obs(self):
2800 """
2801 (float) The value of the log-likelihood function evaluated at `params`.
2802 """
2803 return self.filter_results.llf_obs
2805 @cache_readonly
2806 def llf(self):
2807 """
2808 (float) The value of the log-likelihood function evaluated at `params`.
2809 """
2810 return self.filter_results.llf
2812 @cache_readonly
2813 def loglikelihood_burn(self):
2814 """
2815 (float) The number of observations during which the likelihood is not
2816 evaluated.
2817 """
2818 return self.filter_results.loglikelihood_burn
2820 @cache_readonly
2821 def mae(self):
2822 """
2823 (float) Mean absolute error
2824 """
2825 return np.mean(np.abs(self.resid))
2827 @cache_readonly
2828 def mse(self):
2829 """
2830 (float) Mean squared error
2831 """
2832 return self.sse / self.nobs
2834 @cache_readonly
2835 def pvalues(self):
2836 """
2837 (array) The p-values associated with the z-statistics of the
2838 coefficients. Note that the coefficients are assumed to have a Normal
2839 distribution.
2840 """
2841 pvalues = np.zeros_like(self.zvalues) * np.nan
2842 mask = np.ones_like(pvalues, dtype=bool)
2843 mask[self._free_params_index] = True
2844 mask &= ~np.isnan(self.zvalues)
2845 pvalues[mask] = norm.sf(np.abs(self.zvalues[mask])) * 2
2846 return pvalues
2848 @cache_readonly
2849 def resid(self):
2850 """
2851 (array) The model residuals. An (nobs x k_endog) array.
2852 """
2853 # This is a (k_endog x nobs array; do not want to squeeze in case of
2854 # the corner case where nobs = 1 (mostly a concern in the predict or
2855 # forecast functions, but here also to maintain consistency)
2856 resid = self.forecasts_error
2857 if resid is None:
2858 pass
2859 elif resid.shape[0] == 1:
2860 resid = resid[0, :]
2861 else:
2862 resid = resid.T
2863 return resid
2865 @property
2866 def states(self):
2867 if self.model._index_generated and not self.model._index_none:
2868 warnings.warn('No supported index is available. The `states`'
2869 ' DataFrame uses a generated integer index',
2870 ValueWarning)
2871 return self._states
2873 @cache_readonly
2874 def sse(self):
2875 """
2876 (float) Sum of squared errors
2877 """
2878 return np.sum(self.resid**2)
2880 @cache_readonly
2881 def zvalues(self):
2882 """
2883 (array) The z-statistics for the coefficients.
2884 """
2885 return self.params / self.bse
2887 def test_normality(self, method):
2888 """
2889 Test for normality of standardized residuals.
2891 Null hypothesis is normality.
2893 Parameters
2894 ----------
2895 method : {'jarquebera', None}
2896 The statistical test for normality. Must be 'jarquebera' for
2897 Jarque-Bera normality test. If None, an attempt is made to select
2898 an appropriate test.
2900 Notes
2901 -----
2902 Let `d` = max(loglikelihood_burn, nobs_diffuse); this test is
2903 calculated ignoring the first `d` residuals.
2905 In the case of missing data, the maintained hypothesis is that the
2906 data are missing completely at random. This test is then run on the
2907 standardized residuals excluding those corresponding to missing
2908 observations.
2910 See Also
2911 --------
2912 statsmodels.stats.stattools.jarque_bera
2913 The Jarque-Bera test of normality.
2914 """
2915 if method is None:
2916 method = 'jarquebera'
2918 if self.standardized_forecasts_error is None:
2919 raise ValueError('Cannot compute test statistic when standardized'
2920 ' forecast errors have not been computed.')
2922 if method == 'jarquebera':
2923 from statsmodels.stats.stattools import jarque_bera
2924 d = np.maximum(self.loglikelihood_burn, self.nobs_diffuse)
2925 output = []
2926 for i in range(self.model.k_endog):
2927 resid = self.filter_results.standardized_forecasts_error[i, d:]
2928 mask = ~np.isnan(resid)
2929 output.append(jarque_bera(resid[mask]))
2930 else:
2931 raise NotImplementedError('Invalid normality test method.')
2933 return np.array(output)
2935 def test_heteroskedasticity(self, method, alternative='two-sided',
2936 use_f=True):
2937 r"""
2938 Test for heteroskedasticity of standardized residuals
2940 Tests whether the sum-of-squares in the first third of the sample is
2941 significantly different than the sum-of-squares in the last third
2942 of the sample. Analogous to a Goldfeld-Quandt test. The null hypothesis
2943 is of no heteroskedasticity.
2945 Parameters
2946 ----------
2947 method : {'breakvar', None}
2948 The statistical test for heteroskedasticity. Must be 'breakvar'
2949 for test of a break in the variance. If None, an attempt is
2950 made to select an appropriate test.
2951 alternative : str, 'increasing', 'decreasing' or 'two-sided'
2952 This specifies the alternative for the p-value calculation. Default
2953 is two-sided.
2954 use_f : bool, optional
2955 Whether or not to compare against the asymptotic distribution
2956 (chi-squared) or the approximate small-sample distribution (F).
2957 Default is True (i.e. default is to compare against an F
2958 distribution).
2960 Returns
2961 -------
2962 output : ndarray
2963 An array with `(test_statistic, pvalue)` for each endogenous
2964 variable. The array is then sized `(k_endog, 2)`. If the method is
2965 called as `het = res.test_heteroskedasticity()`, then `het[0]` is
2966 an array of size 2 corresponding to the first endogenous variable,
2967 where `het[0][0]` is the test statistic, and `het[0][1]` is the
2968 p-value.
2970 Notes
2971 -----
2972 The null hypothesis is of no heteroskedasticity. That means different
2973 things depending on which alternative is selected:
2975 - Increasing: Null hypothesis is that the variance is not increasing
2976 throughout the sample; that the sum-of-squares in the later
2977 subsample is *not* greater than the sum-of-squares in the earlier
2978 subsample.
2979 - Decreasing: Null hypothesis is that the variance is not decreasing
2980 throughout the sample; that the sum-of-squares in the earlier
2981 subsample is *not* greater than the sum-of-squares in the later
2982 subsample.
2983 - Two-sided: Null hypothesis is that the variance is not changing
2984 throughout the sample. Both that the sum-of-squares in the earlier
2985 subsample is not greater than the sum-of-squares in the later
2986 subsample *and* that the sum-of-squares in the later subsample is
2987 not greater than the sum-of-squares in the earlier subsample.
2989 For :math:`h = [T/3]`, the test statistic is:
2991 .. math::
2993 H(h) = \sum_{t=T-h+1}^T \tilde v_t^2
2994 \Bigg / \sum_{t=d+1}^{d+1+h} \tilde v_t^2
2996 where :math:`d` = max(loglikelihood_burn, nobs_diffuse)` (usually
2997 corresponding to diffuse initialization under either the approximate
2998 or exact approach).
3000 This statistic can be tested against an :math:`F(h,h)` distribution.
3001 Alternatively, :math:`h H(h)` is asymptotically distributed according
3002 to :math:`\chi_h^2`; this second test can be applied by passing
3003 `asymptotic=True` as an argument.
3005 See section 5.4 of [1]_ for the above formula and discussion, as well
3006 as additional details.
3008 TODO
3010 - Allow specification of :math:`h`
3012 References
3013 ----------
3014 .. [1] Harvey, Andrew C. 1990. *Forecasting, Structural Time Series*
3015 *Models and the Kalman Filter.* Cambridge University Press.
3016 """
3017 if method is None:
3018 method = 'breakvar'
3020 if self.standardized_forecasts_error is None:
3021 raise ValueError('Cannot compute test statistic when standardized'
3022 ' forecast errors have not been computed.')
3024 if method == 'breakvar':
3025 # Store some values
3026 squared_resid = self.filter_results.standardized_forecasts_error**2
3027 d = np.maximum(self.loglikelihood_burn, self.nobs_diffuse)
3028 # This differs from self.nobs_effective because here we want to
3029 # exclude exact diffuse periods, whereas self.nobs_effective only
3030 # excludes explicitly burned (usually approximate diffuse) periods.
3031 nobs_effective = self.nobs - d
3033 test_statistics = []
3034 p_values = []
3035 for i in range(self.model.k_endog):
3036 h = int(np.round(nobs_effective / 3))
3037 numer_resid = squared_resid[i, -h:]
3038 numer_resid = numer_resid[~np.isnan(numer_resid)]
3039 numer_dof = len(numer_resid)
3041 denom_resid = squared_resid[i, d:d+h]
3042 denom_resid = denom_resid[~np.isnan(denom_resid)]
3043 denom_dof = len(denom_resid)
3045 if numer_dof < 2:
3046 warnings.warn('Early subset of data for variable %d'
3047 ' has too few non-missing observations to'
3048 ' calculate test statistic.' % i)
3049 numer_resid = np.nan
3050 if denom_dof < 2:
3051 warnings.warn('Later subset of data for variable %d'
3052 ' has too few non-missing observations to'
3053 ' calculate test statistic.' % i)
3054 denom_resid = np.nan
3056 test_statistic = np.sum(numer_resid) / np.sum(denom_resid)
3058 # Setup functions to calculate the p-values
3059 if use_f:
3060 from scipy.stats import f
3061 pval_lower = lambda test_statistics: f.cdf( # noqa:E731
3062 test_statistics, numer_dof, denom_dof)
3063 pval_upper = lambda test_statistics: f.sf( # noqa:E731
3064 test_statistics, numer_dof, denom_dof)
3065 else:
3066 from scipy.stats import chi2
3067 pval_lower = lambda test_statistics: chi2.cdf( # noqa:E731
3068 numer_dof * test_statistics, denom_dof)
3069 pval_upper = lambda test_statistics: chi2.sf( # noqa:E731
3070 numer_dof * test_statistics, denom_dof)
3072 # Calculate the one- or two-sided p-values
3073 alternative = alternative.lower()
3074 if alternative in ['i', 'inc', 'increasing']:
3075 p_value = pval_upper(test_statistic)
3076 elif alternative in ['d', 'dec', 'decreasing']:
3077 test_statistic = 1. / test_statistic
3078 p_value = pval_upper(test_statistic)
3079 elif alternative in ['2', '2-sided', 'two-sided']:
3080 p_value = 2 * np.minimum(
3081 pval_lower(test_statistic),
3082 pval_upper(test_statistic)
3083 )
3084 else:
3085 raise ValueError('Invalid alternative.')
3087 test_statistics.append(test_statistic)
3088 p_values.append(p_value)
3090 output = np.c_[test_statistics, p_values]
3091 else:
3092 raise NotImplementedError('Invalid heteroskedasticity test'
3093 ' method.')
3095 return output
3097 def test_serial_correlation(self, method, lags=None):
3098 """
3099 Ljung-Box test for no serial correlation of standardized residuals
3101 Null hypothesis is no serial correlation.
3103 Parameters
3104 ----------
3105 method : {'ljungbox','boxpierece', None}
3106 The statistical test for serial correlation. If None, an attempt is
3107 made to select an appropriate test.
3108 lags : None, int or array_like
3109 If lags is an integer then this is taken to be the largest lag
3110 that is included, the test result is reported for all smaller lag
3111 length.
3112 If lags is a list or array, then all lags are included up to the
3113 largest lag in the list, however only the tests for the lags in the
3114 list are reported.
3115 If lags is None, then the default maxlag is 12*(nobs/100)^{1/4}
3117 Returns
3118 -------
3119 output : ndarray
3120 An array with `(test_statistic, pvalue)` for each endogenous
3121 variable and each lag. The array is then sized
3122 `(k_endog, 2, lags)`. If the method is called as
3123 `ljungbox = res.test_serial_correlation()`, then `ljungbox[i]`
3124 holds the results of the Ljung-Box test (as would be returned by
3125 `statsmodels.stats.diagnostic.acorr_ljungbox`) for the `i` th
3126 endogenous variable.
3128 Notes
3129 -----
3130 Let `d` = max(loglikelihood_burn, nobs_diffuse); this test is
3131 calculated ignoring the first `d` residuals.
3133 Output is nan for any endogenous variable which has missing values.
3135 See Also
3136 --------
3137 statsmodels.stats.diagnostic.acorr_ljungbox
3138 Ljung-Box test for serial correlation.
3139 """
3140 if method is None:
3141 method = 'ljungbox'
3143 if self.standardized_forecasts_error is None:
3144 raise ValueError('Cannot compute test statistic when standardized'
3145 ' forecast errors have not been computed.')
3147 if method == 'ljungbox' or method == 'boxpierce':
3148 from statsmodels.stats.diagnostic import acorr_ljungbox
3149 d = np.maximum(self.loglikelihood_burn, self.nobs_diffuse)
3150 # This differs from self.nobs_effective because here we want to
3151 # exclude exact diffuse periods, whereas self.nobs_effective only
3152 # excludes explicitly burned (usually approximate diffuse) periods.
3153 nobs_effective = self.nobs - d
3154 output = []
3156 # Default lags for acorr_ljungbox is 40, but may not always have
3157 # that many observations
3158 if lags is None:
3159 lags = min(40, nobs_effective - 1)
3161 for i in range(self.model.k_endog):
3162 results = acorr_ljungbox(
3163 self.filter_results.standardized_forecasts_error[i][d:],
3164 lags=lags, boxpierce=(method == 'boxpierce'),
3165 return_df=False)
3166 if method == 'ljungbox':
3167 output.append(results[0:2])
3168 else:
3169 output.append(results[2:])
3171 output = np.c_[output]
3172 else:
3173 raise NotImplementedError('Invalid serial correlation test'
3174 ' method.')
3175 return output
3177 def get_prediction(self, start=None, end=None, dynamic=False,
3178 index=None, exog=None, extend_model=None,
3179 extend_kwargs=None, **kwargs):
3180 """
3181 In-sample prediction and out-of-sample forecasting
3183 Parameters
3184 ----------
3185 start : int, str, or datetime, optional
3186 Zero-indexed observation number at which to start forecasting,
3187 i.e., the first forecast is start. Can also be a date string to
3188 parse or a datetime type. Default is the the zeroth observation.
3189 end : int, str, or datetime, optional
3190 Zero-indexed observation number at which to end forecasting, i.e.,
3191 the last forecast is end. Can also be a date string to
3192 parse or a datetime type. However, if the dates index does not
3193 have a fixed frequency, end must be an integer index if you
3194 want out of sample prediction. Default is the last observation in
3195 the sample.
3196 dynamic : bool, int, str, or datetime, optional
3197 Integer offset relative to `start` at which to begin dynamic
3198 prediction. Can also be an absolute date string to parse or a
3199 datetime type (these are not interpreted as offsets).
3200 Prior to this observation, true endogenous values will be used for
3201 prediction; starting with this observation and continuing through
3202 the end of prediction, forecasted endogenous values will be used
3203 instead.
3204 **kwargs
3205 Additional arguments may required for forecasting beyond the end
3206 of the sample. See `FilterResults.predict` for more details.
3208 Returns
3209 -------
3210 forecast : ndarray
3211 Array of out of in-sample predictions and / or out-of-sample
3212 forecasts. An (npredict x k_endog) array.
3213 """
3214 if start is None:
3215 start = 0
3217 # Handle start, end, dynamic
3218 start, end, out_of_sample, prediction_index = (
3219 self.model._get_prediction_index(start, end, index))
3221 # Handle `dynamic`
3222 if isinstance(dynamic, (bytes, str)):
3223 dynamic, _, _ = self.model._get_index_loc(dynamic)
3225 # If we have out-of-sample forecasting and `exog` or in general any
3226 # kind of time-varying state space model, then we need to create an
3227 # extended model to get updated state space system matrices
3228 if extend_model is None:
3229 extend_model = (self.model.exog is not None or
3230 not self.filter_results.time_invariant)
3231 if out_of_sample and extend_model:
3232 kwargs = self.model._get_extension_time_varying_matrices(
3233 self.params, exog, out_of_sample, extend_kwargs,
3234 transformed=True, includes_fixed=True, **kwargs)
3236 # Make sure the model class has the current parameters
3237 self.model.update(self.params, transformed=True, includes_fixed=True)
3239 # Perform the prediction
3240 # This is a (k_endog x npredictions) array; do not want to squeeze in
3241 # case of npredictions = 1
3242 prediction_results = self.filter_results.predict(
3243 start, end + out_of_sample + 1, dynamic, **kwargs)
3245 # Return a new mlemodel.PredictionResults object
3246 return PredictionResultsWrapper(PredictionResults(
3247 self, prediction_results, row_labels=prediction_index))
3249 def get_forecast(self, steps=1, **kwargs):
3250 """
3251 Out-of-sample forecasts
3253 Parameters
3254 ----------
3255 steps : int, str, or datetime, optional
3256 If an integer, the number of steps to forecast from the end of the
3257 sample. Can also be a date string to parse or a datetime type.
3258 However, if the dates index does not have a fixed frequency, steps
3259 must be an integer. Default
3260 **kwargs
3261 Additional arguments may required for forecasting beyond the end
3262 of the sample. See `FilterResults.predict` for more details.
3264 Returns
3265 -------
3266 forecast : ndarray
3267 Array of out of sample forecasts. A (steps x k_endog) array.
3268 """
3269 if isinstance(steps, int):
3270 end = self.nobs + steps - 1
3271 else:
3272 end = steps
3273 return self.get_prediction(start=self.nobs, end=end, **kwargs)
3275 def predict(self, start=None, end=None, dynamic=False, **kwargs):
3276 """
3277 In-sample prediction and out-of-sample forecasting
3279 Parameters
3280 ----------
3281 start : int, str, or datetime, optional
3282 Zero-indexed observation number at which to start forecasting,
3283 i.e., the first forecast is start. Can also be a date string to
3284 parse or a datetime type. Default is the the zeroth observation.
3285 end : int, str, or datetime, optional
3286 Zero-indexed observation number at which to end forecasting, i.e.,
3287 the last forecast is end. Can also be a date string to
3288 parse or a datetime type. However, if the dates index does not
3289 have a fixed frequency, end must be an integer index if you
3290 want out of sample prediction. Default is the last observation in
3291 the sample.
3292 dynamic : bool, int, str, or datetime, optional
3293 Integer offset relative to `start` at which to begin dynamic
3294 prediction. Can also be an absolute date string to parse or a
3295 datetime type (these are not interpreted as offsets).
3296 Prior to this observation, true endogenous values will be used for
3297 prediction; starting with this observation and continuing through
3298 the end of prediction, forecasted endogenous values will be used
3299 instead.
3300 **kwargs
3301 Additional arguments may required for forecasting beyond the end
3302 of the sample. See `FilterResults.predict` for more details.
3304 Returns
3305 -------
3306 forecast : array_like
3307 Array of out of in-sample predictions and / or out-of-sample
3308 forecasts. An (npredict x k_endog) array.
3309 """
3310 # Perform the prediction
3311 prediction_results = self.get_prediction(start, end, dynamic, **kwargs)
3312 return prediction_results.predicted_mean
3314 def forecast(self, steps=1, **kwargs):
3315 """
3316 Out-of-sample forecasts
3318 Parameters
3319 ----------
3320 steps : int, str, or datetime, optional
3321 If an integer, the number of steps to forecast from the end of the
3322 sample. Can also be a date string to parse or a datetime type.
3323 However, if the dates index does not have a fixed frequency, steps
3324 must be an integer. Default
3325 **kwargs
3326 Additional arguments may required for forecasting beyond the end
3327 of the sample. See `FilterResults.predict` for more details.
3329 Returns
3330 -------
3331 forecast : ndarray
3332 Array of out of sample forecasts. A (steps x k_endog) array.
3333 """
3334 if isinstance(steps, int):
3335 end = self.nobs + steps - 1
3336 else:
3337 end = steps
3338 return self.predict(start=self.nobs, end=end, **kwargs)
3340 def simulate(self, nsimulations, measurement_shocks=None,
3341 state_shocks=None, initial_state=None, anchor=None,
3342 repetitions=None, exog=None, extend_model=None,
3343 extend_kwargs=None, **kwargs):
3344 r"""
3345 Simulate a new time series following the state space model
3347 Parameters
3348 ----------
3349 nsimulations : int
3350 The number of observations to simulate. If the model is
3351 time-invariant this can be any number. If the model is
3352 time-varying, then this number must be less than or equal to the
3353 number
3354 measurement_shocks : array_like, optional
3355 If specified, these are the shocks to the measurement equation,
3356 :math:`\varepsilon_t`. If unspecified, these are automatically
3357 generated using a pseudo-random number generator. If specified,
3358 must be shaped `nsimulations` x `k_endog`, where `k_endog` is the
3359 same as in the state space model.
3360 state_shocks : array_like, optional
3361 If specified, these are the shocks to the state equation,
3362 :math:`\eta_t`. If unspecified, these are automatically
3363 generated using a pseudo-random number generator. If specified,
3364 must be shaped `nsimulations` x `k_posdef` where `k_posdef` is the
3365 same as in the state space model.
3366 initial_state : array_like, optional
3367 If specified, this is the initial state vector to use in
3368 simulation, which should be shaped (`k_states` x 1), where
3369 `k_states` is the same as in the state space model. If unspecified,
3370 but the model has been initialized, then that initialization is
3371 used. This must be specified if `anchor` is anything other than
3372 "start" or 0.
3373 anchor : int, str, or datetime, optional
3374 Starting point from which to begin the simulations; type depends on
3375 the index of the given `endog` model. Two special cases are the
3376 strings 'start' and 'end', which refer to starting at the beginning
3377 and end of the sample, respectively. If a date/time index was
3378 provided to the model, then this argument can be a date string to
3379 parse or a datetime type. Otherwise, an integer index should be
3380 given. Default is 'start'.
3381 repetitions : int, optional
3382 Number of simulated paths to generate. Default is 1 simulated path.
3383 exog : array_like, optional
3384 New observations of exogenous regressors, if applicable.
3386 Returns
3387 -------
3388 simulated_obs : ndarray
3389 An array of simulated observations. If `repetitions=None`, then it
3390 will be shaped (nsimulations x k_endog) or (nsimulations,) if
3391 `k_endog=1`. Otherwise it will be shaped
3392 (nsimulations x k_endog x repetitions). If the model was given
3393 Pandas input then the output will be a Pandas object. If
3394 `k_endog > 1` and `repetitions` is not None, then the output will
3395 be a Pandas DataFrame that has a MultiIndex for the columns, with
3396 the first level containing the names of the `endog` variables and
3397 the second level containing the repetition number.
3398 """
3399 # Get the starting location
3400 if anchor is None or anchor == 'start':
3401 iloc = 0
3402 elif anchor == 'end':
3403 iloc = self.nobs
3404 else:
3405 iloc, _, _ = self.model._get_index_loc(anchor)
3406 if isinstance(iloc, slice):
3407 iloc = iloc.start
3409 if iloc < 0:
3410 iloc = self.nobs + iloc
3411 if iloc > self.nobs:
3412 raise ValueError('Cannot anchor simulation outside of the sample.')
3414 # Setup the initial state
3415 if initial_state is None:
3416 initial_state_moments = (
3417 self.predicted_state[:, iloc],
3418 self.predicted_state_cov[:, :, iloc])
3420 _repetitions = 1 if repetitions is None else repetitions
3422 initial_state = np.random.multivariate_normal(
3423 *initial_state_moments, size=_repetitions).T
3425 scale = self.scale if self.filter_results.filter_concentrated else None
3426 with self.model.ssm.fixed_scale(scale):
3427 sim = self.model.simulate(
3428 self.params, nsimulations,
3429 measurement_shocks=measurement_shocks,
3430 state_shocks=state_shocks, initial_state=initial_state,
3431 anchor=anchor, repetitions=repetitions, exog=exog,
3432 transformed=True, includes_fixed=True,
3433 extend_model=extend_model, extend_kwargs=extend_kwargs,
3434 **kwargs)
3436 return sim
3438 def impulse_responses(self, steps=1, impulse=0, orthogonalized=False,
3439 cumulative=False, **kwargs):
3440 """
3441 Impulse response function
3443 Parameters
3444 ----------
3445 steps : int, optional
3446 The number of steps for which impulse responses are calculated.
3447 Default is 1. Note that for time-invariant models, the initial
3448 impulse is not counted as a step, so if `steps=1`, the output will
3449 have 2 entries.
3450 impulse : int or array_like
3451 If an integer, the state innovation to pulse; must be between 0
3452 and `k_posdef-1`. Alternatively, a custom impulse vector may be
3453 provided; must be shaped `k_posdef x 1`.
3454 orthogonalized : bool, optional
3455 Whether or not to perform impulse using orthogonalized innovations.
3456 Note that this will also affect custum `impulse` vectors. Default
3457 is False.
3458 cumulative : bool, optional
3459 Whether or not to return cumulative impulse responses. Default is
3460 False.
3461 anchor : int, str, or datetime, optional
3462 Time point within the sample for the state innovation impulse. Type
3463 depends on the index of the given `endog` in the model. Two special
3464 cases are the strings 'start' and 'end', which refer to setting the
3465 impulse at the first and last points of the sample, respectively.
3466 Integer values can run from 0 to `nobs - 1`, or can be negative to
3467 apply negative indexing. Finally, if a date/time index was provided
3468 to the model, then this argument can be a date string to parse or a
3469 datetime type. Default is 'start'.
3470 exog : array_like, optional
3471 New observations of exogenous regressors, if applicable.
3472 **kwargs
3473 If the model has time-varying design or transition matrices and the
3474 combination of `anchor` and `steps` implies creating impulse
3475 responses for the out-of-sample period, then these matrices must
3476 have updated values provided for the out-of-sample steps. For
3477 example, if `design` is a time-varying component, `nobs` is 10,
3478 `anchor=1`, and `steps` is 15, a (`k_endog` x `k_states` x 7)
3479 matrix must be provided with the new design matrix values.
3481 Returns
3482 -------
3483 impulse_responses : ndarray
3484 Responses for each endogenous variable due to the impulse
3485 given by the `impulse` argument. For a time-invariant model, the
3486 impulse responses are given for `steps + 1` elements (this gives
3487 the "initial impulse" followed by `steps` responses for the
3488 important cases of VAR and SARIMAX models), while for time-varying
3489 models the impulse responses are only given for `steps` elements
3490 (to avoid having to unexpectedly provide updated time-varying
3491 matrices).
3493 Notes
3494 -----
3495 Intercepts in the measurement and state equation are ignored when
3496 calculating impulse responses.
3497 """
3498 scale = self.scale if self.filter_results.filter_concentrated else None
3499 with self.model.ssm.fixed_scale(scale):
3500 irfs = self.model.impulse_responses(self.params, steps, impulse,
3501 orthogonalized, cumulative,
3502 **kwargs)
3503 return irfs
3505 def _apply(self, mod, refit=False, fit_kwargs=None, **kwargs):
3506 if fit_kwargs is None:
3507 fit_kwargs = {}
3509 if refit:
3510 fit_kwargs.setdefault('start_params', self.params)
3511 if self._has_fixed_params:
3512 fit_kwargs.setdefault('includes_fixed', True)
3513 res = mod.fit_constrained(self._fixed_params, **fit_kwargs)
3514 else:
3515 res = mod.fit(**fit_kwargs)
3516 else:
3517 if 'cov_type' in fit_kwargs:
3518 raise ValueError('Cannot specify covariance type in'
3519 ' `fit_kwargs` unless refitting'
3520 ' parameters (not available in extend).')
3521 if 'cov_kwds' in fit_kwargs:
3522 raise ValueError('Cannot specify covariance keyword arguments'
3523 ' in `fit_kwargs` unless refitting'
3524 ' parameters (not available in extend).')
3526 fit_kwargs['cov_type'] = 'custom'
3527 fit_kwargs['cov_kwds'] = {
3528 'custom_cov_type': self.cov_type,
3529 'custom_cov_params': self.cov_params_default,
3530 'custom_description': ('Parameters and standard errors'
3531 ' were estimated using a different'
3532 ' dataset and were then applied to this'
3533 ' dataset. %s'
3534 % self.cov_kwds['description'])}
3536 if self.smoother_results is not None:
3537 func = mod.smooth
3538 else:
3539 func = mod.filter
3541 if self._has_fixed_params:
3542 with mod.fix_params(self._fixed_params):
3543 fit_kwargs.setdefault('includes_fixed', True)
3544 res = func(self.params, **fit_kwargs)
3545 else:
3546 res = func(self.params, **fit_kwargs)
3548 return res
3550 def append(self, endog, exog=None, refit=False, fit_kwargs=None, **kwargs):
3551 """
3552 Recreate the results object with new data appended to the original data
3554 Creates a new result object applied to a dataset that is created by
3555 appending new data to the end of the model's original data. The new
3556 results can then be used for analysis or forecasting.
3558 Parameters
3559 ----------
3560 endog : array_like
3561 New observations from the modeled time-series process.
3562 exog : array_like, optional
3563 New observations of exogenous regressors, if applicable.
3564 refit : bool, optional
3565 Whether to re-fit the parameters, based on the combined dataset.
3566 Default is False (so parameters from the current results object
3567 are used to create the new results object).
3568 fit_kwargs : dict, optional
3569 Keyword arguments to pass to `fit` (if `refit=True`) or `filter` /
3570 `smooth`.
3571 **kwargs
3572 Keyword arguments may be used to modify model specification
3573 arguments when created the new model object.
3575 Returns
3576 -------
3577 results
3578 Updated Results object, that includes results from both the
3579 original dataset and the new dataset.
3581 Notes
3582 -----
3583 The `endog` and `exog` arguments to this method must be formatted in
3584 the same was (e.g. Pandas Series versus Numpy array) as were the
3585 `endog` and `exog` arrays passed to the original model.
3587 The `endog` argument to this method should consist of new observations
3588 that occurred directly after the last element of `endog`. For any other
3589 kind of dataset, see the `apply` method.
3591 This method will apply filtering to all of the original data as well
3592 as to the new data. To apply filtering only to the new data (which
3593 can be much faster if the original dataset is large), see the `extend`
3594 method.
3596 Examples
3597 --------
3598 >>> index = pd.period_range(start='2000', periods=2, freq='A')
3599 >>> original_observations = pd.Series([1.2, 1.5], index=index)
3600 >>> mod = sm.tsa.SARIMAX(original_observations)
3601 >>> res = mod.fit()
3602 >>> print(res.params)
3603 ar.L1 0.9756
3604 sigma2 0.0889
3605 dtype: float64
3606 >>> print(res.fittedvalues)
3607 2000 0.0000
3608 2001 1.1707
3609 Freq: A-DEC, dtype: float64
3610 >>> print(res.forecast(1))
3611 2002 1.4634
3612 Freq: A-DEC, dtype: float64
3614 >>> new_index = pd.period_range(start='2002', periods=1, freq='A')
3615 >>> new_observations = pd.Series([0.9], index=new_index)
3616 >>> updated_res = res.append(new_observations)
3617 >>> print(updated_res.params)
3618 ar.L1 0.9756
3619 sigma2 0.0889
3620 dtype: float64
3621 >>> print(updated_res.fittedvalues)
3622 2000 0.0000
3623 2001 1.1707
3624 2002 1.4634
3625 Freq: A-DEC, dtype: float64
3626 >>> print(updated_res.forecast(1))
3627 2003 0.878
3628 Freq: A-DEC, dtype: float64
3630 See Also
3631 --------
3632 statsmodels.tsa.statespace.mlemodel.MLEResults.extend
3633 statsmodels.tsa.statespace.mlemodel.MLEResults.apply
3634 """
3635 start = self.nobs
3636 end = self.nobs + len(endog) - 1
3637 _, _, _, append_ix = self.model._get_prediction_index(start, end)
3639 # Check the index of the new data
3640 if isinstance(self.model.data, PandasData):
3641 _check_index(append_ix, endog, '`endog`')
3643 # Concatenate the new data to original data
3644 new_endog = concat([self.model.data.orig_endog, endog], axis=0,
3645 allow_mix=True)
3647 # Create a continuous index for the combined data
3648 if isinstance(self.model.data, PandasData):
3649 start = 0
3650 end = len(new_endog) - 1
3651 _, _, _, new_index = self.model._get_prediction_index(start, end)
3652 # Standardize `endog` to have the right index and columns
3653 columns = self.model.endog_names
3654 if not isinstance(columns, list):
3655 columns = [columns]
3656 new_endog = pd.DataFrame(new_endog, index=new_index,
3657 columns=columns)
3659 # Handle `exog`
3660 if exog is not None:
3661 _, exog = prepare_exog(exog)
3662 _check_index(append_ix, exog, '`exog`')
3664 new_exog = concat([self.model.data.orig_exog, exog], axis=0,
3665 allow_mix=True)
3666 else:
3667 new_exog = None
3669 mod = self.model.clone(new_endog, exog=new_exog, **kwargs)
3670 res = self._apply(mod, refit=refit, fit_kwargs=fit_kwargs, **kwargs)
3672 return res
3674 def extend(self, endog, exog=None, fit_kwargs=None, **kwargs):
3675 """
3676 Recreate the results object for new data that extends the original data
3678 Creates a new result object applied to a new dataset that is assumed to
3679 follow directly from the end of the model's original data. The new
3680 results can then be used for analysis or forecasting.
3682 Parameters
3683 ----------
3684 endog : array_like
3685 New observations from the modeled time-series process.
3686 exog : array_like, optional
3687 New observations of exogenous regressors, if applicable.
3688 fit_kwargs : dict, optional
3689 Keyword arguments to pass to `filter` or `smooth`.
3690 **kwargs
3691 Keyword arguments may be used to modify model specification
3692 arguments when created the new model object.
3694 Returns
3695 -------
3696 results
3697 Updated Results object, that includes results only for the new
3698 dataset.
3700 Notes
3701 -----
3702 The `endog` argument to this method should consist of new observations
3703 that occurred directly after the last element of the model's original
3704 `endog` array. For any other kind of dataset, see the `apply` method.
3706 This method will apply filtering only to the new data provided by the
3707 `endog` argument, which can be much faster than re-filtering the entire
3708 dataset. However, the returned results object will only have results
3709 for the new data. To retrieve results for both the new data and the
3710 original data, see the `append` method.
3712 Examples
3713 --------
3714 >>> index = pd.period_range(start='2000', periods=2, freq='A')
3715 >>> original_observations = pd.Series([1.2, 1.5], index=index)
3716 >>> mod = sm.tsa.SARIMAX(original_observations)
3717 >>> res = mod.fit()
3718 >>> print(res.params)
3719 ar.L1 0.9756
3720 sigma2 0.0889
3721 dtype: float64
3722 >>> print(res.fittedvalues)
3723 2000 0.0000
3724 2001 1.1707
3725 Freq: A-DEC, dtype: float64
3726 >>> print(res.forecast(1))
3727 2002 1.4634
3728 Freq: A-DEC, dtype: float64
3730 >>> new_index = pd.period_range(start='2002', periods=1, freq='A')
3731 >>> new_observations = pd.Series([0.9], index=new_index)
3732 >>> updated_res = res.extend(new_observations)
3733 >>> print(updated_res.params)
3734 ar.L1 0.9756
3735 sigma2 0.0889
3736 dtype: float64
3737 >>> print(updated_res.fittedvalues)
3738 2002 1.4634
3739 Freq: A-DEC, dtype: float64
3740 >>> print(updated_res.forecast(1))
3741 2003 0.878
3742 Freq: A-DEC, dtype: float64
3744 See Also
3745 --------
3746 statsmodels.tsa.statespace.mlemodel.MLEResults.append
3747 statsmodels.tsa.statespace.mlemodel.MLEResults.apply
3748 """
3749 start = self.nobs
3750 end = self.nobs + len(endog) - 1
3751 _, _, _, extend_ix = self.model._get_prediction_index(start, end)
3753 if isinstance(self.model.data, PandasData):
3754 _check_index(extend_ix, endog, '`endog`')
3756 # Standardize `endog` to have the right index and columns
3757 columns = self.model.endog_names
3758 if not isinstance(columns, list):
3759 columns = [columns]
3760 endog = pd.DataFrame(endog, index=extend_ix, columns=columns)
3761 # Extend the current fit result to additional data
3762 mod = self.model.clone(endog, exog=exog, **kwargs)
3763 mod.ssm.initialization = Initialization(
3764 mod.k_states, 'known', constant=self.predicted_state[..., -1],
3765 stationary_cov=self.predicted_state_cov[..., -1])
3766 res = self._apply(mod, refit=False, fit_kwargs=fit_kwargs, **kwargs)
3768 return res
3770 def apply(self, endog, exog=None, refit=False, fit_kwargs=None, **kwargs):
3771 """
3772 Apply the fitted parameters to new data unrelated to the original data
3774 Creates a new result object using the current fitted parameters,
3775 applied to a completely new dataset that is assumed to be unrelated to
3776 the model's original data. The new results can then be used for
3777 analysis or forecasting.
3779 Parameters
3780 ----------
3781 endog : array_like
3782 New observations from the modeled time-series process.
3783 exog : array_like, optional
3784 New observations of exogenous regressors, if applicable.
3785 refit : bool, optional
3786 Whether to re-fit the parameters, using the new dataset.
3787 Default is False (so parameters from the current results object
3788 are used to create the new results object).
3789 fit_kwargs : dict, optional
3790 Keyword arguments to pass to `fit` (if `refit=True`) or `filter` /
3791 `smooth`.
3792 **kwargs
3793 Keyword arguments may be used to modify model specification
3794 arguments when created the new model object.
3796 Returns
3797 -------
3798 results
3799 Updated Results object, that includes results only for the new
3800 dataset.
3802 Notes
3803 -----
3804 The `endog` argument to this method should consist of new observations
3805 that are unrelated to the original model's `endog` dataset. For
3806 observations that continue that original dataset by follow directly
3807 after its last element, see the `append` and `extend` methods.
3809 Examples
3810 --------
3811 >>> index = pd.period_range(start='2000', periods=2, freq='A')
3812 >>> original_observations = pd.Series([1.2, 1.5], index=index)
3813 >>> mod = sm.tsa.SARIMAX(original_observations)
3814 >>> res = mod.fit()
3815 >>> print(res.params)
3816 ar.L1 0.9756
3817 sigma2 0.0889
3818 dtype: float64
3819 >>> print(res.fittedvalues)
3820 2000 0.0000
3821 2001 1.1707
3822 Freq: A-DEC, dtype: float64
3823 >>> print(res.forecast(1))
3824 2002 1.4634
3825 Freq: A-DEC, dtype: float64
3827 >>> new_index = pd.period_range(start='1980', periods=3, freq='A')
3828 >>> new_observations = pd.Series([1.4, 0.3, 1.2], index=new_index)
3829 >>> new_res = res.apply(new_observations)
3830 >>> print(new_res.params)
3831 ar.L1 0.9756
3832 sigma2 0.0889
3833 dtype: float64
3834 >>> print(new_res.fittedvalues)
3835 1980 1.1707
3836 1981 1.3659
3837 1982 0.2927
3838 Freq: A-DEC, dtype: float64
3839 Freq: A-DEC, dtype: float64
3840 >>> print(new_res.forecast(1))
3841 1983 1.1707
3842 Freq: A-DEC, dtype: float64
3844 See Also
3845 --------
3846 statsmodels.tsa.statespace.mlemodel.MLEResults.append
3847 statsmodels.tsa.statespace.mlemodel.MLEResults.apply
3848 """
3849 mod = self.model.clone(endog, exog=exog, **kwargs)
3850 res = self._apply(mod, refit=refit, fit_kwargs=fit_kwargs, **kwargs)
3852 return res
3854 def plot_diagnostics(self, variable=0, lags=10, fig=None, figsize=None):
3855 """
3856 Diagnostic plots for standardized residuals of one endogenous variable
3858 Parameters
3859 ----------
3860 variable : int, optional
3861 Index of the endogenous variable for which the diagnostic plots
3862 should be created. Default is 0.
3863 lags : int, optional
3864 Number of lags to include in the correlogram. Default is 10.
3865 fig : Figure, optional
3866 If given, subplots are created in this figure instead of in a new
3867 figure. Note that the 2x2 grid will be created in the provided
3868 figure using `fig.add_subplot()`.
3869 figsize : tuple, optional
3870 If a figure is created, this argument allows specifying a size.
3871 The tuple is (width, height).
3873 Notes
3874 -----
3875 Produces a 2x2 plot grid with the following plots (ordered clockwise
3876 from top left):
3878 1. Standardized residuals over time
3879 2. Histogram plus estimated density of standardized residuals, along
3880 with a Normal(0,1) density plotted for reference.
3881 3. Normal Q-Q plot, with Normal reference line.
3882 4. Correlogram
3884 See Also
3885 --------
3886 statsmodels.graphics.gofplots.qqplot
3887 statsmodels.graphics.tsaplots.plot_acf
3888 """
3889 from statsmodels.graphics.utils import _import_mpl, create_mpl_fig
3890 _import_mpl()
3891 fig = create_mpl_fig(fig, figsize)
3892 # Eliminate residuals associated with burned or diffuse likelihoods
3893 d = np.maximum(self.loglikelihood_burn, self.nobs_diffuse)
3894 resid = self.filter_results.standardized_forecasts_error[variable, d:]
3896 # Top-left: residuals vs time
3897 ax = fig.add_subplot(221)
3898 if hasattr(self.data, 'dates') and self.data.dates is not None:
3899 x = self.data.dates[d:]._mpl_repr()
3900 else:
3901 x = np.arange(len(resid))
3902 ax.plot(x, resid)
3903 ax.hlines(0, x[0], x[-1], alpha=0.5)
3904 ax.set_xlim(x[0], x[-1])
3905 ax.set_title('Standardized residual')
3907 # Top-right: histogram, Gaussian kernel density, Normal density
3908 # Can only do histogram and Gaussian kernel density on the non-null
3909 # elements
3910 resid_nonmissing = resid[~(np.isnan(resid))]
3911 ax = fig.add_subplot(222)
3913 # gh5792: Remove except after support for matplotlib>2.1 required
3914 try:
3915 ax.hist(resid_nonmissing, density=True, label='Hist')
3916 except AttributeError:
3917 ax.hist(resid_nonmissing, normed=True, label='Hist')
3919 from scipy.stats import gaussian_kde, norm
3920 kde = gaussian_kde(resid_nonmissing)
3921 xlim = (-1.96*2, 1.96*2)
3922 x = np.linspace(xlim[0], xlim[1])
3923 ax.plot(x, kde(x), label='KDE')
3924 ax.plot(x, norm.pdf(x), label='N(0,1)')
3925 ax.set_xlim(xlim)
3926 ax.legend()
3927 ax.set_title('Histogram plus estimated density')
3929 # Bottom-left: QQ plot
3930 ax = fig.add_subplot(223)
3931 from statsmodels.graphics.gofplots import qqplot
3932 qqplot(resid_nonmissing, line='s', ax=ax)
3933 ax.set_title('Normal Q-Q')
3935 # Bottom-right: Correlogram
3936 ax = fig.add_subplot(224)
3937 from statsmodels.graphics.tsaplots import plot_acf
3938 plot_acf(resid, ax=ax, lags=lags)
3939 ax.set_title('Correlogram')
3941 ax.set_ylim(-1, 1)
3943 return fig
3945 def summary(self, alpha=.05, start=None, title=None, model_name=None,
3946 display_params=True):
3947 """
3948 Summarize the Model
3950 Parameters
3951 ----------
3952 alpha : float, optional
3953 Significance level for the confidence intervals. Default is 0.05.
3954 start : int, optional
3955 Integer of the start observation. Default is 0.
3956 model_name : str
3957 The name of the model used. Default is to use model class name.
3959 Returns
3960 -------
3961 summary : Summary instance
3962 This holds the summary table and text, which can be printed or
3963 converted to various output formats.
3965 See Also
3966 --------
3967 statsmodels.iolib.summary.Summary
3968 """
3969 from statsmodels.iolib.summary import Summary
3971 # Model specification results
3972 model = self.model
3973 if title is None:
3974 title = 'Statespace Model Results'
3976 if start is None:
3977 start = 0
3978 if self.model._index_dates:
3979 ix = self.model._index
3980 d = ix[start]
3981 sample = ['%02d-%02d-%02d' % (d.month, d.day, d.year)]
3982 d = ix[-1]
3983 sample += ['- ' + '%02d-%02d-%02d' % (d.month, d.day, d.year)]
3984 else:
3985 sample = [str(start), ' - ' + str(self.nobs)]
3987 # Standardize the model name as a list of str
3988 if model_name is None:
3989 model_name = model.__class__.__name__
3991 # Diagnostic tests results
3992 try:
3993 het = self.test_heteroskedasticity(method='breakvar')
3994 except Exception: # FIXME: catch something specific
3995 het = np.array([[np.nan]*2])
3996 try:
3997 lb = self.test_serial_correlation(method='ljungbox')
3998 except Exception: # FIXME: catch something specific
3999 lb = np.array([[np.nan]*2]).reshape(1, 2, 1)
4000 try:
4001 jb = self.test_normality(method='jarquebera')
4002 except Exception: # FIXME: catch something specific
4003 jb = np.array([[np.nan]*4])
4005 # Create the tables
4006 if not isinstance(model_name, list):
4007 model_name = [model_name]
4009 top_left = [('Dep. Variable:', None)]
4010 top_left.append(('Model:', [model_name[0]]))
4011 for i in range(1, len(model_name)):
4012 top_left.append(('', ['+ ' + model_name[i]]))
4013 top_left += [
4014 ('Date:', None),
4015 ('Time:', None),
4016 ('Sample:', [sample[0]]),
4017 ('', [sample[1]])
4018 ]
4020 top_right = [
4021 ('No. Observations:', [self.nobs]),
4022 ('Log Likelihood', ["%#5.3f" % self.llf]),
4023 ]
4024 if hasattr(self, 'rsquared'):
4025 top_right.append(('R-squared:', ["%#8.3f" % self.rsquared]))
4026 top_right += [
4027 ('AIC', ["%#5.3f" % self.aic]),
4028 ('BIC', ["%#5.3f" % self.bic]),
4029 ('HQIC', ["%#5.3f" % self.hqic])]
4030 if (self.filter_results is not None and
4031 self.filter_results.filter_concentrated):
4032 top_right.append(('Scale', ["%#5.3f" % self.scale]))
4034 if hasattr(self, 'cov_type'):
4035 top_left.append(('Covariance Type:', [self.cov_type]))
4037 format_str = lambda array: [ # noqa:E731
4038 ', '.join(['{0:.2f}'.format(i) for i in array])
4039 ]
4040 diagn_left = [('Ljung-Box (Q):', format_str(lb[:, 0, -1])),
4041 ('Prob(Q):', format_str(lb[:, 1, -1])),
4042 ('Heteroskedasticity (H):', format_str(het[:, 0])),
4043 ('Prob(H) (two-sided):', format_str(het[:, 1]))
4044 ]
4046 diagn_right = [('Jarque-Bera (JB):', format_str(jb[:, 0])),
4047 ('Prob(JB):', format_str(jb[:, 1])),
4048 ('Skew:', format_str(jb[:, 2])),
4049 ('Kurtosis:', format_str(jb[:, 3]))
4050 ]
4052 summary = Summary()
4053 summary.add_table_2cols(self, gleft=top_left, gright=top_right,
4054 title=title)
4055 if len(self.params) > 0 and display_params:
4056 summary.add_table_params(self, alpha=alpha,
4057 xname=self.param_names, use_t=False)
4058 summary.add_table_2cols(self, gleft=diagn_left, gright=diagn_right,
4059 title="")
4061 # Add warnings/notes, added to text format only
4062 etext = []
4063 if hasattr(self, 'cov_type') and 'description' in self.cov_kwds:
4064 etext.append(self.cov_kwds['description'])
4065 if self._rank < (len(self.params) - len(self.fixed_params)):
4066 cov_params = self.cov_params()
4067 if len(self.fixed_params) > 0:
4068 mask = np.ix_(self._free_params_index, self._free_params_index)
4069 cov_params = cov_params[mask]
4070 etext.append("Covariance matrix is singular or near-singular,"
4071 " with condition number %6.3g. Standard errors may be"
4072 " unstable." % np.linalg.cond(cov_params))
4074 if etext:
4075 etext = ["[{0}] {1}".format(i + 1, text)
4076 for i, text in enumerate(etext)]
4077 etext.insert(0, "Warnings:")
4078 summary.add_extra_txt(etext)
4080 return summary
4083class MLEResultsWrapper(wrap.ResultsWrapper):
4084 _attrs = {
4085 'zvalues': 'columns',
4086 'cov_params_approx': 'cov',
4087 'cov_params_default': 'cov',
4088 'cov_params_oim': 'cov',
4089 'cov_params_opg': 'cov',
4090 'cov_params_robust': 'cov',
4091 'cov_params_robust_approx': 'cov',
4092 'cov_params_robust_oim': 'cov',
4093 }
4094 _wrap_attrs = wrap.union_dicts(tsbase.TimeSeriesResultsWrapper._wrap_attrs,
4095 _attrs)
4096 _methods = {
4097 'forecast': 'dates',
4098 'impulse_responses': 'ynames'
4099 }
4100 _wrap_methods = wrap.union_dicts(
4101 tsbase.TimeSeriesResultsWrapper._wrap_methods, _methods)
4102wrap.populate_wrapper(MLEResultsWrapper, MLEResults) # noqa:E305
4105class PredictionResults(pred.PredictionResults):
4106 """
4108 Parameters
4109 ----------
4110 prediction_results : kalman_filter.PredictionResults instance
4111 Results object from prediction after fitting or filtering a state space
4112 model.
4113 row_labels : iterable
4114 Row labels for the predicted data.
4116 Attributes
4117 ----------
4118 """
4119 def __init__(self, model, prediction_results, row_labels=None):
4120 if model.model.k_endog == 1:
4121 endog = pd.Series(prediction_results.endog[0],
4122 name=model.model.endog_names)
4123 else:
4124 endog = pd.DataFrame(prediction_results.endog.T,
4125 columns=model.model.endog_names)
4126 self.model = Bunch(data=model.data.__class__(
4127 endog=endog, predict_dates=row_labels))
4128 self.prediction_results = prediction_results
4130 # Get required values
4131 k_endog, nobs = prediction_results.endog.shape
4132 if not prediction_results.results.memory_no_forecast_mean:
4133 predicted_mean = self.prediction_results.forecasts
4134 else:
4135 predicted_mean = np.zeros((k_endog, nobs)) * np.nan
4137 if predicted_mean.shape[0] == 1:
4138 predicted_mean = predicted_mean[0, :]
4139 else:
4140 predicted_mean = predicted_mean.transpose()
4142 if not prediction_results.results.memory_no_forecast_cov:
4143 var_pred_mean = self.prediction_results.forecasts_error_cov
4144 else:
4145 var_pred_mean = np.zeros((k_endog, k_endog, nobs)) * np.nan
4147 if var_pred_mean.shape[0] == 1:
4148 var_pred_mean = var_pred_mean[0, 0, :]
4149 else:
4150 var_pred_mean = var_pred_mean.transpose()
4152 # Initialize
4153 super(PredictionResults, self).__init__(predicted_mean, var_pred_mean,
4154 dist='norm',
4155 row_labels=row_labels,
4156 link=identity())
4158 @property
4159 def se_mean(self):
4160 if self.var_pred_mean.ndim == 1:
4161 se_mean = np.sqrt(self.var_pred_mean)
4162 else:
4163 se_mean = np.sqrt(self.var_pred_mean.T.diagonal())
4164 return se_mean
4166 def conf_int(self, method='endpoint', alpha=0.05, **kwds):
4167 # TODO: this performs metadata wrapping, and that should be handled
4168 # by attach_* methods. However, they do not currently support
4169 # this use case.
4170 conf_int = super(PredictionResults, self).conf_int(
4171 method, alpha, **kwds)
4173 # Create a dataframe
4174 if self.row_labels is not None:
4175 conf_int = pd.DataFrame(conf_int, index=self.row_labels)
4177 # Attach the endog names
4178 ynames = self.model.data.ynames
4179 if not type(ynames) == list:
4180 ynames = [ynames]
4181 names = (['lower {0}'.format(name) for name in ynames] +
4182 ['upper {0}'.format(name) for name in ynames])
4183 conf_int.columns = names
4185 return conf_int
4187 def summary_frame(self, endog=0, what='all', alpha=0.05):
4188 # TODO: finish and cleanup
4189 # import pandas as pd
4190 # ci_obs = self.conf_int(alpha=alpha, obs=True) # need to split
4191 ci_mean = np.asarray(self.conf_int(alpha=alpha))
4192 to_include = OrderedDict()
4193 if self.predicted_mean.ndim == 1:
4194 yname = self.model.data.ynames
4195 to_include['mean'] = self.predicted_mean
4196 to_include['mean_se'] = self.se_mean
4197 k_endog = 1
4198 else:
4199 yname = self.model.data.ynames[endog]
4200 to_include['mean'] = self.predicted_mean[:, endog]
4201 to_include['mean_se'] = self.se_mean[:, endog]
4202 k_endog = self.predicted_mean.shape[1]
4203 to_include['mean_ci_lower'] = ci_mean[:, endog]
4204 to_include['mean_ci_upper'] = ci_mean[:, k_endog + endog]
4206 # OrderedDict does not work to preserve sequence
4207 # pandas dict does not handle 2d_array
4208 # data = np.column_stack(list(to_include.values()))
4209 # names = ....
4210 res = pd.DataFrame(to_include, index=self.row_labels,
4211 columns=to_include.keys())
4212 res.columns.name = yname
4213 return res
4216class PredictionResultsWrapper(wrap.ResultsWrapper):
4217 _attrs = {
4218 'predicted_mean': 'dates',
4219 'se_mean': 'dates',
4220 't_values': 'dates',
4221 }
4222 _wrap_attrs = wrap.union_dicts(_attrs)
4224 _methods = {}
4225 _wrap_methods = wrap.union_dicts(_methods)
4226wrap.populate_wrapper(PredictionResultsWrapper, PredictionResults) # noqa:E305