Coverage for /home/martinb/.local/share/virtualenvs/camcops/lib/python3.6/site-packages/statsmodels/tsa/statespace/dynamic_factor.py : 10%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# -*- coding: utf-8 -*-
2"""
3Dynamic factor model
5Author: Chad Fulton
6License: Simplified-BSD
7"""
9from collections import OrderedDict
11import numpy as np
12from .mlemodel import MLEModel, MLEResults, MLEResultsWrapper
13from .tools import (
14 is_invertible, prepare_exog,
15 constrain_stationary_univariate, unconstrain_stationary_univariate,
16 constrain_stationary_multivariate, unconstrain_stationary_multivariate
17)
18from statsmodels.multivariate.pca import PCA
19from statsmodels.regression.linear_model import OLS
20from statsmodels.tsa.vector_ar.var_model import VAR
21from statsmodels.tsa.arima.model import ARIMA
22from statsmodels.tools.tools import Bunch
23from statsmodels.tools.data import _is_using_pandas
24from statsmodels.tsa.tsatools import lagmat
25from statsmodels.tools.decorators import cache_readonly
26import statsmodels.base.wrapper as wrap
27from statsmodels.compat.pandas import Appender
30class DynamicFactor(MLEModel):
31 r"""
32 Dynamic factor model
34 Parameters
35 ----------
36 endog : array_like
37 The observed time-series process :math:`y`
38 exog : array_like, optional
39 Array of exogenous regressors for the observation equation, shaped
40 nobs x k_exog.
41 k_factors : int
42 The number of unobserved factors.
43 factor_order : int
44 The order of the vector autoregression followed by the factors.
45 error_cov_type : {'scalar', 'diagonal', 'unstructured'}, optional
46 The structure of the covariance matrix of the observation error term,
47 where "unstructured" puts no restrictions on the matrix, "diagonal"
48 requires it to be any diagonal matrix (uncorrelated errors), and
49 "scalar" requires it to be a scalar times the identity matrix. Default
50 is "diagonal".
51 error_order : int, optional
52 The order of the vector autoregression followed by the observation
53 error component. Default is None, corresponding to white noise errors.
54 error_var : bool, optional
55 Whether or not to model the errors jointly via a vector autoregression,
56 rather than as individual autoregressions. Has no effect unless
57 `error_order` is set. Default is False.
58 enforce_stationarity : bool, optional
59 Whether or not to transform the AR parameters to enforce stationarity
60 in the autoregressive component of the model. Default is True.
61 **kwargs
62 Keyword arguments may be used to provide default values for state space
63 matrices or for Kalman filtering options. See `Representation`, and
64 `KalmanFilter` for more details.
66 Attributes
67 ----------
68 exog : array_like, optional
69 Array of exogenous regressors for the observation equation, shaped
70 nobs x k_exog.
71 k_factors : int
72 The number of unobserved factors.
73 factor_order : int
74 The order of the vector autoregression followed by the factors.
75 error_cov_type : {'diagonal', 'unstructured'}
76 The structure of the covariance matrix of the error term, where
77 "unstructured" puts no restrictions on the matrix and "diagonal"
78 requires it to be a diagonal matrix (uncorrelated errors).
79 error_order : int
80 The order of the vector autoregression followed by the observation
81 error component.
82 error_var : bool
83 Whether or not to model the errors jointly via a vector autoregression,
84 rather than as individual autoregressions. Has no effect unless
85 `error_order` is set.
86 enforce_stationarity : bool, optional
87 Whether or not to transform the AR parameters to enforce stationarity
88 in the autoregressive component of the model. Default is True.
90 Notes
91 -----
92 The dynamic factor model considered here is in the so-called static form,
93 and is specified:
95 .. math::
97 y_t & = \Lambda f_t + B x_t + u_t \\
98 f_t & = A_1 f_{t-1} + \dots + A_p f_{t-p} + \eta_t \\
99 u_t & = C_1 u_{t-1} + \dots + C_1 f_{t-q} + \varepsilon_t
101 where there are `k_endog` observed series and `k_factors` unobserved
102 factors. Thus :math:`y_t` is a `k_endog` x 1 vector and :math:`f_t` is a
103 `k_factors` x 1 vector.
105 :math:`x_t` are optional exogenous vectors, shaped `k_exog` x 1.
107 :math:`\eta_t` and :math:`\varepsilon_t` are white noise error terms. In
108 order to identify the factors, :math:`Var(\eta_t) = I`. Denote
109 :math:`Var(\varepsilon_t) \equiv \Sigma`.
111 Options related to the unobserved factors:
113 - `k_factors`: this is the dimension of the vector :math:`f_t`, above.
114 To exclude factors completely, set `k_factors = 0`.
115 - `factor_order`: this is the number of lags to include in the factor
116 evolution equation, and corresponds to :math:`p`, above. To have static
117 factors, set `factor_order = 0`.
119 Options related to the observation error term :math:`u_t`:
121 - `error_order`: the number of lags to include in the error evolution
122 equation; corresponds to :math:`q`, above. To have white noise errors,
123 set `error_order = 0` (this is the default).
124 - `error_cov_type`: this controls the form of the covariance matrix
125 :math:`\Sigma`. If it is "dscalar", then :math:`\Sigma = \sigma^2 I`. If
126 it is "diagonal", then
127 :math:`\Sigma = \text{diag}(\sigma_1^2, \dots, \sigma_n^2)`. If it is
128 "unstructured", then :math:`\Sigma` is any valid variance / covariance
129 matrix (i.e. symmetric and positive definite).
130 - `error_var`: this controls whether or not the errors evolve jointly
131 according to a VAR(q), or individually according to separate AR(q)
132 processes. In terms of the formulation above, if `error_var = False`,
133 then the matrices :math:C_i` are diagonal, otherwise they are general
134 VAR matrices.
136 References
137 ----------
138 .. [*] Lütkepohl, Helmut. 2007.
139 New Introduction to Multiple Time Series Analysis.
140 Berlin: Springer.
141 """
143 def __init__(self, endog, k_factors, factor_order, exog=None,
144 error_order=0, error_var=False, error_cov_type='diagonal',
145 enforce_stationarity=True, **kwargs):
147 # Model properties
148 self.enforce_stationarity = enforce_stationarity
150 # Factor-related properties
151 self.k_factors = k_factors
152 self.factor_order = factor_order
154 # Error-related properties
155 self.error_order = error_order
156 self.error_var = error_var and error_order > 0
157 self.error_cov_type = error_cov_type
159 # Exogenous data
160 (self.k_exog, exog) = prepare_exog(exog)
162 # Note: at some point in the future might add state regression, as in
163 # SARIMAX.
164 self.mle_regression = self.k_exog > 0
166 # We need to have an array or pandas at this point
167 if not _is_using_pandas(endog, None):
168 endog = np.asanyarray(endog, order='C')
170 # Save some useful model orders, internally used
171 k_endog = endog.shape[1] if endog.ndim > 1 else 1
172 self._factor_order = max(1, self.factor_order) * self.k_factors
173 self._error_order = self.error_order * k_endog
175 # Calculate the number of states
176 k_states = self._factor_order
177 k_posdef = self.k_factors
178 if self.error_order > 0:
179 k_states += self._error_order
180 k_posdef += k_endog
182 # We can still estimate the model with no dynamic state (e.g. SUR), we
183 # just need to have one state that does nothing.
184 self._unused_state = False
185 if k_states == 0:
186 k_states = 1
187 k_posdef = 1
188 self._unused_state = True
190 # Test for non-multivariate endog
191 if k_endog < 2:
192 raise ValueError('The dynamic factors model is only valid for'
193 ' multivariate time series.')
195 # Test for too many factors
196 if self.k_factors >= k_endog:
197 raise ValueError('Number of factors must be less than the number'
198 ' of endogenous variables.')
200 # Test for invalid error_cov_type
201 if self.error_cov_type not in ['scalar', 'diagonal', 'unstructured']:
202 raise ValueError('Invalid error covariance matrix type'
203 ' specification.')
205 # By default, initialize as stationary
206 kwargs.setdefault('initialization', 'stationary')
208 # Initialize the state space model
209 super(DynamicFactor, self).__init__(
210 endog, exog=exog, k_states=k_states, k_posdef=k_posdef, **kwargs
211 )
213 # Set as time-varying model if we have exog
214 if self.k_exog > 0:
215 self.ssm._time_invariant = False
217 # Initialize the components
218 self.parameters = OrderedDict()
219 self._initialize_loadings()
220 self._initialize_exog()
221 self._initialize_error_cov()
222 self._initialize_factor_transition()
223 self._initialize_error_transition()
224 self.k_params = sum(self.parameters.values())
226 # Cache parameter vector slices
227 def _slice(key, offset):
228 length = self.parameters[key]
229 param_slice = np.s_[offset:offset + length]
230 offset += length
231 return param_slice, offset
233 offset = 0
234 self._params_loadings, offset = _slice('factor_loadings', offset)
235 self._params_exog, offset = _slice('exog', offset)
236 self._params_error_cov, offset = _slice('error_cov', offset)
237 self._params_factor_transition, offset = (
238 _slice('factor_transition', offset))
239 self._params_error_transition, offset = (
240 _slice('error_transition', offset))
242 # Update _init_keys attached by super
243 self._init_keys += ['k_factors', 'factor_order', 'error_order',
244 'error_var', 'error_cov_type',
245 'enforce_stationarity'] + list(kwargs.keys())
247 def _initialize_loadings(self):
248 # Initialize the parameters
249 self.parameters['factor_loadings'] = self.k_endog * self.k_factors
251 # Setup fixed components of state space matrices
252 if self.error_order > 0:
253 start = self._factor_order
254 end = self._factor_order + self.k_endog
255 self.ssm['design', :, start:end] = np.eye(self.k_endog)
257 # Setup indices of state space matrices
258 self._idx_loadings = np.s_['design', :, :self.k_factors]
260 def _initialize_exog(self):
261 # Initialize the parameters
262 self.parameters['exog'] = self.k_exog * self.k_endog
264 # If we have exog effects, then the obs intercept needs to be
265 # time-varying
266 if self.k_exog > 0:
267 self.ssm['obs_intercept'] = np.zeros((self.k_endog, self.nobs))
269 # Setup indices of state space matrices
270 self._idx_exog = np.s_['obs_intercept', :self.k_endog, :]
272 def _initialize_error_cov(self):
273 if self.error_cov_type == 'scalar':
274 self._initialize_error_cov_diagonal(scalar=True)
275 elif self.error_cov_type == 'diagonal':
276 self._initialize_error_cov_diagonal(scalar=False)
277 elif self.error_cov_type == 'unstructured':
278 self._initialize_error_cov_unstructured()
280 def _initialize_error_cov_diagonal(self, scalar=False):
281 # Initialize the parameters
282 self.parameters['error_cov'] = 1 if scalar else self.k_endog
284 # Setup fixed components of state space matrices
286 # Setup indices of state space matrices
287 k_endog = self.k_endog
288 k_factors = self.k_factors
289 idx = np.diag_indices(k_endog)
290 if self.error_order > 0:
291 matrix = 'state_cov'
292 idx = (idx[0] + k_factors, idx[1] + k_factors)
293 else:
294 matrix = 'obs_cov'
295 self._idx_error_cov = (matrix,) + idx
297 def _initialize_error_cov_unstructured(self):
298 # Initialize the parameters
299 k_endog = self.k_endog
300 self.parameters['error_cov'] = int(k_endog * (k_endog + 1) / 2)
302 # Setup fixed components of state space matrices
304 # Setup indices of state space matrices
305 self._idx_lower_error_cov = np.tril_indices(self.k_endog)
306 if self.error_order > 0:
307 start = self.k_factors
308 end = self.k_factors + self.k_endog
309 self._idx_error_cov = (
310 np.s_['state_cov', start:end, start:end])
311 else:
312 self._idx_error_cov = np.s_['obs_cov', :, :]
314 def _initialize_factor_transition(self):
315 order = self.factor_order * self.k_factors
316 k_factors = self.k_factors
318 # Initialize the parameters
319 self.parameters['factor_transition'] = (
320 self.factor_order * self.k_factors**2)
322 # Setup fixed components of state space matrices
323 # VAR(p) for factor transition
324 if self.k_factors > 0:
325 if self.factor_order > 0:
326 self.ssm['transition', k_factors:order, :order - k_factors] = (
327 np.eye(order - k_factors))
329 self.ssm['selection', :k_factors, :k_factors] = np.eye(k_factors)
330 # Identification requires constraining the state covariance to an
331 # identity matrix
332 self.ssm['state_cov', :k_factors, :k_factors] = np.eye(k_factors)
334 # Setup indices of state space matrices
335 self._idx_factor_transition = np.s_['transition', :k_factors, :order]
337 def _initialize_error_transition(self):
338 # Initialize the appropriate situation
339 if self.error_order == 0:
340 self._initialize_error_transition_white_noise()
341 else:
342 # Generic setup fixed components of state space matrices
343 # VAR(q) for error transition
344 # (in the individual AR case, we still have the VAR(q) companion
345 # matrix structure, but force the coefficient matrices to be
346 # diagonal)
347 k_endog = self.k_endog
348 k_factors = self.k_factors
349 _factor_order = self._factor_order
350 _error_order = self._error_order
351 _slice = np.s_['selection',
352 _factor_order:_factor_order + k_endog,
353 k_factors:k_factors + k_endog]
354 self.ssm[_slice] = np.eye(k_endog)
355 _slice = np.s_[
356 'transition',
357 _factor_order + k_endog:_factor_order + _error_order,
358 _factor_order:_factor_order + _error_order - k_endog]
359 self.ssm[_slice] = np.eye(_error_order - k_endog)
361 # Now specialized setups
362 if self.error_var:
363 self._initialize_error_transition_var()
364 else:
365 self._initialize_error_transition_individual()
367 def _initialize_error_transition_white_noise(self):
368 # Initialize the parameters
369 self.parameters['error_transition'] = 0
371 # No fixed components of state space matrices
373 # Setup indices of state space matrices (just an empty slice)
374 self._idx_error_transition = np.s_['transition', 0:0, 0:0]
376 def _initialize_error_transition_var(self):
377 k_endog = self.k_endog
378 _factor_order = self._factor_order
379 _error_order = self._error_order
381 # Initialize the parameters
382 self.parameters['error_transition'] = _error_order * k_endog
384 # Fixed components already setup above
386 # Setup indices of state space matrices
387 # Here we want to set all of the elements of the coefficient matrices,
388 # the same as in a VAR specification
389 self._idx_error_transition = np.s_[
390 'transition',
391 _factor_order:_factor_order + k_endog,
392 _factor_order:_factor_order + _error_order]
394 def _initialize_error_transition_individual(self):
395 k_endog = self.k_endog
396 _error_order = self._error_order
398 # Initialize the parameters
399 self.parameters['error_transition'] = _error_order
401 # Fixed components already setup above
403 # Setup indices of state space matrices
404 # Here we want to set only the diagonal elements of the coefficient
405 # matrices, and we want to set them in order by equation, not by
406 # matrix (i.e. set the first element of the first matrix's diagonal,
407 # then set the first element of the second matrix's diagonal, then...)
409 # The basic setup is a tiled list of diagonal indices, one for each
410 # coefficient matrix
411 idx = np.tile(np.diag_indices(k_endog), self.error_order)
412 # Now we need to shift the rows down to the correct location
413 row_shift = self._factor_order
414 # And we need to shift the columns in an increasing way
415 col_inc = self._factor_order + np.repeat(
416 [i * k_endog for i in range(self.error_order)], k_endog)
417 idx[0] += row_shift
418 idx[1] += col_inc
420 # Make a copy (without the row shift) so that we can easily get the
421 # diagonal parameters back out of a generic coefficients matrix array
422 idx_diag = idx.copy()
423 idx_diag[0] -= row_shift
424 idx_diag[1] -= self._factor_order
425 idx_diag = idx_diag[:, np.lexsort((idx_diag[1], idx_diag[0]))]
426 self._idx_error_diag = (idx_diag[0], idx_diag[1])
428 # Finally, we want to fill the entries in in the correct order, which
429 # is to say we want to fill in lexicographically, first by row then by
430 # column
431 idx = idx[:, np.lexsort((idx[1], idx[0]))]
432 self._idx_error_transition = np.s_['transition', idx[0], idx[1]]
434 def clone(self, endog, exog=None, **kwargs):
435 return self._clone_from_init_kwds(endog, exog=exog, **kwargs)
437 @property
438 def _res_classes(self):
439 return {'fit': (DynamicFactorResults, DynamicFactorResultsWrapper)}
441 @property
442 def start_params(self):
443 params = np.zeros(self.k_params, dtype=np.float64)
445 endog = self.endog.copy()
446 mask = ~np.any(np.isnan(endog), axis=1)
447 endog = endog[mask]
449 # 1. Factor loadings (estimated via PCA)
450 if self.k_factors > 0:
451 # Use principal components + OLS as starting values
452 res_pca = PCA(endog, ncomp=self.k_factors)
453 mod_ols = OLS(endog, res_pca.factors)
454 res_ols = mod_ols.fit()
456 # Using OLS params for the loadings tends to gives higher starting
457 # log-likelihood.
458 params[self._params_loadings] = res_ols.params.T.ravel()
459 # params[self._params_loadings] = res_pca.loadings.ravel()
461 # However, using res_ols.resid tends to causes non-invertible
462 # starting VAR coefficients for error VARs
463 # endog = res_ols.resid
464 endog = endog - np.dot(res_pca.factors, res_pca.loadings.T)
466 # 2. Exog (OLS on residuals)
467 if self.k_exog > 0:
468 mod_ols = OLS(endog, exog=self.exog)
469 res_ols = mod_ols.fit()
470 # In the form: beta.x1.y1, beta.x2.y1, beta.x1.y2, ...
471 params[self._params_exog] = res_ols.params.T.ravel()
472 endog = res_ols.resid
474 # 3. Factors (VAR on res_pca.factors)
475 stationary = True
476 if self.k_factors > 1 and self.factor_order > 0:
477 # 3a. VAR transition (OLS on factors estimated via PCA)
478 mod_factors = VAR(res_pca.factors)
479 res_factors = mod_factors.fit(maxlags=self.factor_order, ic=None,
480 trend='nc')
481 # Save the parameters
482 params[self._params_factor_transition] = (
483 res_factors.params.T.ravel())
485 # Test for stationarity
486 coefficient_matrices = (
487 params[self._params_factor_transition].reshape(
488 self.k_factors * self.factor_order, self.k_factors
489 ).T
490 ).reshape(self.k_factors, self.k_factors, self.factor_order).T
492 stationary = is_invertible([1] + list(-coefficient_matrices))
493 elif self.k_factors > 0 and self.factor_order > 0:
494 # 3b. AR transition
495 Y = res_pca.factors[self.factor_order:]
496 X = lagmat(res_pca.factors, self.factor_order, trim='both')
497 params_ar = np.linalg.pinv(X).dot(Y)
498 stationary = is_invertible(np.r_[1, -params_ar.squeeze()])
499 params[self._params_factor_transition] = params_ar[:, 0]
501 # Check for stationarity
502 if not stationary and self.enforce_stationarity:
503 raise ValueError('Non-stationary starting autoregressive'
504 ' parameters found with `enforce_stationarity`'
505 ' set to True.')
507 # 4. Errors
508 if self.error_order == 0:
509 if self.error_cov_type == 'scalar':
510 params[self._params_error_cov] = endog.var(axis=0).mean()
511 elif self.error_cov_type == 'diagonal':
512 params[self._params_error_cov] = endog.var(axis=0)
513 elif self.error_cov_type == 'unstructured':
514 cov_factor = np.diag(endog.std(axis=0))
515 params[self._params_error_cov] = (
516 cov_factor[self._idx_lower_error_cov].ravel())
517 elif self.error_var:
518 mod_errors = VAR(endog)
519 res_errors = mod_errors.fit(maxlags=self.error_order, ic=None,
520 trend='nc')
522 # Test for stationarity
523 coefficient_matrices = (
524 np.array(res_errors.params.T).ravel().reshape(
525 self.k_endog * self.error_order, self.k_endog
526 ).T
527 ).reshape(self.k_endog, self.k_endog, self.error_order).T
529 stationary = is_invertible([1] + list(-coefficient_matrices))
530 if not stationary and self.enforce_stationarity:
531 raise ValueError('Non-stationary starting error autoregressive'
532 ' parameters found with'
533 ' `enforce_stationarity` set to True.')
535 # Get the error autoregressive parameters
536 params[self._params_error_transition] = (
537 np.array(res_errors.params.T).ravel())
539 # Get the error covariance parameters
540 if self.error_cov_type == 'scalar':
541 params[self._params_error_cov] = (
542 res_errors.sigma_u.diagonal().mean())
543 elif self.error_cov_type == 'diagonal':
544 params[self._params_error_cov] = res_errors.sigma_u.diagonal()
545 elif self.error_cov_type == 'unstructured':
546 try:
547 cov_factor = np.linalg.cholesky(res_errors.sigma_u)
548 except np.linalg.LinAlgError:
549 cov_factor = np.eye(res_errors.sigma_u.shape[0]) * (
550 res_errors.sigma_u.diagonal().mean()**0.5)
551 cov_factor = np.eye(res_errors.sigma_u.shape[0]) * (
552 res_errors.sigma_u.diagonal().mean()**0.5)
553 params[self._params_error_cov] = (
554 cov_factor[self._idx_lower_error_cov].ravel())
555 else:
556 error_ar_params = []
557 error_cov_params = []
558 for i in range(self.k_endog):
559 mod_error = ARIMA(endog[:, i], order=(self.error_order, 0, 0),
560 trend='n', enforce_stationarity=True)
561 res_error = mod_error.fit(method='burg')
562 error_ar_params += res_error.params[:self.error_order].tolist()
563 error_cov_params += res_error.params[-1:].tolist()
565 params[self._params_error_transition] = np.r_[error_ar_params]
566 params[self._params_error_cov] = np.r_[error_cov_params]
568 return params
570 @property
571 def param_names(self):
572 param_names = []
573 endog_names = self.endog_names
575 # 1. Factor loadings
576 param_names += [
577 'loading.f%d.%s' % (j+1, endog_names[i])
578 for i in range(self.k_endog)
579 for j in range(self.k_factors)
580 ]
582 # 2. Exog
583 # Recall these are in the form: beta.x1.y1, beta.x2.y1, beta.x1.y2, ...
584 param_names += [
585 'beta.%s.%s' % (self.exog_names[j], endog_names[i])
586 for i in range(self.k_endog)
587 for j in range(self.k_exog)
588 ]
590 # 3. Error covariances
591 if self.error_cov_type == 'scalar':
592 param_names += ['sigma2']
593 elif self.error_cov_type == 'diagonal':
594 param_names += [
595 'sigma2.%s' % endog_names[i]
596 for i in range(self.k_endog)
597 ]
598 elif self.error_cov_type == 'unstructured':
599 param_names += [
600 'cov.chol[%d,%d]' % (i + 1, j + 1)
601 for i in range(self.k_endog)
602 for j in range(i+1)
603 ]
605 # 4. Factor transition VAR
606 param_names += [
607 'L%d.f%d.f%d' % (i+1, k+1, j+1)
608 for j in range(self.k_factors)
609 for i in range(self.factor_order)
610 for k in range(self.k_factors)
611 ]
613 # 5. Error transition VAR
614 if self.error_var:
615 param_names += [
616 'L%d.e(%s).e(%s)' % (i+1, endog_names[k], endog_names[j])
617 for j in range(self.k_endog)
618 for i in range(self.error_order)
619 for k in range(self.k_endog)
620 ]
621 else:
622 param_names += [
623 'L%d.e(%s).e(%s)' % (i+1, endog_names[j], endog_names[j])
624 for j in range(self.k_endog)
625 for i in range(self.error_order)
626 ]
628 return param_names
630 @property
631 def state_names(self):
632 names = []
633 endog_names = self.endog_names
635 # Factors and lags
636 names += [
637 (('f%d' % (j + 1)) if i == 0 else ('f%d.L%d' % (j + 1, i)))
638 for i in range(max(1, self.factor_order))
639 for j in range(self.k_factors)]
641 if self.error_order > 0:
642 names += [
643 (('e(%s)' % endog_names[j]) if i == 0
644 else ('e(%s).L%d' % (endog_names[j], i)))
645 for i in range(self.error_order)
646 for j in range(self.k_endog)]
648 if self._unused_state:
649 names += ['dummy']
651 return names
653 def transform_params(self, unconstrained):
654 """
655 Transform unconstrained parameters used by the optimizer to constrained
656 parameters used in likelihood evaluation
658 Parameters
659 ----------
660 unconstrained : array_like
661 Array of unconstrained parameters used by the optimizer, to be
662 transformed.
664 Returns
665 -------
666 constrained : array_like
667 Array of constrained parameters which may be used in likelihood
668 evaluation.
670 Notes
671 -----
672 Constrains the factor transition to be stationary and variances to be
673 positive.
674 """
675 unconstrained = np.array(unconstrained, ndmin=1)
676 dtype = unconstrained.dtype
677 constrained = np.zeros(unconstrained.shape, dtype=dtype)
679 # 1. Factor loadings
680 # The factor loadings do not need to be adjusted
681 constrained[self._params_loadings] = (
682 unconstrained[self._params_loadings])
684 # 2. Exog
685 # The regression coefficients do not need to be adjusted
686 constrained[self._params_exog] = (
687 unconstrained[self._params_exog])
689 # 3. Error covariances
690 # If we have variances, force them to be positive
691 if self.error_cov_type in ['scalar', 'diagonal']:
692 constrained[self._params_error_cov] = (
693 unconstrained[self._params_error_cov]**2)
694 # Otherwise, nothing needs to be done
695 elif self.error_cov_type == 'unstructured':
696 constrained[self._params_error_cov] = (
697 unconstrained[self._params_error_cov])
699 # 4. Factor transition VAR
700 # VAR transition: optionally force to be stationary
701 if self.enforce_stationarity and self.factor_order > 0:
702 # Transform the parameters
703 unconstrained_matrices = (
704 unconstrained[self._params_factor_transition].reshape(
705 self.k_factors, self._factor_order))
706 # This is always an identity matrix, but because the transform
707 # done prior to update (where the ssm representation matrices
708 # change), it may be complex
709 cov = self.ssm['state_cov', :self.k_factors, :self.k_factors].real
710 coefficient_matrices, variance = (
711 constrain_stationary_multivariate(unconstrained_matrices, cov))
712 constrained[self._params_factor_transition] = (
713 coefficient_matrices.ravel())
714 else:
715 constrained[self._params_factor_transition] = (
716 unconstrained[self._params_factor_transition])
718 # 5. Error transition VAR
719 # VAR transition: optionally force to be stationary
720 if self.enforce_stationarity and self.error_order > 0:
722 # Joint VAR specification
723 if self.error_var:
724 unconstrained_matrices = (
725 unconstrained[self._params_error_transition].reshape(
726 self.k_endog, self._error_order))
727 start = self.k_factors
728 end = self.k_factors + self.k_endog
729 cov = self.ssm['state_cov', start:end, start:end].real
730 coefficient_matrices, variance = (
731 constrain_stationary_multivariate(
732 unconstrained_matrices, cov))
733 constrained[self._params_error_transition] = (
734 coefficient_matrices.ravel())
735 # Separate AR specifications
736 else:
737 coefficients = (
738 unconstrained[self._params_error_transition].copy())
739 for i in range(self.k_endog):
740 start = i * self.error_order
741 end = (i + 1) * self.error_order
742 coefficients[start:end] = constrain_stationary_univariate(
743 coefficients[start:end])
744 constrained[self._params_error_transition] = coefficients
746 else:
747 constrained[self._params_error_transition] = (
748 unconstrained[self._params_error_transition])
750 return constrained
752 def untransform_params(self, constrained):
753 """
754 Transform constrained parameters used in likelihood evaluation
755 to unconstrained parameters used by the optimizer.
757 Parameters
758 ----------
759 constrained : array_like
760 Array of constrained parameters used in likelihood evaluation, to
761 be transformed.
763 Returns
764 -------
765 unconstrained : array_like
766 Array of unconstrained parameters used by the optimizer.
767 """
768 constrained = np.array(constrained, ndmin=1)
769 dtype = constrained.dtype
770 unconstrained = np.zeros(constrained.shape, dtype=dtype)
772 # 1. Factor loadings
773 # The factor loadings do not need to be adjusted
774 unconstrained[self._params_loadings] = (
775 constrained[self._params_loadings])
777 # 2. Exog
778 # The regression coefficients do not need to be adjusted
779 unconstrained[self._params_exog] = (
780 constrained[self._params_exog])
782 # 3. Error covariances
783 # If we have variances, force them to be positive
784 if self.error_cov_type in ['scalar', 'diagonal']:
785 unconstrained[self._params_error_cov] = (
786 constrained[self._params_error_cov]**0.5)
787 # Otherwise, nothing needs to be done
788 elif self.error_cov_type == 'unstructured':
789 unconstrained[self._params_error_cov] = (
790 constrained[self._params_error_cov])
792 # 3. Factor transition VAR
793 # VAR transition: optionally force to be stationary
794 if self.enforce_stationarity and self.factor_order > 0:
795 # Transform the parameters
796 constrained_matrices = (
797 constrained[self._params_factor_transition].reshape(
798 self.k_factors, self._factor_order))
799 cov = self.ssm['state_cov', :self.k_factors, :self.k_factors].real
800 coefficient_matrices, variance = (
801 unconstrain_stationary_multivariate(
802 constrained_matrices, cov))
803 unconstrained[self._params_factor_transition] = (
804 coefficient_matrices.ravel())
805 else:
806 unconstrained[self._params_factor_transition] = (
807 constrained[self._params_factor_transition])
809 # 5. Error transition VAR
810 # VAR transition: optionally force to be stationary
811 if self.enforce_stationarity and self.error_order > 0:
813 # Joint VAR specification
814 if self.error_var:
815 constrained_matrices = (
816 constrained[self._params_error_transition].reshape(
817 self.k_endog, self._error_order))
818 start = self.k_factors
819 end = self.k_factors + self.k_endog
820 cov = self.ssm['state_cov', start:end, start:end].real
821 coefficient_matrices, variance = (
822 unconstrain_stationary_multivariate(
823 constrained_matrices, cov))
824 unconstrained[self._params_error_transition] = (
825 coefficient_matrices.ravel())
826 # Separate AR specifications
827 else:
828 coefficients = (
829 constrained[self._params_error_transition].copy())
830 for i in range(self.k_endog):
831 start = i * self.error_order
832 end = (i + 1) * self.error_order
833 coefficients[start:end] = (
834 unconstrain_stationary_univariate(
835 coefficients[start:end]))
836 unconstrained[self._params_error_transition] = coefficients
838 else:
839 unconstrained[self._params_error_transition] = (
840 constrained[self._params_error_transition])
842 return unconstrained
844 def _validate_can_fix_params(self, param_names):
845 super(DynamicFactor, self)._validate_can_fix_params(param_names)
847 ix = np.cumsum(list(self.parameters.values()))[:-1]
848 (_, _, _, factor_transition_names, error_transition_names) = [
849 arr.tolist() for arr in np.array_split(self.param_names, ix)]
851 if self.enforce_stationarity and self.factor_order > 0:
852 if self.k_factors > 1 or self.factor_order > 1:
853 fix_all = param_names.issuperset(factor_transition_names)
854 fix_any = (
855 len(param_names.intersection(factor_transition_names)) > 0)
856 if fix_any and not fix_all:
857 raise ValueError(
858 'Cannot fix individual factor transition parameters'
859 ' when `enforce_stationarity=True`. In this case,'
860 ' must either fix all factor transition parameters or'
861 ' none.')
862 if self.enforce_stationarity and self.error_order > 0:
863 if self.error_var or self.error_order > 1:
864 fix_all = param_names.issuperset(error_transition_names)
865 fix_any = (
866 len(param_names.intersection(error_transition_names)) > 0)
867 if fix_any and not fix_all:
868 raise ValueError(
869 'Cannot fix individual error transition parameters'
870 ' when `enforce_stationarity=True`. In this case,'
871 ' must either fix all error transition parameters or'
872 ' none.')
874 def update(self, params, transformed=True, includes_fixed=False,
875 complex_step=False):
876 """
877 Update the parameters of the model
879 Updates the representation matrices to fill in the new parameter
880 values.
882 Parameters
883 ----------
884 params : array_like
885 Array of new parameters.
886 transformed : bool, optional
887 Whether or not `params` is already transformed. If set to False,
888 `transform_params` is called. Default is True..
890 Returns
891 -------
892 params : array_like
893 Array of parameters.
895 Notes
896 -----
897 Let `n = k_endog`, `m = k_factors`, and `p = factor_order`. Then the
898 `params` vector has length
899 :math:`[n \times m] + [n] + [m^2 \times p]`.
900 It is expanded in the following way:
902 - The first :math:`n \times m` parameters fill out the factor loading
903 matrix, starting from the [0,0] entry and then proceeding along rows.
904 These parameters are not modified in `transform_params`.
905 - The next :math:`n` parameters provide variances for the error_cov
906 errors in the observation equation. They fill in the diagonal of the
907 observation covariance matrix, and are constrained to be positive by
908 `transofrm_params`.
909 - The next :math:`m^2 \times p` parameters are used to create the `p`
910 coefficient matrices for the vector autoregression describing the
911 factor transition. They are transformed in `transform_params` to
912 enforce stationarity of the VAR(p). They are placed so as to make
913 the transition matrix a companion matrix for the VAR. In particular,
914 we assume that the first :math:`m^2` parameters fill the first
915 coefficient matrix (starting at [0,0] and filling along rows), the
916 second :math:`m^2` parameters fill the second matrix, etc.
917 """
918 params = self.handle_params(params, transformed=transformed,
919 includes_fixed=includes_fixed)
921 # 1. Factor loadings
922 # Update the design / factor loading matrix
923 self.ssm[self._idx_loadings] = (
924 params[self._params_loadings].reshape(self.k_endog, self.k_factors)
925 )
927 # 2. Exog
928 if self.k_exog > 0:
929 exog_params = params[self._params_exog].reshape(
930 self.k_endog, self.k_exog).T
931 self.ssm[self._idx_exog] = np.dot(self.exog, exog_params).T
933 # 3. Error covariances
934 if self.error_cov_type in ['scalar', 'diagonal']:
935 self.ssm[self._idx_error_cov] = (
936 params[self._params_error_cov])
937 elif self.error_cov_type == 'unstructured':
938 error_cov_lower = np.zeros((self.k_endog, self.k_endog),
939 dtype=params.dtype)
940 error_cov_lower[self._idx_lower_error_cov] = (
941 params[self._params_error_cov])
942 self.ssm[self._idx_error_cov] = (
943 np.dot(error_cov_lower, error_cov_lower.T))
945 # 4. Factor transition VAR
946 self.ssm[self._idx_factor_transition] = (
947 params[self._params_factor_transition].reshape(
948 self.k_factors, self.factor_order * self.k_factors))
950 # 5. Error transition VAR
951 if self.error_var:
952 self.ssm[self._idx_error_transition] = (
953 params[self._params_error_transition].reshape(
954 self.k_endog, self._error_order))
955 else:
956 self.ssm[self._idx_error_transition] = (
957 params[self._params_error_transition])
960class DynamicFactorResults(MLEResults):
961 """
962 Class to hold results from fitting an DynamicFactor model.
964 Parameters
965 ----------
966 model : DynamicFactor instance
967 The fitted model instance
969 Attributes
970 ----------
971 specification : dictionary
972 Dictionary including all attributes from the DynamicFactor model
973 instance.
974 coefficient_matrices_var : ndarray
975 Array containing autoregressive lag polynomial coefficient matrices,
976 ordered from lowest degree to highest.
978 See Also
979 --------
980 statsmodels.tsa.statespace.kalman_filter.FilterResults
981 statsmodels.tsa.statespace.mlemodel.MLEResults
982 """
983 def __init__(self, model, params, filter_results, cov_type=None,
984 **kwargs):
985 super(DynamicFactorResults, self).__init__(model, params,
986 filter_results, cov_type,
987 **kwargs)
989 self.df_resid = np.inf # attribute required for wald tests
991 self.specification = Bunch(**{
992 # Model properties
993 'k_endog': self.model.k_endog,
994 'enforce_stationarity': self.model.enforce_stationarity,
996 # Factor-related properties
997 'k_factors': self.model.k_factors,
998 'factor_order': self.model.factor_order,
1000 # Error-related properties
1001 'error_order': self.model.error_order,
1002 'error_var': self.model.error_var,
1003 'error_cov_type': self.model.error_cov_type,
1005 # Other properties
1006 'k_exog': self.model.k_exog
1007 })
1009 # Polynomials / coefficient matrices
1010 self.coefficient_matrices_var = None
1011 if self.model.factor_order > 0:
1012 ar_params = (
1013 np.array(self.params[self.model._params_factor_transition]))
1014 k_factors = self.model.k_factors
1015 factor_order = self.model.factor_order
1016 self.coefficient_matrices_var = (
1017 ar_params.reshape(k_factors * factor_order, k_factors).T
1018 ).reshape(k_factors, k_factors, factor_order).T
1020 self.coefficient_matrices_error = None
1021 if self.model.error_order > 0:
1022 ar_params = (
1023 np.array(self.params[self.model._params_error_transition]))
1024 k_endog = self.model.k_endog
1025 error_order = self.model.error_order
1026 if self.model.error_var:
1027 self.coefficient_matrices_error = (
1028 ar_params.reshape(k_endog * error_order, k_endog).T
1029 ).reshape(k_endog, k_endog, error_order).T
1030 else:
1031 mat = np.zeros((k_endog, k_endog * error_order))
1032 mat[self.model._idx_error_diag] = ar_params
1033 self.coefficient_matrices_error = (
1034 mat.T.reshape(error_order, k_endog, k_endog))
1036 @property
1037 def factors(self):
1038 """
1039 Estimates of unobserved factors
1041 Returns
1042 -------
1043 out : Bunch
1044 Has the following attributes shown in Notes.
1046 Notes
1047 -----
1048 The output is a bunch of the following format:
1050 - `filtered`: a time series array with the filtered estimate of
1051 the component
1052 - `filtered_cov`: a time series array with the filtered estimate of
1053 the variance/covariance of the component
1054 - `smoothed`: a time series array with the smoothed estimate of
1055 the component
1056 - `smoothed_cov`: a time series array with the smoothed estimate of
1057 the variance/covariance of the component
1058 - `offset`: an integer giving the offset in the state vector where
1059 this component begins
1060 """
1061 # If present, level is always the first component of the state vector
1062 out = None
1063 spec = self.specification
1064 if spec.k_factors > 0:
1065 offset = 0
1066 end = spec.k_factors
1067 res = self.filter_results
1068 out = Bunch(
1069 filtered=res.filtered_state[offset:end],
1070 filtered_cov=res.filtered_state_cov[offset:end, offset:end],
1071 smoothed=None, smoothed_cov=None,
1072 offset=offset)
1073 if self.smoothed_state is not None:
1074 out.smoothed = self.smoothed_state[offset:end]
1075 if self.smoothed_state_cov is not None:
1076 out.smoothed_cov = (
1077 self.smoothed_state_cov[offset:end, offset:end])
1078 return out
1080 @cache_readonly
1081 def coefficients_of_determination(self):
1082 """
1083 Coefficients of determination (:math:`R^2`) from regressions of
1084 individual estimated factors on endogenous variables.
1086 Returns
1087 -------
1088 coefficients_of_determination : ndarray
1089 A `k_endog` x `k_factors` array, where
1090 `coefficients_of_determination[i, j]` represents the :math:`R^2`
1091 value from a regression of factor `j` and a constant on endogenous
1092 variable `i`.
1094 Notes
1095 -----
1096 Although it can be difficult to interpret the estimated factor loadings
1097 and factors, it is often helpful to use the coefficients of
1098 determination from univariate regressions to assess the importance of
1099 each factor in explaining the variation in each endogenous variable.
1101 In models with many variables and factors, this can sometimes lend
1102 interpretation to the factors (for example sometimes one factor will
1103 load primarily on real variables and another on nominal variables).
1105 See Also
1106 --------
1107 plot_coefficients_of_determination
1108 """
1109 from statsmodels.tools import add_constant
1110 spec = self.specification
1111 coefficients = np.zeros((spec.k_endog, spec.k_factors))
1112 which = 'filtered' if self.smoothed_state is None else 'smoothed'
1114 for i in range(spec.k_factors):
1115 exog = add_constant(self.factors[which][i])
1116 for j in range(spec.k_endog):
1117 endog = self.filter_results.endog[j]
1118 coefficients[j, i] = OLS(endog, exog).fit().rsquared
1120 return coefficients
1122 def plot_coefficients_of_determination(self, endog_labels=None,
1123 fig=None, figsize=None):
1124 """
1125 Plot the coefficients of determination
1127 Parameters
1128 ----------
1129 endog_labels : bool, optional
1130 Whether or not to label the endogenous variables along the x-axis
1131 of the plots. Default is to include labels if there are 5 or fewer
1132 endogenous variables.
1133 fig : Figure, optional
1134 If given, subplots are created in this figure instead of in a new
1135 figure. Note that the grid will be created in the provided
1136 figure using `fig.add_subplot()`.
1137 figsize : tuple, optional
1138 If a figure is created, this argument allows specifying a size.
1139 The tuple is (width, height).
1141 Notes
1142 -----
1144 Produces a `k_factors` x 1 plot grid. The `i`th plot shows a bar plot
1145 of the coefficients of determination associated with factor `i`. The
1146 endogenous variables are arranged along the x-axis according to their
1147 position in the `endog` array.
1149 See Also
1150 --------
1151 coefficients_of_determination
1152 """
1153 from statsmodels.graphics.utils import _import_mpl, create_mpl_fig
1154 _import_mpl()
1155 fig = create_mpl_fig(fig, figsize)
1157 spec = self.specification
1159 # Should we label endogenous variables?
1160 if endog_labels is None:
1161 endog_labels = spec.k_endog <= 5
1163 # Plot the coefficients of determination
1164 coefficients_of_determination = self.coefficients_of_determination
1165 plot_idx = 1
1166 locations = np.arange(spec.k_endog)
1167 for coeffs in coefficients_of_determination.T:
1168 # Create the new axis
1169 ax = fig.add_subplot(spec.k_factors, 1, plot_idx)
1170 ax.set_ylim((0, 1))
1171 ax.set(title='Factor %i' % plot_idx, ylabel=r'$R^2$')
1172 bars = ax.bar(locations, coeffs)
1174 if endog_labels:
1175 width = bars[0].get_width()
1176 ax.xaxis.set_ticks(locations + width / 2)
1177 ax.xaxis.set_ticklabels(self.model.endog_names)
1178 else:
1179 ax.set(xlabel='Endogenous variables')
1180 ax.xaxis.set_ticks([])
1182 plot_idx += 1
1184 return fig
1186 @Appender(MLEResults.summary.__doc__)
1187 def summary(self, alpha=.05, start=None, separate_params=True):
1188 from statsmodels.iolib.summary import summary_params
1189 spec = self.specification
1191 # Create the model name
1192 model_name = []
1193 if spec.k_factors > 0:
1194 if spec.factor_order > 0:
1195 model_type = ('DynamicFactor(factors=%d, order=%d)' %
1196 (spec.k_factors, spec.factor_order))
1197 else:
1198 model_type = 'StaticFactor(factors=%d)' % spec.k_factors
1200 model_name.append(model_type)
1201 if spec.k_exog > 0:
1202 model_name.append('%d regressors' % spec.k_exog)
1203 else:
1204 model_name.append('SUR(%d regressors)' % spec.k_exog)
1206 if spec.error_order > 0:
1207 error_type = 'VAR' if spec.error_var else 'AR'
1208 model_name.append('%s(%d) errors' % (error_type, spec.error_order))
1210 summary = super(DynamicFactorResults, self).summary(
1211 alpha=alpha, start=start, model_name=model_name,
1212 display_params=not separate_params
1213 )
1215 if separate_params:
1216 indices = np.arange(len(self.params))
1218 def make_table(self, mask, title, strip_end=True):
1219 res = (self, self.params[mask], self.bse[mask],
1220 self.zvalues[mask], self.pvalues[mask],
1221 self.conf_int(alpha)[mask])
1223 param_names = [
1224 '.'.join(name.split('.')[:-1]) if strip_end else name
1225 for name in
1226 np.array(self.data.param_names)[mask].tolist()
1227 ]
1229 return summary_params(res, yname=None, xname=param_names,
1230 alpha=alpha, use_t=False, title=title)
1232 k_endog = self.model.k_endog
1233 k_exog = self.model.k_exog
1234 k_factors = self.model.k_factors
1235 factor_order = self.model.factor_order
1236 _factor_order = self.model._factor_order
1237 _error_order = self.model._error_order
1239 # Add parameter tables for each endogenous variable
1240 loading_indices = indices[self.model._params_loadings]
1241 loading_masks = []
1242 exog_indices = indices[self.model._params_exog]
1243 exog_masks = []
1244 for i in range(k_endog):
1245 # 1. Factor loadings
1246 # Recall these are in the form:
1247 # 'loading.f1.y1', 'loading.f2.y1', 'loading.f1.y2', ...
1249 loading_mask = (
1250 loading_indices[i * k_factors:(i + 1) * k_factors])
1251 loading_masks.append(loading_mask)
1253 # 2. Exog
1254 # Recall these are in the form:
1255 # beta.x1.y1, beta.x2.y1, beta.x1.y2, ...
1256 exog_mask = exog_indices[i * k_exog:(i + 1) * k_exog]
1257 exog_masks.append(exog_mask)
1259 # Create the table
1260 mask = np.concatenate([loading_mask, exog_mask])
1261 title = "Results for equation %s" % self.model.endog_names[i]
1262 table = make_table(self, mask, title)
1263 summary.tables.append(table)
1265 # Add parameter tables for each factor
1266 factor_indices = indices[self.model._params_factor_transition]
1267 factor_masks = []
1268 if factor_order > 0:
1269 for i in range(k_factors):
1270 start = i * _factor_order
1271 factor_mask = factor_indices[start: start + _factor_order]
1272 factor_masks.append(factor_mask)
1274 # Create the table
1275 title = "Results for factor equation f%d" % (i+1)
1276 table = make_table(self, factor_mask, title)
1277 summary.tables.append(table)
1279 # Add parameter tables for error transitions
1280 error_masks = []
1281 if spec.error_order > 0:
1282 error_indices = indices[self.model._params_error_transition]
1283 for i in range(k_endog):
1284 if spec.error_var:
1285 start = i * _error_order
1286 end = (i + 1) * _error_order
1287 else:
1288 start = i * spec.error_order
1289 end = (i + 1) * spec.error_order
1291 error_mask = error_indices[start:end]
1292 error_masks.append(error_mask)
1294 # Create the table
1295 title = ("Results for error equation e(%s)" %
1296 self.model.endog_names[i])
1297 table = make_table(self, error_mask, title)
1298 summary.tables.append(table)
1300 # Error covariance terms
1301 error_cov_mask = indices[self.model._params_error_cov]
1302 table = make_table(self, error_cov_mask,
1303 "Error covariance matrix", strip_end=False)
1304 summary.tables.append(table)
1306 # Add a table for all other parameters
1307 masks = []
1308 for m in (loading_masks, exog_masks, factor_masks,
1309 error_masks, [error_cov_mask]):
1310 m = np.array(m).flatten()
1311 if len(m) > 0:
1312 masks.append(m)
1313 masks = np.concatenate(masks)
1314 inverse_mask = np.array(list(set(indices).difference(set(masks))))
1315 if len(inverse_mask) > 0:
1316 table = make_table(self, inverse_mask, "Other parameters",
1317 strip_end=False)
1318 summary.tables.append(table)
1320 return summary
1323class DynamicFactorResultsWrapper(MLEResultsWrapper):
1324 _attrs = {}
1325 _wrap_attrs = wrap.union_dicts(MLEResultsWrapper._wrap_attrs,
1326 _attrs)
1327 _methods = {}
1328 _wrap_methods = wrap.union_dicts(MLEResultsWrapper._wrap_methods,
1329 _methods)
1330wrap.populate_wrapper(DynamicFactorResultsWrapper, # noqa:E305
1331 DynamicFactorResults)