# -*- coding: utf-8 -*-
"""
State Space Model

Author: Chad Fulton
License: Simplified-BSD
"""
import contextlib
import warnings

from collections import OrderedDict
from types import SimpleNamespace
import numpy as np
import pandas as pd
from scipy.stats import norm

from statsmodels.tools.tools import pinv_extended, Bunch
from statsmodels.tools.sm_exceptions import PrecisionWarning, ValueWarning
from statsmodels.tools.numdiff import (_get_epsilon, approx_hess_cs,
                                       approx_fprime_cs, approx_fprime)
from statsmodels.tools.decorators import cache_readonly
from statsmodels.tools.eval_measures import aic, aicc, bic, hqic

import statsmodels.base.wrapper as wrap

import statsmodels.genmod._prediction as pred
from statsmodels.genmod.families.links import identity

from statsmodels.base.data import PandasData
import statsmodels.tsa.base.tsa_model as tsbase

from .simulation_smoother import SimulationSmoother
from .kalman_smoother import SmootherResults
from .kalman_filter import INVERT_UNIVARIATE, SOLVE_LU, MEMORY_CONSERVE
from .initialization import Initialization
from .tools import prepare_exog, concat


def _handle_args(names, defaults, *args, **kwargs):
    output_args = []
    # We need to handle positional arguments in two ways, in case this was
    # called by a Scipy optimization routine
    if len(args) > 0:
        # the fit() method will pass a dictionary
        if isinstance(args[0], dict):
            flags = args[0]
        # otherwise, a user may have just used positional arguments...
        else:
            flags = dict(zip(names, args))
        for i in range(len(names)):
            output_args.append(flags.get(names[i], defaults[i]))

        for name, value in flags.items():
            if name in kwargs:
                raise TypeError("loglike() got multiple values for keyword"
                                " argument '%s'" % name)
    else:
        for i in range(len(names)):
            output_args.append(kwargs.pop(names[i], defaults[i]))

    return tuple(output_args) + (kwargs,)


def _check_index(desired_index, dta, title='data'):
    given_index = None
    if isinstance(dta, (pd.Series, pd.DataFrame)):
        given_index = dta.index
    if given_index is not None and not desired_index.equals(given_index):
        desired_freq = getattr(desired_index, 'freq', None)
        given_freq = getattr(given_index, 'freq', None)
        if ((desired_freq is not None or given_freq is not None) and
                desired_freq != given_freq):
            raise ValueError('Given %s does not have an index'
                             ' that extends the index of the'
                             ' model. Expected index frequency is'
                             ' "%s", but got "%s".'
                             % (title, desired_freq, given_freq))
        else:
            raise ValueError('Given %s does not have an index'
                             ' that extends the index of the'
                             ' model.' % title)


class MLEModel(tsbase.TimeSeriesModel):
    r"""
    State space model for maximum likelihood estimation

    Parameters
    ----------
    endog : array_like
        The observed time-series process :math:`y`
    k_states : int
        The dimension of the unobserved state process.
    exog : array_like, optional
        Array of exogenous regressors, shaped nobs x k. Default is no
        exogenous regressors.
    dates : array_like of datetime, optional
        An array-like object of datetime objects. If a Pandas object is given
        for endog, it is assumed to have a DateIndex.
    freq : str, optional
        The frequency of the time-series. A Pandas offset or 'B', 'D', 'W',
        'M', 'A', or 'Q'. This is optional if dates are given.
    **kwargs
        Keyword arguments may be used to provide default values for state
        space matrices or for Kalman filtering options. See `Representation`
        and `KalmanFilter` for more details.

    Attributes
    ----------
    ssm : statsmodels.tsa.statespace.kalman_filter.KalmanFilter
        Underlying state space representation.

    Notes
    -----
    This class wraps the state space model with Kalman filtering to add in
    functionality for maximum likelihood estimation. In particular, it adds
    the concept of updating the state space representation based on a defined
    set of parameters, through the `update` method or `updater` attribute (see
    below for more details on which to use when), and it adds a `fit` method
    which uses a numerical optimizer to select the parameters that maximize
    the likelihood of the model.

    The `start_params` property and the `update` method must be overridden in
    the child class (and the `transform_params` and `untransform_params`
    methods, if needed).

    See Also
    --------
    statsmodels.tsa.statespace.mlemodel.MLEResults
    statsmodels.tsa.statespace.kalman_filter.KalmanFilter
    statsmodels.tsa.statespace.representation.Representation
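
    Examples
    --------
    A minimal sketch of a subclass (this local level model is illustrative
    only; `LocalLevel` and its parameter layout are not part of this module):

    >>> class LocalLevel(MLEModel):
    ...     def __init__(self, endog):
    ...         super(LocalLevel, self).__init__(endog, k_states=1)
    ...         self['design', 0, 0] = 1.
    ...         self['transition', 0, 0] = 1.
    ...         self['selection', 0, 0] = 1.
    ...         self.initialize_approximate_diffuse()
    ...
    ...     @property
    ...     def start_params(self):
    ...         return [np.var(self.endog)] * 2
    ...
    ...     def update(self, params, **kwargs):
    ...         params = super(LocalLevel, self).update(params, **kwargs)
    ...         self['obs_cov', 0, 0] = params[0]
    ...         self['state_cov', 0, 0] = params[1]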

131 """ 

132 

133 def __init__(self, endog, k_states, exog=None, dates=None, freq=None, 

134 **kwargs): 

135 # Initialize the model base 

136 super(MLEModel, self).__init__(endog=endog, exog=exog, 

137 dates=dates, freq=freq, 

138 missing='none') 

139 

140 # Store kwargs to recreate model 

141 self._init_kwargs = kwargs 

142 

143 # Prepared the endog array: C-ordered, shape=(nobs x k_endog) 

144 self.endog, self.exog = self.prepare_data() 

145 

146 # Dimensions 

147 self.nobs = self.endog.shape[0] 

148 self.k_states = k_states 

149 

150 # Initialize the state-space representation 

151 self.initialize_statespace(**kwargs) 

152 

153 # Setup holder for fixed parameters 

154 self._has_fixed_params = False 

155 self._fixed_params = None 

156 self._params_index = None 

157 self._fixed_params_index = None 

158 self._free_params_index = None 

159 

160 def prepare_data(self): 

161 """ 

162 Prepare data for use in the state space representation 

163 """ 

164 endog = np.array(self.data.orig_endog, order='C') 

165 exog = self.data.orig_exog 

166 if exog is not None: 

167 exog = np.array(exog) 

168 

169 # Base class may allow 1-dim data, whereas we need 2-dim 

170 if endog.ndim == 1: 

171 endog.shape = (endog.shape[0], 1) # this will be C-contiguous 

172 

173 return endog, exog 

174 

175 def initialize_statespace(self, **kwargs): 

176 """ 

177 Initialize the state space representation 

178 

179 Parameters 

180 ---------- 

181 **kwargs 

182 Additional keyword arguments to pass to the state space class 

183 constructor. 

184 """ 

185 # (Now self.endog is C-ordered and in long format (nobs x k_endog). To 

186 # get F-ordered and in wide format just need to transpose) 

187 endog = self.endog.T 

188 

189 # Instantiate the state space object 

190 self.ssm = SimulationSmoother(endog.shape[0], self.k_states, 

191 nobs=endog.shape[1], **kwargs) 

192 # Bind the data to the model 

193 self.ssm.bind(endog) 

194 

195 # Other dimensions, now that `ssm` is available 

196 self.k_endog = self.ssm.k_endog 

197 

198 def _get_index_with_initial_state(self): 

199 # The index we inherit from `TimeSeriesModel` will only cover the 

200 # data sample itself, but we will also need an index value for the 

201 # initial state which is the previous time step to the first datapoint. 

202 # This method figures out an appropriate value for the three types of 

203 # supported indexes: date-based, Int64Index, or RangeIndex 

204 if self._index_dates: 

205 # value = self._index.shift(-1)[0] 

206 if isinstance(self._index, pd.DatetimeIndex): 

207 index = pd.date_range( 

208 end=self._index[-1], periods=len(self._index) + 1, 

209 freq=self._index.freq) 

210 elif isinstance(self._index, pd.PeriodIndex): 

211 index = pd.period_range( 

212 end=self._index[-1], periods=len(self._index) + 1, 

213 freq=self._index.freq) 

214 else: 

215 raise NotImplementedError 

216 elif isinstance(self._index, pd.Int64Index): 

217 # The only valid Int64Index is a full, incrementing index, so this 

218 # is general 

219 value = self._index[0] - 1 

220 index = pd.Int64Index([value] + self._index.tolist()) 

221 elif isinstance(self._index, pd.RangeIndex): 

222 index = pd.RangeIndex(self._index.start - self._index.step, 

223 self._index.end, self._index.step) 

224 else: 

225 raise NotImplementedError 

226 return index 

227 

228 def __setitem__(self, key, value): 

229 return self.ssm.__setitem__(key, value) 

230 

231 def __getitem__(self, key): 

232 return self.ssm.__getitem__(key) 

233 

234 def _get_init_kwds(self): 

235 # Get keywords based on model attributes 

236 kwds = super(MLEModel, self)._get_init_kwds() 

237 

238 for key, value in kwds.items(): 

239 if value is None and hasattr(self.ssm, key): 

240 kwds[key] = getattr(self.ssm, key) 

241 

242 return kwds 

243 

244 def clone(self, endog, exog=None, **kwargs): 

245 raise NotImplementedError 

246 

247 def _clone_from_init_kwds(self, endog, **kwargs): 

248 # Cannot make this the default, because there is extra work required 

249 # for subclasses to make _get_init_kwds useful. 

250 use_kwargs = self._get_init_kwds() 

251 use_kwargs.update(kwargs) 

252 

253 # Check for `exog` 

254 if getattr(self, 'k_exog', 0) > 0 and kwargs.get('exog', None) is None: 

255 raise ValueError('Cloning a model with an exogenous component' 

256 ' requires specifying a new exogenous array using' 

257 ' the `exog` argument.') 

258 

259 return self.__class__(endog, **use_kwargs) 

260 

261 def set_filter_method(self, filter_method=None, **kwargs): 

262 """ 

263 Set the filtering method 

264 

265 The filtering method controls aspects of which Kalman filtering 

266 approach will be used. 

267 

268 Parameters 

269 ---------- 

270 filter_method : int, optional 

271 Bitmask value to set the filter method to. See notes for details. 

272 **kwargs 

273 Keyword arguments may be used to influence the filter method by 

274 setting individual boolean flags. See notes for details. 

275 

276 Notes 

277 ----- 

278 This method is rarely used. See the corresponding function in the 

279 `KalmanFilter` class for details. 
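
        Examples
        --------
        An illustrative sketch, reusing the SARIMAX model from the
        `fix_params` example and enabling the univariate filtering approach:

        >>> mod = sm.tsa.SARIMAX(endog, order=(1, 0, 1))
        >>> mod.set_filter_method(filter_univariate=True)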

280 """ 

281 self.ssm.set_filter_method(filter_method, **kwargs) 

282 

283 def set_inversion_method(self, inversion_method=None, **kwargs): 

284 """ 

285 Set the inversion method 

286 

287 The Kalman filter may contain one matrix inversion: that of the 

288 forecast error covariance matrix. The inversion method controls how and 

289 if that inverse is performed. 

290 

291 Parameters 

292 ---------- 

293 inversion_method : int, optional 

294 Bitmask value to set the inversion method to. See notes for 

295 details. 

296 **kwargs 

297 Keyword arguments may be used to influence the inversion method by 

298 setting individual boolean flags. See notes for details. 

299 

300 Notes 

301 ----- 

302 This method is rarely used. See the corresponding function in the 

303 `KalmanFilter` class for details. 

304 """ 

305 self.ssm.set_inversion_method(inversion_method, **kwargs) 

306 

307 def set_stability_method(self, stability_method=None, **kwargs): 

308 """ 

309 Set the numerical stability method 

310 

311 The Kalman filter is a recursive algorithm that may in some cases 

312 suffer issues with numerical stability. The stability method controls 

313 what, if any, measures are taken to promote stability. 

314 

315 Parameters 

316 ---------- 

317 stability_method : int, optional 

318 Bitmask value to set the stability method to. See notes for 

319 details. 

320 **kwargs 

321 Keyword arguments may be used to influence the stability method by 

322 setting individual boolean flags. See notes for details. 

323 

324 Notes 

325 ----- 

326 This method is rarely used. See the corresponding function in the 

327 `KalmanFilter` class for details. 

328 """ 

329 self.ssm.set_stability_method(stability_method, **kwargs) 

330 

331 def set_conserve_memory(self, conserve_memory=None, **kwargs): 

332 """ 

333 Set the memory conservation method 

334 

335 By default, the Kalman filter computes a number of intermediate 

336 matrices at each iteration. The memory conservation options control 

337 which of those matrices are stored. 

338 

339 Parameters 

340 ---------- 

341 conserve_memory : int, optional 

342 Bitmask value to set the memory conservation method to. See notes 

343 for details. 

344 **kwargs 

345 Keyword arguments may be used to influence the memory conservation 

346 method by setting individual boolean flags. 

347 

348 Notes 

349 ----- 

350 This method is rarely used. See the corresponding function in the 

351 `KalmanFilter` class for details. 

352 """ 

353 self.ssm.set_conserve_memory(conserve_memory, **kwargs) 

354 

355 def set_smoother_output(self, smoother_output=None, **kwargs): 

356 """ 

357 Set the smoother output 

358 

359 The smoother can produce several types of results. The smoother output 

360 variable controls which are calculated and returned. 

361 

362 Parameters 

363 ---------- 

364 smoother_output : int, optional 

365 Bitmask value to set the smoother output to. See notes for details. 

366 **kwargs 

367 Keyword arguments may be used to influence the smoother output by 

368 setting individual boolean flags. 

369 

370 Notes 

371 ----- 

372 This method is rarely used. See the corresponding function in the 

373 `KalmanSmoother` class for details. 

374 """ 

375 self.ssm.set_smoother_output(smoother_output, **kwargs) 

376 

377 def initialize_known(self, initial_state, initial_state_cov): 

378 """Initialize known""" 

379 self.ssm.initialize_known(initial_state, initial_state_cov) 

380 

381 def initialize_approximate_diffuse(self, variance=None): 

382 """Initialize approximate diffuse""" 

383 self.ssm.initialize_approximate_diffuse(variance) 

384 

385 def initialize_stationary(self): 

386 """Initialize stationary""" 

387 self.ssm.initialize_stationary() 

388 

389 @property 

390 def initialization(self): 

391 return self.ssm.initialization 

392 

393 @initialization.setter 

394 def initialization(self, value): 

395 self.ssm.initialization = value 

396 

397 @property 

398 def initial_variance(self): 

399 return self.ssm.initial_variance 

400 

401 @initial_variance.setter 

402 def initial_variance(self, value): 

403 self.ssm.initial_variance = value 

404 

405 @property 

406 def loglikelihood_burn(self): 

407 return self.ssm.loglikelihood_burn 

408 

409 @loglikelihood_burn.setter 

410 def loglikelihood_burn(self, value): 

411 self.ssm.loglikelihood_burn = value 

412 

413 @property 

414 def tolerance(self): 

415 return self.ssm.tolerance 

416 

417 @tolerance.setter 

418 def tolerance(self, value): 

419 self.ssm.tolerance = value 

420 

421 def _validate_can_fix_params(self, param_names): 

422 for param_name in param_names: 

423 if param_name not in self.param_names: 

424 raise ValueError('Invalid parameter name passed: "%s".' 

425 % param_name) 

426 

427 @contextlib.contextmanager 

428 def fix_params(self, params): 

429 """ 

430 Fix parameters to specific values (context manager) 

431 

432 Parameters 

433 ---------- 

434 params : dict 

435 Dictionary describing the fixed parameter values, of the form 

436 `param_name: fixed_value`. See the `param_names` property for valid 

437 parameter names. 

438 

439 Examples 

440 -------- 

441 >>> mod = sm.tsa.SARIMAX(endog, order=(1, 0, 1)) 

442 >>> with mod.fix_params({'ar.L1': 0.5}): 

443 res = mod.fit() 

444 """ 

445 k_params = len(self.param_names) 

446 # Initialization (this is done here rather than in the constructor 

447 # because param_names may not be available at that point) 

448 if self._fixed_params is None: 

449 self._fixed_params = {} 

450 self._params_index = OrderedDict( 

451 zip(self.param_names, np.arange(k_params))) 

452 

453 # Cache the current fixed parameters 

454 cache_fixed_params = self._fixed_params.copy() 

455 cache_has_fixed_params = self._has_fixed_params 

456 cache_fixed_params_index = self._fixed_params_index 

457 cache_free_params_index = self._free_params_index 

458 

459 # Validate parameter names and values 

460 self._validate_can_fix_params(set(params.keys())) 

461 

462 # Set the new fixed parameters, keeping the order as given by 

463 # param_names 

464 self._fixed_params.update(params) 

465 self._fixed_params = OrderedDict([ 

466 (name, self._fixed_params[name]) for name in self.param_names 

467 if name in self._fixed_params]) 

468 

469 # Update associated values 

470 self._has_fixed_params = True 

471 self._fixed_params_index = [self._params_index[key] 

472 for key in self._fixed_params.keys()] 

473 self._free_params_index = list( 

474 set(np.arange(k_params)).difference(self._fixed_params_index)) 

475 

476 try: 

477 yield 

478 finally: 

479 # Reset the fixed parameters 

480 self._has_fixed_params = cache_has_fixed_params 

481 self._fixed_params = cache_fixed_params 

482 self._fixed_params_index = cache_fixed_params_index 

483 self._free_params_index = cache_free_params_index 

484 

485 def fit(self, start_params=None, transformed=True, includes_fixed=False, 

486 cov_type=None, cov_kwds=None, method='lbfgs', maxiter=50, 

487 full_output=1, disp=5, callback=None, return_params=False, 

488 optim_score=None, optim_complex_step=None, optim_hessian=None, 

489 flags=None, low_memory=False, **kwargs): 

490 """ 

491 Fits the model by maximum likelihood via Kalman filter. 

492 

493 Parameters 

494 ---------- 

495 start_params : array_like, optional 

496 Initial guess of the solution for the loglikelihood maximization. 

497 If None, the default is given by Model.start_params. 

498 transformed : bool, optional 

499 Whether or not `start_params` is already transformed. Default is 

500 True. 

501 includes_fixed : bool, optional 

502 If parameters were previously fixed with the `fix_params` method, 

503 this argument describes whether or not `start_params` also includes 

504 the fixed parameters, in addition to the free parameters. Default 

505 is False. 

506 cov_type : str, optional 

507 The `cov_type` keyword governs the method for calculating the 

508 covariance matrix of parameter estimates. Can be one of: 

509 

510 - 'opg' for the outer product of gradient estimator 

511 - 'oim' for the observed information matrix estimator, calculated 

512 using the method of Harvey (1989) 

513 - 'approx' for the observed information matrix estimator, 

514 calculated using a numerical approximation of the Hessian matrix. 

515 - 'robust' for an approximate (quasi-maximum likelihood) covariance 

516 matrix that may be valid even in the presence of some 

517 misspecifications. Intermediate calculations use the 'oim' 

518 method. 

519 - 'robust_approx' is the same as 'robust' except that the 

520 intermediate calculations use the 'approx' method. 

521 - 'none' for no covariance matrix calculation. 

522 

523 Default is 'opg' unless memory conservation is used to avoid 

524 computing the loglikelihood values for each observation, in which 

525 case the default is 'approx'. 

526 cov_kwds : dict or None, optional 

527 A dictionary of arguments affecting covariance matrix computation. 

528 

529 **opg, oim, approx, robust, robust_approx** 

530 

531 - 'approx_complex_step' : bool, optional - If True, numerical 

532 approximations are computed using complex-step methods. If False, 

533 numerical approximations are computed using finite difference 

534 methods. Default is True. 

535 - 'approx_centered' : bool, optional - If True, numerical 

536 approximations computed using finite difference methods use a 

537 centered approximation. Default is False. 

538 method : str, optional 

539 The `method` determines which solver from `scipy.optimize` 

540 is used, and it can be chosen from among the following strings: 

541 

542 - 'newton' for Newton-Raphson, 'nm' for Nelder-Mead 

543 - 'bfgs' for Broyden-Fletcher-Goldfarb-Shanno (BFGS) 

544 - 'lbfgs' for limited-memory BFGS with optional box constraints 

545 - 'powell' for modified Powell's method 

546 - 'cg' for conjugate gradient 

547 - 'ncg' for Newton-conjugate gradient 

548 - 'basinhopping' for global basin-hopping solver 

549 

550 The explicit arguments in `fit` are passed to the solver, 

551 with the exception of the basin-hopping solver. Each 

552 solver has several optional arguments that are not the same across 

553 solvers. See the notes section below (or scipy.optimize) for the 

554 available arguments and for the list of explicit arguments that the 

555 basin-hopping solver supports. 

556 maxiter : int, optional 

557 The maximum number of iterations to perform. 

558 full_output : bool, optional 

559 Set to True to have all available output in the Results object's 

560 mle_retvals attribute. The output is dependent on the solver. 

561 See LikelihoodModelResults notes section for more information. 

562 disp : bool, optional 

563 Set to True to print convergence messages. 

564 callback : callable callback(xk), optional 

565 Called after each iteration, as callback(xk), where xk is the 

566 current parameter vector. 

567 return_params : bool, optional 

568 Whether or not to return only the array of maximizing parameters. 

569 Default is False. 

570 optim_score : {'harvey', 'approx'} or None, optional 

571 The method by which the score vector is calculated. 'harvey' uses 

572 the method from Harvey (1989), 'approx' uses either finite 

573 difference or complex step differentiation depending upon the 

574 value of `optim_complex_step`, and None uses the built-in gradient 

575 approximation of the optimizer. Default is None. This keyword is 

576 only relevant if the optimization method uses the score. 

577 optim_complex_step : bool, optional 

578 Whether or not to use complex step differentiation when 

579 approximating the score; if False, finite difference approximation 

580 is used. Default is True. This keyword is only relevant if 

581 `optim_score` is set to 'harvey' or 'approx'. 

582 optim_hessian : {'opg','oim','approx'}, optional 

583 The method by which the Hessian is numerically approximated. 'opg' 

584 uses outer product of gradients, 'oim' uses the information 

585 matrix formula from Harvey (1989), and 'approx' uses numerical 

586 approximation. This keyword is only relevant if the 

587 optimization method uses the Hessian matrix. 

588 low_memory : bool, optional 

589 If set to True, techniques are applied to substantially reduce 

590 memory usage. If used, some features of the results object will 

591 not be available (including smoothed results and in-sample 

592 prediction), although out-of-sample forecasting is possible. 

593 Default is False. 

594 **kwargs 

595 Additional keyword arguments to pass to the optimizer. 

596 

597 Returns 

598 ------- 

599 MLEResults 

600 

601 See Also 

602 -------- 

603 statsmodels.base.model.LikelihoodModel.fit 

604 statsmodels.tsa.statespace.mlemodel.MLEResults 
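
        Examples
        --------
        A sketch of a typical invocation, following the `fix_params` example
        above (assumes `endog` is an observed series):

        >>> mod = sm.tsa.SARIMAX(endog, order=(1, 0, 1))
        >>> res = mod.fit(disp=False)
        >>> print(res.summary())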

605 """ 

606 if start_params is None: 

607 start_params = self.start_params 

608 transformed = True 

609 includes_fixed = True 

610 

611 # Update the score method 

612 if optim_score is None and method == 'lbfgs': 

613 kwargs.setdefault('approx_grad', True) 

614 kwargs.setdefault('epsilon', 1e-5) 

615 elif optim_score is None: 

616 optim_score = 'approx' 

617 

618 # Check for complex step differentiation 

619 if optim_complex_step is None: 

620 optim_complex_step = not self.ssm._complex_endog 

621 elif optim_complex_step and self.ssm._complex_endog: 

622 raise ValueError('Cannot use complex step derivatives when data' 

623 ' or parameters are complex.') 

624 

625 # Standardize starting parameters 

626 start_params = self.handle_params(start_params, transformed=True, 

627 includes_fixed=includes_fixed) 

628 

629 # Unconstrain the starting parameters 

630 if transformed: 

631 start_params = self.untransform_params(start_params) 

632 

633 # Remove any fixed parameters 

634 if self._has_fixed_params: 

635 start_params = start_params[self._free_params_index] 

636 

637 # If all parameters are fixed, we are done 

638 if self._has_fixed_params and len(start_params) == 0: 

639 mlefit = Bunch(params=[], mle_retvals=None, 

640 mle_settings=None) 

641 else: 

642 # Maximum likelihood estimation 

643 if flags is None: 

644 flags = {} 

645 flags.update({ 

646 'transformed': False, 

647 'includes_fixed': False, 

648 'score_method': optim_score, 

649 'approx_complex_step': optim_complex_step 

650 }) 

651 if optim_hessian is not None: 

652 flags['hessian_method'] = optim_hessian 

653 fargs = (flags,) 

654 mlefit = super(MLEModel, self).fit(start_params, method=method, 

655 fargs=fargs, 

656 maxiter=maxiter, 

657 full_output=full_output, 

658 disp=disp, callback=callback, 

659 skip_hessian=True, **kwargs) 

660 

661 # Just return the fitted parameters if requested 

662 if return_params: 

663 return self.handle_params(mlefit.params, transformed=False, 

664 includes_fixed=False) 

665 # Otherwise construct the results class if desired 

666 else: 

667 # Handle memory conservation option 

668 if low_memory: 

669 conserve_memory = self.ssm.conserve_memory 

670 self.ssm.set_conserve_memory(MEMORY_CONSERVE) 

671 

672 # Perform filtering / smoothing 

673 if (self.ssm.memory_no_predicted or self.ssm.memory_no_gain 

674 or self.ssm.memory_no_smoothing): 

675 func = self.filter 

676 else: 

677 func = self.smooth 

678 res = func(mlefit.params, transformed=False, includes_fixed=False, 

679 cov_type=cov_type, cov_kwds=cov_kwds) 

680 

681 res.mlefit = mlefit 

682 res.mle_retvals = mlefit.mle_retvals 

683 res.mle_settings = mlefit.mle_settings 

684 

685 # Reset memory conservation 

686 if low_memory: 

687 self.ssm.set_conserve_memory(conserve_memory) 

688 

689 return res 

690 

691 def fit_constrained(self, constraints, start_params=None, **fit_kwds): 

692 """ 

693 Fit the model with some parameters subject to equality constraints. 

694 

695 Parameters 

696 ---------- 

697 constraints : dict 

698 Dictionary of constraints, of the form `param_name: fixed_value`. 

699 See the `param_names` property for valid parameter names. 

700 start_params : array_like, optional 

701 Initial guess of the solution for the loglikelihood maximization. 

702 If None, the default is given by Model.start_params. 

703 **fit_kwds : keyword arguments 

704 fit_kwds are used in the optimization of the remaining parameters. 

705 

706 Returns 

707 ------- 

708 results : Results instance 

709 

710 Examples 

711 -------- 

712 >>> mod = sm.tsa.SARIMAX(endog, order=(1, 0, 1)) 

713 >>> res = mod.fit_constrained({'ar.L1': 0.5}) 

714 """ 

715 with self.fix_params(constraints): 

716 res = self.fit(start_params, **fit_kwds) 

717 return res 

718 

719 @property 

720 def _res_classes(self): 

721 return {'fit': (MLEResults, MLEResultsWrapper)} 

722 

723 def _wrap_results(self, params, result, return_raw, cov_type=None, 

724 cov_kwds=None, results_class=None, wrapper_class=None): 

725 if not return_raw: 

726 # Wrap in a results object 

727 result_kwargs = {} 

728 if cov_type is not None: 

729 result_kwargs['cov_type'] = cov_type 

730 if cov_kwds is not None: 

731 result_kwargs['cov_kwds'] = cov_kwds 

732 

733 if results_class is None: 

734 results_class = self._res_classes['fit'][0] 

735 if wrapper_class is None: 

736 wrapper_class = self._res_classes['fit'][1] 

737 

738 res = results_class(self, params, result, **result_kwargs) 

739 result = wrapper_class(res) 

740 return result 

741 

742 def filter(self, params, transformed=True, includes_fixed=False, 

743 complex_step=False, cov_type=None, cov_kwds=None, 

744 return_ssm=False, results_class=None, 

745 results_wrapper_class=None, low_memory=False, **kwargs): 

746 """ 

747 Kalman filtering 

748 

749 Parameters 

750 ---------- 

751 params : array_like 

752 Array of parameters at which to evaluate the loglikelihood 

753 function. 

754 transformed : bool, optional 

755 Whether or not `params` is already transformed. Default is True. 

756 return_ssm : bool,optional 

757 Whether or not to return only the state space output or a full 

758 results object. Default is to return a full results object. 

759 cov_type : str, optional 

760 See `MLEResults.fit` for a description of covariance matrix types 

761 for results object. 

762 cov_kwds : dict or None, optional 

763 See `MLEResults.get_robustcov_results` for a description required 

764 keywords for alternative covariance estimators 

765 low_memory : bool, optional 

766 If set to True, techniques are applied to substantially reduce 

767 memory usage. If used, some features of the results object will 

768 not be available (including in-sample prediction), although 

769 out-of-sample forecasting is possible. Default is False. 

770 **kwargs 

771 Additional keyword arguments to pass to the Kalman filter. See 

772 `KalmanFilter.filter` for more details. 

773 """ 

774 params = self.handle_params(params, transformed=transformed, 

775 includes_fixed=includes_fixed) 

776 self.update(params, transformed=True, includes_fixed=True, 

777 complex_step=complex_step) 

778 

779 # Save the parameter names 

780 self.data.param_names = self.param_names 

781 

782 if complex_step: 

783 kwargs['inversion_method'] = INVERT_UNIVARIATE | SOLVE_LU 

784 

785 # Handle memory conservation 

786 if low_memory: 

787 kwargs['conserve_memory'] = MEMORY_CONSERVE 

788 

789 # Get the state space output 

790 result = self.ssm.filter(complex_step=complex_step, **kwargs) 

791 

792 # Wrap in a results object 

793 return self._wrap_results(params, result, return_ssm, cov_type, 

794 cov_kwds, results_class, 

795 results_wrapper_class) 

796 

797 def smooth(self, params, transformed=True, includes_fixed=False, 

798 complex_step=False, cov_type=None, cov_kwds=None, 

799 return_ssm=False, results_class=None, 

800 results_wrapper_class=None, **kwargs): 

801 """ 

802 Kalman smoothing 

803 

804 Parameters 

805 ---------- 

806 params : array_like 

807 Array of parameters at which to evaluate the loglikelihood 

808 function. 

809 transformed : bool, optional 

810 Whether or not `params` is already transformed. Default is True. 

811 return_ssm : bool,optional 

812 Whether or not to return only the state space output or a full 

813 results object. Default is to return a full results object. 

814 cov_type : str, optional 

815 See `MLEResults.fit` for a description of covariance matrix types 

816 for results object. 

817 cov_kwds : dict or None, optional 

818 See `MLEResults.get_robustcov_results` for a description required 

819 keywords for alternative covariance estimators 

820 **kwargs 

821 Additional keyword arguments to pass to the Kalman filter. See 

822 `KalmanFilter.filter` for more details. 

823 """ 

824 params = self.handle_params(params, transformed=transformed, 

825 includes_fixed=includes_fixed) 

826 self.update(params, transformed=True, includes_fixed=True, 

827 complex_step=complex_step) 

828 

829 # Save the parameter names 

830 self.data.param_names = self.param_names 

831 

832 if complex_step: 

833 kwargs['inversion_method'] = INVERT_UNIVARIATE | SOLVE_LU 

834 

835 # Get the state space output 

836 result = self.ssm.smooth(complex_step=complex_step, **kwargs) 

837 

838 # Wrap in a results object 

839 return self._wrap_results(params, result, return_ssm, cov_type, 

840 cov_kwds, results_class, 

841 results_wrapper_class) 

842 

843 _loglike_param_names = ['transformed', 'includes_fixed', 'complex_step'] 

844 _loglike_param_defaults = [True, False, False] 

845 

846 def loglike(self, params, *args, **kwargs): 

847 """ 

848 Loglikelihood evaluation 

849 

850 Parameters 

851 ---------- 

852 params : array_like 

853 Array of parameters at which to evaluate the loglikelihood 

854 function. 

855 transformed : bool, optional 

856 Whether or not `params` is already transformed. Default is True. 

857 **kwargs 

858 Additional keyword arguments to pass to the Kalman filter. See 

859 `KalmanFilter.filter` for more details. 

860 

861 Notes 

862 ----- 

863 [1]_ recommend maximizing the average likelihood to avoid scale issues; 

864 this is done automatically by the base Model fit method. 

865 

866 References 

867 ---------- 

868 .. [1] Koopman, Siem Jan, Neil Shephard, and Jurgen A. Doornik. 1999. 

869 Statistical Algorithms for Models in State Space Using SsfPack 2.2. 

870 Econometrics Journal 2 (1): 107-60. doi:10.1111/1368-423X.00023. 

871 

872 See Also 

873 -------- 

874 update : modifies the internal state of the state space model to 

875 reflect new params 
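
        Examples
        --------
        An illustrative sketch, evaluating the loglikelihood at the model's
        starting parameters:

        >>> mod = sm.tsa.SARIMAX(endog, order=(1, 0, 1))
        >>> mod.loglike(mod.start_params)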

876 """ 

877 transformed, includes_fixed, complex_step, kwargs = _handle_args( 

878 MLEModel._loglike_param_names, MLEModel._loglike_param_defaults, 

879 *args, **kwargs) 

880 

881 params = self.handle_params(params, transformed=transformed, 

882 includes_fixed=includes_fixed) 

883 self.update(params, transformed=True, includes_fixed=True, 

884 complex_step=complex_step) 

885 

886 if complex_step: 

887 kwargs['inversion_method'] = INVERT_UNIVARIATE | SOLVE_LU 

888 

889 loglike = self.ssm.loglike(complex_step=complex_step, **kwargs) 

890 

891 # Koopman, Shephard, and Doornik recommend maximizing the average 

892 # likelihood to avoid scale issues, but the averaging is done 

893 # automatically in the base model `fit` method 

894 return loglike 

895 

896 def loglikeobs(self, params, transformed=True, includes_fixed=False, 

897 complex_step=False, **kwargs): 

898 """ 

899 Loglikelihood evaluation 

900 

901 Parameters 

902 ---------- 

903 params : array_like 

904 Array of parameters at which to evaluate the loglikelihood 

905 function. 

906 transformed : bool, optional 

907 Whether or not `params` is already transformed. Default is True. 

908 **kwargs 

909 Additional keyword arguments to pass to the Kalman filter. See 

910 `KalmanFilter.filter` for more details. 

911 

912 Notes 

913 ----- 

914 [1]_ recommend maximizing the average likelihood to avoid scale issues; 

915 this is done automatically by the base Model fit method. 

916 

917 References 

918 ---------- 

919 .. [1] Koopman, Siem Jan, Neil Shephard, and Jurgen A. Doornik. 1999. 

920 Statistical Algorithms for Models in State Space Using SsfPack 2.2. 

921 Econometrics Journal 2 (1): 107-60. doi:10.1111/1368-423X.00023. 

922 

923 See Also 

924 -------- 

925 update : modifies the internal state of the Model to reflect new params 

926 """ 

927 params = self.handle_params(params, transformed=transformed, 

928 includes_fixed=includes_fixed) 

929 

930 # If we're using complex-step differentiation, then we cannot use 

931 # Cholesky factorization 

932 if complex_step: 

933 kwargs['inversion_method'] = INVERT_UNIVARIATE | SOLVE_LU 

934 

935 self.update(params, transformed=True, includes_fixed=True, 

936 complex_step=complex_step) 

937 

938 return self.ssm.loglikeobs(complex_step=complex_step, **kwargs) 

939 

940 def simulation_smoother(self, simulation_output=None, **kwargs): 

941 r""" 

942 Retrieve a simulation smoother for the state space model. 

943 

944 Parameters 

945 ---------- 

946 simulation_output : int, optional 

947 Determines which simulation smoother output is calculated. 

948 Default is all (including state and disturbances). 

949 **kwargs 

950 Additional keyword arguments, used to set the simulation output. 

951 See `set_simulation_output` for more details. 

952 

953 Returns 

954 ------- 

955 SimulationSmoothResults 
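
        Examples
        --------
        A sketch of typical usage; it assumes the state space matrices have
        already been set to numeric values (e.g. via `update` or `smooth`):

        >>> sim = mod.simulation_smoother()
        >>> sim.simulate()
        >>> simulated_state = sim.simulated_state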

956 """ 

957 return self.ssm.simulation_smoother( 

958 simulation_output=simulation_output, **kwargs) 

959 

960 def _forecasts_error_partial_derivatives(self, params, transformed=True, 

961 includes_fixed=False, 

962 approx_complex_step=None, 

963 approx_centered=False, 

964 res=None, **kwargs): 

965 params = np.array(params, ndmin=1) 

966 

967 # We cannot use complex-step differentiation with non-transformed 

968 # parameters 

969 if approx_complex_step is None: 

970 approx_complex_step = transformed 

971 if not transformed and approx_complex_step: 

972 raise ValueError("Cannot use complex-step approximations to" 

973 " calculate the observed_information_matrix" 

974 " with untransformed parameters.") 

975 

976 # If we're using complex-step differentiation, then we cannot use 

977 # Cholesky factorization 

978 if approx_complex_step: 

979 kwargs['inversion_method'] = INVERT_UNIVARIATE | SOLVE_LU 

980 

981 # Get values at the params themselves 

982 if res is None: 

983 self.update(params, transformed=transformed, 

984 includes_fixed=includes_fixed, 

985 complex_step=approx_complex_step) 

986 res = self.ssm.filter(complex_step=approx_complex_step, **kwargs) 

987 

988 # Setup 

989 n = len(params) 

990 

991 # Compute partial derivatives w.r.t. forecast error and forecast 

992 # error covariance 

993 partials_forecasts_error = ( 

994 np.zeros((self.k_endog, self.nobs, n)) 

995 ) 

996 partials_forecasts_error_cov = ( 

997 np.zeros((self.k_endog, self.k_endog, self.nobs, n)) 

998 ) 

999 if approx_complex_step: 

1000 epsilon = _get_epsilon(params, 2, None, n) 

1001 increments = np.identity(n) * 1j * epsilon 

1002 

1003 for i, ih in enumerate(increments): 

1004 self.update(params + ih, transformed=transformed, 

1005 includes_fixed=includes_fixed, 

1006 complex_step=True) 

1007 _res = self.ssm.filter(complex_step=True, **kwargs) 

1008 

1009 partials_forecasts_error[:, :, i] = ( 

1010 _res.forecasts_error.imag / epsilon[i] 

1011 ) 

1012 

1013 partials_forecasts_error_cov[:, :, :, i] = ( 

1014 _res.forecasts_error_cov.imag / epsilon[i] 

1015 ) 

1016 elif not approx_centered: 

1017 epsilon = _get_epsilon(params, 2, None, n) 

1018 ei = np.zeros((n,), float) 

1019 for i in range(n): 

1020 ei[i] = epsilon[i] 

1021 self.update(params + ei, transformed=transformed, 

1022 includes_fixed=includes_fixed, complex_step=False) 

1023 _res = self.ssm.filter(complex_step=False, **kwargs) 

1024 

1025 partials_forecasts_error[:, :, i] = ( 

1026 _res.forecasts_error - res.forecasts_error) / epsilon[i] 

1027 

1028 partials_forecasts_error_cov[:, :, :, i] = ( 

1029 _res.forecasts_error_cov - 

1030 res.forecasts_error_cov) / epsilon[i] 

1031 ei[i] = 0.0 

1032 else: 

1033 epsilon = _get_epsilon(params, 3, None, n) / 2. 

1034 ei = np.zeros((n,), float) 

1035 for i in range(n): 

1036 ei[i] = epsilon[i] 

1037 

1038 self.update(params + ei, transformed=transformed, 

1039 includes_fixed=includes_fixed, complex_step=False) 

1040 _res1 = self.ssm.filter(complex_step=False, **kwargs) 

1041 

1042 self.update(params - ei, transformed=transformed, 

1043 includes_fixed=includes_fixed, complex_step=False) 

1044 _res2 = self.ssm.filter(complex_step=False, **kwargs) 

1045 

1046 partials_forecasts_error[:, :, i] = ( 

1047 (_res1.forecasts_error - _res2.forecasts_error) / 

1048 (2 * epsilon[i])) 

1049 

1050 partials_forecasts_error_cov[:, :, :, i] = ( 

1051 (_res1.forecasts_error_cov - _res2.forecasts_error_cov) / 

1052 (2 * epsilon[i])) 

1053 

1054 ei[i] = 0.0 

1055 

1056 return partials_forecasts_error, partials_forecasts_error_cov 

1057 

1058 def observed_information_matrix(self, params, transformed=True, 

1059 includes_fixed=False, 

1060 approx_complex_step=None, 

1061 approx_centered=False, **kwargs): 

1062 """ 

1063 Observed information matrix 

1064 

1065 Parameters 

1066 ---------- 

1067 params : array_like, optional 

1068 Array of parameters at which to evaluate the loglikelihood 

1069 function. 

1070 **kwargs 

1071 Additional keyword arguments to pass to the Kalman filter. See 

1072 `KalmanFilter.filter` for more details. 

1073 

1074 Notes 

1075 ----- 

1076 This method is from Harvey (1989), which shows that the information 

1077 matrix only depends on terms from the gradient. This implementation is 

1078 partially analytic and partially numeric approximation, therefore, 

1079 because it uses the analytic formula for the information matrix, with 

1080 numerically computed elements of the gradient. 
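
        Concretely, with :math:`v_t` the one-step-ahead forecast error and
        :math:`F_t` its covariance matrix (a restatement of the computation
        below, not an additional source), the average information matrix is

        .. math::

            I_{ij} = \frac{1}{T - d} \sum_t \left[ \frac{1}{2} \text{tr}
            \left( F_t^{-1} \frac{\partial F_t}{\partial \theta_i} F_t^{-1}
            \frac{\partial F_t}{\partial \theta_j} \right) +
            \frac{\partial v_t'}{\partial \theta_i} F_t^{-1}
            \frac{\partial v_t}{\partial \theta_j} \right]

        where the sum excludes the first :math:`d` burn-in (or diffuse)
        observations and the partial derivatives are computed numerically by
        `_forecasts_error_partial_derivatives`.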

        References
        ----------
        Harvey, Andrew C. 1990.
        Forecasting, Structural Time Series Models and the Kalman Filter.
        Cambridge University Press.
        """
        params = np.array(params, ndmin=1)

        # Setup
        n = len(params)

        # We cannot use complex-step differentiation with non-transformed
        # parameters
        if approx_complex_step is None:
            approx_complex_step = transformed
        if not transformed and approx_complex_step:
            raise ValueError("Cannot use complex-step approximations to"
                             " calculate the observed_information_matrix"
                             " with untransformed parameters.")

        # Get values at the params themselves
        params = self.handle_params(params, transformed=transformed,
                                    includes_fixed=includes_fixed)
        self.update(params, transformed=True, includes_fixed=True,
                    complex_step=approx_complex_step)
        # If we're using complex-step differentiation, then we cannot use
        # Cholesky factorization
        if approx_complex_step:
            kwargs['inversion_method'] = INVERT_UNIVARIATE | SOLVE_LU
        res = self.ssm.filter(complex_step=approx_complex_step, **kwargs)
        dtype = self.ssm.dtype

        # Save this for inversion later
        inv_forecasts_error_cov = res.forecasts_error_cov.copy()

        partials_forecasts_error, partials_forecasts_error_cov = (
            self._forecasts_error_partial_derivatives(
                params, transformed=transformed, includes_fixed=includes_fixed,
                approx_complex_step=approx_complex_step,
                approx_centered=approx_centered, res=res, **kwargs))

        # Compute the information matrix
        tmp = np.zeros((self.k_endog, self.k_endog, self.nobs, n), dtype=dtype)

        information_matrix = np.zeros((n, n), dtype=dtype)
        d = np.maximum(self.ssm.loglikelihood_burn, res.nobs_diffuse)
        for t in range(d, self.nobs):
            inv_forecasts_error_cov[:, :, t] = (
                np.linalg.inv(res.forecasts_error_cov[:, :, t])
            )
            for i in range(n):
                tmp[:, :, t, i] = np.dot(
                    inv_forecasts_error_cov[:, :, t],
                    partials_forecasts_error_cov[:, :, t, i]
                )
            for i in range(n):
                for j in range(n):
                    information_matrix[i, j] += (
                        0.5 * np.trace(np.dot(tmp[:, :, t, i],
                                              tmp[:, :, t, j]))
                    )
                    information_matrix[i, j] += np.inner(
                        partials_forecasts_error[:, t, i],
                        np.dot(inv_forecasts_error_cov[:, :, t],
                               partials_forecasts_error[:, t, j])
                    )
        return information_matrix / (self.nobs - self.ssm.loglikelihood_burn)

    def opg_information_matrix(self, params, transformed=True,
                               includes_fixed=False, approx_complex_step=None,
                               **kwargs):
        r"""
        Outer product of gradients information matrix

        Parameters
        ----------
        params : array_like, optional
            Array of parameters at which to evaluate the loglikelihood
            function.
        **kwargs
            Additional arguments to the `loglikeobs` method.
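
        Notes
        -----
        With :math:`s_t = \partial \ell_t / \partial \theta` the score for
        observation :math:`t` (obtained from `score_obs`), this computes the
        average outer product

        .. math::

            I_{OPG} = \frac{1}{T - d} \sum_t s_t s_t'

        where :math:`d` is the number of burn-in observations.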

        References
        ----------
        Berndt, Ernst R., Bronwyn Hall, Robert Hall, and Jerry Hausman. 1974.
        Estimation and Inference in Nonlinear Structural Models.
        NBER Chapters. National Bureau of Economic Research, Inc.
        """
        # We cannot use complex-step differentiation with non-transformed
        # parameters
        if approx_complex_step is None:
            approx_complex_step = transformed
        if not transformed and approx_complex_step:
            raise ValueError("Cannot use complex-step approximations to"
                             " calculate the information matrix with"
                             " untransformed parameters.")

        score_obs = self.score_obs(params, transformed=transformed,
                                   includes_fixed=includes_fixed,
                                   approx_complex_step=approx_complex_step,
                                   **kwargs).transpose()
        return (
            np.inner(score_obs, score_obs) /
            (self.nobs - self.ssm.loglikelihood_burn)
        )

    def _score_complex_step(self, params, **kwargs):
        # the default epsilon can be too small
        # inversion_method = INVERT_UNIVARIATE | SOLVE_LU
        epsilon = _get_epsilon(params, 2., None, len(params))
        kwargs['transformed'] = True
        kwargs['complex_step'] = True
        return approx_fprime_cs(params, self.loglike, epsilon=epsilon,
                                kwargs=kwargs)

    def _score_finite_difference(self, params, approx_centered=False,
                                 **kwargs):
        kwargs['transformed'] = True
        return approx_fprime(params, self.loglike, kwargs=kwargs,
                             centered=approx_centered)

    def _score_harvey(self, params, approx_complex_step=True, **kwargs):
        score_obs = self._score_obs_harvey(
            params, approx_complex_step=approx_complex_step, **kwargs)
        return np.sum(score_obs, axis=0)

    def _score_obs_harvey(self, params, approx_complex_step=True,
                          approx_centered=False, includes_fixed=False,
                          **kwargs):
        r"""
        Score per observation

        Parameters
        ----------
        params : array_like, optional
            Array of parameters at which to evaluate the loglikelihood
            function.
        **kwargs
            Additional keyword arguments to pass to the Kalman filter. See
            `KalmanFilter.filter` for more details.

        Notes
        -----
        This method is from Harvey (1989), section 3.4.5
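
        In this module's notation, with :math:`v_t` the forecast error and
        :math:`F_t` its covariance (a restatement of the loop below, not an
        additional source), the method evaluates, for each observation and
        parameter,

        .. math::

            \frac{\partial \ell_t}{\partial \theta_i} = -\frac{1}{2}
            \text{tr} \left[ F_t^{-1} \frac{\partial F_t}{\partial \theta_i}
            \left( I - F_t^{-1} v_t v_t' \right) \right] -
            \frac{\partial v_t'}{\partial \theta_i} F_t^{-1} v_t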

1226 

1227 References 

1228 ---------- 

1229 Harvey, Andrew C. 1990. 

1230 Forecasting, Structural Time Series Models and the Kalman Filter. 

1231 Cambridge University Press. 

1232 """ 

1233 params = np.array(params, ndmin=1) 

1234 n = len(params) 

1235 

1236 # Get values at the params themselves 

1237 self.update(params, transformed=True, includes_fixed=includes_fixed, 

1238 complex_step=approx_complex_step) 

1239 if approx_complex_step: 

1240 kwargs['inversion_method'] = INVERT_UNIVARIATE | SOLVE_LU 

1241 res = self.ssm.filter(complex_step=approx_complex_step, **kwargs) 

1242 

1243 # Get forecasts error partials 

1244 partials_forecasts_error, partials_forecasts_error_cov = ( 

1245 self._forecasts_error_partial_derivatives( 

1246 params, transformed=True, includes_fixed=includes_fixed, 

1247 approx_complex_step=approx_complex_step, 

1248 approx_centered=approx_centered, res=res, **kwargs)) 

1249 

1250 # Compute partial derivatives w.r.t. likelihood function 

1251 partials = np.zeros((self.nobs, n)) 

1252 k_endog = self.k_endog 

1253 for t in range(self.nobs): 

1254 inv_forecasts_error_cov = np.linalg.inv( 

1255 res.forecasts_error_cov[:, :, t]) 

1256 

1257 for i in range(n): 

1258 partials[t, i] += np.trace(np.dot( 

1259 np.dot(inv_forecasts_error_cov, 

1260 partials_forecasts_error_cov[:, :, t, i]), 

1261 (np.eye(k_endog) - 

1262 np.dot(inv_forecasts_error_cov, 

1263 np.outer(res.forecasts_error[:, t], 

1264 res.forecasts_error[:, t]))))) 

1265 # 2 * dv / di * F^{-1} v_t 

1266 # where x = F^{-1} v_t or F x = v 

1267 partials[t, i] += 2 * np.dot( 

1268 partials_forecasts_error[:, t, i], 

1269 np.dot(inv_forecasts_error_cov, res.forecasts_error[:, t])) 

1270 

1271 return -partials / 2. 

1272 

1273 _score_param_names = ['transformed', 'includes_fixed', 'score_method', 

1274 'approx_complex_step', 'approx_centered'] 

1275 _score_param_defaults = [True, False, 'approx', None, False] 

1276 

1277 def score(self, params, *args, **kwargs): 

1278 """ 

1279 Compute the score function at params. 

1280 

1281 Parameters 

1282 ---------- 

1283 params : array_like 

1284 Array of parameters at which to evaluate the score. 

1285 *args 

1286 Additional positional arguments to the `loglike` method. 

1287 **kwargs 

1288 Additional keyword arguments to the `loglike` method. 

1289 

1290 Returns 

1291 ------- 

1292 score : ndarray 

1293 Score, evaluated at `params`. 

1294 

1295 Notes 

1296 ----- 

1297 This is a numerical approximation, calculated using first-order complex 

1298 step differentiation on the `loglike` method. 

1299 

1300 Both args and kwargs are necessary because the optimizer from 

1301 `fit` must call this function and only supports passing arguments via 

1302 args (for example `scipy.optimize.fmin_l_bfgs`). 

1303 """ 

1304 (transformed, includes_fixed, method, approx_complex_step, 

1305 approx_centered, kwargs) = ( 

1306 _handle_args(MLEModel._score_param_names, 

1307 MLEModel._score_param_defaults, *args, **kwargs)) 

1308 # For fit() calls, the method is called 'score_method' (to distinguish 

1309 # it from the method used for fit) but generally in kwargs the method 

1310 # will just be called 'method' 

1311 if 'method' in kwargs: 

1312 method = kwargs.pop('method') 

1313 

1314 if approx_complex_step is None: 

1315 approx_complex_step = not self.ssm._complex_endog 

1316 if approx_complex_step and self.ssm._complex_endog: 

1317 raise ValueError('Cannot use complex step derivatives when data' 

1318 ' or parameters are complex.') 

1319 

1320 out = self.handle_params( 

1321 params, transformed=transformed, includes_fixed=includes_fixed, 

1322 return_jacobian=not transformed) 

1323 if transformed: 

1324 params = out 

1325 else: 

1326 params, transform_score = out 

1327 

1328 if method == 'harvey': 

1329 kwargs['includes_fixed'] = True 

1330 score = self._score_harvey( 

1331 params, approx_complex_step=approx_complex_step, **kwargs) 

1332 elif method == 'approx' and approx_complex_step: 

1333 kwargs['includes_fixed'] = True 

1334 score = self._score_complex_step(params, **kwargs) 

1335 elif method == 'approx': 

1336 kwargs['includes_fixed'] = True 

1337 score = self._score_finite_difference( 

1338 params, approx_centered=approx_centered, **kwargs) 

1339 else: 

1340 raise NotImplementedError('Invalid score method.') 

1341 

1342 if not transformed: 

1343 score = np.dot(transform_score, score) 

1344 

1345 if self._has_fixed_params and not includes_fixed: 

1346 score = score[self._free_params_index] 

1347 

1348 return score 

1349 

1350 def score_obs(self, params, method='approx', transformed=True, 

1351 includes_fixed=False, approx_complex_step=None, 

1352 approx_centered=False, **kwargs): 

1353 """ 

1354 Compute the score per observation, evaluated at params 

1355 

1356 Parameters 

1357 ---------- 

1358 params : array_like 

1359 Array of parameters at which to evaluate the score. 

1360 **kwargs 

1361 Additional arguments to the `loglike` method. 

1362 

1363 Returns 

1364 ------- 

1365 score : ndarray 

1366 Score per observation, evaluated at `params`. 

1367 

1368 Notes 

1369 ----- 

1370 This is a numerical approximation, calculated using first-order complex 

1371 step differentiation on the `loglikeobs` method. 

1372 """ 

1373 if not transformed and approx_complex_step: 

1374 raise ValueError("Cannot use complex-step approximations to" 

1375 " calculate the score at each observation" 

1376 " with untransformed parameters.") 

1377 

1378 if approx_complex_step is None: 

1379 approx_complex_step = not self.ssm._complex_endog 

1380 if approx_complex_step and self.ssm._complex_endog: 

1381 raise ValueError('Cannot use complex step derivatives when data' 

1382 ' or parameters are complex.') 

1383 

1384 params = self.handle_params(params, transformed=True, 

1385 includes_fixed=includes_fixed) 

1386 kwargs['transformed'] = transformed 

1387 kwargs['includes_fixed'] = True 

1388 

1389 if method == 'harvey': 

1390 score = self._score_obs_harvey( 

1391 params, approx_complex_step=approx_complex_step, **kwargs) 

1392 elif method == 'approx' and approx_complex_step: 

1393 # the default epsilon can be too small 

1394 epsilon = _get_epsilon(params, 2., None, len(params)) 

1395 kwargs['complex_step'] = True 

1396 score = approx_fprime_cs(params, self.loglikeobs, epsilon=epsilon, 

1397 kwargs=kwargs) 

1398 elif method == 'approx': 

1399 score = approx_fprime(params, self.loglikeobs, kwargs=kwargs, 

1400 centered=approx_centered) 

1401 else: 

1402 raise NotImplementedError('Invalid scoreobs method.') 

1403 

1404 return score 

1405 

1406 _hessian_param_names = ['transformed', 'hessian_method', 

1407 'approx_complex_step', 'approx_centered'] 

1408 _hessian_param_defaults = [True, 'approx', None, False] 

1409 

1410 def hessian(self, params, *args, **kwargs): 

1411 r""" 

1412 Hessian matrix of the likelihood function, evaluated at the given 

1413 parameters 

1414 

1415 Parameters 

1416 ---------- 

1417 params : array_like 

1418 Array of parameters at which to evaluate the hessian. 

1419 *args 

1420 Additional positional arguments to the `loglike` method. 

1421 **kwargs 

1422 Additional keyword arguments to the `loglike` method. 

1423 

1424 Returns 

1425 ------- 

1426 hessian : ndarray 

1427 Hessian matrix evaluated at `params` 

1428 

1429 Notes 

1430 ----- 

1431 This is a numerical approximation. 

1432 

1433 Both args and kwargs are necessary because the optimizer from 

1434 `fit` must call this function and only supports passing arguments via 

1435 args (for example `scipy.optimize.fmin_l_bfgs`). 

1436 """ 

1437 transformed, method, approx_complex_step, approx_centered, kwargs = ( 

1438 _handle_args(MLEModel._hessian_param_names, 

1439 MLEModel._hessian_param_defaults, 

1440 *args, **kwargs)) 

1441 # For fit() calls, the method is called 'hessian_method' (to 

1442 # distinguish it from the method used for fit) but generally in kwargs 

1443 # the method will just be called 'method' 

1444 if 'method' in kwargs: 

1445 method = kwargs.pop('method') 

1446 

1447 if not transformed and approx_complex_step: 

1448 raise ValueError("Cannot use complex-step approximations to" 

1449 " calculate the hessian with untransformed" 

1450 " parameters.") 

1451 

1452 if approx_complex_step is None: 

1453 approx_complex_step = not self.ssm._complex_endog 

1454 if approx_complex_step and self.ssm._complex_endog: 

1455 raise ValueError('Cannot use complex step derivatives when data' 

1456 ' or parameters are complex.') 

1457 

1458 if method == 'oim': 

1459 hessian = self._hessian_oim( 

1460 params, transformed=transformed, 

1461 approx_complex_step=approx_complex_step, 

1462 approx_centered=approx_centered, **kwargs) 

1463 elif method == 'opg': 

1464 hessian = self._hessian_opg( 

1465 params, transformed=transformed, 

1466 approx_complex_step=approx_complex_step, 

1467 approx_centered=approx_centered, **kwargs) 

1468 elif method == 'approx' and approx_complex_step: 

1469 hessian = self._hessian_complex_step( 

1470 params, transformed=transformed, **kwargs) 

1471 elif method == 'approx': 

1472 hessian = self._hessian_finite_difference( 

1473 params, transformed=transformed, 

1474 approx_centered=approx_centered, **kwargs) 

1475 else: 

1476 raise NotImplementedError('Invalid Hessian calculation method.') 

1477 return hessian 

1478 

1479 def _hessian_oim(self, params, **kwargs): 

1480 """ 

1481 Hessian matrix computed using the Harvey (1989) information matrix 

1482 """ 

1483 return -self.observed_information_matrix(params, **kwargs) 

1484 

1485 def _hessian_opg(self, params, **kwargs): 

1486 """ 

1487 Hessian matrix computed using the outer product of gradients 

1488 information matrix 

1489 """ 

1490 return -self.opg_information_matrix(params, **kwargs) 

1491 

1492 def _hessian_finite_difference(self, params, approx_centered=False, 

1493 **kwargs): 

1494 params = np.array(params, ndmin=1) 

1495 

1496 warnings.warn('Calculation of the Hessian using finite differences' 

1497 ' is usually subject to substantial approximation' 

1498 ' errors.', PrecisionWarning) 

1499 

1500 if not approx_centered: 

1501 epsilon = _get_epsilon(params, 3, None, len(params)) 

1502 else: 

1503 epsilon = _get_epsilon(params, 4, None, len(params)) / 2 

1504 hessian = approx_fprime(params, self._score_finite_difference, 

1505 epsilon=epsilon, kwargs=kwargs, 

1506 centered=approx_centered) 

1507 

1508 return hessian / (self.nobs - self.ssm.loglikelihood_burn) 

1509 

1510 def _hessian_complex_step(self, params, **kwargs): 

1511 """ 

1512 Hessian matrix computed by second-order complex-step differentiation 

1513 on the `loglike` function. 

1514 """ 

1515 # the default epsilon can be too small 

1516 epsilon = _get_epsilon(params, 3., None, len(params)) 

1517 kwargs['transformed'] = True 

1518 kwargs['complex_step'] = True 

1519 hessian = approx_hess_cs( 

1520 params, self.loglike, epsilon=epsilon, kwargs=kwargs) 

1521 

1522 return hessian / (self.nobs - self.ssm.loglikelihood_burn) 

1523 

1524 @property 

1525 def start_params(self): 

1526 """ 

1527 (array) Starting parameters for maximum likelihood estimation. 

1528 """ 

1529 if hasattr(self, '_start_params'): 

1530 return self._start_params 

1531 else: 

1532 raise NotImplementedError 

1533 

1534 @property 

1535 def param_names(self): 

1536 """ 

1537 (list of str) List of human readable parameter names (for parameters 

1538 actually included in the model). 

1539 """ 

1540 if hasattr(self, '_param_names'): 

1541 return self._param_names 

1542 else: 

1543 try: 

1544 names = ['param.%d' % i for i in range(len(self.start_params))] 

1545 except NotImplementedError: 

1546 names = [] 

1547 return names 

1548 

1549 @property 

1550 def state_names(self): 

1551 """ 

1552 (list of str) List of human readable names for unobserved states. 

1553 """ 

1554 if hasattr(self, '_state_names'): 

1555 return self._state_names 

1556 else: 

1557 names = ['state.%d' % i for i in range(self.k_states)] 

1558 return names 

1559 

1560 def transform_jacobian(self, unconstrained, approx_centered=False): 

1561 """ 

1562 Jacobian matrix for the parameter transformation function 

1563 

1564 Parameters 

1565 ---------- 

1566 unconstrained : array_like 

1567 Array of unconstrained parameters used by the optimizer. 

1568 

1569 Returns 

1570 ------- 

1571 jacobian : ndarray 

1572 Jacobian matrix of the transformation, evaluated at `unconstrained` 

1573 

1574 Notes 

1575 ----- 

1576 This is a numerical approximation using finite differences. Note that 

1577 in general complex step methods cannot be used because it is not 

1578 guaranteed that the `transform_params` method is a real function (e.g. 

1579 if Cholesky decomposition is used). 

1580 

1581 See Also 

1582 -------- 

1583 transform_params 
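
        Examples
        --------
        A minimal sketch using ``SARIMAX`` as a concrete subclass (the
        unconstrained values are illustrative):

        >>> import numpy as np
        >>> from statsmodels.tsa.statespace.sarimax import SARIMAX
        >>> mod = SARIMAX(np.random.randn(100), order=(1, 0, 0))
        >>> J = mod.transform_jacobian(np.array([0.3, 0.5]))  # (2 x 2)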

1584 """ 

1585 return approx_fprime(unconstrained, self.transform_params, 

1586 centered=approx_centered) 

1587 

1588 def transform_params(self, unconstrained): 

1589 """ 

1590 Transform unconstrained parameters used by the optimizer to constrained 

1591 parameters used in likelihood evaluation 

1592 

1593 Parameters 

1594 ---------- 

1595 unconstrained : array_like 

1596 Array of unconstrained parameters used by the optimizer, to be 

1597 transformed. 

1598 

1599 Returns 

1600 ------- 

1601 constrained : array_like 

1602 Array of constrained parameters which may be used in likelihood 

1603 evaluation. 

1604 

1605 Notes 

1606 ----- 

1607 This is a noop in the base class; subclasses should override it where 

1608 appropriate. 
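
        Examples
        --------
        A minimal sketch of a typical override, assuming a model whose first
        parameter is an autoregressive coefficient constrained to (-1, 1) and
        whose second parameter is a variance that must be positive:

        >>> def transform_params(self, unconstrained):
        ...     constrained = np.array(unconstrained, ndmin=1).copy()
        ...     # Map the real line into (-1, 1)
        ...     constrained[0] = constrained[0] / (1 + np.abs(constrained[0]))
        ...     # Squaring guarantees a non-negative variance
        ...     constrained[1] = constrained[1]**2
        ...     return constrained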

1609 """ 

1610 return np.array(unconstrained, ndmin=1) 

1611 

1612 def untransform_params(self, constrained): 

1613 """ 

1614 Transform constrained parameters used in likelihood evaluation 

1615 to unconstrained parameters used by the optimizer 

1616 

1617 Parameters 

1618 ---------- 

1619 constrained : array_like 

1620 Array of constrained parameters used in likelihood evaluation, to 

1621 be transformed. 

1622 

1623 Returns 

1624 ------- 

1625 unconstrained : array_like 

1626 Array of unconstrained parameters used by the optimizer. 

1627 

1628 Notes 

1629 ----- 

1630 This is a noop in the base class; subclasses should override it where 

1631 appropriate. 

1632 """ 

1633 return np.array(constrained, ndmin=1) 

1634 

1635 def handle_params(self, params, transformed=True, includes_fixed=False, 

1636 return_jacobian=False): 
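        """
        Ensure that `params` is a one-dimensional float array, fill in any
        fixed parameter values, and (if `transformed=False`) transform it to
        the constrained parameter space, optionally also returning the
        Jacobian of the transformation.
        """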

1637 params = np.array(params, ndmin=1) 

1638 

1639 # Never want integer dtype, so convert to floats 

1640 if np.issubdtype(params.dtype, np.integer): 

1641 params = params.astype(np.float64) 

1642 

1643 if not includes_fixed and self._has_fixed_params: 

1644 k_params = len(self.param_names) 

1645 new_params = np.zeros(k_params, dtype=params.dtype) * np.nan 

1646 new_params[self._free_params_index] = params 

1647 params = new_params 

1648 

1649 if not transformed: 

1650 # It may be the case that the transformation relies on having 

1651 # "some" (non-NaN) values for the fixed parameters, even if we will 

1652 # not actually be transforming the fixed parameters (as they will 

1653 # be set below regardless) 

1654 if not includes_fixed and self._has_fixed_params: 

1655 params[self._fixed_params_index] = ( 

1656 list(self._fixed_params.values())) 

1657 

1658 if return_jacobian: 

1659 transform_score = self.transform_jacobian(params) 

1660 params = self.transform_params(params) 

1661 

1662 if not includes_fixed and self._has_fixed_params: 

1663 params[self._fixed_params_index] = ( 

1664 list(self._fixed_params.values())) 

1665 

1666 return (params, transform_score) if return_jacobian else params 

1667 

1668 def update(self, params, transformed=True, includes_fixed=False, 

1669 complex_step=False): 

1670 """ 

1671 Update the parameters of the model 

1672 

1673 Parameters 

1674 ---------- 

1675 params : array_like 

1676 Array of new parameters. 

1677 transformed : bool, optional 

1678 Whether or not `params` is already transformed. If set to False, 

1679 `transform_params` is called. Default is True. 

1680 

1681 Returns 

1682 ------- 

1683 params : array_like 

1684 Array of parameters. 

1685 

1686 Notes 

1687 ----- 

1688 Since Model is a base class, this method should be overridden by 

1689 subclasses to perform actual updating steps. 
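
        Examples
        --------
        A minimal sketch of a typical subclass override, assuming a
        two-parameter model with a scalar transition coefficient and state
        variance (the ``self[...]`` item syntax sets the underlying state
        space matrices):

        >>> def update(self, params, **kwargs):
        ...     params = super().update(params, **kwargs)
        ...     self['transition', 0, 0] = params[0]
        ...     self['state_cov', 0, 0] = params[1]
        ...     return params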

1690 """ 

1691 return self.handle_params(params=params, transformed=transformed, 

1692 includes_fixed=includes_fixed) 

1693 

1694 def _validate_out_of_sample_exog(self, exog, out_of_sample): 

1695 """ 

1696 Validate given `exog` as satisfactory for out-of-sample operations 

1697 

1698 Parameters 

1699 ---------- 

1700 exog : array_like or None 

1701 New observations of exogenous regressors, if applicable. 

1702 out_of_sample : int 

1703 Number of new observations required. 

1704 

1705 Returns 

1706 ------- 

1707 exog : array or None 

1708 A numpy array of shape (out_of_sample, k_exog) if the model 

1709 contains an `exog` component, or None if it doesn't. 

1710 """ 

1711 if out_of_sample and self.k_exog > 0: 

1712 if exog is None: 

1713 raise ValueError('Out-of-sample operations in a model' 

1714 ' with a regression component require' 

1715 ' additional exogenous values via the' 

1716 ' `exog` argument.') 

1717 exog = np.array(exog) 

1718 required_exog_shape = (out_of_sample, self.k_exog) 

1719 try: 

1720 exog = exog.reshape(required_exog_shape) 

1721 except ValueError: 

1722 raise ValueError('Provided exogenous values are not of the' 

1723 ' appropriate shape. Required %s, got %s.' 

1724 % (str(required_exog_shape), 

1725 str(exog.shape))) 

1726 elif self.k_exog > 0 and exog is not None: 

1727 exog = None 

1728 warnings.warn('Exogenous array provided, but additional data' 

1729 ' is not required. `exog` argument ignored.', 

1730 ValueWarning) 

1731 

1732 return exog 

1733 

1734 def _get_extension_time_varying_matrices( 

1735 self, params, exog, out_of_sample, extend_kwargs=None, 

1736 transformed=True, includes_fixed=False, **kwargs): 

1737 """ 

1738 Get updated time-varying state space system matrices 

1739 

1740 Parameters 

1741 ---------- 

1742 params : array_like 

1743 Array of parameters used to construct the time-varying system 

1744 matrices. 

1745 exog : array_like or None 

1746 New observations of exogenous regressors, if applicable. 

1747 out_of_sample : int 

1748 Number of new observations required. 

1749 extend_kwargs : dict, optional 

1750 Dictionary of keyword arguments to pass to the state space model 

1751 constructor. For example, for an SARIMAX state space model, this 

1752 could be used to pass the `concentrate_scale=True` keyword 

1753 argument. Any arguments that are not explicitly set in this 

1754 dictionary will be copied from the current model instance. 

1755 transformed : bool, optional 

1756 Whether or not `params` is already transformed. Default is 

1757 True. 

1758 includes_fixed : bool, optional 

1759 If parameters were previously fixed with the `fix_params` method, 

1760 this argument describes whether or not `params` also includes 

1761 the fixed parameters, in addition to the free parameters. Default 

1762 is False. 
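
        Examples
        --------
        A sketch of typical usage from within a model method, extending a
        hypothetical model four periods past the end of the sample::

            kwargs = self._get_extension_time_varying_matrices(
                params, exog=None, out_of_sample=4,
                extend_kwargs={'concentrate_scale': True})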

1763 """ 

1764 # Get the appropriate exog for the extended sample 

1765 exog = self._validate_out_of_sample_exog(exog, out_of_sample) 

1766 

1767 # Create extended model 

1768 if extend_kwargs is None: 

1769 extend_kwargs = {} 

1770 

1771 # Handle trend offset for extended model 

1772 if getattr(self, 'k_trend', 0) > 0 and hasattr(self, 'trend_offset'): 

1773 extend_kwargs.setdefault( 

1774 'trend_offset', self.trend_offset + self.nobs) 

1775 

1776 mod_extend = self.clone( 

1777 endog=np.zeros((out_of_sample, self.k_endog)), exog=exog, 

1778 **extend_kwargs) 

1779 mod_extend.update(params, transformed=transformed, 

1780 includes_fixed=includes_fixed) 

1781 

1782 # Retrieve the extensions to the time-varying system matrices and 

1783 # put them in kwargs 

1784 for name in self.ssm.shapes.keys(): 

1785 if name == 'obs' or name in kwargs: 

1786 continue 

1787 if getattr(self.ssm, name).shape[-1] > 1: 

1788 mat = getattr(mod_extend.ssm, name) 

1789 kwargs[name] = mat[..., -out_of_sample:] 

1790 

1791 return kwargs 

1792 

1793 def simulate(self, params, nsimulations, measurement_shocks=None, 

1794 state_shocks=None, initial_state=None, anchor=None, 

1795 repetitions=None, exog=None, extend_model=None, 

1796 extend_kwargs=None, transformed=True, includes_fixed=False, 

1797 **kwargs): 

1798 r""" 

1799 Simulate a new time series following the state space model 

1800 

1801 Parameters 

1802 ---------- 

1803 params : array_like 

1804 Array of parameters to use in constructing the state space 

1805 representation to use when simulating. 

1806 nsimulations : int 

1807 The number of observations to simulate. If the model is 

1808 time-invariant this can be any number. If the model is 

1809 time-varying, then this number must be less than or equal to the 

1810 number of observations. 

1811 measurement_shocks : array_like, optional 

1812 If specified, these are the shocks to the measurement equation, 

1813 :math:`\varepsilon_t`. If unspecified, these are automatically 

1814 generated using a pseudo-random number generator. If specified, 

1815 must be shaped `nsimulations` x `k_endog`, where `k_endog` is the 

1816 same as in the state space model. 

1817 state_shocks : array_like, optional 

1818 If specified, these are the shocks to the state equation, 

1819 :math:`\eta_t`. If unspecified, these are automatically 

1820 generated using a pseudo-random number generator. If specified, 

1821 must be shaped `nsimulations` x `k_posdef` where `k_posdef` is the 

1822 same as in the state space model. 

1823 initial_state : array_like, optional 

1824 If specified, this is the initial state vector to use in 

1825 simulation, which should be shaped (`k_states` x 1), where 

1826 `k_states` is the same as in the state space model. If unspecified, 

1827 but the model has been initialized, then that initialization is 

1828 used. This must be specified if `anchor` is anything other than 

1829 "start" or 0 (or else you can use the `simulate` method on a 

1830 results object rather than on the model object). 

1831 anchor : int, str, or datetime, optional 

1832 First period for simulation. The simulation will be conditional on 

1833 all existing datapoints prior to the `anchor`. Type depends on the 

1834 index of the given `endog` in the model. Two special cases are the 

1835 strings 'start' and 'end'. `start` refers to beginning the 

1836 simulation at the first period of the sample, and `end` refers to 

1837 beginning the simulation at the first period after the sample. 

1838 Integer values can run from 0 to `nobs`, or can be negative to 

1839 apply negative indexing. Finally, if a date/time index was provided 

1840 to the model, then this argument can be a date string to parse or a 

1841 datetime type. Default is 'start'. 

1842 repetitions : int, optional 

1843 Number of simulated paths to generate. Default is 1 simulated path. 

1844 exog : array_like, optional 

1845 New observations of exogenous regressors, if applicable. 

1846 transformed : bool, optional 

1847 Whether or not `params` is already transformed. Default is 

1848 True. 

1849 includes_fixed : bool, optional 

1850 If parameters were previously fixed with the `fix_params` method, 

1851 this argument describes whether or not `params` also includes 

1852 the fixed parameters, in addition to the free parameters. Default 

1853 is False. 

1854 

1855 Returns 

1856 ------- 

1857 simulated_obs : ndarray 

1858 An array of simulated observations. If `repetitions=None`, then it 

1859 will be shaped (nsimulations x k_endog) or (nsimulations,) if 

1860 `k_endog=1`. Otherwise it will be shaped 

1861 (nsimulations x k_endog x repetitions). If the model was given 

1862 Pandas input then the output will be a Pandas object. If 

1863 `k_endog > 1` and `repetitions` is not None, then the output will 

1864 be a Pandas DataFrame that has a MultiIndex for the columns, with 

1865 the first level containing the names of the `endog` variables and 

1866 the second level containing the repetition number. 
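
        Examples
        --------
        A minimal sketch using ``SARIMAX`` as a concrete subclass; the
        parameter values (AR coefficient 0.5, variance 1.0) are purely
        illustrative:

        >>> import numpy as np
        >>> from statsmodels.tsa.statespace.sarimax import SARIMAX
        >>> mod = SARIMAX(np.zeros(100), order=(1, 0, 0))
        >>> sim = mod.simulate([0.5, 1.0], nsimulations=100)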

1867 """ 

1868 # Make sure the model class has the current parameters 

1869 self.update(params, transformed=transformed, 

1870 includes_fixed=includes_fixed) 

1871 

1872 # Get the starting location 

1873 if anchor is None or anchor == 'start': 

1874 iloc = 0 

1875 elif anchor == 'end': 

1876 iloc = self.nobs 

1877 else: 

1878 iloc, _, _ = self._get_index_loc(anchor) 

1879 if isinstance(iloc, slice): 

1880 iloc = iloc.start 

1881 

1882 if iloc < 0: 

1883 iloc = self.nobs + iloc 

1884 if iloc > self.nobs: 

1885 raise ValueError('Cannot anchor simulation outside of the sample.') 

1886 

1887 if iloc > 0 and initial_state is None: 

1888 raise ValueError('If `anchor` is after the start of the sample,' 

1889 ' must provide a value for `initial_state`.') 

1890 

1891 # Get updated time-varying system matrices in **kwargs, if necessary 

1892 out_of_sample = max(iloc + nsimulations - self.nobs, 0) 

1893 if extend_model is None: 

1894 extend_model = self.exog is not None or not self.ssm.time_invariant 

1895 if out_of_sample and extend_model: 

1896 kwargs = self._get_extension_time_varying_matrices( 

1897 params, exog, out_of_sample, extend_kwargs, 

1898 transformed=transformed, includes_fixed=includes_fixed, 

1899 **kwargs) 

1900 

1901 # Standardize the dimensions of the initial state 

1902 if initial_state is not None: 

1903 initial_state = np.array(initial_state) 

1904 if initial_state.ndim < 2: 

1905 initial_state = np.atleast_2d(initial_state).T 

1906 

1907 # Construct a model that represents the simulation period 

1908 end = min(self.nobs, iloc + nsimulations) 

1909 nextend = iloc + nsimulations - end 

1910 sim_model = self.ssm.extend(np.empty((nextend, self.k_endog)), 

1911 start=iloc, end=end, **kwargs) 

1912 

1913 # Simulate the data 

1914 _repetitions = 1 if repetitions is None else repetitions 

1915 sim = np.zeros((nsimulations, self.k_endog, _repetitions)) 

1916 

1917 for i in range(_repetitions): 

1918 initial_state_variates = None 

1919 if initial_state is not None: 

1920 if initial_state.shape[1] == 1: 

1921 initial_state_variates = initial_state[:, 0] 

1922 else: 

1923 initial_state_variates = initial_state[:, i] 

1924 

1925 # TODO: allow specifying measurement / state shocks for each 

1926 # repetition? 

1927 

1928 out, _ = sim_model.simulate( 

1929 nsimulations, measurement_shocks, state_shocks, 

1930 initial_state_variates) 

1931 

1932 sim[:, :, i] = out 

1933 

1934 # Wrap data / squeeze where appropriate 

1935 use_pandas = isinstance(self.data, PandasData) 

1936 index = None 

1937 if use_pandas: 

1938 _, _, _, index = self._get_prediction_index( 

1939 iloc, iloc + nsimulations - 1) 

1940 # If `repetitions` isn't set, we squeeze the last dimension(s) 

1941 if repetitions is None: 

1942 if self.k_endog == 1: 

1943 sim = sim[:, 0, 0] 

1944 if use_pandas: 

1945 sim = pd.Series(sim, index=index, name=self.endog_names) 

1946 else: 

1947 sim = sim[:, :, 0] 

1948 if use_pandas: 

1949 sim = pd.DataFrame(sim, index=index, 

1950 columns=self.endog_names) 

1951 elif use_pandas: 

1952 shape = sim.shape 

1953 endog_names = self.endog_names 

1954 if not isinstance(endog_names, list): 

1955 endog_names = [endog_names] 

1956 columns = pd.MultiIndex.from_product([endog_names, 

1957 np.arange(shape[2])]) 

1958 sim = pd.DataFrame(sim.reshape(shape[0], shape[1] * shape[2]), 

1959 index=index, columns=columns) 

1960 

1961 return sim 

1962 

1963 def impulse_responses(self, params, steps=1, impulse=0, 

1964 orthogonalized=False, cumulative=False, anchor=None, 

1965 exog=None, extend_model=None, extend_kwargs=None, 

1966 transformed=True, includes_fixed=False, **kwargs): 

1967 """ 

1968 Impulse response function 

1969 

1970 Parameters 

1971 ---------- 

1972 params : array_like 

1973 Array of model parameters. 

1974 steps : int, optional 

1975 The number of steps for which impulse responses are calculated. 

1976 Default is 1. Note that for time-invariant models, the initial 

1977 impulse is not counted as a step, so if `steps=1`, the output will 

1978 have 2 entries. 

1979 impulse : int or array_like 

1980 If an integer, the state innovation to pulse; must be between 0 

1981 and `k_posdef-1`. Alternatively, a custom impulse vector may be 

1982 provided; must be shaped `k_posdef x 1`. 

1983 orthogonalized : bool, optional 

1984 Whether or not to perform impulse using orthogonalized innovations. 

1985 Note that this will also affect custom `impulse` vectors. Default 

1986 is False. 

1987 cumulative : bool, optional 

1988 Whether or not to return cumulative impulse responses. Default is 

1989 False. 

1990 anchor : int, str, or datetime, optional 

1991 Time point within the sample for the state innovation impulse. Type 

1992 depends on the index of the given `endog` in the model. Two special 

1993 cases are the strings 'start' and 'end', which refer to setting the 

1994 impulse at the first and last points of the sample, respectively. 

1995 Integer values can run from 0 to `nobs - 1`, or can be negative to 

1996 apply negative indexing. Finally, if a date/time index was provided 

1997 to the model, then this argument can be a date string to parse or a 

1998 datetime type. Default is 'start'. 

1999 exog : array_like, optional 

2000 New observations of exogenous regressors for out-of-sample periods, 

2001 if applicable. 

2002 transformed : bool, optional 

2003 Whether or not `params` is already transformed. Default is 

2004 True. 

2005 includes_fixed : bool, optional 

2006 If parameters were previously fixed with the `fix_params` method, 

2007 this argument describes whether or not `params` also includes 

2008 the fixed parameters, in addition to the free parameters. Default 

2009 is False. 

2010 **kwargs 

2011 If the model has time-varying design or transition matrices and the 

2012 combination of `anchor` and `steps` implies creating impulse 

2013 responses for the out-of-sample period, then these matrices must 

2014 have updated values provided for the out-of-sample steps. For 

2015 example, if `design` is a time-varying component, `nobs` is 10, 

2016 `anchor=1`, and `steps` is 15, a (`k_endog` x `k_states` x 7) 

2017 matrix must be provided with the new design matrix values. 

2018 

2019 Returns 

2020 ------- 

2021 impulse_responses : ndarray 

2022 Responses for each endogenous variable due to the impulse 

2023 given by the `impulse` argument. For a time-invariant model, the 

2024 impulse responses are given for `steps + 1` elements (this gives 

2025 the "initial impulse" followed by `steps` responses for the 

2026 important cases of VAR and SARIMAX models), while for time-varying 

2027 models the impulse responses are only given for `steps` elements 

2028 (to avoid having to unexpectedly provide updated time-varying 

2029 matrices). 

2030 

2031 Notes 

2032 ----- 

2033 Intercepts in the measurement and state equation are ignored when 

2034 calculating impulse responses. 

2035 

2036 TODO: add an option to allow changing the ordering for the 

2037 orthogonalized option. Will require permuting matrices when 

2038 constructing the extended model. 
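
        Examples
        --------
        A minimal sketch using ``SARIMAX`` as a concrete subclass; since an
        AR(1) is time-invariant, `steps=5` yields 6 values (the initial
        impulse plus 5 subsequent responses). The parameter values are
        illustrative:

        >>> import numpy as np
        >>> from statsmodels.tsa.statespace.sarimax import SARIMAX
        >>> mod = SARIMAX(np.random.randn(100), order=(1, 0, 0))
        >>> irfs = mod.impulse_responses([0.9, 1.0], steps=5)  # shape (6,)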

2039 """ 

2040 # Make sure the model class has the current parameters 

2041 self.update(params, transformed=transformed, 

2042 includes_fixed=includes_fixed) 

2043 

2044 # For time-invariant models, add an additional `step`. This is the 

2045 # default for time-invariant models based on the expected behavior for 

2046 # ARIMA and VAR models: we want to record the initial impulse and also 

2047 # `steps` values of the responses afterwards. 

2048 # Note: we don't modify `steps` itself, because 

2049 # `KalmanFilter.impulse_responses` also adds an additional step in this 

2050 # case (this is so that there isn't different behavior when calling 

2051 # this method versus that method). We just need to also keep track of 

2052 # this here because we need to generate the correct extended model. 

2053 additional_steps = 0 

2054 if (self.ssm._design.shape[2] == 1 and 

2055 self.ssm._transition.shape[2] == 1 and 

2056 self.ssm._selection.shape[2] == 1): 

2057 additional_steps = 1 

2058 

2059 # Get the starting location 

2060 if anchor is None or anchor == 'start': 

2061 iloc = 0 

2062 elif anchor == 'end': 

2063 iloc = self.nobs - 1 

2064 else: 

2065 iloc, _, _ = self._get_index_loc(anchor) 

2066 if isinstance(iloc, slice): 

2067 iloc = iloc.start 

2068 

2069 if iloc < 0: 

2070 iloc = self.nobs + iloc 

2071 if iloc >= self.nobs: 

2072 raise ValueError('Cannot anchor impulse responses outside of the' 

2073 ' sample.') 

2074 

2075 time_invariant = ( 

2076 self.ssm._design.shape[2] == self.ssm._obs_cov.shape[2] == 

2077 self.ssm._transition.shape[2] == self.ssm._selection.shape[2] == 

2078 self.ssm._state_cov.shape[2] == 1) 

2079 

2080 # Get updated time-varying system matrices in **kwargs, if necessary 

2081 # (Note: KalmanFilter adds 1 to steps to account for the first impulse) 

2082 out_of_sample = max( 

2083 iloc + (steps + additional_steps + 1) - self.nobs, 0) 

2084 if extend_model is None: 

2085 extend_model = self.exog is not None and not time_invariant 

2086 if out_of_sample and extend_model: 

2087 kwargs = self._get_extension_time_varying_matrices( 

2088 params, exog, out_of_sample, extend_kwargs, 

2089 transformed=transformed, includes_fixed=includes_fixed, 

2090 **kwargs) 

2091 

2092 # Special handling for matrix terms that are time-varying but 

2093 # irrelevant for impulse response functions. Must be set since 

2094 # ssm.extend() requires that we pass new matrices for these, but they 

2095 # are ignored for IRF purposes. 

2096 end = min(self.nobs, iloc + steps + additional_steps) 

2097 nextend = iloc + (steps + additional_steps + 1) - end 

2098 if ('obs_intercept' not in kwargs and 

2099 self.ssm._obs_intercept.shape[1] > 1): 

2100 kwargs['obs_intercept'] = np.zeros((self.k_endog, nextend)) 

2101 if ('state_intercept' not in kwargs and 

2102 self.ssm._state_intercept.shape[1] > 1): 

2103 kwargs['state_intercept'] = np.zeros((self.k_states, nextend)) 

2104 if 'obs_cov' not in kwargs and self.ssm._obs_cov.shape[2] > 1: 

2105 kwargs['obs_cov'] = np.zeros((self.k_endog, self.k_endog, nextend)) 

2106 # Special handling for matrix terms that are time-varying but 

2107 # only the value at the anchor matters for IRF purposes. 

2108 if 'state_cov' not in kwargs and self.ssm._state_cov.shape[2] > 1: 

2109 tmp = np.zeros((self.ssm.k_posdef, self.ssm.k_posdef, nextend)) 

2110 tmp[:] = self['state_cov', :, :, iloc:iloc + 1] 

2111 kwargs['state_cov'] = tmp 

2112 if 'selection' not in kwargs and self.ssm._selection.shape[2] > 1: 

2113 tmp = np.zeros((self.k_states, self.ssm.k_posdef, nextend)) 

2114 tmp[:] = self['selection', :, :, iloc:iloc + 1] 

2115 kwargs['selection'] = tmp 

2116 

2117 # Construct a model that represents the simulation period 

2118 sim_model = self.ssm.extend(np.empty((nextend, self.k_endog)), 

2119 start=iloc, end=end, **kwargs) 

2120 

2121 # Compute the impulse responses 

2122 irfs = sim_model.impulse_responses( 

2123 steps, impulse, orthogonalized, cumulative) 

2124 

2125 # IRF is (nobs x k_endog); do not want to squeeze in case of steps = 1 

2126 if irfs.shape[1] == 1: 

2127 irfs = irfs[:, 0] 

2128 

2129 return irfs 

2130 

2131 @classmethod 

2132 def from_formula(cls, formula, data, subset=None): 

2133 """ 

2134 Not implemented for state space models 

2135 """ 

2136 raise NotImplementedError 

2137 

2138 

2139class MLEResults(tsbase.TimeSeriesModelResults): 

2140 r""" 

2141 Class to hold results from fitting a state space model. 

2142 

2143 Parameters 

2144 ---------- 

2145 model : MLEModel instance 

2146 The fitted model instance 

2147 params : ndarray 

2148 Fitted parameters 

2149 filter_results : KalmanFilter instance 

2150 The underlying state space model and Kalman filter output 

2151 

2152 Attributes 

2153 ---------- 

2154 model : Model instance 

2155 A reference to the model that was fit. 

2156 filter_results : KalmanFilter instance 

2157 The underlying state space model and Kalman filter output 

2158 nobs : float 

2159 The number of observations used to fit the model. 

2160 params : ndarray 

2161 The parameters of the model. 

2162 scale : float 

2163 This is currently set to 1.0 unless the model uses concentrated 

2164 filtering. 

2165 

2166 See Also 

2167 -------- 

2168 MLEModel 

2169 statsmodels.tsa.statespace.kalman_filter.FilterResults 

2170 statsmodels.tsa.statespace.representation.FrozenRepresentation 

2171 """ 

2172 def __init__(self, model, params, results, cov_type=None, cov_kwds=None, 

2173 **kwargs): 

2174 self.data = model.data 

2175 scale = results.scale 

2176 

2177 tsbase.TimeSeriesModelResults.__init__(self, model, params, 

2178 normalized_cov_params=None, 

2179 scale=scale) 

2180 

2181 # Save the fixed parameters 

2182 self._has_fixed_params = self.model._has_fixed_params 

2183 self._fixed_params_index = self.model._fixed_params_index 

2184 self._free_params_index = self.model._free_params_index 

2185 # TODO: seems like maybe self.fixed_params should be the dictionary 

2186 # itself, not just the keys? 

2187 if self._has_fixed_params: 

2188 self._fixed_params = self.model._fixed_params.copy() 

2189 self.fixed_params = list(self._fixed_params.keys()) 

2190 else: 

2191 self._fixed_params = None 

2192 self.fixed_params = [] 

2193 self.param_names = [ 

2194 '%s (fixed)' % name if name in self.fixed_params else name 

2195 for name in (self.data.param_names or [])] 

2196 

2197 # Save the state space representation output 

2198 self.filter_results = results 

2199 if isinstance(results, SmootherResults): 

2200 self.smoother_results = results 

2201 else: 

2202 self.smoother_results = None 

2203 

2204 # Dimensions 

2205 self.nobs = self.filter_results.nobs 

2206 self.nobs_diffuse = self.filter_results.nobs_diffuse 

2207 if self.nobs_diffuse > 0 and self.loglikelihood_burn > 0: 

2208 warnings.warn('Care should be used when applying a loglikelihood' 

2209 ' burn to a model with exact diffuse initialization.' 

2210 ' Some results objects, e.g. degrees of freedom,' 

2211 ' expect only one of the two to be set.') 

2212 # This only excludes explicitly burned (usually approximate diffuse) 

2213 # periods but does not exclude approximate diffuse periods. This is 

2214 # because the loglikelihood remains valid for the initial periods in 

2215 # the exact diffuse case (see DK, 2012, section 7.2) and so also do 

2216 # e.g. information criteria (see DK, 2012, section 7.4) and the score 

2217 # vector (see DK, 2012, section 7.3.3, equation 7.15). 

2218 # However, other objects should be excluded in the diffuse periods 

2219 # (e.g. the diffuse forecast errors, so in some cases a different 

2220 # nobs_effective will have to be computed and used) 

2221 self.nobs_effective = self.nobs - self.loglikelihood_burn 

2222 

2223 P = self.filter_results.initial_diffuse_state_cov 

2224 self.k_diffuse_states = 0 if P is None else np.sum(np.diagonal(P) == 1) 

2225 

2226 # Degrees of freedom (see DK 2012, section 7.4) 

2227 k_free_params = self.params.size - len(self.fixed_params) 

2228 self.df_model = (k_free_params + self.k_diffuse_states 

2229 + self.filter_results.filter_concentrated) 

2230 self.df_resid = self.nobs_effective - self.df_model 

2231 

2232 # Setup covariance matrix notes dictionary 

2233 if not hasattr(self, 'cov_kwds'): 

2234 self.cov_kwds = {} 

2235 if cov_type is None: 

2236 cov_type = 'approx' if results.memory_no_likelihood else 'opg' 

2237 self.cov_type = cov_type 

2238 

2239 # Setup the cache 

2240 self._cache = {} 

2241 

2242 # Handle covariance matrix calculation 

2243 if cov_kwds is None: 

2244 cov_kwds = {} 

2245 self._cov_approx_complex_step = ( 

2246 cov_kwds.pop('approx_complex_step', True)) 

2247 self._cov_approx_centered = cov_kwds.pop('approx_centered', False) 

2248 try: 

2249 self._rank = None 

2250 self._get_robustcov_results(cov_type=cov_type, use_self=True, 

2251 **cov_kwds) 

2252 except np.linalg.LinAlgError: 

2253 self._rank = 0 

2254 k_params = len(self.params) 

2255 self.cov_params_default = np.zeros((k_params, k_params)) * np.nan 

2256 self.cov_kwds['cov_type'] = ( 

2257 'Covariance matrix could not be calculated: singular' 

2258 ' information matrix.') 

2259 self.model.update(self.params, transformed=True, includes_fixed=True) 

2260 

2261 # References of filter and smoother output 

2262 extra_arrays = [ 

2263 'filtered_state', 'filtered_state_cov', 'predicted_state', 

2264 'predicted_state_cov', 'forecasts', 'forecasts_error', 

2265 'forecasts_error_cov', 'standardized_forecasts_error', 

2266 'forecasts_error_diffuse_cov', 'predicted_diffuse_state_cov', 

2267 'scaled_smoothed_estimator', 

2268 'scaled_smoothed_estimator_cov', 'smoothing_error', 

2269 'smoothed_state', 

2270 'smoothed_state_cov', 'smoothed_state_autocov', 

2271 'smoothed_measurement_disturbance', 

2272 'smoothed_state_disturbance', 

2273 'smoothed_measurement_disturbance_cov', 

2274 'smoothed_state_disturbance_cov'] 

2275 for name in extra_arrays: 

2276 setattr(self, name, getattr(self.filter_results, name, None)) 

2277 

2278 # Remove too-short results when memory conservation was used 

2279 if self.filter_results.memory_no_forecast_mean: 

2280 self.forecasts = None 

2281 self.forecasts_error = None 

2282 if self.filter_results.memory_no_forecast_cov: 

2283 self.forecasts_error_cov = None 

2284 if self.filter_results.memory_no_predicted_mean: 

2285 self.predicted_state = None 

2286 if self.filter_results.memory_no_predicted_cov: 

2287 self.predicted_state_cov = None 

2288 if self.filter_results.memory_no_filtered_mean: 

2289 self.filtered_state = None 

2290 if self.filter_results.memory_no_filtered_cov: 

2291 self.filtered_state_cov = None 

2292 if self.filter_results.memory_no_gain: 

2293 pass 

2294 if self.filter_results.memory_no_smoothing: 

2295 pass 

2296 if self.filter_results.memory_no_std_forecast: 

2297 self.standardized_forecasts_error = None 

2298 

2299 # Save more convenient access to states 

2300 # (will create a private attribute _states here and provide actual 

2301 # access via a getter, so that we can e.g. issue a warning in the case 

2302 # that a useless Pandas index was given in the model specification) 

2303 self._states = SimpleNamespace() 

2304 

2305 use_pandas = isinstance(self.data, PandasData) 

2306 index = self.model._index 

2307 columns = self.model.state_names 

2308 

2309 # Predicted states 

2310 # Note: a complication here is that we also include the initial values 

2311 # here, so that we need an extended index in the Pandas case 

2312 if (self.predicted_state is None or 

2313 self.filter_results.memory_no_predicted_mean): 

2314 self._states.predicted = None 

2315 elif use_pandas: 

2316 extended_index = self.model._get_index_with_initial_state() 

2317 self._states.predicted = pd.DataFrame( 

2318 self.predicted_state.T, index=extended_index, columns=columns) 

2319 else: 

2320 self._states.predicted = self.predicted_state.T 

2321 if (self.predicted_state_cov is None or 

2322 self.filter_results.memory_no_predicted_cov): 

2323 self._states.predicted_cov = None 

2324 elif use_pandas: 

2325 extended_index = self.model._get_index_with_initial_state() 

2326 tmp = np.transpose(self.predicted_state_cov, (2, 0, 1)) 

2327 self._states.predicted_cov = pd.DataFrame( 

2328 np.reshape(tmp, (tmp.shape[0] * tmp.shape[1], tmp.shape[2])), 

2329 index=pd.MultiIndex.from_product( 

2330 [extended_index, columns]).swaplevel(), 

2331 columns=columns) 

2332 else: 

2333 self._states.predicted_cov = np.transpose( 

2334 self.predicted_state_cov, (2, 0, 1)) 

2335 

2336 # Filtered states 

2337 if (self.filtered_state is None or 

2338 self.filter_results.memory_no_filtered_mean): 

2339 self._states.filtered = None 

2340 elif use_pandas: 

2341 self._states.filtered = pd.DataFrame( 

2342 self.filtered_state.T, index=index, columns=columns) 

2343 else: 

2344 self._states.filtered = self.filtered_state.T 

2345 if (self.filtered_state_cov is None or 

2346 self.filter_results.memory_no_filtered_cov): 

2347 self._states.filtered_cov = None 

2348 elif use_pandas: 

2349 tmp = np.transpose(self.filtered_state_cov, (2, 0, 1)) 

2350 self._states.filtered_cov = pd.DataFrame( 

2351 np.reshape(tmp, (tmp.shape[0] * tmp.shape[1], tmp.shape[2])), 

2352 index=pd.MultiIndex.from_product([index, columns]).swaplevel(), 

2353 columns=columns) 

2354 else: 

2355 self._states.filtered_cov = np.transpose( 

2356 self.filtered_state_cov, (2, 0, 1)) 

2357 

2358 # Smoothed states 

2359 if self.smoothed_state is None: 

2360 self._states.smoothed = None 

2361 elif use_pandas: 

2362 self._states.smoothed = pd.DataFrame( 

2363 self.smoothed_state.T, index=index, columns=columns) 

2364 else: 

2365 self._states.smoothed = self.smoothed_state.T 

2366 if self.smoothed_state_cov is None: 

2367 self._states.smoothed_cov = None 

2368 elif use_pandas: 

2369 tmp = np.transpose(self.smoothed_state_cov, (2, 0, 1)) 

2370 self._states.smoothed_cov = pd.DataFrame( 

2371 np.reshape(tmp, (tmp.shape[0] * tmp.shape[1], tmp.shape[2])), 

2372 index=pd.MultiIndex.from_product([index, columns]).swaplevel(), 

2373 columns=columns) 

2374 else: 

2375 self._states.smoothed_cov = np.transpose( 

2376 self.smoothed_state_cov, (2, 0, 1)) 

2377 

2378 # Handle removing data 

2379 self._data_attr_model = getattr(self, '_data_attr_model', []) 

2380 self._data_attr_model.extend(['ssm']) 

2381 self._data_attr.extend(extra_arrays) 

2382 self._data_attr.extend(['filter_results', 'smoother_results']) 

2383 self.data_in_cache = getattr(self, 'data_in_cache', []) 

2384 self.data_in_cache.extend([]) 

2385 

2386 def _get_robustcov_results(self, cov_type='opg', **kwargs): 

2387 """ 

2388 Create new results instance with specified covariance estimator as 

2389 default 

2390 

2391 Note: creating new results instance currently not supported. 

2392 

2393 Parameters 

2394 ---------- 

2395 cov_type : str 

2396 the type of covariance matrix estimator to use. See Notes below 

2397 kwargs : depends on cov_type 

2398 Required or optional arguments for covariance calculation. 

2399 See Notes below. 

2400 

2401 Returns 

2402 ------- 

2403 results : results instance 

2404 This method creates a new results instance with the requested 

2405 covariance as the default covariance of the parameters. 

2406 Inferential statistics like p-values and hypothesis tests will be 

2407 based on this covariance matrix. 

2408 

2409 Notes 

2410 ----- 

2411 The following covariance types and required or optional arguments are 

2412 currently available: 

2413 

2414 - 'opg' for the outer product of gradient estimator 

2415 - 'oim' for the observed information matrix estimator, calculated 

2416 using the method of Harvey (1989) 

2417 - 'approx' for the observed information matrix estimator, 

2418 calculated using a numerical approximation of the Hessian matrix. 

2419 Uses complex step approximation by default, or uses finite 

2420 differences if `approx_complex_step=False` in the `cov_kwds` 

2421 dictionary. 

2422 - 'robust' for an approximate (quasi-maximum likelihood) covariance 

2423 matrix that may be valid even in the presence of some 

2424 misspecifications. Intermediate calculations use the 'oim' 

2425 method. 

2426 - 'robust_approx' is the same as 'robust' except that the 

2427 intermediate calculations use the 'approx' method. 

2428 - 'none' for no covariance matrix calculation. 
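
        Examples
        --------
        In practice, the covariance estimator is usually selected via the
        `cov_type` argument to `fit` (a minimal sketch; the data are
        illustrative):

        >>> import numpy as np
        >>> from statsmodels.tsa.statespace.sarimax import SARIMAX
        >>> mod = SARIMAX(np.random.randn(100), order=(1, 0, 0))
        >>> res = mod.fit(cov_type='oim', disp=False)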

2429 """ 

2430 from statsmodels.base.covtype import descriptions 

2431 

2432 use_self = kwargs.pop('use_self', False) 

2433 if use_self: 

2434 res = self 

2435 else: 

2436 raise NotImplementedError 

2437 res = self.__class__( 

2438 self.model, self.params, 

2439 normalized_cov_params=self.normalized_cov_params, 

2440 scale=self.scale) 

2441 

2442 # Set the new covariance type 

2443 res.cov_type = cov_type 

2444 res.cov_kwds = {} 

2445 

2446 # Calculate the new covariance matrix 

2447 approx_complex_step = self._cov_approx_complex_step 

2448 if approx_complex_step: 

2449 approx_type_str = 'complex-step' 

2450 elif self._cov_approx_centered: 

2451 approx_type_str = 'centered finite differences' 

2452 else: 

2453 approx_type_str = 'finite differences' 

2454 

2455 k_params = len(self.params) 

2456 if k_params == 0: 

2457 res.cov_params_default = np.zeros((0, 0)) 

2458 res._rank = 0 

2459 res.cov_kwds['description'] = 'No parameters estimated.' 

2460 elif cov_type == 'custom': 

2461 res.cov_type = kwargs['custom_cov_type'] 

2462 res.cov_params_default = kwargs['custom_cov_params'] 

2463 res.cov_kwds['description'] = kwargs['custom_description'] 

2464 if len(self.fixed_params) > 0: 

2465 mask = np.ix_(self._free_params_index, self._free_params_index) 

2466 else: 

2467 mask = np.s_[...] 

2468 res._rank = np.linalg.matrix_rank(res.cov_params_default[mask]) 

2469 elif cov_type == 'none': 

2470 res.cov_params_default = np.zeros((k_params, k_params)) * np.nan 

2471 res._rank = np.nan 

2472 res.cov_kwds['description'] = descriptions['none'] 

2473 elif self.cov_type == 'approx': 

2474 res.cov_params_default = res.cov_params_approx 

2475 res.cov_kwds['description'] = descriptions['approx'].format( 

2476 approx_type=approx_type_str) 

2477 elif self.cov_type == 'oim': 

2478 res.cov_params_default = res.cov_params_oim 

2479 res.cov_kwds['description'] = descriptions['OIM'].format( 

2480 approx_type=approx_type_str) 

2481 elif self.cov_type == 'opg': 

2482 res.cov_params_default = res.cov_params_opg 

2483 res.cov_kwds['description'] = descriptions['OPG'].format( 

2484 approx_type=approx_type_str) 

2485 elif self.cov_type == 'robust' or self.cov_type == 'robust_oim': 

2486 res.cov_params_default = res.cov_params_robust_oim 

2487 res.cov_kwds['description'] = descriptions['robust-OIM'].format( 

2488 approx_type=approx_type_str) 

2489 elif self.cov_type == 'robust_approx': 

2490 res.cov_params_default = res.cov_params_robust_approx 

2491 res.cov_kwds['description'] = descriptions['robust-approx'].format( 

2492 approx_type=approx_type_str) 

2493 else: 

2494 raise NotImplementedError('Invalid covariance matrix type.') 

2495 

2496 return res 

2497 

2498 @cache_readonly 

2499 def aic(self): 

2500 """ 

2501 (float) Akaike Information Criterion 

2502 """ 

2503 return aic(self.llf, self.nobs_effective, self.df_model) 

2504 

2505 @cache_readonly 

2506 def aicc(self): 

2507 """ 

2508 (float) Akaike Information Criterion with small sample correction 

2509 """ 

2510 return aicc(self.llf, self.nobs_effective, self.df_model) 

2511 

2512 @cache_readonly 

2513 def bic(self): 

2514 """ 

2515 (float) Bayes Information Criterion 

2516 """ 

2517 return bic(self.llf, self.nobs_effective, self.df_model) 

2518 

2519 def _cov_params_approx(self, approx_complex_step=True, 

2520 approx_centered=False): 

2521 evaluated_hessian = self.nobs_effective * self.model.hessian( 

2522 params=self.params, transformed=True, includes_fixed=True, 

2523 method='approx', approx_complex_step=approx_complex_step, 

2524 approx_centered=approx_centered) 

2525 # TODO: Case with "not approx_complex_step" is not hit in 

2526 # tests as of 2017-05-19 

2527 

2528 if len(self.fixed_params) > 0: 

2529 mask = np.ix_(self._free_params_index, self._free_params_index) 

2530 (tmp, singular_values) = pinv_extended(evaluated_hessian[mask]) 

2531 neg_cov = np.zeros_like(evaluated_hessian) * np.nan 

2532 neg_cov[mask] = tmp 

2533 else: 

2534 (neg_cov, singular_values) = pinv_extended(evaluated_hessian) 

2535 

2536 self.model.update(self.params, transformed=True, includes_fixed=True) 

2537 if self._rank is None: 

2538 self._rank = np.linalg.matrix_rank(np.diag(singular_values)) 

2539 return -neg_cov 

2540 

2541 @cache_readonly 

2542 def cov_params_approx(self): 

2543 """ 

2544 (array) The variance / covariance matrix. Computed using the numerical 

2545 Hessian approximated by complex step or finite differences methods. 

2546 """ 

2547 return self._cov_params_approx(self._cov_approx_complex_step, 

2548 self._cov_approx_centered) 

2549 

2550 def _cov_params_oim(self, approx_complex_step=True, approx_centered=False): 

2551 evaluated_hessian = self.nobs_effective * self.model.hessian( 

2552 self.params, hessian_method='oim', transformed=True, 

2553 includes_fixed=True, approx_complex_step=approx_complex_step, 

2554 approx_centered=approx_centered) 

2555 

2556 if len(self.fixed_params) > 0: 

2557 mask = np.ix_(self._free_params_index, self._free_params_index) 

2558 (tmp, singular_values) = pinv_extended(evaluated_hessian[mask]) 

2559 neg_cov = np.zeros_like(evaluated_hessian) * np.nan 

2560 neg_cov[mask] = tmp 

2561 else: 

2562 (neg_cov, singular_values) = pinv_extended(evaluated_hessian) 

2563 

2564 self.model.update(self.params, transformed=True, includes_fixed=True) 

2565 if self._rank is None: 

2566 self._rank = np.linalg.matrix_rank(np.diag(singular_values)) 

2567 return -neg_cov 

2568 

2569 @cache_readonly 

2570 def cov_params_oim(self): 

2571 """ 

2572 (array) The variance / covariance matrix. Computed using the method 

2573 from Harvey (1989). 

2574 """ 

2575 return self._cov_params_oim(self._cov_approx_complex_step, 

2576 self._cov_approx_centered) 

2577 

2578 def _cov_params_opg(self, approx_complex_step=True, approx_centered=False): 

2579 evaluated_hessian = self.nobs_effective * self.model._hessian_opg( 

2580 self.params, transformed=True, includes_fixed=True, 

2581 approx_complex_step=approx_complex_step, 

2582 approx_centered=approx_centered) 

2583 

2584 if len(self.fixed_params) > 0: 

2585 mask = np.ix_(self._free_params_index, self._free_params_index) 

2586 (tmp, singular_values) = pinv_extended(evaluated_hessian[mask]) 

2587 neg_cov = np.zeros_like(evaluated_hessian) * np.nan 

2588 neg_cov[mask] = tmp 

2589 else: 

2590 (neg_cov, singular_values) = pinv_extended(evaluated_hessian) 

2591 

2592 self.model.update(self.params, transformed=True, includes_fixed=True) 

2593 if self._rank is None: 

2594 self._rank = np.linalg.matrix_rank(np.diag(singular_values)) 

2595 return -neg_cov 

2596 

2597 @cache_readonly 

2598 def cov_params_opg(self): 

2599 """ 

2600 (array) The variance / covariance matrix. Computed using the outer 

2601 product of gradients method. 

2602 """ 

2603 return self._cov_params_opg(self._cov_approx_complex_step, 

2604 self._cov_approx_centered) 

2605 

2606 @cache_readonly 

2607 def cov_params_robust(self): 

2608 """ 

2609 (array) The QMLE variance / covariance matrix. Alias for 

2610 `cov_params_robust_oim` 

2611 """ 

2612 return self.cov_params_robust_oim 

2613 

2614 def _cov_params_robust_oim(self, approx_complex_step=True, 

2615 approx_centered=False): 

2616 cov_opg = self._cov_params_opg(approx_complex_step=approx_complex_step, 

2617 approx_centered=approx_centered) 

2618 

2619 evaluated_hessian = self.nobs_effective * self.model.hessian( 

2620 self.params, hessian_method='oim', transformed=True, 

2621 includes_fixed=True, approx_complex_step=approx_complex_step, 

2622 approx_centered=approx_centered) 

2623 

2624 if len(self.fixed_params) > 0: 

2625 mask = np.ix_(self._free_params_index, self._free_params_index) 

2626 cov_params = np.zeros_like(evaluated_hessian) * np.nan 

2627 

2628 cov_opg = cov_opg[mask] 

2629 evaluated_hessian = evaluated_hessian[mask] 

2630 

2631 tmp, singular_values = pinv_extended( 

2632 np.dot(np.dot(evaluated_hessian, cov_opg), evaluated_hessian)) 

2633 

2634 cov_params[mask] = tmp 

2635 else: 

2636 (cov_params, singular_values) = pinv_extended( 

2637 np.dot(np.dot(evaluated_hessian, cov_opg), evaluated_hessian)) 

2638 

2639 self.model.update(self.params, transformed=True, includes_fixed=True) 

2640 if self._rank is None: 

2641 self._rank = np.linalg.matrix_rank(np.diag(singular_values)) 

2642 return cov_params 

2643 

2644 @cache_readonly 

2645 def cov_params_robust_oim(self): 

2646 """ 

2647 (array) The QMLE variance / covariance matrix. Computed using the 

2648 method from Harvey (1989) as the evaluated hessian. 

2649 """ 

2650 return self._cov_params_robust_oim(self._cov_approx_complex_step, 

2651 self._cov_approx_centered) 

2652 

2653 def _cov_params_robust_approx(self, approx_complex_step=True, 

2654 approx_centered=False): 

2655 cov_opg = self._cov_params_opg(approx_complex_step=approx_complex_step, 

2656 approx_centered=approx_centered) 

2657 

2658 evaluated_hessian = self.nobs_effective * self.model.hessian( 

2659 self.params, transformed=True, includes_fixed=True, 

2660 method='approx', approx_complex_step=approx_complex_step) 

2661 # TODO: Case with "not approx_complex_step" is not 

2662 # hit in tests as of 2017-05-19 

2663 

2664 if len(self.fixed_params) > 0: 

2665 mask = np.ix_(self._free_params_index, self._free_params_index) 

2666 cov_params = np.zeros_like(evaluated_hessian) * np.nan 

2667 

2668 cov_opg = cov_opg[mask] 

2669 evaluated_hessian = evaluated_hessian[mask] 

2670 

2671 tmp, singular_values = pinv_extended( 

2672 np.dot(np.dot(evaluated_hessian, cov_opg), evaluated_hessian)) 

2673 

2674 cov_params[mask] = tmp 

2675 else: 

2676 (cov_params, singular_values) = pinv_extended( 

2677 np.dot(np.dot(evaluated_hessian, cov_opg), evaluated_hessian)) 

2678 

2679 self.model.update(self.params, transformed=True, includes_fixed=True) 

2680 if self._rank is None: 

2681 self._rank = np.linalg.matrix_rank(np.diag(singular_values)) 

2682 return cov_params 

2683 

2684 @cache_readonly 

2685 def cov_params_robust_approx(self): 

2686 """ 

2687 (array) The QMLE variance / covariance matrix. Computed using the 

2688 numerical Hessian as the evaluated hessian. 

2689 """ 

2690 return self._cov_params_robust_approx(self._cov_approx_complex_step, 

2691 self._cov_approx_centered) 

2692 

2693 def info_criteria(self, criteria, method='standard'): 

2694 r""" 

2695 Information criteria 

2696 

2697 Parameters 

2698 ---------- 

2699 criteria : {'aic', 'bic', 'hqic'} 

2700 The information criteria to compute. 

2701 method : {'standard', 'lutkepohl'} 

2702 The method for information criteria computation. Default is 

2703 'standard' method; 'lutkepohl' computes the information criteria 

2704 as in Lütkepohl (2007). See Notes for formulas. 

2705 

2706 Notes 

2707 ----- 

2708 The `'standard'` formulas are: 

2709 

2710 .. math:: 

2711 

2712 AIC & = -2 \log L(Y_n | \hat \psi) + 2 k \\ 

2713 BIC & = -2 \log L(Y_n | \hat \psi) + k \log n \\ 

2714 HQIC & = -2 \log L(Y_n | \hat \psi) + 2 k \log \log n \\ 

2715 

2716 where :math:`\hat \psi` are the maximum likelihood estimates of the 

2717 parameters, :math:`n` is the number of observations, and :math:`k` is the 

2718 number of estimated parameters. 

2719 

2720 Note that the `'standard'` formulas are returned from the `aic`, `bic`, 

2721 and `hqic` results attributes. 

2722 

2723 The `'lutkepohl'` formulas are (Lütkepohl, 2007): 

2724 

2725 .. math:: 

2726 

2727 AIC_L & = \log | Q | + \frac{2 k}{n} \\ 

2728 BIC_L & = \log | Q | + \frac{k \log n}{n} \\ 

2729 HQIC_L & = \log | Q | + \frac{2 k \log \log n}{n} \\ 

2730 

2731 where :math:`Q` is the state covariance matrix. Note that the Lütkepohl 

2732 definitions do not apply to all state space models, and should be used 

2733 with care outside of SARIMAX and VARMAX models. 

2734 

2735 References 

2736 ---------- 

2737 .. [*] Lütkepohl, Helmut. 2007. *New Introduction to Multiple Time* 

2738 *Series Analysis.* Berlin: Springer. 
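
        Examples
        --------
        A minimal sketch; the resulting values depend on the data:

        >>> import numpy as np
        >>> from statsmodels.tsa.statespace.sarimax import SARIMAX
        >>> res = SARIMAX(np.random.randn(100), order=(1, 0, 0)).fit(disp=False)
        >>> res.info_criteria('aic')  # equal to res.aic
        >>> res.info_criteria('aic', method='lutkepohl')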

2739 """ 

2740 criteria = criteria.lower() 

2741 method = method.lower() 

2742 

2743 if method == 'standard': 

2744 out = getattr(self, criteria) 

2745 elif method == 'lutkepohl': 

2746 if self.filter_results.state_cov.shape[-1] > 1: 

2747 raise ValueError('Cannot compute Lütkepohl statistics for' 

2748 ' models with time-varying state covariance' 

2749 ' matrix.') 

2750 

2751 cov = self.filter_results.state_cov[:, :, 0] 

2752 if criteria == 'aic': 

2753 out = np.squeeze(np.linalg.slogdet(cov)[1] + 

2754 2 * self.df_model / self.nobs_effective) 

2755 elif criteria == 'bic': 

2756 out = np.squeeze(np.linalg.slogdet(cov)[1] + 

2757 self.df_model * np.log(self.nobs_effective) / 

2758 self.nobs_effective) 

2759 elif criteria == 'hqic': 

2760 out = np.squeeze(np.linalg.slogdet(cov)[1] + 

2761 2 * self.df_model * 

2762 np.log(np.log(self.nobs_effective)) / 

2763 self.nobs_effective) 

2764 else: 

2765 raise ValueError('Invalid information criteria') 

2766 

2767 else: 

2768 raise ValueError('Invalid information criteria computation method') 

2769 

2770 return out 

2771 

2772 @cache_readonly 

2773 def fittedvalues(self): 

2774 """ 

2775 (array) The predicted values of the model. An (nobs x k_endog) array. 

2776 """ 

2777 # This is a (k_endog x nobs) array; do not want to squeeze in case of 

2778 # the corner case where nobs = 1 (mostly a concern in the predict or 

2779 # forecast functions, but here also to maintain consistency) 

2780 fittedvalues = self.forecasts 

2781 if fittedvalues is None: 

2782 pass 

2783 elif fittedvalues.shape[0] == 1: 

2784 fittedvalues = fittedvalues[0, :] 

2785 else: 

2786 fittedvalues = fittedvalues.T 

2787 return fittedvalues 

2788 

2789 @cache_readonly 

2790 def hqic(self): 

2791 """ 

2792 (float) Hannan-Quinn Information Criterion 

2793 """ 

2794 # return (-2 * self.llf + 

2795 # 2 * np.log(np.log(self.nobs_effective)) * self.df_model) 

2796 return hqic(self.llf, self.nobs_effective, self.df_model) 

2797 

2798 @cache_readonly 

2799 def llf_obs(self): 

2800 """ 

2801 (array) The values of the log-likelihood function, evaluated per observation at `params`. 

2802 """ 

2803 return self.filter_results.llf_obs 

2804 

2805 @cache_readonly 

2806 def llf(self): 

2807 """ 

2808 (float) The value of the log-likelihood function evaluated at `params`. 

2809 """ 

2810 return self.filter_results.llf 

2811 

2812 @cache_readonly 

2813 def loglikelihood_burn(self): 

2814 """ 

2815 (float) The number of observations during which the likelihood is not 

2816 evaluated. 

2817 """ 

2818 return self.filter_results.loglikelihood_burn 

2819 

2820 @cache_readonly 

2821 def mae(self): 

2822 """ 

2823 (float) Mean absolute error 

2824 """ 

2825 return np.mean(np.abs(self.resid)) 

2826 

2827 @cache_readonly 

2828 def mse(self): 

2829 """ 

2830 (float) Mean squared error 

2831 """ 

2832 return self.sse / self.nobs 

2833 

2834 @cache_readonly 

2835 def pvalues(self): 

2836 """ 

2837 (array) The p-values associated with the z-statistics of the 

2838 coefficients. Note that the coefficients are assumed to have a Normal 

2839 distribution. 

2840 """ 

2841 pvalues = np.zeros_like(self.zvalues) * np.nan 

2842 mask = np.ones_like(pvalues, dtype=bool) 

2843 mask[self._free_params_index] = True 

2844 mask &= ~np.isnan(self.zvalues) 

2845 pvalues[mask] = norm.sf(np.abs(self.zvalues[mask])) * 2 

2846 return pvalues 

2847 

2848 @cache_readonly 

2849 def resid(self): 

2850 """ 

2851 (array) The model residuals. An (nobs x k_endog) array. 

2852 """ 

2853 # This is a (k_endog x nobs) array; do not want to squeeze in case of 

2854 # the corner case where nobs = 1 (mostly a concern in the predict or 

2855 # forecast functions, but here also to maintain consistency) 

2856 resid = self.forecasts_error 

2857 if resid is None: 

2858 pass 

2859 elif resid.shape[0] == 1: 

2860 resid = resid[0, :] 

2861 else: 

2862 resid = resid.T 

2863 return resid 

2864 

2865 @property 

2866 def states(self): 
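        """
        (SimpleNamespace) In-sample predicted, filtered, and smoothed states
        and state covariances, wrapped as Pandas objects if the model was
        constructed with Pandas data.
        """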

2867 if self.model._index_generated and not self.model._index_none: 

2868 warnings.warn('No supported index is available. The `states`' 

2869 ' DataFrame uses a generated integer index', 

2870 ValueWarning) 

2871 return self._states 

2872 

2873 @cache_readonly 

2874 def sse(self): 

2875 """ 

2876 (float) Sum of squared errors 

2877 """ 

2878 return np.sum(self.resid**2) 

2879 

2880 @cache_readonly 

2881 def zvalues(self): 

2882 """ 

2883 (array) The z-statistics for the coefficients. 

2884 """ 

2885 return self.params / self.bse 

2886 

2887 def test_normality(self, method): 

2888 """ 

2889 Test for normality of standardized residuals. 

2890 

2891 Null hypothesis is normality. 

2892 

2893 Parameters 

2894 ---------- 

2895 method : {'jarquebera', None} 

2896 The statistical test for normality. Must be 'jarquebera' for 

2897 Jarque-Bera normality test. If None, an attempt is made to select 

2898 an appropriate test. 

2899 

2900 Notes 

2901 ----- 

2902 Let `d` = max(loglikelihood_burn, nobs_diffuse); this test is 

2903 calculated ignoring the first `d` residuals. 

2904 

2905 In the case of missing data, the maintained hypothesis is that the 

2906 data are missing completely at random. This test is then run on the 

2907 standardized residuals excluding those corresponding to missing 

2908 observations. 

2909 

2910 See Also 

2911 -------- 

2912 statsmodels.stats.stattools.jarque_bera 

2913 The Jarque-Bera test of normality. 
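
        Examples
        --------
        A minimal sketch; the resulting values depend on the data:

        >>> import numpy as np
        >>> from statsmodels.tsa.statespace.sarimax import SARIMAX
        >>> res = SARIMAX(np.random.randn(100), order=(1, 0, 0)).fit(disp=False)
        >>> # One row per endogenous variable:
        >>> # (statistic, pvalue, skewness, kurtosis)
        >>> res.test_normality(method='jarquebera')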

2914 """ 

2915 if method is None: 

2916 method = 'jarquebera' 

2917 

2918 if self.standardized_forecasts_error is None: 

2919 raise ValueError('Cannot compute test statistic when standardized' 

2920 ' forecast errors have not been computed.') 

2921 

2922 if method == 'jarquebera': 

2923 from statsmodels.stats.stattools import jarque_bera 

2924 d = np.maximum(self.loglikelihood_burn, self.nobs_diffuse) 

2925 output = [] 

2926 for i in range(self.model.k_endog): 

2927 resid = self.filter_results.standardized_forecasts_error[i, d:] 

2928 mask = ~np.isnan(resid) 

2929 output.append(jarque_bera(resid[mask])) 

2930 else: 

2931 raise NotImplementedError('Invalid normality test method.') 

2932 

2933 return np.array(output) 

2934 

2935 def test_heteroskedasticity(self, method, alternative='two-sided', 

2936 use_f=True): 

2937 r""" 

2938 Test for heteroskedasticity of standardized residuals 

2939 

2940 Tests whether the sum-of-squares in the first third of the sample is 

2941 significantly different than the sum-of-squares in the last third 

2942 of the sample. Analogous to a Goldfeld-Quandt test. The null hypothesis 

2943 is of no heteroskedasticity. 

2944 

2945 Parameters 

2946 ---------- 

2947 method : {'breakvar', None} 

2948 The statistical test for heteroskedasticity. Must be 'breakvar' 

2949 for test of a break in the variance. If None, an attempt is 

2950 made to select an appropriate test. 

2951 alternative : str, 'increasing', 'decreasing' or 'two-sided' 

2952 This specifies the alternative for the p-value calculation. Default 

2953 is two-sided. 

2954 use_f : bool, optional 

2955 Whether or not to compare against the asymptotic distribution 

2956 (chi-squared) or the approximate small-sample distribution (F). 

2957 Default is True (i.e. default is to compare against an F 

2958 distribution). 

2959 

2960 Returns 

2961 ------- 

2962 output : ndarray 

2963 An array with `(test_statistic, pvalue)` for each endogenous 

2964 variable. The array is then sized `(k_endog, 2)`. If the method is 

2965 called as `het = res.test_heteroskedasticity()`, then `het[0]` is 

2966 an array of size 2 corresponding to the first endogenous variable, 

2967 where `het[0][0]` is the test statistic, and `het[0][1]` is the 

2968 p-value. 

2969 

2970 Notes 

2971 ----- 

2972 The null hypothesis is of no heteroskedasticity. That means different 

2973 things depending on which alternative is selected: 

2974 

2975 - Increasing: Null hypothesis is that the variance is not increasing 

2976 throughout the sample; that the sum-of-squares in the later 

2977 subsample is *not* greater than the sum-of-squares in the earlier 

2978 subsample. 

2979 - Decreasing: Null hypothesis is that the variance is not decreasing 

2980 throughout the sample; that the sum-of-squares in the earlier 

2981 subsample is *not* greater than the sum-of-squares in the later 

2982 subsample. 

2983 - Two-sided: Null hypothesis is that the variance is not changing 

2984 throughout the sample. Both that the sum-of-squares in the earlier 

2985 subsample is not greater than the sum-of-squares in the later 

2986 subsample *and* that the sum-of-squares in the later subsample is 

2987 not greater than the sum-of-squares in the earlier subsample. 

2988 

2989 For :math:`h = [T/3]`, the test statistic is: 

2990 

2991 .. math:: 

2992 

2993 H(h) = \sum_{t=T-h+1}^T \tilde v_t^2 

2994 \Bigg / \sum_{t=d+1}^{d+h} \tilde v_t^2 

2995 

2996 where :math:`d` = max(loglikelihood_burn, nobs_diffuse) (usually 

2997 corresponding to diffuse initialization under either the approximate 

2998 or exact approach). 

2999 

3000 This statistic can be tested against an :math:`F(h,h)` distribution. 

3001 Alternatively, :math:`h H(h)` is asymptotically distributed according 

3002 to :math:`\chi_h^2`; this second test can be applied by passing 

3003 `use_f=False` as an argument. 

3004 

3005 See section 5.4 of [1]_ for the above formula and discussion, as well 

3006 as additional details. 

3007 

3008 TODO 

3009 

3010 - Allow specification of :math:`h` 

3011 

3012 References 

3013 ---------- 

3014 .. [1] Harvey, Andrew C. 1990. *Forecasting, Structural Time Series* 

3015 *Models and the Kalman Filter.* Cambridge University Press. 
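
Examples
--------
A minimal usage sketch (here `res` is assumed to be the results object
from a fitted state space model):

>>> het = res.test_heteroskedasticity(method='breakvar')
>>> stat, pval = het[0]  # statistic and p-value for first variable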

3016 """ 

3017 if method is None: 

3018 method = 'breakvar' 

3019 

3020 if self.standardized_forecasts_error is None: 

3021 raise ValueError('Cannot compute test statistic when standardized' 

3022 ' forecast errors have not been computed.') 

3023 

3024 if method == 'breakvar': 

3025 # Store some values 

3026 squared_resid = self.filter_results.standardized_forecasts_error**2 

3027 d = np.maximum(self.loglikelihood_burn, self.nobs_diffuse) 

3028 # This differs from self.nobs_effective because here we want to 

3029 # exclude exact diffuse periods, whereas self.nobs_effective only 

3030 # excludes explicitly burned (usually approximate diffuse) periods. 

3031 nobs_effective = self.nobs - d 

3032 

3033 test_statistics = [] 

3034 p_values = [] 

3035 for i in range(self.model.k_endog): 

3036 h = int(np.round(nobs_effective / 3)) 

3037 numer_resid = squared_resid[i, -h:] 

3038 numer_resid = numer_resid[~np.isnan(numer_resid)] 

3039 numer_dof = len(numer_resid) 

3040 

3041 denom_resid = squared_resid[i, d:d+h] 

3042 denom_resid = denom_resid[~np.isnan(denom_resid)] 

3043 denom_dof = len(denom_resid) 

3044 

3045 if numer_dof < 2: 

3046 warnings.warn('Later subset of data for variable %d' 

3047 ' has too few non-missing observations to' 

3048 ' calculate test statistic.' % i) 

3049 numer_resid = np.nan 

3050 if denom_dof < 2: 

3051 warnings.warn('Early subset of data for variable %d' 

3052 ' has too few non-missing observations to' 

3053 ' calculate test statistic.' % i) 

3054 denom_resid = np.nan 

3055 

3056 test_statistic = np.sum(numer_resid) / np.sum(denom_resid) 

3057 

3058 # Setup functions to calculate the p-values 

3059 if use_f: 

3060 from scipy.stats import f 

3061 pval_lower = lambda test_statistics: f.cdf( # noqa:E731 

3062 test_statistics, numer_dof, denom_dof) 

3063 pval_upper = lambda test_statistics: f.sf( # noqa:E731 

3064 test_statistics, numer_dof, denom_dof) 

3065 else: 

3066 from scipy.stats import chi2 

3067 pval_lower = lambda test_statistics: chi2.cdf( # noqa:E731 

3068 numer_dof * test_statistics, denom_dof) 

3069 pval_upper = lambda test_statistics: chi2.sf( # noqa:E731 

3070 numer_dof * test_statistics, denom_dof) 

3071 

3072 # Calculate the one- or two-sided p-values 

3073 alternative = alternative.lower() 

3074 if alternative in ['i', 'inc', 'increasing']: 

3075 p_value = pval_upper(test_statistic) 

3076 elif alternative in ['d', 'dec', 'decreasing']: 

3077 test_statistic = 1. / test_statistic 

3078 p_value = pval_upper(test_statistic) 

3079 elif alternative in ['2', '2-sided', 'two-sided']: 

3080 p_value = 2 * np.minimum( 

3081 pval_lower(test_statistic), 

3082 pval_upper(test_statistic) 

3083 ) 

3084 else: 

3085 raise ValueError('Invalid alternative.') 

3086 

3087 test_statistics.append(test_statistic) 

3088 p_values.append(p_value) 

3089 

3090 output = np.c_[test_statistics, p_values] 

3091 else: 

3092 raise NotImplementedError('Invalid heteroskedasticity test' 

3093 ' method.') 

3094 

3095 return output 

3096 

3097 def test_serial_correlation(self, method, lags=None): 

3098 """ 

3099 Ljung-Box test for no serial correlation of standardized residuals 

3100 

3101 Null hypothesis is no serial correlation. 

3102 

3103 Parameters 

3104 ---------- 

3105 method : {'ljungbox', 'boxpierce', None} 

3106 The statistical test for serial correlation. If None, an attempt is 

3107 made to select an appropriate test. 

3108 lags : None, int or array_like 

3109 If lags is an integer, then it is taken to be the largest lag 

3110 that is included; test results are reported for all smaller lag 

3111 lengths. 

3112 If lags is a list or array, then all lags are included up to the 

3113 largest lag in the list, however only the tests for the lags in the 

3114 list are reported. 

3115 If lags is None, the default maxlag is min(40, nobs - d - 1), with `d` as defined in the Notes. 

3116 

3117 Returns 

3118 ------- 

3119 output : ndarray 

3120 An array with `(test_statistic, pvalue)` for each endogenous 

3121 variable and each lag. The array is then sized 

3122 `(k_endog, 2, lags)`. If the method is called as 

3123 `ljungbox = res.test_serial_correlation()`, then `ljungbox[i]` 

3124 holds the results of the Ljung-Box test (as would be returned by 

3125 `statsmodels.stats.diagnostic.acorr_ljungbox`) for the `i` th 

3126 endogenous variable. 

3127 

3128 Notes 

3129 ----- 

3130 Let `d` = max(loglikelihood_burn, nobs_diffuse); this test is 

3131 calculated ignoring the first `d` residuals. 

3132 

3133 Output is nan for any endogenous variable which has missing values. 

3134 

3135 See Also 

3136 -------- 

3137 statsmodels.stats.diagnostic.acorr_ljungbox 

3138 Ljung-Box test for serial correlation. 
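
Examples
--------
A minimal usage sketch (here `res` is assumed to be the results object
from a fitted state space model):

>>> sc = res.test_serial_correlation(method='ljungbox', lags=10)
>>> stats, pvals = sc[0]  # one entry per lag, for the first variable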

3139 """ 

3140 if method is None: 

3141 method = 'ljungbox' 

3142 

3143 if self.standardized_forecasts_error is None: 

3144 raise ValueError('Cannot compute test statistic when standardized' 

3145 ' forecast errors have not been computed.') 

3146 

3147 if method in ('ljungbox', 'boxpierce'): 

3148 from statsmodels.stats.diagnostic import acorr_ljungbox 

3149 d = np.maximum(self.loglikelihood_burn, self.nobs_diffuse) 

3150 # This differs from self.nobs_effective because here we want to 

3151 # exclude exact diffuse periods, whereas self.nobs_effective only 

3152 # excludes explicitly burned (usually approximate diffuse) periods. 

3153 nobs_effective = self.nobs - d 

3154 output = [] 

3155 

3156 # Default lags for acorr_ljungbox is 40, but may not always have 

3157 # that many observations 

3158 if lags is None: 

3159 lags = min(40, nobs_effective - 1) 

3160 

3161 for i in range(self.model.k_endog): 

3162 results = acorr_ljungbox( 

3163 self.filter_results.standardized_forecasts_error[i][d:], 

3164 lags=lags, boxpierce=(method == 'boxpierce'), 

3165 return_df=False) 

3166 if method == 'ljungbox': 

3167 output.append(results[0:2]) 

3168 else: 

3169 output.append(results[2:]) 

3170 

3171 output = np.c_[output] 

3172 else: 

3173 raise NotImplementedError('Invalid serial correlation test' 

3174 ' method.') 

3175 return output 

3176 

3177 def get_prediction(self, start=None, end=None, dynamic=False, 

3178 index=None, exog=None, extend_model=None, 

3179 extend_kwargs=None, **kwargs): 

3180 """ 

3181 In-sample prediction and out-of-sample forecasting 

3182 

3183 Parameters 

3184 ---------- 

3185 start : int, str, or datetime, optional 

3186 Zero-indexed observation number at which to start forecasting, 

3187 i.e., the first forecast is start. Can also be a date string to 

3188 parse or a datetime type. Default is the zeroth observation. 

3189 end : int, str, or datetime, optional 

3190 Zero-indexed observation number at which to end forecasting, i.e., 

3191 the last forecast is end. Can also be a date string to 

3192 parse or a datetime type. However, if the dates index does not 

3193 have a fixed frequency, end must be an integer index if you 

3194 want out of sample prediction. Default is the last observation in 

3195 the sample. 

3196 dynamic : bool, int, str, or datetime, optional 

3197 Integer offset relative to `start` at which to begin dynamic 

3198 prediction. Can also be an absolute date string to parse or a 

3199 datetime type (these are not interpreted as offsets). 

3200 Prior to this observation, true endogenous values will be used for 

3201 prediction; starting with this observation and continuing through 

3202 the end of prediction, forecasted endogenous values will be used 

3203 instead. 

3204 **kwargs 

3205 Additional arguments may be required for forecasting beyond the end 

3206 of the sample. See `FilterResults.predict` for more details. 

3207 

3208 Returns 

3209 ------- 

3210 forecast : ndarray 

3211 Array of in-sample predictions and / or out-of-sample 

3212 forecasts. An (npredict x k_endog) array. 
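
Examples
--------
A minimal usage sketch (here `res` is assumed to be the results object
from a fitted state space model):

>>> pred = res.get_prediction(start=0, dynamic=10)
>>> mean = pred.predicted_mean      # point predictions
>>> ci = pred.conf_int(alpha=0.05)  # 95% confidence intervals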

3213 """ 

3214 if start is None: 

3215 start = 0 

3216 

3217 # Handle start, end, dynamic 

3218 start, end, out_of_sample, prediction_index = ( 

3219 self.model._get_prediction_index(start, end, index)) 

3220 

3221 # Handle `dynamic` 

3222 if isinstance(dynamic, (bytes, str)): 

3223 dynamic, _, _ = self.model._get_index_loc(dynamic) 

3224 

3225 # If we have out-of-sample forecasting and `exog` or in general any 

3226 # kind of time-varying state space model, then we need to create an 

3227 # extended model to get updated state space system matrices 

3228 if extend_model is None: 

3229 extend_model = (self.model.exog is not None or 

3230 not self.filter_results.time_invariant) 

3231 if out_of_sample and extend_model: 

3232 kwargs = self.model._get_extension_time_varying_matrices( 

3233 self.params, exog, out_of_sample, extend_kwargs, 

3234 transformed=True, includes_fixed=True, **kwargs) 

3235 

3236 # Make sure the model class has the current parameters 

3237 self.model.update(self.params, transformed=True, includes_fixed=True) 

3238 

3239 # Perform the prediction 

3240 # This is a (k_endog x npredictions) array; do not want to squeeze in 

3241 # case of npredictions = 1 

3242 prediction_results = self.filter_results.predict( 

3243 start, end + out_of_sample + 1, dynamic, **kwargs) 

3244 

3245 # Return a new mlemodel.PredictionResults object 

3246 return PredictionResultsWrapper(PredictionResults( 

3247 self, prediction_results, row_labels=prediction_index)) 

3248 

3249 def get_forecast(self, steps=1, **kwargs): 

3250 """ 

3251 Out-of-sample forecasts 

3252 

3253 Parameters 

3254 ---------- 

3255 steps : int, str, or datetime, optional 

3256 If an integer, the number of steps to forecast from the end of the 

3257 sample. Can also be a date string to parse or a datetime type. 

3258 However, if the dates index does not have a fixed frequency, steps 

3259 must be an integer. Default is 1. 

3260 **kwargs 

3261 Additional arguments may be required for forecasting beyond the end 

3262 of the sample. See `FilterResults.predict` for more details. 

3263 

3264 Returns 

3265 ------- 

3266 forecast : ndarray 

3267 Array of out of sample forecasts. A (steps x k_endog) array. 
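
Examples
--------
A minimal usage sketch (here `res` is assumed to be the results object
from a fitted state space model):

>>> fcast = res.get_forecast(steps=5)
>>> mean = fcast.predicted_mean  # point forecasts
>>> ci = fcast.conf_int()        # forecast confidence intervals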

3268 """ 

3269 if isinstance(steps, int): 

3270 end = self.nobs + steps - 1 

3271 else: 

3272 end = steps 

3273 return self.get_prediction(start=self.nobs, end=end, **kwargs) 

3274 

3275 def predict(self, start=None, end=None, dynamic=False, **kwargs): 

3276 """ 

3277 In-sample prediction and out-of-sample forecasting 

3278 

3279 Parameters 

3280 ---------- 

3281 start : int, str, or datetime, optional 

3282 Zero-indexed observation number at which to start forecasting, 

3283 i.e., the first forecast is start. Can also be a date string to 

3284 parse or a datetime type. Default is the zeroth observation. 

3285 end : int, str, or datetime, optional 

3286 Zero-indexed observation number at which to end forecasting, i.e., 

3287 the last forecast is end. Can also be a date string to 

3288 parse or a datetime type. However, if the dates index does not 

3289 have a fixed frequency, end must be an integer index if you 

3290 want out of sample prediction. Default is the last observation in 

3291 the sample. 

3292 dynamic : bool, int, str, or datetime, optional 

3293 Integer offset relative to `start` at which to begin dynamic 

3294 prediction. Can also be an absolute date string to parse or a 

3295 datetime type (these are not interpreted as offsets). 

3296 Prior to this observation, true endogenous values will be used for 

3297 prediction; starting with this observation and continuing through 

3298 the end of prediction, forecasted endogenous values will be used 

3299 instead. 

3300 **kwargs 

3301 Additional arguments may be required for forecasting beyond the end 

3302 of the sample. See `FilterResults.predict` for more details. 

3303 

3304 Returns 

3305 ------- 

3306 forecast : array_like 

3307 Array of in-sample predictions and / or out-of-sample 

3308 forecasts. An (npredict x k_endog) array. 
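
Examples
--------
A minimal usage sketch (here `res` is assumed to be the results object
from a fitted state space model):

>>> # in-sample predictions followed by five out-of-sample forecasts
>>> yhat = res.predict(start=0, end=res.nobs + 4)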

3309 """ 

3310 # Perform the prediction 

3311 prediction_results = self.get_prediction(start, end, dynamic, **kwargs) 

3312 return prediction_results.predicted_mean 

3313 

3314 def forecast(self, steps=1, **kwargs): 

3315 """ 

3316 Out-of-sample forecasts 

3317 

3318 Parameters 

3319 ---------- 

3320 steps : int, str, or datetime, optional 

3321 If an integer, the number of steps to forecast from the end of the 

3322 sample. Can also be a date string to parse or a datetime type. 

3323 However, if the dates index does not have a fixed frequency, steps 

3324 must be an integer. Default is 1. 

3325 **kwargs 

3326 Additional arguments may be required for forecasting beyond the end 

3327 of the sample. See `FilterResults.predict` for more details. 

3328 

3329 Returns 

3330 ------- 

3331 forecast : ndarray 

3332 Array of out of sample forecasts. A (steps x k_endog) array. 
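
Examples
--------
A minimal usage sketch (here `res` is assumed to be the results object
from a fitted state space model):

>>> yhat = res.forecast(steps=3)  # three out-of-sample forecasts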

3333 """ 

3334 if isinstance(steps, int): 

3335 end = self.nobs + steps - 1 

3336 else: 

3337 end = steps 

3338 return self.predict(start=self.nobs, end=end, **kwargs) 

3339 

3340 def simulate(self, nsimulations, measurement_shocks=None, 

3341 state_shocks=None, initial_state=None, anchor=None, 

3342 repetitions=None, exog=None, extend_model=None, 

3343 extend_kwargs=None, **kwargs): 

3344 r""" 

3345 Simulate a new time series following the state space model 

3346 

3347 Parameters 

3348 ---------- 

3349 nsimulations : int 

3350 The number of observations to simulate. If the model is 

3351 time-invariant this can be any number. If the model is 

3352 time-varying, then this number must be less than or equal to the 

3353 number of observations. 

3354 measurement_shocks : array_like, optional 

3355 If specified, these are the shocks to the measurement equation, 

3356 :math:`\varepsilon_t`. If unspecified, these are automatically 

3357 generated using a pseudo-random number generator. If specified, 

3358 must be shaped `nsimulations` x `k_endog`, where `k_endog` is the 

3359 same as in the state space model. 

3360 state_shocks : array_like, optional 

3361 If specified, these are the shocks to the state equation, 

3362 :math:`\eta_t`. If unspecified, these are automatically 

3363 generated using a pseudo-random number generator. If specified, 

3364 must be shaped `nsimulations` x `k_posdef` where `k_posdef` is the 

3365 same as in the state space model. 

3366 initial_state : array_like, optional 

3367 If specified, this is the initial state vector to use in 

3368 simulation, which should be shaped (`k_states` x 1), where 

3369 `k_states` is the same as in the state space model. If unspecified, 

3370 but the model has been initialized, then that initialization is 

3371 used. This must be specified if `anchor` is anything other than 

3372 "start" or 0. 

3373 anchor : int, str, or datetime, optional 

3374 Starting point from which to begin the simulations; type depends on 

3375 the index of the given `endog` model. Two special cases are the 

3376 strings 'start' and 'end', which refer to starting at the beginning 

3377 and end of the sample, respectively. If a date/time index was 

3378 provided to the model, then this argument can be a date string to 

3379 parse or a datetime type. Otherwise, an integer index should be 

3380 given. Default is 'start'. 

3381 repetitions : int, optional 

3382 Number of simulated paths to generate. Default is 1 simulated path. 

3383 exog : array_like, optional 

3384 New observations of exogenous regressors, if applicable. 

3385 

3386 Returns 

3387 ------- 

3388 simulated_obs : ndarray 

3389 An array of simulated observations. If `repetitions=None`, then it 

3390 will be shaped (nsimulations x k_endog) or (nsimulations,) if 

3391 `k_endog=1`. Otherwise it will be shaped 

3392 (nsimulations x k_endog x repetitions). If the model was given 

3393 Pandas input then the output will be a Pandas object. If 

3394 `k_endog > 1` and `repetitions` is not None, then the output will 

3395 be a Pandas DataFrame that has a MultiIndex for the columns, with 

3396 the first level containing the names of the `endog` variables and 

3397 the second level containing the repetition number. 
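
Examples
--------
A minimal usage sketch (here `res` is assumed to be the results object
from a fitted state space model):

>>> # ten simulated paths, each of length 100, from the sample start
>>> sims = res.simulate(100, repetitions=10)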

3398 """ 

3399 # Get the starting location 

3400 if anchor is None or anchor == 'start': 

3401 iloc = 0 

3402 elif anchor == 'end': 

3403 iloc = self.nobs 

3404 else: 

3405 iloc, _, _ = self.model._get_index_loc(anchor) 

3406 if isinstance(iloc, slice): 

3407 iloc = iloc.start 

3408 

3409 if iloc < 0: 

3410 iloc = self.nobs + iloc 

3411 if iloc > self.nobs: 

3412 raise ValueError('Cannot anchor simulation outside of the sample.') 

3413 

3414 # Setup the initial state 

3415 if initial_state is None: 

3416 initial_state_moments = ( 

3417 self.predicted_state[:, iloc], 

3418 self.predicted_state_cov[:, :, iloc]) 

3419 

3420 _repetitions = 1 if repetitions is None else repetitions 

3421 

3422 initial_state = np.random.multivariate_normal( 

3423 *initial_state_moments, size=_repetitions).T 

3424 

3425 scale = self.scale if self.filter_results.filter_concentrated else None 

3426 with self.model.ssm.fixed_scale(scale): 

3427 sim = self.model.simulate( 

3428 self.params, nsimulations, 

3429 measurement_shocks=measurement_shocks, 

3430 state_shocks=state_shocks, initial_state=initial_state, 

3431 anchor=anchor, repetitions=repetitions, exog=exog, 

3432 transformed=True, includes_fixed=True, 

3433 extend_model=extend_model, extend_kwargs=extend_kwargs, 

3434 **kwargs) 

3435 

3436 return sim 

3437 

3438 def impulse_responses(self, steps=1, impulse=0, orthogonalized=False, 

3439 cumulative=False, **kwargs): 

3440 """ 

3441 Impulse response function 

3442 

3443 Parameters 

3444 ---------- 

3445 steps : int, optional 

3446 The number of steps for which impulse responses are calculated. 

3447 Default is 1. Note that for time-invariant models, the initial 

3448 impulse is not counted as a step, so if `steps=1`, the output will 

3449 have 2 entries. 

3450 impulse : int or array_like 

3451 If an integer, the state innovation to pulse; must be between 0 

3452 and `k_posdef-1`. Alternatively, a custom impulse vector may be 

3453 provided; must be shaped `k_posdef x 1`. 

3454 orthogonalized : bool, optional 

3455 Whether or not to perform impulse using orthogonalized innovations. 

3456 Note that this will also affect custom `impulse` vectors. Default 

3457 is False. 

3458 cumulative : bool, optional 

3459 Whether or not to return cumulative impulse responses. Default is 

3460 False. 

3461 anchor : int, str, or datetime, optional 

3462 Time point within the sample for the state innovation impulse. Type 

3463 depends on the index of the given `endog` in the model. Two special 

3464 cases are the strings 'start' and 'end', which refer to setting the 

3465 impulse at the first and last points of the sample, respectively. 

3466 Integer values can run from 0 to `nobs - 1`, or can be negative to 

3467 apply negative indexing. Finally, if a date/time index was provided 

3468 to the model, then this argument can be a date string to parse or a 

3469 datetime type. Default is 'start'. 

3470 exog : array_like, optional 

3471 New observations of exogenous regressors, if applicable. 

3472 **kwargs 

3473 If the model has time-varying design or transition matrices and the 

3474 combination of `anchor` and `steps` implies creating impulse 

3475 responses for the out-of-sample period, then these matrices must 

3476 have updated values provided for the out-of-sample steps. For 

3477 example, if `design` is a time-varying component, `nobs` is 10, 

3478 `anchor=1`, and `steps` is 15, a (`k_endog` x `k_states` x 7) 

3479 matrix must be provided with the new design matrix values. 

3480 

3481 Returns 

3482 ------- 

3483 impulse_responses : ndarray 

3484 Responses for each endogenous variable due to the impulse 

3485 given by the `impulse` argument. For a time-invariant model, the 

3486 impulse responses are given for `steps + 1` elements (this gives 

3487 the "initial impulse" followed by `steps` responses for the 

3488 important cases of VAR and SARIMAX models), while for time-varying 

3489 models the impulse responses are only given for `steps` elements 

3490 (to avoid having to unexpectedly provide updated time-varying 

3491 matrices). 

3492 

3493 Notes 

3494 ----- 

3495 Intercepts in the measurement and state equation are ignored when 

3496 calculating impulse responses. 
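
Examples
--------
A minimal usage sketch (here `res` is assumed to be the results object
from a fitted state space model):

>>> # cumulative responses to a shock in the first state innovation
>>> irfs = res.impulse_responses(steps=10, impulse=0, cumulative=True)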

3497 """ 

3498 scale = self.scale if self.filter_results.filter_concentrated else None 

3499 with self.model.ssm.fixed_scale(scale): 

3500 irfs = self.model.impulse_responses(self.params, steps, impulse, 

3501 orthogonalized, cumulative, 

3502 **kwargs) 

3503 return irfs 

3504 

3505 def _apply(self, mod, refit=False, fit_kwargs=None, **kwargs): 

3506 if fit_kwargs is None: 

3507 fit_kwargs = {} 

3508 

3509 if refit: 

3510 fit_kwargs.setdefault('start_params', self.params) 

3511 if self._has_fixed_params: 

3512 fit_kwargs.setdefault('includes_fixed', True) 

3513 res = mod.fit_constrained(self._fixed_params, **fit_kwargs) 

3514 else: 

3515 res = mod.fit(**fit_kwargs) 

3516 else: 

3517 if 'cov_type' in fit_kwargs: 

3518 raise ValueError('Cannot specify covariance type in' 

3519 ' `fit_kwargs` unless refitting' 

3520 ' parameters (not available in extend).') 

3521 if 'cov_kwds' in fit_kwargs: 

3522 raise ValueError('Cannot specify covariance keyword arguments' 

3523 ' in `fit_kwargs` unless refitting' 

3524 ' parameters (not available in extend).') 

3525 

3526 fit_kwargs['cov_type'] = 'custom' 

3527 fit_kwargs['cov_kwds'] = { 

3528 'custom_cov_type': self.cov_type, 

3529 'custom_cov_params': self.cov_params_default, 

3530 'custom_description': ('Parameters and standard errors' 

3531 ' were estimated using a different' 

3532 ' dataset and were then applied to this' 

3533 ' dataset. %s' 

3534 % self.cov_kwds['description'])} 

3535 

3536 if self.smoother_results is not None: 

3537 func = mod.smooth 

3538 else: 

3539 func = mod.filter 

3540 

3541 if self._has_fixed_params: 

3542 with mod.fix_params(self._fixed_params): 

3543 fit_kwargs.setdefault('includes_fixed', True) 

3544 res = func(self.params, **fit_kwargs) 

3545 else: 

3546 res = func(self.params, **fit_kwargs) 

3547 

3548 return res 

3549 

3550 def append(self, endog, exog=None, refit=False, fit_kwargs=None, **kwargs): 

3551 """ 

3552 Recreate the results object with new data appended to the original data 

3553 

3554 Creates a new result object applied to a dataset that is created by 

3555 appending new data to the end of the model's original data. The new 

3556 results can then be used for analysis or forecasting. 

3557 

3558 Parameters 

3559 ---------- 

3560 endog : array_like 

3561 New observations from the modeled time-series process. 

3562 exog : array_like, optional 

3563 New observations of exogenous regressors, if applicable. 

3564 refit : bool, optional 

3565 Whether to re-fit the parameters, based on the combined dataset. 

3566 Default is False (so parameters from the current results object 

3567 are used to create the new results object). 

3568 fit_kwargs : dict, optional 

3569 Keyword arguments to pass to `fit` (if `refit=True`) or `filter` / 

3570 `smooth`. 

3571 **kwargs 

3572 Keyword arguments may be used to modify model specification 

3573 arguments when creating the new model object. 

3574 

3575 Returns 

3576 ------- 

3577 results 

3578 Updated Results object, that includes results from both the 

3579 original dataset and the new dataset. 

3580 

3581 Notes 

3582 ----- 

3583 The `endog` and `exog` arguments to this method must be formatted in 

3584 the same way (e.g. Pandas Series versus Numpy array) as were the 

3585 `endog` and `exog` arrays passed to the original model. 

3586 

3587 The `endog` argument to this method should consist of new observations 

3588 that occurred directly after the last element of the model's original 

3589 `endog` array. For any other kind of dataset, see the `apply` method. 

3590 

3591 This method will apply filtering to all of the original data as well 

3592 as to the new data. To apply filtering only to the new data (which 

3593 can be much faster if the original dataset is large), see the `extend` 

3594 method. 

3595 

3596 Examples 

3597 -------- 

3598 >>> index = pd.period_range(start='2000', periods=2, freq='A') 

3599 >>> original_observations = pd.Series([1.2, 1.5], index=index) 

3600 >>> mod = sm.tsa.SARIMAX(original_observations) 

3601 >>> res = mod.fit() 

3602 >>> print(res.params) 

3603 ar.L1 0.9756 

3604 sigma2 0.0889 

3605 dtype: float64 

3606 >>> print(res.fittedvalues) 

3607 2000 0.0000 

3608 2001 1.1707 

3609 Freq: A-DEC, dtype: float64 

3610 >>> print(res.forecast(1)) 

3611 2002 1.4634 

3612 Freq: A-DEC, dtype: float64 

3613 

3614 >>> new_index = pd.period_range(start='2002', periods=1, freq='A') 

3615 >>> new_observations = pd.Series([0.9], index=new_index) 

3616 >>> updated_res = res.append(new_observations) 

3617 >>> print(updated_res.params) 

3618 ar.L1 0.9756 

3619 sigma2 0.0889 

3620 dtype: float64 

3621 >>> print(updated_res.fittedvalues) 

3622 2000 0.0000 

3623 2001 1.1707 

3624 2002 1.4634 

3625 Freq: A-DEC, dtype: float64 

3626 >>> print(updated_res.forecast(1)) 

3627 2003 0.878 

3628 Freq: A-DEC, dtype: float64 

3629 

3630 See Also 

3631 -------- 

3632 statsmodels.tsa.statespace.mlemodel.MLEResults.extend 

3633 statsmodels.tsa.statespace.mlemodel.MLEResults.apply 

3634 """ 

3635 start = self.nobs 

3636 end = self.nobs + len(endog) - 1 

3637 _, _, _, append_ix = self.model._get_prediction_index(start, end) 

3638 

3639 # Check the index of the new data 

3640 if isinstance(self.model.data, PandasData): 

3641 _check_index(append_ix, endog, '`endog`') 

3642 

3643 # Concatenate the new data to original data 

3644 new_endog = concat([self.model.data.orig_endog, endog], axis=0, 

3645 allow_mix=True) 

3646 

3647 # Create a continuous index for the combined data 

3648 if isinstance(self.model.data, PandasData): 

3649 start = 0 

3650 end = len(new_endog) - 1 

3651 _, _, _, new_index = self.model._get_prediction_index(start, end) 

3652 # Standardize `endog` to have the right index and columns 

3653 columns = self.model.endog_names 

3654 if not isinstance(columns, list): 

3655 columns = [columns] 

3656 new_endog = pd.DataFrame(new_endog, index=new_index, 

3657 columns=columns) 

3658 

3659 # Handle `exog` 

3660 if exog is not None: 

3661 _, exog = prepare_exog(exog) 

3662 _check_index(append_ix, exog, '`exog`') 

3663 

3664 new_exog = concat([self.model.data.orig_exog, exog], axis=0, 

3665 allow_mix=True) 

3666 else: 

3667 new_exog = None 

3668 

3669 mod = self.model.clone(new_endog, exog=new_exog, **kwargs) 

3670 res = self._apply(mod, refit=refit, fit_kwargs=fit_kwargs, **kwargs) 

3671 

3672 return res 

3673 

3674 def extend(self, endog, exog=None, fit_kwargs=None, **kwargs): 

3675 """ 

3676 Recreate the results object for new data that extends the original data 

3677 

3678 Creates a new result object applied to a new dataset that is assumed to 

3679 follow directly from the end of the model's original data. The new 

3680 results can then be used for analysis or forecasting. 

3681 

3682 Parameters 

3683 ---------- 

3684 endog : array_like 

3685 New observations from the modeled time-series process. 

3686 exog : array_like, optional 

3687 New observations of exogenous regressors, if applicable. 

3688 fit_kwargs : dict, optional 

3689 Keyword arguments to pass to `filter` or `smooth`. 

3690 **kwargs 

3691 Keyword arguments may be used to modify model specification 

3692 arguments when creating the new model object. 

3693 

3694 Returns 

3695 ------- 

3696 results 

3697 Updated Results object, that includes results only for the new 

3698 dataset. 

3699 

3700 Notes 

3701 ----- 

3702 The `endog` argument to this method should consist of new observations 

3703 that occurred directly after the last element of the model's original 

3704 `endog` array. For any other kind of dataset, see the `apply` method. 

3705 

3706 This method will apply filtering only to the new data provided by the 

3707 `endog` argument, which can be much faster than re-filtering the entire 

3708 dataset. However, the returned results object will only have results 

3709 for the new data. To retrieve results for both the new data and the 

3710 original data, see the `append` method. 

3711 

3712 Examples 

3713 -------- 

3714 >>> index = pd.period_range(start='2000', periods=2, freq='A') 

3715 >>> original_observations = pd.Series([1.2, 1.5], index=index) 

3716 >>> mod = sm.tsa.SARIMAX(original_observations) 

3717 >>> res = mod.fit() 

3718 >>> print(res.params) 

3719 ar.L1 0.9756 

3720 sigma2 0.0889 

3721 dtype: float64 

3722 >>> print(res.fittedvalues) 

3723 2000 0.0000 

3724 2001 1.1707 

3725 Freq: A-DEC, dtype: float64 

3726 >>> print(res.forecast(1)) 

3727 2002 1.4634 

3728 Freq: A-DEC, dtype: float64 

3729 

3730 >>> new_index = pd.period_range(start='2002', periods=1, freq='A') 

3731 >>> new_observations = pd.Series([0.9], index=new_index) 

3732 >>> updated_res = res.extend(new_observations) 

3733 >>> print(updated_res.params) 

3734 ar.L1 0.9756 

3735 sigma2 0.0889 

3736 dtype: float64 

3737 >>> print(updated_res.fittedvalues) 

3738 2002 1.4634 

3739 Freq: A-DEC, dtype: float64 

3740 >>> print(updated_res.forecast(1)) 

3741 2003 0.878 

3742 Freq: A-DEC, dtype: float64 

3743 

3744 See Also 

3745 -------- 

3746 statsmodels.tsa.statespace.mlemodel.MLEResults.append 

3747 statsmodels.tsa.statespace.mlemodel.MLEResults.apply 

3748 """ 

3749 start = self.nobs 

3750 end = self.nobs + len(endog) - 1 

3751 _, _, _, extend_ix = self.model._get_prediction_index(start, end) 

3752 

3753 if isinstance(self.model.data, PandasData): 

3754 _check_index(extend_ix, endog, '`endog`') 

3755 

3756 # Standardize `endog` to have the right index and columns 

3757 columns = self.model.endog_names 

3758 if not isinstance(columns, list): 

3759 columns = [columns] 

3760 endog = pd.DataFrame(endog, index=extend_ix, columns=columns) 

3761 # Extend the current fit result to additional data 

3762 mod = self.model.clone(endog, exog=exog, **kwargs) 

3763 mod.ssm.initialization = Initialization( 

3764 mod.k_states, 'known', constant=self.predicted_state[..., -1], 

3765 stationary_cov=self.predicted_state_cov[..., -1]) 

3766 res = self._apply(mod, refit=False, fit_kwargs=fit_kwargs, **kwargs) 

3767 

3768 return res 

3769 

3770 def apply(self, endog, exog=None, refit=False, fit_kwargs=None, **kwargs): 

3771 """ 

3772 Apply the fitted parameters to new data unrelated to the original data 

3773 

3774 Creates a new result object using the current fitted parameters, 

3775 applied to a completely new dataset that is assumed to be unrelated to 

3776 the model's original data. The new results can then be used for 

3777 analysis or forecasting. 

3778 

3779 Parameters 

3780 ---------- 

3781 endog : array_like 

3782 New observations from the modeled time-series process. 

3783 exog : array_like, optional 

3784 New observations of exogenous regressors, if applicable. 

3785 refit : bool, optional 

3786 Whether to re-fit the parameters, using the new dataset. 

3787 Default is False (so parameters from the current results object 

3788 are used to create the new results object). 

3789 fit_kwargs : dict, optional 

3790 Keyword arguments to pass to `fit` (if `refit=True`) or `filter` / 

3791 `smooth`. 

3792 **kwargs 

3793 Keyword arguments may be used to modify model specification 

3794 arguments when creating the new model object. 

3795 

3796 Returns 

3797 ------- 

3798 results 

3799 Updated Results object, that includes results only for the new 

3800 dataset. 

3801 

3802 Notes 

3803 ----- 

3804 The `endog` argument to this method should consist of new observations 

3805 that are unrelated to the original model's `endog` dataset. For 

3806 observations that continue the original dataset by following directly 

3807 after its last element, see the `append` and `extend` methods. 

3808 

3809 Examples 

3810 -------- 

3811 >>> index = pd.period_range(start='2000', periods=2, freq='A') 

3812 >>> original_observations = pd.Series([1.2, 1.5], index=index) 

3813 >>> mod = sm.tsa.SARIMAX(original_observations) 

3814 >>> res = mod.fit() 

3815 >>> print(res.params) 

3816 ar.L1 0.9756 

3817 sigma2 0.0889 

3818 dtype: float64 

3819 >>> print(res.fittedvalues) 

3820 2000 0.0000 

3821 2001 1.1707 

3822 Freq: A-DEC, dtype: float64 

3823 >>> print(res.forecast(1)) 

3824 2002 1.4634 

3825 Freq: A-DEC, dtype: float64 

3826 

3827 >>> new_index = pd.period_range(start='1980', periods=3, freq='A') 

3828 >>> new_observations = pd.Series([1.4, 0.3, 1.2], index=new_index) 

3829 >>> new_res = res.apply(new_observations) 

3830 >>> print(new_res.params) 

3831 ar.L1 0.9756 

3832 sigma2 0.0889 

3833 dtype: float64 

3834 >>> print(new_res.fittedvalues) 

3835 1980 1.1707 

3836 1981 1.3659 

3837 1982 0.2927 

3838 Freq: A-DEC, dtype: float64 

3840 >>> print(new_res.forecast(1)) 

3841 1983 1.1707 

3842 Freq: A-DEC, dtype: float64 

3843 

3844 See Also 

3845 -------- 

3846 statsmodels.tsa.statespace.mlemodel.MLEResults.append 

3847 statsmodels.tsa.statespace.mlemodel.MLEResults.extend 

3848 """ 

3849 mod = self.model.clone(endog, exog=exog, **kwargs) 

3850 res = self._apply(mod, refit=refit, fit_kwargs=fit_kwargs, **kwargs) 

3851 

3852 return res 

3853 

3854 def plot_diagnostics(self, variable=0, lags=10, fig=None, figsize=None): 

3855 """ 

3856 Diagnostic plots for standardized residuals of one endogenous variable 

3857 

3858 Parameters 

3859 ---------- 

3860 variable : int, optional 

3861 Index of the endogenous variable for which the diagnostic plots 

3862 should be created. Default is 0. 

3863 lags : int, optional 

3864 Number of lags to include in the correlogram. Default is 10. 

3865 fig : Figure, optional 

3866 If given, subplots are created in this figure instead of in a new 

3867 figure. Note that the 2x2 grid will be created in the provided 

3868 figure using `fig.add_subplot()`. 

3869 figsize : tuple, optional 

3870 If a figure is created, this argument allows specifying a size. 

3871 The tuple is (width, height). 

3872 

3873 Notes 

3874 ----- 

3875 Produces a 2x2 plot grid with the following plots (ordered clockwise 

3876 from top left): 

3877 

3878 1. Standardized residuals over time 

3879 2. Histogram plus estimated density of standardized residuals, along 

3880 with a Normal(0,1) density plotted for reference. 

3881 3. Normal Q-Q plot, with Normal reference line. 

3882 4. Correlogram 

3883 

3884 See Also 

3885 -------- 

3886 statsmodels.graphics.gofplots.qqplot 

3887 statsmodels.graphics.tsaplots.plot_acf 
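
Examples
--------
A minimal usage sketch (here `res` is assumed to be the results object
from a fitted state space model, with matplotlib available):

>>> fig = res.plot_diagnostics(variable=0, lags=20, figsize=(10, 8))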

3888 """ 

3889 from statsmodels.graphics.utils import _import_mpl, create_mpl_fig 

3890 _import_mpl() 

3891 fig = create_mpl_fig(fig, figsize) 

3892 # Eliminate residuals associated with burned or diffuse likelihoods 

3893 d = np.maximum(self.loglikelihood_burn, self.nobs_diffuse) 

3894 resid = self.filter_results.standardized_forecasts_error[variable, d:] 

3895 

3896 # Top-left: residuals vs time 

3897 ax = fig.add_subplot(221) 

3898 if hasattr(self.data, 'dates') and self.data.dates is not None: 

3899 x = self.data.dates[d:]._mpl_repr() 

3900 else: 

3901 x = np.arange(len(resid)) 

3902 ax.plot(x, resid) 

3903 ax.hlines(0, x[0], x[-1], alpha=0.5) 

3904 ax.set_xlim(x[0], x[-1]) 

3905 ax.set_title('Standardized residual') 

3906 

3907 # Top-right: histogram, Gaussian kernel density, Normal density 

3908 # Can only do histogram and Gaussian kernel density on the non-null 

3909 # elements 

3910 resid_nonmissing = resid[~(np.isnan(resid))] 

3911 ax = fig.add_subplot(222) 

3912 

3913 # gh5792: Remove except after support for matplotlib>2.1 required 

3914 try: 

3915 ax.hist(resid_nonmissing, density=True, label='Hist') 

3916 except AttributeError: 

3917 ax.hist(resid_nonmissing, normed=True, label='Hist') 

3918 

3919 from scipy.stats import gaussian_kde, norm 

3920 kde = gaussian_kde(resid_nonmissing) 

3921 xlim = (-1.96*2, 1.96*2) 

3922 x = np.linspace(xlim[0], xlim[1]) 

3923 ax.plot(x, kde(x), label='KDE') 

3924 ax.plot(x, norm.pdf(x), label='N(0,1)') 

3925 ax.set_xlim(xlim) 

3926 ax.legend() 

3927 ax.set_title('Histogram plus estimated density') 

3928 

3929 # Bottom-left: QQ plot 

3930 ax = fig.add_subplot(223) 

3931 from statsmodels.graphics.gofplots import qqplot 

3932 qqplot(resid_nonmissing, line='s', ax=ax) 

3933 ax.set_title('Normal Q-Q') 

3934 

3935 # Bottom-right: Correlogram 

3936 ax = fig.add_subplot(224) 

3937 from statsmodels.graphics.tsaplots import plot_acf 

3938 plot_acf(resid, ax=ax, lags=lags) 

3939 ax.set_title('Correlogram') 

3940 

3941 ax.set_ylim(-1, 1) 

3942 

3943 return fig 

3944 

3945 def summary(self, alpha=.05, start=None, title=None, model_name=None, 

3946 display_params=True): 

3947 """ 

3948 Summarize the Model 

3949 

3950 Parameters 

3951 ---------- 

3952 alpha : float, optional 

3953 Significance level for the confidence intervals. Default is 0.05. 

3954 start : int, optional 

3955 Integer of the start observation. Default is 0. 

title : str, optional 

The title used for the summary table. 

3956 model_name : str, optional 

3957 The name of the model used. Default is to use model class name. 

display_params : bool, optional 

Whether or not to display tables of estimated parameters. 

3958 

3959 Returns 

3960 ------- 

3961 summary : Summary instance 

3962 This holds the summary table and text, which can be printed or 

3963 converted to various output formats. 

3964 

3965 See Also 

3966 -------- 

3967 statsmodels.iolib.summary.Summary 
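
Examples
--------
A minimal usage sketch (here `res` is assumed to be the results object
from a fitted state space model):

>>> print(res.summary(alpha=0.10, model_name='My model'))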

3968 """ 

3969 from statsmodels.iolib.summary import Summary 

3970 

3971 # Model specification results 

3972 model = self.model 

3973 if title is None: 

3974 title = 'Statespace Model Results' 

3975 

3976 if start is None: 

3977 start = 0 

3978 if self.model._index_dates: 

3979 ix = self.model._index 

3980 d = ix[start] 

3981 sample = ['%02d-%02d-%02d' % (d.month, d.day, d.year)] 

3982 d = ix[-1] 

3983 sample += ['- ' + '%02d-%02d-%02d' % (d.month, d.day, d.year)] 

3984 else: 

3985 sample = [str(start), ' - ' + str(self.nobs)] 

3986 

3987 # Standardize the model name as a list of str 

3988 if model_name is None: 

3989 model_name = model.__class__.__name__ 

3990 

3991 # Diagnostic tests results 

3992 try: 

3993 het = self.test_heteroskedasticity(method='breakvar') 

3994 except Exception: # FIXME: catch something specific 

3995 het = np.array([[np.nan]*2]) 

3996 try: 

3997 lb = self.test_serial_correlation(method='ljungbox') 

3998 except Exception: # FIXME: catch something specific 

3999 lb = np.array([[np.nan]*2]).reshape(1, 2, 1) 

4000 try: 

4001 jb = self.test_normality(method='jarquebera') 

4002 except Exception: # FIXME: catch something specific 

4003 jb = np.array([[np.nan]*4]) 

4004 

4005 # Create the tables 

4006 if not isinstance(model_name, list): 

4007 model_name = [model_name] 

4008 

4009 top_left = [('Dep. Variable:', None)] 

4010 top_left.append(('Model:', [model_name[0]])) 

4011 for i in range(1, len(model_name)): 

4012 top_left.append(('', ['+ ' + model_name[i]])) 

4013 top_left += [ 

4014 ('Date:', None), 

4015 ('Time:', None), 

4016 ('Sample:', [sample[0]]), 

4017 ('', [sample[1]]) 

4018 ] 

4019 

4020 top_right = [ 

4021 ('No. Observations:', [self.nobs]), 

4022 ('Log Likelihood', ["%#5.3f" % self.llf]), 

4023 ] 

4024 if hasattr(self, 'rsquared'): 

4025 top_right.append(('R-squared:', ["%#8.3f" % self.rsquared])) 

4026 top_right += [ 

4027 ('AIC', ["%#5.3f" % self.aic]), 

4028 ('BIC', ["%#5.3f" % self.bic]), 

4029 ('HQIC', ["%#5.3f" % self.hqic])] 

4030 if (self.filter_results is not None and 

4031 self.filter_results.filter_concentrated): 

4032 top_right.append(('Scale', ["%#5.3f" % self.scale])) 

4033 

4034 if hasattr(self, 'cov_type'): 

4035 top_left.append(('Covariance Type:', [self.cov_type])) 

4036 

4037 format_str = lambda array: [ # noqa:E731 

4038 ', '.join(['{0:.2f}'.format(i) for i in array]) 

4039 ] 

4040 diagn_left = [('Ljung-Box (Q):', format_str(lb[:, 0, -1])), 

4041 ('Prob(Q):', format_str(lb[:, 1, -1])), 

4042 ('Heteroskedasticity (H):', format_str(het[:, 0])), 

4043 ('Prob(H) (two-sided):', format_str(het[:, 1])) 

4044 ] 

4045 

4046 diagn_right = [('Jarque-Bera (JB):', format_str(jb[:, 0])), 

4047 ('Prob(JB):', format_str(jb[:, 1])), 

4048 ('Skew:', format_str(jb[:, 2])), 

4049 ('Kurtosis:', format_str(jb[:, 3])) 

4050 ] 

4051 

4052 summary = Summary() 

4053 summary.add_table_2cols(self, gleft=top_left, gright=top_right, 

4054 title=title) 

4055 if len(self.params) > 0 and display_params: 

4056 summary.add_table_params(self, alpha=alpha, 

4057 xname=self.param_names, use_t=False) 

4058 summary.add_table_2cols(self, gleft=diagn_left, gright=diagn_right, 

4059 title="") 

4060 

4061 # Add warnings/notes, added to text format only 

4062 etext = [] 

4063 if hasattr(self, 'cov_type') and 'description' in self.cov_kwds: 

4064 etext.append(self.cov_kwds['description']) 

4065 if self._rank < (len(self.params) - len(self.fixed_params)): 

4066 cov_params = self.cov_params() 

4067 if len(self.fixed_params) > 0: 

4068 mask = np.ix_(self._free_params_index, self._free_params_index) 

4069 cov_params = cov_params[mask] 

4070 etext.append("Covariance matrix is singular or near-singular," 

4071 " with condition number %6.3g. Standard errors may be" 

4072 " unstable." % np.linalg.cond(cov_params)) 

4073 

4074 if etext: 

4075 etext = ["[{0}] {1}".format(i + 1, text) 

4076 for i, text in enumerate(etext)] 

4077 etext.insert(0, "Warnings:") 

4078 summary.add_extra_txt(etext) 

4079 

4080 return summary 

4081 

4082 

4083class MLEResultsWrapper(wrap.ResultsWrapper): 

4084 _attrs = { 

4085 'zvalues': 'columns', 

4086 'cov_params_approx': 'cov', 

4087 'cov_params_default': 'cov', 

4088 'cov_params_oim': 'cov', 

4089 'cov_params_opg': 'cov', 

4090 'cov_params_robust': 'cov', 

4091 'cov_params_robust_approx': 'cov', 

4092 'cov_params_robust_oim': 'cov', 

4093 } 

4094 _wrap_attrs = wrap.union_dicts(tsbase.TimeSeriesResultsWrapper._wrap_attrs, 

4095 _attrs) 

4096 _methods = { 

4097 'forecast': 'dates', 

4098 'impulse_responses': 'ynames' 

4099 } 

4100 _wrap_methods = wrap.union_dicts( 

4101 tsbase.TimeSeriesResultsWrapper._wrap_methods, _methods) 

4102wrap.populate_wrapper(MLEResultsWrapper, MLEResults) # noqa:E305 

4103 

4104 

4105class PredictionResults(pred.PredictionResults): 

4106 """ 

4107 Prediction results for state space models. 

4108 Parameters 

4109 ---------- 

4110 prediction_results : kalman_filter.PredictionResults instance 

4111 Results object from prediction after fitting or filtering a state space 

4112 model. 

4113 row_labels : iterable 

4114 Row labels for the predicted data. 

4115 


4118 """ 

4119 def __init__(self, model, prediction_results, row_labels=None): 

4120 if model.model.k_endog == 1: 

4121 endog = pd.Series(prediction_results.endog[0], 

4122 name=model.model.endog_names) 

4123 else: 

4124 endog = pd.DataFrame(prediction_results.endog.T, 

4125 columns=model.model.endog_names) 

4126 self.model = Bunch(data=model.data.__class__( 

4127 endog=endog, predict_dates=row_labels)) 

4128 self.prediction_results = prediction_results 

4129 

4130 # Get required values 

4131 k_endog, nobs = prediction_results.endog.shape 

4132 if not prediction_results.results.memory_no_forecast_mean: 

4133 predicted_mean = self.prediction_results.forecasts 

4134 else: 

4135 predicted_mean = np.full((k_endog, nobs), np.nan) 

4136 

4137 if predicted_mean.shape[0] == 1: 

4138 predicted_mean = predicted_mean[0, :] 

4139 else: 

4140 predicted_mean = predicted_mean.transpose() 

4141 

4142 if not prediction_results.results.memory_no_forecast_cov: 

4143 var_pred_mean = self.prediction_results.forecasts_error_cov 

4144 else: 

4145 var_pred_mean = np.full((k_endog, k_endog, nobs), np.nan) 

4146 

4147 if var_pred_mean.shape[0] == 1: 

4148 var_pred_mean = var_pred_mean[0, 0, :] 

4149 else: 

4150 var_pred_mean = var_pred_mean.transpose() 

4151 

4152 # Initialize 

4153 super(PredictionResults, self).__init__(predicted_mean, var_pred_mean, 

4154 dist='norm', 

4155 row_labels=row_labels, 

4156 link=identity()) 

4157 

4158 @property 

4159 def se_mean(self): 

4160 if self.var_pred_mean.ndim == 1: 

4161 se_mean = np.sqrt(self.var_pred_mean) 

4162 else: 

4163 se_mean = np.sqrt(self.var_pred_mean.T.diagonal()) 

4164 return se_mean 

4165 

4166 def conf_int(self, method='endpoint', alpha=0.05, **kwds): 

4167 # TODO: this performs metadata wrapping, and that should be handled 

4168 # by attach_* methods. However, they do not currently support 

4169 # this use case. 

4170 conf_int = super(PredictionResults, self).conf_int( 

4171 method, alpha, **kwds) 

4172 

4173 # Create a dataframe 

4174 if self.row_labels is not None: 

4175 conf_int = pd.DataFrame(conf_int, index=self.row_labels) 

4176 

4177 # Attach the endog names 

4178 ynames = self.model.data.ynames 

4179 if not isinstance(ynames, list): 

4180 ynames = [ynames] 

4181 names = (['lower {0}'.format(name) for name in ynames] + 

4182 ['upper {0}'.format(name) for name in ynames]) 

4183 conf_int.columns = names 

4184 

4185 return conf_int 

4186 

4187 def summary_frame(self, endog=0, what='all', alpha=0.05): 

4188 # TODO: finish and cleanup 

4189 # import pandas as pd 

4190 # ci_obs = self.conf_int(alpha=alpha, obs=True) # need to split 

4191 ci_mean = np.asarray(self.conf_int(alpha=alpha)) 

4192 to_include = OrderedDict() 

4193 if self.predicted_mean.ndim == 1: 

4194 yname = self.model.data.ynames 

4195 to_include['mean'] = self.predicted_mean 

4196 to_include['mean_se'] = self.se_mean 

4197 k_endog = 1 

4198 else: 

4199 yname = self.model.data.ynames[endog] 

4200 to_include['mean'] = self.predicted_mean[:, endog] 

4201 to_include['mean_se'] = self.se_mean[:, endog] 

4202 k_endog = self.predicted_mean.shape[1] 

4203 to_include['mean_ci_lower'] = ci_mean[:, endog] 

4204 to_include['mean_ci_upper'] = ci_mean[:, k_endog + endog] 

4205 

4206 # OrderedDict does not work to preserve sequence 

4207 # pandas dict does not handle 2d_array 

4208 # data = np.column_stack(list(to_include.values())) 

4209 # names = .... 

4210 res = pd.DataFrame(to_include, index=self.row_labels, 

4211 columns=to_include.keys()) 

4212 res.columns.name = yname 

4213 return res 

4214 

4215 

4216class PredictionResultsWrapper(wrap.ResultsWrapper): 

4217 _attrs = { 

4218 'predicted_mean': 'dates', 

4219 'se_mean': 'dates', 

4220 't_values': 'dates', 

4221 } 

4222 _wrap_attrs = wrap.union_dicts(_attrs) 

4223 

4224 _methods = {} 

4225 _wrap_methods = wrap.union_dicts(_methods) 

4226wrap.populate_wrapper(PredictionResultsWrapper, PredictionResults) # noqa:E305