
# -*- coding: utf-8 -*-
"""
Vector Autoregression (VAR) processes

References
----------
Lütkepohl (2005) New Introduction to Multiple Time Series Analysis
"""
from statsmodels.compat.pandas import deprecate_kwarg
from statsmodels.compat.python import lrange, iteritems

from collections import defaultdict
from io import StringIO

import numpy as np
import pandas as pd
import scipy.stats as stats

import statsmodels.base.wrapper as wrap
from statsmodels.tsa.base.tsa_model import (TimeSeriesModel,
                                            TimeSeriesResultsWrapper)
import statsmodels.tsa.tsatools as tsa
from statsmodels.iolib.table import SimpleTable
from statsmodels.tools.decorators import cache_readonly, deprecated_alias
from statsmodels.tools.linalg import logdet_symm
from statsmodels.tools.sm_exceptions import OutputWarning
from statsmodels.tsa.tsatools import vec, unvec, duplication_matrix
from statsmodels.tsa.vector_ar import output, plotting, util
from statsmodels.tsa.vector_ar.hypothesis_test_results import \
    CausalityTestResults, NormalityTestResults, WhitenessTestResults
from statsmodels.tsa.vector_ar.irf import IRAnalysis
from statsmodels.tsa.vector_ar.output import VARSummary


# -------------------------------------------------------------------------------
# VAR process routines


def ma_rep(coefs, maxn=10):
    r"""
    MA(\infty) representation of VAR(p) process

    Parameters
    ----------
    coefs : ndarray (p x k x k)
    maxn : int
        Number of MA matrices to compute

    Notes
    -----
    VAR(p) process as

    .. math:: y_t = A_1 y_{t-1} + \ldots + A_p y_{t-p} + u_t

    can be equivalently represented as

    .. math:: y_t = \mu + \sum_{i=0}^\infty \Phi_i u_{t-i}

    The \Phi_i matrices can be computed recursively, starting from
    :math:`\Phi_0 = I_k`, via :math:`\Phi_i = \sum_{j=1}^{i} \Phi_{i-j} A_j`.

    Returns
    -------
    phis : ndarray (maxn + 1 x k x k)
    """
    p, k, _ = coefs.shape
    phis = np.zeros((maxn + 1, k, k))
    phis[0] = np.eye(k)

    # recursively compute Phi matrices
    for i in range(1, maxn + 1):
        for j in range(1, i + 1):
            if j > p:
                break

            phis[i] += np.dot(phis[i - j], coefs[j - 1])

    return phis

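
# Illustrative sketch (not part of the original module; the coefficient values
# are made up): for a VAR(1) the recursion above collapses to Phi_i = A_1^i,
# which gives a quick sanity check of ``ma_rep``.
#
#     import numpy as np
#     A = np.array([[[0.5, 0.1],
#                    [0.0, 0.4]]])                  # p=1, k=2
#     phis = ma_rep(A, maxn=3)
#     assert np.allclose(phis[0], np.eye(2))        # Phi_0 = I_k
#     assert np.allclose(phis[2], A[0] @ A[0])      # Phi_2 = A_1^2
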

def is_stable(coefs, verbose=False):
    """
    Determine stability of VAR(p) system by examining the eigenvalues of the
    VAR(1) representation

    Parameters
    ----------
    coefs : ndarray (p x k x k)
    verbose : bool
        Print the eigenvalue moduli of the VAR(1) companion matrix

    Returns
    -------
    is_stable : bool
    """
    A_var1 = util.comp_matrix(coefs)
    eigs = np.linalg.eigvals(A_var1)

    if verbose:
        print('Eigenvalues of VAR(1) rep')
        for val in np.abs(eigs):
            print(val)

    return (np.abs(eigs) <= 1).all()

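
# Illustrative sketch (made-up coefficients): the upper-triangular matrix below
# has eigenvalues 0.5 and 0.4, so the process is stable. Note the ``<= 1``
# check above also lets an exact unit root pass, while strict stability
# requires all moduli < 1.
#
#     import numpy as np
#     is_stable(np.array([[[0.5, 0.1],
#                          [0.0, 0.4]]]))           # True
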

def var_acf(coefs, sig_u, nlags=None):
    """
    Compute autocovariance function ACF_y(h) up to nlags of stable VAR(p)
    process

    Parameters
    ----------
    coefs : ndarray (p x k x k)
        Coefficient matrices A_i
    sig_u : ndarray (k x k)
        Covariance of white noise process u_t
    nlags : int, optional
        Defaults to order p of system

    Notes
    -----
    Ref: Lütkepohl p.28-29

    Returns
    -------
    acf : ndarray (nlags + 1, k, k)
    """
    p, k, _ = coefs.shape
    if nlags is None:
        nlags = p

    # p x k x k, ACF for lags 0, ..., p-1
    result = np.zeros((nlags + 1, k, k))
    result[:p] = _var_acf(coefs, sig_u)

    # yule-walker equations
    for h in range(p, nlags + 1):
        # compute ACF for lag=h
        # G(h) = A_1 G(h-1) + ... + A_p G(h-p)

        for j in range(p):
            result[h] += np.dot(coefs[j], result[h - j - 1])

    return result


def _var_acf(coefs, sig_u):
    """
    Compute autocovariance function ACF_y(h) for h=1,...,p

    Notes
    -----
    Lütkepohl (2005) p.29
    """
    p, k, k2 = coefs.shape
    assert k == k2

    A = util.comp_matrix(coefs)
    # construct VAR(1) noise covariance
    SigU = np.zeros((k * p, k * p))
    SigU[:k, :k] = sig_u

    # vec(ACF) = (I_(kp)^2 - kron(A, A))^-1 vec(Sigma_U)
    vecACF = np.linalg.solve(np.eye((k * p) ** 2) - np.kron(A, A), vec(SigU))

    acf = unvec(vecACF)
    acf = [acf[:k, k * i:k * (i + 1)] for i in range(p)]
    acf = np.array(acf)

    return acf

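
# Illustrative sketch (univariate sanity check, not part of the module): for
# an AR(1) with coefficient a and innovation variance s2 the autocovariances
# are Gamma(h) = a**h * s2 / (1 - a**2), which ``var_acf`` reproduces.
#
#     import numpy as np
#     a, s2 = 0.5, 1.0
#     acov = var_acf(np.array([[[a]]]), np.array([[s2]]), nlags=2)
#     expected = [s2 / (1 - a ** 2) * a ** h for h in range(3)]
#     assert np.allclose(acov[:, 0, 0], expected)
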

def forecast_cov(ma_coefs, sigma_u, steps):
    r"""
    Compute theoretical forecast error variance matrices

    Parameters
    ----------
    ma_coefs : ndarray (steps x neqs x neqs)
        MA coefficient matrices \Phi_i
    sigma_u : ndarray (neqs x neqs)
        Covariance of the white noise process
    steps : int
        Number of steps ahead

    Notes
    -----
    .. math:: \mathrm{MSE}(h) = \sum_{i=0}^{h-1} \Phi_i \Sigma_u \Phi_i^T

    Returns
    -------
    forc_covs : ndarray (steps x neqs x neqs)
    """
    neqs = len(sigma_u)
    forc_covs = np.zeros((steps, neqs, neqs))

    prior = np.zeros((neqs, neqs))
    for h in range(steps):
        # Sigma(h) = Sigma(h-1) + Phi Sig_u Phi'
        phi = ma_coefs[h]
        var = phi @ sigma_u @ phi.T
        forc_covs[h] = prior = prior + var

    return forc_covs


mse = forecast_cov

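
# Illustrative sketch (made-up values): the recursion implemented above gives
# MSE(1) = Sigma_u and MSE(2) = Sigma_u + Phi_1 Sigma_u Phi_1'.
#
#     import numpy as np
#     coefs = np.array([[[0.5, 0.1], [0.0, 0.4]]])
#     sigma_u = np.eye(2)
#     phis = ma_rep(coefs, maxn=2)
#     covs = mse(phis, sigma_u, 2)
#     assert np.allclose(covs[0], sigma_u)
#     assert np.allclose(covs[1], sigma_u + phis[1] @ sigma_u @ phis[1].T)
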

def forecast(y, coefs, trend_coefs, steps, exog=None):
    """
    Produce linear minimum MSE forecast

    Parameters
    ----------
    y : ndarray (k_ar x neqs)
    coefs : ndarray (k_ar x neqs x neqs)
    trend_coefs : ndarray (1 x neqs) or (neqs)
    steps : int
    exog : ndarray (trend_coefs.shape[1] x neqs)

    Returns
    -------
    forecasts : ndarray (steps x neqs)

    Notes
    -----
    Lütkepohl p. 37
    """
    p = len(coefs)
    k = len(coefs[0])
    # initial value
    forcs = np.zeros((steps, k))
    if exog is not None and trend_coefs is not None:
        forcs += np.dot(exog, trend_coefs)
    # to make existing code (with trend_coefs=intercept and without exog) work:
    elif exog is None and trend_coefs is not None:
        forcs += trend_coefs

    # h=0 forecast should be latest observation
    # forcs[0] = y[-1]

    # make indices easier to think about
    for h in range(1, steps + 1):
        # y_t(h) = intercept + sum_1^p A_i y_t(h-i)
        f = forcs[h - 1]
        for i in range(1, p + 1):
            # slightly hackish
            if h - i <= 0:
                # e.g. when h=1, h-1 = 0, which is y[-1]
                prior_y = y[h - i - 1]
            else:
                # e.g. when h=2, h-1=1, which is forcs[0]
                prior_y = forcs[h - i - 1]

            # i=1 is coefs[0]
            f = f + np.dot(coefs[i - 1], prior_y)

        forcs[h - 1] = f

    return forcs

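
# Illustrative sketch (made-up values): the one-step forecast produced by the
# loop above is the familiar c + A_1 y_T for a VAR(1) with intercept c.
#
#     import numpy as np
#     y_obs = np.array([[1.0, 2.0]])                 # (k_ar x neqs)
#     coefs = np.array([[[0.5, 0.1], [0.0, 0.4]]])
#     c = np.array([0.1, 0.2])
#     fc = forecast(y_obs, coefs, c, steps=2)
#     assert np.allclose(fc[0], c + coefs[0] @ y_obs[-1])
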

def _forecast_vars(steps, ma_coefs, sig_u):
    """_forecast_vars function used by VECMResults. Note that the definition
    of the local variable covs is the same as in VARProcess and as such it
    differs from the one in VARResults!

    Parameters
    ----------
    steps : int
        Number of steps ahead
    ma_coefs : ndarray (steps x neqs x neqs)
        MA coefficient matrices
    sig_u : ndarray (neqs x neqs)
        Covariance of the white noise process

    Returns
    -------
    ndarray (steps x neqs)
        Forecast error variances (the diagonal of each MSE matrix)
    """
    covs = mse(ma_coefs, sig_u, steps)
    # Take diagonal for each cov
    neqs = len(sig_u)
    inds = np.arange(neqs)
    return covs[:, inds, inds]


def forecast_interval(y, coefs, trend_coefs, sig_u, steps=5, alpha=0.05,
                      exog=1):
    """Construct (point, lower, upper) forecasts assuming Gaussian errors"""
    assert 0 < alpha < 1
    q = util.norm_signif_level(alpha)

    point_forecast = forecast(y, coefs, trend_coefs, steps, exog)
    ma_coefs = ma_rep(coefs, steps)
    sigma = np.sqrt(_forecast_vars(steps, ma_coefs, sig_u))

    forc_lower = point_forecast - q * sigma
    forc_upper = point_forecast + q * sigma

    return point_forecast, forc_lower, forc_upper

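
# Illustrative sketch (reusing the made-up values from the ``forecast``
# example above): the bands are symmetric around the point forecast,
# point +/- q * sigma with q the normal quantile for the chosen alpha.
#
#     sigma_u = np.eye(2)
#     point, lower, upper = forecast_interval(y_obs, coefs, c, sigma_u,
#                                             steps=2)
#     assert np.allclose(upper - point, point - lower)
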

def var_loglike(resid, omega, nobs):
    r"""
    Returns the value of the VAR(p) log-likelihood.

    Parameters
    ----------
    resid : ndarray (T x K)
    omega : ndarray
        Sigma hat matrix.  Each element i,j is the average product of the
        OLS residual for variable i and the OLS residual for variable j or
        np.dot(resid.T, resid) / nobs.  There should be no correction for the
        degrees of freedom.
    nobs : int

    Returns
    -------
    llf : float
        The value of the loglikelihood function for a VAR(p) model

    Notes
    -----
    The loglikelihood function for the VAR(p) is

    .. math::

        -\left(\frac{T}{2}\right)
        \left(K\ln\left(2\pi\right)+\ln\left|\Omega\right|+K\right)
    """
    logdet = logdet_symm(np.asarray(omega))
    neqs = len(omega)
    part1 = -(nobs * neqs / 2) * np.log(2 * np.pi)
    part2 = -(nobs / 2) * (logdet + neqs)
    return part1 + part2

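
# Illustrative sketch (random residuals, seed fixed): the value returned
# above matches -(T/2) * (K*log(2*pi) + log|Omega| + K) computed directly.
#
#     import numpy as np
#     np.random.seed(0)
#     resid = np.random.randn(100, 2)
#     omega = resid.T @ resid / 100
#     llf = var_loglike(resid, omega, 100)
#     expected = -(100 / 2) * (2 * np.log(2 * np.pi)
#                              + np.log(np.linalg.det(omega)) + 2)
#     assert np.allclose(llf, expected)
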

def _reordered(self, order):
    # Create new arrays to hold rearranged results from .fit()
    endog = self.endog
    endog_lagged = self.endog_lagged
    params = self.params
    sigma_u = self.sigma_u
    names = self.names
    k_ar = self.k_ar
    endog_new = np.zeros([np.size(endog, 0), np.size(endog, 1)])
    endog_lagged_new = np.zeros([np.size(endog_lagged, 0),
                                 np.size(endog_lagged, 1)])
    params_new_inc, params_new = [np.zeros([np.size(params, 0),
                                            np.size(params, 1)])
                                  for i in range(2)]
    sigma_u_new_inc, sigma_u_new = [np.zeros([np.size(sigma_u, 0),
                                              np.size(sigma_u, 1)])
                                    for i in range(2)]
    num_end = len(self.params[0])
    names_new = []

    # Rearrange elements and fill in new arrays
    k = self.k_trend
    for i, c in enumerate(order):
        endog_new[:, i] = self.endog[:, c]
        if k > 0:
            params_new_inc[0, i] = params[0, i]
            endog_lagged_new[:, 0] = endog_lagged[:, 0]
        for j in range(k_ar):
            params_new_inc[i + j * num_end + k, :] = \
                self.params[c + j * num_end + k, :]
            endog_lagged_new[:, i + j * num_end + k] = \
                endog_lagged[:, c + j * num_end + k]
        sigma_u_new_inc[i, :] = sigma_u[c, :]
        names_new.append(names[c])
    for i, c in enumerate(order):
        params_new[:, i] = params_new_inc[:, c]
        sigma_u_new[:, i] = sigma_u_new_inc[:, c]

    return VARResults(endog=endog_new, endog_lagged=endog_lagged_new,
                      params=params_new, sigma_u=sigma_u_new,
                      lag_order=self.k_ar, model=self.model,
                      trend='c', names=names_new, dates=self.dates)


def orth_ma_rep(results, maxn=10, P=None):
    r"""Compute orthogonalized MA coefficient matrices using P matrix such
    that :math:`\Sigma_u = PP^\prime`.  P defaults to the Cholesky
    decomposition of :math:`\Sigma_u`

    Parameters
    ----------
    results : VARResults or VECMResults
    maxn : int
        Number of coefficient matrices to compute
    P : ndarray (neqs x neqs), optional
        Matrix such that Sigma_u = PP', defaults to the Cholesky
        decomposition.

    Returns
    -------
    coefs : ndarray (maxn x neqs x neqs)
    """
    if P is None:
        P = results._chol_sigma_u

    ma_mats = results.ma_rep(maxn=maxn)
    return np.array([np.dot(coefs, P) for coefs in ma_mats])

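
# Illustrative sketch (the linear algebra only, not the library API): with
# the Cholesky factor P of Sigma_u, the orthogonalized matrices are Phi_i P,
# so Theta_0 = P because Phi_0 = I.
#
#     import numpy as np
#     coefs = np.array([[[0.5, 0.1], [0.0, 0.4]]])
#     sigma_u = np.array([[1.0, 0.3], [0.3, 1.0]])
#     P = np.linalg.cholesky(sigma_u)
#     thetas = np.array([phi @ P for phi in ma_rep(coefs, 5)])
#     assert np.allclose(thetas[0], P)
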

def test_normality(results, signif=0.05):
    """
    Test the assumption of normally distributed errors using a
    Jarque-Bera-style omnibus Chi^2 test

    Parameters
    ----------
    results : VARResults or statsmodels.tsa.vecm.vecm.VECMResults
    signif : float
        The test's significance level.

    Notes
    -----
    H0 (null) : data are generated by a Gaussian-distributed process

    Returns
    -------
    result : NormalityTestResults

    References
    ----------
    .. [1] Lütkepohl, H. 2005. *New Introduction to Multiple Time Series
       Analysis*. Springer.

    .. [2] Kilian, L. & Demiroglu, U. (2000). "Residual-Based Tests for
       Normality in Autoregressions: Asymptotic Theory and Simulation
       Evidence." Journal of Business & Economic Statistics
    """
    resid_c = results.resid - results.resid.mean(0)
    sig = np.dot(resid_c.T, resid_c) / results.nobs
    Pinv = np.linalg.inv(np.linalg.cholesky(sig))

    w = np.dot(Pinv, resid_c.T)
    b1 = (w**3).sum(1)[:, None] / results.nobs
    b2 = (w**4).sum(1)[:, None] / results.nobs - 3

    lam_skew = results.nobs * np.dot(b1.T, b1) / 6
    lam_kurt = results.nobs * np.dot(b2.T, b2) / 24

    lam_omni = float(lam_skew + lam_kurt)
    omni_dist = stats.chi2(results.neqs * 2)
    omni_pvalue = float(omni_dist.sf(lam_omni))
    crit_omni = float(omni_dist.ppf(1 - signif))

    return NormalityTestResults(lam_omni, crit_omni, omni_pvalue,
                                results.neqs * 2, signif)

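
# Illustrative sketch replicating the statistic outside of a results object
# (random residuals, seed fixed): under H0 the omnibus statistic is
# asymptotically Chi^2 with 2*K degrees of freedom.
#
#     import numpy as np
#     np.random.seed(0)
#     resid = np.random.randn(500, 2)
#     resid_c = resid - resid.mean(0)
#     sig = resid_c.T @ resid_c / 500
#     w = np.linalg.inv(np.linalg.cholesky(sig)) @ resid_c.T
#     b1 = (w ** 3).sum(1) / 500                # standardized skewness
#     b2 = (w ** 4).sum(1) / 500 - 3            # excess kurtosis
#     lam_omni = 500 * (b1 @ b1 / 6 + b2 @ b2 / 24)   # ~ Chi^2(4) here
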

class LagOrderResults:
    """
    Results class for choosing a model's lag order.

    Parameters
    ----------
    ics : dict
        The keys are the strings ``"aic"``, ``"bic"``, ``"hqic"``, and
        ``"fpe"``. A corresponding value is a list of information criteria for
        various numbers of lags.
    selected_orders : dict
        The keys are the strings ``"aic"``, ``"bic"``, ``"hqic"``, and
        ``"fpe"``. The corresponding value is an integer specifying the number
        of lags chosen according to a given criterion (key).
    vecm : bool, default: `False`
        `True` indicates that the model is a VECM. In case of a VAR model
        this argument must be `False`.

    Notes
    -----
    In case of a VECM the shown lags are lagged differences.
    """
    def __init__(self, ics, selected_orders, vecm=False):
        self.title = ("VECM" if vecm else "VAR") + " Order Selection"
        self.title += " (* highlights the minimums)"
        self.ics = ics
        self.selected_orders = selected_orders
        self.vecm = vecm
        self.aic = selected_orders["aic"]
        self.bic = selected_orders["bic"]
        self.hqic = selected_orders["hqic"]
        self.fpe = selected_orders["fpe"]

    def summary(self):  # basically copied from (now deleted) print_ic_table()
        cols = sorted(self.ics)  # ["aic", "bic", "fpe", "hqic"]
        str_data = np.array([["%#10.4g" % v for v in self.ics[c]]
                             for c in cols], dtype=object).T
        # mark minimum with an asterisk
        for i, col in enumerate(cols):
            idx = int(self.selected_orders[col]), i
            str_data[idx] += '*'
        return SimpleTable(str_data, [col.upper() for col in cols],
                           lrange(len(str_data)), title=self.title)

    def __str__(self):
        return "<" + self.__module__ + "." + self.__class__.__name__ \
               + " object. Selected orders are: AIC -> " + str(self.aic) \
               + ", BIC -> " + str(self.bic) \
               + ", FPE -> " + str(self.fpe) \
               + ", HQIC -> " + str(self.hqic) + ">"


# -------------------------------------------------------------------------------
# VARProcess class: for known or unknown VAR process


class VAR(TimeSeriesModel):
    r"""
    Fit VAR(p) process and do lag order selection

    .. math:: y_t = A_1 y_{t-1} + \ldots + A_p y_{t-p} + u_t

    Parameters
    ----------
    endog : array_like
        2-d endogenous response variable. The dependent variable.
    exog : array_like
        2-d exogenous variable.
    dates : array_like
        must match number of rows of endog

    References
    ----------
    Lütkepohl (2005) New Introduction to Multiple Time Series Analysis
    """

    y = deprecated_alias("y", "endog", remove_version="0.11.0")

    def __init__(self, endog, exog=None, dates=None, freq=None,
                 missing='none'):
        super(VAR, self).__init__(endog, exog, dates, freq, missing=missing)
        if self.endog.ndim == 1:
            raise ValueError("Only gave one variable to VAR")
        self.neqs = self.endog.shape[1]
        self.n_totobs = len(endog)


    def predict(self, params, start=None, end=None, lags=1, trend='c'):
        """
        Returns in-sample predictions or forecasts
        """
        params = np.array(params)

        if start is None:
            start = lags

        # Handle start, end
        start, end, out_of_sample, prediction_index = (
            self._get_prediction_index(start, end))

        if end < start:
            raise ValueError("end is before start")
        if end == start + out_of_sample:
            return np.array([])

        k_trend = util.get_trendorder(trend)
        k = self.neqs
        k_ar = lags

        predictedvalues = np.zeros((end + 1 - start + out_of_sample, k))
        if k_trend != 0:
            intercept = params[:k_trend]
            predictedvalues += intercept
        else:
            # guard against an undefined name in the out-of-sample step below
            intercept = None

        y = self.endog
        x = util.get_var_endog(y, lags, trend=trend, has_constant='raise')
        fittedvalues = np.dot(x, params)

        fv_start = start - k_ar
        pv_end = min(len(predictedvalues), len(fittedvalues) - fv_start)
        fv_end = min(len(fittedvalues), end - k_ar + 1)
        predictedvalues[:pv_end] = fittedvalues[fv_start:fv_end]

        if not out_of_sample:
            return predictedvalues

        # fit out of sample
        y = y[-k_ar:]
        coefs = params[k_trend:].reshape((k_ar, k, k)).swapaxes(1, 2)
        predictedvalues[pv_end:] = forecast(y, coefs, intercept, out_of_sample)
        return predictedvalues


    def fit(self, maxlags=None, method='ols', ic=None, trend='c',
            verbose=False):
        # todo: this code is only supporting deterministic terms as exog.
        # This means that all exog-variables have lag 0. If dealing with
        # different exogs is necessary, a `lags_exog`-parameter might make
        # sense (e.g. a sequence of ints specifying lags).
        # Alternatively, leading zeros for exog-variables with smaller number
        # of lags than the maximum number of exog-lags might work.
        """
        Fit the VAR model

        Parameters
        ----------
        maxlags : int
            Maximum number of lags to check for order selection, defaults to
            12 * (nobs/100.)**(1./4), see select_order function
        method : {'ols'}
            Estimation method to use
        ic : {'aic', 'fpe', 'hqic', 'bic', None}
            Information criterion to use for VAR order selection.
            aic : Akaike
            fpe : Final prediction error
            hqic : Hannan-Quinn
            bic : Bayesian a.k.a. Schwarz
        verbose : bool, default False
            Print order selection output to the screen
        trend : str {"c", "ct", "ctt", "nc", "n"}
            "c" - add constant
            "ct" - constant and trend
            "ctt" - constant, linear and quadratic trend
            "n", "nc" - no constant, no trend
            Note that these are prepended to the columns of the dataset.

        Returns
        -------
        VARResults
            Estimation results

        Notes
        -----
        See Lütkepohl pp. 146-153 for implementation details.
        """
        lags = maxlags

        if trend not in ['c', 'ct', 'ctt', 'nc', 'n']:
            raise ValueError("trend '{}' not supported for VAR".format(trend))

        if ic is not None:
            selections = self.select_order(maxlags=maxlags)
            if not hasattr(selections, ic):
                raise ValueError("%s not recognized, must be among %s"
                                 % (ic, sorted(selections)))
            lags = getattr(selections, ic)
            if verbose:
                print(selections)
                print('Using %d based on %s criterion' % (lags, ic))
        else:
            if lags is None:
                lags = 1

        k_trend = util.get_trendorder(trend)
        orig_exog_names = self.exog_names
        self.exog_names = util.make_lag_names(self.endog_names, lags, k_trend)
        self.nobs = self.n_totobs - lags

        # add exog to data.xnames (necessary because the length of xnames also
        # determines the allowed size of VARResults.params)
        if self.exog is not None:
            if orig_exog_names:
                x_names_to_add = orig_exog_names
            else:
                x_names_to_add = [("exog%d" % i)
                                  for i in range(self.exog.shape[1])]
            self.data.xnames = (self.data.xnames[:k_trend] +
                                x_names_to_add +
                                self.data.xnames[k_trend:])
        self.data.cov_names = pd.MultiIndex.from_product((self.data.xnames,
                                                          self.data.ynames))
        return self._estimate_var(lags, trend=trend)


    def _estimate_var(self, lags, offset=0, trend='c'):
        """
        lags : int
            Lags of the endogenous variable.
        offset : int
            Periods to drop from the beginning. Used for order selection so
            that all candidate models are fit on the same sample.
        trend : {str, None}
            As per above
        """
        # have to do this again because select_order does not call fit
        self.k_trend = k_trend = util.get_trendorder(trend)

        if offset < 0:  # pragma: no cover
            raise ValueError('offset must be >= 0')

        nobs = self.n_totobs - lags - offset
        endog = self.endog[offset:]
        exog = None if self.exog is None else self.exog[offset:]
        z = util.get_var_endog(endog, lags, trend=trend,
                               has_constant='raise')
        if exog is not None:
            # TODO: currently only deterministic terms supported (exoglags==0)
            # and since exoglags==0, x will be an array of size 0.
            x = util.get_var_endog(exog[-nobs:], 0, trend="n",
                                   has_constant="raise")
            x_inst = exog[-nobs:]
            x = np.column_stack((x, x_inst))
            del x_inst  # free memory
            temp_z = z
            z = np.empty((x.shape[0], x.shape[1] + z.shape[1]))
            z[:, :self.k_trend] = temp_z[:, :self.k_trend]
            z[:, self.k_trend:self.k_trend + x.shape[1]] = x
            z[:, self.k_trend + x.shape[1]:] = temp_z[:, self.k_trend:]
            del temp_z, x  # free memory
        # the following modification of z is necessary to get the same results
        # as JMulTi for the constant-term-parameter...
        for i in range(self.k_trend):
            if (np.diff(z[:, i]) == 1).all():  # modify the trend-column
                z[:, i] += lags
            # make the same adjustment for the quadratic term
            if (np.diff(np.sqrt(z[:, i])) == 1).all():
                z[:, i] = (np.sqrt(z[:, i]) + lags)**2

        y_sample = endog[lags:]
        # Lütkepohl p75, about 5x faster than stated formula
        params = np.linalg.lstsq(z, y_sample, rcond=1e-15)[0]
        resid = y_sample - np.dot(z, params)

        # Unbiased estimate of covariance matrix $\Sigma_u$ of the white noise
        # process $u$
        # equivalent definition
        # .. math:: \frac{1}{T - Kp - 1} Y^\prime (I_T - Z (Z^\prime Z)^{-1}
        # Z^\prime) Y
        # Ref: Lütkepohl p.75
        # df_resid right now is T - Kp - 1, which is a suggested correction

        avobs = len(y_sample)
        if exog is not None:
            k_trend += exog.shape[1]
        df_resid = avobs - (self.neqs * lags + k_trend)

        sse = np.dot(resid.T, resid)
        omega = sse / df_resid

        varfit = VARResults(endog, z, params, omega, lags,
                            names=self.endog_names, trend=trend,
                            dates=self.data.dates, model=self, exog=self.exog)
        return VARResultsWrapper(varfit)


    def select_order(self, maxlags=None, trend="c"):
        """
        Compute lag order selections based on each of the available
        information criteria

        Parameters
        ----------
        maxlags : int
            if None, defaults to 12 * (nobs/100.)**(1./4)
        trend : str {"nc", "c", "ct", "ctt"}
            * "nc" - no deterministic terms
            * "c" - constant term
            * "ct" - constant and linear term
            * "ctt" - constant, linear, and quadratic term

        Returns
        -------
        selections : LagOrderResults
        """
        if maxlags is None:
            maxlags = int(round(12 * (len(self.endog) / 100.)**(1 / 4.)))
            # TODO: This expression shows up in a bunch of places, but
            # in some it is `int` and in others `np.ceil`.  Also in some
            # it multiplies by 4 instead of 12.  Let's put these all in
            # one place and document when to use which variant.

        ics = defaultdict(list)
        p_min = 0 if self.exog is not None or trend != "nc" else 1
        for p in range(p_min, maxlags + 1):
            # exclude some periods so that the same amount of data is used
            # for each lag order
            result = self._estimate_var(p, offset=maxlags - p, trend=trend)

            for k, v in iteritems(result.info_criteria):
                ics[k].append(v)

        selected_orders = dict((k, np.array(v).argmin() + p_min)
                               for k, v in iteritems(ics))

        return LagOrderResults(ics, selected_orders, vecm=False)

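
# Illustrative usage sketch (white-noise data, so a low order is expected;
# data sizes and the AIC choice are assumptions for demonstration):
#
#     import numpy as np
#     np.random.seed(0)
#     data = np.random.randn(200, 2)
#     model = VAR(data)
#     print(model.select_order(maxlags=4).summary())
#     res = model.fit(maxlags=4, ic='aic')      # lag length picked by AIC
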

class VARProcess(object):
    """
    Class represents a known VAR(p) process

    Parameters
    ----------
    coefs : ndarray (p x k x k)
        coefficients for lags of endog, part of params reshaped
    coefs_exog : ndarray
        parameters for trend and user provided exog
    sigma_u : ndarray (k x k)
        residual covariance
    names : sequence (length k)
    _params_info : dict
        internal dict to provide information about the composition of
        `params`, specifically `k_trend` (trend order) and `k_exog_user`
        (the number of exog variables provided by the user).
        If it is None, then coefs_exog are assumed to be for the intercept
        and trend.
    """
    def __init__(self, coefs, coefs_exog, sigma_u, names=None,
                 _params_info=None):
        self.k_ar = len(coefs)
        self.neqs = coefs.shape[1]
        self.coefs = coefs
        self.coefs_exog = coefs_exog
        # Note: reshaping 1-D coefs_exog to 2-D makes unit tests fail
        self.sigma_u = sigma_u
        self.names = names

        if _params_info is None:
            _params_info = {}
        self.k_exog_user = _params_info.get('k_exog_user', 0)
        if self.coefs_exog is not None:
            k_ex = self.coefs_exog.shape[0] if self.coefs_exog.ndim != 1 else 1
            k_c = k_ex - self.k_exog_user
        else:
            k_c = 0
        self.k_trend = _params_info.get('k_trend', k_c)
        # TODO: we need to distinguish exog including trend and exog_user
        self.k_exog = self.k_trend + self.k_exog_user

        if self.k_trend > 0:
            if coefs_exog.ndim == 2:
                self.intercept = coefs_exog[:, 0]
            else:
                self.intercept = coefs_exog
        else:
            self.intercept = np.zeros(self.neqs)


    def get_eq_index(self, name):
        """Return integer position of requested equation name"""
        return util.get_index(self.names, name)

    def __str__(self):
        output = ('VAR(%d) process for %d-dimensional response y_t'
                  % (self.k_ar, self.neqs))
        output += '\nstable: %s' % self.is_stable()
        output += '\nmean: %s' % self.mean()

        return output


    def is_stable(self, verbose=False):
        """Determine stability based on model coefficients

        Parameters
        ----------
        verbose : bool
            Print eigenvalues of the VAR(1) companion

        Notes
        -----
        Checks whether det(I - Az) = 0 has any roots with mod(z) <= 1. For a
        stable process all roots must lie outside the unit circle,
        equivalently all eigenvalues of the companion matrix must lie inside
        the unit circle.
        """
        return is_stable(self.coefs, verbose=verbose)


    def simulate_var(self, steps=None, offset=None, seed=None):
        """
        simulate the VAR(p) process for the desired number of steps

        Parameters
        ----------
        steps : None or int
            number of observations to simulate, this includes the initial
            observations to start the autoregressive process.
            If offset is not None, then exog of the model are used if they
            were provided in the model
        offset : None or ndarray (steps, neqs)
            If not None, then offset is added as an observation specific
            intercept to the autoregression. If it is None and either trend
            (including intercept) or exog were used in the VAR model, then
            the linear predictor of those components will be used as offset.
            This should have the same number of rows as steps, and the same
            number of columns as endogenous variables (neqs).
        seed : {None, int}
            If seed is not None, then it will be used for the random
            variables generated by numpy.random.

        Returns
        -------
        endog_simulated : nd_array
            Endog of the simulated VAR process
        """
        steps_ = None
        if offset is None:
            if self.k_exog_user > 0 or self.k_trend > 1:
                # if more than intercept
                # endog_lagged contains all regressors, trend, exog_user
                # and lagged endog, trimmed initial observations
                offset = self.endog_lagged[:, :self.k_exog].dot(
                    self.coefs_exog.T)
                steps_ = self.endog_lagged.shape[0]
            else:
                offset = self.intercept
        else:
            steps_ = offset.shape[0]

        # default, but overwritten if exog or offset are used
        if steps is None:
            if steps_ is None:
                steps = 1000
            else:
                steps = steps_
        else:
            if steps_ is not None and steps != steps_:
                raise ValueError('if exog or offset are used, then steps must '
                                 'be equal to their length or None')

        y = util.varsim(self.coefs, offset, self.sigma_u, steps=steps,
                        seed=seed)
        return y

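
    # Illustrative sketch (made-up parameters): a known stable VAR(1) process
    # can be constructed directly and simulated; ``seed`` fixes the draws.
    #
    #     import numpy as np
    #     proc = VARProcess(np.array([[[0.5, 0.1], [0.0, 0.4]]]),
    #                       np.array([0.1, 0.2]),        # intercept
    #                       np.eye(2))
    #     sim = proc.simulate_var(steps=250, seed=0)     # (250 x 2) array
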

    def plotsim(self, steps=None, offset=None, seed=None):
        """
        Plot a simulation from the VAR(p) process for the desired number of
        steps
        """
        y = self.simulate_var(steps=steps, offset=offset, seed=seed)
        return plotting.plot_mts(y)


    def intercept_longrun(self):
        r"""
        Long run intercept of stable VAR process

        Lütkepohl eq. 2.1.23

        .. math:: \mu = (I - A_1 - \dots - A_p)^{-1} \alpha

        where \alpha is the intercept (parameter of the constant)
        """
        return np.linalg.solve(self._char_mat, self.intercept)

    def mean(self):
        r"""
        Long run mean of the stable VAR process

        Warning: trend and exog except for intercept are ignored for this.
        This might change in future versions.

        Lütkepohl eq. 2.1.23

        .. math:: \mu = (I - A_1 - \dots - A_p)^{-1} \alpha

        where \alpha is the intercept (parameter of the constant)
        """
        return self.intercept_longrun()


    def ma_rep(self, maxn=10):
        r"""
        Compute MA(:math:`\infty`) coefficient matrices

        Parameters
        ----------
        maxn : int
            Number of coefficient matrices to compute

        Returns
        -------
        coefs : ndarray (maxn x k x k)
        """
        return ma_rep(self.coefs, maxn=maxn)


    def orth_ma_rep(self, maxn=10, P=None):
        r"""
        Compute orthogonalized MA coefficient matrices using P matrix such
        that :math:`\Sigma_u = PP^\prime`.  P defaults to the Cholesky
        decomposition of :math:`\Sigma_u`

        Parameters
        ----------
        maxn : int
            Number of coefficient matrices to compute
        P : ndarray (k x k), optional
            Matrix such that Sigma_u = PP', defaults to the Cholesky
            decomposition

        Returns
        -------
        coefs : ndarray (maxn x k x k)
        """
        return orth_ma_rep(self, maxn, P)


    def long_run_effects(self):
        r"""Compute long-run effect of unit impulse

        .. math::

            \Psi_\infty = \sum_{i=0}^\infty \Phi_i
        """
        return np.linalg.inv(self._char_mat)


    @cache_readonly
    def _chol_sigma_u(self):
        return np.linalg.cholesky(self.sigma_u)

    @cache_readonly
    def _char_mat(self):
        """Characteristic matrix of the VAR"""
        return np.eye(self.neqs) - self.coefs.sum(0)


    def acf(self, nlags=None):
        """Compute theoretical autocovariance function

        Parameters
        ----------
        nlags : int or None
            The number of lags to include. Defaults to the order p of the
            system.

        Returns
        -------
        acf : ndarray (nlags + 1 x k x k)
        """
        return var_acf(self.coefs, self.sigma_u, nlags=nlags)

    def acorr(self, nlags=None):
        """
        Autocorrelation function

        Parameters
        ----------
        nlags : int or None
            The number of lags to include in the autocovariance function. The
            default is the number of lags included in the model.

        Returns
        -------
        acorr : ndarray
            Autocorrelation and cross correlations (nlags, neqs, neqs)
        """
        return util.acf_to_acorr(self.acf(nlags=nlags))


    def plot_acorr(self, nlags=10, linewidth=8):
        """Plot theoretical autocorrelation function"""
        fig = plotting.plot_full_acorr(self.acorr(nlags=nlags),
                                       linewidth=linewidth)
        return fig


    def forecast(self, y, steps, exog_future=None):
        """Produce linear minimum MSE forecasts for desired number of steps
        ahead, using prior values y

        Parameters
        ----------
        y : ndarray (p x k)
        steps : int

        Returns
        -------
        forecasts : ndarray (steps x neqs)

        Notes
        -----
        Lütkepohl pp 37-38
        """
        if self.exog is None and exog_future is not None:
            raise ValueError("No exog in model, so no exog_future supported "
                             "in forecast method.")
        if self.exog is not None and exog_future is None:
            raise ValueError("Please provide an exog_future argument to "
                             "the forecast method.")
        trend_coefs = None if self.coefs_exog.size == 0 else self.coefs_exog.T

        exogs = []
        if self.trend.startswith("c"):  # constant term
            exogs.append(np.ones(steps))
        exog_lin_trend = np.arange(self.n_totobs + 1,
                                   self.n_totobs + 1 + steps)
        if "t" in self.trend:
            exogs.append(exog_lin_trend)
        if "tt" in self.trend:
            exogs.append(exog_lin_trend**2)
        if exog_future is not None:
            exogs.append(exog_future)

        if not exogs:
            exog_future = None
        else:
            exog_future = np.column_stack(exogs)
        return forecast(y, self.coefs, trend_coefs, steps, exog_future)


    # TODO: use `mse` module-level function?
    def mse(self, steps):
        r"""
        Compute theoretical forecast error variance matrices

        Parameters
        ----------
        steps : int
            Number of steps ahead

        Notes
        -----
        .. math:: \mathrm{MSE}(h) = \sum_{i=0}^{h-1} \Phi_i \Sigma_u \Phi_i^T

        Returns
        -------
        forc_covs : ndarray (steps x neqs x neqs)
        """
        ma_coefs = self.ma_rep(steps)

        k = len(self.sigma_u)
        forc_covs = np.zeros((steps, k, k))

        prior = np.zeros((k, k))
        for h in range(steps):
            # Sigma(h) = Sigma(h-1) + Phi Sig_u Phi'
            phi = ma_coefs[h]
            var = phi @ self.sigma_u @ phi.T
            forc_covs[h] = prior = prior + var

        return forc_covs


    forecast_cov = mse


    def _forecast_vars(self, steps):
        covs = self.forecast_cov(steps)

        # Take diagonal for each cov
        inds = np.arange(self.neqs)
        return covs[:, inds, inds]


    def forecast_interval(self, y, steps, alpha=0.05, exog_future=None):
        """
        Construct forecast interval estimates assuming the y are Gaussian

        Parameters
        ----------
        y : {ndarray, None}
            The initial values to use for the forecasts. If None,
            the last k_ar values of the original endogenous variables are
            used.
        steps : int
            Number of steps ahead to forecast
        alpha : float, optional
            The significance level for the confidence intervals.
        exog_future : ndarray, optional
            Forecast values of the exogenous variables. Should include
            constant, trend, etc. as needed, including extrapolating out
            of sample.

        Returns
        -------
        point : ndarray
            Mean value of forecast
        lower : ndarray
            Lower bound of confidence interval
        upper : ndarray
            Upper bound of confidence interval

        Notes
        -----
        Lütkepohl pp. 39-40
        """
        if not 0 < alpha < 1:
            raise ValueError('alpha must be between 0 and 1')
        q = util.norm_signif_level(alpha)

        point_forecast = self.forecast(y, steps, exog_future=exog_future)
        sigma = np.sqrt(self._forecast_vars(steps))

        forc_lower = point_forecast - q * sigma
        forc_upper = point_forecast + q * sigma

        return point_forecast, forc_lower, forc_upper


    def to_vecm(self):
        """Return the VECM representation of the VAR: the level matrix Pi
        and the stacked short-run matrices Gamma"""
        k = self.coefs.shape[1]
        p = self.coefs.shape[0]
        A = self.coefs
        pi = -(np.identity(k) - np.sum(A, 0))
        gamma = np.zeros((p - 1, k, k))
        for i in range(p - 1):
            gamma[i] = -(np.sum(A[i + 1:], 0))
        gamma = np.concatenate(gamma, 1)
        return {"Gamma": gamma, "Pi": pi}

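
# Illustrative sketch (made-up VAR(2) coefficients): ``to_vecm`` returns the
# level matrix Pi = -(I - A_1 - A_2) and the short-run matrix Gamma_1 = -A_2.
#
#     import numpy as np
#     A = np.array([[[0.5, 0.1], [0.0, 0.4]],
#                   [[0.2, 0.0], [0.1, 0.1]]])
#     proc = VARProcess(A, np.zeros(2), np.eye(2))
#     vecm = proc.to_vecm()
#     assert np.allclose(vecm["Pi"], -(np.eye(2) - A.sum(0)))
#     assert np.allclose(vecm["Gamma"], -A[1])
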

# -------------------------------------------------------------------------------
# VARResults class


class VARResults(VARProcess):
    """Estimate VAR(p) process with fixed number of lags

    Parameters
    ----------
    endog : ndarray
    endog_lagged : ndarray
    params : ndarray
    sigma_u : ndarray
    lag_order : int
    model : VAR model instance
    trend : str {'nc', 'c', 'ct'}
    names : array_like
        List of names of the endogenous variables in order of appearance in
        `endog`.
    dates
    exog : ndarray

    Attributes
    ----------
    coefs : ndarray (p x K x K)
        Estimated A_i matrices, A_i = coefs[i-1]
    dates
    endog
    endog_lagged
    k_ar : int
        Order of VAR process
    k_trend : int
    model
    names
    neqs : int
        Number of variables (equations)
    nobs : int
    n_totobs : int
    params : ndarray (Kp + 1) x K
        A_i matrices and intercept in stacked form [int A_1 ... A_p]
    names : list
        variables names
    resid
    sigma_u : ndarray (K x K)
        Estimate of white noise process covariance Var[u_t]
    tvalues
    y :
    ys_lagged
    """
    _model_type = 'VAR'
    y = deprecated_alias("y", "endog", remove_version="0.11.0")
    ys_lagged = deprecated_alias("ys_lagged", "endog_lagged",
                                 remove_version="0.11.0")

    def __init__(self, endog, endog_lagged, params, sigma_u, lag_order,
                 model=None, trend='c', names=None, dates=None, exog=None):

        self.model = model
        self.endog = endog
        self.endog_lagged = endog_lagged
        self.dates = dates

        self.n_totobs, neqs = self.endog.shape
        self.nobs = self.n_totobs - lag_order
        self.trend = trend
        k_trend = util.get_trendorder(trend)
        self.exog_names = util.make_lag_names(names, lag_order, k_trend,
                                              model.data.orig_exog)
        self.params = params
        self.exog = exog

        # Initialize VARProcess parent class
        # construct coefficient matrices
        # Each matrix needs to be transposed
        endog_start = k_trend
        if exog is not None:
            k_exog_user = exog.shape[1]
            endog_start += k_exog_user
        else:
            k_exog_user = 0
        reshaped = self.params[endog_start:]
        reshaped = reshaped.reshape((lag_order, neqs, neqs))
        # Need to transpose each coefficient matrix
        coefs = reshaped.swapaxes(1, 2).copy()

        self.coefs_exog = params[:endog_start].T
        self.k_exog = self.coefs_exog.shape[1]
        self.k_exog_user = k_exog_user

        # maybe change to params class, distinguish exog_all versus exog_user
        # see issue #4535
        _params_info = {'k_trend': k_trend,
                        'k_exog_user': k_exog_user,
                        'k_ar': lag_order}
        super(VARResults, self).__init__(coefs, self.coefs_exog, sigma_u,
                                         names=names,
                                         _params_info=_params_info)


    def plot(self):
        """Plot input time series"""
        return plotting.plot_mts(self.endog, names=self.names,
                                 index=self.dates)


    @property
    def df_model(self):
        """
        Number of estimated parameters per equation, including the
        intercept / trends
        """
        return self.neqs * self.k_ar + self.k_exog

    @property
    def df_resid(self):
        """Number of observations minus number of estimated parameters"""
        return self.nobs - self.df_model


    @cache_readonly
    def fittedvalues(self):
        """
        The predicted in-sample values of the response variables of the model.
        """
        return np.dot(self.endog_lagged, self.params)

    @cache_readonly
    def resid(self):
        """
        Residuals of response variable resulting from estimated coefficients
        """
        return self.endog[self.k_ar:] - self.fittedvalues


    def sample_acov(self, nlags=1):
        """Sample autocovariance"""
        return _compute_acov(self.endog[self.k_ar:], nlags=nlags)

    def sample_acorr(self, nlags=1):
        """Sample autocorrelation"""
        acovs = self.sample_acov(nlags=nlags)
        return _acovs_to_acorrs(acovs)


    def plot_sample_acorr(self, nlags=10, linewidth=8):
        """
        Plot sample autocorrelation function

        Parameters
        ----------
        nlags : int
            The number of lags to use when computing the autocorrelation. Does
            not count the zero lag, which will be returned.
        linewidth : int
            The linewidth for the plots.

        Returns
        -------
        Figure
            The figure that contains the plot axes.
        """
        fig = plotting.plot_full_acorr(self.sample_acorr(nlags=nlags),
                                       linewidth=linewidth)
        return fig


    def resid_acov(self, nlags=1):
        """
        Compute centered sample autocovariance (including lag 0)

        Parameters
        ----------
        nlags : int

        Returns
        -------
        acov : ndarray
        """
        return _compute_acov(self.resid, nlags=nlags)

    def resid_acorr(self, nlags=1):
        """
        Compute sample autocorrelation (including lag 0)

        Parameters
        ----------
        nlags : int

        Returns
        -------
        acorr : ndarray
        """
        acovs = self.resid_acov(nlags=nlags)
        return _acovs_to_acorrs(acovs)


    @cache_readonly
    def resid_corr(self):
        """
        Centered residual correlation matrix
        """
        return self.resid_acorr(0)[0]

    @cache_readonly
    def sigma_u_mle(self):
        """(Biased) maximum likelihood estimate of noise process covariance
        """
        return self.sigma_u * self.df_resid / self.nobs


    def cov_params(self):
        """Estimated variance-covariance of model coefficients

        Notes
        -----
        Covariance of vec(B), where B is the matrix
        [params_for_deterministic_terms, A_1, ..., A_p] with the shape
        (K x (Kp + number_of_deterministic_terms))
        Adjusted to be an unbiased estimator
        Ref: Lütkepohl p.74-75
        """
        z = self.endog_lagged
        return np.kron(np.linalg.inv(z.T @ z), self.sigma_u)


    def cov_ybar(self):
        r"""Asymptotically consistent estimate of covariance of the sample
        mean

        .. math::

            \sqrt{T} (\bar{y} - \mu) \rightarrow
            {\cal N}(0, \Sigma_{\bar{y}}) \\

            \Sigma_{\bar{y}} = B \Sigma_u B^\prime, \text{where } B = (I_K
            - A_1 - \cdots - A_p)^{-1}

        Notes
        -----
        Lütkepohl Proposition 3.3
        """

        Ainv = np.linalg.inv(np.eye(self.neqs) - self.coefs.sum(0))
        return Ainv @ self.sigma_u @ Ainv.T


    # ------------------------------------------------------------
    # Estimation-related things

    @cache_readonly
    def _zz(self):
        # Z'Z
        return np.dot(self.endog_lagged.T, self.endog_lagged)


    @property
    def _cov_alpha(self):
        """
        Estimated covariance matrix of model coefficients w/o exog
        """
        # drop exog
        kn = self.k_exog * self.neqs
        return self.cov_params()[kn:, kn:]


    @cache_readonly
    def _cov_sigma(self):
        """
        Estimated covariance matrix of vech(sigma_u)
        """
        D_K = tsa.duplication_matrix(self.neqs)
        D_Kinv = np.linalg.pinv(D_K)

        sigxsig = np.kron(self.sigma_u, self.sigma_u)
        return 2 * D_Kinv @ sigxsig @ D_Kinv.T


    @cache_readonly
    def llf(self):
        "Compute VAR(p) loglikelihood"
        return var_loglike(self.resid, self.sigma_u_mle, self.nobs)


    @cache_readonly
    def stderr(self):
        """Standard errors of coefficients, reshaped to match in size
        """
        stderr = np.sqrt(np.diag(self.cov_params()))
        return stderr.reshape((self.df_model, self.neqs), order='C')

    bse = stderr  # statsmodels interface?


    @cache_readonly
    def stderr_endog_lagged(self):
        """Standard errors of the lagged-endog coefficients"""
        start = self.k_exog
        return self.stderr[start:]

    @cache_readonly
    def stderr_dt(self):
        """Standard errors of the deterministic-term coefficients"""
        end = self.k_exog
        return self.stderr[:end]


    @cache_readonly
    def tvalues(self):
        """Compute t-statistics. Use Student-t(T - Kp - 1) = t(df_resid) to
        test significance.
        """
        return self.params / self.stderr


    @cache_readonly
    def tvalues_endog_lagged(self):
        """t-statistics of the lagged-endog coefficients"""
        start = self.k_exog
        return self.tvalues[start:]

    @cache_readonly
    def tvalues_dt(self):
        """t-statistics of the deterministic-term coefficients"""
        end = self.k_exog
        return self.tvalues[:end]


    @cache_readonly
    def pvalues(self):
        """Two-sided p-values for model coefficients from the asymptotic
        normal distribution
        """
        # return stats.t.sf(np.abs(self.tvalues), self.df_resid)*2
        return 2 * stats.norm.sf(np.abs(self.tvalues))


    @cache_readonly
    def pvalues_endog_lagged(self):
        """p-values of the lagged-endog coefficients"""
        start = self.k_exog
        return self.pvalues[start:]

    @cache_readonly
    def pvalues_dt(self):
        """p-values of the deterministic-term coefficients"""
        end = self.k_exog
        return self.pvalues[:end]


    # todo: --------------------------------------------------------------------


    def plot_forecast(self, steps, alpha=0.05, plot_stderr=True):
        """
        Plot forecast
        """
        mid, lower, upper = self.forecast_interval(self.endog[-self.k_ar:],
                                                   steps,
                                                   alpha=alpha)
        fig = plotting.plot_var_forc(self.endog, mid, lower, upper,
                                     names=self.names,
                                     plot_stderr=plot_stderr)
        return fig


    # Forecast error covariance functions

    def forecast_cov(self, steps=1, method='mse'):
        r"""Compute forecast covariance matrices for desired number of steps

        Parameters
        ----------
        steps : int

        Notes
        -----
        .. math:: \Sigma_{\hat y}(h) = \Sigma_y(h) + \Omega(h) / T

        Ref: Lütkepohl pp. 96-97

        Returns
        -------
        covs : ndarray (steps x k x k)
        """
        fc_cov = self.mse(steps)
        if method == 'mse':
            pass
        elif method == 'auto':
            if self.k_exog == 1 and self.k_trend < 2:
                # currently only supported if no exog and trend in ['nc', 'c']
                fc_cov += self._omega_forc_cov(steps) / self.nobs
                import warnings
                warnings.warn('forecast cov takes parameter uncertainty into '
                              'account', OutputWarning)
        else:
            raise ValueError("method has to be either 'mse' or 'auto'")

        return fc_cov


    # Monte Carlo irf standard errors
    @deprecate_kwarg('T', 'steps')
    def irf_errband_mc(self, orth=False, repl=1000, steps=10,
                       signif=0.05, seed=None, burn=100, cum=False):
        """
        Compute Monte Carlo integrated error bands for the impulse response
        functions, assuming normally distributed data

        Parameters
        ----------
        orth : bool, default False
            Compute orthogonalized impulse response error bands
        repl : int
            number of Monte Carlo replications to perform
        steps : int, default 10
            number of impulse response periods
        signif : float (0 < signif < 1)
            Significance level for error bars, defaults to 95% CI
        seed : int
            np.random.seed for replications
        burn : int
            number of initial observations to discard for simulation
        cum : bool, default False
            produce cumulative irf error bands

        Notes
        -----
        Lütkepohl (2005) Appendix D

        Returns
        -------
        Tuple of lower and upper arrays of ma_rep monte carlo standard errors
        """
        ma_coll = self.irf_resim(orth=orth, repl=repl, steps=steps,
                                 seed=seed, burn=burn, cum=cum)

        ma_sort = np.sort(ma_coll, axis=0)  # sort to get quantiles
        # python 2: round returns float
        low_idx = int(round(signif / 2 * repl) - 1)
        upp_idx = int(round((1 - signif / 2) * repl) - 1)
        lower = ma_sort[low_idx, :, :, :]
        upper = ma_sort[upp_idx, :, :, :]
        return lower, upper


    @deprecate_kwarg('T', 'steps')
    def irf_resim(self, orth=False, repl=1000, steps=10,
                  seed=None, burn=100, cum=False):
        """
        Simulates impulse response function, returning an array of
        simulations. Used for Sims-Zha error band calculation.

        Parameters
        ----------
        orth : bool, default False
            Compute orthogonalized impulse response error bands
        repl : int
            number of Monte Carlo replications to perform
        steps : int, default 10
            number of impulse response periods
        seed : int
            np.random.seed for replications
        burn : int
            number of initial observations to discard for simulation
        cum : bool, default False
            produce cumulative irf error bands

        Notes
        -----
        .. [*] Sims, Christoper A., and Tao Zha. 1999. "Error Bands for
           Impulse Response." Econometrica 67: 1113-1155.

        Returns
        -------
        Array of simulated impulse response functions
        """
        neqs = self.neqs
        k_ar = self.k_ar
        coefs = self.coefs
        sigma_u = self.sigma_u
        intercept = self.intercept
        nobs = self.nobs

        ma_coll = np.zeros((repl, steps + 1, neqs, neqs))

        def fill_coll(sim):
            ret = VAR(sim, exog=self.exog).fit(maxlags=k_ar, trend=self.trend)
            ret = (ret.orth_ma_rep(maxn=steps) if orth
                   else ret.ma_rep(maxn=steps))
            return ret.cumsum(axis=0) if cum else ret

        for i in range(repl):
            # discard the first `burn` observations to correct for
            # starting bias
            sim = util.varsim(coefs, intercept, sigma_u,
                              seed=seed, steps=nobs + burn)
            sim = sim[burn:]
            ma_coll[i, :, :, :] = fill_coll(sim)

        return ma_coll


    def _omega_forc_cov(self, steps):
        # Approximate MSE matrix \Omega(h) as defined in Lut p97
        G = self._zz
        Ginv = np.linalg.inv(G)

        # memoize powers of B for speedup
        # TODO: see if can memoize better
        # TODO: much lower-hanging fruit in caching `np.trace` below.
        B = self._bmat_forc_cov()
        _B = {}

        def bpow(i):
            if i not in _B:
                _B[i] = np.linalg.matrix_power(B, i)

            return _B[i]

        phis = self.ma_rep(steps)
        sig_u = self.sigma_u

        omegas = np.zeros((steps, self.neqs, self.neqs))
        for h in range(1, steps + 1):
            if h == 1:
                omegas[h - 1] = self.df_model * self.sigma_u
                continue

            om = omegas[h - 1]
            for i in range(h):
                for j in range(h):
                    Bi = bpow(h - 1 - i)
                    Bj = bpow(h - 1 - j)
                    mult = np.trace(Bi.T @ Ginv @ Bj @ G)
                    om += mult * phis[i] @ sig_u @ phis[j].T
            omegas[h - 1] = om

        return omegas


    def _bmat_forc_cov(self):
        # B as defined on p. 96 of Lut
        upper = np.zeros((self.k_exog, self.df_model))
        upper[:, :self.k_exog] = np.eye(self.k_exog)

        lower_dim = self.neqs * (self.k_ar - 1)
        eye = np.eye(lower_dim)
        lower = np.column_stack((np.zeros((lower_dim, self.k_exog)), eye,
                                 np.zeros((lower_dim, self.neqs))))

        return np.vstack((upper, self.params.T, lower))


    def summary(self):
        """Compute console output summary of estimates

        Returns
        -------
        summary : VARSummary
        """
        return VARSummary(self)


    def irf(self, periods=10, var_decomp=None, var_order=None):
        """Analyze impulse responses to shocks in system

        Parameters
        ----------
        periods : int
        var_decomp : ndarray (k x k), lower triangular
            Must satisfy Omega = P P', where P is the passed matrix. Defaults
            to Cholesky decomposition of Omega
        var_order : sequence
            Alternate variable order for Cholesky decomposition

        Returns
        -------
        irf : IRAnalysis
        """
        if var_order is not None:
            raise NotImplementedError('alternate variable order not '
                                      'implemented (yet)')

        return IRAnalysis(self, P=var_decomp, periods=periods)

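
    # Illustrative usage sketch (assumes ``res`` is a fitted VARResults, e.g.
    # ``res = VAR(data).fit(2)``):
    #
    #     irf = res.irf(periods=10)
    #     irf.plot(orth=True)        # orthogonalized impulse responses
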

    def fevd(self, periods=10, var_decomp=None):
        """
        Compute forecast error variance decomposition ("fevd")

        Returns
        -------
        fevd : FEVD instance
        """
        return FEVD(self, P=var_decomp, periods=periods)


    def reorder(self, order):
        """Reorder variables for structural specification
        """
        if len(order) != len(self.params[0, :]):
            raise ValueError("Reorder specification length should match "
                             "number of endogenous variables")
        # This converts order to list of integers if given as strings
        if isinstance(order[0], str):
            order_new = []
            for i, nam in enumerate(order):
                order_new.append(self.names.index(order[i]))
            order = order_new
        return _reordered(self, order)


    # --------------------------------------------------------------------------
    # VAR Diagnostics: Granger-causality, whiteness of residuals, normality, etc


    def test_causality(self, caused, causing=None, kind='f', signif=0.05):
        """
        Test Granger causality

        Parameters
        ----------
        caused : int or str or sequence of int or str
            If int or str, test whether the variable specified via this index
            (int) or name (str) is Granger-caused by the variable(s) specified
            by `causing`.
            If a sequence of int or str, test whether the corresponding
            variables are Granger-caused by the variable(s) specified
            by `causing`.
        causing : int or str or sequence of int or str or None, default: None
            If int or str, test whether the variable specified via this index
            (int) or name (str) is Granger-causing the variable(s) specified
            by `caused`.
            If a sequence of int or str, test whether the corresponding
            variables are Granger-causing the variable(s) specified by
            `caused`.
            If None, `causing` is assumed to be the complement of `caused`.
        kind : {'f', 'wald'}
            Perform F-test or Wald (chi-sq) test
        signif : float, default 0.05
            Significance level for computing critical values for the test.

        Notes
        -----
        The null hypothesis is that there is no Granger-causality for the
        indicated variables. The degrees of freedom in the F-test are based
        on the number of variables in the VAR system: they equal the number
        of equations in the VAR times the degrees of freedom of a single
        equation.

        Test for Granger-causality as described in chapter 7.6.3 of [1]_.
        Test H0: "`causing` does not Granger-cause the remaining variables of
        the system" against H1: "`causing` is Granger-causal for the
        remaining variables".

        Returns
        -------
        results : CausalityTestResults

        References
        ----------
        .. [1] Lütkepohl, H. 2005. *New Introduction to Multiple Time Series
           Analysis*. Springer.
        """
        if not (0 < signif < 1):
            raise ValueError("signif has to be between 0 and 1")

        allowed_types = (str, int)

        if isinstance(caused, allowed_types):
            caused = [caused]
        if not all(isinstance(c, allowed_types) for c in caused):
            raise TypeError("caused has to be of type string or int (or a "
                            "sequence of these types).")
        caused = [self.names[c] if type(c) == int else c for c in caused]
        caused_ind = [util.get_index(self.names, c) for c in caused]

        if causing is not None:
            if isinstance(causing, allowed_types):
                causing = [causing]
            if not all(isinstance(c, allowed_types) for c in causing):
                raise TypeError("causing has to be of type string or int (or "
                                "a sequence of these types) or None.")
            causing = [self.names[c] if type(c) == int else c for c in causing]
            causing_ind = [util.get_index(self.names, c) for c in causing]

        if causing is None:
            causing_ind = [i for i in range(self.neqs) if i not in caused_ind]
            causing = [self.names[c] for c in causing_ind]

        k, p = self.neqs, self.k_ar
        if p == 0:
            err = "Cannot test Granger Causality in a model with 0 lags."
            raise RuntimeError(err)

        # number of restrictions
        num_restr = len(causing) * len(caused) * p
        num_det_terms = self.k_exog

        # Make restriction matrix
        C = np.zeros((num_restr, k * num_det_terms + k**2 * p), dtype=float)
        cols_det = k * num_det_terms
        row = 0
        for j in range(p):
            for ing_ind in causing_ind:
                for ed_ind in caused_ind:
                    C[row, cols_det + ed_ind + k * ing_ind + k**2 * j] = 1
                    row += 1

        # Lütkepohl 3.6.5
        Cb = np.dot(C, vec(self.params.T))
        middle = np.linalg.inv(C @ self.cov_params() @ C.T)

        # Wald statistic
        lam_wald = statistic = Cb @ middle @ Cb

        if kind.lower() == 'wald':
            df = num_restr
            dist = stats.chi2(df)
        elif kind.lower() == 'f':
            statistic = lam_wald / num_restr
            df = (num_restr, k * self.df_resid)
            dist = stats.f(*df)
        else:
            raise ValueError('kind %s not recognized' % kind)

        pvalue = dist.sf(statistic)
        crit_value = dist.ppf(1 - signif)

        return CausalityTestResults(causing, caused, statistic,
                                    crit_value, pvalue, df, signif,
                                    test="granger", method=kind)
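
    # Example usage (a sketch; the variable names are hypothetical): test
    # whether 'realcons' Granger-causes 'realgdp' at the 5% level:
    #
    #     res = VAR(data).fit(maxlags=2)
    #     out = res.test_causality('realgdp', causing='realcons', kind='f')
    #     out.summary()      # test statistic, critical value, p-value, df
    #     out.pvalue < 0.05  # True -> reject H0 of no Granger causality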


    def test_inst_causality(self, causing, signif=0.05):
        """
        Test for instantaneous causality

        Parameters
        ----------
        causing : int or str or sequence of int or str
            If int or str, test whether the corresponding variable is causing
            the variable(s) specified in caused.
            If sequence of int or str, test whether the corresponding
            variables are causing the variable(s) specified in caused.
        signif : float between 0 and 1, default 0.05
            Significance level for computing critical values for the test.

        Returns
        -------
        results : CausalityTestResults
            Results instance holding the test statistic, the critical value
            of the Chi^2-distribution, the p-value, the degrees of freedom,
            and the conclusion ("reject" or "fail to reject" H0).

        Notes
        -----
        Test for instantaneous causality as described in chapters 3.6.3 and
        7.6.4 of [1]_.
        Test H0: "No instantaneous causality between caused and causing"
        against H1: "Instantaneous causality between caused and causing
        exists".

        Instantaneous causality is a symmetric relation (i.e. if causing is
        "instantaneously causing" caused, then caused is also
        "instantaneously causing" causing), so the naming of the parameters
        (chosen to be in accordance with test_causality()) may be misleading.

        This method does not return the same result as JMulTi. This is
        because the test is based on a VAR(k_ar) model in statsmodels (in
        accordance with pp. 104, 320-321 in [1]_) whereas JMulTi seems to be
        using a VAR(k_ar+1) model.

        References
        ----------
        .. [1] Lütkepohl, H. 2005. *New Introduction to Multiple Time Series
           Analysis*. Springer.
        """
        if not (0 < signif < 1):
            raise ValueError("signif has to be between 0 and 1")

        allowed_types = (str, int)
        if isinstance(causing, allowed_types):
            causing = [causing]
        if not all(isinstance(c, allowed_types) for c in causing):
            raise TypeError("causing has to be of type string or int (or a "
                            "sequence of these types).")
        causing = [self.names[c] if type(c) == int else c for c in causing]
        causing_ind = [util.get_index(self.names, c) for c in causing]

        caused_ind = [i for i in range(self.neqs) if i not in causing_ind]
        caused = [self.names[c] for c in caused_ind]

        # Note: JMulTi seems to be using k_ar+1 instead of k_ar
        k, t, p = self.neqs, self.nobs, self.k_ar

        num_restr = len(causing) * len(caused)  # called N in Lütkepohl

        sigma_u = self.sigma_u
        vech_sigma_u = util.vech(sigma_u)
        sig_mask = np.zeros(sigma_u.shape)
        # assign 1 in both (i, j) and (j, i) to ensure that all the ones
        # needed are below the main diagonal:
        sig_mask[causing_ind, caused_ind] = 1
        sig_mask[caused_ind, causing_ind] = 1
        vech_sig_mask = util.vech(sig_mask)
        inds = np.nonzero(vech_sig_mask)[0]

        # Make restriction matrix
        C = np.zeros((num_restr, len(vech_sigma_u)), dtype=float)
        for row in range(num_restr):
            C[row, inds[row]] = 1
        Cs = np.dot(C, vech_sigma_u)
        d = np.linalg.pinv(duplication_matrix(k))
        Cd = np.dot(C, d)
        middle = np.linalg.inv(Cd @ np.kron(sigma_u, sigma_u) @ Cd.T) / 2

        wald_statistic = t * (Cs.T @ middle @ Cs)
        df = num_restr
        dist = stats.chi2(df)

        pvalue = dist.sf(wald_statistic)
        crit_value = dist.ppf(1 - signif)

        return CausalityTestResults(causing, caused, wald_statistic,
                                    crit_value, pvalue, df, signif,
                                    test="inst", method="wald")
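
    # Example usage (a sketch; the variable name is hypothetical). Because
    # the relation is symmetric, swapping the roles of `causing` and caused
    # yields the same statistic:
    #
    #     out = res.test_inst_causality(causing='realcons')
    #     out.summary()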


    def test_whiteness(self, nlags=10, signif=0.05, adjusted=False):
        """
        Residual whiteness tests using Portmanteau test

        Parameters
        ----------
        nlags : int > 0
            Number of residual autocorrelation lags included in the
            statistic; must exceed the VAR order for the degrees of freedom
            to be positive.
        signif : float, between 0 and 1
            Significance level for computing the critical value.
        adjusted : bool, default False
            If True, apply the small-sample adjustment that divides the
            lag-h term by (nobs - h) (Ljung-Box style).

        Returns
        -------
        results : WhitenessTestResults

        Notes
        -----
        Test the whiteness of the residuals using the Portmanteau test as
        described in [1]_, chapter 4.4.3.

        References
        ----------
        .. [1] Lütkepohl, H. 2005. *New Introduction to Multiple Time Series
           Analysis*. Springer.
        """
        statistic = 0
        u = np.asarray(self.resid)
        acov_list = _compute_acov(u, nlags)
        cov0_inv = np.linalg.inv(acov_list[0])
        for t in range(1, nlags + 1):
            ct = acov_list[t]
            to_add = np.trace(ct.T @ cov0_inv @ ct @ cov0_inv)
            if adjusted:
                to_add /= (self.nobs - t)
            statistic += to_add
        statistic *= self.nobs**2 if adjusted else self.nobs
        df = self.neqs**2 * (nlags - self.k_ar)
        dist = stats.chi2(df)
        pvalue = dist.sf(statistic)
        crit_value = dist.ppf(1 - signif)

        return WhitenessTestResults(statistic, crit_value, pvalue, df, signif,
                                    nlags, adjusted)
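
    # Example usage (a sketch; `res` is a fitted VARResults). `nlags` must
    # exceed the VAR order so that df = neqs**2 * (nlags - k_ar) > 0:
    #
    #     out = res.test_whiteness(nlags=12, adjusted=True)
    #     out.summary()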


    def plot_acorr(self, nlags=10, resid=True, linewidth=8):
        r"""
        Plot autocorrelation of sample (endog) or residuals

        Sample (Y) or residual autocorrelations are plotted together with the
        standard :math:`2 / \sqrt{T}` bounds.

        Parameters
        ----------
        nlags : int
            number of lags to display (excluding 0)
        resid : bool
            If True, the autocorrelation of the residuals is plotted.
            If False, the autocorrelation of endog is plotted.
        linewidth : int
            width of vertical bars

        Returns
        -------
        Figure
            Figure instance containing the plot.
        """
        if resid:
            acorrs = self.resid_acorr(nlags)
        else:
            acorrs = self.sample_acorr(nlags)

        bound = 2 / np.sqrt(self.nobs)

        fig = plotting.plot_full_acorr(acorrs[1:],
                                       xlabel=np.arange(1, nlags + 1),
                                       err_bound=bound,
                                       linewidth=linewidth)
        what = "residuals" if resid else "sample"
        fig.suptitle(r"ACF plots for %s with $2 / \sqrt{T}$ bounds" % what)
        return fig
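
    # Example usage (a sketch; `res` is a fitted VARResults):
    #
    #     fig = res.plot_acorr(nlags=20)               # residual ACFs
    #     fig = res.plot_acorr(nlags=20, resid=False)  # endog ACFs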


    def test_normality(self, signif=0.05):
        """
        Test the assumption of normally distributed errors using a
        Jarque-Bera-style omnibus Chi^2 test.

        Parameters
        ----------
        signif : float
            Test significance level.

        Returns
        -------
        result : NormalityTestResults

        Notes
        -----
        H0 (null) : data are generated by a Gaussian-distributed process
        """
        return test_normality(self, signif=signif)
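
    # Example usage (a sketch; `res` is a fitted VARResults):
    #
    #     out = res.test_normality()
    #     out.summary()  # joint Jarque-Bera statistic, p-value, conclusion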


    @cache_readonly
    def detomega(self):
        r"""
        Return determinant of white noise covariance with degrees of freedom
        correction:

        .. math::

            \hat \Omega = \frac{T}{T - Kp - 1} \hat \Omega_{\mathrm{MLE}}
        """
        return np.linalg.det(self.sigma_u)


    @cache_readonly
    def info_criteria(self):
        """Information criteria for lag-order selection"""
        nobs = self.nobs
        neqs = self.neqs
        lag_order = self.k_ar
        free_params = lag_order * neqs ** 2 + neqs * self.k_exog

        ld = logdet_symm(self.sigma_u_mle)

        # See Lütkepohl pp. 146-150
        aic = ld + (2. / nobs) * free_params
        bic = ld + (np.log(nobs) / nobs) * free_params
        hqic = ld + (2. * np.log(np.log(nobs)) / nobs) * free_params
        fpe = ((nobs + self.df_model) / self.df_resid) ** neqs * np.exp(ld)

        return {
            'aic': aic,
            'bic': bic,
            'hqic': hqic,
            'fpe': fpe
        }
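
    # Example usage (a sketch; `data` is hypothetical): the criteria trade
    # off fit (log|Sigma_u_mle|) against the number of free parameters, so
    # lower is better when comparing lag orders:
    #
    #     {p: VAR(data).fit(p).aic for p in range(1, 6)}
    #
    # VAR.fit(maxlags=..., ic='aic') automates this kind of search.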


    @property
    def aic(self):
        """Akaike information criterion"""
        return self.info_criteria['aic']

    @property
    def fpe(self):
        """Final Prediction Error (FPE)

        Lütkepohl p. 147, see info_criteria
        """
        return self.info_criteria['fpe']

    @property
    def hqic(self):
        """Hannan-Quinn criterion"""
        return self.info_criteria['hqic']

    @property
    def bic(self):
        """Bayesian a.k.a. Schwarz info criterion"""
        return self.info_criteria['bic']


    @cache_readonly
    def roots(self):
        """
        The roots of the VAR process are the solution to
        det(I - coefs[0]*z - coefs[1]*z**2 - ... - coefs[k_ar-1]*z**k_ar) = 0.
        Note that these are the inverses of the companion-matrix eigenvalues,
        and stability requires that the roots lie outside the unit circle.
        """
        neqs = self.neqs
        k_ar = self.k_ar
        p = neqs * k_ar
        arr = np.zeros((p, p))
        arr[:neqs, :] = np.column_stack(self.coefs)
        arr[neqs:, :-neqs] = np.eye(p - neqs)
        roots = np.linalg.eig(arr)[0] ** -1
        idx = np.argsort(np.abs(roots))[::-1]  # sort by descending modulus
        return roots[idx]
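
    # Example usage (a sketch; `res` is a fitted VARResults): the estimated
    # process is stable when every root has modulus greater than one:
    #
    #     (np.abs(res.roots) > 1).all()
    #     res.is_stable()  # equivalent check via the companion matrix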


class VARResultsWrapper(wrap.ResultsWrapper):
    _attrs = {'bse': 'columns_eq', 'cov_params': 'cov',
              'params': 'columns_eq', 'pvalues': 'columns_eq',
              'tvalues': 'columns_eq', 'sigma_u': 'cov_eq',
              'sigma_u_mle': 'cov_eq',
              'stderr': 'columns_eq'}
    _wrap_attrs = wrap.union_dicts(TimeSeriesResultsWrapper._wrap_attrs,
                                   _attrs)
    _methods = {'conf_int': 'multivariate_confint'}
    _wrap_methods = wrap.union_dicts(TimeSeriesResultsWrapper._wrap_methods,
                                     _methods)


wrap.populate_wrapper(VARResultsWrapper, VARResults)  # noqa:E305


class FEVD(object):
    """
    Compute and plot Forecast error variance decomposition and asymptotic
    standard errors
    """
    def __init__(self, model, P=None, periods=None):
        self.periods = periods

        self.model = model
        self.neqs = model.neqs
        self.names = model.model.endog_names

        self.irfobj = model.irf(var_decomp=P, periods=periods)
        self.orth_irfs = self.irfobj.orth_irfs

        # cumulative squared orthogonalized impulse responses
        irfs = (self.orth_irfs[:periods] ** 2).cumsum(axis=0)

        rng = lrange(self.neqs)
        mse = self.model.mse(periods)[:, rng, rng]

        # lag x equation x component
        fevd = np.empty_like(irfs)

        for i in range(periods):
            fevd[i] = (irfs[i].T / mse[i]).T

        # switch to equation x lag x component
        self.decomp = fevd.swapaxes(0, 1)


    def summary(self):
        buf = StringIO()

        rng = lrange(self.periods)
        for i in range(self.neqs):
            ppm = output.pprint_matrix(self.decomp[i], rng, self.names)

            buf.write('FEVD for %s\n' % self.names[i])
            buf.write(ppm + '\n')

        print(buf.getvalue())

    def cov(self):
        """Compute asymptotic standard errors (not yet implemented)"""
        raise NotImplementedError


    def plot(self, periods=None, figsize=(10, 10), **plot_kwds):
        """Plot graphical display of FEVD

        Parameters
        ----------
        periods : int, default None
            Defaults to the number originally specified. Can be at most that
            number.
        """
        import matplotlib.pyplot as plt

        k = self.neqs
        periods = periods or self.periods

        fig, axes = plt.subplots(nrows=k, figsize=figsize)
        fig.suptitle('Forecast error variance decomposition (FEVD)')

        colors = [str(c) for c in np.arange(k, dtype=float) / k]
        ticks = np.arange(periods)

        limits = self.decomp.cumsum(2)

        for i in range(k):
            ax = axes[i]
            this_limits = limits[i].T

            for j in range(k):
                lower = this_limits[j - 1] if j > 0 else 0
                upper = this_limits[j]
                ax.bar(ticks, upper - lower, bottom=lower,
                       color=colors[j], label=self.names[j], **plot_kwds)

            ax.set_title(self.names[i])

        # just use the last axis to get handles for the figure legend
        handles, labels = ax.get_legend_handles_labels()
        fig.legend(handles, labels, loc='upper right')
        plotting.adjust_subplots(right=0.85)
        return fig


# -------------------------------------------------------------------------------


def _compute_acov(x, nlags=1):
    """Sample autocovariance matrices of x for lags 0, ..., nlags
    (all divided by len(x))."""
    x = x - x.mean(0)

    result = []
    for lag in range(nlags + 1):
        if lag > 0:
            r = np.dot(x[lag:].T, x[:-lag])
        else:
            r = np.dot(x.T, x)

        result.append(r)

    return np.array(result) / len(x)
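
# Example (a sketch): after demeaning, the lag-0 term equals the biased
# sample covariance:
#
#     x = np.random.randn(500, 3)
#     acov = _compute_acov(x, nlags=2)
#     np.allclose(acov[0], np.cov(x.T, bias=True))  # True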


def _acovs_to_acorrs(acovs):
    """Convert autocovariance matrices to autocorrelation matrices."""
    sd = np.sqrt(np.diag(acovs[0]))
    return acovs / np.outer(sd, sd)


if __name__ == '__main__':
    from statsmodels.tsa.vector_ar.util import parse_lutkepohl_data
    import statsmodels.tools.data as data_util

    np.set_printoptions(linewidth=140, precision=5)

    sdata, dates = parse_lutkepohl_data('data/%s.dat' % 'e1')

    names = sdata.dtype.names
    data = data_util.struct_to_ndarray(sdata)
    adj_data = np.diff(np.log(data), axis=0)
    # fit on all but the last 16 observations
    model = VAR(adj_data[:-16], dates=dates[1:-16], names=names)

    est = model.fit(maxlags=2)
    irf = est.irf()

    y = est.endog[-2:]

2284 """ 

2285 # irf.plot_irf() 

2286 

2287 # i = 2; j = 1 

2288 # cv = irf.cum_effect_cov(orth=True) 

2289 # print np.sqrt(cv[:, j * 3 + i, j * 3 + i]) / 1e-2 

2290 

2291 # data = np.genfromtxt('Canada.csv', delimiter=',', names=True) 

2292 # data = data.view((float, 4)) 

2293 """ 

2294 

2295 ''' 

2296 mdata = sm.datasets.macrodata.load(as_pandas=False).data 

2297 mdata2 = mdata[['realgdp','realcons','realinv']] 

2298 names = mdata2.dtype.names 

2299 data = mdata2.view((float,3)) 

2300 data = np.diff(np.log(data), axis=0) 

2301 

2302 import pandas as pn 

2303 df = pn.DataFrame.fromRecords(mdata) 

2304 df = np.log(df.reindex(columns=names)) 

2305 df = (df - df.shift(1)).dropna() 

2306 

2307 model = VAR(df) 

2308 est = model.fit(maxlags=2) 

2309 irf = est.irf() 

2310 '''