Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# Note: The information criteria add 1 to the number of parameters 

2# whenever the model has an AR or MA term since, in principle, 

3# the variance could be treated as a free parameter and restricted 

4# This code does not allow this, but it adds consistency with other 

5# packages such as gretl and X12-ARIMA 

6 

7import copy 

8from datetime import datetime 

9 

10import numpy as np 

11import pandas as pd 

12from numpy import dot, log, zeros, pi 

13from numpy.linalg import inv 

14from scipy import optimize 

15from scipy.signal import lfilter 

16from scipy.stats import norm 

17 

18from statsmodels.compat.pandas import Appender 

19import statsmodels.base.wrapper as wrap 

20from statsmodels.regression.linear_model import yule_walker, OLS 

21from statsmodels.tools.decorators import cache_readonly 

22from statsmodels.tools.numdiff import approx_hess_cs, approx_fprime_cs 

23from statsmodels.tools.sm_exceptions import SpecificationWarning 

24from statsmodels.tools.validation import array_like, string_like 

25from statsmodels.tsa.ar_model import AutoReg, ar_select_order 

26from statsmodels.tsa.arima_process import arma2ma 

27from statsmodels.tsa.base import tsa_model 

28from statsmodels.tsa.kalmanf import KalmanFilter 

29from statsmodels.tsa.tsatools import (lagmat, add_trend, 

30 _ar_transparams, _ar_invtransparams, 

31 _ma_transparams, _ma_invtransparams, 

32 unintegrate, unintegrate_levels) 

33from statsmodels.tsa.vector_ar import util 

34 

35REPEATED_FIT_ERROR = """ 

36Model has been fit using trend={0} and method={1}. These cannot be changed 

37in subsequent calls to `fit`. Instead, use a new instance of {mod}. 

38""" 

39 

40_armax_notes = r""" 

41 Notes 

42 ----- 

43 If exogenous variables are given, then the model that is fit is 

44 

45 .. math:: 

46 

47 \phi(L)(y_t - X_t\beta) = \theta(L)\epsilon_t 

48 

49 where :math:`\phi` and :math:`\theta` are polynomials in the lag 

50 operator, :math:`L`. This is the regression model with ARMA errors, 

51 or ARMAX model. This specification is used, whether or not the model 

52 is fit using conditional sum of square or maximum-likelihood, using 

53 the `method` argument in 

54 :meth:`statsmodels.tsa.arima_model.%(Model)s.fit`. Therefore, for 

55 now, `css` and `mle` refer to estimation methods only. This may 

56 change for the case of the `css` model in future versions. 

57""" 

58 

59_arma_params = """endog : array_like 

60 The endogenous variable. 

61 order : iterable 

62 The (p,q) order of the model for the number of AR parameters, 

63 and MA parameters to use. 

64 exog : array_like, optional 

65 An optional array of exogenous variables. This should *not* include a 

66 constant or trend. You can specify this in the `fit` method.""" 

67 

68_arma_model = "Autoregressive Moving Average ARMA(p,q) Model" 

69 

70_arima_model = "Autoregressive Integrated Moving Average ARIMA(p,d,q) Model" 

71 

72_arima_params = """endog : array_like 

73 The endogenous variable. 

74 order : iterable 

75 The (p,d,q) order of the model for the number of AR parameters, 

76 differences, and MA parameters to use. 

77 exog : array_like, optional 

78 An optional array of exogenous variables. This should *not* include a 

79 constant or trend. You can specify this in the `fit` method.""" 

80 

81_predict_notes = """ 

82 Notes 

83 ----- 

84 Use the results predict method instead. 

85""" 

86 

87_results_notes = """ 

88 Notes 

89 ----- 

90 It is recommended to use dates with the time-series models, as the 

91 below will probably make clear. However, if ARIMA is used without 

92 dates and/or `start` and `end` are given as indices, then these 

93 indices are in terms of the *original*, undifferenced series. Ie., 

94 given some undifferenced observations:: 

95 

96 1970Q1, 1 

97 1970Q2, 1.5 

98 1970Q3, 1.25 

99 1970Q4, 2.25 

100 1971Q1, 1.2 

101 1971Q2, 4.1 

102 

103 1970Q1 is observation 0 in the original series. However, if we fit an 

104 ARIMA(p,1,q) model then we lose this first observation through 

105 differencing. Therefore, the first observation we can forecast (if 

106 using exact MLE) is index 1. In the differenced series this is index 

107 0, but we refer to it as 1 from the original series. 

108""" 

109 

110_predict = """ 

111 %(Model)s model in-sample and out-of-sample prediction 

112 

113 Parameters 

114 ---------- 

115 %(params)s 

116 start : int, str, or datetime 

117 Zero-indexed observation number at which to start forecasting, ie., 

118 the first forecast is start. Can also be a date string to 

119 parse or a datetime type. 

120 end : int, str, or datetime 

121 Zero-indexed observation number at which to end forecasting, ie., 

122 the first forecast is start. Can also be a date string to 

123 parse or a datetime type. However, if the dates index does not 

124 have a fixed frequency, end must be an integer index if you 

125 want out of sample prediction. 

126 exog : array_like, optional 

127 If the model is an ARMAX and out-of-sample forecasting is 

128 requested, exog must be given. exog must be aligned so that exog[0] 

129 is used to produce the first out-of-sample forecast. The number of 

130 observation in exog should match the number of out-of-sample 

131 forecasts produced. If the length of exog does not match the number 

132 of forecasts, a SpecificationWarning is produced. 

133 dynamic : bool, optional 

134 The `dynamic` keyword affects in-sample prediction. If dynamic 

135 is False, then the in-sample lagged values are used for 

136 prediction. If `dynamic` is True, then in-sample forecasts are 

137 used in place of lagged dependent variables. The first forecast 

138 value is `start`. 

139 %(extra_params)s 

140 

141 Returns 

142 ------- 

143 %(returns)s 

144 %(extra_section)s 

145""" 

146 

147_predict_returns = """predict : ndarray 

148 The predicted values. 

149 

150""" 

151 

152_arma_predict = _predict % {"Model": "ARMA", 

153 "params": """params : array_like 

154 The fitted parameters of the model.""", 

155 "extra_params": "", 

156 "returns": _predict_returns, 

157 "extra_section": _predict_notes} 

158 

159_arma_results_predict = _predict % {"Model": "ARMA", "params": "", 

160 "extra_params": "", 

161 "returns": _predict_returns, 

162 "extra_section": _results_notes} 

163_arima_extras = """typ : str {'linear', 'levels'} 

164 

165 - 'linear' : Linear prediction in terms of the differenced 

166 endogenous variables. 

167 - 'levels' : Predict the levels of the original endogenous 

168 variables.\n""" 

169 

170_arima_predict = _predict % {"Model": "ARIMA", 

171 "params": """params : array_like 

172 The fitted parameters of the model.""", 

173 "extra_params": _arima_extras, 

174 "returns": _predict_returns, 

175 "extra_section": _predict_notes} 

176 

177_arima_results_predict = _predict % {"Model": "ARIMA", 

178 "params": "", 

179 "extra_params": _arima_extras, 

180 "returns": _predict_returns, 

181 "extra_section": _results_notes} 

182 

183_arima_plot_predict_example = """ Examples 

184 -------- 

185 >>> import statsmodels.api as sm 

186 >>> import matplotlib.pyplot as plt 

187 >>> import pandas as pd 

188 >>> 

189 >>> dta = sm.datasets.sunspots.load_pandas().data[['SUNACTIVITY']] 

190 >>> dta.index = pd.date_range(start='1700', end='2009', freq='A') 

191 >>> res = sm.tsa.ARMA(dta, (3, 0)).fit() 

192 >>> fig, ax = plt.subplots() 

193 >>> ax = dta.loc['1950':].plot(ax=ax) 

194 >>> fig = res.plot_predict('1990', '2012', dynamic=True, ax=ax, 

195 ... plot_insample=False) 

196 >>> plt.show() 

197 

198 .. plot:: plots/arma_predict_plot.py 

199""" 

200 

201_plot_extras = """alpha : float, optional 

202 The confidence intervals for the forecasts are (1 - alpha)% 

203 plot_insample : bool, optional 

204 Whether to plot the in-sample series. Default is True. 

205 ax : matplotlib.Axes, optional 

206 Existing axes to plot with.""" 

207 

208_plot_predict = (""" 

209 Plot forecasts 

210 """ + '\n'.join(_predict.split('\n')[2:])) % { 

211 "params": "", 

212 "extra_params": _plot_extras, 

213 "returns": """fig : Figure 

214 The plotted Figure instance""", 

215 "extra_section": ('\n' + _arima_plot_predict_example + 

216 '\n' + _results_notes) 

217} 

218 

219_arima_plot_predict = (""" 

220 Plot forecasts 

221 """ + '\n'.join(_predict.split('\n')[2:])) % { 

222 "params": "", 

223 "extra_params": _plot_extras, 

224 "returns": """fig : Figure 

225 The plotted Figure instance""", 

226 "extra_section": ('\n' + _arima_plot_predict_example + '\n' + 

227 '\n'.join(_results_notes.split('\n')[:3]) 

228 + (""" 

229 This is hard-coded to only allow plotting of the forecasts in levels. 

230""") + '\n'.join(_results_notes.split('\n')[3:])) 

231} 

232 

233 

234def cumsum_n(x, n): 

235 for _ in range(n): 

236 x = np.cumsum(x) 

237 

238 return x 

239 

240 

241def _prediction_adjust_exog(exog, row_labels, dynamic, end): 

242 """ 

243 Adjust exog if exog has dates that align with endog 

244 

245 Parameters 

246 ---------- 

247 exog : {array_like, None} 

248 The exog values 

249 row_labels : {pd.DatetimeIndex, None} 

250 Row labels from endog 

251 dynamic : bool 

252 Flag indicating whether dynamic forecasts are expected 

253 end : int 

254 Index of final in-sample observation 

255 """ 

256 if exog is None: 

257 return None 

258 

259 exog_start = 0 

260 exog_index = getattr(exog, 'index', None) 

261 exog_dates = isinstance(exog_index, pd.DatetimeIndex) 

262 endog_dates = isinstance(row_labels, pd.DatetimeIndex) 

263 date_adj = endog_dates and exog_dates and not dynamic 

264 if date_adj and row_labels.isin(exog_index).all(): 

265 end_label = row_labels[end] 

266 exog_start = exog.index.get_loc(end_label) + 1 

267 

268 exog = array_like(exog, 'exog', ndim=2) 

269 return exog[exog_start:] 

270 

271 

272def _check_arima_start(start, k_ar, k_diff, method, dynamic): 

273 if start < 0: 

274 raise ValueError("The start index %d of the original series " 

275 "has been differenced away" % start) 

276 elif (dynamic or 'mle' not in method) and start < k_ar: 

277 raise ValueError("Start must be >= k_ar for conditional MLE " 

278 "or dynamic forecast. Got %d" % start) 

279 

280 

281def _get_predict_out_of_sample(endog, p, q, k_trend, k_exog, start, errors, 

282 trendparam, exparams, arparams, maparams, steps, 

283 method, exog=None): 

284 """ 

285 Returns endog, resid, mu of appropriate length for out of sample 

286 prediction. 

287 """ 

288 if q: 

289 resid = np.zeros(q) 

290 if start and 'mle' in method or (start == p and not start == 0): 

291 resid[:q] = errors[start - q:start] 

292 elif start: 

293 resid[:q] = errors[start - q - p:start - p] 

294 else: 

295 resid[:q] = errors[-q:] 

296 else: 

297 resid = None 

298 

299 y = endog 

300 if k_trend == 1: 

301 # use expectation not constant 

302 if k_exog > 0: 

303 # TODO: technically should only hold for MLE not 

304 # conditional model. See #274. 

305 # ensure 2-d for conformability 

306 if np.ndim(exog) == 1 and k_exog == 1: 

307 # have a 1d series of observations -> 2d 

308 exog = exog[:, None] 

309 elif np.ndim(exog) == 1: 

310 # should have a 1d row of exog -> 2d 

311 if len(exog) != k_exog: 

312 raise ValueError("1d exog given and len(exog) != k_exog") 

313 exog = exog[None, :] 

314 X = lagmat(np.dot(exog, exparams), p, original='in', trim='both') 

315 mu = trendparam * (1 - arparams.sum()) 

316 # arparams were reversed in unpack for ease later 

317 mu = mu + (np.r_[1, -arparams[::-1]] * X).sum(1)[:, None] 

318 else: 

319 mu = trendparam * (1 - arparams.sum()) 

320 mu = np.array([mu] * steps) 

321 elif k_exog > 0: 

322 X = np.dot(exog, exparams) 

323 X = lagmat(X, p, original='in', trim='both') 

324 mu = (np.r_[1, -arparams[::-1]] * X).sum(1)[:, None] 

325 else: 

326 mu = np.zeros(steps) 

327 

328 endog = np.zeros(p + steps - 1) 

329 

330 if p and start: 

331 endog[:p] = y[start - p:start] 

332 elif p: 

333 endog[:p] = y[-p:] 

334 

335 return endog, resid, mu 

336 

337 

338def _arma_predict_out_of_sample(params, steps, errors, p, q, k_trend, k_exog, 

339 endog, exog=None, start=0, method='mle'): 

340 (trendparam, exparams, 

341 arparams, maparams) = _unpack_params(params, (p, q), k_trend, 

342 k_exog, reverse=True) 

343 endog, resid, mu = _get_predict_out_of_sample(endog, p, q, k_trend, k_exog, 

344 start, errors, trendparam, 

345 exparams, arparams, 

346 maparams, steps, method, 

347 exog) 

348 

349 forecast = np.zeros(steps) 

350 if steps == 1: 

351 if q: 

352 return mu[0] + np.dot(arparams, endog[:p]) + np.dot(maparams, 

353 resid[:q]) 

354 else: 

355 return mu[0] + np.dot(arparams, endog[:p]) 

356 

357 if q: 

358 i = 0 # if q == 1 

359 else: 

360 i = -1 

361 

362 for i in range(min(q, steps - 1)): 

363 fcast = (mu[i] + np.dot(arparams, endog[i:i + p]) + 

364 np.dot(maparams[:q - i], resid[i:i + q])) 

365 forecast[i] = fcast 

366 endog[i + p] = fcast 

367 

368 for i in range(i + 1, steps - 1): 

369 fcast = mu[i] + np.dot(arparams, endog[i:i + p]) 

370 forecast[i] = fcast 

371 endog[i + p] = fcast 

372 

373 # need to do one more without updating endog 

374 forecast[steps - 1] = mu[steps - 1] + np.dot(arparams, endog[steps - 1:]) 

375 return forecast 

376 

377 

378def _arma_predict_in_sample(start, end, endog, resid, k_ar, method): 

379 """ 

380 Pre- and in-sample fitting for ARMA. 

381 """ 

382 if 'mle' in method: 

383 fittedvalues = endog - resid # get them all then trim 

384 else: 

385 fittedvalues = endog[k_ar:] - resid 

386 

387 fv_start = start 

388 if 'mle' not in method: 

389 fv_start -= k_ar # start is in terms of endog index 

390 fv_end = min(len(fittedvalues), end + 1) 

391 return fittedvalues[fv_start:fv_end] 

392 

393 

394def _unpack_params(params, order, k_trend, k_exog, reverse=False): 

395 p, q = order 

396 k = k_trend + k_exog 

397 maparams = params[k + p:] 

398 arparams = params[k:k + p] 

399 trend = params[:k_trend] 

400 exparams = params[k_trend:k] 

401 if reverse: 

402 return trend, exparams, arparams[::-1], maparams[::-1] 

403 return trend, exparams, arparams, maparams 

404 

405 

406def _make_arma_names(data, k_trend, order, exog_names): 

407 k_ar, k_ma = order 

408 exog_names = exog_names or [] 

409 ar_lag_names = util.make_lag_names([data.ynames], k_ar, 0) 

410 ar_lag_names = [''.join(('ar.', i)) for i in ar_lag_names] 

411 ma_lag_names = util.make_lag_names([data.ynames], k_ma, 0) 

412 ma_lag_names = [''.join(('ma.', i)) for i in ma_lag_names] 

413 trend_name = util.make_lag_names('', 0, k_trend) 

414 

415 exog_names = trend_name + exog_names + ar_lag_names + ma_lag_names 

416 return exog_names 

417 

418 

419def _make_arma_exog(endog, exog, trend): 

420 k_trend = 1 # overwritten if no constant 

421 if exog is None and trend == 'c': # constant only 

422 exog = np.ones((len(endog), 1)) 

423 elif exog is not None and trend == 'c': # constant plus exogenous 

424 exog = add_trend(exog, trend='c', prepend=True, has_constant='raise') 

425 elif trend == 'nc': 

426 k_trend = 0 

427 return k_trend, exog 

428 

429 

430def _check_estimable(nobs, n_params): 

431 if nobs <= n_params: 

432 raise ValueError("Insufficient degrees of freedom to estimate") 

433 

434 

435class ARMA(tsa_model.TimeSeriesModel): 

436 __doc__ = tsa_model._tsa_doc % {"model": _arma_model, 

437 "params": _arma_params, 

438 "extra_params": "", 

439 "extra_sections": 

440 _armax_notes % {"Model": "ARMA"}} 

441 

442 def __init__(self, endog, order, exog=None, dates=None, freq=None, 

443 missing='none'): 

444 super(ARMA, self).__init__(endog, exog, dates, freq, missing=missing) 

445 # GH 2575 

446 array_like(endog, 'endog') 

447 exog = array_like(self.data.exog, 'exog', ndim=2, optional=True) 

448 _check_estimable(len(self.endog), sum(order)) 

449 self.k_ar = k_ar = order[0] 

450 self.k_ma = k_ma = order[1] 

451 self.k_lags = max(k_ar, k_ma + 1) 

452 if exog is not None: 

453 k_exog = exog.shape[1] # number of exog. variables excl. const 

454 else: 

455 k_exog = 0 

456 self.k_exog = k_exog 

457 self._orig_exog_names = self.exog_names 

458 self._fit_params = None 

459 

460 def _fit_start_params_hr(self, order, start_ar_lags=None): 

461 """ 

462 Get starting parameters for fit. 

463 

464 Parameters 

465 ---------- 

466 order : iterable 

467 (p,q,k) - AR lags, MA lags, and number of exogenous variables 

468 including the constant. 

469 start_ar_lags : int, optional 

470 If start_ar_lags is not None, rather than fitting an AR process 

471 according to best BIC, fits an AR process with a lag length equal 

472 to start_ar_lags. 

473 

474 Returns 

475 ------- 

476 start_params : ndarray 

477 A first guess at the starting parameters. 

478 

479 Notes 

480 ----- 

481 If necessary, fits an AR process with the laglength start_ar_lags, or 

482 selected according to best BIC if start_ar_lags is None. Obtain the 

483 residuals. Then fit an ARMA(p,q) model via OLS using these residuals 

484 for a first approximation. Uses a separate OLS regression to find the 

485 coefficients of exogenous variables. 

486 

487 References 

488 ---------- 

489 Hannan, E.J. and Rissanen, J. 1982. "Recursive estimation of mixed 

490 autoregressive-moving average order." `Biometrika`. 69.1. 

491 

492 Durbin, J. 1960. "The Fitting of Time-Series Models." 

493 `Review of the International Statistical Institute`. Vol. 28, No. 3 

494 """ 

495 p, q, k = order 

496 start_params = zeros((p + q + k)) 

497 # make copy of endog because overwritten 

498 endog = np.array(self.endog, np.float64) 

499 exog = self.exog 

500 if k != 0: 

501 ols_params = OLS(endog, exog).fit().params 

502 start_params[:k] = ols_params 

503 endog -= np.dot(exog, ols_params).squeeze() 

504 if q != 0: 

505 if p != 0: 

506 # make sure we do not run into small data problems in AR fit 

507 nobs = len(endog) 

508 if start_ar_lags is None: 

509 maxlag = int(round(12 * (nobs / 100.) ** (1 / 4.))) 

510 if maxlag >= nobs: 

511 maxlag = nobs - 1 

512 mod = ar_select_order(endog, maxlag, trend='n').model 

513 armod = mod.fit() 

514 else: 

515 if start_ar_lags >= nobs: 

516 start_ar_lags = nobs - 1 

517 armod = AutoReg(endog, start_ar_lags, trend='n').fit() 

518 arcoefs_tmp = armod.params 

519 p_tmp = len(armod.ar_lags) 

520 # it's possible in small samples that optimal lag-order 

521 # does not leave enough obs. No consistent way to fix. 

522 if p_tmp + q >= len(endog): 

523 raise ValueError("Proper starting parameters cannot" 

524 " be found for this order with this " 

525 "number of observations. Use the " 

526 "start_params argument, or set " 

527 "start_ar_lags to an integer less than " 

528 "len(endog) - q.") 

529 resid = endog[p_tmp:] - np.dot(lagmat(endog, p_tmp, 

530 trim='both'), 

531 arcoefs_tmp) 

532 if p < p_tmp + q: 

533 endog_start = p_tmp + q - p 

534 resid_start = 0 

535 else: 

536 endog_start = 0 

537 resid_start = p - p_tmp - q 

538 lag_endog = lagmat(endog, p, 'both')[endog_start:] 

539 lag_resid = lagmat(resid, q, 'both')[resid_start:] 

540 # stack ar lags and resids 

541 X = np.column_stack((lag_endog, lag_resid)) 

542 coefs = OLS(endog[max(p_tmp + q, p):], X).fit().params 

543 start_params[k:k + p + q] = coefs 

544 else: 

545 ar_coeffs = yule_walker(endog, order=q)[0] 

546 ar = np.r_[[1], -ar_coeffs.squeeze()] 

547 start_params[k + p:k + p + q] = arma2ma(ar, [1], lags=q+1)[1:] 

548 if q == 0 and p != 0: 

549 arcoefs = yule_walker(endog, order=p)[0] 

550 start_params[k:k + p] = arcoefs 

551 

552 # check AR coefficients 

553 if p and not np.all(np.abs(np.roots(np.r_[1, -start_params[k:k + p]] 

554 )) < 1): 

555 raise ValueError("The computed initial AR coefficients are not " 

556 "stationary\nYou should induce stationarity, " 

557 "choose a different model order, or you can\n" 

558 "pass your own start_params.") 

559 # check MA coefficients 

560 elif q and not np.all(np.abs(np.roots(np.r_[1, start_params[k + p:]] 

561 )) < 1): 

562 raise ValueError("The computed initial MA coefficients are not " 

563 "invertible\nYou should induce invertibility, " 

564 "choose a different model order, or you can\n" 

565 "pass your own start_params.") 

566 

567 # check MA coefficients 

568 return start_params 

569 

570 def _fit_start_params(self, order, method, start_ar_lags=None): 

571 if method != 'css-mle': # use Hannan-Rissanen to get start params 

572 start_params = self._fit_start_params_hr(order, start_ar_lags) 

573 else: # use CSS to get start params 

574 def func(params): 

575 return -self.loglike_css(params) 

576 

577 start_params = self._fit_start_params_hr(order, start_ar_lags) 

578 if self.transparams: 

579 start_params = self._invtransparams(start_params) 

580 bounds = [(None,) * 2] * sum(order) 

581 mlefit = optimize.fmin_l_bfgs_b(func, start_params, 

582 approx_grad=True, m=12, 

583 pgtol=1e-7, factr=1e3, 

584 bounds=bounds, iprint=-1) 

585 start_params = mlefit[0] 

586 if self.transparams: 

587 start_params = self._transparams(start_params) 

588 return start_params 

589 

590 def score(self, params): 

591 """ 

592 Compute the score function at params. 

593 

594 Notes 

595 ----- 

596 This is a numerical approximation. 

597 """ 

598 return approx_fprime_cs(params, self.loglike, args=(False,)) 

599 

600 def hessian(self, params): 

601 """ 

602 Compute the Hessian at params, 

603 

604 Notes 

605 ----- 

606 This is a numerical approximation. 

607 """ 

608 return approx_hess_cs(params, self.loglike, args=(False,)) 

609 

610 def _transparams(self, params): 

611 """ 

612 Transforms params to induce stationarity/invertability. 

613 

614 Reference 

615 --------- 

616 Jones(1980) 

617 """ 

618 k_ar, k_ma = self.k_ar, self.k_ma 

619 k = self.k_exog + self.k_trend 

620 newparams = np.zeros_like(params) 

621 

622 # just copy exogenous parameters 

623 if k != 0: 

624 newparams[:k] = params[:k] 

625 

626 # AR Coeffs 

627 if k_ar != 0: 

628 newparams[k:k + k_ar] = _ar_transparams(params[k:k + k_ar].copy()) 

629 

630 # MA Coeffs 

631 if k_ma != 0: 

632 newparams[k + k_ar:] = _ma_transparams(params[k + k_ar:].copy()) 

633 return newparams 

634 

635 def _invtransparams(self, start_params): 

636 """ 

637 Inverse of the Jones reparameterization 

638 """ 

639 k_ar, k_ma = self.k_ar, self.k_ma 

640 k = self.k_exog + self.k_trend 

641 newparams = start_params.copy() 

642 arcoefs = newparams[k:k + k_ar] 

643 macoefs = newparams[k + k_ar:] 

644 # AR coeffs 

645 if k_ar != 0: 

646 newparams[k:k + k_ar] = _ar_invtransparams(arcoefs) 

647 

648 # MA coeffs 

649 if k_ma != 0: 

650 newparams[k + k_ar:k + k_ar + k_ma] = _ma_invtransparams(macoefs) 

651 return newparams 

652 

653 def _get_prediction_index(self, start, end, dynamic, index=None): 

654 method = getattr(self, 'method', 'mle') 

655 k_ar = getattr(self, 'k_ar', 0) 

656 k_diff = getattr(self, 'k_diff', 0) 

657 

658 if start is None: 

659 if 'mle' in method and not dynamic: 

660 start = 0 

661 else: 

662 start = k_ar 

663 start = self._index[start] 

664 

665 start, end, out_of_sample, prediction_index = ( 

666 super(ARMA, self)._get_prediction_index(start, end, index)) 

667 

668 # This replaces the _validate() call 

669 if 'mle' not in method and start < k_ar - k_diff: 

670 raise ValueError("Start must be >= k_ar for conditional " 

671 "MLE or dynamic forecast. Got %s" % start) 

672 # Other validation 

673 _check_arima_start(start, k_ar, k_diff, method, dynamic) 

674 

675 return start, end, out_of_sample, prediction_index 

676 

677 def geterrors(self, params): 

678 """ 

679 Get the errors of the ARMA process. 

680 

681 Parameters 

682 ---------- 

683 params : array_like 

684 The fitted ARMA parameters 

685 order : array_like 

686 3 item iterable, with the number of AR, MA, and exogenous 

687 parameters, including the trend 

688 """ 

689 

690 # start, end, out_of_sample, prediction_index = ( 

691 # self._get_prediction_index(start, end, index)) 

692 params = np.asarray(params) 

693 k_ar, k_ma = self.k_ar, self.k_ma 

694 k = self.k_exog + self.k_trend 

695 

696 method = getattr(self, 'method', 'mle') 

697 if 'mle' in method: # use KalmanFilter to get errors 

698 (y, k, nobs, k_ar, k_ma, k_lags, newparams, Z_mat, m, R_mat, 

699 T_mat, paramsdtype) = KalmanFilter._init_kalman_state(params, 

700 self) 

701 

702 errors = KalmanFilter.geterrors(y, k, k_ar, k_ma, k_lags, nobs, 

703 Z_mat, m, R_mat, T_mat, 

704 paramsdtype) 

705 else: # use scipy.signal.lfilter 

706 y = self.endog.copy() 

707 k = self.k_exog + self.k_trend 

708 if k > 0: 

709 y -= dot(self.exog, params[:k]) 

710 

711 k_ar = self.k_ar 

712 k_ma = self.k_ma 

713 

714 (trendparams, exparams, 

715 arparams, maparams) = _unpack_params(params, (k_ar, k_ma), 

716 self.k_trend, self.k_exog, 

717 reverse=False) 

718 b, a = np.r_[1, -arparams], np.r_[1, maparams] 

719 zi = zeros((max(k_ar, k_ma))) 

720 for i in range(k_ar): 

721 zi[i] = sum(-b[:i + 1][::-1] * y[:i + 1]) 

722 e = lfilter(b, a, y, zi=zi) 

723 errors = e[0][k_ar:] 

724 return errors.squeeze() 

725 

726 @Appender(_arma_predict) 

727 def predict(self, params, start=None, end=None, exog=None, dynamic=False, 

728 **kwargs): 

729 if kwargs and 'typ' not in kwargs: 

730 raise TypeError('Unknown extra arguments') 

731 if not (hasattr(self, 'k_ar') and hasattr(self, 'k_trend')): 

732 raise RuntimeError('Model must be fit before calling predict') 

733 params = array_like(params, 'params') 

734 method = getattr(self, 'method', 'mle') # do not assume fit 

735 # will return an index of a date 

736 start, end, out_of_sample, _ = ( 

737 self._get_prediction_index(start, end, dynamic)) 

738 

739 if out_of_sample and (exog is None and self.k_exog > 0): 

740 raise ValueError("You must provide exog for ARMAX") 

741 

742 endog = self.endog 

743 resid = self.geterrors(params) 

744 k_ar = self.k_ar 

745 

746 # Adjust exog if exog has dates that align with endog 

747 row_labels = self.data.row_labels 

748 exog = _prediction_adjust_exog(exog, row_labels, dynamic, end) 

749 if out_of_sample != 0 and self.k_exog > 0: 

750 # we need the last k_ar exog for the lag-polynomial 

751 if self.k_exog > 0 and k_ar > 0 and not dynamic: 

752 # need the last k_ar exog for the lag-polynomial 

753 exog = np.vstack((self.exog[-k_ar:, self.k_trend:], exog)) 

754 

755 if dynamic: 

756 if self.k_exog > 0: 

757 # need the last k_ar exog for the lag-polynomial 

758 exog_insample = self.exog[start - k_ar:, self.k_trend:] 

759 if exog is not None: 

760 exog = np.vstack((exog_insample, exog)) 

761 else: 

762 exog = exog_insample 

763 # TODO: now that predict does dynamic in-sample it should 

764 # also return error estimates and confidence intervals 

765 # but how? len(endog) is not tot_obs 

766 out_of_sample += end - start + 1 

767 return _arma_predict_out_of_sample(params, out_of_sample, resid, 

768 k_ar, self.k_ma, self.k_trend, 

769 self.k_exog, endog, exog, 

770 start, method) 

771 

772 predictedvalues = _arma_predict_in_sample(start, end, endog, resid, 

773 k_ar, method) 

774 if out_of_sample: 

775 forecastvalues = _arma_predict_out_of_sample(params, out_of_sample, 

776 resid, k_ar, 

777 self.k_ma, 

778 self.k_trend, 

779 self.k_exog, endog, 

780 exog, 

781 method=method) 

782 if (exog is not None and 

783 (exog.shape[0] - k_ar) != forecastvalues.shape[0]): 

784 import warnings 

785 msg = """ 

786The number of observations in exog does not match the number of out-of-sample 

787observations. This might indicate that exog is not correctly aligned. exog 

788should be aligned so that the exog[0] is used for the first out-of-sample 

789forecast, and exog[-1] is used for the last out-of-sample forecast. 

790exog is not used for in-sample observations which are the fitted values. 

791 

792To silence this warning, ensure the number of observation in exog ({0}) 

793matches the number of out-of-sample forecasts ({1})' 

794""" 

795 msg = msg.format(exog.shape[0], forecastvalues.shape[0]) 

796 warnings.warn(msg, SpecificationWarning) 

797 predictedvalues = np.r_[predictedvalues, forecastvalues] 

798 return predictedvalues 

799 

800 def loglike(self, params, set_sigma2=True): 

801 """ 

802 Compute the log-likelihood for ARMA(p,q) model 

803 

804 Notes 

805 ----- 

806 Likelihood used depends on the method set in fit 

807 """ 

808 method = self.method 

809 if method in ['mle', 'css-mle']: 

810 return self.loglike_kalman(params, set_sigma2) 

811 elif method == 'css': 

812 return self.loglike_css(params, set_sigma2) 

813 else: 

814 raise ValueError("Method %s not understood" % method) 

815 

816 def loglike_kalman(self, params, set_sigma2=True): 

817 """ 

818 Compute exact loglikelihood for ARMA(p,q) model by the Kalman Filter. 

819 """ 

820 return KalmanFilter.loglike(params, self, set_sigma2) 

821 

822 def loglike_css(self, params, set_sigma2=True): 

823 """ 

824 Conditional Sum of Squares likelihood function. 

825 """ 

826 k_ar = self.k_ar 

827 k_ma = self.k_ma 

828 k = self.k_exog + self.k_trend 

829 y = self.endog.copy().astype(params.dtype) 

830 nobs = self.nobs 

831 # how to handle if empty? 

832 if self.transparams: 

833 newparams = self._transparams(params) 

834 else: 

835 newparams = params 

836 if k > 0: 

837 y -= dot(self.exog, newparams[:k]) 

838 # the order of p determines how many zeros errors to set for lfilter 

839 b, a = np.r_[1, -newparams[k:k + k_ar]], np.r_[1, newparams[k + k_ar:]] 

840 zi = np.zeros((max(k_ar, k_ma)), dtype=params.dtype) 

841 for i in range(k_ar): 

842 zi[i] = sum(-b[:i + 1][::-1] * y[:i + 1]) 

843 errors = lfilter(b, a, y, zi=zi)[0][k_ar:] 

844 

845 ssr = np.dot(errors, errors) 

846 sigma2 = ssr / nobs 

847 if set_sigma2: 

848 self.sigma2 = sigma2 

849 llf = -nobs / 2. * (log(2 * pi) + log(sigma2)) - ssr / (2 * sigma2) 

850 return llf 

851 

852 def fit(self, start_params=None, trend='c', method="css-mle", 

853 transparams=True, solver='lbfgs', maxiter=500, full_output=1, 

854 disp=5, callback=None, start_ar_lags=None, **kwargs): 

855 """ 

856 Fits ARMA(p,q) model using exact maximum likelihood via Kalman filter. 

857 

858 Parameters 

859 ---------- 

860 start_params : array_like, optional 

861 Starting parameters for ARMA(p,q). If None, the default is given 

862 by ARMA._fit_start_params. See there for more information. 

863 transparams : bool, optional 

864 Whether or not to transform the parameters to ensure stationarity. 

865 Uses the transformation suggested in Jones (1980). If False, 

866 no checking for stationarity or invertibility is done. 

867 method : str {'css-mle','mle','css'} 

868 This is the loglikelihood to maximize. If "css-mle", the 

869 conditional sum of squares likelihood is maximized and its values 

870 are used as starting values for the computation of the exact 

871 likelihood via the Kalman filter. If "mle", the exact likelihood 

872 is maximized via the Kalman Filter. If "css" the conditional sum 

873 of squares likelihood is maximized. All three methods use 

874 `start_params` as starting parameters. See above for more 

875 information. 

876 trend : str {'c','nc'} 

877 Whether to include a constant or not. 'c' includes constant, 

878 'nc' no constant. 

879 solver : str or None, optional 

880 Solver to be used. The default is 'lbfgs' (limited memory 

881 Broyden-Fletcher-Goldfarb-Shanno). Other choices are 'bfgs', 

882 'newton' (Newton-Raphson), 'nm' (Nelder-Mead), 'cg' - 

883 (conjugate gradient), 'ncg' (non-conjugate gradient), and 

884 'powell'. By default, the limited memory BFGS uses m=12 to 

885 approximate the Hessian, projected gradient tolerance of 1e-8 and 

886 factr = 1e2. You can change these by using kwargs. 

887 maxiter : int, optional 

888 The maximum number of function evaluations. Default is 500. 

889 tol : float 

890 The convergence tolerance. Default is 1e-08. 

891 full_output : bool, optional 

892 If True, all output from solver will be available in 

893 the Results object's mle_retvals attribute. Output is dependent 

894 on the solver. See Notes for more information. 

895 disp : int, optional 

896 If True, convergence information is printed. For the default 

897 l_bfgs_b solver, disp controls the frequency of the output during 

898 the iterations. disp < 0 means no output in this case. 

899 callback : function, optional 

900 Called after each iteration as callback(xk) where xk is the current 

901 parameter vector. 

902 start_ar_lags : int, optional 

903 Parameter for fitting start_params. When fitting start_params, 

904 residuals are obtained from an AR fit, then an ARMA(p,q) model is 

905 fit via OLS using these residuals. If start_ar_lags is None, fit 

906 an AR process according to best BIC. If start_ar_lags is not None, 

907 fits an AR process with a lag length equal to start_ar_lags. 

908 See ARMA._fit_start_params_hr for more information. 

909 **kwargs 

910 See Notes for keyword arguments that can be passed to fit. 

911 

912 Returns 

913 ------- 

914 statsmodels.tsa.arima_model.ARMAResults class 

915 

916 See Also 

917 -------- 

918 statsmodels.base.model.LikelihoodModel.fit : for more information 

919 on using the solvers. 

920 ARMAResults : results class returned by fit 

921 

922 Notes 

923 ----- 

924 If fit by 'mle', it is assumed for the Kalman Filter that the initial 

925 unknown state is zero, and that the initial variance is 

926 P = dot(inv(identity(m**2)-kron(T,T)),dot(R,R.T).ravel('F')).reshape(r, 

927 r, order = 'F') 

928 """ 

929 trend = string_like(trend, 'trend', options=('nc', 'c')) 

930 if self._fit_params is not None: 

931 fp = (trend, method) 

932 if self._fit_params != fp: 

933 raise RuntimeError(REPEATED_FIT_ERROR.format(*fp, mod='ARMA')) 

934 

935 k_ar = self.k_ar 

936 k_ma = self.k_ma 

937 

938 # enforce invertibility 

939 self.transparams = transparams 

940 

941 endog, exog = self.endog, self.exog 

942 k_exog = self.k_exog 

943 self.nobs = len(endog) # this is overwritten if method is 'css' 

944 

945 # (re)set trend and handle exogenous variables 

946 # always pass original exog 

947 

948 if hasattr(self, 'k_trend'): 

949 k_trend = self.k_trend 

950 exog = self.exog 

951 else: 

952 # Ensures only call once per ARMA instance 

953 k_trend, exog = _make_arma_exog(endog, self.exog, trend) 

954 

955 # Check has something to estimate 

956 if k_ar == 0 and k_ma == 0 and k_trend == 0 and k_exog == 0: 

957 raise ValueError("Estimation requires the inclusion of least one " 

958 "AR term, MA term, a constant or an exogenous " 

959 "variable.") 

960 

961 # check again now that we know the trend 

962 _check_estimable(len(endog), k_ar + k_ma + k_exog + k_trend) 

963 

964 self.k_trend = k_trend 

965 self.exog = exog # overwrites original exog from __init__ 

966 

967 # (re)set names for this model 

968 self.exog_names = _make_arma_names(self.data, k_trend, 

969 (k_ar, k_ma), self._orig_exog_names) 

970 k = k_trend + k_exog 

971 

972 # choose objective function 

973 if k_ma == 0 and k_ar == 0: 

974 method = "css" # Always CSS when no AR or MA terms 

975 

976 self.method = method = method.lower() 

977 

978 # adjust nobs for css 

979 if method == 'css': 

980 self.nobs = len(self.endog) - k_ar 

981 

982 if start_params is not None: 

983 start_params = array_like(start_params, 'start_params') 

984 else: # estimate starting parameters 

985 start_params = self._fit_start_params((k_ar, k_ma, k), method, 

986 start_ar_lags) 

987 

988 if transparams: # transform initial parameters to ensure invertibility 

989 start_params = self._invtransparams(start_params) 

990 

991 if solver == 'lbfgs': 

992 kwargs.setdefault('pgtol', 1e-8) 

993 kwargs.setdefault('factr', 1e2) 

994 kwargs.setdefault('m', 12) 

995 kwargs.setdefault('approx_grad', True) 

996 mlefit = super(ARMA, self).fit(start_params, method=solver, 

997 maxiter=maxiter, 

998 full_output=full_output, disp=disp, 

999 callback=callback, **kwargs) 

1000 params = mlefit.params 

1001 

1002 if transparams: # transform parameters back 

1003 params = self._transparams(params) 

1004 

1005 self.transparams = False # so methods do not expect transf. 

1006 

1007 normalized_cov_params = None # TODO: fix this 

1008 armafit = ARMAResults(copy.copy(self), params, normalized_cov_params) 

1009 armafit.mle_retvals = mlefit.mle_retvals 

1010 armafit.mle_settings = mlefit.mle_settings 

1011 # Save core fit parameters for future checks 

1012 self._fit_params = (trend, method) 

1013 

1014 return ARMAResultsWrapper(armafit) 

1015 

1016 @classmethod 

1017 def from_formula(cls, formula, data, subset=None, drop_cols=None, *args, 

1018 **kwargs): 

1019 raise NotImplementedError("from_formula is not supported" 

1020 " for ARMA models.") 

1021 

1022 

1023# TODO: the length of endog changes when we give a difference to fit 

1024# so model methods are not the same on unfit models as fit ones 

1025# starting to think that order of model should be put in instantiation... 

1026class ARIMA(ARMA): 

1027 __doc__ = tsa_model._tsa_doc % {"model": _arima_model, 

1028 "params": _arima_params, 

1029 "extra_params": "", 

1030 "extra_sections": 

1031 _armax_notes % {"Model": "ARIMA"}} 

1032 

1033 def __new__(cls, endog, order, exog=None, dates=None, freq=None, 

1034 missing='none'): 

1035 p, d, q = order 

1036 if d == 0: # then we just use an ARMA model 

1037 return ARMA(endog, (p, q), exog, dates, freq, missing) 

1038 else: 

1039 mod = super(ARIMA, cls).__new__(cls) 

1040 mod.__init__(endog, order, exog, dates, freq, missing) 

1041 return mod 

1042 

1043 def __getnewargs__(self): 

1044 # using same defaults as in __init__ 

1045 dates = getattr(self, 'dates', None) 

1046 freq = getattr(self, 'freq', None) 

1047 missing = getattr(self, 'missing', 'none') 

1048 return ((self.endog), 

1049 (self.k_lags, self.k_diff, self.k_ma), 

1050 self.exog, dates, freq, missing) 

1051 

1052 def __init__(self, endog, order, exog=None, dates=None, freq=None, 

1053 missing='none'): 

1054 p, d, q = order 

1055 if d > 2: 

1056 # TODO: to make more general, need to address the d == 2 stuff 

1057 # in the predict method 

1058 raise ValueError("d > 2 is not supported") 

1059 super(ARIMA, self).__init__(endog, (p, q), exog, dates, freq, missing) 

1060 self.k_diff = d 

1061 self._first_unintegrate = unintegrate_levels(self.endog[:d], d) 

1062 self.endog = np.diff(self.endog, n=d) 

1063 # NOTE: will check in ARMA but check again since differenced now 

1064 _check_estimable(len(self.endog), p + q) 

1065 if exog is not None: 

1066 self.exog = self.exog[d:] 

1067 if d == 1: 

1068 self.data.ynames = 'D.' + self.endog_names 

1069 else: 

1070 self.data.ynames = 'D{0:d}.'.format(d) + self.endog_names 

1071 # what about exog, should we difference it automatically before 

1072 # super call? 

1073 

1074 # Reset index 

1075 orig_length = len(self._index) 

1076 new_length = self.endog.shape[0] 

1077 if self.data.row_labels is not None: 

1078 self.data._cache['row_labels'] = ( 

1079 self.data.row_labels[orig_length - new_length:]) 

1080 if self._index is not None: 

1081 if self._index_generated: 

1082 self._index = self._index[:-(orig_length - new_length)] 

1083 else: 

1084 self._index = self._index[orig_length - new_length:] 

1085 

1086 def _get_prediction_index(self, start, end, dynamic, index=None): 

1087 method = getattr(self, 'method', 'mle') 

1088 k_ar = getattr(self, 'k_ar', 0) 

1089 k_diff = getattr(self, 'k_diff', 0) 

1090 if start is None: 

1091 if 'mle' in method and not dynamic: 

1092 start = 0 

1093 else: 

1094 start = k_ar 

1095 start = self._index[start] 

1096 elif isinstance(start, (int, np.integer)): 

1097 start -= k_diff 

1098 if start < 0: 

1099 raise ValueError('The start index %d of the original series ' 

1100 ' has been differenced away' % start) 

1101 if isinstance(end, (int, np.integer)): 

1102 end -= k_diff 

1103 

1104 start, end, out_of_sample, prediction_index = ( 

1105 super(ARIMA, self)._get_prediction_index(start, end, index)) 

1106 

1107 # From _get_predict_end 

1108 if 'mle' not in self.method and not dynamic: 

1109 end -= k_ar 

1110 

1111 # This replaces the _validate() call 

1112 if 'mle' not in method and start < k_ar - k_diff: 

1113 raise ValueError("Start must be >= k_ar for conditional " 

1114 "MLE or dynamic forecast. Got %s" % start) 

1115 # Other validation 

1116 _check_arima_start(start, k_ar, k_diff, method, dynamic) 

1117 

1118 return start, end, out_of_sample, prediction_index 

1119 

1120 def fit(self, start_params=None, trend='c', method="css-mle", 

1121 transparams=True, solver='lbfgs', maxiter=500, full_output=1, 

1122 disp=5, callback=None, start_ar_lags=None, **kwargs): 

1123 """ 

1124 Fits ARIMA(p,d,q) model by exact maximum likelihood via Kalman filter. 

1125 

1126 Parameters 

1127 ---------- 

1128 start_params : array_like, optional 

1129 Starting parameters for ARMA(p,q). If None, the default is given 

1130 by ARMA._fit_start_params. See there for more information. 

1131 transparams : bool, optional 

1132 Whether or not to transform the parameters to ensure stationarity. 

1133 Uses the transformation suggested in Jones (1980). If False, 

1134 no checking for stationarity or invertibility is done. 

1135 method : str {'css-mle','mle','css'} 

1136 This is the loglikelihood to maximize. If "css-mle", the 

1137 conditional sum of squares likelihood is maximized and its values 

1138 are used as starting values for the computation of the exact 

1139 likelihood via the Kalman filter. If "mle", the exact likelihood 

1140 is maximized via the Kalman Filter. If "css" the conditional sum 

1141 of squares likelihood is maximized. All three methods use 

1142 `start_params` as starting parameters. See above for more 

1143 information. 

1144 trend : str {'c','nc'} 

1145 Whether to include a constant or not. 'c' includes constant, 

1146 'nc' no constant. 

1147 solver : str or None, optional 

1148 Solver to be used. The default is 'lbfgs' (limited memory 

1149 Broyden-Fletcher-Goldfarb-Shanno). Other choices are 'bfgs', 

1150 'newton' (Newton-Raphson), 'nm' (Nelder-Mead), 'cg' - 

1151 (conjugate gradient), 'ncg' (non-conjugate gradient), and 

1152 'powell'. By default, the limited memory BFGS uses m=12 to 

1153 approximate the Hessian, projected gradient tolerance of 1e-8 and 

1154 factr = 1e2. You can change these by using kwargs. 

1155 maxiter : int, optional 

1156 The maximum number of function evaluations. Default is 500. 

1157 tol : float 

1158 The convergence tolerance. Default is 1e-08. 

1159 full_output : bool, optional 

1160 If True, all output from solver will be available in 

1161 the Results object's mle_retvals attribute. Output is dependent 

1162 on the solver. See Notes for more information. 

1163 disp : int, optional 

1164 If True, convergence information is printed. For the default 

1165 l_bfgs_b solver, disp controls the frequency of the output during 

1166 the iterations. disp < 0 means no output in this case. 

1167 callback : function, optional 

1168 Called after each iteration as callback(xk) where xk is the current 

1169 parameter vector. 

1170 start_ar_lags : int, optional 

1171 Parameter for fitting start_params. When fitting start_params, 

1172 residuals are obtained from an AR fit, then an ARMA(p,q) model is 

1173 fit via OLS using these residuals. If start_ar_lags is None, fit 

1174 an AR process according to best BIC. If start_ar_lags is not None, 

1175 fits an AR process with a lag length equal to start_ar_lags. 

1176 See ARMA._fit_start_params_hr for more information. 

1177 **kwargs 

1178 See Notes for keyword arguments that can be passed to fit. 

1179 

1180 Returns 

1181 ------- 

1182 `statsmodels.tsa.arima.ARIMAResults` class 

1183 

1184 See Also 

1185 -------- 

1186 statsmodels.base.model.LikelihoodModel.fit : for more information 

1187 on using the solvers. 

1188 ARIMAResults : results class returned by fit 

1189 

1190 Notes 

1191 ----- 

1192 If fit by 'mle', it is assumed for the Kalman Filter that the initial 

1193 unknown state is zero, and that the initial variance is 

1194 P = dot(inv(identity(m**2)-kron(T,T)),dot(R,R.T).ravel('F')).reshape(r, 

1195 r, order = 'F') 

1196 """ 

1197 mlefit = super(ARIMA, self).fit(start_params, trend, 

1198 method, transparams, solver, 

1199 maxiter, full_output, disp, 

1200 callback, start_ar_lags, **kwargs) 

1201 normalized_cov_params = None # TODO: fix this? 

1202 arima_fit = ARIMAResults(self, mlefit._results.params, 

1203 normalized_cov_params) 

1204 arima_fit.k_diff = self.k_diff 

1205 

1206 arima_fit.mle_retvals = mlefit.mle_retvals 

1207 arima_fit.mle_settings = mlefit.mle_settings 

1208 

1209 return ARIMAResultsWrapper(arima_fit) 

1210 

1211 @Appender(_arima_predict) 

1212 def predict(self, params, start=None, end=None, exog=None, typ='linear', 

1213 dynamic=False): 

1214 if not (hasattr(self, 'k_ar') and hasattr(self, 'k_trend')): 

1215 raise RuntimeError('Model must be fit before calling predict') 

1216 # go ahead and convert to an index for easier checking 

1217 if isinstance(start, (str, datetime)): 

1218 start, _, _ = self._get_index_label_loc(start) 

1219 if isinstance(start, slice): 

1220 start = start.start 

1221 # Adjustment since _index was already changed to fit the 

1222 # differenced endog. 

1223 start += self.k_diff 

1224 if typ == 'linear': 

1225 if not dynamic or (start != self.k_ar + self.k_diff and 

1226 start is not None): 

1227 return super(ARIMA, self).predict(params, start, end, exog, 

1228 dynamic) 

1229 else: 

1230 # need to assume pre-sample residuals are zero 

1231 # do this by a hack 

1232 q = self.k_ma 

1233 self.k_ma = 0 

1234 predictedvalues = super(ARIMA, self).predict(params, start, 

1235 end, exog, 

1236 dynamic) 

1237 self.k_ma = q 

1238 return predictedvalues 

1239 elif typ == 'levels': 

1240 endog = self.data.endog 

1241 if not dynamic: 

1242 predict = super(ARIMA, self).predict(params, start, end, exog, 

1243 dynamic) 

1244 

1245 start, end, out_of_sample, _ = ( 

1246 self._get_prediction_index(start, end, dynamic)) 

1247 

1248 d = self.k_diff 

1249 if 'mle' in self.method: 

1250 start += d - 1 # for case where d == 2 

1251 end += d - 1 

1252 # add each predicted diff to lagged endog 

1253 if out_of_sample: 

1254 fv = predict[:-out_of_sample] + endog[start:end + 1] 

1255 if d == 2: # TODO: make a general solution to this 

1256 fv += np.diff(endog[start - 1:end + 1]) 

1257 levels = unintegrate_levels(endog[-d:], d) 

1258 fv = np.r_[fv, 

1259 unintegrate(predict[-out_of_sample:], 

1260 levels)[d:]] 

1261 else: 

1262 fv = predict + endog[start:end + 1] 

1263 if d == 2: 

1264 fv += np.diff(endog[start - 1:end + 1]) 

1265 else: 

1266 k_ar = self.k_ar 

1267 if out_of_sample: 

1268 fv = (predict[:-out_of_sample] + 

1269 endog[max(start, self.k_ar - 1):end + k_ar + 1]) 

1270 if d == 2: 

1271 fv += np.diff(endog[start - 1:end + 1]) 

1272 levels = unintegrate_levels(endog[-d:], d) 

1273 fv = np.r_[fv, 

1274 unintegrate(predict[-out_of_sample:], 

1275 levels)[d:]] 

1276 else: 

1277 fv = predict + endog[max(start, k_ar):end + k_ar + 1] 

1278 if d == 2: 

1279 fv += np.diff(endog[start - 1:end + 1]) 

1280 else: 

1281 # IFF we need to use pre-sample values assume pre-sample 

1282 # residuals are zero, do this by a hack 

1283 if start == self.k_ar + self.k_diff or start is None: 

1284 # do the first k_diff+1 separately 

1285 p = self.k_ar 

1286 q = self.k_ma 

1287 k_exog = self.k_exog 

1288 k_trend = self.k_trend 

1289 k_diff = self.k_diff 

1290 (trendparam, exparams, 

1291 arparams, maparams) = _unpack_params(params, (p, q), 

1292 k_trend, 

1293 k_exog, 

1294 reverse=True) 

1295 # this is the hack 

1296 self.k_ma = 0 

1297 

1298 predict = super(ARIMA, self).predict(params, start, end, 

1299 exog, dynamic) 

1300 if not start: 

1301 start, _, _, _ = self._get_prediction_index( 

1302 start, end, dynamic) 

1303 start += k_diff 

1304 self.k_ma = q 

1305 return endog[start - 1] + np.cumsum(predict) 

1306 else: 

1307 predict = super(ARIMA, self).predict(params, start, end, 

1308 exog, dynamic) 

1309 return endog[start - 1] + np.cumsum(predict) 

1310 return fv 

1311 

1312 else: # pragma : no cover 

1313 raise ValueError("typ %s not understood" % typ) 

1314 

1315 

1316class ARMAResults(tsa_model.TimeSeriesModelResults): 

1317 """ 

1318 Class to hold results from fitting an ARMA model. 

1319 

1320 Parameters 

1321 ---------- 

1322 model : ARMA instance 

1323 The fitted model instance 

1324 params : ndarray 

1325 Fitted parameters 

1326 normalized_cov_params : ndarray, optional 

1327 The normalized variance covariance matrix 

1328 scale : float, optional 

1329 Optional argument to scale the variance covariance matrix. 

1330 

1331 Attributes 

1332 ---------- 

1333 aic : float 

1334 Akaike Information Criterion 

1335 :math:`-2*llf+2* df_model` 

1336 where `df_model` includes all AR parameters, MA parameters, constant 

1337 terms parameters on constant terms and the variance. 

1338 arparams : ndarray 

1339 The parameters associated with the AR coefficients in the model. 

1340 arroots : ndarray 

1341 The roots of the AR coefficients are the solution to 

1342 (1 - arparams[0]*z - arparams[1]*z**2 -...- arparams[p-1]*z**k_ar) = 0 

1343 Stability requires that the roots in modulus lie outside the unit 

1344 circle. 

1345 bic : float 

1346 Bayes Information Criterion 

1347 -2*llf + log(nobs)*df_model 

1348 Where if the model is fit using conditional sum of squares, the 

1349 number of observations `nobs` does not include the `p` pre-sample 

1350 observations. 

1351 bse : ndarray 

1352 The standard errors of the parameters. These are computed using the 

1353 numerical Hessian. 

1354 df_model : ndarray 

1355 The model degrees of freedom = `k_exog` + `k_trend` + `k_ar` + `k_ma` 

1356 df_resid : ndarray 

1357 The residual degrees of freedom = `nobs` - `df_model` 

1358 fittedvalues : ndarray 

1359 The predicted values of the model. 

1360 hqic : float 

1361 Hannan-Quinn Information Criterion 

1362 -2*llf + 2*(`df_model`)*log(log(nobs)) 

1363 Like `bic` if the model is fit using conditional sum of squares then 

1364 the `k_ar` pre-sample observations are not counted in `nobs`. 

1365 k_ar : int 

1366 The number of AR coefficients in the model. 

1367 k_exog : int 

1368 The number of exogenous variables included in the model. Does not 

1369 include the constant. 

1370 k_ma : int 

1371 The number of MA coefficients. 

1372 k_trend : int 

1373 This is 0 for no constant or 1 if a constant is included. 

1374 llf : float 

1375 The value of the log-likelihood function evaluated at `params`. 

1376 maparams : ndarray 

1377 The value of the moving average coefficients. 

1378 maroots : ndarray 

1379 The roots of the MA coefficients are the solution to 

1380 (1 + maparams[0]*z + maparams[1]*z**2 + ... + maparams[q-1]*z**q) = 0 

1381 Stability requires that the roots in modules lie outside the unit 

1382 circle. 

1383 model : ARMA instance 

1384 A reference to the model that was fit. 

1385 nobs : float 

1386 The number of observations used to fit the model. If the model is fit 

1387 using exact maximum likelihood this is equal to the total number of 

1388 observations, `n_totobs`. If the model is fit using conditional 

1389 maximum likelihood this is equal to `n_totobs` - `k_ar`. 

1390 n_totobs : float 

1391 The total number of observations for `endog`. This includes all 

1392 observations, even pre-sample values if the model is fit using `css`. 

1393 params : ndarray 

1394 The parameters of the model. The order of variables is the trend 

1395 coefficients and the `k_exog` exogenous coefficients, then the 

1396 `k_ar` AR coefficients, and finally the `k_ma` MA coefficients. 

1397 pvalues : ndarray 

1398 The p-values associated with the t-values of the coefficients. Note 

1399 that the coefficients are assumed to have a Student's T distribution. 

1400 resid : ndarray 

1401 The model residuals. If the model is fit using 'mle' then the 

1402 residuals are created via the Kalman Filter. If the model is fit 

1403 using 'css' then the residuals are obtained via `scipy.signal.lfilter` 

1404 adjusted such that the first `k_ma` residuals are zero. These zero 

1405 residuals are not returned. 

1406 scale : float 

1407 This is currently set to 1.0 and not used by the model or its results. 

1408 sigma2 : float 

1409 The variance of the residuals. If the model is fit by 'css', 

1410 sigma2 = ssr/nobs, where ssr is the sum of squared residuals. If 

1411 the model is fit by 'mle', then sigma2 = 1/nobs * sum(v**2 / F) 

1412 where v is the one-step forecast error and F is the forecast error 

1413 variance. See `nobs` for the difference in definitions depending on the 

1414 fit. 

1415 """ 

1416 _cache = {} 

1417 

1418 def __init__(self, model, params, normalized_cov_params=None, scale=1.): 

1419 super(ARMAResults, self).__init__(model, params, normalized_cov_params, 

1420 scale) 

1421 self.sigma2 = model.sigma2 

1422 nobs = model.nobs 

1423 self.nobs = nobs 

1424 k_exog = model.k_exog 

1425 self.k_exog = k_exog 

1426 k_trend = model.k_trend 

1427 self.k_trend = k_trend 

1428 k_ar = model.k_ar 

1429 self.k_ar = k_ar 

1430 self.n_totobs = len(model.endog) 

1431 k_ma = model.k_ma 

1432 self.k_ma = k_ma 

1433 df_model = k_exog + k_trend + k_ar + k_ma 

1434 self._ic_df_model = df_model + 1 

1435 self.df_model = df_model 

1436 self.df_resid = self.nobs - df_model 

1437 self._cache = {} 

1438 

1439 @cache_readonly 

1440 def arroots(self): 

1441 return np.roots(np.r_[1, -self.arparams]) ** -1 

1442 

1443 @cache_readonly 

1444 def maroots(self): 

1445 return np.roots(np.r_[1, self.maparams]) ** -1 

1446 

1447 @cache_readonly 

1448 def arfreq(self): 

1449 r""" 

1450 Returns the frequency of the AR roots. 

1451 

1452 This is the solution, x, to z = abs(z)*exp(2j*np.pi*x) where z are the 

1453 roots. 

1454 """ 

1455 z = self.arroots 

1456 return np.arctan2(z.imag, z.real) / (2 * pi) 

1457 

1458 @cache_readonly 

1459 def mafreq(self): 

1460 r""" 

1461 Returns the frequency of the MA roots. 

1462 

1463 This is the solution, x, to z = abs(z)*exp(2j*np.pi*x) where z are the 

1464 roots. 

1465 """ 

1466 z = self.maroots 

1467 return np.arctan2(z.imag, z.real) / (2 * pi) 

1468 

1469 @cache_readonly 

1470 def arparams(self): 

1471 k = self.k_exog + self.k_trend 

1472 return self.params[k:k + self.k_ar] 

1473 

1474 @cache_readonly 

1475 def maparams(self): 

1476 k = self.k_exog + self.k_trend 

1477 k_ar = self.k_ar 

1478 return self.params[k + k_ar:] 

1479 

1480 @cache_readonly 

1481 def llf(self): 

1482 return self.model.loglike(self.params) 

1483 

1484 @cache_readonly 

1485 def bse(self): 

1486 params = self.params 

1487 hess = self.model.hessian(params) 

1488 if len(params) == 1: # cannot take an inverse, ensure 1d 

1489 return np.sqrt(-1. / hess[0]) 

1490 return np.sqrt(np.diag(-inv(hess))) 

1491 

1492 @cache_readonly 

1493 def cov_params_default(self): 

1494 hess = self.model.hessian(self.params) 

1495 return -inv(hess) 

1496 

1497 @cache_readonly 

1498 def aic(self): 

1499 return -2 * self.llf + 2 * self._ic_df_model 

1500 

1501 @cache_readonly 

1502 def bic(self): 

1503 nobs = self.nobs 

1504 return -2 * self.llf + np.log(nobs) * self._ic_df_model 

1505 

1506 @cache_readonly 

1507 def hqic(self): 

1508 nobs = self.nobs 

1509 return -2 * self.llf + 2 * np.log(np.log(nobs)) * self._ic_df_model 

1510 

1511 @cache_readonly 

1512 def fittedvalues(self): 

1513 model = self.model 

1514 endog = model.endog.copy() 

1515 k_ar = self.k_ar 

1516 exog = model.exog # this is a copy 

1517 if exog is not None: 

1518 if model.method == "css" and k_ar > 0: 

1519 exog = exog[k_ar:] 

1520 if model.method == "css" and k_ar > 0: 

1521 endog = endog[k_ar:] 

1522 fv = endog - self.resid 

1523 

1524 return fv 

1525 

1526 @cache_readonly 

1527 def resid(self): 

1528 return self.model.geterrors(self.params) 

1529 

1530 @Appender(_arma_results_predict) 

1531 def predict(self, start=None, end=None, exog=None, dynamic=False, 

1532 **kwargs): 

1533 return self.model.predict(self.params, start, end, exog, dynamic, 

1534 **kwargs) 

1535 

1536 def _forecast_error(self, steps): 

1537 sigma2 = self.sigma2 

1538 ma_rep = arma2ma(np.r_[1, -self.arparams], 

1539 np.r_[1, self.maparams], lags=steps) 

1540 

1541 fcasterr = np.sqrt(sigma2 * np.cumsum(ma_rep ** 2)) 

1542 return fcasterr 

1543 

1544 def _forecast_conf_int(self, forecast, fcasterr, alpha): 

1545 const = norm.ppf(1 - alpha / 2.) 

1546 conf_int = np.c_[forecast - const * fcasterr, 

1547 forecast + const * fcasterr] 

1548 

1549 return conf_int 

1550 

1551 def forecast(self, steps=1, exog=None, alpha=.05): 

1552 """ 

1553 Out-of-sample forecasts 

1554 

1555 Parameters 

1556 ---------- 

1557 steps : int 

1558 The number of out of sample forecasts from the end of the 

1559 sample. 

1560 exog : ndarray 

1561 If the model is an ARMAX, you must provide out of sample 

1562 values for the exogenous variables. This should not include 

1563 the constant. The number of observation in exog must match the 

1564 value of steps. 

1565 alpha : float 

1566 The confidence intervals for the forecasts are (1 - alpha) % 

1567 

1568 Returns 

1569 ------- 

1570 forecast : ndarray 

1571 Array of out of sample forecasts 

1572 stderr : ndarray 

1573 Array of the standard error of the forecasts. 

1574 conf_int : ndarray 

1575 2d array of the confidence interval for the forecast 

1576 """ 

1577 if exog is not None: 

1578 exog = array_like(exog, 'exog', maxdim=2) 

1579 if self.k_exog == 1 and exog.ndim == 1: 

1580 exog = exog[:, None] 

1581 elif exog.ndim == 1: 

1582 if len(exog) != self.k_exog: 

1583 raise ValueError("1d exog given and len(exog) != k_exog") 

1584 exog = exog[None, :] 

1585 if exog.shape[0] != steps: 

1586 raise ValueError("new exog needed for each step") 

1587 if self.k_exog != exog.shape[1]: 

1588 raise ValueError('exog must contain the same number of ' 

1589 'variables as in the estimated model.') 

1590 # prepend in-sample exog observations 

1591 if self.k_ar > 0: 

1592 exog = np.vstack((self.model.exog[-self.k_ar:, self.k_trend:], 

1593 exog)) 

1594 else: 

1595 if self.k_exog: 

1596 raise ValueError('Forecast values for exog are required when ' 

1597 'the model contains exogenous regressors.') 

1598 

1599 forecast = _arma_predict_out_of_sample(self.params, 

1600 steps, self.resid, self.k_ar, 

1601 self.k_ma, self.k_trend, 

1602 self.k_exog, self.model.endog, 

1603 exog, method=self.model.method) 

1604 

1605 # compute the standard errors 

1606 fcasterr = self._forecast_error(steps) 

1607 conf_int = self._forecast_conf_int(forecast, fcasterr, alpha) 

1608 

1609 return forecast, fcasterr, conf_int 

1610 

1611 def summary(self, alpha=.05): 

1612 """Summarize the Model 

1613 

1614 Parameters 

1615 ---------- 

1616 alpha : float, optional 

1617 Significance level for the confidence intervals. 

1618 

1619 Returns 

1620 ------- 

1621 smry : Summary instance 

1622 This holds the summary table and text, which can be printed or 

1623 converted to various output formats. 

1624 

1625 See Also 

1626 -------- 

1627 statsmodels.iolib.summary.Summary 

1628 """ 

1629 from statsmodels.iolib.summary import Summary 

1630 model = self.model 

1631 title = model.__class__.__name__ + ' Model Results' 

1632 method = model.method 

1633 # get sample TODO: make better sample machinery for estimation 

1634 k_diff = getattr(self, 'k_diff', 0) 

1635 if 'mle' in method: 

1636 start = k_diff 

1637 else: 

1638 start = k_diff + self.k_ar 

1639 if self.data.dates is not None: 

1640 dates = self.data.dates 

1641 sample = [dates[start].strftime('%m-%d-%Y')] 

1642 sample += ['- ' + dates[-1].strftime('%m-%d-%Y')] 

1643 else: 

1644 sample = str(start) + ' - ' + str(len(self.data.orig_endog)) 

1645 

1646 k_ar, k_ma = self.k_ar, self.k_ma 

1647 if not k_diff: 

1648 order = str((k_ar, k_ma)) 

1649 else: 

1650 order = str((k_ar, k_diff, k_ma)) 

1651 top_left = [('Dep. Variable:', None), 

1652 ('Model:', [model.__class__.__name__ + order]), 

1653 ('Method:', [method]), 

1654 ('Date:', None), 

1655 ('Time:', None), 

1656 ('Sample:', [sample[0]]), 

1657 ('', [sample[1]]) 

1658 ] 

1659 

1660 top_right = [ 

1661 ('No. Observations:', [str(len(self.model.endog))]), 

1662 ('Log Likelihood', ["%#5.3f" % self.llf]), 

1663 ('S.D. of innovations', ["%#5.3f" % self.sigma2 ** .5]), 

1664 ('AIC', ["%#5.3f" % self.aic]), 

1665 ('BIC', ["%#5.3f" % self.bic]), 

1666 ('HQIC', ["%#5.3f" % self.hqic])] 

1667 

1668 smry = Summary() 

1669 smry.add_table_2cols(self, gleft=top_left, gright=top_right, 

1670 title=title) 

1671 smry.add_table_params(self, alpha=alpha, use_t=False) 

1672 

1673 # Make the roots table 

1674 from statsmodels.iolib.table import SimpleTable 

1675 

1676 if k_ma and k_ar: 

1677 arstubs = ["AR.%d" % i for i in range(1, k_ar + 1)] 

1678 mastubs = ["MA.%d" % i for i in range(1, k_ma + 1)] 

1679 stubs = arstubs + mastubs 

1680 roots = np.r_[self.arroots, self.maroots] 

1681 freq = np.r_[self.arfreq, self.mafreq] 

1682 elif k_ma: 

1683 mastubs = ["MA.%d" % i for i in range(1, k_ma + 1)] 

1684 stubs = mastubs 

1685 roots = self.maroots 

1686 freq = self.mafreq 

1687 elif k_ar: 

1688 arstubs = ["AR.%d" % i for i in range(1, k_ar + 1)] 

1689 stubs = arstubs 

1690 roots = self.arroots 

1691 freq = self.arfreq 

1692 else: # 0,0 model 

1693 stubs = [] 

1694 if len(stubs): # not 0, 0 

1695 modulus = np.abs(roots) 

1696 data = np.column_stack((roots.real, roots.imag, modulus, freq)) 

1697 roots_table = SimpleTable([('%17.4f' % row[0], 

1698 '%+17.4fj' % row[1], 

1699 '%17.4f' % row[2], 

1700 '%17.4f' % row[3]) for row in data], 

1701 headers=[' Real', 

1702 ' Imaginary', 

1703 ' Modulus', 

1704 ' Frequency'], 

1705 title="Roots", 

1706 stubs=stubs) 

1707 

1708 smry.tables.append(roots_table) 

1709 return smry 

1710 

1711 def summary2(self, title=None, alpha=.05, float_format="%.4f"): 

1712 """ 

1713 Experimental summary function for ARIMA Results 

1714 

1715 Parameters 

1716 ---------- 

1717 title : str, optional 

1718 Title for the top table. If not None, then this replaces the 

1719 default title 

1720 alpha : float, optional 

1721 significance level for the confidence intervals 

1722 float_format : str, optional 

1723 print format for floats in parameters summary 

1724 

1725 Returns 

1726 ------- 

1727 smry : Summary instance 

1728 This holds the summary table and text, which can be printed or 

1729 converted to various output formats. 

1730 

1731 See Also 

1732 -------- 

1733 statsmodels.iolib.summary2.Summary : class to hold summary 

1734 results 

1735 """ 

1736 from pandas import DataFrame 

1737 # get sample TODO: make better sample machinery for estimation 

1738 k_diff = getattr(self, 'k_diff', 0) 

1739 if 'mle' in self.model.method: 

1740 start = k_diff 

1741 else: 

1742 start = k_diff + self.k_ar 

1743 if self.data.dates is not None: 

1744 dates = self.data.dates 

1745 sample = [dates[start].strftime('%m-%d-%Y')] 

1746 sample += [dates[-1].strftime('%m-%d-%Y')] 

1747 else: 

1748 sample = str(start) + ' - ' + str(len(self.data.orig_endog)) 

1749 

1750 k_ar, k_ma = self.k_ar, self.k_ma 

1751 

1752 # Roots table 

1753 if k_ma and k_ar: 

1754 arstubs = ["AR.%d" % i for i in range(1, k_ar + 1)] 

1755 mastubs = ["MA.%d" % i for i in range(1, k_ma + 1)] 

1756 stubs = arstubs + mastubs 

1757 roots = np.r_[self.arroots, self.maroots] 

1758 freq = np.r_[self.arfreq, self.mafreq] 

1759 elif k_ma: 

1760 mastubs = ["MA.%d" % i for i in range(1, k_ma + 1)] 

1761 stubs = mastubs 

1762 roots = self.maroots 

1763 freq = self.mafreq 

1764 elif k_ar: 

1765 arstubs = ["AR.%d" % i for i in range(1, k_ar + 1)] 

1766 stubs = arstubs 

1767 roots = self.arroots 

1768 freq = self.arfreq 

1769 else: # 0, 0 order 

1770 stubs = [] 

1771 

1772 if len(stubs): 

1773 modulus = np.abs(roots) 

1774 data = np.column_stack((roots.real, roots.imag, modulus, freq)) 

1775 data = DataFrame(data) 

1776 data.columns = ['Real', 'Imaginary', 'Modulus', 'Frequency'] 

1777 data.index = stubs 

1778 

1779 # Summary 

1780 from statsmodels.iolib import summary2 

1781 smry = summary2.Summary() 

1782 

1783 # Model info 

1784 model_info = summary2.summary_model(self) 

1785 model_info['Method:'] = self.model.method 

1786 model_info['Sample:'] = sample[0] 

1787 model_info[' '] = sample[-1] 

1788 model_info['S.D. of innovations:'] = "%#5.3f" % self.sigma2 ** .5 

1789 model_info['HQIC:'] = "%#5.3f" % self.hqic 

1790 model_info['No. Observations:'] = str(len(self.model.endog)) 

1791 

1792 # Parameters 

1793 params = summary2.summary_params(self) 

1794 smry.add_dict(model_info) 

1795 smry.add_df(params, float_format=float_format) 

1796 if len(stubs): 

1797 smry.add_df(data, float_format="%17.4f") 

1798 smry.add_title(results=self, title=title) 

1799 

1800 return smry 

1801 

1802 @Appender(_plot_predict) 

1803 def plot_predict(self, start=None, end=None, exog=None, dynamic=False, 

1804 alpha=.05, plot_insample=True, ax=None): 

1805 from statsmodels.graphics.utils import _import_mpl, create_mpl_ax 

1806 _ = _import_mpl() 

1807 fig, ax = create_mpl_ax(ax) 

1808 

1809 # use predict so you set dates 

1810 forecast = self.predict(start, end, exog, dynamic) 

1811 # doing this twice. just add a plot keyword to predict? 

1812 start, end, out_of_sample, _ = ( 

1813 self.model._get_prediction_index(start, end, dynamic=False)) 

1814 

1815 if out_of_sample: 

1816 steps = out_of_sample 

1817 fc_error = self._forecast_error(steps) 

1818 conf_int = self._forecast_conf_int(forecast[-steps:], fc_error, 

1819 alpha) 

1820 

1821 if hasattr(self.data, "predict_dates"): 

1822 from pandas import Series 

1823 forecast = Series(forecast, index=self.data.predict_dates) 

1824 ax = forecast.plot(ax=ax, label='forecast') 

1825 else: 

1826 ax.plot(forecast) 

1827 

1828 x = ax.get_lines()[-1].get_xdata() 

1829 if out_of_sample: 

1830 label = "{0:.0%} confidence interval".format(1 - alpha) 

1831 ax.fill_between(x[-out_of_sample:], conf_int[:, 0], conf_int[:, 1], 

1832 color='gray', alpha=.5, label=label) 

1833 

1834 if plot_insample: 

1835 ax.plot(x[:end + 1 - start], self.model.endog[start:end + 1], 

1836 label=self.model.endog_names) 

1837 

1838 ax.legend(loc='best') 

1839 

1840 return fig 

1841 

1842 

1843class ARMAResultsWrapper(wrap.ResultsWrapper): 

1844 _attrs = {} 

1845 _wrap_attrs = wrap.union_dicts( 

1846 tsa_model.TimeSeriesResultsWrapper._wrap_attrs, _attrs) 

1847 _methods = {} 

1848 _wrap_methods = wrap.union_dicts( 

1849 tsa_model.TimeSeriesResultsWrapper._wrap_methods, _methods) 

1850 

1851 

1852wrap.populate_wrapper(ARMAResultsWrapper, ARMAResults) # noqa:E305 

1853 

1854 

1855class ARIMAResults(ARMAResults): 

1856 @Appender(_arima_results_predict) 

1857 def predict(self, start=None, end=None, exog=None, typ='linear', 

1858 dynamic=False): 

1859 return self.model.predict(self.params, start, end, exog, typ, dynamic) 

1860 

1861 def _forecast_error(self, steps): 

1862 sigma2 = self.sigma2 

1863 ma_rep = arma2ma(np.r_[1, -self.arparams], 

1864 np.r_[1, self.maparams], lags=steps) 

1865 

1866 fcerr = np.sqrt(np.cumsum(cumsum_n(ma_rep, self.k_diff) ** 2) * sigma2) 

1867 return fcerr 

1868 

1869 def _forecast_conf_int(self, forecast, fcerr, alpha): 

1870 const = norm.ppf(1 - alpha / 2.) 

1871 conf_int = np.c_[forecast - const * fcerr, forecast + const * fcerr] 

1872 return conf_int 

1873 

1874 def forecast(self, steps=1, exog=None, alpha=.05): 

1875 """ 

1876 Out-of-sample forecasts 

1877 

1878 Parameters 

1879 ---------- 

1880 steps : int 

1881 The number of out of sample forecasts from the end of the 

1882 sample. 

1883 exog : ndarray 

1884 If the model is an ARIMAX, you must provide out of sample 

1885 values for the exogenous variables. This should not include 

1886 the constant. The number of observation in exog must match the 

1887 value of steps. 

1888 alpha : float 

1889 The confidence intervals for the forecasts are (1 - alpha) % 

1890 

1891 Returns 

1892 ------- 

1893 forecast : ndarray 

1894 Array of out of sample forecasts 

1895 stderr : ndarray 

1896 Array of the standard error of the forecasts. 

1897 conf_int : ndarray 

1898 2d array of the confidence interval for the forecast 

1899 

1900 Notes 

1901 ----- 

1902 Prediction is done in the levels of the original endogenous variable. 

1903 If you would like prediction of differences in levels use `predict`. 

1904 """ 

1905 if exog is not None: 

1906 exog = array_like(exog, 'exog', ndim=2) 

1907 if exog.shape[0] != steps: 

1908 raise ValueError("new exog needed for each step") 

1909 if self.k_exog != exog.shape[1]: 

1910 raise ValueError('exog must contain the same number of ' 

1911 'variables as in the estimated model.') 

1912 # prepend in-sample exog observations 

1913 if self.k_ar > 0: 

1914 exog = np.vstack((self.model.exog[-self.k_ar:, self.k_trend:], 

1915 exog)) 

1916 else: 

1917 if self.k_exog: 

1918 raise ValueError('Forecast values for exog are required when ' 

1919 'the model contains exogenous regressors.') 

1920 

1921 forecast = _arma_predict_out_of_sample(self.params, steps, self.resid, 

1922 self.k_ar, self.k_ma, 

1923 self.k_trend, self.k_exog, 

1924 self.model.endog, 

1925 exog, method=self.model.method) 

1926 

1927 d = self.model.k_diff 

1928 endog = self.model.data.endog[-d:] 

1929 forecast = unintegrate(forecast, unintegrate_levels(endog, d))[d:] 

1930 

1931 # get forecast errors 

1932 fcerr = self._forecast_error(steps) 

1933 conf_int = self._forecast_conf_int(forecast, fcerr, alpha) 

1934 return forecast, fcerr, conf_int 

1935 

1936 @Appender(_arima_plot_predict) 

1937 def plot_predict(self, start=None, end=None, exog=None, dynamic=False, 

1938 alpha=.05, plot_insample=True, ax=None): 

1939 from statsmodels.graphics.utils import _import_mpl, create_mpl_ax 

1940 _ = _import_mpl() 

1941 fig, ax = create_mpl_ax(ax) 

1942 

1943 # use predict so you set dates 

1944 forecast = self.predict(start, end, exog, 'levels', dynamic) 

1945 # doing this twice. just add a plot keyword to predict? 

1946 start, end, out_of_sample, _ = ( 

1947 self.model._get_prediction_index(start, end, dynamic)) 

1948 

1949 if out_of_sample: 

1950 steps = out_of_sample 

1951 fc_error = self._forecast_error(steps) 

1952 conf_int = self._forecast_conf_int(forecast[-steps:], fc_error, 

1953 alpha) 

1954 

1955 if hasattr(self.data, "predict_dates"): 

1956 from pandas import Series 

1957 forecast = Series(forecast, index=self.data.predict_dates) 

1958 ax = forecast.plot(ax=ax, label='forecast') 

1959 else: 

1960 ax.plot(forecast) 

1961 

1962 x = ax.get_lines()[-1].get_xdata() 

1963 if out_of_sample: 

1964 label = "{0:.0%} confidence interval".format(1 - alpha) 

1965 ax.fill_between(x[-out_of_sample:], conf_int[:, 0], conf_int[:, 1], 

1966 color='gray', alpha=.5, label=label) 

1967 

1968 if plot_insample: 

1969 import re 

1970 k_diff = self.k_diff 

1971 label = re.sub(r"D\d*\.", "", self.model.endog_names) 

1972 levels = unintegrate(self.model.endog, 

1973 self.model._first_unintegrate) 

1974 ax.plot(x[:end + 1 - start], 

1975 levels[start + k_diff:end + k_diff + 1], label=label) 

1976 

1977 ax.legend(loc='best') 

1978 

1979 return fig 

1980 

1981 

1982class ARIMAResultsWrapper(ARMAResultsWrapper): 

1983 pass 

1984 

1985 

1986wrap.populate_wrapper(ARIMAResultsWrapper, ARIMAResults) # noqa:E305