
"""
Hannan-Rissanen procedure for estimating ARMA(p,q) model parameters.

Author: Chad Fulton
License: BSD-3
"""
import numpy as np

from scipy.signal import lfilter
from statsmodels.tools.tools import Bunch
from statsmodels.regression.linear_model import OLS, yule_walker
from statsmodels.tsa.tsatools import lagmat

from statsmodels.tsa.arima.specification import SARIMAXSpecification
from statsmodels.tsa.arima.params import SARIMAXParams


def hannan_rissanen(endog, ar_order=0, ma_order=0, demean=True,
                    initial_ar_order=None, unbiased=None):
    """
    Estimate ARMA parameters using Hannan-Rissanen procedure.

    Parameters
    ----------
    endog : array_like
        Input time series array, assumed to be stationary.
    ar_order : int
        Autoregressive order
    ma_order : int
        Moving average order
    demean : bool, optional
        Whether to estimate and remove the mean from the process prior to
        fitting the ARMA coefficients. Default is True.
    initial_ar_order : int, optional
        Order of long autoregressive process used for initial computation of
        residuals.
    unbiased : bool, optional
        Whether or not to apply the bias correction step. Default is True if
        the estimated coefficients from the previous step imply a stationary
        and invertible process and False otherwise.

    Returns
    -------
    parameters : SARIMAXParams object
    other_results : Bunch
        Includes three components: `spec`, containing the
        `SARIMAXSpecification` instance corresponding to the input arguments;
        `initial_ar_order`, containing the autoregressive lag order used in
        the first step; and `resid`, which contains the computed residuals
        from the last step.

    Notes
    -----
    The primary reference is [1]_, section 5.1.4, which describes a three-step
    procedure that we implement here.

    1. Fit a large-order AR model via Yule-Walker to estimate residuals
    2. Compute AR and MA estimates via least squares
    3. (Unless the estimated coefficients from step (2) are non-stationary /
       non-invertible or `unbiased=False`) Perform bias correction

    The order used for the AR model in the first step may be given as an
    argument. If it is not, we compute it as suggested by [2]_.

    The estimate of the variance that we use is computed from the residuals
    of the least-squares regression and not from the innovations algorithm.
    This is because our fast implementation of the innovations algorithm is
    only valid for stationary processes, and the Hannan-Rissanen procedure may
    produce estimates that imply non-stationary processes. To avoid
    inconsistency, we never compute this latter variance here, even if it is
    possible. See test_hannan_rissanen::test_brockwell_davis_example_517 for
    an example of how to compute this variance manually.

    This procedure assumes that the series is stationary, but if this is not
    true, it is still possible that this procedure will return parameters that
    imply a non-stationary / non-invertible process.

    Note that the third stage will only be applied if the parameters from the
    second stage imply a stationary / invertible model. If `unbiased=True` is
    given, then non-stationary / non-invertible parameters in the second stage
    will throw an exception.

    References
    ----------
    .. [1] Brockwell, Peter J., and Richard A. Davis. 2016.
       Introduction to Time Series and Forecasting. Springer.
    .. [2] Gomez, Victor, and Agustin Maravall. 2001.
       "Automatic Modeling Methods for Univariate Series."
       A Course in Time Series Analysis, 171–201.

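    Examples
    --------
    A minimal usage sketch (illustrative only; it assumes this module is
    importable as shown, uses simulated white-noise input, and omits output,
    which depends on the simulated data):

    >>> import numpy as np
    >>> from statsmodels.tsa.arima.estimators.hannan_rissanen import (
    ...     hannan_rissanen)
    >>> y = np.random.default_rng(0).normal(size=1000)
    >>> p, other_results = hannan_rissanen(y, ar_order=1, ma_order=1)
    >>> p.ar_params, p.ma_params, p.sigma2  # doctest: +SKIP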
    """
    spec = SARIMAXSpecification(endog, ar_order=ar_order, ma_order=ma_order)
    endog = spec.endog
    if demean:
        endog = endog - endog.mean()

    p = SARIMAXParams(spec=spec)

    nobs = len(endog)
    max_ar_order = spec.max_ar_order
    max_ma_order = spec.max_ma_order

    # Default initial_ar_order is as suggested by Gomez and Maravall (2001)
    if initial_ar_order is None:
        initial_ar_order = max(np.floor(np.log(nobs)**2).astype(int),
                               2 * max(max_ar_order, max_ma_order))

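    # Illustrative arithmetic for the default above (not used by the code):
    # with nobs = 100 and an ARMA(1, 1) specification, np.log(100)**2 is
    # about 21.2, so initial_ar_order = max(21, 2 * max(1, 1)) = 21.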
    # Create a spec, just to validate the initial autoregressive order
    _ = SARIMAXSpecification(endog, ar_order=initial_ar_order)

    # Compute lagged endog
    # (`ar_ix`, and `ma_ix` below, are to account for non-consecutive lags;
    # for indexing purposes, must have dtype int)
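    # For example (illustrative): if the included AR lags are [1, 3], then
    # spec.ar_lags = [1, 3] and ar_ix = [0, 2], which selects the columns of
    # the lag matrix corresponding to lags 1 and 3.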
    ar_ix = np.array(spec.ar_lags, dtype=int) - 1
    lagged_endog = lagmat(endog, max_ar_order, trim='both')[:, ar_ix]

    # If no AR or MA components, this is just a variance computation
    if max_ma_order == 0 and max_ar_order == 0:
        p.sigma2 = np.var(endog, ddof=0)
        resid = endog.copy()
    # If no MA component, this is just CSS
    elif max_ma_order == 0:
        mod = OLS(endog[max_ar_order:], lagged_endog)
        res = mod.fit()
        resid = res.resid
        p.ar_params = res.params
        p.sigma2 = res.scale
    # Otherwise ARMA model
    else:
        # Step 1: Compute long AR model via Yule-Walker, get residuals
        initial_ar_params, _ = yule_walker(
            endog, order=initial_ar_order, method='mle')
        X = lagmat(endog, initial_ar_order, trim='both')
        y = endog[initial_ar_order:]
        resid = y - X.dot(initial_ar_params)

        # Get lagged residuals for `exog` in least-squares regression
        ma_ix = np.array(spec.ma_lags, dtype=int) - 1
        lagged_resid = lagmat(resid, max_ma_order, trim='both')[:, ma_ix]

        # Step 2: estimate ARMA model via least squares
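        # The OLS regression below regresses y_t on its own lags (at the AR
        # lag orders) and on lags of the step-1 residuals (at the MA lag
        # orders); the fitted coefficients are the preliminary AR and MA
        # estimates, i.e. roughly
        #   y_t ~ phi_1 y_{t-1} + ... + phi_p y_{t-p}
        #         + theta_1 e_{t-1} + ... + theta_q e_{t-q}
        # where e_t denotes the step-1 residuals.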
        ix = initial_ar_order + max_ma_order - max_ar_order
        mod = OLS(endog[initial_ar_order + max_ma_order:],
                  np.c_[lagged_endog[ix:], lagged_resid])
        res = mod.fit()
        p.ar_params = res.params[:spec.k_ar_params]
        p.ma_params = res.params[spec.k_ar_params:]
        resid = res.resid
        p.sigma2 = res.scale

        # Step 3: bias correction (if requested)
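        # The correction below computes the residuals Z implied by the step-2
        # estimates, filters them into the series V and W, and then regresses
        # Z on lagged V and lagged W; the resulting OLS coefficients are
        # added to the step-2 AR and MA estimates.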
        if unbiased is True or unbiased is None:
            if p.is_stationary and p.is_invertible:
                Z = np.zeros_like(endog)
                V = np.zeros_like(endog)
                W = np.zeros_like(endog)

                ar_coef = p.ar_poly.coef
                ma_coef = p.ma_poly.coef

                for t in range(nobs):
                    if t >= max(max_ar_order, max_ma_order):
                        # Note: in the case of non-consecutive lag orders, the
                        # polynomials have the appropriate zeros so we don't
                        # need to subset `endog[t - max_ar_order:t]` or
                        # Z[t - max_ma_order:t]
                        tmp_ar = np.dot(
                            -ar_coef[1:], endog[t - max_ar_order:t][::-1])
                        tmp_ma = np.dot(ma_coef[1:],
                                        Z[t - max_ma_order:t][::-1])
                        Z[t] = endog[t] - tmp_ar - tmp_ma

                V = lfilter([1], ar_coef, Z)
                W = lfilter(np.r_[1, -ma_coef[1:]], [1], Z)

                lagged_V = lagmat(V, max_ar_order, trim='both')
                lagged_W = lagmat(W, max_ma_order, trim='both')

                exog = np.c_[
                    lagged_V[max(max_ma_order - max_ar_order, 0):, ar_ix],
                    lagged_W[max(max_ar_order - max_ma_order, 0):, ma_ix]]

                mod_unbias = OLS(Z[max(max_ar_order, max_ma_order):], exog)
                res_unbias = mod_unbias.fit()

                p.ar_params = (
                    p.ar_params + res_unbias.params[:spec.k_ar_params])
                p.ma_params = (
                    p.ma_params + res_unbias.params[spec.k_ar_params:])

                # Recompute sigma2
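                # (i.e. the mean squared residual of the step-2 regression
                # design, evaluated at the bias-corrected parameters)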
                resid = mod.endog - mod.exog.dot(
                    np.r_[p.ar_params, p.ma_params])
                p.sigma2 = np.inner(resid, resid) / len(resid)
            elif unbiased is True:
                raise ValueError('Cannot perform third step of Hannan-Rissanen'
                                 ' estimation to remove parameter bias,'
                                 ' because parameters estimated from the'
                                 ' second step are non-stationary or'
                                 ' non-invertible')

    # TODO: Gomez and Maravall (2001) or Gomez (1998)
    # propose one more step here to further improve MA estimates

    # Construct results
    other_results = Bunch({
        'spec': spec,
        'initial_ar_order': initial_ar_order,
        'resid': resid
    })

    return p, other_results