1""" 

2This module implements empirical likelihood regression that is forced through 

3the origin. 

4 

5This is different than regression not forced through the origin because the 

6maximum empirical likelihood estimate is calculated with a vector of ones in 

7the exogenous matrix but restricts the intercept parameter to be 0. This 

8results in significantly more narrow confidence intervals and different 

9parameter estimates. 

10 

11For notes on regression not forced through the origin, see empirical likelihood 

12methods in the OLSResults class. 

13 

14General References 

15------------------ 

16Owen, A.B. (2001). Empirical Likelihood. Chapman and Hall. p. 82. 

17 

18""" 

import numpy as np
from scipy.stats import chi2
from scipy import optimize
# When descriptive merged, this will be changed
from statsmodels.tools.tools import add_constant
from statsmodels.regression.linear_model import OLS, RegressionResults
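# Illustrative sketch, not part of the module API: the "forced through the
# origin" estimate described in the module docstring comes from an OLS model
# that keeps an explicit constant column but pins the intercept to 0 with an
# empirical likelihood test.  With ``endog`` and ``exog`` standing in for the
# arrays passed to ``ELOriginRegress``, the computation is roughly:
#
#     restricted = OLS(endog, add_constant(exog, prepend=True)).fit()
#     llr, pval, weights, params = restricted.el_test(
#         np.array([0]), np.array([0]), ret_params=1)
#
# ``ELOriginRegress.fit`` below wraps this computation and packages the
# results in ``OriginResults``.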

class ELOriginRegress(object):
    """
    Empirical Likelihood inference and estimation for linear regression
    through the origin.

    Parameters
    ----------
    endog : nx1 array
        Array of response variables.

    exog : nxk array
        Array of exogenous variables.  Assumed to contain no column of ones.

    Attributes
    ----------
    endog : nx1 array
        Array of response variables.

    exog : nxk array
        Array of exogenous variables.  Assumed to contain no column of ones.

    nobs : float
        Number of observations.

    nvar : float
        Number of exogenous regressors.
    """
    def __init__(self, endog, exog):
        self.endog = endog
        self.exog = exog
        self.nobs = self.exog.shape[0]
        try:
            self.nvar = float(exog.shape[1])
        except IndexError:
            self.nvar = 1.

    def fit(self):
        """
        Fits the model and provides regression results.

        Returns
        -------
        results : OriginResults
            Empirical likelihood regression results with the intercept
            restricted to 0.
        """
        # Fit OLS with an explicit intercept, then use an empirical
        # likelihood test to restrict that intercept to 0.
        exog_with = add_constant(self.exog, prepend=True)
        restricted_model = OLS(self.endog, exog_with)
        restricted_fit = restricted_model.fit()
        restricted_el = restricted_fit.el_test(
            np.array([0]), np.array([0]), ret_params=1)
        params = np.squeeze(restricted_el[3])
        beta_hat_llr = restricted_el[0]
        llf = np.sum(np.log(restricted_el[2]))
        return OriginResults(restricted_model, params, beta_hat_llr, llf)

    def predict(self, params, exog=None):
        """Return fitted values; uses ``self.exog`` when ``exog`` is None."""
        if exog is None:
            exog = self.exog
        return np.dot(add_constant(exog, prepend=True), params)

class OriginResults(RegressionResults):
    """
    A Results class for empirical likelihood regression through the origin.

    Parameters
    ----------
    model : class
        An OLS model with an intercept.

    params : 1darray
        Fitted parameters.

    est_llr : float
        The log likelihood ratio of the model with the intercept restricted
        to 0, evaluated at the maximum empirical likelihood estimates of the
        parameters (llr_restricted / llr_unrestricted).

    llf_el : float
        The log likelihood of the fitted model with the intercept restricted
        to 0.

    Attributes
    ----------
    model : class
        An OLS model with an intercept.

    params : 1darray
        Fitted parameters.

    llr : float
        The log likelihood ratio of the maximum empirical likelihood estimate.

    llf_el : float
        The log likelihood of the fitted model with the intercept restricted
        to 0.

    Notes
    -----
    IMPORTANT.  Since EL estimation does not drop the intercept parameter but
    instead estimates the slope parameters conditional on the intercept
    parameter being 0, the first element of params is the intercept, which is
    restricted to 0.

    IMPORTANT.  This class inherits from RegressionResults, but inference is
    conducted via empirical likelihood.  Therefore, any methods that require
    an estimate of the covariance matrix will not function.  Instead, use
    el_test and conf_int_el to conduct inference.

    Examples
    --------
    >>> import statsmodels.api as sm
    >>> data = sm.datasets.bc.load(as_pandas=False)
    >>> model = sm.emplike.ELOriginRegress(data.endog, data.exog)
    >>> fitted = model.fit()
    >>> fitted.params  # 0 is the intercept term.
    array([ 0.        ,  0.00351813])

    >>> fitted.el_test(np.array([.0034]), np.array([1]))
    (3.6696503297979302, 0.055411808127497755)
    >>> fitted.conf_int_el(1)
    (0.0033971871114706867, 0.0036373150174892847)

    >>> # No covariance matrix, so normal inference is not valid.
    >>> fitted.conf_int()
    TypeError: unsupported operand type(s) for *: 'instancemethod' and 'float'
    """
    def __init__(self, model, params, est_llr, llf_el):
        self.model = model
        self.params = np.squeeze(params)
        self.llr = est_llr
        self.llf_el = llf_el

    def el_test(self, b0_vals, param_nums, method='nm',
                stochastic_exog=1, return_weights=0):
        """
        Returns the llr and p-value for a hypothesized parameter value
        for a regression that goes through the origin.

        Parameters
        ----------
        b0_vals : 1darray
            The hypothesized values to be tested.

        param_nums : 1darray
            Which parameters to test.  Note this uses python
            indexing, but the '0' parameter refers to the intercept term,
            which is assumed 0.  Therefore, param_nums should be > 0.

        return_weights : bool
            If True, returns the weights that optimize the likelihood
            ratio at b0_vals.  Default is False.

        method : str
            Can either be 'nm' for Nelder-Mead or 'powell' for Powell.  The
            optimization method that optimizes over nuisance parameters.
            Default is 'nm'.

        stochastic_exog : bool
            When True, the exogenous variables are assumed to be stochastic.
            When the regressors are nonstochastic, moment conditions are
            placed on the exogenous variables.  Confidence intervals for
            stochastic regressors are at least as large as for nonstochastic
            regressors.  Default is True.

        Returns
        -------
        res : tuple
            The likelihood ratio and p-value.
        """
        # Prepend the intercept (restricted to 0) to the hypothesized values
        # and parameter indices, then test with the underlying OLS results.
        b0_vals = np.hstack((0, b0_vals))
        param_nums = np.hstack((0, param_nums))
        test_res = self.model.fit().el_test(b0_vals, param_nums,
                                            method=method,
                                            stochastic_exog=stochastic_exog,
                                            return_weights=return_weights)
        llr_test = test_res[0]
        # Subtract the llr of the origin-restricted MELE so the test is
        # conditional on the intercept being 0.
        llr_res = llr_test - self.llr
        pval = chi2.sf(llr_res, self.model.exog.shape[1] - 1)
        if return_weights:
            return llr_res, pval, test_res[2]
        else:
            return llr_res, pval

    def conf_int_el(self, param_num, upper_bound=None,
                    lower_bound=None, sig=.05, method='nm',
                    stochastic_exog=1):
        """
        Returns the confidence interval for a regression parameter when the
        regression is forced through the origin.

        Parameters
        ----------
        param_num : int
            The parameter number to be tested.  Note this uses python
            indexing, but the '0' parameter refers to the intercept term.

        upper_bound : float
            The maximum value the upper confidence limit can be.  The
            closer this is to the confidence limit, the quicker the
            computation.  Default is the .0001 confidence limit under
            normality.

        lower_bound : float
            The minimum value the lower confidence limit can be.
            Default is the .00001 confidence limit under normality.

        sig : float, optional
            The significance level.  Default is .05.

        method : str, optional
            Algorithm to optimize over the nuisance parameters.  Can be 'nm'
            or 'powell'.  Default is 'nm'.

        stochastic_exog : bool, optional
            When True, the exogenous variables are assumed to be stochastic.
            Default is True.

        Returns
        -------
        ci : tuple
            The confidence interval for the parameter 'param_num'.
        """
        r0 = chi2.ppf(1 - sig, 1)
        param_num = np.array([param_num])
        if upper_bound is None:
            upper_bound = (np.squeeze(self.model.fit().
                                      conf_int(.0001)[param_num])[1])
        if lower_bound is None:
            lower_bound = (np.squeeze(self.model.fit().conf_int(.00001)
                                      [param_num])[0])
        # Find where the EL log-likelihood ratio equals the chi-square
        # critical value on each side of the point estimate.
        f = lambda b0: self.el_test(np.array([b0]), param_num,
                                    method=method,
                                    stochastic_exog=stochastic_exog)[0] - r0
        lowerl = optimize.brentq(f, lower_bound, self.params[param_num])
        upperl = optimize.brentq(f, self.params[param_num], upper_bound)
        return (lowerl, upperl)
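
# ----------------------------------------------------------------------------
# Usage sketch (illustrative only, not part of the statsmodels API).  The
# synthetic arrays below are assumptions made purely for demonstration; any
# 1-D ``endog``/``exog`` pair without a column of ones works the same way.
if __name__ == "__main__":
    np.random.seed(0)
    x = np.random.uniform(1, 10, size=50)
    y = 2.5 * x + np.random.normal(scale=1., size=50)  # true line through 0
    fitted = ELOriginRegress(y, x).fit()
    print(fitted.params)  # first element is the intercept, restricted to 0
    # EL test that the slope equals 2.5; returns (llr, p-value)
    print(fitted.el_test(np.array([2.5]), np.array([1])))
    # EL confidence interval for the slope (parameter 1)
    print(fitted.conf_int_el(1))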