Coverage for /home/martinb/.local/share/virtualenvs/camcops/lib/python3.6/site-packages/statsmodels/emplike/originregress.py : 24%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1"""
2This module implements empirical likelihood regression that is forced through
3the origin.
5This is different than regression not forced through the origin because the
6maximum empirical likelihood estimate is calculated with a vector of ones in
7the exogenous matrix but restricts the intercept parameter to be 0. This
8results in significantly more narrow confidence intervals and different
9parameter estimates.
11For notes on regression not forced through the origin, see empirical likelihood
12methods in the OLSResults class.
14General References
15------------------
16Owen, A.B. (2001). Empirical Likelihood. Chapman and Hall. p. 82.
18"""
19import numpy as np
20from scipy.stats import chi2
21from scipy import optimize
22# When descriptive merged, this will be changed
23from statsmodels.tools.tools import add_constant
24from statsmodels.regression.linear_model import OLS, RegressionResults
27class ELOriginRegress(object):
28 """
29 Empirical Likelihood inference and estimation for linear regression
30 through the origin.
32 Parameters
33 ----------
34 endog: nx1 array
35 Array of response variables.
37 exog: nxk array
38 Array of exogenous variables. Assumes no array of ones
40 Attributes
41 ----------
42 endog : nx1 array
43 Array of response variables
45 exog : nxk array
46 Array of exogenous variables. Assumes no array of ones.
48 nobs : float
49 Number of observations.
51 nvar : float
52 Number of exogenous regressors.
53 """
54 def __init__(self, endog, exog):
55 self.endog = endog
56 self.exog = exog
57 self.nobs = self.exog.shape[0]
58 try:
59 self.nvar = float(exog.shape[1])
60 except IndexError:
61 self.nvar = 1.
63 def fit(self):
64 """
65 Fits the model and provides regression results.
67 Returns
68 -------
69 Results : class
70 Empirical likelihood regression class.
71 """
72 exog_with = add_constant(self.exog, prepend=True)
73 restricted_model = OLS(self.endog, exog_with)
74 restricted_fit = restricted_model.fit()
75 restricted_el = restricted_fit.el_test(
76 np.array([0]), np.array([0]), ret_params=1)
77 params = np.squeeze(restricted_el[3])
78 beta_hat_llr = restricted_el[0]
79 llf = np.sum(np.log(restricted_el[2]))
80 return OriginResults(restricted_model, params, beta_hat_llr, llf)
82 def predict(self, params, exog=None):
83 if exog is None:
84 exog = self.exog
85 return np.dot(add_constant(exog, prepend=True), params)
88class OriginResults(RegressionResults):
89 """
90 A Results class for empirical likelihood regression through the origin.
92 Parameters
93 ----------
94 model : class
95 An OLS model with an intercept.
97 params : 1darray
98 Fitted parameters.
100 est_llr : float
101 The log likelihood ratio of the model with the intercept restricted to
102 0 at the maximum likelihood estimates of the parameters.
103 llr_restricted/llr_unrestricted
105 llf_el : float
106 The log likelihood of the fitted model with the intercept restricted to 0.
108 Attributes
109 ----------
110 model : class
111 An OLS model with an intercept.
113 params : 1darray
114 Fitted parameter.
116 llr : float
117 The log likelihood ratio of the maximum empirical likelihood estimate.
119 llf_el : float
120 The log likelihood of the fitted model with the intercept restricted to 0.
122 Notes
123 -----
124 IMPORTANT. Since EL estimation does not drop the intercept parameter but
125 instead estimates the slope parameters conditional on the slope parameter
126 being 0, the first element for params will be the intercept, which is
127 restricted to 0.
129 IMPORTANT. This class inherits from RegressionResults but inference is
130 conducted via empirical likelihood. Therefore, any methods that
131 require an estimate of the covariance matrix will not function. Instead
132 use el_test and conf_int_el to conduct inference.
134 Examples
135 --------
136 >>> import statsmodels.api as sm
137 >>> data = sm.datasets.bc.load(as_pandas=False)
138 >>> model = sm.emplike.ELOriginRegress(data.endog, data.exog)
139 >>> fitted = model.fit()
140 >>> fitted.params # 0 is the intercept term.
141 array([ 0. , 0.00351813])
143 >>> fitted.el_test(np.array([.0034]), np.array([1]))
144 (3.6696503297979302, 0.055411808127497755)
145 >>> fitted.conf_int_el(1)
146 (0.0033971871114706867, 0.0036373150174892847)
148 # No covariance matrix so normal inference is not valid
149 >>> fitted.conf_int()
150 TypeError: unsupported operand type(s) for *: 'instancemethod' and 'float'
151 """
152 def __init__(self, model, params, est_llr, llf_el):
153 self.model = model
154 self.params = np.squeeze(params)
155 self.llr = est_llr
156 self.llf_el = llf_el
157 def el_test(self, b0_vals, param_nums, method='nm',
158 stochastic_exog=1, return_weights=0):
159 """
160 Returns the llr and p-value for a hypothesized parameter value
161 for a regression that goes through the origin.
163 Parameters
164 ----------
165 b0_vals : 1darray
166 The hypothesized value to be tested.
168 param_num : 1darray
169 Which parameters to test. Note this uses python
170 indexing but the '0' parameter refers to the intercept term,
171 which is assumed 0. Therefore, param_num should be > 0.
173 return_weights : bool
174 If true, returns the weights that optimize the likelihood
175 ratio at b0_vals. Default is False.
177 method : str
178 Can either be 'nm' for Nelder-Mead or 'powell' for Powell. The
179 optimization method that optimizes over nuisance parameters.
180 Default is 'nm'.
182 stochastic_exog : bool
183 When TRUE, the exogenous variables are assumed to be stochastic.
184 When the regressors are nonstochastic, moment conditions are
185 placed on the exogenous variables. Confidence intervals for
186 stochastic regressors are at least as large as non-stochastic
187 regressors. Default is TRUE.
189 Returns
190 -------
191 res : tuple
192 pvalue and likelihood ratio.
193 """
194 b0_vals = np.hstack((0, b0_vals))
195 param_nums = np.hstack((0, param_nums))
196 test_res = self.model.fit().el_test(b0_vals, param_nums, method=method,
197 stochastic_exog=stochastic_exog,
198 return_weights=return_weights)
199 llr_test = test_res[0]
200 llr_res = llr_test - self.llr
201 pval = chi2.sf(llr_res, self.model.exog.shape[1] - 1)
202 if return_weights:
203 return llr_res, pval, test_res[2]
204 else:
205 return llr_res, pval
207 def conf_int_el(self, param_num, upper_bound=None,
208 lower_bound=None, sig=.05, method='nm',
209 stochastic_exog=1):
210 """
211 Returns the confidence interval for a regression parameter when the
212 regression is forced through the origin.
214 Parameters
215 ----------
216 param_num : int
217 The parameter number to be tested. Note this uses python
218 indexing but the '0' parameter refers to the intercept term.
220 upper_bound : float
221 The maximum value the upper confidence limit can be. The
222 closer this is to the confidence limit, the quicker the
223 computation. Default is .00001 confidence limit under normality.
225 lower_bound : float
226 The minimum value the lower confidence limit can be.
227 Default is .00001 confidence limit under normality.
229 sig : float, optional
230 The significance level. Default .05.
232 method : str, optional
233 Algorithm to optimize of nuisance params. Can be 'nm' or
234 'powell'. Default is 'nm'.
236 Returns
237 -------
238 ci: tuple
239 The confidence interval for the parameter 'param_num'.
240 """
241 r0 = chi2.ppf(1 - sig, 1)
242 param_num = np.array([param_num])
243 if upper_bound is None:
244 upper_bound = (np.squeeze(self.model.fit().
245 conf_int(.0001)[param_num])[1])
246 if lower_bound is None:
247 lower_bound = (np.squeeze(self.model.fit().conf_int(.00001)
248 [param_num])[0])
249 f = lambda b0: self.el_test(np.array([b0]), param_num,
250 method=method,
251 stochastic_exog=stochastic_exog)[0] - r0
252 lowerl = optimize.brentq(f, lower_bound, self.params[param_num])
253 upperl = optimize.brentq(f, self.params[param_num], upper_bound)
254 return (lowerl, upperl)