Coverage for /home/martinb/.local/share/virtualenvs/camcops/lib/python3.6/site-packages/statsmodels/discrete/count_model.py : 23%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1__all__ = ["ZeroInflatedPoisson", "ZeroInflatedGeneralizedPoisson",
2 "ZeroInflatedNegativeBinomialP"]
4import warnings
5import numpy as np
6import statsmodels.base.model as base
7import statsmodels.base.wrapper as wrap
8import statsmodels.regression.linear_model as lm
9from statsmodels.discrete.discrete_model import (DiscreteModel, CountModel,
10 Poisson, Logit, CountResults,
11 L1CountResults, Probit,
12 _discrete_results_docs,
13 _validate_l1_method,
14 GeneralizedPoisson,
15 NegativeBinomialP)
16from statsmodels.distributions import zipoisson, zigenpoisson, zinegbin
17from statsmodels.tools.numdiff import approx_fprime, approx_hess
18from statsmodels.tools.decorators import cache_readonly
19from statsmodels.tools.sm_exceptions import ConvergenceWarning
20from statsmodels.compat.pandas import Appender
23_doc_zi_params = """
24 exog_infl : array_like or None
25 Explanatory variables for the binary inflation model, i.e. for
26 mixing probability model. If None, then a constant is used.
27 offset : array_like
28 Offset is added to the linear prediction with coefficient equal to 1.
29 exposure : array_like
30 Log(exposure) is added to the linear prediction with coefficient
31 equal to 1.
32 inflation : {'logit', 'probit'}
33 The model for the zero inflation, either Logit (default) or Probit
34 """
37class GenericZeroInflated(CountModel):
38 __doc__ = """
39 Generic Zero Inflated Model
41 %(params)s
42 %(extra_params)s
44 Attributes
45 ----------
46 endog : ndarray
47 A reference to the endogenous response variable
48 exog : ndarray
49 A reference to the exogenous design.
50 exog_infl: ndarray
51 A reference to the zero-inflated exogenous design.
52 """ % {'params' : base._model_params_doc,
53 'extra_params' : _doc_zi_params + base._missing_param_doc}
55 def __init__(self, endog, exog, exog_infl=None, offset=None,
56 inflation='logit', exposure=None, missing='none', **kwargs):
57 super(GenericZeroInflated, self).__init__(endog, exog, offset=offset,
58 exposure=exposure,
59 missing=missing, **kwargs)
61 if exog_infl is None:
62 self.k_inflate = 1
63 self.exog_infl = np.ones((endog.size, self.k_inflate),
64 dtype=np.float64)
65 else:
66 self.exog_infl = exog_infl
67 self.k_inflate = exog_infl.shape[1]
69 if len(exog.shape) == 1:
70 self.k_exog = 1
71 else:
72 self.k_exog = exog.shape[1]
74 self.infl = inflation
75 if inflation == 'logit':
76 self.model_infl = Logit(np.zeros(self.exog_infl.shape[0]),
77 self.exog_infl)
78 self._hessian_inflate = self._hessian_logit
79 elif inflation == 'probit':
80 self.model_infl = Probit(np.zeros(self.exog_infl.shape[0]),
81 self.exog_infl)
82 self._hessian_inflate = self._hessian_probit
84 else:
85 raise ValueError("inflation == %s, which is not handled"
86 % inflation)
88 self.inflation = inflation
89 self.k_extra = self.k_inflate
91 if len(self.exog) != len(self.exog_infl):
92 raise ValueError('exog and exog_infl have different number of'
93 'observation. `missing` handling is not supported')
95 infl_names = ['inflate_%s' % i for i in self.model_infl.data.param_names]
96 self.exog_names[:] = infl_names + list(self.exog_names)
97 self.exog_infl = np.asarray(self.exog_infl, dtype=np.float64)
99 self._init_keys.extend(['exog_infl', 'inflation'])
100 self._null_drop_keys = ['exog_infl']
102 def loglike(self, params):
103 """
104 Loglikelihood of Generic Zero Inflated model.
106 Parameters
107 ----------
108 params : array_like
109 The parameters of the model.
111 Returns
112 -------
113 loglike : float
114 The log-likelihood function of the model evaluated at `params`.
115 See notes.
117 Notes
118 --------
119 .. math:: \\ln L=\\sum_{y_{i}=0}\\ln(w_{i}+(1-w_{i})*P_{main\\_model})+
120 \\sum_{y_{i}>0}(\\ln(1-w_{i})+L_{main\\_model})
121 where P - pdf of main model, L - loglike function of main model.
122 """
123 return np.sum(self.loglikeobs(params))
125 def loglikeobs(self, params):
126 """
127 Loglikelihood for observations of Generic Zero Inflated model.
129 Parameters
130 ----------
131 params : array_like
132 The parameters of the model.
134 Returns
135 -------
136 loglike : ndarray
137 The log likelihood for each observation of the model evaluated
138 at `params`. See Notes for definition.
140 Notes
141 --------
142 .. math:: \\ln L=\\ln(w_{i}+(1-w_{i})*P_{main\\_model})+
143 \\ln(1-w_{i})+L_{main\\_model}
144 where P - pdf of main model, L - loglike function of main model.
146 for observations :math:`i=1,...,n`
147 """
148 params_infl = params[:self.k_inflate]
149 params_main = params[self.k_inflate:]
151 y = self.endog
152 w = self.model_infl.predict(params_infl)
154 w = np.clip(w, np.finfo(float).eps, 1 - np.finfo(float).eps)
155 llf_main = self.model_main.loglikeobs(params_main)
156 zero_idx = np.nonzero(y == 0)[0]
157 nonzero_idx = np.nonzero(y)[0]
159 llf = np.zeros_like(y, dtype=np.float64)
160 llf[zero_idx] = (np.log(w[zero_idx] +
161 (1 - w[zero_idx]) * np.exp(llf_main[zero_idx])))
162 llf[nonzero_idx] = np.log(1 - w[nonzero_idx]) + llf_main[nonzero_idx]
164 return llf
166 @Appender(DiscreteModel.fit.__doc__)
167 def fit(self, start_params=None, method='bfgs', maxiter=35,
168 full_output=1, disp=1, callback=None,
169 cov_type='nonrobust', cov_kwds=None, use_t=None, **kwargs):
170 if start_params is None:
171 offset = getattr(self, "offset", 0) + getattr(self, "exposure", 0)
172 if np.size(offset) == 1 and offset == 0:
173 offset = None
174 start_params = self._get_start_params()
176 if callback is None:
177 # work around perfect separation callback #3895
178 callback = lambda *x: x
180 mlefit = super(GenericZeroInflated, self).fit(start_params=start_params,
181 maxiter=maxiter, disp=disp, method=method,
182 full_output=full_output, callback=callback,
183 **kwargs)
185 zipfit = self.result_class(self, mlefit._results)
186 result = self.result_class_wrapper(zipfit)
188 if cov_kwds is None:
189 cov_kwds = {}
191 result._get_robustcov_results(cov_type=cov_type,
192 use_self=True, use_t=use_t, **cov_kwds)
193 return result
195 @Appender(DiscreteModel.fit_regularized.__doc__)
196 def fit_regularized(self, start_params=None, method='l1',
197 maxiter='defined_by_method', full_output=1, disp=1, callback=None,
198 alpha=0, trim_mode='auto', auto_trim_tol=0.01, size_trim_tol=1e-4,
199 qc_tol=0.03, **kwargs):
201 _validate_l1_method(method)
203 if np.size(alpha) == 1 and alpha != 0:
204 k_params = self.k_exog + self.k_inflate
205 alpha = alpha * np.ones(k_params)
207 extra = self.k_extra - self.k_inflate
208 alpha_p = alpha[:-(self.k_extra - extra)] if (self.k_extra
209 and np.size(alpha) > 1) else alpha
210 if start_params is None:
211 offset = getattr(self, "offset", 0) + getattr(self, "exposure", 0)
212 if np.size(offset) == 1 and offset == 0:
213 offset = None
214 start_params = self.model_main.fit_regularized(
215 start_params=start_params, method=method, maxiter=maxiter,
216 full_output=full_output, disp=0, callback=callback,
217 alpha=alpha_p, trim_mode=trim_mode, auto_trim_tol=auto_trim_tol,
218 size_trim_tol=size_trim_tol, qc_tol=qc_tol, **kwargs).params
219 start_params = np.append(np.ones(self.k_inflate), start_params)
220 cntfit = super(CountModel, self).fit_regularized(
221 start_params=start_params, method=method, maxiter=maxiter,
222 full_output=full_output, disp=disp, callback=callback,
223 alpha=alpha, trim_mode=trim_mode, auto_trim_tol=auto_trim_tol,
224 size_trim_tol=size_trim_tol, qc_tol=qc_tol, **kwargs)
226 discretefit = self.result_class_reg(self, cntfit)
227 return self.result_class_reg_wrapper(discretefit)
229 def score_obs(self, params):
230 """
231 Generic Zero Inflated model score (gradient) vector of the log-likelihood
233 Parameters
234 ----------
235 params : array_like
236 The parameters of the model
238 Returns
239 -------
240 score : ndarray, 1-D
241 The score vector of the model, i.e. the first derivative of the
242 loglikelihood function, evaluated at `params`
243 """
244 params_infl = params[:self.k_inflate]
245 params_main = params[self.k_inflate:]
247 y = self.endog
248 w = self.model_infl.predict(params_infl)
249 w = np.clip(w, np.finfo(float).eps, 1 - np.finfo(float).eps)
250 score_main = self.model_main.score_obs(params_main)
251 llf_main = self.model_main.loglikeobs(params_main)
252 llf = self.loglikeobs(params)
253 zero_idx = np.nonzero(y == 0)[0]
254 nonzero_idx = np.nonzero(y)[0]
256 mu = self.model_main.predict(params_main)
258 dldp = np.zeros((self.exog.shape[0], self.k_exog), dtype=np.float64)
259 dldw = np.zeros_like(self.exog_infl, dtype=np.float64)
261 dldp[zero_idx,:] = (score_main[zero_idx].T *
262 (1 - (w[zero_idx]) / np.exp(llf[zero_idx]))).T
263 dldp[nonzero_idx,:] = score_main[nonzero_idx]
265 if self.inflation == 'logit':
266 dldw[zero_idx,:] = (self.exog_infl[zero_idx].T * w[zero_idx] *
267 (1 - w[zero_idx]) *
268 (1 - np.exp(llf_main[zero_idx])) /
269 np.exp(llf[zero_idx])).T
270 dldw[nonzero_idx,:] = -(self.exog_infl[nonzero_idx].T *
271 w[nonzero_idx]).T
272 elif self.inflation == 'probit':
273 return approx_fprime(params, self.loglikeobs)
275 return np.hstack((dldw, dldp))
277 def score(self, params):
278 return self.score_obs(params).sum(0)
280 def _hessian_main(self, params):
281 pass
283 def _hessian_logit(self, params):
284 params_infl = params[:self.k_inflate]
285 params_main = params[self.k_inflate:]
287 y = self.endog
288 w = self.model_infl.predict(params_infl)
289 w = np.clip(w, np.finfo(float).eps, 1 - np.finfo(float).eps)
290 score_main = self.model_main.score_obs(params_main)
291 llf_main = self.model_main.loglikeobs(params_main)
292 llf = self.loglikeobs(params)
293 zero_idx = np.nonzero(y == 0)[0]
294 nonzero_idx = np.nonzero(y)[0]
296 hess_arr = np.zeros((self.k_inflate, self.k_exog + self.k_inflate))
298 pmf = np.exp(llf)
300 #d2l/dw2
301 for i in range(self.k_inflate):
302 for j in range(i, -1, -1):
303 hess_arr[i, j] = ((
304 self.exog_infl[zero_idx, i] * self.exog_infl[zero_idx, j] *
305 (w[zero_idx] * (1 - w[zero_idx]) * ((1 -
306 np.exp(llf_main[zero_idx])) * (1 - 2 * w[zero_idx]) *
307 np.exp(llf[zero_idx]) - (w[zero_idx] - w[zero_idx]**2) *
308 (1 - np.exp(llf_main[zero_idx]))**2) /
309 pmf[zero_idx]**2)).sum() -
310 (self.exog_infl[nonzero_idx, i] * self.exog_infl[nonzero_idx, j] *
311 w[nonzero_idx] * (1 - w[nonzero_idx])).sum())
313 #d2l/dpdw
314 for i in range(self.k_inflate):
315 for j in range(self.k_exog):
316 hess_arr[i, j + self.k_inflate] = -(score_main[zero_idx, j] *
317 w[zero_idx] * (1 - w[zero_idx]) *
318 self.exog_infl[zero_idx, i] / pmf[zero_idx]).sum()
320 return hess_arr
322 def _hessian_probit(self, params):
323 pass
325 def hessian(self, params):
326 """
327 Generic Zero Inflated model Hessian matrix of the loglikelihood
329 Parameters
330 ----------
331 params : array_like
332 The parameters of the model
334 Returns
335 -------
336 hess : ndarray, (k_vars, k_vars)
337 The Hessian, second derivative of loglikelihood function,
338 evaluated at `params`
340 Notes
341 -----
342 """
343 hess_arr_main = self._hessian_main(params)
344 hess_arr_infl = self._hessian_inflate(params)
346 if hess_arr_main is None or hess_arr_infl is None:
347 return approx_hess(params, self.loglike)
349 dim = self.k_exog + self.k_inflate
351 hess_arr = np.zeros((dim, dim))
353 hess_arr[:self.k_inflate,:] = hess_arr_infl
354 hess_arr[self.k_inflate:,self.k_inflate:] = hess_arr_main
356 tri_idx = np.triu_indices(self.k_exog + self.k_inflate, k=1)
357 hess_arr[tri_idx] = hess_arr.T[tri_idx]
359 return hess_arr
361 def predict(self, params, exog=None, exog_infl=None, exposure=None,
362 offset=None, which='mean'):
363 """
364 Predict response variable of a count model given exogenous variables.
366 Parameters
367 ----------
368 params : array_like
369 The parameters of the model
370 exog : ndarray, optional
371 A reference to the exogenous design.
372 If not assigned, will be used exog from fitting.
373 exog_infl : ndarray, optional
374 A reference to the zero-inflated exogenous design.
375 If not assigned, will be used exog from fitting.
376 offset : ndarray, optional
377 Offset is added to the linear prediction with coefficient equal to 1.
378 exposure : ndarray, optional
379 Log(exposure) is added to the linear prediction with coefficient
380 equal to 1. If exposure is specified, then it will be logged by the method.
381 The user does not need to log it first.
382 which : str, optional
383 Define values that will be predicted.
384 'mean', 'mean-main', 'linear', 'mean-nonzero', 'prob-zero, 'prob', 'prob-main'
385 Default is 'mean'.
387 Notes
388 -----
389 """
390 if exog is None:
391 exog = self.exog
393 if exog_infl is None:
394 exog_infl = self.exog_infl
396 if exposure is None:
397 exposure = getattr(self, 'exposure', 0)
398 else:
399 exposure = np.log(exposure)
401 if offset is None:
402 offset = 0
404 params_infl = params[:self.k_inflate]
405 params_main = params[self.k_inflate:]
407 prob_main = 1 - self.model_infl.predict(params_infl, exog_infl)
409 lin_pred = np.dot(exog, params_main[:self.exog.shape[1]]) + exposure + offset
411 # Refactor: This is pretty hacky,
412 # there should be an appropriate predict method in model_main
413 # this is just prob(y=0 | model_main)
414 tmp_exog = self.model_main.exog
415 tmp_endog = self.model_main.endog
416 tmp_offset = getattr(self.model_main, 'offset', ['no'])
417 tmp_exposure = getattr(self.model_main, 'exposure', ['no'])
418 self.model_main.exog = exog
419 self.model_main.endog = np.zeros((exog.shape[0]))
420 self.model_main.offset = offset
421 self.model_main.exposure = exposure
422 llf = self.model_main.loglikeobs(params_main)
423 self.model_main.exog = tmp_exog
424 self.model_main.endog = tmp_endog
425 # tmp_offset might be an array with elementwise equality testing
426 if len(tmp_offset) == 1 and tmp_offset[0] == 'no':
427 del self.model_main.offset
428 else:
429 self.model_main.offset = tmp_offset
430 if len(tmp_exposure) == 1 and tmp_exposure[0] == 'no':
431 del self.model_main.exposure
432 else:
433 self.model_main.exposure = tmp_exposure
434 # end hack
436 prob_zero = (1 - prob_main) + prob_main * np.exp(llf)
438 if which == 'mean':
439 return prob_main * np.exp(lin_pred)
440 elif which == 'mean-main':
441 return np.exp(lin_pred)
442 elif which == 'linear':
443 return lin_pred
444 elif which == 'mean-nonzero':
445 return prob_main * np.exp(lin_pred) / (1 - prob_zero)
446 elif which == 'prob-zero':
447 return prob_zero
448 elif which == 'prob-main':
449 return prob_main
450 elif which == 'prob':
451 return self._predict_prob(params, exog, exog_infl, exposure, offset)
452 else:
453 raise ValueError('which = %s is not available' % which)
456class ZeroInflatedPoisson(GenericZeroInflated):
457 __doc__ = """
458 Poisson Zero Inflated Model
460 %(params)s
461 %(extra_params)s
463 Attributes
464 ----------
465 endog : ndarray
466 A reference to the endogenous response variable
467 exog : ndarray
468 A reference to the exogenous design.
469 exog_infl: ndarray
470 A reference to the zero-inflated exogenous design.
471 """ % {'params' : base._model_params_doc,
472 'extra_params' : _doc_zi_params + base._missing_param_doc}
474 def __init__(self, endog, exog, exog_infl=None, offset=None, exposure=None,
475 inflation='logit', missing='none', **kwargs):
476 super(ZeroInflatedPoisson, self).__init__(endog, exog, offset=offset,
477 inflation=inflation,
478 exog_infl=exog_infl,
479 exposure=exposure,
480 missing=missing, **kwargs)
481 self.model_main = Poisson(self.endog, self.exog, offset=offset,
482 exposure=exposure)
483 self.distribution = zipoisson
484 self.result_class = ZeroInflatedPoissonResults
485 self.result_class_wrapper = ZeroInflatedPoissonResultsWrapper
486 self.result_class_reg = L1ZeroInflatedPoissonResults
487 self.result_class_reg_wrapper = L1ZeroInflatedPoissonResultsWrapper
489 def _hessian_main(self, params):
490 params_infl = params[:self.k_inflate]
491 params_main = params[self.k_inflate:]
493 y = self.endog
494 w = self.model_infl.predict(params_infl)
495 w = np.clip(w, np.finfo(float).eps, 1 - np.finfo(float).eps)
496 score = self.score(params)
497 zero_idx = np.nonzero(y == 0)[0]
498 nonzero_idx = np.nonzero(y)[0]
500 mu = self.model_main.predict(params_main)
502 hess_arr = np.zeros((self.k_exog, self.k_exog))
504 coeff = (1 + w[zero_idx] * (np.exp(mu[zero_idx]) - 1))
506 #d2l/dp2
507 for i in range(self.k_exog):
508 for j in range(i, -1, -1):
509 hess_arr[i, j] = ((
510 self.exog[zero_idx, i] * self.exog[zero_idx, j] *
511 mu[zero_idx] * (w[zero_idx] - 1) * (1 / coeff -
512 w[zero_idx] * mu[zero_idx] * np.exp(mu[zero_idx]) /
513 coeff**2)).sum() - (mu[nonzero_idx] * self.exog[nonzero_idx, i] *
514 self.exog[nonzero_idx, j]).sum())
516 return hess_arr
518 def _predict_prob(self, params, exog, exog_infl, exposure, offset):
519 params_infl = params[:self.k_inflate]
520 params_main = params[self.k_inflate:]
522 counts = np.atleast_2d(np.arange(0, np.max(self.endog)+1))
524 if len(exog_infl.shape) < 2:
525 transform = True
526 w = np.atleast_2d(
527 self.model_infl.predict(params_infl, exog_infl))[:, None]
528 else:
529 transform = False
530 w = self.model_infl.predict(params_infl, exog_infl)[:, None]
532 w = np.clip(w, np.finfo(float).eps, 1 - np.finfo(float).eps)
533 mu = self.model_main.predict(params_main, exog,
534 offset=offset)[:, None]
535 result = self.distribution.pmf(counts, mu, w)
536 return result[0] if transform else result
538 def _get_start_params(self):
539 start_params = self.model_main.fit(disp=0, method="nm").params
540 start_params = np.append(np.ones(self.k_inflate) * 0.1, start_params)
541 return start_params
544class ZeroInflatedGeneralizedPoisson(GenericZeroInflated):
545 __doc__ = """
546 Zero Inflated Generalized Poisson Model
548 %(params)s
549 %(extra_params)s
551 Attributes
552 ----------
553 endog : ndarray
554 A reference to the endogenous response variable
555 exog : ndarray
556 A reference to the exogenous design.
557 exog_infl: ndarray
558 A reference to the zero-inflated exogenous design.
559 p: scalar
560 P denotes parametrizations for ZIGP regression.
561 """ % {'params' : base._model_params_doc,
562 'extra_params' : _doc_zi_params +
563 """p : float
564 dispersion power parameter for the GeneralizedPoisson model. p=1 for
565 ZIGP-1 and p=2 for ZIGP-2. Default is p=2
566 """ + base._missing_param_doc}
568 def __init__(self, endog, exog, exog_infl=None, offset=None, exposure=None,
569 inflation='logit', p=2, missing='none', **kwargs):
570 super(ZeroInflatedGeneralizedPoisson, self).__init__(endog, exog,
571 offset=offset,
572 inflation=inflation,
573 exog_infl=exog_infl,
574 exposure=exposure,
575 missing=missing, **kwargs)
576 self.model_main = GeneralizedPoisson(self.endog, self.exog,
577 offset=offset, exposure=exposure, p=p)
578 self.distribution = zigenpoisson
579 self.k_exog += 1
580 self.k_extra += 1
581 self.exog_names.append("alpha")
582 self.result_class = ZeroInflatedGeneralizedPoissonResults
583 self.result_class_wrapper = ZeroInflatedGeneralizedPoissonResultsWrapper
584 self.result_class_reg = L1ZeroInflatedGeneralizedPoissonResults
585 self.result_class_reg_wrapper = L1ZeroInflatedGeneralizedPoissonResultsWrapper
587 def _get_init_kwds(self):
588 kwds = super(ZeroInflatedGeneralizedPoisson, self)._get_init_kwds()
589 kwds['p'] = self.model_main.parameterization + 1
590 return kwds
592 def _predict_prob(self, params, exog, exog_infl, exposure, offset):
593 params_infl = params[:self.k_inflate]
594 params_main = params[self.k_inflate:]
596 p = self.model_main.parameterization
597 counts = np.atleast_2d(np.arange(0, np.max(self.endog)+1))
599 if len(exog_infl.shape) < 2:
600 transform = True
601 w = np.atleast_2d(
602 self.model_infl.predict(params_infl, exog_infl))[:, None]
603 else:
604 transform = False
605 w = self.model_infl.predict(params_infl, exog_infl)[:, None]
607 w[w == 1.] = np.nextafter(1, 0)
608 mu = self.model_main.predict(params_main, exog,
609 exposure=exposure, offset=offset)[:, None]
610 result = self.distribution.pmf(counts, mu, params_main[-1], p, w)
611 return result[0] if transform else result
613 def _get_start_params(self):
614 with warnings.catch_warnings():
615 warnings.simplefilter("ignore", category=ConvergenceWarning)
616 start_params = ZeroInflatedPoisson(self.endog, self.exog,
617 exog_infl=self.exog_infl).fit(disp=0).params
618 start_params = np.append(start_params, 0.1)
619 return start_params
622class ZeroInflatedNegativeBinomialP(GenericZeroInflated):
623 __doc__ = """
624 Zero Inflated Generalized Negative Binomial Model
626 %(params)s
627 %(extra_params)s
629 Attributes
630 ----------
631 endog : ndarray
632 A reference to the endogenous response variable
633 exog : ndarray
634 A reference to the exogenous design.
635 exog_infl: ndarray
636 A reference to the zero-inflated exogenous design.
637 p: scalar
638 P denotes parametrizations for ZINB regression. p=1 for ZINB-1 and
639 p=2 for ZINB-2. Default is p=2
640 """ % {'params' : base._model_params_doc,
641 'extra_params' : _doc_zi_params +
642 """p : float
643 dispersion power parameter for the NegativeBinomialP model. p=1 for
644 ZINB-1 and p=2 for ZINM-2. Default is p=2
645 """ + base._missing_param_doc}
647 def __init__(self, endog, exog, exog_infl=None, offset=None, exposure=None,
648 inflation='logit', p=2, missing='none', **kwargs):
649 super(ZeroInflatedNegativeBinomialP, self).__init__(endog, exog,
650 offset=offset,
651 inflation=inflation,
652 exog_infl=exog_infl,
653 exposure=exposure,
654 missing=missing, **kwargs)
655 self.model_main = NegativeBinomialP(self.endog, self.exog,
656 offset=offset, exposure=exposure, p=p)
657 self.distribution = zinegbin
658 self.k_exog += 1
659 self.k_extra += 1
660 self.exog_names.append("alpha")
661 self.result_class = ZeroInflatedNegativeBinomialResults
662 self.result_class_wrapper = ZeroInflatedNegativeBinomialResultsWrapper
663 self.result_class_reg = L1ZeroInflatedNegativeBinomialResults
664 self.result_class_reg_wrapper = L1ZeroInflatedNegativeBinomialResultsWrapper
666 def _get_init_kwds(self):
667 kwds = super(ZeroInflatedNegativeBinomialP, self)._get_init_kwds()
668 kwds['p'] = self.model_main.parameterization
669 return kwds
671 def _predict_prob(self, params, exog, exog_infl, exposure, offset):
672 params_infl = params[:self.k_inflate]
673 params_main = params[self.k_inflate:]
675 p = self.model_main.parameterization
676 counts = np.arange(0, np.max(self.endog)+1)
678 if len(exog_infl.shape) < 2:
679 transform = True
680 w = np.atleast_2d(
681 self.model_infl.predict(params_infl, exog_infl))[:, None]
682 else:
683 transform = False
684 w = self.model_infl.predict(params_infl, exog_infl)[:, None]
686 w = np.clip(w, np.finfo(float).eps, 1 - np.finfo(float).eps)
687 mu = self.model_main.predict(params_main, exog,
688 exposure=exposure, offset=offset)[:, None]
689 result = self.distribution.pmf(counts, mu, params_main[-1], p, w)
690 return result[0] if transform else result
692 def _get_start_params(self):
693 with warnings.catch_warnings():
694 warnings.simplefilter("ignore", category=ConvergenceWarning)
695 start_params = self.model_main.fit(disp=0, method='nm').params
696 start_params = np.append(np.zeros(self.k_inflate), start_params)
697 return start_params
700class ZeroInflatedPoissonResults(CountResults):
701 __doc__ = _discrete_results_docs % {
702 "one_line_description": "A results class for Zero Inflated Poisson",
703 "extra_attr": ""}
705 @cache_readonly
706 def _dispersion_factor(self):
707 mu = self.predict(which='linear')
708 w = 1 - self.predict() / np.exp(self.predict(which='linear'))
709 return (1 + w * np.exp(mu))
711 def get_margeff(self, at='overall', method='dydx', atexog=None,
712 dummy=False, count=False):
713 """Get marginal effects of the fitted model.
715 Not yet implemented for Zero Inflated Models
716 """
717 raise NotImplementedError("not yet implemented for zero inflation")
720class L1ZeroInflatedPoissonResults(L1CountResults, ZeroInflatedPoissonResults):
721 pass
724class ZeroInflatedPoissonResultsWrapper(lm.RegressionResultsWrapper):
725 pass
726wrap.populate_wrapper(ZeroInflatedPoissonResultsWrapper,
727 ZeroInflatedPoissonResults)
730class L1ZeroInflatedPoissonResultsWrapper(lm.RegressionResultsWrapper):
731 pass
732wrap.populate_wrapper(L1ZeroInflatedPoissonResultsWrapper,
733 L1ZeroInflatedPoissonResults)
736class ZeroInflatedGeneralizedPoissonResults(CountResults):
737 __doc__ = _discrete_results_docs % {
738 "one_line_description": "A results class for Zero Inflated Generalized Poisson",
739 "extra_attr": ""}
741 @cache_readonly
742 def _dispersion_factor(self):
743 p = self.model.model_main.parameterization
744 alpha = self.params[self.model.k_inflate:][-1]
745 mu = np.exp(self.predict(which='linear'))
746 w = 1 - self.predict() / mu
747 return ((1 + alpha * mu**p)**2 + w * mu)
749 def get_margeff(self, at='overall', method='dydx', atexog=None,
750 dummy=False, count=False):
751 """Get marginal effects of the fitted model.
753 Not yet implemented for Zero Inflated Models
754 """
755 raise NotImplementedError("not yet implemented for zero inflation")
758class L1ZeroInflatedGeneralizedPoissonResults(L1CountResults,
759 ZeroInflatedGeneralizedPoissonResults):
760 pass
763class ZeroInflatedGeneralizedPoissonResultsWrapper(
764 lm.RegressionResultsWrapper):
765 pass
766wrap.populate_wrapper(ZeroInflatedGeneralizedPoissonResultsWrapper,
767 ZeroInflatedGeneralizedPoissonResults)
770class L1ZeroInflatedGeneralizedPoissonResultsWrapper(
771 lm.RegressionResultsWrapper):
772 pass
773wrap.populate_wrapper(L1ZeroInflatedGeneralizedPoissonResultsWrapper,
774 L1ZeroInflatedGeneralizedPoissonResults)
777class ZeroInflatedNegativeBinomialResults(CountResults):
778 __doc__ = _discrete_results_docs % {
779 "one_line_description": "A results class for Zero Inflated Generalized Negative Binomial",
780 "extra_attr": ""}
782 @cache_readonly
783 def _dispersion_factor(self):
784 p = self.model.model_main.parameterization
785 alpha = self.params[self.model.k_inflate:][-1]
786 mu = np.exp(self.predict(which='linear'))
787 w = 1 - self.predict() / mu
788 return (1 + alpha * mu**(p-1) + w * mu)
790 def get_margeff(self, at='overall', method='dydx', atexog=None,
791 dummy=False, count=False):
792 """Get marginal effects of the fitted model.
794 Not yet implemented for Zero Inflated Models
795 """
796 raise NotImplementedError("not yet implemented for zero inflation")
799class L1ZeroInflatedNegativeBinomialResults(L1CountResults,
800 ZeroInflatedNegativeBinomialResults):
801 pass
804class ZeroInflatedNegativeBinomialResultsWrapper(
805 lm.RegressionResultsWrapper):
806 pass
807wrap.populate_wrapper(ZeroInflatedNegativeBinomialResultsWrapper,
808 ZeroInflatedNegativeBinomialResults)
811class L1ZeroInflatedNegativeBinomialResultsWrapper(
812 lm.RegressionResultsWrapper):
813 pass
814wrap.populate_wrapper(L1ZeroInflatedNegativeBinomialResultsWrapper,
815 L1ZeroInflatedNegativeBinomialResults)