1""" 

2Limited dependent variable and qualitative variables. 

3 

4Includes binary outcomes, count data, (ordered) ordinal data and limited 

5dependent variables. 

6 

7General References 

8-------------------- 

9 

10A.C. Cameron and P.K. Trivedi. `Regression Analysis of Count Data`. 

11 Cambridge, 1998 

12 

13G.S. Madalla. `Limited-Dependent and Qualitative Variables in Econometrics`. 

14 Cambridge, 1983. 

15 

16W. Greene. `Econometric Analysis`. Prentice Hall, 5th. edition. 2003. 

17""" 

18__all__ = ["Poisson", "Logit", "Probit", "MNLogit", "NegativeBinomial", 

19 "GeneralizedPoisson", "NegativeBinomialP", "CountModel"] 

20 

21import numpy as np 

22from pandas import get_dummies, MultiIndex 

23 

24from scipy.special import gammaln, digamma, polygamma, loggamma 

25from scipy import stats, special 

26from scipy.stats import nbinom 

27 

28from statsmodels.compat.pandas import Appender 

29 

30import statsmodels.tools.tools as tools 

31from statsmodels.tools import data as data_tools 

32from statsmodels.tools.decorators import cache_readonly 

33from statsmodels.tools.sm_exceptions import (PerfectSeparationError, 

34 SpecificationWarning) 

35from statsmodels.tools.numdiff import approx_fprime_cs 

36import statsmodels.base.model as base 

37from statsmodels.base.data import handle_data # for mnlogit 

38import statsmodels.regression.linear_model as lm 

39import statsmodels.base.wrapper as wrap 

40 

41from statsmodels.base.l1_slsqp import fit_l1_slsqp 

42from statsmodels.distributions import genpoisson_p 

43 

44try: 

45 import cvxopt # noqa:F401 

46 have_cvxopt = True 

47except ImportError: 

48 have_cvxopt = False 

49 

50import warnings 

51 

52#TODO: When we eventually get user-settable precision, we need to change 

53# this 

54FLOAT_EPS = np.finfo(float).eps 

55 

56#TODO: add options for the parameter covariance/variance 

57# ie., OIM, EIM, and BHHH see Green 21.4 

58 

_discrete_models_docs = """
"""

_discrete_results_docs = """
    %(one_line_description)s

    Parameters
    ----------
    model : A DiscreteModel instance
    params : array_like
        The parameters of a fitted model.
    hessian : array_like
        The hessian of the fitted model.
    scale : float
        A scale parameter for the covariance matrix.

    Attributes
    ----------
    df_resid : float
        See model definition.
    df_model : float
        See model definition.
    llf : float
        Value of the loglikelihood
    %(extra_attr)s"""

_l1_results_attr = """    nnz_params : int
        The number of nonzero parameters in the model.  Train with
        trim_params == True or else numerical error will distort this.
    trimmed : bool array
        trimmed[i] == True if the ith parameter was trimmed from the model."""

_get_start_params_null_docs = """
Compute one-step moment estimator for null (constant-only) model

This is a preliminary estimator used as start_params.

Returns
-------
params : ndarray
    parameter estimate based on one-step moment matching

"""


# helper for MNLogit (will be generally useful later)

def _numpy_to_dummies(endog):
    if endog.dtype.kind in ['S', 'O']:
        endog_dummies, ynames = tools.categorical(endog, drop=True,
                                                  dictnames=True)
    elif endog.ndim == 2:
        endog_dummies = endog
        ynames = range(endog.shape[1])
    else:
        endog_dummies, ynames = tools.categorical(endog, drop=True,
                                                  dictnames=True)
    return endog_dummies, ynames


def _pandas_to_dummies(endog):
    if endog.ndim == 2:
        if endog.shape[1] == 1:
            yname = endog.columns[0]
            endog_dummies = get_dummies(endog.iloc[:, 0])
        else:  # series
            yname = 'y'
            endog_dummies = endog
    else:
        yname = endog.name
        endog_dummies = get_dummies(endog)
    ynames = endog_dummies.columns.tolist()

    return endog_dummies, ynames, yname


def _validate_l1_method(method):
    """
    As of 0.10.0, the supported values for `method` in `fit_regularized`
    are "l1" and "l1_cvxopt_cp". If an invalid value is passed, raise
    with a helpful error message

    Parameters
    ----------
    method : str

    Raises
    ------
    ValueError
    """
    if method not in ['l1', 'l1_cvxopt_cp']:
        raise ValueError('`method` = {method} is not supported, use either '
                         '"l1" or "l1_cvxopt_cp"'.format(method=method))



#### Private Model Classes ####


class DiscreteModel(base.LikelihoodModel):
    """
    Abstract class for discrete choice models.

    This class does not do anything itself but lays out the methods and
    call signature expected of child classes in addition to those of
    statsmodels.model.LikelihoodModel.
    """
    def __init__(self, endog, exog, **kwargs):
        super(DiscreteModel, self).__init__(endog, exog, **kwargs)
        self.raise_on_perfect_prediction = True

    def initialize(self):
        """
        Initialize is called by
        statsmodels.model.LikelihoodModel.__init__
        and should contain any preprocessing that needs to be done for a model.
        """
        # assumes constant
        rank = np.linalg.matrix_rank(self.exog)
        self.df_model = float(rank - 1)
        self.df_resid = float(self.exog.shape[0] - rank)

    def cdf(self, X):
        """
        The cumulative distribution function of the model.
        """
        raise NotImplementedError

    def pdf(self, X):
        """
        The probability density (mass) function of the model.
        """
        raise NotImplementedError

    def _check_perfect_pred(self, params, *args):
        endog = self.endog
        fittedvalues = self.cdf(np.dot(self.exog, params[:self.exog.shape[1]]))
        if (self.raise_on_perfect_prediction and
                np.allclose(fittedvalues - endog, 0)):
            msg = "Perfect separation detected, results not available"
            raise PerfectSeparationError(msg)

    @Appender(base.LikelihoodModel.fit.__doc__)
    def fit(self, start_params=None, method='newton', maxiter=35,
            full_output=1, disp=1, callback=None, **kwargs):
        """
        Fit the model using maximum likelihood.

        The rest of the docstring is from
        statsmodels.base.model.LikelihoodModel.fit
        """
        if callback is None:
            callback = self._check_perfect_pred
        else:
            pass  # TODO: make a function factory to have multiple call-backs

        mlefit = super(DiscreteModel, self).fit(
            start_params=start_params,
            method=method, maxiter=maxiter, full_output=full_output,
            disp=disp, callback=callback, **kwargs)

        return mlefit  # It is up to subclasses to wrap results


    def fit_regularized(self, start_params=None, method='l1',
                        maxiter='defined_by_method', full_output=1, disp=True,
                        callback=None, alpha=0, trim_mode='auto',
                        auto_trim_tol=0.01, size_trim_tol=1e-4, qc_tol=0.03,
                        qc_verbose=False, **kwargs):
        """
        Fit the model using a regularized maximum likelihood.

        The regularization method AND the solver used are both determined
        by the argument `method`.

        Parameters
        ----------
        start_params : array_like, optional
            Initial guess of the solution for the loglikelihood maximization.
            The default is an array of zeros.
        method : 'l1' or 'l1_cvxopt_cp'
            See notes for details.
        maxiter : {int, 'defined_by_method'}
            Maximum number of iterations to perform.
            If 'defined_by_method', then use method defaults (see notes).
        full_output : bool
            Set to True to have all available output in the Results object's
            mle_retvals attribute. The output is dependent on the solver.
            See LikelihoodModelResults notes section for more information.
        disp : bool
            Set to True to print convergence messages.
        fargs : tuple
            Extra arguments passed to the likelihood function, i.e.,
            loglike(x,*args).
        callback : callable callback(xk)
            Called after each iteration, as callback(xk), where xk is the
            current parameter vector.
        retall : bool
            Set to True to return list of solutions at each iteration.
            Available in Results object's mle_retvals attribute.
        alpha : non-negative scalar or numpy array (same size as parameters)
            The weight multiplying the l1 penalty term.
        trim_mode : 'auto', 'size', or 'off'
            If not 'off', trim (set to zero) parameters that would have been
            zero if the solver reached the theoretical minimum.
            If 'auto', trim params using the Theory above.
            If 'size', trim params if they have very small absolute value.
        size_trim_tol : float or 'auto' (default = 'auto')
            Tolerance used when trim_mode == 'size'.
        auto_trim_tol : float
            Tolerance used when trim_mode == 'auto'.
        qc_tol : float
            Print warning and do not allow auto trim when (ii) (above) is
            violated by this much.
        qc_verbose : bool
            If true, print out a full QC report upon failure.
        **kwargs
            Additional keyword arguments used when fitting the model.

        Returns
        -------
        Results
            A results instance.

        Notes
        -----
        Using 'l1_cvxopt_cp' requires the cvxopt module.

        Extra parameters are not penalized if alpha is given as a scalar.
        An example is the shape parameter in NegativeBinomial `nb1` and `nb2`.

        Optional arguments for the solvers (available in Results.mle_settings)::

            'l1'
                acc : float (default 1e-6)
                    Requested accuracy as used by slsqp
            'l1_cvxopt_cp'
                abstol : float
                    absolute accuracy (default: 1e-7).
                reltol : float
                    relative accuracy (default: 1e-6).
                feastol : float
                    tolerance for feasibility conditions (default: 1e-7).
                refinement : int
                    number of iterative refinement steps when solving KKT
                    equations (default: 1).

        Optimization methodology

        With :math:`L` the negative log likelihood, we solve the convex but
        non-smooth problem

        .. math:: \\min_\\beta L(\\beta) + \\sum_k\\alpha_k |\\beta_k|

        via the transformation to the smooth, convex, constrained problem
        in twice as many variables (adding the "added variables" :math:`u_k`)

        .. math:: \\min_{\\beta,u} L(\\beta) + \\sum_k\\alpha_k u_k,

        subject to

        .. math:: -u_k \\leq \\beta_k \\leq u_k.

        With :math:`\\partial_k L` the derivative of :math:`L` in the
        :math:`k^{th}` parameter direction, theory dictates that, at the
        minimum, exactly one of two conditions holds:

        (i) :math:`|\\partial_k L| = \\alpha_k` and :math:`\\beta_k \\neq 0`
        (ii) :math:`|\\partial_k L| \\leq \\alpha_k` and :math:`\\beta_k = 0`
        """
        _validate_l1_method(method)
        # Set attributes based on method
        cov_params_func = self.cov_params_func_l1

        ### Bundle up extra kwargs for the dictionary kwargs.  These are
        ### passed through super(...).fit() as kwargs and unpacked at
        ### appropriate times
        alpha = np.array(alpha)
        assert alpha.min() >= 0
        try:
            kwargs['alpha'] = alpha
        except TypeError:
            kwargs = dict(alpha=alpha)
        kwargs['alpha_rescaled'] = kwargs['alpha'] / float(self.endog.shape[0])
        kwargs['trim_mode'] = trim_mode
        kwargs['size_trim_tol'] = size_trim_tol
        kwargs['auto_trim_tol'] = auto_trim_tol
        kwargs['qc_tol'] = qc_tol
        kwargs['qc_verbose'] = qc_verbose

        ### Define default keyword arguments to be passed to super(...).fit()
        if maxiter == 'defined_by_method':
            if method == 'l1':
                maxiter = 1000
            elif method == 'l1_cvxopt_cp':
                maxiter = 70

        ## Parameters to pass to super(...).fit()
        # For the 'extra' parameters, pass all that are available,
        # even if we know (at this point) we will only use one.
        extra_fit_funcs = {'l1': fit_l1_slsqp}
        if have_cvxopt and method == 'l1_cvxopt_cp':
            from statsmodels.base.l1_cvxopt import fit_l1_cvxopt_cp
            extra_fit_funcs['l1_cvxopt_cp'] = fit_l1_cvxopt_cp
        elif method.lower() == 'l1_cvxopt_cp':
            raise ValueError("Cannot use l1_cvxopt_cp as cvxopt "
                             "was not found (install it, or use method='l1' instead)")

        if callback is None:
            callback = self._check_perfect_pred
        else:
            pass  # make a function factory to have multiple call-backs

        mlefit = super(DiscreteModel, self).fit(start_params=start_params,
                method=method, maxiter=maxiter, full_output=full_output,
                disp=disp, callback=callback, extra_fit_funcs=extra_fit_funcs,
                cov_params_func=cov_params_func, **kwargs)

        return mlefit  # up to subclasses to wrap results
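    # A minimal usage sketch of the L1 machinery documented above
    # (illustrative only; ``y``/``X`` stand in for any binary endog/exog):
    #
    # >>> mod = Logit(y, X)
    # >>> res = mod.fit_regularized(method='l1', alpha=1.0)
    # >>> res.params          # some coefficients trimmed exactly to zero
    #
    # Per the notes above, an array-valued `alpha` with a zero entry leaves
    # the corresponding coefficient (e.g. the constant) unpenalized.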

    def cov_params_func_l1(self, likelihood_model, xopt, retvals):
        """
        Computes cov_params on a reduced parameter space
        corresponding to the nonzero parameters resulting from the
        l1 regularized fit.

        Returns a full cov_params matrix, with entries corresponding
        to zero'd values set to np.nan.
        """
        H = likelihood_model.hessian(xopt)
        trimmed = retvals['trimmed']
        nz_idx = np.nonzero(~trimmed)[0]
        nnz_params = (~trimmed).sum()
        if nnz_params > 0:
            H_restricted = H[nz_idx[:, None], nz_idx]
            # Covariance estimate for the nonzero params
            H_restricted_inv = np.linalg.inv(-H_restricted)
        else:
            H_restricted_inv = np.zeros(0)

        cov_params = np.nan * np.ones(H.shape)
        cov_params[nz_idx[:, None], nz_idx] = H_restricted_inv

        return cov_params

    def predict(self, params, exog=None, linear=False):
        """
        Predict response variable of a model given exogenous variables.
        """
        raise NotImplementedError

    def _derivative_exog(self, params, exog=None, dummy_idx=None,
                         count_idx=None):
        """
        This should implement the derivative of the non-linear function
        """
        raise NotImplementedError

    def _derivative_exog_helper(self, margeff, params, exog, dummy_idx,
                                count_idx, transform):
        """
        Helper for _derivative_exog to wrap results appropriately
        """
        from .discrete_margins import _get_count_effects, _get_dummy_effects

        if count_idx is not None:
            margeff = _get_count_effects(margeff, exog, count_idx, transform,
                                         self, params)
        if dummy_idx is not None:
            margeff = _get_dummy_effects(margeff, exog, dummy_idx, transform,
                                         self, params)

        return margeff


class BinaryModel(DiscreteModel):

    def __init__(self, endog, exog, **kwargs):
        super(BinaryModel, self).__init__(endog, exog, **kwargs)
        if (not issubclass(self.__class__, MultinomialModel) and
                not np.all((self.endog >= 0) & (self.endog <= 1))):
            raise ValueError("endog must be in the unit interval.")

    def predict(self, params, exog=None, linear=False):
        """
        Predict response variable of a model given exogenous variables.

        Parameters
        ----------
        params : array_like
            Fitted parameters of the model.
        exog : array_like
            1d or 2d array of exogenous values.  If not supplied, the
            whole exog attribute of the model is used.
        linear : bool, optional
            If True, returns the linear predictor dot(exog, params).  Else,
            returns the value of the cdf at the linear predictor.

        Returns
        -------
        array
            Fitted values at exog.
        """
        if exog is None:
            exog = self.exog
        if not linear:
            return self.cdf(np.dot(exog, params))
        else:
            return np.dot(exog, params)

    @Appender(DiscreteModel.fit_regularized.__doc__)
    def fit_regularized(self, start_params=None, method='l1',
            maxiter='defined_by_method', full_output=1, disp=1, callback=None,
            alpha=0, trim_mode='auto', auto_trim_tol=0.01, size_trim_tol=1e-4,
            qc_tol=0.03, **kwargs):

        _validate_l1_method(method)

        bnryfit = super(BinaryModel, self).fit_regularized(
            start_params=start_params, method=method, maxiter=maxiter,
            full_output=full_output, disp=disp, callback=callback,
            alpha=alpha, trim_mode=trim_mode, auto_trim_tol=auto_trim_tol,
            size_trim_tol=size_trim_tol, qc_tol=qc_tol, **kwargs)

        discretefit = L1BinaryResults(self, bnryfit)
        return L1BinaryResultsWrapper(discretefit)

    def _derivative_predict(self, params, exog=None, transform='dydx'):
        """
        For computing marginal effects standard errors.

        This is used only in the case of discrete and count regressors to
        get the variance-covariance of the marginal effects.  It returns
        [d F / d params] where F is the predict.

        Transform can be 'dydx' or 'eydx'.  Checking is done in margeff
        computations for appropriate transform.
        """
        if exog is None:
            exog = self.exog
        dF = self.pdf(np.dot(exog, params))[:, None] * exog
        if 'ey' in transform:
            dF /= self.predict(params, exog)[:, None]
        return dF

    def _derivative_exog(self, params, exog=None, transform='dydx',
                         dummy_idx=None, count_idx=None):
        """
        For computing marginal effects returns dF(XB) / dX where F(.) is
        the predicted probabilities

        transform can be 'dydx', 'dyex', 'eydx', or 'eyex'.

        Not all of these make sense in the presence of discrete regressors,
        but checks are done in the results in get_margeff.
        """
        # Note: this form should be appropriate for
        # group 1 probit, logit, logistic, cloglog, heckprob, xtprobit
        if exog is None:
            exog = self.exog

        margeff = np.dot(self.pdf(np.dot(exog, params))[:, None],
                         params[None, :])

        if 'ex' in transform:
            margeff *= exog
        if 'ey' in transform:
            margeff /= self.predict(params, exog)[:, None]

        return self._derivative_exog_helper(margeff, params, exog,
                                            dummy_idx, count_idx, transform)
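# get_margeff on a fitted results instance drives the two helpers above:
# _derivative_exog supplies the point estimates and _derivative_predict is
# used (per its docstring) for the covariance of the effects of discrete and
# count regressors.  A hedged sketch (``y``/``X`` are placeholders):
#
# >>> res = Logit(y, X).fit(disp=0)
# >>> res.get_margeff(at='overall', method='dydx').summary()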



class MultinomialModel(BinaryModel):

    def _handle_data(self, endog, exog, missing, hasconst, **kwargs):
        if data_tools._is_using_ndarray_type(endog, None):
            endog_dummies, ynames = _numpy_to_dummies(endog)
            yname = 'y'
        elif data_tools._is_using_pandas(endog, None):
            endog_dummies, ynames, yname = _pandas_to_dummies(endog)
        else:
            endog = np.asarray(endog)
            endog_dummies, ynames = _numpy_to_dummies(endog)
            yname = 'y'

        if not isinstance(ynames, dict):
            ynames = dict(zip(range(endog_dummies.shape[1]), ynames))

        self._ynames_map = ynames
        data = handle_data(endog_dummies, exog, missing, hasconst, **kwargs)
        data.ynames = yname  # overwrite this to single endog name
        data.orig_endog = endog
        self.wendog = data.endog

        # repeating from upstream...
        for key in kwargs:
            if key in ['design_info', 'formula']:  # leave attached to data
                continue
            try:
                setattr(self, key, data.__dict__.pop(key))
            except KeyError:
                pass
        return data

    def initialize(self):
        """
        Preprocesses the data for MNLogit.
        """
        super(MultinomialModel, self).initialize()
        # This is also a "whiten" method in other models (eg regression)
        self.endog = self.endog.argmax(1)  # turn it into an array of col idx
        self.J = self.wendog.shape[1]
        self.K = self.exog.shape[1]
        self.df_model *= (self.J - 1)  # for each J - 1 equation.
        self.df_resid = self.exog.shape[0] - self.df_model - (self.J - 1)

    def predict(self, params, exog=None, linear=False):
        """
        Predict response variable of a model given exogenous variables.

        Parameters
        ----------
        params : array_like
            2d array of fitted parameters of the model.  Should be in the
            order returned from the model.
        exog : array_like
            1d or 2d array of exogenous values.  If not supplied, the
            whole exog attribute of the model is used.  If a 1d array is
            given, it is assumed to be 1 row of exogenous variables.  If you
            only have one regressor and would like to do prediction, you
            must provide a 2d array with shape[1] == 1.
        linear : bool, optional
            If True, returns the linear predictor dot(exog, params).  Else,
            returns the value of the cdf at the linear predictor.

        Notes
        -----
        Column 0 is the base case, the rest conform to the rows of params
        shifted up one for the base case.
        """
        if exog is None:  # do here to accommodate user-given exog
            exog = self.exog
        if exog.ndim == 1:
            exog = exog[None]
        pred = super(MultinomialModel, self).predict(params, exog, linear)
        if linear:
            pred = np.column_stack((np.zeros(len(exog)), pred))
        return pred

    @Appender(DiscreteModel.fit.__doc__)
    def fit(self, start_params=None, method='newton', maxiter=35,
            full_output=1, disp=1, callback=None, **kwargs):
        if start_params is None:
            start_params = np.zeros((self.K * (self.J - 1)))
        else:
            start_params = np.asarray(start_params)
        callback = lambda x: None  # placeholder until check_perfect_pred
        # skip calling super to handle results from LikelihoodModel
        mnfit = base.LikelihoodModel.fit(self, start_params=start_params,
                method=method, maxiter=maxiter, full_output=full_output,
                disp=disp, callback=callback, **kwargs)
        mnfit.params = mnfit.params.reshape(self.K, -1, order='F')
        mnfit = MultinomialResults(self, mnfit)
        return MultinomialResultsWrapper(mnfit)

    @Appender(DiscreteModel.fit_regularized.__doc__)
    def fit_regularized(self, start_params=None, method='l1',
            maxiter='defined_by_method', full_output=1, disp=1, callback=None,
            alpha=0, trim_mode='auto', auto_trim_tol=0.01, size_trim_tol=1e-4,
            qc_tol=0.03, **kwargs):
        if start_params is None:
            start_params = np.zeros((self.K * (self.J - 1)))
        else:
            start_params = np.asarray(start_params)
        mnfit = DiscreteModel.fit_regularized(
            self, start_params=start_params, method=method, maxiter=maxiter,
            full_output=full_output, disp=disp, callback=callback,
            alpha=alpha, trim_mode=trim_mode, auto_trim_tol=auto_trim_tol,
            size_trim_tol=size_trim_tol, qc_tol=qc_tol, **kwargs)
        mnfit.params = mnfit.params.reshape(self.K, -1, order='F')
        mnfit = L1MultinomialResults(self, mnfit)
        return L1MultinomialResultsWrapper(mnfit)

    def _derivative_predict(self, params, exog=None, transform='dydx'):
        """
        For computing marginal effects standard errors.

        This is used only in the case of discrete and count regressors to
        get the variance-covariance of the marginal effects.  It returns
        [d F / d params] where F is the predicted probabilities for each
        choice.  dFdparams is of shape nobs x (J*K) x (J-1)*K.
        The zero derivatives for the base category are not included.

        Transform can be 'dydx' or 'eydx'.  Checking is done in margeff
        computations for appropriate transform.
        """
        if exog is None:
            exog = self.exog
        if params.ndim == 1:  # will get flattened from approx_fprime
            params = params.reshape(self.K, self.J - 1, order='F')

        eXB = np.exp(np.dot(exog, params))
        sum_eXB = (1 + eXB.sum(1))[:, None]
        J = int(self.J)
        K = int(self.K)
        repeat_eXB = np.repeat(eXB, J, axis=1)
        X = np.tile(exog, J - 1)
        # this is the derivative wrt the base level
        F0 = -repeat_eXB * X / sum_eXB ** 2
        # this is the derivative wrt the other levels when
        # dF_j / dParams_j (ie., own equation)
        # NOTE: this computes too much, any easy way to cut down?
        F1 = eXB.T[:, :, None] * X * (sum_eXB - repeat_eXB) / (sum_eXB ** 2)
        F1 = F1.transpose((1, 0, 2))  # put the nobs index first

        # other equation index
        other_idx = ~np.kron(np.eye(J - 1), np.ones(K)).astype(bool)
        F1[:, other_idx] = (-eXB.T[:, :, None] * X * repeat_eXB /
                            (sum_eXB ** 2)).transpose((1, 0, 2))[:, other_idx]
        dFdX = np.concatenate((F0[:, None, :], F1), axis=1)

        if 'ey' in transform:
            dFdX /= self.predict(params, exog)[:, :, None]
        return dFdX

    def _derivative_exog(self, params, exog=None, transform='dydx',
                         dummy_idx=None, count_idx=None):
        """
        For computing marginal effects returns dF(XB) / dX where F(.) is
        the predicted probabilities

        transform can be 'dydx', 'dyex', 'eydx', or 'eyex'.

        Not all of these make sense in the presence of discrete regressors,
        but checks are done in the results in get_margeff.

        For Multinomial models the marginal effects are

        P[j] * (params[j] - sum_k P[k]*params[k])

        It is returned unshaped, so that each row contains each of the J
        equations.  This makes it easier to take derivatives of this for
        standard errors.  If you want average marginal effects you can do
        margeff.reshape(nobs, K, J, order='F').mean(0) and the marginal
        effects for choice J are in column J.
        """
        J = int(self.J)  # number of alternative choices
        K = int(self.K)  # number of variables
        # Note: this form should be appropriate for
        # group 1 probit, logit, logistic, cloglog, heckprob, xtprobit
        if exog is None:
            exog = self.exog
        if params.ndim == 1:  # will get flattened from approx_fprime
            params = params.reshape(K, J - 1, order='F')

        zeroparams = np.c_[np.zeros(K), params]  # add base in

        cdf = self.cdf(np.dot(exog, params))

        # TODO: meaningful interpretation for `iterm`?
        iterm = np.array([cdf[:, [i]] * zeroparams[:, i]
                          for i in range(int(J))]).sum(0)

        margeff = np.array([cdf[:, [j]] * (zeroparams[:, j] - iterm)
                            for j in range(J)])

        # swap the axes to make sure margeff are in order nobs, K, J
        margeff = np.transpose(margeff, (1, 2, 0))

        if 'ex' in transform:
            margeff *= exog
        if 'ey' in transform:
            margeff /= self.predict(params, exog)[:, None, :]

        margeff = self._derivative_exog_helper(margeff, params, exog,
                                               dummy_idx, count_idx, transform)
        return margeff.reshape(len(exog), -1, order='F')
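# Sketch of the reshape described in _derivative_exog's docstring: the raw
# return value is (nobs, K*J), and the average marginal effects for outcome j
# end up in column j of a (K, J) array (``y``/``X``/``j`` are placeholders):
#
# >>> mod = MNLogit(y, X)                       # y has J categories
# >>> res = mod.fit(disp=0)
# >>> raw = mod._derivative_exog(np.asarray(res.params))   # (nobs, K*J)
# >>> ame = raw.reshape(mod.exog.shape[0], mod.K, mod.J, order='F').mean(0)
# >>> ame[:, j]     # average effect of each regressor on P(choice == j)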



class CountModel(DiscreteModel):
    def __init__(self, endog, exog, offset=None, exposure=None, missing='none',
                 **kwargs):
        super(CountModel, self).__init__(endog, exog, missing=missing,
                                         offset=offset,
                                         exposure=exposure, **kwargs)
        if exposure is not None:
            self.exposure = np.log(self.exposure)
        self._check_inputs(self.offset, self.exposure, self.endog)
        if offset is None:
            delattr(self, 'offset')
        if exposure is None:
            delattr(self, 'exposure')

        # promote dtype to float64 if needed
        dt = np.promote_types(self.endog.dtype, np.float64)
        self.endog = np.asarray(self.endog, dt)
        dt = np.promote_types(self.exog.dtype, np.float64)
        self.exog = np.asarray(self.exog, dt)

    def _check_inputs(self, offset, exposure, endog):
        if offset is not None and offset.shape[0] != endog.shape[0]:
            raise ValueError("offset is not the same length as endog")

        if exposure is not None and exposure.shape[0] != endog.shape[0]:
            raise ValueError("exposure is not the same length as endog")

    def _get_init_kwds(self):
        # this is a temporary fixup because exposure has been transformed
        # see #1609
        kwds = super(CountModel, self)._get_init_kwds()
        if 'exposure' in kwds and kwds['exposure'] is not None:
            kwds['exposure'] = np.exp(kwds['exposure'])
        return kwds

    def predict(self, params, exog=None, exposure=None, offset=None,
                linear=False):
        """
        Predict response variable of a count model given exogenous variables.

        Parameters
        ----------
        params : array_like
            Model parameters
        exog : array_like, optional
            Design / exogenous data.  If exog is None, model exog is used.
        exposure : array_like, optional
            Log(exposure) is added to the linear prediction with
            coefficient equal to 1.  If exposure is not provided and exog
            is None, uses the model's exposure if present.  If not, uses
            0 as the default value.
        offset : array_like, optional
            Offset is added to the linear prediction with coefficient
            equal to 1.  If offset is not provided and exog
            is None, uses the model's offset if present.  If not, uses
            0 as the default value.
        linear : bool
            If True, returns the linear predicted values.  If False,
            returns the exponential of the linear predicted value.

        Notes
        -----
        If exposure is specified, then it will be logged by the method.
        The user does not need to log it first.
        """
        # the following is copied from GLM predict (without family/link check)
        # Use fit offset if appropriate
        if offset is None and exog is None and hasattr(self, 'offset'):
            offset = self.offset
        elif offset is None:
            offset = 0.

        # Use fit exposure if appropriate
        if exposure is None and exog is None and hasattr(self, 'exposure'):
            # Already logged
            exposure = self.exposure
        elif exposure is None:
            exposure = 0.
        else:
            exposure = np.log(exposure)

        if exog is None:
            exog = self.exog

        fitted = np.dot(exog, params[:exog.shape[1]])
        linpred = fitted + exposure + offset
        if not linear:
            return np.exp(linpred)  # not cdf
        else:
            return linpred
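    # Hedged sketch of the exposure handling documented above: `exposure` is
    # passed on the original scale and is logged internally, both in __init__
    # and here at predict time (``counts``/``X``/``persontime`` are
    # placeholders):
    #
    # >>> res = Poisson(counts, X, exposure=persontime).fit(disp=0)
    # >>> res.model.predict(res.params, exog=X, exposure=persontime)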

    def _derivative_predict(self, params, exog=None, transform='dydx'):
        """
        For computing marginal effects standard errors.

        This is used only in the case of discrete and count regressors to
        get the variance-covariance of the marginal effects.  It returns
        [d F / d params] where F is the predict.

        Transform can be 'dydx' or 'eydx'.  Checking is done in margeff
        computations for appropriate transform.
        """
        if exog is None:
            exog = self.exog
        # NOTE: this handles offset and exposure
        dF = self.predict(params, exog)[:, None] * exog
        if 'ey' in transform:
            dF /= self.predict(params, exog)[:, None]
        return dF

    def _derivative_exog(self, params, exog=None, transform="dydx",
                         dummy_idx=None, count_idx=None):
        """
        For computing marginal effects.  These are the marginal effects
        d F(XB) / dX
        For the Poisson model F(XB) is the predicted counts rather than
        the probabilities.

        transform can be 'dydx', 'dyex', 'eydx', or 'eyex'.

        Not all of these make sense in the presence of discrete regressors,
        but checks are done in the results in get_margeff.
        """
        # group 3 poisson, nbreg, zip, zinb
        if exog is None:
            exog = self.exog
        k_extra = getattr(self, 'k_extra', 0)
        params_exog = params if k_extra == 0 else params[:-k_extra]
        margeff = self.predict(params, exog)[:, None] * params_exog[None, :]
        if 'ex' in transform:
            margeff *= exog
        if 'ey' in transform:
            margeff /= self.predict(params, exog)[:, None]

        return self._derivative_exog_helper(margeff, params, exog,
                                            dummy_idx, count_idx, transform)

    @Appender(DiscreteModel.fit.__doc__)
    def fit(self, start_params=None, method='newton', maxiter=35,
            full_output=1, disp=1, callback=None, **kwargs):
        cntfit = super(CountModel, self).fit(start_params=start_params,
                method=method, maxiter=maxiter, full_output=full_output,
                disp=disp, callback=callback, **kwargs)
        discretefit = CountResults(self, cntfit)
        return CountResultsWrapper(discretefit)

    @Appender(DiscreteModel.fit_regularized.__doc__)
    def fit_regularized(self, start_params=None, method='l1',
            maxiter='defined_by_method', full_output=1, disp=1, callback=None,
            alpha=0, trim_mode='auto', auto_trim_tol=0.01, size_trim_tol=1e-4,
            qc_tol=0.03, **kwargs):

        _validate_l1_method(method)

        cntfit = super(CountModel, self).fit_regularized(
            start_params=start_params, method=method, maxiter=maxiter,
            full_output=full_output, disp=disp, callback=callback,
            alpha=alpha, trim_mode=trim_mode, auto_trim_tol=auto_trim_tol,
            size_trim_tol=size_trim_tol, qc_tol=qc_tol, **kwargs)

        discretefit = L1CountResults(self, cntfit)
        return L1CountResultsWrapper(discretefit)


class OrderedModel(DiscreteModel):
    pass


# Public Model Classes


class Poisson(CountModel):
    __doc__ = """
    Poisson Model

    %(params)s
    %(extra_params)s

    Attributes
    ----------
    endog : ndarray
        A reference to the endogenous response variable
    exog : ndarray
        A reference to the exogenous design.
    """ % {'params': base._model_params_doc,
           'extra_params':
           """offset : array_like
        Offset is added to the linear prediction with coefficient equal to 1.
    exposure : array_like
        Log(exposure) is added to the linear prediction with coefficient
        equal to 1.
    """ + base._missing_param_doc}

    @property
    def family(self):
        from statsmodels.genmod import families
        return families.Poisson()

    def cdf(self, X):
        """
        Poisson model cumulative distribution function

        Parameters
        ----------
        X : array_like
            `X` is the linear predictor of the model.  See notes.

        Returns
        -------
        The value of the Poisson CDF at each point.

        Notes
        -----
        The CDF is defined as

        .. math:: \\exp\\left(-\\lambda\\right)\\sum_{i=0}^{y}\\frac{\\lambda^{i}}{i!}

        where :math:`\\lambda` assumes the loglinear model.  I.e.,

        .. math:: \\ln\\lambda_{i}=X\\beta

        The parameter `X` is :math:`X\\beta` in the above formula.
        """
        y = self.endog
        return stats.poisson.cdf(y, np.exp(X))

    def pdf(self, X):
        """
        Poisson model probability mass function

        Parameters
        ----------
        X : array_like
            `X` is the linear predictor of the model.  See notes.

        Returns
        -------
        pdf : ndarray
            The value of the Poisson probability mass function, PMF, for each
            point of X.

        Notes
        -----
        The PMF is defined as

        .. math:: \\frac{e^{-\\lambda_{i}}\\lambda_{i}^{y_{i}}}{y_{i}!}

        where :math:`\\lambda` assumes the loglinear model.  I.e.,

        .. math:: \\ln\\lambda_{i}=x_{i}\\beta

        The parameter `X` is :math:`x_{i}\\beta` in the above formula.
        """
        y = self.endog
        return np.exp(stats.poisson.logpmf(y, np.exp(X)))

    def loglike(self, params):
        """
        Loglikelihood of Poisson model

        Parameters
        ----------
        params : array_like
            The parameters of the model.

        Returns
        -------
        loglike : float
            The log-likelihood function of the model evaluated at `params`.
            See notes.

        Notes
        -----
        .. math:: \\ln L=\\sum_{i=1}^{n}\\left[-\\lambda_{i}+y_{i}x_{i}^{\\prime}\\beta-\\ln y_{i}!\\right]
        """
        offset = getattr(self, "offset", 0)
        exposure = getattr(self, "exposure", 0)
        XB = np.dot(self.exog, params) + offset + exposure
        endog = self.endog
        return np.sum(-np.exp(XB) + endog * XB - gammaln(endog + 1))

    def loglikeobs(self, params):
        """
        Loglikelihood for observations of Poisson model

        Parameters
        ----------
        params : array_like
            The parameters of the model.

        Returns
        -------
        loglike : array_like
            The log likelihood for each observation of the model evaluated
            at `params`.  See Notes

        Notes
        -----
        .. math:: \\ln L_{i}=\\left[-\\lambda_{i}+y_{i}x_{i}^{\\prime}\\beta-\\ln y_{i}!\\right]

        for observations :math:`i=1,...,n`
        """
        offset = getattr(self, "offset", 0)
        exposure = getattr(self, "exposure", 0)
        XB = np.dot(self.exog, params) + offset + exposure
        endog = self.endog
        # np.sum(stats.poisson.logpmf(endog, np.exp(XB)))
        return -np.exp(XB) + endog * XB - gammaln(endog + 1)

    @Appender(_get_start_params_null_docs)
    def _get_start_params_null(self):
        offset = getattr(self, "offset", 0)
        exposure = getattr(self, "exposure", 0)
        const = (self.endog / np.exp(offset + exposure)).mean()
        params = [np.log(const)]
        return params

    @Appender(DiscreteModel.fit.__doc__)
    def fit(self, start_params=None, method='newton', maxiter=35,
            full_output=1, disp=1, callback=None, **kwargs):

        if start_params is None and self.data.const_idx is not None:
            # k_params or k_exog not available?
            start_params = 0.001 * np.ones(self.exog.shape[1])
            start_params[self.data.const_idx] = self._get_start_params_null()[0]

        cntfit = super(CountModel, self).fit(start_params=start_params,
                method=method, maxiter=maxiter, full_output=full_output,
                disp=disp, callback=callback, **kwargs)

        if 'cov_type' in kwargs:
            cov_kwds = kwargs.get('cov_kwds', {})
            kwds = {'cov_type': kwargs['cov_type'], 'cov_kwds': cov_kwds}
        else:
            kwds = {}
        discretefit = PoissonResults(self, cntfit, **kwds)
        return PoissonResultsWrapper(discretefit)
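    # The cov_type passthrough above means robust covariances can be
    # requested directly from fit; a minimal sketch (``y``/``X`` are
    # placeholders):
    #
    # >>> res = Poisson(y, X).fit(cov_type='HC0', disp=0)
    # >>> res.bse    # standard errors from the HC0 sandwich estimator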

    @Appender(DiscreteModel.fit_regularized.__doc__)
    def fit_regularized(self, start_params=None, method='l1',
            maxiter='defined_by_method', full_output=1, disp=1, callback=None,
            alpha=0, trim_mode='auto', auto_trim_tol=0.01, size_trim_tol=1e-4,
            qc_tol=0.03, **kwargs):

        _validate_l1_method(method)

        cntfit = super(CountModel, self).fit_regularized(
            start_params=start_params, method=method, maxiter=maxiter,
            full_output=full_output, disp=disp, callback=callback,
            alpha=alpha, trim_mode=trim_mode, auto_trim_tol=auto_trim_tol,
            size_trim_tol=size_trim_tol, qc_tol=qc_tol, **kwargs)

        discretefit = L1PoissonResults(self, cntfit)
        return L1PoissonResultsWrapper(discretefit)

    def fit_constrained(self, constraints, start_params=None, **fit_kwds):
        """fit the model subject to linear equality constraints

        The constraints are of the form `R params = q`
        where R is the constraint_matrix and q is the vector of
        constraint_values.

        The estimation creates a new model with transformed design matrix,
        exog, and converts the results back to the original parameterization.

        Parameters
        ----------
        constraints : formula expression or tuple
            If it is a tuple, then the constraint needs to be given by two
            arrays (constraint_matrix, constraint_value), i.e. (R, q).
            Otherwise, the constraints can be given as strings or list of
            strings.
            see t_test for details
        start_params : None or array_like
            starting values for the optimization.  `start_params` needs to be
            given in the original parameter space and are internally
            transformed.
        **fit_kwds : keyword arguments
            fit_kwds are used in the optimization of the transformed model.

        Returns
        -------
        results : Results instance
        """

        # constraints = (R, q)
        # TODO: temporary trailing underscore to not overwrite the monkey
        #       patched version
        # TODO: decide whether to move the imports
        from patsy import DesignInfo
        from statsmodels.base._constraints import fit_constrained

        # same pattern as in base.LikelihoodModel.t_test
        lc = DesignInfo(self.exog_names).linear_constraint(constraints)
        R, q = lc.coefs, lc.constants

        # TODO: add start_params option, need access to transformation
        #       fit_constrained needs to do the transformation
        params, cov, res_constr = fit_constrained(self, R, q,
                                                  start_params=start_params,
                                                  fit_kwds=fit_kwds)
        # create dummy results Instance, TODO: wire up properly
        res = self.fit(maxiter=0, method='nm', disp=0,
                       warn_convergence=False)  # we get a wrapper back
        res.mle_retvals['fcall'] = res_constr.mle_retvals.get('fcall', np.nan)
        res.mle_retvals['iterations'] = res_constr.mle_retvals.get(
            'iterations', np.nan)
        res.mle_retvals['converged'] = res_constr.mle_retvals['converged']
        res._results.params = params
        res._results.cov_params_default = cov
        cov_type = fit_kwds.get('cov_type', 'nonrobust')
        if cov_type != 'nonrobust':
            res._results.normalized_cov_params = cov  # assume scale=1
        else:
            res._results.normalized_cov_params = None
        k_constr = len(q)
        res._results.df_resid += k_constr
        res._results.df_model -= k_constr
        res._results.constraints = lc
        res._results.k_constr = k_constr
        res._results.results_constrained = res_constr
        return res
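    # Small sketch of fit_constrained with a string constraint in the
    # `R params = q` form described above (formula and column names are
    # placeholders):
    #
    # >>> mod = Poisson.from_formula('deaths ~ smokes + agecat', data=df)
    # >>> res = mod.fit_constrained('smokes = 1')   # pin one coefficient
    # >>> res.params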


    def score(self, params):
        """
        Poisson model score (gradient) vector of the log-likelihood

        Parameters
        ----------
        params : array_like
            The parameters of the model

        Returns
        -------
        score : ndarray, 1-D
            The score vector of the model, i.e. the first derivative of the
            loglikelihood function, evaluated at `params`

        Notes
        -----
        .. math:: \\frac{\\partial\\ln L}{\\partial\\beta}=\\sum_{i=1}^{n}\\left(y_{i}-\\lambda_{i}\\right)x_{i}

        where the loglinear model is assumed

        .. math:: \\ln\\lambda_{i}=x_{i}\\beta
        """
        offset = getattr(self, "offset", 0)
        exposure = getattr(self, "exposure", 0)
        X = self.exog
        L = np.exp(np.dot(X, params) + offset + exposure)
        return np.dot(self.endog - L, X)

    def score_obs(self, params):
        """
        Poisson model Jacobian of the log-likelihood for each observation

        Parameters
        ----------
        params : array_like
            The parameters of the model

        Returns
        -------
        score : array_like
            The score vector (nobs, k_vars) of the model evaluated at `params`

        Notes
        -----
        .. math:: \\frac{\\partial\\ln L_{i}}{\\partial\\beta}=\\left(y_{i}-\\lambda_{i}\\right)x_{i}

        for observations :math:`i=1,...,n`

        where the loglinear model is assumed

        .. math:: \\ln\\lambda_{i}=x_{i}\\beta
        """
        offset = getattr(self, "offset", 0)
        exposure = getattr(self, "exposure", 0)
        X = self.exog
        L = np.exp(np.dot(X, params) + offset + exposure)
        return (self.endog - L)[:, None] * X

    def score_factor(self, params):
        """
        Poisson model score_factor for each observation

        Parameters
        ----------
        params : array_like
            The parameters of the model

        Returns
        -------
        score : array_like
            The score factor (nobs, ) of the model evaluated at `params`

        Notes
        -----
        .. math:: \\frac{\\partial\\ln L_{i}}{\\partial\\beta}=\\left(y_{i}-\\lambda_{i}\\right)

        for observations :math:`i=1,...,n`

        where the loglinear model is assumed

        .. math:: \\ln\\lambda_{i}=x_{i}\\beta
        """
        offset = getattr(self, "offset", 0)
        exposure = getattr(self, "exposure", 0)
        X = self.exog
        L = np.exp(np.dot(X, params) + offset + exposure)
        return (self.endog - L)

    def hessian(self, params):
        """
        Poisson model Hessian matrix of the loglikelihood

        Parameters
        ----------
        params : array_like
            The parameters of the model

        Returns
        -------
        hess : ndarray, (k_vars, k_vars)
            The Hessian, second derivative of loglikelihood function,
            evaluated at `params`

        Notes
        -----
        .. math:: \\frac{\\partial^{2}\\ln L}{\\partial\\beta\\partial\\beta^{\\prime}}=-\\sum_{i=1}^{n}\\lambda_{i}x_{i}x_{i}^{\\prime}

        where the loglinear model is assumed

        .. math:: \\ln\\lambda_{i}=x_{i}\\beta
        """
        offset = getattr(self, "offset", 0)
        exposure = getattr(self, "exposure", 0)
        X = self.exog
        L = np.exp(np.dot(X, params) + exposure + offset)
        return -np.dot(L * X.T, X)

    def hessian_factor(self, params):
        """
        Poisson model Hessian factor

        Parameters
        ----------
        params : array_like
            The parameters of the model

        Returns
        -------
        hess : ndarray, (nobs,)
            The Hessian factor, second derivative of loglikelihood function
            with respect to the linear predictor evaluated at `params`

        Notes
        -----
        .. math:: \\frac{\\partial^{2}\\ln L}{\\partial\\beta\\partial\\beta^{\\prime}}=-\\sum_{i=1}^{n}\\lambda_{i}

        where the loglinear model is assumed

        .. math:: \\ln\\lambda_{i}=x_{i}\\beta
        """
        offset = getattr(self, "offset", 0)
        exposure = getattr(self, "exposure", 0)
        X = self.exog
        L = np.exp(np.dot(X, params) + exposure + offset)
        return L
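# The analytic score/hessian above can be spot-checked against numerical
# derivatives with the tools already imported in this module; a hedged
# sketch (``y``/``X``/``beta`` are placeholders):
#
# >>> from statsmodels.tools.numdiff import approx_hess
# >>> mod = Poisson(y, X)
# >>> np.allclose(mod.score(beta), approx_fprime_cs(beta, mod.loglike))
# >>> np.allclose(mod.hessian(beta), approx_hess(beta, mod.loglike))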



class GeneralizedPoisson(CountModel):
    __doc__ = """
    Generalized Poisson Model

    %(params)s
    %(extra_params)s

    Attributes
    ----------
    endog : ndarray
        A reference to the endogenous response variable
    exog : ndarray
        A reference to the exogenous design.
    """ % {'params': base._model_params_doc,
           'extra_params':
           """
    p : scalar
        P denotes parameterizations for GP regression. p=1 for GP-1 and
        p=2 for GP-2. Default is p=1.
    offset : array_like
        Offset is added to the linear prediction with coefficient equal to 1.
    exposure : array_like
        Log(exposure) is added to the linear prediction with coefficient
        equal to 1.
    """ + base._missing_param_doc}

    def __init__(self, endog, exog, p=1, offset=None,
                 exposure=None, missing='none', **kwargs):
        super(GeneralizedPoisson, self).__init__(endog, exog, offset=offset,
                                                 exposure=exposure,
                                                 missing=missing, **kwargs)
        self.parameterization = p - 1
        self.exog_names.append('alpha')
        self.k_extra = 1
        self._transparams = False

    def _get_init_kwds(self):
        kwds = super(GeneralizedPoisson, self)._get_init_kwds()
        kwds['p'] = self.parameterization + 1
        return kwds

    def loglike(self, params):
        """
        Loglikelihood of Generalized Poisson model

        Parameters
        ----------
        params : array_like
            The parameters of the model.

        Returns
        -------
        loglike : float
            The log-likelihood function of the model evaluated at `params`.
            See notes.

        Notes
        -----
        .. math:: \\ln L=\\sum_{i=1}^{n}\\left[\\mu_{i}+(y_{i}-1)*ln(\\mu_{i}+
            \\alpha*\\mu_{i}^{p-1}*y_{i})-y_{i}*ln(1+\\alpha*\\mu_{i}^{p-1})-
            ln(y_{i}!)-\\frac{\\mu_{i}+\\alpha*\\mu_{i}^{p-1}*y_{i}}{1+\\alpha*
            \\mu_{i}^{p-1}}\\right]
        """
        return np.sum(self.loglikeobs(params))

    def loglikeobs(self, params):
        """
        Loglikelihood for observations of Generalized Poisson model

        Parameters
        ----------
        params : array_like
            The parameters of the model.

        Returns
        -------
        loglike : ndarray
            The log likelihood for each observation of the model evaluated
            at `params`.  See Notes

        Notes
        -----
        .. math:: \\ln L=\\sum_{i=1}^{n}\\left[\\mu_{i}+(y_{i}-1)*ln(\\mu_{i}+
            \\alpha*\\mu_{i}^{p-1}*y_{i})-y_{i}*ln(1+\\alpha*\\mu_{i}^{p-1})-
            ln(y_{i}!)-\\frac{\\mu_{i}+\\alpha*\\mu_{i}^{p-1}*y_{i}}{1+\\alpha*
            \\mu_{i}^{p-1}}\\right]

        for observations :math:`i=1,...,n`
        """
        if self._transparams:
            alpha = np.exp(params[-1])
        else:
            alpha = params[-1]
        params = params[:-1]
        p = self.parameterization
        endog = self.endog
        mu = self.predict(params)
        mu_p = np.power(mu, p)
        a1 = 1 + alpha * mu_p
        a2 = mu + (a1 - 1) * endog
        return (np.log(mu) + (endog - 1) * np.log(a2) - endog *
                np.log(a1) - gammaln(endog + 1) - a2 / a1)

    @Appender(_get_start_params_null_docs)
    def _get_start_params_null(self):
        offset = getattr(self, "offset", 0)
        exposure = getattr(self, "exposure", 0)

        const = (self.endog / np.exp(offset + exposure)).mean()
        params = [np.log(const)]
        mu = const * np.exp(offset + exposure)
        resid = self.endog - mu
        a = self._estimate_dispersion(mu, resid, df_resid=resid.shape[0] - 1)
        params.append(a)

        return np.array(params)

    def _estimate_dispersion(self, mu, resid, df_resid=None):
        q = self.parameterization
        if df_resid is None:
            df_resid = resid.shape[0]
        a = ((np.abs(resid) / np.sqrt(mu) - 1) * mu**(-q)).sum() / df_resid
        return a

    @Appender(
        """
        use_transparams : bool
            This parameter enables an internal transformation to impose
            non-negativity.  True to enable.  Default is False.
            use_transparams=True imposes the no-underdispersion (alpha > 0)
            constraint.  If use_transparams=True and method="newton" or
            "ncg", the transformation is ignored.
        """)
    @Appender(DiscreteModel.fit.__doc__)
    def fit(self, start_params=None, method='bfgs', maxiter=35,
            full_output=1, disp=1, callback=None, use_transparams=False,
            cov_type='nonrobust', cov_kwds=None, use_t=None, **kwargs):
        if use_transparams and method not in ['newton', 'ncg']:
            self._transparams = True
        else:
            if use_transparams:
                warnings.warn('Parameter "use_transparams" is ignored',
                              RuntimeWarning)
            self._transparams = False

        if start_params is None:
            offset = getattr(self, "offset", 0) + getattr(self, "exposure", 0)
            if np.size(offset) == 1 and offset == 0:
                offset = None
            optim_kwds_prelim = {'disp': 0, 'skip_hessian': True,
                                 'warn_convergence': False}
            optim_kwds_prelim.update(kwargs.get('optim_kwds_prelim', {}))
            mod_poi = Poisson(self.endog, self.exog, offset=offset)
            res_poi = mod_poi.fit(**optim_kwds_prelim)
            start_params = res_poi.params
            a = self._estimate_dispersion(res_poi.predict(), res_poi.resid,
                                          df_resid=res_poi.df_resid)
            start_params = np.append(start_params, max(-0.1, a))

        if callback is None:
            # work around perfect separation callback #3895
            callback = lambda *x: x

        mlefit = super(GeneralizedPoisson, self).fit(start_params=start_params,
                        maxiter=maxiter, method=method, disp=disp,
                        full_output=full_output, callback=callback,
                        **kwargs)

        if use_transparams and method not in ["newton", "ncg"]:
            self._transparams = False
            mlefit._results.params[-1] = np.exp(mlefit._results.params[-1])

        gpfit = GeneralizedPoissonResults(self, mlefit._results)
        result = GeneralizedPoissonResultsWrapper(gpfit)

        if cov_kwds is None:
            cov_kwds = {}

        result._get_robustcov_results(cov_type=cov_type,
                                      use_self=True, use_t=use_t, **cov_kwds)
        return result

    @Appender(DiscreteModel.fit_regularized.__doc__)
    def fit_regularized(self, start_params=None, method='l1',
            maxiter='defined_by_method', full_output=1, disp=1, callback=None,
            alpha=0, trim_mode='auto', auto_trim_tol=0.01, size_trim_tol=1e-4,
            qc_tol=0.03, **kwargs):

        _validate_l1_method(method)

        if np.size(alpha) == 1 and alpha != 0:
            k_params = self.exog.shape[1] + self.k_extra
            alpha = alpha * np.ones(k_params)
            alpha[-1] = 0

        alpha_p = alpha[:-1] if (self.k_extra and np.size(alpha) > 1) else alpha
        self._transparams = False
        if start_params is None:
            offset = getattr(self, "offset", 0) + getattr(self, "exposure", 0)
            if np.size(offset) == 1 and offset == 0:
                offset = None
            mod_poi = Poisson(self.endog, self.exog, offset=offset)
            start_params = mod_poi.fit_regularized(
                start_params=start_params, method=method, maxiter=maxiter,
                full_output=full_output, disp=0, callback=callback,
                alpha=alpha_p, trim_mode=trim_mode, auto_trim_tol=auto_trim_tol,
                size_trim_tol=size_trim_tol, qc_tol=qc_tol, **kwargs).params
            start_params = np.append(start_params, 0.1)

        cntfit = super(CountModel, self).fit_regularized(
            start_params=start_params, method=method, maxiter=maxiter,
            full_output=full_output, disp=disp, callback=callback,
            alpha=alpha, trim_mode=trim_mode, auto_trim_tol=auto_trim_tol,
            size_trim_tol=size_trim_tol, qc_tol=qc_tol, **kwargs)

        discretefit = L1GeneralizedPoissonResults(self, cntfit)
        return L1GeneralizedPoissonResultsWrapper(discretefit)

1529 def score_obs(self, params): 

1530 if self._transparams: 

1531 alpha = np.exp(params[-1]) 

1532 else: 

1533 alpha = params[-1] 

1534 

1535 params = params[:-1] 

1536 p = self.parameterization 

1537 exog = self.exog 

1538 y = self.endog[:,None] 

1539 mu = self.predict(params)[:,None] 

1540 mu_p = np.power(mu, p) 

1541 a1 = 1 + alpha * mu_p 

1542 a2 = mu + alpha * mu_p * y 

1543 a3 = alpha * p * mu ** (p - 1) 

1544 a4 = a3 * y 

1545 dmudb = mu * exog 

1546 

1547 dalpha = (mu_p * (y * ((y - 1) / a2 - 2 / a1) + a2 / a1**2)) 

1548 dparams = dmudb * (-a4 / a1 + 

1549 a3 * a2 / (a1 ** 2) + 

1550 (1 + a4) * ((y - 1) / a2 - 1 / a1) + 

1551 1 / mu) 

1552 

1553 return np.concatenate((dparams, np.atleast_2d(dalpha)), 

1554 axis=1) 

1555 

1556 def score(self, params): 

1557 score = np.sum(self.score_obs(params), axis=0) 

1558 if self._transparams: 

1559 score[-1] == score[-1] ** 2 

1560 return score 

1561 else: 

1562 return score 

1563 

1564 def _score_p(self, params): 

1565 """ 

1566 Generalized Poisson model derivative of the log-likelihood by p-parameter 

1567 

1568 Parameters 

1569 ---------- 

1570 params : array_like 

1571 The parameters of the model 

1572 

1573 Returns 

1574 ------- 

1575 dldp : float 

1576 dldp is first derivative of the loglikelihood function, 

1577 evaluated at `p-parameter`. 

1578 """ 

1579 if self._transparams: 

1580 alpha = np.exp(params[-1]) 

1581 else: 

1582 alpha = params[-1] 

1583 params = params[:-1] 

1584 p = self.parameterization 

1585 exog = self.exog 

1586 y = self.endog[:,None] 

1587 mu = self.predict(params)[:,None] 

1588 mu_p = np.power(mu, p) 

1589 a1 = 1 + alpha * mu_p 

1590 a2 = mu + alpha * mu_p * y 

1591 

1592 dp = np.sum((np.log(mu) * ((a2 - mu) * ((y - 1) / a2 - 2 / a1) + 

1593 (a1 - 1) * a2 / a1 ** 2))) 

1594 return dp 

1595 

1596 def hessian(self, params): 

1597 """ 

1598 Generalized Poisson model Hessian matrix of the loglikelihood 

1599 

1600 Parameters 

1601 ---------- 

1602 params : array_like 

1603 The parameters of the model 

1604 

1605 Returns 

1606 ------- 

1607 hess : ndarray, (k_vars, k_vars) 

1608 The Hessian, second derivative of loglikelihood function, 

1609 evaluated at `params` 

1610 """ 

1611 if self._transparams: 

1612 alpha = np.exp(params[-1]) 

1613 else: 

1614 alpha = params[-1] 

1615 

1616 params = params[:-1] 

1617 p = self.parameterization 

1618 exog = self.exog 

1619 y = self.endog[:,None] 

1620 mu = self.predict(params)[:,None] 

1621 mu_p = np.power(mu, p) 

1622 a1 = 1 + alpha * mu_p 

1623 a2 = mu + alpha * mu_p * y 

1624 a3 = alpha * p * mu ** (p - 1) 

1625 a4 = a3 * y 

1626 a5 = p * mu ** (p - 1) 

1627 dmudb = mu * exog 

1628 

1629 # for dl/dparams dparams 

1630 dim = exog.shape[1] 

1631 hess_arr = np.empty((dim+1,dim+1)) 

1632 

1633 for i in range(dim): 

1634 for j in range(i + 1): 

1635 hess_arr[i,j] = np.sum(mu * exog[:,i,None] * exog[:,j,None] * 

1636 (mu * (a3 * a4 / a1**2 - 

1637 2 * a3**2 * a2 / a1**3 + 

1638 2 * a3 * (a4 + 1) / a1**2 - 

1639 a4 * p / (mu * a1) + 

1640 a3 * p * a2 / (mu * a1**2) + 

1641 (y - 1) * a4 * (p - 1) / (a2 * mu) - 

1642 (y - 1) * (1 + a4)**2 / a2**2 - 

1643 a4 * (p - 1) / (a1 * mu)) + 

1644 ((y - 1) * (1 + a4) / a2 - 

1645 (1 + a4) / a1)), axis=0) 

1646 tri_idx = np.triu_indices(dim, k=1) 

1647 hess_arr[tri_idx] = hess_arr.T[tri_idx] 

1648 

1649 # for dl/dparams dalpha 

1650 dldpda = np.sum((2 * a4 * mu_p / a1**2 - 

1651 2 * a3 * mu_p * a2 / a1**3 - 

1652 mu_p * y * (y - 1) * (1 + a4) / a2**2 + 

1653 mu_p * (1 + a4) / a1**2 + 

1654 a5 * y * (y - 1) / a2 - 

1655 2 * a5 * y / a1 + 

1656 a5 * a2 / a1**2) * dmudb, 

1657 axis=0) 

1658 

1659 hess_arr[-1,:-1] = dldpda 

1660 hess_arr[:-1,-1] = dldpda 

1661 

1662 # for dl/dalpha dalpha 

1663 dldada = mu_p**2 * (3 * y / a1**2 - 

1664 (y / a2)**2. * (y - 1) - 

1665 2 * a2 / a1**3) 

1666 

1667 hess_arr[-1,-1] = dldada.sum() 

1668 

1669 return hess_arr 

1670 

1671 def predict(self, params, exog=None, exposure=None, offset=None, 

1672 which='mean'): 

1673 """ 

1674 Predict response variable of a count model given exogenous variables. 

1675 

1676 Notes 

1677 ----- 

1678 If exposure is specified, then it will be logged by the method. 

1679 The user does not need to log it first. 

1680 """ 

1681 if exog is None: 

1682 exog = self.exog 

1683 

1684 if exposure is None: 

1685 exposure = getattr(self, 'exposure', 0) 

1686 elif exposure != 0: 

1687 exposure = np.log(exposure) 

1688 

1689 if offset is None: 

1690 offset = getattr(self, 'offset', 0) 

1691 

1692 fitted = np.dot(exog, params[:exog.shape[1]]) 

1693 linpred = fitted + exposure + offset 

1694 

1695 if which == 'mean': 

1696 return np.exp(linpred) 

1697 elif which == 'linear': 

1698 return linpred 

1699 elif which =='prob': 

1700 counts = np.atleast_2d(np.arange(0, np.max(self.endog)+1)) 

1701 mu = self.predict(params, exog=exog, exposure=exposure, 

1702 offset=offset)[:,None] 

1703 return genpoisson_p.pmf(counts, mu, params[-1], 

1704 self.parameterization + 1) 

1705 else: 

1706 raise ValueError('keyword \'which\' not recognized') 

1707 
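# A minimal illustrative sketch of ``predict(..., which='prob')`` on a fitted
# GeneralizedPoisson model; the data, seed, and variable names below are
# hypothetical, not from the original module.
#
# >>> import numpy as np
# >>> from statsmodels.discrete.discrete_model import GeneralizedPoisson
# >>> rng = np.random.default_rng(0)
# >>> x = np.column_stack([np.ones(200), rng.normal(size=200)])
# >>> y = rng.poisson(np.exp(0.5 + 0.3 * x[:, 1]))
# >>> res = GeneralizedPoisson(y, x).fit(disp=0)
# >>> probs = res.model.predict(res.params, which='prob')
# >>> probs.shape        # (nobs, max(endog) + 1); each row sums to roughly 1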

1708 

1709class Logit(BinaryModel): 

1710 __doc__ = """ 

1711 Logit Model 

1712 

1713 %(params)s 

1714 %(extra_params)s 

1715 

1716 Attributes 

1717 ---------- 

1718 endog : ndarray 

1719 A reference to the endogenous response variable 

1720 exog : ndarray 

1721 A reference to the exogenous design. 

1722 """ % {'params' : base._model_params_doc, 

1723 'extra_params' : base._missing_param_doc} 

1724 

1725 def cdf(self, X): 

1726 """ 

1727 The logistic cumulative distribution function 

1728 

1729 Parameters 

1730 ---------- 

1731 X : array_like 

1732 `X` is the linear predictor of the logit model. See notes. 

1733 

1734 Returns 

1735 ------- 

1736 1/(1 + exp(-X)) 

1737 

1738 Notes 

1739 ----- 

1740 In the logit model, 

1741 

1742 .. math:: \\Lambda\\left(x^{\\prime}\\beta\\right)= 

1743 \\text{Prob}\\left(Y=1|x\\right)= 

1744 \\frac{e^{x^{\\prime}\\beta}}{1+e^{x^{\\prime}\\beta}} 

1745 """ 

1746 X = np.asarray(X) 

1747 return 1/(1+np.exp(-X)) 

1748 
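# A quick numerical illustration of the logistic cdf above (illustrative
# values only): it maps any real linear predictor into (0, 1) and satisfies
# cdf(x) + cdf(-x) == 1.
#
# >>> import numpy as np
# >>> xb = np.array([-2.0, 0.0, 2.0])
# >>> 1 / (1 + np.exp(-xb))                      # same formula as Logit.cdf
# array([0.11920292, 0.5       , 0.88079708])
# >>> np.allclose(1 / (1 + np.exp(-xb)) + 1 / (1 + np.exp(xb)), 1.0)
# True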

1749 def pdf(self, X): 

1750 """ 

1751 The logistic probability density function 

1752 

1753 Parameters 

1754 ---------- 

1755 X : array_like 

1756 `X` is the linear predictor of the logit model. See notes. 

1757 

1758 Returns 

1759 ------- 

1760 pdf : ndarray 

1761            The value of the logistic probability density function for each

1762            point of X, ``np.exp(-X)/(1+np.exp(-X))**2``.

1763 

1764 Notes 

1765 ----- 

1766 In the logit model, 

1767 

1768 .. math:: \\lambda\\left(x^{\\prime}\\beta\\right)=\\frac{e^{-x^{\\prime}\\beta}}{\\left(1+e^{-x^{\\prime}\\beta}\\right)^{2}} 

1769 """ 

1770 X = np.asarray(X) 

1771 return np.exp(-X)/(1+np.exp(-X))**2 

1772 

1773 def loglike(self, params): 

1774 """ 

1775 Log-likelihood of logit model. 

1776 

1777 Parameters 

1778 ---------- 

1779 params : array_like 

1780 The parameters of the logit model. 

1781 

1782 Returns 

1783 ------- 

1784 loglike : float 

1785 The log-likelihood function of the model evaluated at `params`. 

1786 See notes. 

1787 

1788 Notes 

1789 ----- 

1790 .. math:: 

1791 

1792 \\ln L=\\sum_{i}\\ln\\Lambda 

1793 \\left(q_{i}x_{i}^{\\prime}\\beta\\right) 

1794 

1795 Where :math:`q=2y-1`. This simplification comes from the fact that the 

1796 logistic distribution is symmetric. 

1797 """ 

1798 q = 2*self.endog - 1 

1799 X = self.exog 

1800 return np.sum(np.log(self.cdf(q*np.dot(X,params)))) 

1801 
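# A small check (made-up numbers) of the q = 2y - 1 simplification used in
# ``loglike``: because the logistic cdf satisfies Lambda(-z) = 1 - Lambda(z),
# log Lambda(q * xb) equals the usual Bernoulli log-likelihood
# y*log(p) + (1 - y)*log(1 - p) with p = Lambda(xb).
#
# >>> import numpy as np
# >>> xb = np.array([-1.5, 0.2, 0.7]); y = np.array([0., 1., 0.])
# >>> p = 1 / (1 + np.exp(-xb)); q = 2 * y - 1
# >>> lhs = np.log(1 / (1 + np.exp(-q * xb)))
# >>> rhs = y * np.log(p) + (1 - y) * np.log(1 - p)
# >>> np.allclose(lhs, rhs)
# True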

1802 def loglikeobs(self, params): 

1803 """ 

1804 Log-likelihood of logit model for each observation. 

1805 

1806 Parameters 

1807 ---------- 

1808 params : array_like 

1809 The parameters of the logit model. 

1810 

1811 Returns 

1812 ------- 

1813 loglike : ndarray 

1814 The log likelihood for each observation of the model evaluated 

1815 at `params`. See Notes 

1816 

1817 Notes 

1818 ----- 

1819 .. math:: 

1820 

1821            \\ln L_{i}=\\ln\\Lambda

1822 \\left(q_{i}x_{i}^{\\prime}\\beta\\right) 

1823 

1824 for observations :math:`i=1,...,n` 

1825 

1826 where :math:`q=2y-1`. This simplification comes from the fact that the 

1827 logistic distribution is symmetric. 

1828 """ 

1829 q = 2*self.endog - 1 

1830 X = self.exog 

1831 return np.log(self.cdf(q*np.dot(X,params))) 

1832 

1833 def score(self, params): 

1834 """ 

1835 Logit model score (gradient) vector of the log-likelihood 

1836 

1837 Parameters 

1838 ---------- 

1839 params : array_like 

1840 The parameters of the model 

1841 

1842 Returns 

1843 ------- 

1844 score : ndarray, 1-D 

1845 The score vector of the model, i.e. the first derivative of the 

1846 loglikelihood function, evaluated at `params` 

1847 

1848 Notes 

1849 ----- 

1850 .. math:: \\frac{\\partial\\ln L}{\\partial\\beta}=\\sum_{i=1}^{n}\\left(y_{i}-\\Lambda_{i}\\right)x_{i} 

1851 """ 

1852 

1853 y = self.endog 

1854 X = self.exog 

1855 L = self.cdf(np.dot(X,params)) 

1856 return np.dot(y - L,X) 

1857 

1858 def score_obs(self, params): 

1859 """ 

1860 Logit model Jacobian of the log-likelihood for each observation 

1861 

1862 Parameters 

1863 ---------- 

1864 params : array_like 

1865 The parameters of the model 

1866 

1867 Returns 

1868 ------- 

1869 jac : array_like 

1870 The derivative of the loglikelihood for each observation evaluated 

1871 at `params`. 

1872 

1873 Notes 

1874 ----- 

1875 .. math:: \\frac{\\partial\\ln L_{i}}{\\partial\\beta}=\\left(y_{i}-\\Lambda_{i}\\right)x_{i} 

1876 

1877 for observations :math:`i=1,...,n` 

1878 """ 

1879 

1880 y = self.endog 

1881 X = self.exog 

1882 L = self.cdf(np.dot(X, params)) 

1883 return (y - L)[:,None] * X 

1884 

1885 def hessian(self, params): 

1886 """ 

1887 Logit model Hessian matrix of the log-likelihood 

1888 

1889 Parameters 

1890 ---------- 

1891 params : array_like 

1892 The parameters of the model 

1893 

1894 Returns 

1895 ------- 

1896 hess : ndarray, (k_vars, k_vars) 

1897 The Hessian, second derivative of loglikelihood function, 

1898 evaluated at `params` 

1899 

1900 Notes 

1901 ----- 

1902 .. math:: \\frac{\\partial^{2}\\ln L}{\\partial\\beta\\partial\\beta^{\\prime}}=-\\sum_{i}\\Lambda_{i}\\left(1-\\Lambda_{i}\\right)x_{i}x_{i}^{\\prime} 

1903 """ 

1904 X = self.exog 

1905 L = self.cdf(np.dot(X,params)) 

1906 return -np.dot(L*(1-L)*X.T,X) 

1907 
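# A minimal sketch (hypothetical data and tolerances) of cross-checking the
# analytic ``score`` and ``hessian`` above against numerical derivatives of
# ``loglike``, assuming the helpers in statsmodels.tools.numdiff.
#
# >>> import numpy as np
# >>> from statsmodels.discrete.discrete_model import Logit
# >>> from statsmodels.tools.numdiff import approx_fprime, approx_hess
# >>> rng = np.random.default_rng(1)
# >>> x = np.column_stack([np.ones(50), rng.normal(size=50)])
# >>> y = (rng.uniform(size=50) < 1 / (1 + np.exp(-x @ [0.2, 1.0]))).astype(float)
# >>> mod = Logit(y, x); beta = np.array([0.1, 0.5])
# >>> np.allclose(mod.score(beta), approx_fprime(beta, mod.loglike), rtol=1e-4)
# True
# >>> np.allclose(mod.hessian(beta), approx_hess(beta, mod.loglike), rtol=1e-4)
# True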

1908 @Appender(DiscreteModel.fit.__doc__) 

1909 def fit(self, start_params=None, method='newton', maxiter=35, 

1910 full_output=1, disp=1, callback=None, **kwargs): 

1911 bnryfit = super(Logit, self).fit(start_params=start_params, 

1912 method=method, maxiter=maxiter, full_output=full_output, 

1913 disp=disp, callback=callback, **kwargs) 

1914 

1915 discretefit = LogitResults(self, bnryfit) 

1916 return BinaryResultsWrapper(discretefit) 

1917 

1918 

1919class Probit(BinaryModel): 

1920 __doc__ = """ 

1921 Probit Model 

1922 

1923 %(params)s 

1924 %(extra_params)s 

1925 

1926 Attributes 

1927 ---------- 

1928 endog : ndarray 

1929 A reference to the endogenous response variable 

1930 exog : ndarray 

1931 A reference to the exogenous design. 

1932 """ % {'params' : base._model_params_doc, 

1933 'extra_params' : base._missing_param_doc} 

1934 

1935 def cdf(self, X): 

1936 """ 

1937 Probit (Normal) cumulative distribution function 

1938 

1939 Parameters 

1940 ---------- 

1941 X : array_like 

1942 The linear predictor of the model (XB). 

1943 

1944 Returns 

1945 ------- 

1946 cdf : ndarray 

1947 The cdf evaluated at `X`. 

1948 

1949 Notes 

1950 ----- 

1951        This is equivalent to scipy.stats.norm.cdf; the private ``stats.norm._cdf`` is used to avoid argument-checking overhead.

1952 """ 

1953 return stats.norm._cdf(X) 

1954 

1955 def pdf(self, X): 

1956 """ 

1957 Probit (Normal) probability density function 

1958 

1959 Parameters 

1960 ---------- 

1961 X : array_like 

1962 The linear predictor of the model (XB). 

1963 

1964 Returns 

1965 ------- 

1966 pdf : ndarray 

1967 The value of the normal density function for each point of X. 

1968 

1969 Notes 

1970 ----- 

1971        This is equivalent to scipy.stats.norm.pdf; the private ``stats.norm._pdf`` is used to avoid argument-checking overhead.

1972 """ 

1973 X = np.asarray(X) 

1974 return stats.norm._pdf(X) 

1975 

1976 

1977 def loglike(self, params): 

1978 """ 

1979 Log-likelihood of probit model (i.e., the normal distribution). 

1980 

1981 Parameters 

1982 ---------- 

1983 params : array_like 

1984 The parameters of the model. 

1985 

1986 Returns 

1987 ------- 

1988 loglike : float 

1989 The log-likelihood function of the model evaluated at `params`. 

1990 See notes. 

1991 

1992 Notes 

1993 ----- 

1994 .. math:: \\ln L=\\sum_{i}\\ln\\Phi\\left(q_{i}x_{i}^{\\prime}\\beta\\right) 

1995 

1996 Where :math:`q=2y-1`. This simplification comes from the fact that the 

1997 normal distribution is symmetric. 

1998 """ 

1999 

2000 q = 2*self.endog - 1 

2001 X = self.exog 

2002 return np.sum(np.log(np.clip(self.cdf(q*np.dot(X,params)), 

2003 FLOAT_EPS, 1))) 

2004 
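# A minimal illustration of why the cdf is clipped above: for very negative
# linear predictors the normal cdf underflows to exactly 0 and ``log(0)``
# would be ``-inf``, so the value is floored at FLOAT_EPS (machine epsilon)
# before taking logs.  The numbers below are illustrative.
#
# >>> import numpy as np
# >>> from scipy import stats
# >>> stats.norm.cdf(-40.0)
# 0.0
# >>> np.log(np.clip(stats.norm.cdf(-40.0), np.finfo(float).eps, 1))
# -36.04365338911715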

2005 def loglikeobs(self, params): 

2006 """ 

2007 Log-likelihood of probit model for each observation 

2008 

2009 Parameters 

2010 ---------- 

2011 params : array_like 

2012 The parameters of the model. 

2013 

2014 Returns 

2015 ------- 

2016 loglike : array_like 

2017 The log likelihood for each observation of the model evaluated 

2018 at `params`. See Notes 

2019 

2020 Notes 

2021 ----- 

2022 .. math:: \\ln L_{i}=\\ln\\Phi\\left(q_{i}x_{i}^{\\prime}\\beta\\right) 

2023 

2024 for observations :math:`i=1,...,n` 

2025 

2026 where :math:`q=2y-1`. This simplification comes from the fact that the 

2027 normal distribution is symmetric. 

2028 """ 

2029 

2030 q = 2*self.endog - 1 

2031 X = self.exog 

2032 return np.log(np.clip(self.cdf(q*np.dot(X,params)), FLOAT_EPS, 1)) 

2033 

2034 

2035 def score(self, params): 

2036 """ 

2037 Probit model score (gradient) vector 

2038 

2039 Parameters 

2040 ---------- 

2041 params : array_like 

2042 The parameters of the model 

2043 

2044 Returns 

2045 ------- 

2046 score : ndarray, 1-D 

2047 The score vector of the model, i.e. the first derivative of the 

2048 loglikelihood function, evaluated at `params` 

2049 

2050 Notes 

2051 ----- 

2052 .. math:: \\frac{\\partial\\ln L}{\\partial\\beta}=\\sum_{i=1}^{n}\\left[\\frac{q_{i}\\phi\\left(q_{i}x_{i}^{\\prime}\\beta\\right)}{\\Phi\\left(q_{i}x_{i}^{\\prime}\\beta\\right)}\\right]x_{i} 

2053 

2054 Where :math:`q=2y-1`. This simplification comes from the fact that the 

2055 normal distribution is symmetric. 

2056 """ 

2057 y = self.endog 

2058 X = self.exog 

2059 XB = np.dot(X,params) 

2060 q = 2*y - 1 

2061 # clip to get rid of invalid divide complaint 

2062 L = q*self.pdf(q*XB)/np.clip(self.cdf(q*XB), FLOAT_EPS, 1 - FLOAT_EPS) 

2063 return np.dot(L,X) 

2064 

2065 def score_obs(self, params): 

2066 """ 

2067 Probit model Jacobian for each observation 

2068 

2069 Parameters 

2070 ---------- 

2071 params : array_like 

2072 The parameters of the model 

2073 

2074 Returns 

2075 ------- 

2076 jac : array_like 

2077 The derivative of the loglikelihood for each observation evaluated 

2078 at `params`. 

2079 

2080 Notes 

2081 ----- 

2082 .. math:: \\frac{\\partial\\ln L_{i}}{\\partial\\beta}=\\left[\\frac{q_{i}\\phi\\left(q_{i}x_{i}^{\\prime}\\beta\\right)}{\\Phi\\left(q_{i}x_{i}^{\\prime}\\beta\\right)}\\right]x_{i} 

2083 

2084 for observations :math:`i=1,...,n` 

2085 

2086 Where :math:`q=2y-1`. This simplification comes from the fact that the 

2087 normal distribution is symmetric. 

2088 """ 

2089 y = self.endog 

2090 X = self.exog 

2091 XB = np.dot(X,params) 

2092 q = 2*y - 1 

2093 # clip to get rid of invalid divide complaint 

2094 L = q*self.pdf(q*XB)/np.clip(self.cdf(q*XB), FLOAT_EPS, 1 - FLOAT_EPS) 

2095 return L[:,None] * X 

2096 

2097 def hessian(self, params): 

2098 """ 

2099 Probit model Hessian matrix of the log-likelihood 

2100 

2101 Parameters 

2102 ---------- 

2103 params : array_like 

2104 The parameters of the model 

2105 

2106 Returns 

2107 ------- 

2108 hess : ndarray, (k_vars, k_vars) 

2109 The Hessian, second derivative of loglikelihood function, 

2110 evaluated at `params` 

2111 

2112 Notes 

2113 ----- 

2114 .. math:: \\frac{\\partial^{2}\\ln L}{\\partial\\beta\\partial\\beta^{\\prime}}=-\\lambda_{i}\\left(\\lambda_{i}+x_{i}^{\\prime}\\beta\\right)x_{i}x_{i}^{\\prime} 

2115 

2116 where 

2117 

2118 .. math:: \\lambda_{i}=\\frac{q_{i}\\phi\\left(q_{i}x_{i}^{\\prime}\\beta\\right)}{\\Phi\\left(q_{i}x_{i}^{\\prime}\\beta\\right)} 

2119 

2120 and :math:`q=2y-1` 

2121 """ 

2122 X = self.exog 

2123 XB = np.dot(X,params) 

2124 q = 2*self.endog - 1 

2125 L = q*self.pdf(q*XB)/self.cdf(q*XB) 

2126 return np.dot(-L*(L+XB)*X.T,X) 

2127 

2128 @Appender(DiscreteModel.fit.__doc__) 

2129 def fit(self, start_params=None, method='newton', maxiter=35, 

2130 full_output=1, disp=1, callback=None, **kwargs): 

2131 bnryfit = super(Probit, self).fit(start_params=start_params, 

2132 method=method, maxiter=maxiter, full_output=full_output, 

2133 disp=disp, callback=callback, **kwargs) 

2134 discretefit = ProbitResults(self, bnryfit) 

2135 return BinaryResultsWrapper(discretefit) 

2136 

2137 

2138class MNLogit(MultinomialModel): 

2139 __doc__ = """ 

2140 Multinomial Logit Model 

2141 

2142 Parameters 

2143 ---------- 

2144 endog : array_like 

2145        `endog` is a 1-d vector of the endogenous response. `endog` can

2146 contain strings, ints, or floats or may be a pandas Categorical Series. 

2147 Note that if it contains strings, every distinct string will be a 

2148 category. No stripping of whitespace is done. 

2149 exog : array_like 

2150 A nobs x k array where `nobs` is the number of observations and `k` 

2151 is the number of regressors. An intercept is not included by default 

2152 and should be added by the user. See `statsmodels.tools.add_constant`. 

2153 %(extra_params)s 

2154 

2155 Attributes 

2156 ---------- 

2157 endog : ndarray 

2158 A reference to the endogenous response variable 

2159 exog : ndarray 

2160 A reference to the exogenous design. 

2161 J : float 

2162        The number of choices for the endogenous variable. The categories

2163        are indexed 0, ..., J - 1.

2164 K : float 

2165 The actual number of parameters for the exogenous design. Includes 

2166 the constant if the design has one. 

2167 names : dict 

2168 A dictionary mapping the column number in `wendog` to the variables 

2169 in `endog`. 

2170 wendog : ndarray 

2171 An n x j array where j is the number of unique categories in `endog`. 

2172 Each column of j is a dummy variable indicating the category of 

2173 each observation. See `names` for a dictionary mapping each column to 

2174 its category. 

2175 

2176 Notes 

2177 ----- 

2178 See developer notes for further information on `MNLogit` internals. 

2179 """ % {'extra_params': base._missing_param_doc} 

2180 

2181 def __init__(self, endog, exog, **kwargs): 

2182 super(MNLogit, self).__init__(endog, exog, **kwargs) 

2183 

2184 # Override cov_names since multivariate model 

2185 yname = self.endog_names 

2186 ynames = self._ynames_map 

2187 ynames = MultinomialResults._maybe_convert_ynames_int(ynames) 

2188 # use range below to ensure sortedness 

2189 ynames = [ynames[key] for key in range(int(self.J))] 

2190 idx = MultiIndex.from_product((ynames[1:], self.data.xnames), 

2191 names=(yname, None)) 

2192 self.data.cov_names = idx 

2193 

2194 def pdf(self, eXB): 

2195 """ 

2196 NotImplemented 

2197 """ 

2198 raise NotImplementedError 

2199 

2200 def cdf(self, X): 

2201 """ 

2202 Multinomial logit cumulative distribution function. 

2203 

2204 Parameters 

2205 ---------- 

2206 X : ndarray 

2207 The linear predictor of the model XB. 

2208 

2209 Returns 

2210 ------- 

2211 cdf : ndarray 

2212 The cdf evaluated at `X`. 

2213 

2214 Notes 

2215 ----- 

2216 In the multinomial logit model. 

2217 .. math:: \\frac{\\exp\\left(\\beta_{j}^{\\prime}x_{i}\\right)}{\\sum_{k=0}^{J}\\exp\\left(\\beta_{k}^{\\prime}x_{i}\\right)} 

2218 """ 

2219 eXB = np.column_stack((np.ones(len(X)), np.exp(X))) 

2220 return eXB/eXB.sum(1)[:,None] 

2221 
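# A sketch of the cdf above: it is a softmax over the J categories in which
# the reference category's linear predictor is fixed at 0, so only the J - 1
# remaining columns of XB are passed in.  Numbers are illustrative.
#
# >>> import numpy as np
# >>> xb = np.array([[0.5, -1.0]])     # linear predictors for categories 1 and 2
# >>> exb = np.column_stack((np.ones(len(xb)), np.exp(xb)))
# >>> (exb / exb.sum(1)[:, None]).round(3)   # P(y=0), P(y=1), P(y=2); sums to 1
# array([[0.331, 0.547, 0.122]])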

2222 def loglike(self, params): 

2223 """ 

2224 Log-likelihood of the multinomial logit model. 

2225 

2226 Parameters 

2227 ---------- 

2228 params : array_like 

2229 The parameters of the multinomial logit model. 

2230 

2231 Returns 

2232 ------- 

2233 loglike : float 

2234 The log-likelihood function of the model evaluated at `params`. 

2235 See notes. 

2236 

2237 Notes 

2238 ----- 

2239 .. math:: 

2240 

2241 \\ln L=\\sum_{i=1}^{n}\\sum_{j=0}^{J}d_{ij}\\ln 

2242 \\left(\\frac{\\exp\\left(\\beta_{j}^{\\prime}x_{i}\\right)} 

2243 {\\sum_{k=0}^{J} 

2244 \\exp\\left(\\beta_{k}^{\\prime}x_{i}\\right)}\\right) 

2245 

2246 where :math:`d_{ij}=1` if individual `i` chose alternative `j` and 0 

2247 if not. 

2248 """ 

2249 params = params.reshape(self.K, -1, order='F') 

2250 d = self.wendog 

2251 logprob = np.log(self.cdf(np.dot(self.exog,params))) 

2252 return np.sum(d * logprob) 

2253 
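# A small illustration (hypothetical sizes) of the Fortran-order reshape used
# above: ``params`` arrives as a flat array of length K*(J-1), and the reshape
# recovers a (K, J-1) matrix whose column j holds the K coefficients of
# category j+1 relative to the reference category.
#
# >>> import numpy as np
# >>> K, J = 3, 3
# >>> flat = np.arange(K * (J - 1))    # array([0, 1, 2, 3, 4, 5])
# >>> flat.reshape(K, -1, order='F')
# array([[0, 3],
#        [1, 4],
#        [2, 5]])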

2254 def loglikeobs(self, params): 

2255 """ 

2256 Log-likelihood of the multinomial logit model for each observation. 

2257 

2258 Parameters 

2259 ---------- 

2260 params : array_like 

2261 The parameters of the multinomial logit model. 

2262 

2263 Returns 

2264 ------- 

2265 loglike : array_like 

2266 The log likelihood for each observation of the model evaluated 

2267 at `params`. See Notes 

2268 

2269 Notes 

2270 ----- 

2271 .. math:: 

2272 

2273 \\ln L_{i}=\\sum_{j=0}^{J}d_{ij}\\ln 

2274 \\left(\\frac{\\exp\\left(\\beta_{j}^{\\prime}x_{i}\\right)} 

2275 {\\sum_{k=0}^{J} 

2276 \\exp\\left(\\beta_{k}^{\\prime}x_{i}\\right)}\\right) 

2277 

2278 for observations :math:`i=1,...,n` 

2279 

2280 where :math:`d_{ij}=1` if individual `i` chose alternative `j` and 0 

2281 if not. 

2282 """ 

2283 params = params.reshape(self.K, -1, order='F') 

2284 d = self.wendog 

2285 logprob = np.log(self.cdf(np.dot(self.exog,params))) 

2286 return d * logprob 

2287 

2288 def score(self, params): 

2289 """ 

2290 Score matrix for multinomial logit model log-likelihood 

2291 

2292 Parameters 

2293 ---------- 

2294 params : ndarray 

2295 The parameters of the multinomial logit model. 

2296 

2297 Returns 

2298 ------- 

2299 score : ndarray, (K * (J-1),) 

2300 The 2-d score vector, i.e. the first derivative of the 

2301 loglikelihood function, of the multinomial logit model evaluated at 

2302 `params`. 

2303 

2304 Notes 

2305 ----- 

2306 .. math:: \\frac{\\partial\\ln L}{\\partial\\beta_{j}}=\\sum_{i}\\left(d_{ij}-\\frac{\\exp\\left(\\beta_{j}^{\\prime}x_{i}\\right)}{\\sum_{k=0}^{J}\\exp\\left(\\beta_{k}^{\\prime}x_{i}\\right)}\\right)x_{i} 

2307 

2308 for :math:`j=1,...,J` 

2309 

2310 In the multinomial model the score matrix is K x J-1 but is returned 

2311 as a flattened array to work with the solvers. 

2312 """ 

2313 params = params.reshape(self.K, -1, order='F') 

2314 firstterm = self.wendog[:,1:] - self.cdf(np.dot(self.exog, 

2315 params))[:,1:] 

2316 #NOTE: might need to switch terms if params is reshaped 

2317 return np.dot(firstterm.T, self.exog).flatten() 

2318 

2319 def loglike_and_score(self, params): 

2320 """ 

2321 Returns log likelihood and score, efficiently reusing calculations. 

2322 

2323 Note that both of these returned quantities will need to be negated 

2324 before being minimized by the maximum likelihood fitting machinery. 

2325 """ 

2326 params = params.reshape(self.K, -1, order='F') 

2327 cdf_dot_exog_params = self.cdf(np.dot(self.exog, params)) 

2328 loglike_value = np.sum(self.wendog * np.log(cdf_dot_exog_params)) 

2329 firstterm = self.wendog[:, 1:] - cdf_dot_exog_params[:, 1:] 

2330 score_array = np.dot(firstterm.T, self.exog).flatten() 

2331 return loglike_value, score_array 

2332 

2333 def score_obs(self, params): 

2334 """ 

2335 Jacobian matrix for multinomial logit model log-likelihood 

2336 

2337 Parameters 

2338 ---------- 

2339 params : ndarray 

2340 The parameters of the multinomial logit model. 

2341 

2342 Returns 

2343 ------- 

2344 jac : array_like 

2345 The derivative of the loglikelihood for each observation evaluated 

2346 at `params` . 

2347 

2348 Notes 

2349 ----- 

2350 .. math:: \\frac{\\partial\\ln L_{i}}{\\partial\\beta_{j}}=\\left(d_{ij}-\\frac{\\exp\\left(\\beta_{j}^{\\prime}x_{i}\\right)}{\\sum_{k=0}^{J}\\exp\\left(\\beta_{k}^{\\prime}x_{i}\\right)}\\right)x_{i} 

2351 

2352 for :math:`j=1,...,J`, for observations :math:`i=1,...,n` 

2353 

2354 In the multinomial model the score vector is K x (J-1) but is returned 

2355 as a flattened array. The Jacobian has the observations in rows and 

2356 the flattened array of derivatives in columns. 

2357 """ 

2358 params = params.reshape(self.K, -1, order='F') 

2359 firstterm = self.wendog[:,1:] - self.cdf(np.dot(self.exog, 

2360 params))[:,1:] 

2361 #NOTE: might need to switch terms if params is reshaped 

2362 return (firstterm[:,:,None] * self.exog[:,None,:]).reshape(self.exog.shape[0], -1) 

2363 

2364 def hessian(self, params): 

2365 """ 

2366 Multinomial logit Hessian matrix of the log-likelihood 

2367 

2368 Parameters 

2369 ---------- 

2370 params : array_like 

2371 The parameters of the model 

2372 

2373 Returns 

2374 ------- 

2375 hess : ndarray, (J*K, J*K) 

2376 The Hessian, second derivative of loglikelihood function with 

2377 respect to the flattened parameters, evaluated at `params` 

2378 

2379 Notes 

2380 ----- 

2381 .. math:: \\frac{\\partial^{2}\\ln L}{\\partial\\beta_{j}\\partial\\beta_{l}}=-\\sum_{i=1}^{n}\\frac{\\exp\\left(\\beta_{j}^{\\prime}x_{i}\\right)}{\\sum_{k=0}^{J}\\exp\\left(\\beta_{k}^{\\prime}x_{i}\\right)}\\left[\\boldsymbol{1}\\left(j=l\\right)-\\frac{\\exp\\left(\\beta_{l}^{\\prime}x_{i}\\right)}{\\sum_{k=0}^{J}\\exp\\left(\\beta_{k}^{\\prime}x_{i}\\right)}\\right]x_{i}x_{l}^{\\prime} 

2382 

2383 where 

2384 :math:`\\boldsymbol{1}\\left(j=l\\right)` equals 1 if `j` = `l` and 0 

2385 otherwise. 

2386 

2387 The actual Hessian matrix has J**2 * K x K elements. Our Hessian 

2388 is reshaped to be square (J*K, J*K) so that the solvers can use it. 

2389 

2390 This implementation does not take advantage of the symmetry of 

2391 the Hessian and could probably be refactored for speed. 

2392 """ 

2393 params = params.reshape(self.K, -1, order='F') 

2394 X = self.exog 

2395 pr = self.cdf(np.dot(X,params)) 

2396 partials = [] 

2397 J = self.J 

2398 K = self.K 

2399 for i in range(J-1): 

2400 for j in range(J-1): # this loop assumes we drop the first col. 

2401 if i == j: 

2402 partials.append(\ 

2403 -np.dot(((pr[:,i+1]*(1-pr[:,j+1]))[:,None]*X).T,X)) 

2404 else: 

2405 partials.append(-np.dot(((pr[:,i+1]*-pr[:,j+1])[:,None]*X).T,X)) 

2406 H = np.array(partials) 

2407 # the developer's notes on multinomial should clear this math up 

2408 H = np.transpose(H.reshape(J-1, J-1, K, K), (0, 2, 1, 3)).reshape((J-1)*K, (J-1)*K) 

2409 return H 

2410 

2411 

2412#TODO: Weibull can be replaced by a survival analysis function

2413# like Stata's streg (the Cox model as well)

2414#class Weibull(DiscreteModel): 

2415# """ 

2416# Binary choice Weibull model 

2417# 

2418# Notes 

2419# ------ 

2420# This is unfinished and untested. 

2421# """ 

2422##TODO: add analytic hessian for Weibull 

2423# def initialize(self): 

2424# pass 

2425# 

2426# def cdf(self, X): 

2427# """ 

2428#        Gumbel (Log Weibull) cumulative distribution function

2429# """ 

2430## return np.exp(-np.exp(-X)) 

2431# return stats.gumbel_r.cdf(X) 

2432# # these two are equivalent. 

2433# # Greene table and discussion is incorrect. 

2434# 

2435# def pdf(self, X): 

2436# """ 

2437#        Gumbel (Log Weibull) probability density function

2438# """ 

2439# return stats.gumbel_r.pdf(X) 

2440# 

2441# def loglike(self, params): 

2442# """ 

2443# Loglikelihood of Weibull distribution 

2444# """ 

2445# X = self.exog 

2446# cdf = self.cdf(np.dot(X,params)) 

2447# y = self.endog 

2448# return np.sum(y*np.log(cdf) + (1-y)*np.log(1-cdf)) 

2449# 

2450# def score(self, params): 

2451# y = self.endog 

2452# X = self.exog 

2453# F = self.cdf(np.dot(X,params)) 

2454# f = self.pdf(np.dot(X,params)) 

2455# term = (y*f/F + (1 - y)*-f/(1-F)) 

2456# return np.dot(term,X) 

2457# 

2458# def hessian(self, params): 

2459# hess = nd.Jacobian(self.score) 

2460# return hess(params) 

2461# 

2462# def fit(self, start_params=None, method='newton', maxiter=35, tol=1e-08): 

2463## The example had problems with all zero start values, Hessian = 0 

2464# if start_params is None: 

2465# start_params = OLS(self.endog, self.exog).fit().params 

2466# mlefit = super(Weibull, self).fit(start_params=start_params, 

2467# method=method, maxiter=maxiter, tol=tol) 

2468# return mlefit 

2469# 

2470 

2471 

2472class NegativeBinomial(CountModel): 

2473 __doc__ = """ 

2474 Negative Binomial Model 

2475 

2476 %(params)s 

2477 %(extra_params)s 

2478 

2479 Attributes 

2480 ---------- 

2481 endog : ndarray 

2482 A reference to the endogenous response variable 

2483 exog : ndarray 

2484 A reference to the exogenous design. 

2485 

2486 References 

2487 ---------- 

2488 Greene, W. 2008. "Functional forms for the negative binomial model 

2489 for count data". Economics Letters. Volume 99, Number 3, pp.585-590. 

2490 Hilbe, J.M. 2011. "Negative binomial regression". Cambridge University 

2491 Press. 

2492 """ % {'params': base._model_params_doc, 

2493 'extra_params': 

2494 """loglike_method : str 

2495 Log-likelihood type. 'nb2','nb1', or 'geometric'. 

2496 Fitted value :math:`\\mu` 

2497 Heterogeneity parameter :math:`\\alpha` 

2498 

2499 - nb2: Variance equal to :math:`\\mu + \\alpha\\mu^2` (most common) 

2500 - nb1: Variance equal to :math:`\\mu + \\alpha\\mu` 

2501 - geometric: Variance equal to :math:`\\mu + \\mu^2` 

2502 offset : array_like 

2503 Offset is added to the linear prediction with coefficient equal to 1. 

2504 exposure : array_like 

2505 Log(exposure) is added to the linear prediction with coefficient 

2506 equal to 1. 

2507 """ + base._missing_param_doc} 

2508 def __init__(self, endog, exog, loglike_method='nb2', offset=None, 

2509 exposure=None, missing='none', **kwargs): 

2510 super(NegativeBinomial, self).__init__(endog, exog, offset=offset, 

2511 exposure=exposure, 

2512 missing=missing, **kwargs) 

2513 self.loglike_method = loglike_method 

2514 self._initialize() 

2515 if loglike_method in ['nb2', 'nb1']: 

2516 self.exog_names.append('alpha') 

2517 self.k_extra = 1 

2518 else: 

2519 self.k_extra = 0 

2520 # store keys for extras if we need to recreate model instance 

2521 # we need to append keys that do not go to super 

2522 self._init_keys.append('loglike_method') 

2523 

2524 def _initialize(self): 

2525 if self.loglike_method == 'nb2': 

2526 self.hessian = self._hessian_nb2 

2527 self.score = self._score_nbin 

2528 self.loglikeobs = self._ll_nb2 

2529 self._transparams = True # transform lnalpha -> alpha in fit 

2530 elif self.loglike_method == 'nb1': 

2531 self.hessian = self._hessian_nb1 

2532 self.score = self._score_nb1 

2533 self.loglikeobs = self._ll_nb1 

2534 self._transparams = True # transform lnalpha -> alpha in fit 

2535 elif self.loglike_method == 'geometric': 

2536 self.hessian = self._hessian_geom 

2537 self.score = self._score_geom 

2538 self.loglikeobs = self._ll_geometric 

2539 else: 

2540            raise ValueError('Likelihood type must be "nb1", "nb2" '

2541                             'or "geometric"')

2542 

2543 # Workaround to pickle instance methods 

2544 def __getstate__(self): 

2545 odict = self.__dict__.copy() # copy the dict since we change it 

2546 del odict['hessian'] 

2547 del odict['score'] 

2548 del odict['loglikeobs'] 

2549 return odict 

2550 

2551 def __setstate__(self, indict): 

2552 self.__dict__.update(indict) 

2553 self._initialize() 

2554 

2555 def _ll_nbin(self, params, alpha, Q=0): 

2556 if np.any(np.iscomplex(params)) or np.iscomplex(alpha): 

2557 gamma_ln = loggamma 

2558 else: 

2559 gamma_ln = gammaln 

2560 endog = self.endog 

2561 mu = self.predict(params) 

2562 size = 1/alpha * mu**Q 

2563 prob = size/(size+mu) 

2564 coeff = (gamma_ln(size+endog) - gamma_ln(endog+1) - 

2565 gamma_ln(size)) 

2566 llf = coeff + size*np.log(prob) + endog*np.log(1-prob) 

2567 return llf 

2568 
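# A sketch (made-up numbers) of how ``_ll_nbin`` relates to scipy: it is the
# log-pmf of ``scipy.stats.nbinom`` written in terms of (mu, alpha), with
# size = mu**Q / alpha and prob = size / (size + mu).  Checking the NB2 case
# (Q = 0):
#
# >>> import numpy as np
# >>> from scipy.stats import nbinom
# >>> from scipy.special import gammaln
# >>> y, mu, alpha = 3, 2.5, 0.7
# >>> size = 1 / alpha; prob = size / (size + mu)
# >>> ll = (gammaln(size + y) - gammaln(y + 1) - gammaln(size)
# ...       + size * np.log(prob) + y * np.log(1 - prob))
# >>> np.allclose(ll, nbinom.logpmf(y, size, prob))
# True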

2569 def _ll_nb2(self, params): 

2570 if self._transparams: # got lnalpha during fit 

2571 alpha = np.exp(params[-1]) 

2572 else: 

2573 alpha = params[-1] 

2574 return self._ll_nbin(params[:-1], alpha, Q=0) 

2575 

2576 def _ll_nb1(self, params): 

2577 if self._transparams: # got lnalpha during fit 

2578 alpha = np.exp(params[-1]) 

2579 else: 

2580 alpha = params[-1] 

2581 return self._ll_nbin(params[:-1], alpha, Q=1) 

2582 

2583 def _ll_geometric(self, params): 

2584        # geometric is the nb2 special case with alpha fixed at 1 (variance mu + mu**2)

2585 return self._ll_nbin(params, 1, 0) 

2586 

2587 def loglike(self, params): 

2588 r""" 

2589 Loglikelihood for negative binomial model 

2590 

2591 Parameters 

2592 ---------- 

2593 params : array_like 

2594 The parameters of the model. If `loglike_method` is nb1 or 

2595 nb2, then the ancillary parameter is expected to be the 

2596 last element. 

2597 

2598 Returns 

2599 ------- 

2600 llf : float 

2601 The loglikelihood value at `params` 

2602 

2603 Notes 

2604 ----- 

2605 Following notation in Greene (2008), with negative binomial 

2606 heterogeneity parameter :math:`\alpha`: 

2607 

2608 .. math:: 

2609 

2610 \lambda_i &= exp(X\beta) \\ 

2611 \theta &= 1 / \alpha \\ 

2612 g_i &= \theta \lambda_i^Q \\ 

2613 w_i &= g_i/(g_i + \lambda_i) \\ 

2614 r_i &= \theta / (\theta+\lambda_i) \\ 

2615            ln \mathcal{L}_i &= ln \Gamma(y_i+g_i) - ln \Gamma(1+y_i) - ln \Gamma(g_i) + g_i ln (w_i) + y_i ln(1-w_i)

2616 

2617        where :math:`Q=0` for NB2 and geometric and :math:`Q=1` for NB1.

2618        For the geometric, :math:`\alpha` is fixed at 1.

2619 """ 

2620 llf = np.sum(self.loglikeobs(params)) 

2621 return llf 

2622 

2623 def _score_geom(self, params): 

2624 exog = self.exog 

2625 y = self.endog[:, None] 

2626 mu = self.predict(params)[:, None] 

2627 dparams = exog * (y-mu)/(mu+1) 

2628 return dparams.sum(0) 

2629 

2630 def _score_nbin(self, params, Q=0): 

2631 """ 

2632 Score vector for NB2 model 

2633 """ 

2634 if self._transparams: # lnalpha came in during fit 

2635 alpha = np.exp(params[-1]) 

2636 else: 

2637 alpha = params[-1] 

2638 params = params[:-1] 

2639 exog = self.exog 

2640 y = self.endog[:,None] 

2641 mu = self.predict(params)[:,None] 

2642 a1 = 1/alpha * mu**Q 

2643 prob = a1 / (a1 + mu) # a1 aka "size" in _ll_nbin 

2644 if Q == 1: # nb1 

2645 # Q == 1 --> a1 = mu / alpha --> prob = 1 / (alpha + 1) 

2646 dgpart = digamma(y + a1) - digamma(a1) 

2647 dparams = exog * a1 * (np.log(prob) + 

2648 dgpart) 

2649 dalpha = ((alpha * (y - mu * np.log(prob) - 

2650 mu*(dgpart + 1)) - 

2651 mu * (np.log(prob) + 

2652 dgpart))/ 

2653 (alpha**2*(alpha + 1))).sum() 

2654 

2655 elif Q == 0: # nb2 

2656 dgpart = digamma(y + a1) - digamma(a1) 

2657 dparams = exog*a1 * (y-mu)/(mu+a1) 

2658 da1 = -alpha**-2 

2659 dalpha = (dgpart + np.log(a1) 

2660 - np.log(a1+mu) - (y-mu)/(a1+mu)).sum() * da1 

2661 

2662 #multiply above by constant outside sum to reduce rounding error 

2663 if self._transparams: 

2664 return np.r_[dparams.sum(0), dalpha*alpha] 

2665 else: 

2666 return np.r_[dparams.sum(0), dalpha] 

2667 

2668 def _score_nb1(self, params): 

2669 return self._score_nbin(params, Q=1) 

2670 

2671 def _hessian_geom(self, params): 

2672 exog = self.exog 

2673 y = self.endog[:,None] 

2674 mu = self.predict(params)[:,None] 

2675 

2676 # for dl/dparams dparams 

2677 dim = exog.shape[1] 

2678 hess_arr = np.empty((dim, dim)) 

2679 const_arr = mu*(1+y)/(mu+1)**2 

2680 for i in range(dim): 

2681 for j in range(dim): 

2682 if j > i: 

2683 continue 

2684 hess_arr[i,j] = np.sum(-exog[:,i,None] * exog[:,j,None] * 

2685 const_arr, axis=0) 

2686 tri_idx = np.triu_indices(dim, k=1) 

2687 hess_arr[tri_idx] = hess_arr.T[tri_idx] 

2688 return hess_arr 

2689 

2690 

2691 def _hessian_nb1(self, params): 

2692 """ 

2693 Hessian of NB1 model. 

2694 """ 

2695 if self._transparams: # lnalpha came in during fit 

2696 alpha = np.exp(params[-1]) 

2697 else: 

2698 alpha = params[-1] 

2699 

2700 params = params[:-1] 

2701 exog = self.exog 

2702 y = self.endog[:,None] 

2703 mu = self.predict(params)[:,None] 

2704 

2705 a1 = mu/alpha 

2706 dgpart = digamma(y + a1) - digamma(a1) 

2707 prob = 1 / (1 + alpha) # equiv: a1 / (a1 + mu) 

2708 

2709 # for dl/dparams dparams 

2710 dim = exog.shape[1] 

2711 hess_arr = np.empty((dim+1,dim+1)) 

2712 #const_arr = a1*mu*(a1+y)/(mu+a1)**2 

2713 # not all of dparams 

2714 dparams = exog / alpha * (np.log(prob) + 

2715 dgpart) 

2716 

2717 dmudb = exog*mu 

2718 xmu_alpha = exog * a1 

2719 trigamma = (special.polygamma(1, a1 + y) - 

2720 special.polygamma(1, a1)) 

2721 for i in range(dim): 

2722 for j in range(dim): 

2723 if j > i: 

2724 continue 

2725 hess_arr[i,j] = np.sum(dparams[:,i,None] * dmudb[:,j,None] + 

2726 xmu_alpha[:,i,None] * xmu_alpha[:,j,None] * 

2727 trigamma, axis=0) 

2728 tri_idx = np.triu_indices(dim, k=1) 

2729 hess_arr[tri_idx] = hess_arr.T[tri_idx] 

2730 

2731 # for dl/dparams dalpha 

2732 da1 = -alpha**-2 

2733 dldpda = np.sum(-a1 * dparams + exog * a1 * 

2734 (-trigamma*mu/alpha**2 - prob), axis=0) 

2735 

2736 hess_arr[-1,:-1] = dldpda 

2737 hess_arr[:-1,-1] = dldpda 

2738 

2739 log_alpha = np.log(prob) 

2740 alpha3 = alpha**3 

2741 alpha2 = alpha**2 

2742 mu2 = mu**2 

2743 dada = ((alpha3*mu*(2*log_alpha + 2*dgpart + 3) - 

2744 2*alpha3*y + 

2745 4*alpha2*mu*(log_alpha + dgpart) + 

2746 alpha2 * (2*mu - y) + 

2747 2*alpha*mu2*trigamma + mu2 * trigamma + alpha2 * mu2 * trigamma + 

2748 2*alpha*mu*(log_alpha + dgpart) 

2749 )/(alpha**4*(alpha2 + 2*alpha + 1))) 

2750 hess_arr[-1,-1] = dada.sum() 

2751 

2752 return hess_arr 

2753 

2754 def _hessian_nb2(self, params): 

2755 """ 

2756 Hessian of NB2 model. 

2757 """ 

2758 if self._transparams: # lnalpha came in during fit 

2759 alpha = np.exp(params[-1]) 

2760 else: 

2761 alpha = params[-1] 

2762 a1 = 1/alpha 

2763 params = params[:-1] 

2764 

2765 exog = self.exog 

2766 y = self.endog[:,None] 

2767 mu = self.predict(params)[:,None] 

2768 prob = a1 / (a1 + mu) 

2769 dgpart = digamma(a1 + y) - digamma(a1) 

2770 

2771 # for dl/dparams dparams 

2772 dim = exog.shape[1] 

2773 hess_arr = np.empty((dim+1,dim+1)) 

2774 const_arr = a1*mu*(a1+y)/(mu+a1)**2 

2775 for i in range(dim): 

2776 for j in range(dim): 

2777 if j > i: 

2778 continue 

2779 hess_arr[i,j] = np.sum(-exog[:,i,None] * exog[:,j,None] * 

2780 const_arr, axis=0) 

2781 tri_idx = np.triu_indices(dim, k=1) 

2782 hess_arr[tri_idx] = hess_arr.T[tri_idx] 

2783 

2784 # for dl/dparams dalpha 

2785 da1 = -alpha**-2 

2786 dldpda = -np.sum(mu*exog*(y-mu)*a1**2/(mu+a1)**2 , axis=0) 

2787 hess_arr[-1,:-1] = dldpda 

2788 hess_arr[:-1,-1] = dldpda 

2789 

2790 # for dl/dalpha dalpha 

2791 #NOTE: polygamma(1,x) is the trigamma function 

2792 da2 = 2*alpha**-3 

2793 dalpha = da1 * (dgpart + 

2794 np.log(prob) - (y - mu)/(a1+mu)) 

2795 dada = (da2 * dalpha/da1 + da1**2 * (special.polygamma(1, a1+y) - 

2796 special.polygamma(1, a1) + 1/a1 - 1/(a1 + mu) + 

2797 (y - mu)/(mu + a1)**2)).sum() 

2798 hess_arr[-1,-1] = dada 

2799 

2800 return hess_arr 

2801 

2802    #TODO: replace this with an analytic version; where is it used?

2803 def score_obs(self, params): 

2804 sc = approx_fprime_cs(params, self.loglikeobs) 

2805 return sc 

2806 

2807 @Appender(_get_start_params_null_docs) 

2808 def _get_start_params_null(self): 

2809 offset = getattr(self, "offset", 0) 

2810 exposure = getattr(self, "exposure", 0) 

2811 const = (self.endog / np.exp(offset + exposure)).mean() 

2812 params = [np.log(const)] 

2813 mu = const * np.exp(offset + exposure) 

2814 resid = self.endog - mu 

2815 a = self._estimate_dispersion(mu, resid, df_resid=resid.shape[0] - 1) 

2816 params.append(a) 

2817 return np.array(params) 

2818 

2819 def _estimate_dispersion(self, mu, resid, df_resid=None): 

2820 if df_resid is None: 

2821 df_resid = resid.shape[0] 

2822 if self.loglike_method == 'nb2': 

2823 #params.append(np.linalg.pinv(mu[:,None]).dot(resid**2 / mu - 1)) 

2824 a = ((resid**2 / mu - 1) / mu).sum() / df_resid 

2825 else: #self.loglike_method == 'nb1': 

2826 a = (resid**2 / mu - 1).sum() / df_resid 

2827 return a 

2828 
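# The estimator above is a simple method-of-moments step: for nb2 it solves
# E[(y - mu)**2] = mu + a*mu**2 for ``a`` observation by observation and
# averages, and for nb1 the target is mu + a*mu.  A tiny sketch with
# illustrative numbers:
#
# >>> import numpy as np
# >>> mu = np.array([2.0, 3.0, 5.0]); resid = np.array([1.0, -2.0, 3.0])
# >>> ((resid**2 / mu - 1) / mu).sum() / 3     # nb2-style estimate of alpha
# 0.00704  (approximately)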

2829 def fit(self, start_params=None, method='bfgs', maxiter=35, 

2830 full_output=1, disp=1, callback=None, 

2831 cov_type='nonrobust', cov_kwds=None, use_t=None, **kwargs): 

2832 

2833 # Note: do not let super handle robust covariance because it has 

2834 # transformed params 

2835 self._transparams = False # always define attribute 

2836 if self.loglike_method.startswith('nb') and method not in ['newton', 

2837 'ncg']: 

2838 self._transparams = True # in case same Model instance is refit 

2839 elif self.loglike_method.startswith('nb'): # method is newton/ncg 

2840 self._transparams = False # because we need to step in alpha space 

2841 

2842 if start_params is None: 

2843 # Use poisson fit as first guess. 

2844 #TODO, Warning: this assumes exposure is logged 

2845 offset = getattr(self, "offset", 0) + getattr(self, "exposure", 0) 

2846 if np.size(offset) == 1 and offset == 0: 

2847 offset = None 

2848 optim_kwds_prelim = {'disp': 0, 'skip_hessian': True, 

2849 'warn_convergence': False} 

2850 optim_kwds_prelim.update(kwargs.get('optim_kwds_prelim', {})) 

2851 mod_poi = Poisson(self.endog, self.exog, offset=offset) 

2852 res_poi = mod_poi.fit(**optim_kwds_prelim) 

2853 start_params = res_poi.params 

2854 if self.loglike_method.startswith('nb'): 

2855 a = self._estimate_dispersion(res_poi.predict(), res_poi.resid, 

2856 df_resid=res_poi.df_resid) 

2857 start_params = np.append(start_params, max(0.05, a)) 

2858 else: 

2859 if self._transparams is True: 

2860 # transform user provided start_params dispersion, see #3918 

2861 start_params = np.array(start_params, copy=True) 

2862 start_params[-1] = np.log(start_params[-1]) 

2863 

2864 if callback is None: 

2865 # work around perfect separation callback #3895 

2866 callback = lambda *x: x 

2867 

2868 mlefit = super(NegativeBinomial, self).fit(start_params=start_params, 

2869 maxiter=maxiter, method=method, disp=disp, 

2870 full_output=full_output, callback=callback, 

2871 **kwargs) 

2872 # TODO: Fix NBin _check_perfect_pred 

2873 if self.loglike_method.startswith('nb'): 

2874 # mlefit is a wrapped counts results 

2875 self._transparams = False # do not need to transform anymore now 

2876 # change from lnalpha to alpha 

2877 if method not in ["newton", "ncg"]: 

2878 mlefit._results.params[-1] = np.exp(mlefit._results.params[-1]) 

2879 

2880 nbinfit = NegativeBinomialResults(self, mlefit._results) 

2881 result = NegativeBinomialResultsWrapper(nbinfit) 

2882 else: 

2883 result = mlefit 

2884 

2885 if cov_kwds is None: 

2886 cov_kwds = {} #TODO: make this unnecessary ? 

2887 result._get_robustcov_results(cov_type=cov_type, 

2888 use_self=True, use_t=use_t, **cov_kwds) 

2889 return result 

2890 
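# A minimal sketch (synthetic data, hypothetical seed) of a typical call to
# the ``fit`` defined above.  For gradient-based methods the optimizer works
# in log(alpha) space and the reported dispersion is transformed back to
# alpha before the results are wrapped.
#
# >>> import numpy as np
# >>> from statsmodels.discrete.discrete_model import NegativeBinomial
# >>> rng = np.random.default_rng(2)
# >>> x = np.column_stack([np.ones(500), rng.normal(size=500)])
# >>> mu = np.exp(0.5 + 0.3 * x[:, 1])
# >>> y = rng.negative_binomial(n=1 / 0.5, p=1 / (1 + 0.5 * mu))   # NB2, alpha = 0.5
# >>> res = NegativeBinomial(y, x, loglike_method='nb2').fit(disp=0)
# >>> res.params.round(2)   # roughly [0.5, 0.3, 0.5]: const, slope, alpha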

2891 

2892 def fit_regularized(self, start_params=None, method='l1', 

2893 maxiter='defined_by_method', full_output=1, disp=1, callback=None, 

2894 alpha=0, trim_mode='auto', auto_trim_tol=0.01, size_trim_tol=1e-4, 

2895 qc_tol=0.03, **kwargs): 

2896 

2897 _validate_l1_method(method) 

2898 

2899 if self.loglike_method.startswith('nb') and (np.size(alpha) == 1 and 

2900 alpha != 0): 

2901 # do not penalize alpha if alpha is scalar 

2902 k_params = self.exog.shape[1] + self.k_extra 

2903 alpha = alpha * np.ones(k_params) 

2904 alpha[-1] = 0 

2905 

2906 # alpha for regularized poisson to get starting values 

2907 alpha_p = alpha[:-1] if (self.k_extra and np.size(alpha) > 1) else alpha 

2908 

2909 self._transparams = False 

2910 if start_params is None: 

2911 # Use poisson fit as first guess. 

2912 #TODO, Warning: this assumes exposure is logged 

2913 offset = getattr(self, "offset", 0) + getattr(self, "exposure", 0) 

2914 if np.size(offset) == 1 and offset == 0: 

2915 offset = None 

2916 mod_poi = Poisson(self.endog, self.exog, offset=offset) 

2917 start_params = mod_poi.fit_regularized( 

2918 start_params=start_params, method=method, maxiter=maxiter, 

2919 full_output=full_output, disp=0, callback=callback, 

2920 alpha=alpha_p, trim_mode=trim_mode, auto_trim_tol=auto_trim_tol, 

2921 size_trim_tol=size_trim_tol, qc_tol=qc_tol, **kwargs).params 

2922 if self.loglike_method.startswith('nb'): 

2923 start_params = np.append(start_params, 0.1) 

2924 

2925 cntfit = super(CountModel, self).fit_regularized( 

2926 start_params=start_params, method=method, maxiter=maxiter, 

2927 full_output=full_output, disp=disp, callback=callback, 

2928 alpha=alpha, trim_mode=trim_mode, auto_trim_tol=auto_trim_tol, 

2929 size_trim_tol=size_trim_tol, qc_tol=qc_tol, **kwargs) 

2930 

2931 discretefit = L1NegativeBinomialResults(self, cntfit) 

2932 return L1NegativeBinomialResultsWrapper(discretefit) 

2933 

2934 

2935class NegativeBinomialP(CountModel): 

2936 __doc__ = """ 

2937 Generalized Negative Binomial (NB-P) Model 

2938 

2939 %(params)s 

2940 %(extra_params)s 

2941 

2942 Attributes 

2943 ---------- 

2944 endog : ndarray 

2945 A reference to the endogenous response variable 

2946 exog : ndarray 

2947 A reference to the exogenous design. 

2948 p : scalar 

2949 P denotes parameterizations for NB-P regression. p=1 for NB-1 and 

2950        p=2 for NB-2. Default is p=2.

2951 """ % {'params' : base._model_params_doc, 

2952 'extra_params' : 

2953 """p : scalar 

2954 P denotes parameterizations for NB regression. p=1 for NB-1 and 

2955 p=2 for NB-2. Default is p=2. 

2956 offset : array_like 

2957 Offset is added to the linear prediction with coefficient equal to 1. 

2958 exposure : array_like 

2959 Log(exposure) is added to the linear prediction with coefficient 

2960 equal to 1. 

2961 """ + base._missing_param_doc} 

2962 

2963 def __init__(self, endog, exog, p=2, offset=None, 

2964 exposure=None, missing='none', **kwargs): 

2965 super(NegativeBinomialP, self).__init__(endog, exog, offset=offset, 

2966 exposure=exposure, 

2967 missing=missing, **kwargs) 

2968 self.parameterization = p 

2969 self.exog_names.append('alpha') 

2970 self.k_extra = 1 

2971 self._transparams = False 

2972 

2973 def _get_init_kwds(self): 

2974 kwds = super(NegativeBinomialP, self)._get_init_kwds() 

2975 kwds['p'] = self.parameterization 

2976 return kwds 

2977 

2978 def loglike(self, params): 

2979 """ 

2980 Loglikelihood of Generalized Negative Binomial (NB-P) model 

2981 

2982 Parameters 

2983 ---------- 

2984 params : array_like 

2985 The parameters of the model. 

2986 

2987 Returns 

2988 ------- 

2989 loglike : float 

2990 The log-likelihood function of the model evaluated at `params`. 

2991 See notes. 

2992 """ 

2993 return np.sum(self.loglikeobs(params)) 

2994 

2995 def loglikeobs(self, params): 

2996 """ 

2997 Loglikelihood for observations of Generalized Negative Binomial (NB-P) model 

2998 

2999 Parameters 

3000 ---------- 

3001 params : array_like 

3002 The parameters of the model. 

3003 

3004 Returns 

3005 ------- 

3006 loglike : ndarray 

3007 The log likelihood for each observation of the model evaluated 

3008 at `params`. See Notes 

3009 """ 

3010 if self._transparams: 

3011 alpha = np.exp(params[-1]) 

3012 else: 

3013 alpha = params[-1] 

3014 

3015 params = params[:-1] 

3016 p = self.parameterization 

3017 y = self.endog 

3018 

3019 mu = self.predict(params) 

3020 mu_p = mu**(2 - p) 

3021 a1 = mu_p / alpha 

3022 a2 = mu + a1 

3023 

3024 llf = (gammaln(y + a1) - gammaln(y + 1) - gammaln(a1) + 

3025 a1 * np.log(a1) + y * np.log(mu) - 

3026 (y + a1) * np.log(a2)) 

3027 

3028 return llf 

3029 

3030 def score_obs(self, params): 

3031 """ 

3032        Generalized Negative Binomial (NB-P) model score (gradient) vector of the log-likelihood for each observation.

3033 

3034 Parameters 

3035 ---------- 

3036 params : array_like 

3037 The parameters of the model 

3038 

3039 Returns 

3040 ------- 

3041        score : ndarray, 2-D

3042            The first derivative of the loglikelihood for each observation,

3043            evaluated at `params`, with one row per observation.

3044 """ 

3045 if self._transparams: 

3046 alpha = np.exp(params[-1]) 

3047 else: 

3048 alpha = params[-1] 

3049 

3050 params = params[:-1] 

3051 p = 2 - self.parameterization 

3052 y = self.endog 

3053 

3054 mu = self.predict(params) 

3055 mu_p = mu**p 

3056 a1 = mu_p / alpha 

3057 a2 = mu + a1 

3058 a3 = y + a1 

3059 a4 = p * a1 / mu 

3060 

3061 dgpart = digamma(a3) - digamma(a1) 

3062 dgterm = dgpart + np.log(a1 / a2) + 1 - a3 / a2 

3063 # TODO: better name/interpretation for dgterm? 

3064 

3065 dparams = (a4 * dgterm - 

3066 a3 / a2 + 

3067 y / mu) 

3068 dparams = (self.exog.T * mu * dparams).T 

3069 dalpha = -a1 / alpha * dgterm 

3070 

3071 return np.concatenate((dparams, np.atleast_2d(dalpha).T), 

3072 axis=1) 

3073 

3074 def score(self, params): 

3075 """ 

3076 Generalized Negative Binomial (NB-P) model score (gradient) vector of the log-likelihood 

3077 

3078 Parameters 

3079 ---------- 

3080 params : array_like 

3081 The parameters of the model 

3082 

3083 Returns 

3084 ------- 

3085 score : ndarray, 1-D 

3086 The score vector of the model, i.e. the first derivative of the 

3087 loglikelihood function, evaluated at `params` 

3088 """ 

3089 score = np.sum(self.score_obs(params), axis=0) 

3090 if self._transparams: 

3091            score[-1] *= np.exp(params[-1])  # chain rule: d llf/d log(alpha) = alpha * d llf/d alpha

3092 return score 

3093 else: 

3094 return score 

3095 

3096 def hessian(self, params): 

3097 """ 

3098        Generalized Negative Binomial (NB-P) model Hessian matrix of the log-likelihood

3099 

3100 Parameters 

3101 ---------- 

3102 params : array_like 

3103 The parameters of the model 

3104 

3105 Returns 

3106 ------- 

3107 hessian : ndarray, 2-D 

3108 The hessian matrix of the model. 

3109 """ 

3110 if self._transparams: 

3111 alpha = np.exp(params[-1]) 

3112 else: 

3113 alpha = params[-1] 

3114 params = params[:-1] 

3115 

3116 p = 2 - self.parameterization 

3117 y = self.endog 

3118 exog = self.exog 

3119 mu = self.predict(params) 

3120 

3121 mu_p = mu**p 

3122 a1 = mu_p / alpha 

3123 a2 = mu + a1 

3124 a3 = y + a1 

3125 a4 = p * a1 / mu 

3126 

3127 prob = a1 / a2 

3128 lprob = np.log(prob) 

3129 dgpart = digamma(a3) - digamma(a1) 

3130 pgpart = polygamma(1, a3) - polygamma(1, a1) 

3131 

3132 dim = exog.shape[1] 

3133 hess_arr = np.zeros((dim + 1, dim + 1)) 

3134 

3135 coeff = mu**2 * (((1 + a4)**2 * a3 / a2**2 - 

3136 a3 / a2 * (p - 1) * a4 / mu - 

3137 y / mu**2 - 

3138 2 * a4 * (1 + a4) / a2 + 

3139 p * a4 / mu * (lprob + dgpart + 2) - 

3140 a4 / mu * (lprob + dgpart + 1) + 

3141 a4**2 * pgpart) + 

3142 (-(1 + a4) * a3 / a2 + 

3143 y / mu + 

3144 a4 * (lprob + dgpart + 1)) / mu) 

3145 

3146 for i in range(dim): 

3147 hess_arr[i, :-1] = np.sum(self.exog[:, :].T * self.exog[:, i] * coeff, axis=1) 

3148 

3149 

3150 hess_arr[-1,:-1] = (self.exog[:, :].T * mu * a1 * 

3151 ((1 + a4) * (1 - a3 / a2) / a2 - 

3152 p * (lprob + dgpart + 2) / mu + 

3153 p / mu * (a3 + p * a1) / a2 - 

3154 a4 * pgpart) / alpha).sum(axis=1) 

3155 

3156 

3157 da2 = (a1 * (2 * lprob + 

3158 2 * dgpart + 3 - 

3159 2 * a3 / a2 

3160 + a1 * pgpart 

3161 - 2 * prob + 

3162 prob * a3 / a2) / alpha**2) 

3163 

3164 hess_arr[-1, -1] = da2.sum() 

3165 

3166 tri_idx = np.triu_indices(dim + 1, k=1) 

3167 hess_arr[tri_idx] = hess_arr.T[tri_idx] 

3168 

3169 return hess_arr 

3170 

3171 @Appender(_get_start_params_null_docs) 

3172 def _get_start_params_null(self): 

3173 offset = getattr(self, "offset", 0) 

3174 exposure = getattr(self, "exposure", 0) 

3175 q = self.parameterization - 1 

3176 

3177 const = (self.endog / np.exp(offset + exposure)).mean() 

3178 params = [np.log(const)] 

3179 mu = const * np.exp(offset + exposure) 

3180 resid = self.endog - mu 

3181 a = self._estimate_dispersion(mu, resid, df_resid=resid.shape[0] - 1) 

3182 params.append(a) 

3183 

3184 return np.array(params) 

3185 

3186 def _estimate_dispersion(self, mu, resid, df_resid=None): 

3187 q = self.parameterization - 1 

3188 if df_resid is None: 

3189 df_resid = resid.shape[0] 

3190 a = ((resid**2 / mu - 1) * mu**(-q)).sum() / df_resid 

3191 return a 

3192 

3193 @Appender(DiscreteModel.fit.__doc__) 

3194 def fit(self, start_params=None, method='bfgs', maxiter=35, 

3195 full_output=1, disp=1, callback=None, use_transparams=False, 

3196 cov_type='nonrobust', cov_kwds=None, use_t=None, **kwargs): 

3197 # TODO: Fix doc string 

3198 """ 

3199 use_transparams : bool 

3200            If True, an internal transformation of the dispersion parameter

3201            is used to impose non-negativity. Default is False.

3202            use_transparams=True imposes the no-underdispersion (alpha > 0)

3203            constraint. If use_transparams=True and method is "newton" or

3204            "ncg", the transformation is ignored.

3205 """ 

3206 if use_transparams and method not in ['newton', 'ncg']: 

3207 self._transparams = True 

3208 else: 

3209 if use_transparams: 

3210 warnings.warn('Parameter "use_transparams" is ignored', 

3211 RuntimeWarning) 

3212 self._transparams = False 

3213 

3214 if start_params is None: 

3215 offset = getattr(self, "offset", 0) + getattr(self, "exposure", 0) 

3216 if np.size(offset) == 1 and offset == 0: 

3217 offset = None 

3218 

3219 optim_kwds_prelim = {'disp': 0, 'skip_hessian': True, 

3220 'warn_convergence': False} 

3221 optim_kwds_prelim.update(kwargs.get('optim_kwds_prelim', {})) 

3222 mod_poi = Poisson(self.endog, self.exog, offset=offset) 

3223 res_poi = mod_poi.fit(**optim_kwds_prelim) 

3224 start_params = res_poi.params 

3225 a = self._estimate_dispersion(res_poi.predict(), res_poi.resid, 

3226 df_resid=res_poi.df_resid) 

3227 start_params = np.append(start_params, max(0.05, a)) 

3228 

3229 if callback is None: 

3230 # work around perfect separation callback #3895 

3231 callback = lambda *x: x 

3232 

3233 mlefit = super(NegativeBinomialP, self).fit(start_params=start_params, 

3234 maxiter=maxiter, method=method, disp=disp, 

3235 full_output=full_output, callback=callback, 

3236 **kwargs) 

3237 

3238 if use_transparams and method not in ["newton", "ncg"]: 

3239 self._transparams = False 

3240 mlefit._results.params[-1] = np.exp(mlefit._results.params[-1]) 

3241 

3242 nbinfit = NegativeBinomialResults(self, mlefit._results) 

3243 result = NegativeBinomialResultsWrapper(nbinfit) 

3244 

3245 if cov_kwds is None: 

3246 cov_kwds = {} 

3247 result._get_robustcov_results(cov_type=cov_type, 

3248 use_self=True, use_t=use_t, **cov_kwds) 

3249 return result 

3250 

3251 @Appender(DiscreteModel.fit_regularized.__doc__) 

3252 def fit_regularized(self, start_params=None, method='l1', 

3253 maxiter='defined_by_method', full_output=1, disp=1, callback=None, 

3254 alpha=0, trim_mode='auto', auto_trim_tol=0.01, size_trim_tol=1e-4, 

3255 qc_tol=0.03, **kwargs): 

3256 

3257 _validate_l1_method(method) 

3258 

3259 if np.size(alpha) == 1 and alpha != 0: 

3260 k_params = self.exog.shape[1] + self.k_extra 

3261 alpha = alpha * np.ones(k_params) 

3262 alpha[-1] = 0 

3263 

3264 alpha_p = alpha[:-1] if (self.k_extra and np.size(alpha) > 1) else alpha 

3265 

3266 self._transparams = False 

3267 if start_params is None: 

3268 offset = getattr(self, "offset", 0) + getattr(self, "exposure", 0) 

3269 if np.size(offset) == 1 and offset == 0: 

3270 offset = None 

3271 mod_poi = Poisson(self.endog, self.exog, offset=offset) 

3272 start_params = mod_poi.fit_regularized( 

3273 start_params=start_params, method=method, maxiter=maxiter, 

3274 full_output=full_output, disp=0, callback=callback, 

3275 alpha=alpha_p, trim_mode=trim_mode, auto_trim_tol=auto_trim_tol, 

3276 size_trim_tol=size_trim_tol, qc_tol=qc_tol, **kwargs).params 

3277 start_params = np.append(start_params, 0.1) 

3278 

3279 cntfit = super(CountModel, self).fit_regularized( 

3280 start_params=start_params, method=method, maxiter=maxiter, 

3281 full_output=full_output, disp=disp, callback=callback, 

3282 alpha=alpha, trim_mode=trim_mode, auto_trim_tol=auto_trim_tol, 

3283 size_trim_tol=size_trim_tol, qc_tol=qc_tol, **kwargs) 

3284 

3285 discretefit = L1NegativeBinomialResults(self, cntfit) 

3286 

3287 return L1NegativeBinomialResultsWrapper(discretefit) 

3288 

3289 def predict(self, params, exog=None, exposure=None, offset=None, 

3290 which='mean'): 

3291 """ 

3292 Predict response variable of a model given exogenous variables. 

3293 

3294 Parameters 

3295 ---------- 

3296 params : array_like 

3297            1d array of fitted parameters of the model. Should be in the

3298 order returned from the model. 

3299 exog : array_like, optional 

3300 1d or 2d array of exogenous values. If not supplied, the 

3301 whole exog attribute of the model is used. If a 1d array is given 

3302 it assumed to be 1 row of exogenous variables. If you only have 

3303 one regressor and would like to do prediction, you must provide 

3304 a 2d array with shape[1] == 1. 


3308 offset : array_like, optional 

3309 Offset is added to the linear prediction with coefficient equal to 1. 

3310 exposure : array_like, optional 

3311 Log(exposure) is added to the linear prediction with coefficient 

3312 equal to 1. 

3313 which : 'mean', 'linear', 'prob', optional. 

3314 'mean' returns the exp of linear predictor exp(dot(exog,params)). 

3315 'linear' returns the linear predictor dot(exog,params). 

3316 'prob' return probabilities for counts from 0 to max(endog). 

3317 Default is 'mean'. 

3318 

3319 Notes 

3320 ----- 

3321 """ 

3322 if exog is None: 

3323 exog = self.exog 

3324 

3325 if exposure is None: 

3326 exposure = getattr(self, 'exposure', 0) 

3327 elif exposure != 0: 

3328 exposure = np.log(exposure) 

3329 

3330 if offset is None: 

3331 offset = getattr(self, 'offset', 0) 

3332 

3333 fitted = np.dot(exog, params[:exog.shape[1]]) 

3334 linpred = fitted + exposure + offset 

3335 

3336 if which == 'mean': 

3337 return np.exp(linpred) 

3338 elif which == 'linear': 

3339 return linpred 

3340 elif which =='prob': 

3341 counts = np.atleast_2d(np.arange(0, np.max(self.endog)+1)) 

3342 mu = self.predict(params, exog, exposure, offset) 

3343 size, prob = self.convert_params(params, mu) 

3344 return nbinom.pmf(counts, size[:,None], prob[:,None]) 

3345 else: 

3346 raise ValueError('keyword "which" = %s not recognized' % which) 

3347 

3348 def convert_params(self, params, mu): 

3349 alpha = params[-1] 

3350 p = 2 - self.parameterization 

3351 

3352 size = 1. / alpha * mu**p 

3353 prob = size / (size + mu) 

3354 

3355 return (size, prob) 

3356 

3357 
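As a minimal usage sketch of the `which` options documented above and of `convert_params` (the synthetic data, seed, and NB2 parameterization below are assumptions for illustration, not part of this module):

import numpy as np
import statsmodels.api as sm

rng = np.random.default_rng(0)
nobs = 500
exog = np.column_stack([np.ones(nobs), rng.normal(size=nobs)])
mu_true = np.exp(0.5 + 0.3 * exog[:, 1])
endog = rng.negative_binomial(n=2.0, p=2.0 / (2.0 + mu_true))   # NB2 data, alpha = 0.5

mod = sm.NegativeBinomialP(endog, exog, p=2)          # NB2 parameterization
res = mod.fit(disp=0)

mean = mod.predict(res.params, which='mean')          # exp(X beta), per observation
lin = mod.predict(res.params, which='linear')         # X beta
probs = mod.predict(res.params, which='prob')         # nobs x (max(endog) + 1) pmf values
size, prob = mod.convert_params(res.params, mean)     # scipy.stats.nbinom (size, prob) pairs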

3358### Results Class ### 

3359 

3360class DiscreteResults(base.LikelihoodModelResults): 

3361 __doc__ = _discrete_results_docs % {"one_line_description" : 

3362 "A results class for the discrete dependent variable models.", 

3363 "extra_attr" : ""} 

3364 

3365 def __init__(self, model, mlefit, cov_type='nonrobust', cov_kwds=None, 

3366 use_t=None): 

3367 #super(DiscreteResults, self).__init__(model, params, 

3368 # np.linalg.inv(-hessian), scale=1.) 

3369 self.model = model 

3370 self.df_model = model.df_model 

3371 self.df_resid = model.df_resid 

3372 self._cache = {} 

3373 self.nobs = model.exog.shape[0] 

3374 self.__dict__.update(mlefit.__dict__) 

3375 

3376 if not hasattr(self, 'cov_type'): 

3377 # do this only if super, i.e. mlefit did not already add cov_type 

3378 # robust covariance 

3379 if use_t is not None: 

3380 self.use_t = use_t 

3381 if cov_type == 'nonrobust': 

3382 self.cov_type = 'nonrobust' 

3383 self.cov_kwds = {'description' : 'Standard Errors assume that the ' + 

3384 'covariance matrix of the errors is correctly ' + 

3385 'specified.'} 

3386 else: 

3387 if cov_kwds is None: 

3388 cov_kwds = {} 

3389 from statsmodels.base.covtype import get_robustcov_results 

3390 get_robustcov_results(self, cov_type=cov_type, use_self=True, 

3391 **cov_kwds) 

3392 

3393 

3394 def __getstate__(self): 

3395 # remove unpicklable methods 

3396 mle_settings = getattr(self, 'mle_settings', None) 

3397 if mle_settings is not None: 

3398 if 'callback' in mle_settings: 

3399 mle_settings['callback'] = None 

3400 if 'cov_params_func' in mle_settings: 

3401 mle_settings['cov_params_func'] = None 

3402 return self.__dict__ 

3403 

3404 @cache_readonly 

3405 def prsquared(self): 

3406 """ 

3407 McFadden's pseudo-R-squared. `1 - (llf / llnull)` 

3408 """ 

3409 return 1 - self.llf/self.llnull 

3410 

3411 @cache_readonly 

3412 def llr(self): 

3413 """ 

3414 Likelihood ratio chi-squared statistic; `-2*(llnull - llf)` 

3415 """ 

3416 return -2*(self.llnull - self.llf) 

3417 

3418 @cache_readonly 

3419 def llr_pvalue(self): 

3420 """ 

3421 The chi-squared probability of getting a log-likelihood ratio 

3422 statistic greater than llr. llr has a chi-squared distribution 

3423 with degrees of freedom `df_model`. 

3424 """ 

3425 return stats.distributions.chi2.sf(self.llr, self.df_model) 

3426 
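The three statistics above fit together as a likelihood-ratio test against the constant-only model. A minimal check, assuming a Logit fit on synthetic data:

import numpy as np
import statsmodels.api as sm
from scipy import stats

rng = np.random.default_rng(0)
exog = np.column_stack([np.ones(500), rng.normal(size=(500, 2))])
endog = (exog @ [0.2, 1.0, -0.5] + rng.logistic(size=500) > 0).astype(float)

res = sm.Logit(endog, exog).fit(disp=0)
assert np.isclose(res.prsquared, 1 - res.llf / res.llnull)
assert np.isclose(res.llr, -2 * (res.llnull - res.llf))
assert np.isclose(res.llr_pvalue, stats.chi2.sf(res.llr, res.df_model))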

3427 def set_null_options(self, llnull=None, attach_results=True, **kwargs): 

3428 """ 

3429 Set the fit options for the Null (constant-only) model. 

3430 

3431 This resets the cache for related attributes which is potentially 

3432 fragile. This only sets the option, the null model is estimated 

3433 when llnull is accessed, if llnull is not yet in cache. 

3434 

3435 Parameters 

3436 ---------- 

3437 llnull : {None, float} 

3438 If llnull is not None, then the value will be directly assigned to 

3439 the cached attribute "llnull". 

3440 attach_results : bool 

3441 Sets an internal flag whether the results instance of the null 

3442 model should be attached. By default without calling this method, 

3443 the null model results are not attached and only the loglikelihood

3444 value llnull is stored. 

3445 **kwargs 

3446 Additional keyword arguments used as fit keyword arguments for the 

3447 null model. They override the model default values.

3448 

3449 Notes 

3450 ----- 

3451 Modifies attributes of this instance, and so has no return. 

3452 """ 

3453 # reset cache, note we need to add here anything that depends on 

3454 # llnull or the null model. If something is missing, then the attribute

3455 # might be incorrect. 

3456 self._cache.pop('llnull', None) 

3457 self._cache.pop('llr', None) 

3458 self._cache.pop('llr_pvalue', None) 

3459 self._cache.pop('prsquared', None) 

3460 if hasattr(self, 'res_null'): 

3461 del self.res_null 

3462 

3463 if llnull is not None: 

3464 self._cache['llnull'] = llnull 

3465 self._attach_nullmodel = attach_results 

3466 self._optim_kwds_null = kwargs 

3467 
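For example, a hypothetical call that keeps the null-model results attached and passes fit options through to the constant-only fit (the Poisson model and synthetic data here are assumptions):

import numpy as np
import statsmodels.api as sm

rng = np.random.default_rng(0)
exog = np.column_stack([np.ones(200), rng.normal(size=200)])
endog = rng.poisson(np.exp(0.3 + 0.5 * exog[:, 1]))

res = sm.Poisson(endog, exog).fit(disp=0)
res.set_null_options(attach_results=True, method='bfgs', maxiter=5000)
print(res.llnull)             # the constant-only model is fit on first access
print(res.res_null.params)    # null-model results attached because attach_results=True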

3468 @cache_readonly 

3469 def llnull(self): 

3470 """ 

3471 Value of the constant-only loglikelihood 

3472 """ 

3473 model = self.model 

3474 kwds = model._get_init_kwds().copy() 

3475 for key in getattr(model, '_null_drop_keys', []): 

3476 del kwds[key] 

3477 # TODO: what parameters to pass to fit? 

3478 mod_null = model.__class__(model.endog, np.ones(self.nobs), **kwds) 

3479 # TODO: consider catching and warning on convergence failure? 

3480 # in the meantime, try hard to converge. see 

3481 # TestPoissonConstrained1a.test_smoke 

3482 

3483 optim_kwds = getattr(self, '_optim_kwds_null', {}).copy() 

3484 

3485 if 'start_params' in optim_kwds: 

3486 # user provided 

3487 sp_null = optim_kwds.pop('start_params') 

3488 elif hasattr(model, '_get_start_params_null'): 

3489 # get moment estimates if available 

3490 sp_null = model._get_start_params_null() 

3491 else: 

3492 sp_null = None 

3493 

3494 opt_kwds = dict(method='bfgs', warn_convergence=False, maxiter=10000, 

3495 disp=0) 

3496 opt_kwds.update(optim_kwds) 

3497 

3498 if optim_kwds: 

3499 res_null = mod_null.fit(start_params=sp_null, **opt_kwds) 

3500 else: 

3501 # this should be a reasonably robust default across versions

3502 res_null = mod_null.fit(start_params=sp_null, method='nm', 

3503 warn_convergence=False, 

3504 maxiter=10000, disp=0) 

3505 res_null = mod_null.fit(start_params=res_null.params, method='bfgs', 

3506 warn_convergence=False, 

3507 maxiter=10000, disp=0) 

3508 

3509 if getattr(self, '_attach_nullmodel', False) is not False: 

3510 self.res_null = res_null 

3511 

3512 return res_null.llf 

3513 

3514 @cache_readonly 

3515 def fittedvalues(self): 

3516 """ 

3517 Linear predictor XB. 

3518 """ 

3519 return np.dot(self.model.exog, self.params[:self.model.exog.shape[1]]) 

3520 

3521 @cache_readonly 

3522 def resid_response(self): 

3523 """ 

3524 Response residuals. The response residuals are defined as

3525 `endog - fittedvalues` 

3526 """ 

3527 return self.model.endog - self.predict() 

3528 

3529 @cache_readonly 

3530 def aic(self): 

3531 """ 

3532 Akaike information criterion. `-2*(llf - p)` where `p` is the number 

3533 of regressors including the intercept. 

3534 """ 

3535 return -2*(self.llf - (self.df_model+1)) 

3536 

3537 @cache_readonly 

3538 def bic(self): 

3539 """ 

3540 Bayesian information criterion. `-2*llf + ln(nobs)*p` where `p` is the 

3541 number of regressors including the intercept. 

3542 """ 

3543 return -2*self.llf + np.log(self.nobs)*(self.df_model+1) 

3544 
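A small consistency check of these definitions (a Logit fit on assumed synthetic data; `p` counts the regressors including the intercept):

import numpy as np
import statsmodels.api as sm

rng = np.random.default_rng(0)
exog = np.column_stack([np.ones(300), rng.normal(size=(300, 2))])
endog = (exog @ [0.1, 0.8, -0.4] + rng.logistic(size=300) > 0).astype(float)

res = sm.Logit(endog, exog).fit(disp=0)
p = res.df_model + 1
assert np.isclose(res.aic, -2 * (res.llf - p))
assert np.isclose(res.bic, -2 * res.llf + np.log(res.nobs) * p)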

3545 def _get_endog_name(self, yname, yname_list): 

3546 if yname is None: 

3547 yname = self.model.endog_names 

3548 if yname_list is None: 

3549 yname_list = self.model.endog_names 

3550 return yname, yname_list 

3551 

3552 def get_margeff(self, at='overall', method='dydx', atexog=None, 

3553 dummy=False, count=False): 

3554 """Get marginal effects of the fitted model. 

3555 

3556 Parameters 

3557 ---------- 

3558 at : str, optional 

3559 Options are: 

3560 

3561 - 'overall', The average of the marginal effects at each 

3562 observation. 

3563 - 'mean', The marginal effects at the mean of each regressor. 

3564 - 'median', The marginal effects at the median of each regressor. 

3565 - 'zero', The marginal effects at zero for each regressor. 

3566 - 'all', The marginal effects at each observation. If `at` is all 

3567 only margeff will be available from the returned object. 

3568 

3569 Note that if `exog` is specified, then marginal effects for all 

3570 variables not specified by `exog` are calculated using the `at` 

3571 option. 

3572 method : str, optional 

3573 Options are: 

3574 

3575 - 'dydx' - dy/dx - No transformation is made and marginal effects 

3576 are returned. This is the default. 

3577 - 'eyex' - estimate elasticities of variables in `exog` -- 

3578 d(lny)/d(lnx) 

3579 - 'dyex' - estimate semi-elasticity -- dy/d(lnx) 

3580 - 'eydx' - estimate semi-elasticity -- d(lny)/dx 

3581 

3582 Note that transformations are done after each observation is

3583 calculated. Semi-elasticities for binary variables are computed 

3584 using the midpoint method. 'dyex' and 'eyex' do not make sense 

3585 for discrete variables. For interpretations of these methods 

3586 see notes below. 

3587 atexog : array_like, optional 

3588 Optionally, you can provide the exogenous variables over which to 

3589 get the marginal effects. This should be a dictionary with the

3590 zero-indexed column numbers as keys and the values at which to evaluate those variables as values.

3591 Default is None for all independent variables less the constant. 

3592 dummy : bool, optional 

3593 If False, treats binary variables (if present) as continuous. This 

3594 is the default. Else if True, treats binary variables as 

3595 changing from 0 to 1. Note that any variable that is either 0 or 1 

3596 is treated as binary. Each binary variable is treated separately 

3597 for now. 

3598 count : bool, optional 

3599 If False, treats count variables (if present) as continuous. This 

3600 is the default. Else if True, the marginal effect is the 

3601 change in probabilities when each observation is increased by one. 

3602 

3603 Returns 

3604 ------- 

3605 DiscreteMargins : marginal effects instance 

3606 Returns an object that holds the marginal effects, standard 

3607 errors, confidence intervals, etc. See 

3608 `statsmodels.discrete.discrete_margins.DiscreteMargins` for more 

3609 information. 

3610 

3611 Notes 

3612 ----- 

3613 Interpretations of methods: 

3614 

3615 - 'dydx' - change in `endog` for a change in `exog`. 

3616 - 'eyex' - proportional change in `endog` for a proportional change 

3617 in `exog`. 

3618 - 'dyex' - change in `endog` for a proportional change in `exog`. 

3619 - 'eydx' - proportional change in `endog` for a change in `exog`. 

3620 

3621 When using after Poisson, returns the expected number of events per 

3622 period, assuming that the model is loglinear. 

3623 """ 

3624 from statsmodels.discrete.discrete_margins import DiscreteMargins 

3625 return DiscreteMargins(self, (at, method, atexog, dummy, count)) 

3626 
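A minimal usage sketch (the Logit model and synthetic data are assumptions): average marginal effects first, then elasticities evaluated at the regressor means:

import numpy as np
import statsmodels.api as sm

rng = np.random.default_rng(0)
exog = np.column_stack([np.ones(400), rng.normal(size=(400, 2))])
endog = (exog @ [0.0, 1.0, -1.0] + rng.logistic(size=400) > 0).astype(float)

res = sm.Logit(endog, exog).fit(disp=0)
ame = res.get_margeff(at='overall', method='dydx')    # average marginal effects
print(ame.summary())
eyex = res.get_margeff(at='mean', method='eyex')      # elasticities at the means
print(eyex.margeff, eyex.margeff_se)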

3627 def summary(self, yname=None, xname=None, title=None, alpha=.05, 

3628 yname_list=None): 

3629 """ 

3630 Summarize the Regression Results. 

3631 

3632 Parameters 

3633 ---------- 

3634 yname : str, optional 

3635 The name of the endog variable in the tables. The default is `y`. 

3636 xname : list[str], optional 

3637 The names for the exogenous variables, default is "var_xx". 

3638 Must match the number of parameters in the model. 

3639 title : str, optional 

3640 Title for the top table. If not None, then this replaces the 

3641 default title. 

3642 alpha : float 

3643 The significance level for the confidence intervals. 

3644 

3645 Returns 

3646 ------- 

3647 Summary 

3648 Class that holds the summary tables and text, which can be printed 

3649 or converted to various output formats. 

3650 

3651 See Also 

3652 -------- 

3653 statsmodels.iolib.summary.Summary : Class that holds summary results.

3654 """ 

3655 

3656 top_left = [('Dep. Variable:', None), 

3657 ('Model:', [self.model.__class__.__name__]), 

3658 ('Method:', ['MLE']), 

3659 ('Date:', None), 

3660 ('Time:', None), 

3661 ('converged:', ["%s" % self.mle_retvals['converged']]), 

3662 ] 

3663 

3664 top_right = [('No. Observations:', None), 

3665 ('Df Residuals:', None), 

3666 ('Df Model:', None), 

3667 ('Pseudo R-squ.:', ["%#6.4g" % self.prsquared]), 

3668 ('Log-Likelihood:', None), 

3669 ('LL-Null:', ["%#8.5g" % self.llnull]), 

3670 ('LLR p-value:', ["%#6.4g" % self.llr_pvalue]) 

3671 ] 

3672 

3673 if hasattr(self, 'cov_type'): 

3674 top_left.append(('Covariance Type:', [self.cov_type])) 

3675 

3676 if title is None: 

3677 title = self.model.__class__.__name__ + ' ' + "Regression Results" 

3678 

3679 # boiler plate 

3680 from statsmodels.iolib.summary import Summary 

3681 smry = Summary() 

3682 yname, yname_list = self._get_endog_name(yname, yname_list) 

3683 

3684 # for top of table 

3685 smry.add_table_2cols(self, gleft=top_left, gright=top_right, 

3686 yname=yname, xname=xname, title=title) 

3687 

3688 # for parameters, etc 

3689 smry.add_table_params(self, yname=yname_list, xname=xname, alpha=alpha, 

3690 use_t=self.use_t) 

3691 

3692 if hasattr(self, 'constraints'): 

3693 smry.add_extra_txt(['Model has been estimated subject to linear ' 

3694 'equality constraints.']) 

3695 

3696 return smry 

3697 

3698 def summary2(self, yname=None, xname=None, title=None, alpha=.05, 

3699 float_format="%.4f"): 

3700 """ 

3701 Experimental function to summarize regression results. 

3702 

3703 Parameters 

3704 ---------- 

3705 yname : str 

3706 Name of the dependent variable (optional). 

3707 xname : list[str], optional 

3708 List of strings of length equal to the number of parameters 

3709 Names of the independent variables (optional). 

3710 title : str, optional 

3711 Title for the top table. If not None, then this replaces the 

3712 default title. 

3713 alpha : float 

3714 The significance level for the confidence intervals. 

3715 float_format : str 

3716 The print format for floats in parameters summary. 

3717 

3718 Returns 

3719 ------- 

3720 Summary 

3721 Instance that contains the summary tables and text, which can be 

3722 printed or converted to various output formats. 

3723 

3724 See Also 

3725 -------- 

3726 statsmodels.iolib.summary2.Summary : Class that holds summary results. 

3727 """ 

3728 from statsmodels.iolib import summary2 

3729 smry = summary2.Summary() 

3730 smry.add_base(results=self, alpha=alpha, float_format=float_format, 

3731 xname=xname, yname=yname, title=title) 

3732 

3733 if hasattr(self, 'constraints'): 

3734 smry.add_text('Model has been estimated subject to linear ' 

3735 'equality constraints.') 

3736 

3737 return smry 

3738 

3739 

3740class CountResults(DiscreteResults): 

3741 __doc__ = _discrete_results_docs % { 

3742 "one_line_description": "A results class for count data", 

3743 "extra_attr": ""} 

3744 @cache_readonly 

3745 def resid(self): 

3746 """ 

3747 Residuals 

3748 

3749 Notes 

3750 ----- 

3751 The residuals for Count models are defined as 

3752 

3753 .. math:: y - p 

3754 

3755 where :math:`p = \\exp(X\\beta)`. Any exposure and offset variables 

3756 are also handled. 

3757 """ 

3758 return self.model.endog - self.predict() 

3759 

3760 

3761class NegativeBinomialResults(CountResults): 

3762 __doc__ = _discrete_results_docs % { 

3763 "one_line_description": "A results class for NegativeBinomial 1 and 2", 

3764 "extra_attr": ""} 

3765 

3766 @cache_readonly 

3767 def lnalpha(self): 

3768 """Natural log of alpha""" 

3769 return np.log(self.params[-1]) 

3770 

3771 @cache_readonly 

3772 def lnalpha_std_err(self): 

3773 """Natural log of standardized error""" 

3774 return self.bse[-1] / self.params[-1] 

3775 

3776 @cache_readonly 

3777 def aic(self): 

3778 # k_extra accounts for the estimated dispersion parameter alpha

3779 k_extra = getattr(self.model, 'k_extra', 0) 

3780 return -2*(self.llf - (self.df_model + self.k_constant + k_extra)) 

3781 

3782 @cache_readonly 

3783 def bic(self): 

3784 # k_extra accounts for the estimated dispersion parameter alpha

3785 k_extra = getattr(self.model, 'k_extra', 0) 

3786 return -2*self.llf + np.log(self.nobs)*(self.df_model + 

3787 self.k_constant + k_extra) 

3788 
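A sketch of the alpha-related attributes and the adjusted information criteria for an NB2 fit (synthetic data with true alpha = 0.5 is an assumption):

import numpy as np
import statsmodels.api as sm

rng = np.random.default_rng(0)
exog = np.column_stack([np.ones(500), rng.normal(size=500)])
mu_true = np.exp(0.4 + 0.6 * exog[:, 1])
endog = rng.negative_binomial(n=2.0, p=2.0 / (2.0 + mu_true))

res = sm.NegativeBinomial(endog, exog, loglike_method='nb2').fit(disp=0)
print(res.params[-1], res.lnalpha, res.lnalpha_std_err)
k = res.df_model + 1 + 1     # slope df + intercept + alpha
assert np.isclose(res.aic, -2 * (res.llf - k))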

3789 

3790class GeneralizedPoissonResults(NegativeBinomialResults): 

3791 __doc__ = _discrete_results_docs % { 

3792 "one_line_description": "A results class for Generalized Poisson", 

3793 "extra_attr": ""} 

3794 

3795 @cache_readonly 

3796 def _dispersion_factor(self): 

3797 p = getattr(self.model, 'parameterization', 0) 

3798 mu = self.predict() 

3799 return (1 + self.params[-1] * mu**p)**2 

3800 

3801class L1CountResults(DiscreteResults): 

3802 __doc__ = _discrete_results_docs % {"one_line_description" : 

3803 "A results class for count data fit by l1 regularization", 

3804 "extra_attr" : _l1_results_attr} 

3805 

3806 def __init__(self, model, cntfit): 

3807 super(L1CountResults, self).__init__(model, cntfit) 

3808 # self.trimmed is a boolean array with T/F telling whether or not that 

3809 # entry in params has been zeroed out.

3810 self.trimmed = cntfit.mle_retvals['trimmed'] 

3811 self.nnz_params = (~self.trimmed).sum() 

3812 

3813 # Set degrees of freedom. In doing so, 

3814 # adjust for extra parameter in NegativeBinomial nb1 and nb2 

3815 # extra parameter is not included in df_model 

3816 k_extra = getattr(self.model, 'k_extra', 0) 

3817 

3818 self.df_model = self.nnz_params - 1 - k_extra 

3819 self.df_resid = float(self.model.endog.shape[0] - self.nnz_params) + k_extra 

3820 

3821class PoissonResults(CountResults): 

3822 

3823 def predict_prob(self, n=None, exog=None, exposure=None, offset=None, 

3824 transform=True): 

3825 """ 

3826 Return predicted probability of each count level for each observation 

3827 

3828 Parameters 

3829 ---------- 

3830 n : array_like or int 

3831 The counts for which you want the probabilities. If n is None 

3832 then the probabilities for each count from 0 to max(y) are 

3833 given. 

3834 

3835 Returns 

3836 ------- 

3837 ndarray 

3838 A nobs x len(`n`) array where the columns are indexed by the count

3839 n. If n is None, then column 0 is the probability that each 

3840 observation is 0, column 1 is the probability that each 

3841 observation is 1, etc. 

3842 """ 

3843 if n is not None: 

3844 counts = np.atleast_2d(n) 

3845 else: 

3846 counts = np.atleast_2d(np.arange(0, np.max(self.model.endog)+1)) 

3847 mu = self.predict(exog=exog, exposure=exposure, offset=offset, 

3848 transform=transform, linear=False)[:,None] 

3849 # uses broadcasting 

3850 return stats.poisson.pmf(counts, mu) 

3851 
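A sketch of `predict_prob` on an assumed Poisson fit: column j of the result holds P(Y_i = j) under the fitted mean for observation i.

import numpy as np
import statsmodels.api as sm

rng = np.random.default_rng(0)
exog = np.column_stack([np.ones(300), rng.normal(size=300)])
endog = rng.poisson(np.exp(0.2 + 0.5 * exog[:, 1]))

res = sm.Poisson(endog, exog).fit(disp=0)
probs = res.predict_prob()          # nobs x (max(endog) + 1) Poisson pmf values
print(probs.shape)
p01 = res.predict_prob(n=[0, 1])    # only P(Y=0) and P(Y=1) for each observation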

3852 @property 

3853 def resid_pearson(self): 

3854 """ 

3855 Pearson residuals 

3856 

3857 Notes 

3858 ----- 

3859 Pearson residuals are defined to be 

3860 

3861 .. math:: r_j = \\frac{(y - M_jp_j)}{\\sqrt{M_jp_j(1-p_j)}} 

3862 

3863 where :math:`p_j=cdf(X\\beta)` and :math:`M_j` is the total number of 

3864 observations sharing the covariate pattern :math:`j`. 

3865 

3866 For now :math:`M_j` is always set to 1. 

3867 """ 

3868 # Pearson residuals 

3869 p = self.predict() # fittedvalues is still linear 

3870 return (self.model.endog - p)/np.sqrt(p) 

3871 

3872 

3873class L1PoissonResults(L1CountResults, PoissonResults): 

3874 pass 

3875 

3876class L1NegativeBinomialResults(L1CountResults, NegativeBinomialResults): 

3877 pass 

3878 

3879class L1GeneralizedPoissonResults(L1CountResults, GeneralizedPoissonResults): 

3880 pass 

3881 

3882class OrderedResults(DiscreteResults): 

3883 __doc__ = _discrete_results_docs % {"one_line_description": "A results class for ordered discrete data.", "extra_attr": ""}

3884 pass 

3885 

3886class BinaryResults(DiscreteResults): 

3887 __doc__ = _discrete_results_docs % {"one_line_description" : "A results class for binary data", "extra_attr" : ""} 

3888 

3889 def pred_table(self, threshold=.5): 

3890 """ 

3891 Prediction table 

3892 

3893 Parameters 

3894 ---------- 

3895 threshold : scalar 

3896 Number between 0 and 1. Threshold above which a prediction is 

3897 considered 1 and below which a prediction is considered 0. 

3898 

3899 Notes 

3900 ----- 

3901 pred_table[i,j] refers to the number of times "i" was observed and 

3902 the model predicted "j". Correct predictions are along the diagonal. 

3903 """ 

3904 model = self.model 

3905 actual = model.endog 

3906 pred = np.array(self.predict() > threshold, dtype=float) 

3907 bins = np.array([0, 0.5, 1]) 

3908 return np.histogram2d(actual, pred, bins=bins)[0] 

3909 
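A minimal sketch (Logit on assumed synthetic data); rows are observed outcomes, columns are predicted outcomes, so the diagonal counts correct classifications:

import numpy as np
import statsmodels.api as sm

rng = np.random.default_rng(0)
exog = np.column_stack([np.ones(400), rng.normal(size=(400, 2))])
endog = (exog @ [0.0, 1.5, -1.0] + rng.logistic(size=400) > 0).astype(float)

res = sm.Logit(endog, exog).fit(disp=0)
table = res.pred_table(threshold=0.5)
print(table)                               # 2x2: [[TN, FP], [FN, TP]]
accuracy = np.trace(table) / table.sum()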

3910 @Appender(DiscreteResults.summary.__doc__) 

3911 def summary(self, yname=None, xname=None, title=None, alpha=.05, 

3912 yname_list=None): 

3913 smry = super(BinaryResults, self).summary(yname, xname, title, alpha, 

3914 yname_list) 

3915 fittedvalues = self.model.cdf(self.fittedvalues) 

3916 absprederror = np.abs(self.model.endog - fittedvalues) 

3917 predclose_sum = (absprederror < 1e-4).sum() 

3918 predclose_frac = predclose_sum / len(fittedvalues) 

3919 

3920 # add warnings/notes 

3921 etext = [] 

3922 if predclose_sum == len(fittedvalues): # TODO: nobs? 

3923 wstr = "Complete Separation: The results show that there is" 

3924 wstr += " complete separation.\n"

3925 wstr += "In this case the Maximum Likelihood Estimator does " 

3926 wstr += "not exist and the parameters\n" 

3927 wstr += "are not identified." 

3928 etext.append(wstr) 

3929 elif predclose_frac > 0.1: # TODO: get better diagnosis 

3930 wstr = "Possibly complete quasi-separation: A fraction " 

3931 wstr += "%4.2f of observations can be\n" % predclose_frac 

3932 wstr += "perfectly predicted. This might indicate that there " 

3933 wstr += "is complete\nquasi-separation. In this case some " 

3934 wstr += "parameters will not be identified." 

3935 etext.append(wstr) 

3936 if etext: 

3937 smry.add_extra_txt(etext) 

3938 return smry 

3939 

3940 @cache_readonly 

3941 def resid_dev(self): 

3942 """ 

3943 Deviance residuals 

3944 

3945 Notes 

3946 ----- 

3947 Deviance residuals are defined 

3948 

3949 .. math:: d_j = \\pm\\left(2\\left[Y_j\\ln\\left(\\frac{Y_j}{M_jp_j}\\right) + (M_j - Y_j)\\ln\\left(\\frac{M_j-Y_j}{M_j(1-p_j)}\\right)\\right]\\right)^{1/2}

3950 

3951 where 

3952 

3953 :math:`p_j = cdf(X\\beta)` and :math:`M_j` is the total number of 

3954 observations sharing the covariate pattern :math:`j`. 

3955 

3956 For now :math:`M_j` is always set to 1. 

3957 """ 

3958 #These are the deviance residuals 

3959 #model = self.model 

3960 endog = self.model.endog 

3961 #exog = model.exog 

3962 # M = # of individuals that share a covariate pattern 

3963 # so M[i] = 2 for i = two share a covariate pattern 

3964 M = 1 

3965 p = self.predict() 

3966 #Y_0 = np.where(exog == 0) 

3967 #Y_M = np.where(exog == M) 

3968 #NOTE: Common covariate patterns are not yet handled 

3969 res = -(1-endog)*np.sqrt(2*M*np.abs(np.log(1-p))) + \ 

3970 endog*np.sqrt(2*M*np.abs(np.log(p))) 

3971 return res 

3972 

3973 @cache_readonly 

3974 def resid_pearson(self): 

3975 """ 

3976 Pearson residuals 

3977 

3978 Notes 

3979 ----- 

3980 Pearson residuals are defined to be 

3981 

3982 .. math:: r_j = \\frac{(y - M_jp_j)}{\\sqrt{M_jp_j(1-p_j)}} 

3983 

3984 where :math:`p_j=cdf(X\\beta)` and :math:`M_j` is the total number of 

3985 observations sharing the covariate pattern :math:`j`. 

3986 

3987 For now :math:`M_j` is always set to 1. 

3988 """ 

3989 # Pearson residuals 

3990 #model = self.model 

3991 endog = self.model.endog 

3992 #exog = model.exog 

3993 # M = # of individuals that share a covariate pattern 

3994 # so M[i] = 2 for i = two share a covariate pattern 

3995 # use unique row pattern? 

3996 M = 1 

3997 p = self.predict() 

3998 return (endog - M*p)/np.sqrt(M*p*(1-p)) 

3999 

4000 @cache_readonly 

4001 def resid_response(self): 

4002 """ 

4003 The response residuals 

4004 

4005 Notes 

4006 ----- 

4007 Response residuals are defined to be 

4008 

4009 .. math:: y - p 

4010 

4011 where :math:`p=cdf(X\\beta)`. 

4012 """ 

4013 return self.model.endog - self.predict() 

4014 

4015 

4016class LogitResults(BinaryResults): 

4017 __doc__ = _discrete_results_docs % { 

4018 "one_line_description": "A results class for Logit Model", 

4019 "extra_attr": ""} 

4020 

4021 @cache_readonly 

4022 def resid_generalized(self): 

4023 """ 

4024 Generalized residuals 

4025 

4026 Notes 

4027 ----- 

4028 The generalized residuals for the Logit model are defined 

4029 

4030 .. math:: y - p 

4031 

4032 where :math:`p=cdf(X\\beta)`. This is the same as the `resid_response` 

4033 for the Logit model. 

4034 """ 

4035 # Generalized residuals 

4036 return self.model.endog - self.predict() 

4037 

4038 

4039class ProbitResults(BinaryResults): 

4040 __doc__ = _discrete_results_docs % { 

4041 "one_line_description": "A results class for Probit Model", 

4042 "extra_attr": ""} 

4043 

4044 @cache_readonly 

4045 def resid_generalized(self): 

4046 """ 

4047 Generalized residuals 

4048 

4049 Notes 

4050 ----- 

4051 The generalized residuals for the Probit model are defined 

4052 

4053 .. math:: y\\frac{\\phi(X\\beta)}{\\Phi(X\\beta)}-(1-y)\\frac{\\phi(X\\beta)}{1-\\Phi(X\\beta)} 

4054 """ 

4055 # generalized residuals 

4056 model = self.model 

4057 endog = model.endog 

4058 XB = self.predict(linear=True) 

4059 pdf = model.pdf(XB) 

4060 cdf = model.cdf(XB) 

4061 return endog * pdf/cdf - (1-endog)*pdf/(1-cdf) 

4062 
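A sketch verifying the expression above against scipy's normal pdf and cdf (the Probit fit and synthetic data are assumptions; `xb` is the linear predictor):

import numpy as np
import statsmodels.api as sm
from scipy import stats

rng = np.random.default_rng(0)
exog = np.column_stack([np.ones(300), rng.normal(size=300)])
endog = (0.3 + 0.8 * exog[:, 1] + rng.normal(size=300) > 0).astype(float)

res = sm.Probit(endog, exog).fit(disp=0)
xb = res.fittedvalues
gen = (endog * stats.norm.pdf(xb) / stats.norm.cdf(xb)
       - (1 - endog) * stats.norm.pdf(xb) / (1 - stats.norm.cdf(xb)))
assert np.allclose(gen, res.resid_generalized)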

4063class L1BinaryResults(BinaryResults): 

4064 __doc__ = _discrete_results_docs % {"one_line_description" : 

4065 "Results instance for binary data fit by l1 regularization", 

4066 "extra_attr" : _l1_results_attr} 

4067 def __init__(self, model, bnryfit): 

4068 super(L1BinaryResults, self).__init__(model, bnryfit) 

4069 # self.trimmed is a boolean array with T/F telling whether or not that 

4070 # entry in params has been zeroed out.

4071 self.trimmed = bnryfit.mle_retvals['trimmed'] 

4072 self.nnz_params = (~self.trimmed).sum() 

4073 self.df_model = self.nnz_params - 1 

4074 self.df_resid = float(self.model.endog.shape[0] - self.nnz_params) 

4075 

4076 

4077class MultinomialResults(DiscreteResults): 

4078 __doc__ = _discrete_results_docs % {"one_line_description" : 

4079 "A results class for multinomial data", "extra_attr" : ""} 

4080 

4081 def __init__(self, model, mlefit): 

4082 super(MultinomialResults, self).__init__(model, mlefit) 

4083 self.J = model.J 

4084 self.K = model.K 

4085 

4086 @staticmethod 

4087 def _maybe_convert_ynames_int(ynames): 

4088 # see if they're integers 

4089 issue_warning = False 

4090 msg = ('endog contains values that are not int-like. Using string '

4091 'representation of value. Use integer-valued endog to ' 

4092 'suppress this warning.') 

4093 for i in ynames: 

4094 try: 

4095 if ynames[i] % 1 == 0: 

4096 ynames[i] = str(int(ynames[i])) 

4097 else: 

4098 issue_warning = True 

4099 ynames[i] = str(ynames[i]) 

4100 except TypeError: 

4101 ynames[i] = str(ynames[i]) 

4102 if issue_warning: 

4103 import warnings 

4104 warnings.warn(msg, SpecificationWarning) 

4105 

4106 return ynames 

4107 

4108 def _get_endog_name(self, yname, yname_list, all=False): 

4109 """ 

4110 If all is False, the first variable name is dropped 

4111 """ 

4112 model = self.model 

4113 if yname is None: 

4114 yname = model.endog_names 

4115 if yname_list is None: 

4116 ynames = model._ynames_map 

4117 ynames = self._maybe_convert_ynames_int(ynames) 

4118 # use range below to ensure sortedness 

4119 ynames = [ynames[key] for key in range(int(model.J))] 

4120 ynames = ['='.join([yname, name]) for name in ynames] 

4121 if not all: 

4122 yname_list = ynames[1:] # assumes first variable is dropped 

4123 else: 

4124 yname_list = ynames 

4125 return yname, yname_list 

4126 

4127 def pred_table(self): 

4128 """ 

4129 Returns the J x J prediction table. 

4130 

4131 Notes 

4132 ----- 

4133 pred_table[i,j] refers to the number of times "i" was observed and 

4134 the model predicted "j". Correct predictions are along the diagonal. 

4135 """ 

4136 ju = self.model.J - 1 # highest index 

4137 # these are the actual, predicted indices 

4138 #idx = lzip(self.model.endog, self.predict().argmax(1)) 

4139 bins = np.concatenate(([0], np.linspace(0.5, ju - 0.5, ju), [ju])) 

4140 return np.histogram2d(self.model.endog, self.predict().argmax(1), 

4141 bins=bins)[0] 

4142 
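A sketch of the J x J table for an assumed three-category MNLogit fit; the total count equals the number of observations:

import numpy as np
import statsmodels.api as sm

rng = np.random.default_rng(0)
nobs = 600
exog = np.column_stack([np.ones(nobs), rng.normal(size=(nobs, 2))])
betas = ([0.0, 0.0, 0.0], [0.5, 1.0, -0.5], [-0.5, -1.0, 1.0])
scores = np.column_stack([exog @ b for b in betas])
endog = (scores + rng.gumbel(size=scores.shape)).argmax(axis=1)

res = sm.MNLogit(endog, exog).fit(disp=0)
table = res.pred_table()        # rows: observed category, columns: predicted category
print(table, table.sum())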

4143 @cache_readonly 

4144 def bse(self): 

4145 bse = np.sqrt(np.diag(self.cov_params())) 

4146 return bse.reshape(self.params.shape, order='F') 

4147 

4148 @cache_readonly 

4149 def aic(self): 

4150 return -2*(self.llf - (self.df_model+self.model.J-1)) 

4151 

4152 @cache_readonly 

4153 def bic(self): 

4154 return -2*self.llf + np.log(self.nobs)*(self.df_model+self.model.J-1) 

4155 

4156 def conf_int(self, alpha=.05, cols=None): 

4157 confint = super(DiscreteResults, self).conf_int(alpha=alpha, 

4158 cols=cols) 

4159 return confint.transpose(2,0,1) 

4160 

4161 def margeff(self): 

4162 raise NotImplementedError("Use get_margeff instead") 

4163 

4164 @cache_readonly 

4165 def resid_misclassified(self): 

4166 """ 

4167 Residuals indicating which observations are misclassified. 

4168 

4169 Notes 

4170 ----- 

4171 The residuals for the multinomial model are defined as 

4172 

4173 .. math:: argmax(y_i) \\neq argmax(p_i) 

4174 

4175 where :math:`argmax(y_i)` is the index of the category for the 

4176 endogenous variable and :math:`argmax(p_i)` is the index of the 

4177 predicted probabilities for each category. That is, the residual 

4178 is a binary indicator that is 0 if the category with the highest 

4179 predicted probability is the same as that of the observed variable 

4180 and 1 otherwise. 

4181 """ 

4182 # it's 0 or 1 - 0 for correct prediction and 1 for a missed one 

4183 return (self.model.wendog.argmax(1) != 

4184 self.predict().argmax(1)).astype(float) 

4185 

4186 def summary2(self, alpha=0.05, float_format="%.4f"): 

4187 """Experimental function to summarize regression results 

4188 

4189 Parameters 

4190 ---------- 

4191 alpha : float 

4192 significance level for the confidence intervals 

4193 float_format : str 

4194 print format for floats in parameters summary 

4195 

4196 Returns 

4197 ------- 

4198 smry : Summary instance 

4199 this holds the summary tables and text, which can be printed or 

4200 converted to various output formats. 

4201 

4202 See Also 

4203 -------- 

4204 statsmodels.iolib.summary2.Summary : class to hold summary results 

4205 """ 

4206 

4207 from statsmodels.iolib import summary2 

4208 smry = summary2.Summary() 

4209 smry.add_dict(summary2.summary_model(self)) 

4210 # One data frame per value of endog 

4211 eqn = self.params.shape[1] 

4212 confint = self.conf_int(alpha) 

4213 for i in range(eqn): 

4214 coefs = summary2.summary_params((self, self.params[:, i], 

4215 self.bse[:, i], 

4216 self.tvalues[:, i], 

4217 self.pvalues[:, i], 

4218 confint[i]), 

4219 alpha=alpha) 

4220 # Header must show value of endog 

4221 level_str = self.model.endog_names + ' = ' + str(i) 

4222 coefs[level_str] = coefs.index 

4223 coefs = coefs.iloc[:, [-1, 0, 1, 2, 3, 4, 5]] 

4224 smry.add_df(coefs, index=False, header=True, 

4225 float_format=float_format) 

4226 smry.add_title(results=self) 

4227 return smry 

4228 

4229 

4230class L1MultinomialResults(MultinomialResults): 

4231 __doc__ = _discrete_results_docs % {"one_line_description" : 

4232 "A results class for multinomial data fit by l1 regularization", 

4233 "extra_attr" : _l1_results_attr} 

4234 def __init__(self, model, mlefit): 

4235 super(L1MultinomialResults, self).__init__(model, mlefit) 

4236 # self.trimmed is a boolean array with T/F telling whether or not that 

4237 # entry in params has been zeroed out.

4238 self.trimmed = mlefit.mle_retvals['trimmed'] 

4239 self.nnz_params = (~self.trimmed).sum() 

4240 

4241 # Note: J-1 constants 

4242 self.df_model = self.nnz_params - (self.model.J - 1) 

4243 self.df_resid = float(self.model.endog.shape[0] - self.nnz_params) 

4244 

4245 

4246#### Results Wrappers #### 

4247 

4248class OrderedResultsWrapper(lm.RegressionResultsWrapper): 

4249 pass 

4250 

4251 

4252wrap.populate_wrapper(OrderedResultsWrapper, OrderedResults) 

4253 

4254 

4255class CountResultsWrapper(lm.RegressionResultsWrapper): 

4256 pass 

4257 

4258 

4259wrap.populate_wrapper(CountResultsWrapper, CountResults) 

4260 

4261 

4262class NegativeBinomialResultsWrapper(lm.RegressionResultsWrapper): 

4263 pass 

4264 

4265 

4266wrap.populate_wrapper(NegativeBinomialResultsWrapper, 

4267 NegativeBinomialResults) 

4268 

4269 

4270class GeneralizedPoissonResultsWrapper(lm.RegressionResultsWrapper): 

4271 pass 

4272 

4273 

4274wrap.populate_wrapper(GeneralizedPoissonResultsWrapper, 

4275 GeneralizedPoissonResults) 

4276 

4277 

4278class PoissonResultsWrapper(lm.RegressionResultsWrapper): 

4279 pass 

4280 

4281 

4282wrap.populate_wrapper(PoissonResultsWrapper, PoissonResults) 

4283 

4284 

4285class L1CountResultsWrapper(lm.RegressionResultsWrapper): 

4286 pass 

4287 

4288 

4289class L1PoissonResultsWrapper(lm.RegressionResultsWrapper): 

4290 pass 

4291 

4292 

4293wrap.populate_wrapper(L1PoissonResultsWrapper, L1PoissonResults) 

4294 

4295 

4296class L1NegativeBinomialResultsWrapper(lm.RegressionResultsWrapper): 

4297 pass 

4298 

4299 

4300wrap.populate_wrapper(L1NegativeBinomialResultsWrapper, 

4301 L1NegativeBinomialResults) 

4302 

4303 

4304class L1GeneralizedPoissonResultsWrapper(lm.RegressionResultsWrapper): 

4305 pass 

4306 

4307 

4308wrap.populate_wrapper(L1GeneralizedPoissonResultsWrapper, 

4309 L1GeneralizedPoissonResults) 

4310 

4311 

4312class BinaryResultsWrapper(lm.RegressionResultsWrapper): 

4313 _attrs = {"resid_dev": "rows", 

4314 "resid_generalized": "rows", 

4315 "resid_pearson": "rows", 

4316 "resid_response": "rows" 

4317 } 

4318 _wrap_attrs = wrap.union_dicts(lm.RegressionResultsWrapper._wrap_attrs, 

4319 _attrs) 

4320 

4321 

4322wrap.populate_wrapper(BinaryResultsWrapper, BinaryResults) 

4323 

4324 

4325class L1BinaryResultsWrapper(lm.RegressionResultsWrapper): 

4326 pass 

4327 

4328 

4329wrap.populate_wrapper(L1BinaryResultsWrapper, L1BinaryResults) 

4330 

4331 

4332class MultinomialResultsWrapper(lm.RegressionResultsWrapper): 

4333 _attrs = {"resid_misclassified": "rows"} 

4334 _wrap_attrs = wrap.union_dicts(lm.RegressionResultsWrapper._wrap_attrs, 

4335 _attrs) 

4336 _methods = {'conf_int': 'multivariate_confint'} 

4337 _wrap_methods = wrap.union_dicts(lm.RegressionResultsWrapper._wrap_methods, 

4338 _methods) 

4339 

4340 

4341wrap.populate_wrapper(MultinomialResultsWrapper, MultinomialResults) 

4342 

4343 

4344class L1MultinomialResultsWrapper(lm.RegressionResultsWrapper): 

4345 pass 

4346 

4347 

4348wrap.populate_wrapper(L1MultinomialResultsWrapper, L1MultinomialResults)