# -*- coding: utf-8 -*-
"""
Generalized Additive Models

Author: Luca Puggini
Author: Josef Perktold

created on 08/07/2015
"""

from collections.abc import Iterable
import copy  # check if needed when dropping python 2.7

import numpy as np
from scipy import optimize
import pandas as pd

import statsmodels.base.wrapper as wrap

from statsmodels.discrete.discrete_model import Logit
from statsmodels.genmod.generalized_linear_model import (
    GLM, GLMResults, GLMResultsWrapper, _check_convergence)
import statsmodels.regression.linear_model as lm
# import statsmodels.regression._tools as reg_tools  # TODO: use this for pirls
from statsmodels.tools.sm_exceptions import (PerfectSeparationError,
                                             ValueWarning)
from statsmodels.tools.decorators import cache_readonly
from statsmodels.tools.data import _is_using_pandas
from statsmodels.tools.linalg import matrix_sqrt

from statsmodels.base._penalized import PenalizedMixin
from statsmodels.gam.gam_penalties import MultivariateGamPenalty
from statsmodels.gam.gam_cross_validation.gam_cross_validation import (
    MultivariateGAMCVPath)
from statsmodels.gam.gam_cross_validation.cross_validators import KFold


def _transform_predict_exog(model, exog, design_info=None):
    """transform exog for predict using design_info

    Note: this is copied from base.model.Results.predict and converted to a
    standalone function with additional options.
    """

    is_pandas = _is_using_pandas(exog, None)

    exog_index = exog.index if is_pandas else None

    if design_info is None:
        design_info = getattr(model.data, 'design_info', None)

    if design_info is not None and (exog is not None):
        from patsy import dmatrix
        if isinstance(exog, pd.Series):
            # we are guessing whether it should be column or row
            if (hasattr(exog, 'name') and isinstance(exog.name, str) and
                    exog.name in design_info.describe()):
                # assume we need one column
                exog = pd.DataFrame(exog)
            else:
                # assume we need a row
                exog = pd.DataFrame(exog).T
        orig_exog_len = len(exog)
        is_dict = isinstance(exog, dict)
        exog = dmatrix(design_info, exog, return_type="dataframe")
        if orig_exog_len > len(exog) and not is_dict:
            import warnings
            if exog_index is None:
                warnings.warn('nan values have been dropped', ValueWarning)
            else:
                exog = exog.reindex(exog_index)
            exog_index = exog.index

    if exog is not None:
        exog = np.asarray(exog)
        if exog.ndim == 1 and (model.exog.ndim == 1 or
                               model.exog.shape[1] == 1):
            exog = exog[:, None]
        exog = np.atleast_2d(exog)  # needed in count model shape[1]

    return exog, exog_index


class GLMGamResults(GLMResults):
    """Results class for generalized additive models, GAM.

    This inherits from GLMResults.

    Warning: some inherited methods might not correctly take the
    penalization into account.

    All methods related to the log-likelihood function return the penalized
    values.

    Attributes
    ----------
    edf
        list of effective degrees of freedom for each column of the design
        matrix.
    hat_matrix_diag
        diagonal of hat matrix
    gcv
        generalized cross-validation criterion computed as
        ``gcv = scale / (1. - hat_matrix_trace / self.nobs)**2``
    cv
        cross-validation criterion computed as
        ``cv = ((resid_pearson / (1 - hat_matrix_diag))**2).sum() / nobs``

    Notes
    -----
    status: experimental
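
    Examples
    --------
    A minimal, hypothetical usage sketch; ``res`` is assumed to be the
    result returned by ``GLMGam(...).fit()``::

        res.edf.sum()             # total effective degrees of freedom
        res.gcv                   # generalized cross-validation criterion
        res.test_significance(0)  # Wald test for the first smooth term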

114 """ 

115 

116 def __init__(self, model, params, normalized_cov_params, scale, **kwds): 

117 

118 # this is a messy way to compute edf and update scale 

119 # need several attributes to compute edf 

120 self.model = model 

121 self.params = params 

122 self.normalized_cov_params = normalized_cov_params 

123 self.scale = scale 

124 edf = self.edf.sum() 

125 self.df_model = edf - 1 # assume constant 

126 # need to use nobs or wnobs attribute 

127 self.df_resid = self.model.endog.shape[0] - edf 

128 

129 # we are setting the model df for the case when super is using it 

130 # df in model will be incorrect state when alpha/pen_weight changes 

131 self.model.df_model = self.df_model 

132 self.model.df_resid = self.df_resid 

133 mu = self.fittedvalues 

134 self.scale = scale = self.model.estimate_scale(mu) 

135 super(GLMGamResults, self).__init__(model, params, 

136 normalized_cov_params, scale, 

137 **kwds) 

138 

    def _tranform_predict_exog(self, exog=None, exog_smooth=None,
                               transform=True):
        """Transform original explanatory variables for prediction

        Parameters
        ----------
        exog : array_like, optional
            The values for the linear explanatory variables.
        exog_smooth : array_like
            values for the variables in the smooth terms
        transform : bool, optional
            If transform is False, then ``exog`` is returned unchanged and
            ``exog_smooth`` is ignored. It is assumed that exog contains the
            full design matrix for the predict observations.
            If transform is True, then the basis representation of the smooth
            term will be constructed from the provided ``exog_smooth``.

        Returns
        -------
        exog_transformed : ndarray
            design matrix for the prediction
        """
        exog_index = None
        if transform is False:
            # the following allows that either or both exog are not None
            if exog_smooth is None:
                # exog could be None or array
                ex = exog
            else:
                if exog is None:
                    ex = exog_smooth
                else:
                    ex = np.column_stack((exog, exog_smooth))
        else:
            # transform exog_linear if needed
            if exog is not None and hasattr(self.model, 'design_info_linear'):
                exog, exog_index = _transform_predict_exog(
                    self.model, exog, self.model.design_info_linear)

            # create smooth basis
            if exog_smooth is not None:
                ex_smooth = self.model.smoother.transform(exog_smooth)
                if exog is None:
                    ex = ex_smooth
                else:
                    # TODO: there might be problems if exog_smooth is 1-D
                    ex = np.column_stack((exog, ex_smooth))
            else:
                ex = exog

        return ex, exog_index

    def predict(self, exog=None, exog_smooth=None, transform=True, **kwargs):
        """compute prediction

        Parameters
        ----------
        exog : array_like, optional
            The values for the linear explanatory variables.
        exog_smooth : array_like
            values for the variables in the smooth terms
        transform : bool, optional
            If transform is True, then the basis representation of the smooth
            term will be constructed from the provided ``exog_smooth``.
        kwargs :
            Some models can take additional arguments or keywords, see the
            predict method of the model for the details.

        Returns
        -------
        prediction : ndarray, pandas.Series or pandas.DataFrame
            predicted values
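
        Examples
        --------
        Illustrative sketch; ``res`` denotes a fitted ``GLMGamResults``
        instance and ``x_new`` an array of new values for the smooth
        variables::

            pred = res.predict(exog_smooth=x_new)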

212 """ 

213 ex, exog_index = self._tranform_predict_exog(exog=exog, 

214 exog_smooth=exog_smooth, 

215 transform=transform) 

216 predict_results = super(GLMGamResults, self).predict(ex, 

217 transform=False, 

218 **kwargs) 

219 if exog_index is not None and not hasattr( 

220 predict_results, 'predicted_values'): 

221 if predict_results.ndim == 1: 

222 return pd.Series(predict_results, index=exog_index) 

223 else: 

224 return pd.DataFrame(predict_results, index=exog_index) 

225 else: 

226 return predict_results 

227 

    def get_prediction(self, exog=None, exog_smooth=None, transform=True,
                       **kwargs):
        """compute prediction results

        Parameters
        ----------
        exog : array_like, optional
            The values for which you want to predict.
        exog_smooth : array_like
            values for the variables in the smooth terms
        transform : bool, optional
            If transform is True, then the basis representation of the smooth
            term will be constructed from the provided ``exog_smooth``.
        kwargs :
            Some models can take additional arguments or keywords, see the
            predict method of the model for the details.

        Returns
        -------
        prediction_results : generalized_linear_model.PredictionResults
            The prediction results instance contains prediction and prediction
            variance and can on demand calculate confidence intervals and
            summary tables for the prediction of the mean and of new
            observations.
        """
        ex, exog_index = self._tranform_predict_exog(exog=exog,
                                                     exog_smooth=exog_smooth,
                                                     transform=transform)
        return super(GLMGamResults, self).get_prediction(ex, transform=False,
                                                         **kwargs)

    def partial_values(self, smooth_index, include_constant=True):
        """contribution of a smooth term to the linear prediction

        Warning: This will be replaced by a predict method.

        Parameters
        ----------
        smooth_index : int
            index of the smooth term within list of smooth terms
        include_constant : bool
            If true, then the estimated intercept is added to the prediction
            and its standard errors. This avoids the confidence interval
            having zero width at the imposed identification constraint, e.g.
            either at a reference point or at the mean.

        Returns
        -------
        predicted : ndarray
            predicted value of linear term.
            This is not the expected response if the link function is not
            linear.
        se_pred : ndarray
            standard error of linear prediction
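
        Examples
        --------
        Illustrative sketch; ``res`` denotes a fitted ``GLMGamResults``
        instance::

            linpred, se = res.partial_values(0)
            upper = linpred + 1.96 * se
            lower = linpred - 1.96 * se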

282 """ 

283 variable = smooth_index 

284 smoother = self.model.smoother 

285 mask = smoother.mask[variable] 

286 

287 start_idx = self.model.k_exog_linear 

288 idx = start_idx + np.nonzero(mask)[0] 

289 

290 # smoother has only smooth parts, not exog_linear 

291 exog_part = smoother.basis[:, mask] 

292 

293 const_idx = self.model.data.const_idx 

294 if include_constant and const_idx is not None: 

295 idx = np.concatenate(([const_idx], idx)) 

296 exog_part = self.model.exog[:, idx] 

297 

298 linpred = np.dot(exog_part, self.params[idx]) 

299 # select the submatrix corresponding to a single variable 

300 partial_cov_params = self.cov_params(column=idx) 

301 

302 covb = partial_cov_params 

303 var = (exog_part * np.dot(covb, exog_part.T).T).sum(1) 

304 se = np.sqrt(var) 

305 

306 return linpred, se 

307 

    def plot_partial(self, smooth_index, plot_se=True, cpr=False,
                     include_constant=True, ax=None):
        """plot the contribution of a smooth term to the linear prediction

        Parameters
        ----------
        smooth_index : int
            index of the smooth term within list of smooth terms
        plot_se : bool
            If plot_se is true, then the confidence interval for the linear
            prediction will be added to the plot.
        cpr : bool
            If cpr (component plus residual) is true, then a scatter plot of
            the partial working residuals will be added to the plot.
        include_constant : bool
            If true, then the estimated intercept is added to the prediction
            and its standard errors. This avoids the confidence interval
            having zero width at the imposed identification constraint, e.g.
            either at a reference point or at the mean.
        ax : None or matplotlib axis instance
            If ax is not None, then the plot will be added to it.

        Returns
        -------
        Figure
            If `ax` is None, the created figure. Otherwise the Figure to which
            `ax` is connected.
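
        Examples
        --------
        Illustrative sketch; ``res`` denotes a fitted ``GLMGamResults``
        instance::

            fig = res.plot_partial(0, cpr=True)
            fig.savefig('partial_effect_smooth0.png')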

335 """ 

336 from statsmodels.graphics.utils import _import_mpl, create_mpl_ax 

337 _import_mpl() 

338 

339 variable = smooth_index 

340 y_est, se = self.partial_values(variable, 

341 include_constant=include_constant) 

342 smoother = self.model.smoother 

343 x = smoother.smoothers[variable].x 

344 sort_index = np.argsort(x) 

345 x = x[sort_index] 

346 y_est = y_est[sort_index] 

347 se = se[sort_index] 

348 

349 fig, ax = create_mpl_ax(ax) 

350 ax.plot(x, y_est, c='blue', lw=2) 

351 if plot_se: 

352 ax.plot(x, y_est + 1.96 * se, '-', c='blue') 

353 ax.plot(x, y_est - 1.96 * se, '-', c='blue') 

354 if cpr: 

355 # TODO: resid_response does not make sense with nonlinear link 

356 # use resid_working ? 

357 cpr_ = y_est + self.resid_working 

358 ax.plot(x, cpr_, '.', lw=2) 

359 

360 ax.set_xlabel(smoother.smoothers[variable].variable_name) 

361 

362 return fig 

363 

    def test_significance(self, smooth_index):
        """hypothesis test that a smooth component is zero.

        This calls `wald_test` to compute the hypothesis test, but uses
        effective degrees of freedom.

        Parameters
        ----------
        smooth_index : int
            index of the smooth term within list of smooth terms

        Returns
        -------
        wald_test : ContrastResults instance
            the results instance created by `wald_test`
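
        Examples
        --------
        Illustrative sketch; ``res`` denotes a fitted ``GLMGamResults``
        instance::

            wt = res.test_significance(0)
            print(wt.pvalue)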

379 """ 

380 

381 variable = smooth_index 

382 smoother = self.model.smoother 

383 start_idx = self.model.k_exog_linear 

384 

385 k_params = len(self.params) 

386 # a bit messy, we need first index plus length of smooth term 

387 mask = smoother.mask[variable] 

388 k_constraints = mask.sum() 

389 idx = start_idx + np.nonzero(mask)[0][0] 

390 constraints = np.eye(k_constraints, k_params, idx) 

391 df_constraints = self.edf[idx: idx + k_constraints].sum() 

392 

393 return self.wald_test(constraints, df_constraints=df_constraints) 

394 

    def get_hat_matrix_diag(self, observed=True, _axis=1):
        """
        Compute the diagonal of the hat matrix

        Parameters
        ----------
        observed : bool
            If true, then the observed Hessian is used in the hat matrix
            computation. If false, then the expected Hessian is used.
            In the case of a canonical link function both are the same.
            This is only relevant for models that implement both observed
            and expected Hessian, which is currently only GLM. Other
            models only use the observed Hessian.
        _axis : int
            This is mainly for internal use. By default it returns the usual
            diagonal of the hat matrix. If _axis is zero, then the result
            corresponds to the effective degrees of freedom, ``edf``, for
            each column of exog.

        Returns
        -------
        hat_matrix_diag : ndarray
            The diagonal of the hat matrix computed from the observed
            or expected Hessian.
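
        Examples
        --------
        Illustrative sketch; ``res`` denotes a fitted ``GLMGamResults``
        instance. The sum of the diagonal equals the hat matrix trace::

            hd = res.get_hat_matrix_diag()
            np.allclose(hd.sum(), res.hat_matrix_trace)   # True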

419 """ 

420 weights = self.model.hessian_factor(self.params, scale=self.scale, 

421 observed=observed) 

422 wexog = np.sqrt(weights)[:, None] * self.model.exog 

423 

424 # we can use inverse hessian directly instead of computing it from 

425 # WLS/IRLS as in GLM 

426 

427 # TODO: does `normalized_cov_params * scale` work in all cases? 

428 # this avoids recomputing hessian, check when used for other models. 

429 hess_inv = self.normalized_cov_params * self.scale 

430 # this is in GLM equivalent to the more generic and direct 

431 # hess_inv = np.linalg.inv(-self.model.hessian(self.params)) 

432 hd = (wexog * hess_inv.dot(wexog.T).T).sum(axis=_axis) 

433 return hd 

434 

435 @cache_readonly 

436 def edf(self): 

437 return self.get_hat_matrix_diag(_axis=0) 

438 

439 @cache_readonly 

440 def hat_matrix_trace(self): 

441 return self.hat_matrix_diag.sum() 

442 

443 @cache_readonly 

444 def hat_matrix_diag(self): 

445 return self.get_hat_matrix_diag(observed=True) 

446 

447 @cache_readonly 

448 def gcv(self): 

449 return self.scale / (1. - self.hat_matrix_trace / self.nobs)**2 

450 

451 @cache_readonly 

452 def cv(self): 

453 cv_ = ((self.resid_pearson / (1. - self.hat_matrix_diag))**2).sum() 

454 cv_ /= self.nobs 

455 return cv_ 

456 

457 

class GLMGamResultsWrapper(GLMResultsWrapper):
    pass


wrap.populate_wrapper(GLMGamResultsWrapper, GLMGamResults)


class GLMGam(PenalizedMixin, GLM):
    """
    Generalized Additive Models (GAM)

    This inherits from `GLM`.

    Warning: Not all inherited methods might correctly take the penalization
    into account. Not all options, including offset and exposure, have been
    verified yet.

    Parameters
    ----------
    endog : array_like
        The response variable.
    exog : array_like or None
        These explanatory variables are treated as linear. The model in this
        case is a partial linear model.
    smoother : instance of additive smoother class
        Examples of smoother instances include BSplines or
        CyclicCubicSplines.
    alpha : float or list of floats
        Penalization weights for smooth terms. The length of the list needs
        to be the same as the number of smooth terms in the ``smoother``.
    family : instance of GLM family
        See GLM.
    offset : None or array_like
        See GLM.
    exposure : None or array_like
        See GLM.
    missing : 'none'
        Missing value handling is not supported in this class.
    **kwargs
        Extra keywords are used in the call to the super classes.

    Notes
    -----
    Status: experimental. This has full unit test coverage for the core
    results with Gaussian and Poisson (without offset and exposure). Other
    options and additional results might not be correctly supported yet.
    (Binomial with counts, i.e. with n_trials, is most likely wrong in pirls.
    User specified var or freq weights are most likely also not correct for
    all results.)
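
    Examples
    --------
    A minimal sketch with simulated data; the smoother degrees of freedom
    and penalty weights are illustrative only::

        import numpy as np
        from statsmodels.gam.api import GLMGam, BSplines

        np.random.seed(123)
        x = np.random.uniform(0, 1, (200, 2))
        y = x[:, 0] ** 2 + np.sin(4 * x[:, 1]) + np.random.normal(size=200)

        # basis for the two smooth terms
        bs = BSplines(x, df=[6, 6], degree=[3, 3])
        gam = GLMGam(y, smoother=bs, alpha=[0.1, 0.1])
        res = gam.fit()
        res.plot_partial(0, cpr=True)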

506 """ 

507 

508 _results_class = GLMGamResults 

509 _results_class_wrapper = GLMGamResultsWrapper 

510 

511 def __init__(self, endog, exog=None, smoother=None, alpha=0, family=None, 

512 offset=None, exposure=None, missing='none', **kwargs): 

513 

514 # TODO: check usage of hasconst 

515 hasconst = kwargs.get('hasconst', None) 

516 xnames_linear = None 

517 if hasattr(exog, 'design_info'): 

518 self.design_info_linear = exog.design_info 

519 xnames_linear = self.design_info_linear.column_names 

520 

521 is_pandas = _is_using_pandas(exog, None) 

522 

523 # TODO: handle data is experimental, see #5469 

524 # This is a bit wasteful because we need to `handle_data twice` 

525 self.data_linear = self._handle_data(endog, exog, missing, hasconst) 

526 if xnames_linear is None: 

527 xnames_linear = self.data_linear.xnames 

528 if exog is not None: 

529 exog_linear = self.data_linear.exog 

530 k_exog_linear = exog_linear.shape[1] 

531 else: 

532 exog_linear = None 

533 k_exog_linear = 0 

534 self.k_exog_linear = k_exog_linear 

535 # We need exog_linear for k-fold cross validation 

536 # TODO: alternative is to take columns from combined exog 

537 self.exog_linear = exog_linear 

538 

539 self.smoother = smoother 

540 self.k_smooths = smoother.k_variables 

541 self.alpha = self._check_alpha(alpha) 

542 penal = MultivariateGamPenalty(smoother, alpha=self.alpha, 

543 start_idx=k_exog_linear) 

544 kwargs.pop('penal', None) 

545 if exog_linear is not None: 

546 exog = np.column_stack((exog_linear, smoother.basis)) 

547 else: 

548 exog = smoother.basis 

549 

550 # TODO: check: xnames_linear will be None instead of empty list 

551 # if no exog_linear 

552 # can smoother be empty ? I guess not allowed. 

553 if xnames_linear is None: 

554 xnames_linear = [] 

555 xnames = xnames_linear + self.smoother.col_names 

556 

557 if is_pandas and exog_linear is not None: 

558 # we a dataframe so we can get a PandasData instance for wrapping 

559 exog = pd.DataFrame(exog, index=self.data_linear.row_labels, 

560 columns=xnames) 

561 

562 super(GLMGam, self).__init__(endog, exog=exog, family=family, 

563 offset=offset, exposure=exposure, 

564 penal=penal, missing=missing, **kwargs) 

565 

566 if not is_pandas: 

567 # set exog nanmes if not given by pandas DataFrame 

568 self.exog_names[:] = xnames 

569 

570 # TODO: the generic data handling might attach the design_info from the 

571 # linear part, but this is incorrect for the full model and 

572 # causes problems in wald_test_terms 

573 

574 if hasattr(self.data, 'design_info'): 

575 del self.data.design_info 

576 # formula also might be attached which causes problems in predict 

577 if hasattr(self, 'formula'): 

578 self.formula_linear = self.formula 

579 self.formula = None 

580 del self.formula 

581 

    def _check_alpha(self, alpha):
        """check and convert alpha to required list format

        Parameters
        ----------
        alpha : scalar, list or array_like
            penalization weight

        Returns
        -------
        alpha : list
            penalization weight, list with length equal to the number of
            smooth terms
        """
        if not isinstance(alpha, Iterable):
            alpha = [alpha] * len(self.smoother.smoothers)
        elif not isinstance(alpha, list):
            # we want alpha to be a list
            alpha = list(alpha)
        return alpha

    def fit(self, start_params=None, maxiter=1000, method='pirls', tol=1e-8,
            scale=None, cov_type='nonrobust', cov_kwds=None, use_t=None,
            full_output=True, disp=False, max_start_irls=3, **kwargs):
        """estimate parameters and create instance of GLMGamResults class

        Parameters
        ----------
        most parameters are the same as for GLM
        method : str
            The special optimization method is "pirls", which uses a penalized
            version of IRLS. Other methods are gradient optimizers as used in
            base.model.LikelihoodModel.

        Returns
        -------
        res : instance of wrapped GLMGamResults
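
        Examples
        --------
        Illustrative sketch; ``gam`` denotes a ``GLMGam`` instance::

            res = gam.fit()                              # penalized IRLS
            res_grad = gam.fit(method='bfgs', max_start_irls=2)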

619 """ 

620 # TODO: temporary hack to remove attribute 

621 # formula also might be attached which in inherited from_formula 

622 # causes problems in predict 

623 if hasattr(self, 'formula'): 

624 self.formula_linear = self.formula 

625 del self.formula 

626 

627 # TODO: alpha not allowed yet, but is in `_fit_pirls` 

628 # alpha = self._check_alpha() 

629 

630 if method.lower() in ['pirls', 'irls']: 

631 res = self._fit_pirls(self.alpha, start_params=start_params, 

632 maxiter=maxiter, tol=tol, scale=scale, 

633 cov_type=cov_type, cov_kwds=cov_kwds, 

634 use_t=use_t, **kwargs) 

635 else: 

636 if max_start_irls > 0 and (start_params is None): 

637 res = self._fit_pirls(self.alpha, start_params=start_params, 

638 maxiter=max_start_irls, tol=tol, 

639 scale=scale, 

640 cov_type=cov_type, cov_kwds=cov_kwds, 

641 use_t=use_t, **kwargs) 

642 start_params = res.params 

643 del res 

644 res = super(GLMGam, self).fit(start_params=start_params, 

645 maxiter=maxiter, method=method, 

646 tol=tol, scale=scale, 

647 cov_type=cov_type, cov_kwds=cov_kwds, 

648 use_t=use_t, 

649 full_output=full_output, disp=disp, 

650 max_start_irls=0, 

651 **kwargs) 

652 return res 

653 

    # p. 165, 4.3  # p. 136 PIRLS
    def _fit_pirls(self, alpha, start_params=None, maxiter=100, tol=1e-8,
                   scale=None, cov_type='nonrobust', cov_kwds=None,
                   use_t=None, weights=None):
        """fit model with penalized reweighted least squares
        """
        # TODO: this currently modifies several attributes
        # self.scale, self.scaletype, self.mu, self.weights
        # self.data_weights,
        # and possibly self._offset_exposure
        # several of those might not be necessary, e.g. mu and weights

        # alpha = alpha * len(y) * self.scale / 100
        # TODO: we need to rescale alpha
        endog = self.endog
        wlsexog = self.exog  # smoother.basis
        spl_s = self.penal.penalty_matrix(alpha=alpha)

        nobs, n_columns = wlsexog.shape

        # TODO what are these values?
        if weights is None:
            self.data_weights = np.array([1.] * nobs)
        else:
            self.data_weights = weights

        if not hasattr(self, '_offset_exposure'):
            self._offset_exposure = 0

        self.scaletype = scale
        # TODO: check default scale types
        # self.scaletype = 'dev'
        # during iteration
        self.scale = 1

        if start_params is None:
            mu = self.family.starting_mu(endog)
            lin_pred = self.family.predict(mu)
        else:
            lin_pred = np.dot(wlsexog, start_params) + self._offset_exposure
            mu = self.family.fitted(lin_pred)
        dev = self.family.deviance(endog, mu)

        history = dict(params=[None, start_params], deviance=[np.inf, dev])
        converged = False
        criterion = history['deviance']
        # This special case is used to get the likelihood for a specific
        # params vector.
        if maxiter == 0:
            mu = self.family.fitted(lin_pred)
            self.scale = self.estimate_scale(mu)
            wls_results = lm.RegressionResults(self, start_params, None)
            iteration = 0

        for iteration in range(maxiter):

            # TODO: is this equivalent to point 1 of page 136:
            # w = 1 / (V(mu) * g'(mu))  ?
            self.weights = self.data_weights * self.family.weights(mu)

            # TODO: is this equivalent to point 1 of page 136:
            # z = g(mu)(y - mu) + X beta  ?
            wlsendog = (lin_pred + self.family.link.deriv(mu) * (endog - mu)
                        - self._offset_exposure)

            # this defines the augmented matrix point 2a on page 136
            wls_results = penalized_wls(wlsendog, wlsexog, spl_s, self.weights)
            lin_pred = np.dot(wlsexog, wls_results.params).ravel()
            lin_pred += self._offset_exposure
            mu = self.family.fitted(lin_pred)

            # We do not need to update scale in GLM/LEF models
            # We might need it in dispersion models.
            # self.scale = self.estimate_scale(mu)
            history = self._update_history(wls_results, mu, history)

            if endog.squeeze().ndim == 1 and np.allclose(mu - endog, 0):
                msg = "Perfect separation detected, results not available"
                raise PerfectSeparationError(msg)

            # TODO need atol, rtol
            # args of _check_convergence: (criterion, iteration, atol, rtol)
            converged = _check_convergence(criterion, iteration, tol, 0)
            if converged:
                break
        self.mu = mu
        self.scale = self.estimate_scale(mu)
        glm_results = GLMGamResults(self, wls_results.params,
                                    wls_results.normalized_cov_params,
                                    self.scale,
                                    cov_type=cov_type, cov_kwds=cov_kwds,
                                    use_t=use_t)

        glm_results.method = "PIRLS"
        history['iteration'] = iteration + 1
        glm_results.fit_history = history
        glm_results.converged = converged

        return GLMGamResultsWrapper(glm_results)

    def select_penweight(self, criterion='aic', start_params=None,
                         start_model_params=None,
                         method='basinhopping', **fit_kwds):
        """find alpha by minimizing results criterion

        The objective for the minimization can be results attributes like
        ``gcv``, ``aic`` or ``bic`` where the latter are based on effective
        degrees of freedom.

        Warning: In many cases the optimization might converge to a local
        optimum or near optimum. Different start_params or using a global
        optimizer is recommended, default is basinhopping.

        Parameters
        ----------
        criterion='aic'
            name of results attribute to be minimized.
            Default is 'aic', other options are 'gcv', 'cv' or 'bic'.
        start_params : None or array
            starting parameters for alpha in the penalization weight
            minimization. The parameters are internally exponentiated and
            the minimization is with respect to ``exp(alpha)``.
        start_model_params : None or array
            starting parameter for the ``model._fit_pirls``.
        method : 'basinhopping', 'nm' or 'minimize'
            'basinhopping' and 'nm' directly use the underlying scipy.optimize
            functions `basinhopping` and `fmin`. 'minimize' provides access
            to the high level interface, `scipy.optimize.minimize`.
        fit_kwds : keyword arguments
            additional keyword arguments will be used in the call to the
            scipy optimizer. Which keywords are supported depends on the
            scipy optimization function.

        Returns
        -------
        alpha : ndarray
            penalization parameter found by minimizing the criterion.
            Note that this can be only a local (near) optimum.
        fit_res : tuple
            results returned by the scipy optimization routine. The
            parameters in the optimization problem are `log(alpha)`.
        history : dict
            history of calls to pirls; contains alpha, the fit criterion and
            the parameters to which pirls converged for the given alpha.

        Notes
        -----
        In the test cases Nelder-Mead and bfgs often converge to local optima,
        see also https://github.com/statsmodels/statsmodels/issues/5381.

        This does not use any analytical derivatives for the criterion
        minimization.

        Status: experimental. It is possible that defaults change if there
        is a better way to find a global optimum. API (e.g. type of return)
        might also change.
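
        Examples
        --------
        Illustrative sketch; ``gam`` denotes a ``GLMGam`` instance. One way
        to refit with the selected penalty is to build a new model with the
        returned alpha::

            alpha_opt, res_opt, hist = gam.select_penweight(criterion='gcv')
            res = GLMGam(gam.endog, smoother=gam.smoother,
                         alpha=alpha_opt).fit()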

811 """ 

812 # copy attributes that are changed, so we can reset them 

813 scale_keep = self.scale 

814 scaletype_keep = self.scaletype 

815 # TODO: use .copy() method when available for all types 

816 alpha_keep = copy.copy(self.alpha) 

817 

818 if start_params is None: 

819 start_params = np.zeros(self.k_smooths) 

820 else: 

821 start_params = np.log(1e-20 + start_params) 

822 

823 history = {} 

824 history['alpha'] = [] 

825 history['params'] = [start_model_params] 

826 history['criterion'] = [] 

827 

828 def fun(p): 

829 a = np.exp(p) 

830 res_ = self._fit_pirls(start_params=history['params'][-1], 

831 alpha=a) 

832 history['alpha'].append(a) 

833 history['params'].append(np.asarray(res_.params)) 

834 return getattr(res_, criterion) 

835 

836 if method == 'nm': 

837 kwds = dict(full_output=True, maxiter=1000, maxfun=2000) 

838 kwds.update(fit_kwds) 

839 fit_res = optimize.fmin(fun, start_params, **kwds) 

840 opt = fit_res[0] 

841 elif method == 'basinhopping': 

842 kwds = dict(minimizer_kwargs={'method': 'Nelder-Mead', 

843 'options': {'maxiter': 100, 'maxfev': 500}}, 

844 niter=10) 

845 kwds.update(fit_kwds) 

846 fit_res = optimize.basinhopping(fun, start_params, **kwds) 

847 opt = fit_res.x 

848 elif method == 'minimize': 

849 fit_res = optimize.minimize(fun, start_params, **fit_kwds) 

850 opt = fit_res.x 

851 else: 

852 raise ValueError('method not recognized') 

853 

854 del history['params'][0] # remove the model start_params 

855 

856 alpha = np.exp(opt) 

857 

858 # reset attributes that have or might have changed 

859 self.scale = scale_keep 

860 self.scaletype = scaletype_keep 

861 self.alpha = alpha_keep 

862 

863 return alpha, fit_res, history 

864 

    def select_penweight_kfold(self, alphas=None, cv_iterator=None, cost=None,
                               k_folds=5, k_grid=11):
        """find alphas by k-fold cross-validation

        Warning: This estimates ``k_folds`` models for each point in the
        grid of alphas.

        Parameters
        ----------
        alphas : None or list of arrays
            grid of penalization weights for each smooth term. If None, a
            default logarithmic grid with ``k_grid`` points is used.
        cv_iterator : instance
            instance of a cross-validation iterator, by default this is a
            KFold instance
        cost : function
            default is mean squared error. The cost function to evaluate the
            prediction error for the left out sample. This should take two
            arrays as arguments and return one float.
        k_folds : int
            number of folds if the default KFold iterator is used.
            This is ignored if ``cv_iterator`` is not None.
        k_grid : int
            number of grid points for the default alphas. This is ignored if
            ``alphas`` is not None.

        Returns
        -------
        alpha_cv : list of float
            Best alpha in grid according to cross-validation
        res_cv : instance of MultivariateGAMCVPath
            The instance was used for cross-validation and holds the results

        Notes
        -----
        The default alphas are defined as
        ``alphas = [np.logspace(0, 7, k_grid) for _ in range(k_smooths)]``
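
        Examples
        --------
        Illustrative sketch; ``gam`` denotes a ``GLMGam`` instance::

            alpha_cv, res_cv = gam.select_penweight_kfold(k_folds=5)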

897 """ 

898 

899 if cost is None: 

900 def cost(x1, x2): 

901 return np.linalg.norm(x1 - x2) / len(x1) 

902 

903 if alphas is None: 

904 alphas = [np.logspace(0, 7, k_grid) for _ in range(self.k_smooths)] 

905 

906 if cv_iterator is None: 

907 cv_iterator = KFold(k_folds=k_folds, shuffle=True) 

908 

909 gam_cv = MultivariateGAMCVPath(smoother=self.smoother, alphas=alphas, 

910 gam=GLMGam, cost=cost, endog=self.endog, 

911 exog=self.exog_linear, 

912 cv_iterator=cv_iterator) 

913 gam_cv_res = gam_cv.fit() 

914 

915 return gam_cv_res.alpha_cv, gam_cv_res 

916 

917 

class LogitGam(PenalizedMixin, Logit):
    """Generalized Additive model for discrete Logit

    This subclasses discrete_model Logit.

    Warning: not all inherited methods might correctly take the penalization
    into account.

    Not verified yet.
    """
    def __init__(self, endog, smoother, alpha, *args, **kwargs):
        if not isinstance(alpha, Iterable):
            alpha = np.array([alpha] * len(smoother.smoothers))

        self.smoother = smoother
        self.alpha = alpha
        self.pen_weight = 1  # TODO: pen weight should not be defined here!!
        penal = MultivariateGamPenalty(smoother, alpha=alpha)

        super(LogitGam, self).__init__(endog, smoother.basis, penal=penal,
                                       *args, **kwargs)


def penalized_wls(endog, exog, penalty_matrix, weights):
    """weighted least squares with quadratic penalty

    Parameters
    ----------
    endog : ndarray
        response or endogenous variable
    exog : ndarray
        design matrix, matrix of exogenous or explanatory variables
    penalty_matrix : ndarray, 2-Dim square
        penalty matrix for quadratic penalization. Note, the penalty_matrix
        is multiplied by two to match non-pirls fitting methods.
    weights : ndarray
        weights for WLS

    Returns
    -------
    results : Results instance of WLS
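
    Examples
    --------
    A small sketch with random data; the shapes and the penalty weight are
    chosen only for illustration::

        import numpy as np
        nobs, k_vars = 50, 4
        x = np.random.normal(size=(nobs, k_vars))
        y = x.sum(1) + np.random.normal(size=nobs)
        pen = 0.1 * np.eye(k_vars)
        res = penalized_wls(y, x, pen, np.ones(nobs))
        res.params.shape   # (4,)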

959 """ 

960 y, x, s = endog, exog, penalty_matrix 

961 # TODO: I do not understand why I need 2 * s 

962 aug_y, aug_x, aug_weights = make_augmented_matrix(y, x, 2 * s, weights) 

963 wls_results = lm.WLS(aug_y, aug_x, aug_weights).fit() 

964 # TODO: use MinimalWLS during iterations, less overhead 

965 # However, MinimalWLS does not return normalized_cov_params 

966 # which we need at the end of the iterations 

967 # call would be 

968 # wls_results = reg_tools._MinimalWLS(aug_y, aug_x, aug_weights).fit() 

969 wls_results.params = wls_results.params.ravel() 

970 

971 return wls_results 

972 

973 

def make_augmented_matrix(endog, exog, penalty_matrix, weights):
    """augment endog, exog and weights with stochastic restriction matrix

    Parameters
    ----------
    endog : ndarray
        response or endogenous variable
    exog : ndarray
        design matrix, matrix of exogenous or explanatory variables
    penalty_matrix : ndarray, 2-Dim square
        penalty matrix for quadratic penalization
    weights : ndarray
        weights for WLS

    Returns
    -------
    endog_aug : ndarray
        augmented response variable
    exog_aug : ndarray
        augmented design matrix
    weights_aug : ndarray
        augmented weights for WLS
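
    Examples
    --------
    Shape illustration with random inputs; the values are arbitrary. The
    design matrix gains one row per column of the penalty matrix::

        import numpy as np
        x = np.random.normal(size=(10, 3))
        y = np.arange(10.)
        pen = np.eye(3)
        w = np.ones(10)
        y_aug, x_aug, w_aug = make_augmented_matrix(y, x, pen, w)
        x_aug.shape   # (13, 3)
        y_aug.shape   # (13,)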

996 """ 

997 y, x, s, = endog, exog, penalty_matrix 

998 nobs = x.shape[0] 

999 

1000 # TODO: needs full because of broadcasting with weights 

1001 # check what weights should be doing 

1002 rs = matrix_sqrt(s) 

1003 x1 = np.vstack([x, rs]) # augmented x 

1004 n_samp1es_x1 = x1.shape[0] 

1005 

1006 y1 = np.array([0.] * n_samp1es_x1) # augmented y 

1007 y1[:nobs] = y 

1008 

1009 id1 = np.array([1.] * rs.shape[0]) 

1010 w1 = np.concatenate([weights, id1]) 

1011 

1012 return y1, x1, w1