Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1'''Partial Regression plot and residual plots to find misspecification 

2 

3 

4Author: Josef Perktold 

5License: BSD-3 

6Created: 2011-01-23 

7 

8update 

92011-06-05 : start to convert example to usable functions 

102011-10-27 : docstrings 

11 

12''' 

13from statsmodels.compat.python import lrange, lzip 

14from statsmodels.compat.pandas import Appender 

15 

16import numpy as np 

17import pandas as pd 

18from patsy import dmatrix 

19 

20from statsmodels.regression.linear_model import OLS, GLS, WLS 

21from statsmodels.genmod.generalized_linear_model import GLM 

22from statsmodels.genmod.generalized_estimating_equations import GEE 

23from statsmodels.sandbox.regression.predstd import wls_prediction_std 

24from statsmodels.graphics import utils 

25from statsmodels.nonparametric.smoothers_lowess import lowess 

26from statsmodels.tools.tools import maybe_unwrap_results 

27 

28from ._regressionplots_doc import ( 

29 _plot_added_variable_doc, 

30 _plot_partial_residuals_doc, 

31 _plot_ceres_residuals_doc, 

32 _plot_influence_doc, 

33 _plot_leverage_resid2_doc) 

34 

# Public names exported by this module.  Each name is listed exactly once;
# a duplicate 'plot_regress_exog' entry has been removed.
__all__ = ['plot_fit', 'plot_regress_exog', 'plot_partregress', 'plot_ccpr',
           'plot_partregress_grid', 'plot_ccpr_grid',
           'add_lowess', 'abline_plot', 'influence_plot',
           'plot_leverage_resid2', 'added_variable_resids',
           'partial_resids', 'ceres_resids', 'plot_added_variable',
           'plot_partial_residuals', 'plot_ceres_residuals']

41 

42#TODO: consider moving to influence module 

43def _high_leverage(results): 

44 #TODO: replace 1 with k_constant 

45 return 2. * (results.df_model + 1)/results.nobs 

46 

47 

def add_lowess(ax, lines_idx=0, frac=.2, **lowess_kwargs):
    """
    Add Lowess line to a plot.

    Parameters
    ----------
    ax : AxesSubplot
        The Axes to which to add the plot.
    lines_idx : int
        This is the line on the existing plot to which you want to add
        a smoothed lowess line.
    frac : float
        The fraction of the points to use when doing the lowess fit.
    lowess_kwargs
        Additional keyword arguments are passed to lowess.

    Returns
    -------
    Figure
        The figure that holds the instance.
    """
    # Use the public Line2D accessors instead of the private ``_x``/``_y``
    # attributes, which are matplotlib implementation details and not
    # guaranteed to exist across versions.
    line = ax.get_lines()[lines_idx]
    y0 = line.get_ydata()
    x0 = line.get_xdata()
    lres = lowess(y0, x0, frac=frac, **lowess_kwargs)
    ax.plot(lres[:, 0], lres[:, 1], 'r', lw=1.5)
    return ax.figure

74 

75 

def plot_fit(results, exog_idx, y_true=None, ax=None, vlines=True, **kwargs):
    """
    Plot fit against one regressor.

    Draws a scatter plot of the observed response against the chosen
    regressor, overlays the fitted values, and (optionally) the prediction
    intervals as vertical lines.

    Parameters
    ----------
    results : Results
        A result instance with resid, model.endog and model.exog as
        attributes.
    exog_idx : {int, str}
        Name or index of regressor in exog matrix.
    y_true : array_like, optional
        If this is not None, then the array is added to the plot.
    ax : AxesSubplot, optional
        If given, this subplot is used to plot in instead of a new figure
        being created.
    vlines : bool, optional
        If this not True, then the uncertainty of the fit is not plotted.
    **kwargs
        The keyword arguments are passed to the plot command for the fitted
        values points.

    Returns
    -------
    Figure
        If `ax` is None, the created figure.  Otherwise the figure to which
        `ax` is connected.

    Examples
    --------
    Load the Statewide Crime data set and perform linear regression with
    `poverty` and `hs_grad` as variables and `murder` as the response.

    >>> import statsmodels.api as sm
    >>> import matplotlib.pyplot as plt

    >>> data = sm.datasets.statecrime.load_pandas().data
    >>> X = data[['poverty', 'hs_grad']]
    >>> X["constant"] = 1
    >>> results = sm.OLS(data['murder'], X).fit()

    >>> fig, ax = plt.subplots()
    >>> fig = sm.graphics.plot_fit(results, 0, ax=ax)
    >>> ax.set_ylabel("Murder Rate")
    >>> ax.set_xlabel("Poverty Level")
    >>> ax.set_title("Linear Regression")
    >>> plt.show()

    .. plot:: plots/graphics_plot_fit_ex.py
    """
    fig, ax = utils.create_mpl_ax(ax)

    exog_name, exog_idx = utils.maybe_name_or_idx(exog_idx, results.model)
    results = maybe_unwrap_results(results)

    # Sort all series by the chosen regressor so the markers read
    # left-to-right.  (Weighted endog/exog is not supported here.)
    endog = results.model.endog
    regressor = results.model.exog[:, exog_idx]
    order = np.argsort(regressor)
    endog = endog[order]
    regressor = regressor[order]

    ax.plot(regressor, endog, 'bo', label=results.model.endog_names)
    if y_true is not None:
        ax.plot(regressor, y_true[order], 'b-', label='True values')
    title = 'Fitted values versus %s' % exog_name

    ax.plot(regressor, results.fittedvalues[order], 'D', color='r',
            label='fitted', **kwargs)
    if vlines is True:
        # prediction standard errors drawn as vertical interval bars
        _, iv_l, iv_u = wls_prediction_std(results)
        ax.vlines(regressor, iv_l[order], iv_u[order], linewidth=1,
                  color='k', alpha=.7)

    ax.set_title(title)
    ax.set_xlabel(exog_name)
    ax.set_ylabel(results.model.endog_names)
    ax.legend(loc='best', numpoints=1)

    return fig

169 

170 

def plot_regress_exog(results, exog_idx, fig=None):
    """Plot regression results against one regressor.

    This plots four graphs in a 2 by 2 figure: 'endog versus exog',
    'residuals versus exog', 'fitted versus exog' and
    'fitted plus residual versus exog'

    Parameters
    ----------
    results : result instance
        A result instance with resid, model.endog and model.exog as attributes.
    exog_idx : int or str
        Name or index of regressor in exog matrix.
    fig : Figure, optional
        If given, this figure is simply returned. Otherwise a new figure is
        created.

    Returns
    -------
    Figure
        The value of `fig` if provided. Otherwise a new instance.

    Examples
    --------
    Load the Statewide Crime data set and build a model with regressors
    including the rate of high school graduation (hs_grad), population in
    urban areas (urban), households below poverty line (poverty), and single
    person households (single).  Outcome variable is the murder rate (murder).

    Build a 2 by 2 figure based on poverty showing fitted versus actual murder
    rate, residuals versus the poverty rate, partial regression plot of
    poverty, and CCPR plot for poverty rate.

    >>> import statsmodels.api as sm
    >>> import matplotlib.pyplot as plt
    >>> import statsmodels.formula.api as smf

    >>> fig = plt.figure(figsize=(8, 6))
    >>> crime_data = sm.datasets.statecrime.load_pandas()
    >>> results = smf.ols('murder ~ hs_grad + urban + poverty + single',
    ...                   data=crime_data.data).fit()
    >>> sm.graphics.plot_regress_exog(results, 'poverty', fig=fig)
    >>> plt.show()

    .. plot:: plots/graphics_regression_regress_exog.py
    """
    fig = utils.create_mpl_fig(fig)

    exog_name, exog_idx = utils.maybe_name_or_idx(exog_idx, results.model)
    results = maybe_unwrap_results(results)

    #maybe add option for wendog, wexog
    y_name = results.model.endog_names
    x1 = results.model.exog[:, exog_idx]
    prstd, iv_l, iv_u = wls_prediction_std(results)

    # subplot 1: observed and fitted values against the regressor, with
    # prediction intervals drawn as vertical lines
    ax = fig.add_subplot(2, 2, 1)
    ax.plot(x1, results.model.endog, 'o', color='b', alpha=0.9, label=y_name)
    ax.plot(x1, results.fittedvalues, 'D', color='r', label='fitted',
            alpha=.5)
    ax.vlines(x1, iv_l, iv_u, linewidth=1, color='k', alpha=.7)
    ax.set_title('Y and Fitted vs. X', fontsize='large')
    ax.set_xlabel(exog_name)
    ax.set_ylabel(y_name)
    ax.legend(loc='best')

    # subplot 2: residuals against the regressor
    ax = fig.add_subplot(2, 2, 2)
    ax.plot(x1, results.resid, 'o')
    ax.axhline(y=0, color='black')
    ax.set_title('Residuals versus %s' % exog_name, fontsize='large')
    ax.set_xlabel(exog_name)
    ax.set_ylabel("resid")

    # subplot 3: partial regression of the regressor of interest,
    # controlling for all other exog columns
    ax = fig.add_subplot(2, 2, 3)
    exog_noti = np.ones(results.model.exog.shape[1], bool)
    exog_noti[exog_idx] = False
    exog_others = results.model.exog[:, exog_noti]
    from pandas import Series
    fig = plot_partregress(results.model.data.orig_endog,
                           Series(x1, name=exog_name,
                                  index=results.model.data.row_labels),
                           exog_others, obs_labels=False, ax=ax)
    ax.set_title('Partial regression plot', fontsize='large')
    #ax.set_ylabel("Fitted values")
    #ax.set_xlabel(exog_name)

    # subplot 4: component and component-plus-residual (CCPR) plot
    ax = fig.add_subplot(2, 2, 4)
    fig = plot_ccpr(results, exog_idx, ax=ax)
    ax.set_title('CCPR Plot', fontsize='large')
    #ax.set_xlabel(exog_name)
    #ax.set_ylabel("Fitted values + resids")

    fig.suptitle('Regression Plots for %s' % exog_name, fontsize="large")

    fig.tight_layout()

    fig.subplots_adjust(top=.90)
    return fig

270 

271 

def _partial_regression(endog, exog_i, exog_others):
    """Partial regression of ``endog`` on ``exog_i`` given ``exog_others``.

    Both the response and the regressor of interest are residualized
    against ``exog_others`` with OLS; the partial regression is then the
    OLS fit of one residual series on the other.

    Parameters
    ----------
    endog : array_like
    exog_i : array_like
    exog_others : array_like

    Returns
    -------
    res1c : OLS results instance

    (res1a, res1b) : tuple of OLS results instances
        results from regression of endog on exog_others and of exog_i on
        exog_others
    """
    # FIXME: This function does not appear to be used.
    res_endog = OLS(endog, exog_others).fit()
    res_exog_i = OLS(exog_i, exog_others).fit()
    res_partial = OLS(res_endog.resid, res_exog_i.resid).fit()

    return res_partial, (res_endog, res_exog_i)

299 

300 

def plot_partregress(endog, exog_i, exog_others, data=None,
                     title_kwargs={}, obs_labels=True, label_kwargs={},
                     ax=None, ret_coords=False, **kwargs):
    """Plot partial regression for a single regressor.

    Parameters
    ----------
    endog : {ndarray, str}
        The endogenous or response variable. If string is given, you can use
        arbitrary translations as with a formula.
    exog_i : {ndarray, str}
        The exogenous, explanatory variable. If string is given, you can use
        arbitrary translations as with a formula.
    exog_others : {ndarray, list[str]}
        Any other exogenous, explanatory variables. If a list of strings is
        given, each item is a term in formula. You can use arbitrary
        translations as with a formula. The effect of these variables will be
        removed by OLS regression.
    data : {DataFrame, dict}
        Some kind of data structure with names if the other variables are
        given as strings.
    title_kwargs : dict
        Keyword arguments to pass on for the title. The key to control the
        fonts is fontdict.
    obs_labels : {bool, array_like}
        Whether or not to annotate the plot points with their observation
        labels. If obs_labels is a boolean, the point labels will try to do
        the right thing. First it will try to use the index of data, then
        fall back to the index of exog_i. Alternatively, you may give an
        array-like object corresponding to the observation numbers.
    label_kwargs : dict
        Keyword arguments that control annotate for the observation labels.
    ax : AxesSubplot, optional
        If given, this subplot is used to plot in instead of a new figure
        being created.
    ret_coords : bool
        If True will return the coordinates of the points in the plot. You
        can use this to add your own annotations.
    **kwargs
        The keyword arguments passed to plot for the points.

    Returns
    -------
    fig : Figure
        If `ax` is None, the created figure.  Otherwise the figure to which
        `ax` is connected.
    coords : list, optional
        If ret_coords is True, return a tuple of arrays (x_coords, y_coords).

    See Also
    --------
    plot_partregress_grid : Plot partial regression for a set of regressors.

    Notes
    -----
    The slope of the fitted line is that of `exog_i` in the full
    multiple regression. The individual points can be used to assess the
    influence of points on the estimated coefficient.

    Examples
    --------
    Load the Statewide Crime data set and plot partial regression of the rate
    of high school graduation (hs_grad) on the murder rate (murder).

    The effects of the percent of the population living in urban areas
    (urban), below the poverty line (poverty), and in a single person
    household (single) are removed by OLS regression.

    >>> import statsmodels.api as sm
    >>> import matplotlib.pyplot as plt

    >>> crime_data = sm.datasets.statecrime.load_pandas()
    >>> sm.graphics.plot_partregress(endog='murder', exog_i='hs_grad',
    ...                              exog_others=['urban', 'poverty', 'single'],
    ...                              data=crime_data.data, obs_labels=False)
    >>> plt.show()

    .. plot:: plots/graphics_regression_partregress.py

    More detailed examples can be found in the Regression Plots notebook
    on the examples page.
    """
    # NOTE(review): the mutable defaults title_kwargs={} / label_kwargs={}
    # are shared across calls and label_kwargs is mutated via .update()
    # below; harmless for the keys set here, but worth confirming.
    #NOTE: there is no interaction between possible missing data and
    #obs_labels yet, so this will need to be tweaked a bit for this case
    fig, ax = utils.create_mpl_ax(ax)

    # strings, use patsy to transform to data
    if isinstance(endog, str):
        endog = dmatrix(endog + "-1", data)

    if isinstance(exog_others, str):
        RHS = dmatrix(exog_others, data)
    elif isinstance(exog_others, list):
        RHS = "+".join(exog_others)
        RHS = dmatrix(RHS, data)
    else:
        RHS = exog_others
    # NOTE(review): "RHS_isemtpy" is a typo for "is_empty"; kept as-is to
    # preserve the code byte-for-byte.
    RHS_isemtpy = False
    if isinstance(RHS, np.ndarray) and RHS.size==0:
        RHS_isemtpy = True
    elif isinstance(RHS, pd.DataFrame) and RHS.empty:
        RHS_isemtpy = True
    if isinstance(exog_i, str):
        exog_i = dmatrix(exog_i + "-1", data)

    # all arrays or pandas-like

    if RHS_isemtpy:
        # nothing to control for: plot the raw variables and fit directly
        endog = np.asarray(endog)
        exog_i = np.asarray(exog_i)
        ax.plot(endog, exog_i, 'o', **kwargs)
        fitted_line = OLS(endog, exog_i).fit()
        x_axis_endog_name = 'x' if isinstance(exog_i, np.ndarray) else exog_i.name
        y_axis_endog_name = 'y' if isinstance(endog, np.ndarray) else endog.design_info.column_names[0]
    else:
        # residualize both endog and exog_i against the other regressors,
        # then plot/fit one set of residuals against the other
        res_yaxis = OLS(endog, RHS).fit()
        res_xaxis = OLS(exog_i, RHS).fit()
        xaxis_resid = res_xaxis.resid
        yaxis_resid = res_yaxis.resid
        x_axis_endog_name = res_xaxis.model.endog_names
        y_axis_endog_name = res_yaxis.model.endog_names
        ax.plot(xaxis_resid, yaxis_resid, 'o', **kwargs)
        fitted_line = OLS(yaxis_resid, xaxis_resid).fit()

    # draw the fitted (no-intercept) line through the residual cloud
    fig = abline_plot(0, fitted_line.params[0], color='k', ax=ax)

    if x_axis_endog_name == 'y':  # for no names regression will just get a y
        x_axis_endog_name = 'x'  # this is misleading, so use x
    ax.set_xlabel("e(%s | X)" % x_axis_endog_name)
    ax.set_ylabel("e(%s | X)" % y_axis_endog_name)
    ax.set_title('Partial Regression Plot', **title_kwargs)

    # NOTE: if we want to get super fancy, we could annotate if a point is
    # clicked using this widget
    # http://stackoverflow.com/questions/4652439/
    # is-there-a-matplotlib-equivalent-of-matlabs-datacursormode/
    # 4674445#4674445
    # NOTE(review): in the RHS_isemtpy branch above, res_xaxis/res_yaxis are
    # never defined, so the obs_labels fallback and ret_coords paths below
    # would raise NameError in that case — confirm intended behavior.
    if obs_labels is True:
        if data is not None:
            obs_labels = data.index
        elif hasattr(exog_i, "index"):
            obs_labels = exog_i.index
        else:
            obs_labels = res_xaxis.model.data.row_labels
        #NOTE: row_labels can be None.
        #Maybe we should fix this to never be the case.
        if obs_labels is None:
            obs_labels = lrange(len(exog_i))

    if obs_labels is not False:  # could be array_like
        if len(obs_labels) != len(exog_i):
            raise ValueError("obs_labels does not match length of exog_i")
        label_kwargs.update(dict(ha="center", va="bottom"))
        ax = utils.annotate_axes(lrange(len(obs_labels)), obs_labels,
                                 lzip(res_xaxis.resid, res_yaxis.resid),
                                 [(0, 5)] * len(obs_labels), "x-large", ax=ax,
                                 **label_kwargs)

    if ret_coords:
        return fig, (res_xaxis.resid, res_yaxis.resid)
    else:
        return fig

463 

464 

def plot_partregress_grid(results, exog_idx=None, grid=None, fig=None):
    """
    Plot partial regression for a set of regressors.

    Parameters
    ----------
    results : Results instance
        A regression model results instance.
    exog_idx : {None, list[int], list[str]}
        The indices or column names of the exog used in the plot, default is
        all.
    grid : {None, tuple[int]}
        If grid is given, then it is used for the arrangement of the subplots.
        The format of grid is (nrows, ncols). If grid is None, then ncol is
        one, if there are only 2 subplots, and the number of columns is two
        otherwise.
    fig : Figure, optional
        If given, this figure is simply returned. Otherwise a new figure is
        created.

    Returns
    -------
    Figure
        If `fig` is None, the created figure.  Otherwise `fig` itself.

    See Also
    --------
    plot_partregress : Plot partial regression for a single regressor.
    plot_ccpr : Plot CCPR against one regressor

    Notes
    -----
    A subplot is created for each explanatory variable given by exog_idx.
    The partial regression plot shows the relationship between the response
    and the given explanatory variable after removing the effect of all other
    explanatory variables in exog.

    References
    ----------
    See http://www.itl.nist.gov/div898/software/dataplot/refman1/auxillar/partregr.htm

    Examples
    --------
    Using the state crime dataset separately plot the effect of the each
    variable on the on the outcome, murder rate while accounting for the
    effect of all other variables in the model visualized with a grid of
    partial regression plots.

    >>> from statsmodels.graphics.regressionplots import plot_partregress_grid
    >>> import statsmodels.api as sm
    >>> import matplotlib.pyplot as plt
    >>> import statsmodels.formula.api as smf

    >>> fig = plt.figure(figsize=(8, 6))
    >>> crime_data = sm.datasets.statecrime.load_pandas()
    >>> results = smf.ols('murder ~ hs_grad + urban + poverty + single',
    ...                   data=crime_data.data).fit()
    >>> plot_partregress_grid(results, fig=fig)
    >>> plt.show()

    .. plot:: plots/graphics_regression_partregress_grid.py
    """
    import pandas
    fig = utils.create_mpl_fig(fig)

    exog_name, exog_idx = utils.maybe_name_or_idx(exog_idx, results.model)

    # TODO: maybe add option for using wendog, wexog instead
    y = pandas.Series(results.model.endog, name=results.model.endog_names)
    exog = results.model.exog

    k_vars = exog.shape[1]
    # this function does not make sense if k_vars=1

    nrows = (len(exog_idx) + 1) // 2
    ncols = 1 if nrows == len(exog_idx) else 2
    if grid is not None:
        nrows, ncols = grid
    # Shrink the title font only for multi-column layouts.  title_kwargs is
    # always initialized: previously it was assigned only when ncols > 1,
    # which raised NameError for single-column layouts below.
    title_kwargs = {}
    if ncols > 1:
        title_kwargs = {"fontdict": {"fontsize": 'small'}}

    # for indexing purposes
    other_names = np.array(results.model.exog_names)
    for i, idx in enumerate(exog_idx):
        # regress on all columns except the one being plotted
        others = lrange(k_vars)
        others.pop(idx)
        exog_others = pandas.DataFrame(exog[:, others],
                                       columns=other_names[others])
        ax = fig.add_subplot(nrows, ncols, i + 1)
        plot_partregress(y, pandas.Series(exog[:, idx],
                                          name=other_names[idx]),
                         exog_others, ax=ax, title_kwargs=title_kwargs,
                         obs_labels=False)
        ax.set_title("")

    fig.suptitle("Partial Regression Plot", fontsize="large")
    fig.tight_layout()
    fig.subplots_adjust(top=.95)

    return fig

565 

566 

def plot_ccpr(results, exog_idx, ax=None):
    """
    Plot CCPR against one regressor.

    Generates a component and component-plus-residual (CCPR) plot.

    Parameters
    ----------
    results : result instance
        A regression results instance.
    exog_idx : {int, str}
        Exogenous, explanatory variable. If string is given, it should
        be the variable name that you want to use, and you can use arbitrary
        translations as with a formula.
    ax : AxesSubplot, optional
        If given, it is used to plot in instead of a new figure being
        created.

    Returns
    -------
    Figure
        If `ax` is None, the created figure.  Otherwise the figure to which
        `ax` is connected.

    See Also
    --------
    plot_ccpr_grid : Creates CCPR plot for multiple regressors in a plot grid.

    Notes
    -----
    The CCPR plot provides a way to judge the effect of one regressor on the
    response variable by taking into account the effects of the other
    independent variables. The partial residuals plot is defined as
    Residuals + B_i*X_i versus X_i. The component adds the B_i*X_i versus
    X_i to show where the fitted line would lie. Care should be taken if X_i
    is highly correlated with any of the other independent variables. If this
    is the case, the variance evident in the plot will be an underestimate of
    the true variance.

    References
    ----------
    http://www.itl.nist.gov/div898/software/dataplot/refman1/auxillar/ccpr.htm

    Examples
    --------
    Using the state crime dataset plot the effect of the rate of single
    households ('single') on the murder rate while accounting for high school
    graduation rate ('hs_grad'), percentage of people in an urban area, and
    rate of poverty ('poverty').

    >>> import statsmodels.api as sm
    >>> import matplotlib.pyplot as plt
    >>> import statsmodels.formula.api as smf

    >>> crime_data = sm.datasets.statecrime.load_pandas()
    >>> results = smf.ols('murder ~ hs_grad + urban + poverty + single',
    ...                   data=crime_data.data).fit()
    >>> sm.graphics.plot_ccpr(results, 'single')
    >>> plt.show()

    .. plot:: plots/graphics_regression_ccpr.py
    """
    fig, ax = utils.create_mpl_ax(ax)

    exog_name, exog_idx = utils.maybe_name_or_idx(exog_idx, results.model)
    results = maybe_unwrap_results(results)

    from statsmodels.tools.tools import add_constant

    # component B_i * X_i and the partial residuals B_i * X_i + resid
    x_i = results.model.exog[:, exog_idx]
    component = x_i * results.params[exog_idx]
    ax.plot(x_i, component + results.resid, 'o')

    # overlay the least-squares line fitted through the component itself
    line_fit = OLS(component, add_constant(x_i)).fit()
    fig = abline_plot(*line_fit.params, ax=ax)

    ax.set_title('Component and component plus residual plot')
    ax.set_ylabel("Residual + %s*beta_%d" % (exog_name, exog_idx))
    ax.set_xlabel("%s" % exog_name)

    return fig

648 

649 

def plot_ccpr_grid(results, exog_idx=None, grid=None, fig=None):
    """
    Generate CCPR plots against a set of regressors, plot in a grid.

    Generates a grid of component and component-plus-residual (CCPR) plots.

    Parameters
    ----------
    results : result instance
        A results instance with exog and params.
    exog_idx : None or list of int
        The indices or column names of the exog used in the plot.
    grid : None or tuple of int (nrows, ncols)
        If grid is given, then it is used for the arrangement of the subplots.
        If grid is None, then ncol is one, if there are only 2 subplots, and
        the number of columns is two otherwise.
    fig : Figure, optional
        If given, this figure is simply returned. Otherwise a new figure is
        created.

    Returns
    -------
    Figure
        If `ax` is None, the created figure.  Otherwise the figure to which
        `ax` is connected.

    See Also
    --------
    plot_ccpr : Creates CCPR plot for a single regressor.

    Notes
    -----
    Partial residual plots are formed as::

        Res + Betahat(i)*Xi versus Xi

    and CCPR adds::

        Betahat(i)*Xi versus Xi

    References
    ----------
    See http://www.itl.nist.gov/div898/software/dataplot/refman1/auxillar/ccpr.htm

    Examples
    --------
    Using the state crime dataset separately plot the effect of the each
    variable on the on the outcome, murder rate while accounting for the
    effect of all other variables in the model.

    >>> import statsmodels.api as sm
    >>> import matplotlib.pyplot as plt
    >>> import statsmodels.formula.api as smf

    >>> fig = plt.figure(figsize=(8, 8))
    >>> crime_data = sm.datasets.statecrime.load_pandas()
    >>> results = smf.ols('murder ~ hs_grad + urban + poverty + single',
    ...                   data=crime_data.data).fit()
    >>> sm.graphics.plot_ccpr_grid(results, fig=fig)
    >>> plt.show()

    .. plot:: plots/graphics_regression_ccpr_grid.py
    """
    fig = utils.create_mpl_fig(fig)

    exog_name, exog_idx = utils.maybe_name_or_idx(exog_idx, results.model)

    if grid is not None:
        nrows, ncols = grid
    else:
        if len(exog_idx) > 2:
            nrows = int(np.ceil(len(exog_idx)/2.))
            ncols = 2
        else:
            nrows = len(exog_idx)
            ncols = 1

    # a zero-variance column is treated as the constant and skipped;
    # seen_constant shifts the remaining subplot positions back by one
    seen_constant = 0
    for i, idx in enumerate(exog_idx):
        if results.model.exog[:, idx].var() == 0:
            seen_constant = 1
            continue

        ax = fig.add_subplot(nrows, ncols, i+1-seen_constant)
        fig = plot_ccpr(results, exog_idx=idx, ax=ax)
        ax.set_title("")

    fig.suptitle("Component-Component Plus Residual Plot", fontsize="large")

    fig.tight_layout()

    fig.subplots_adjust(top=.95)
    return fig

743 

744 

def abline_plot(intercept=None, slope=None, horiz=None, vert=None,
                model_results=None, ax=None, **kwargs):
    """
    Plot a line given an intercept and slope.

    Parameters
    ----------
    intercept : float
        The intercept of the line.
    slope : float
        The slope of the line.
    horiz : float or array_like
        Data for horizontal lines on the y-axis.
    vert : float or array_like
        Data for vertical lines on the x-axis.
    model_results : statsmodels results instance
        Any object that has a two-value `params` attribute. Assumed that it
        is (intercept, slope).
    ax : axes, optional
        Matplotlib axes instance.
    **kwargs
        Options passed to matplotlib.pyplot.plt.

    Returns
    -------
    Figure
        The figure given by `ax.figure` or a new instance.

    Examples
    --------
    >>> import numpy as np
    >>> import statsmodels.api as sm

    >>> np.random.seed(12345)
    >>> X = sm.add_constant(np.random.normal(0, 20, size=30))
    >>> y = np.dot(X, [25, 3.5]) + np.random.normal(0, 30, size=30)
    >>> mod = sm.OLS(y,X).fit()
    >>> fig = sm.graphics.abline_plot(model_results=mod)
    >>> ax = fig.axes[0]
    >>> ax.scatter(X[:,1], y)
    >>> ax.margins(.1)
    >>> import matplotlib.pyplot as plt
    >>> plt.show()

    .. plot:: plots/graphics_regression_abline.py
    """
    if ax is not None:  # get axis limits first thing, do not change these
        x = ax.get_xlim()
    else:
        x = None

    fig, ax = utils.create_mpl_ax(ax)

    if model_results:
        intercept, slope = model_results.params
        if x is None:
            # span the line over the observed range of the regressor
            x = [model_results.model.exog[:, 1].min(),
                 model_results.model.exog[:, 1].max()]
    else:
        if not (intercept is not None and slope is not None):
            # fixed typo in the original message ("intercepty")
            raise ValueError("specify slope and intercept or model_results")
        if x is None:
            x = ax.get_xlim()

    data_y = [x[0]*slope+intercept, x[1]*slope+intercept]
    ax.set_xlim(x)
    #ax.set_ylim(y)

    from matplotlib.lines import Line2D

    class ABLine2D(Line2D):
        # A line that re-spans itself (via the closed-over slope/intercept)
        # whenever the axes limits change.

        def __init__(self, *args, **kwargs):
            super(ABLine2D, self).__init__(*args, **kwargs)
            self.id_xlim_callback = None
            self.id_ylim_callback = None

        def remove(self):
            # disconnect limit callbacks before removing, so the axes do
            # not keep a reference to a dead line
            ax = self.axes
            if self.id_xlim_callback:
                ax.callbacks.disconnect(self.id_xlim_callback)
            if self.id_ylim_callback:
                ax.callbacks.disconnect(self.id_ylim_callback)
            super(ABLine2D, self).remove()

        def update_datalim(self, ax):
            ax.set_autoscale_on(False)
            children = ax.get_children()
            ablines = [child for child in children if child is self]
            abline = ablines[0]
            x = ax.get_xlim()
            y = [x[0] * slope + intercept, x[1] * slope + intercept]
            abline.set_data(x, y)
            ax.figure.canvas.draw()

    # TODO: how to intercept something like a margins call and adjust?
    line = ABLine2D(x, data_y, **kwargs)
    ax.add_line(line)
    line.id_xlim_callback = ax.callbacks.connect('xlim_changed',
                                                 line.update_datalim)
    line.id_ylim_callback = ax.callbacks.connect('ylim_changed',
                                                 line.update_datalim)

    # Draw any requested reference lines.  matplotlib Axes has no
    # ``hline``/``vline`` methods; the previous code called them and would
    # raise AttributeError.  Use axhline/axvline and accept scalars or
    # array_like (per the docstring).  ``is not None`` also allows a line
    # at 0, which the old truthiness test silently skipped.
    if horiz is not None:
        for yval in np.atleast_1d(horiz):
            ax.axhline(y=yval)
    if vert is not None:
        for xval in np.atleast_1d(vert):
            ax.axvline(x=xval)
    return fig

850 

851 

@Appender(_plot_influence_doc.format(**{
    'extra_params_doc': "results: object\n"
                        "        Results for a fitted regression model.\n"
                        "    influence: instance\n"
                        "        The instance of Influence for model."}))
def _influence_plot(results, influence, external=True, alpha=.05,
                    criterion="cooks", size=48, plot_alpha=.75, ax=None,
                    **kwargs):
    # NOTE(review): **kwargs is accepted but never used below — confirm
    # whether it should be forwarded to ax.scatter.
    infl = influence
    fig, ax = utils.create_mpl_ax(ax)

    # point sizes encode the chosen influence criterion
    if criterion.lower().startswith('coo'):
        psize = infl.cooks_distance[0]
    elif criterion.lower().startswith('dff'):
        psize = np.abs(infl.dffits[0])
    else:
        raise ValueError("Criterion %s not understood" % criterion)

    # scale the variables
    #TODO: what is the correct scaling and the assumption here?
    #we want plots to be comparable across different plots
    #so we would need to use the expected distribution of criterion probably
    old_range = np.ptp(psize)
    new_range = size**2 - 8**2

    # NOTE(review): if all criterion values are identical, old_range is 0
    # and this divides by zero — confirm intended handling.
    psize = (psize - psize.min()) * new_range/old_range + 8**2

    leverage = infl.hat_matrix_diag
    if external:
        resids = infl.resid_studentized_external
    else:
        resids = infl.resid_studentized

    from scipy import stats

    # two-sided t cutoff for flagging large studentized residuals
    cutoff = stats.t.ppf(1.-alpha/2, results.df_resid)
    large_resid = np.abs(resids) > cutoff
    large_leverage = leverage > _high_leverage(results)
    large_points = np.logical_or(large_resid, large_leverage)

    ax.scatter(leverage, resids, s=psize, alpha=plot_alpha)

    # add point labels (only the flagged, influential observations)
    labels = results.model.data.row_labels
    if labels is None:
        labels = lrange(len(resids))
    ax = utils.annotate_axes(np.where(large_points)[0], labels,
                             lzip(leverage, resids),
                             lzip(-(psize/2)**.5, (psize/2)**.5), "x-large",
                             ax)

    # TODO: make configurable or let people do it ex-post?
    font = {"fontsize": 16, "color": "black"}
    ax.set_ylabel("Studentized Residuals", **font)
    ax.set_xlabel("H Leverage", **font)
    ax.set_title("Influence Plot", **font)
    return fig

909 

910 

@Appender(_plot_influence_doc.format(**{
    'extra_params_doc': "results : Results\n"
                        " Results for a fitted regression model."}))
def influence_plot(results, external=True, alpha=.05, criterion="cooks",
                   size=48, plot_alpha=.75, ax=None, **kwargs):
    # Convenience wrapper: compute the influence measures for `results`
    # and delegate all plotting to _influence_plot.
    influence = results.get_influence()
    return _influence_plot(results, influence, external=external,
                           alpha=alpha, criterion=criterion, size=size,
                           plot_alpha=plot_alpha, ax=ax, **kwargs)

922 

923 

@Appender(_plot_leverage_resid2_doc.format({
    'extra_params_doc': "results: object\n"
                        " Results for a fitted regression model\n"
                        "influence: instance\n"
                        " instance of Influence for model"}))
def _plot_leverage_resid2(results, influence, alpha=.05, ax=None,
                          **kwargs):
    # Plot leverage against squared normalized residuals and annotate the
    # observations that are extreme in either dimension.
    from scipy.stats import zscore, norm

    fig, ax = utils.create_mpl_ax(ax)

    lev = influence.hat_matrix_diag
    std_resid = zscore(influence.resid)
    sq_resid = std_resid**2
    ax.plot(sq_resid, lev, 'o', **kwargs)
    ax.set_xlabel("Normalized residuals**2")
    ax.set_ylabel("Leverage")
    ax.set_title("Leverage vs. Normalized residuals squared")

    nobs = int(results.nobs)
    high_lev = lev > _high_leverage(results)
    #norm or t here if standardized?
    cutoff = norm.ppf(1.-alpha/2)
    big_resid = np.abs(std_resid) > cutoff
    labels = results.model.data.row_labels
    if labels is None:
        labels = lrange(nobs)
    flagged = np.where(np.logical_or(high_lev, big_resid))[0]
    ax = utils.annotate_axes(flagged, labels, lzip(sq_resid, lev),
                             [(0, 5)]*nobs, "large",
                             ax=ax, ha="center", va="bottom")
    ax.margins(.075, .075)
    return fig

956 

957 

@Appender(_plot_leverage_resid2_doc.format({
    'extra_params_doc': "results : object\n"
                        " Results for a fitted regression model"}))
def plot_leverage_resid2(results, alpha=.05, ax=None, **kwargs):
    # Convenience wrapper: obtain the influence measures and delegate the
    # plotting to _plot_leverage_resid2.
    influence = results.get_influence()
    return _plot_leverage_resid2(results, influence, alpha=alpha, ax=ax,
                                 **kwargs)

965 

966 

967 

@Appender(_plot_added_variable_doc % {
    'extra_params_doc': "results : object\n"
                        " Results for a fitted regression model"})
def plot_added_variable(results, focus_exog, resid_type=None,
                        use_glm_weights=True, fit_kwargs=None, ax=None):
    # Scatter the residualized endog against the residualized focus
    # variable (an "added variable" / partial regression plot).
    model = results.model

    fig, ax = utils.create_mpl_ax(ax)

    endog_resid, focus_exog_resid = added_variable_resids(
        results, focus_exog,
        resid_type=resid_type,
        use_glm_weights=use_glm_weights,
        fit_kwargs=fit_kwargs)

    ax.plot(focus_exog_resid, endog_resid, 'o', alpha=0.6)

    ax.set_title('Added variable plot', fontsize='large')

    # focus_exog may be given as a name or a column index.
    if isinstance(focus_exog, str):
        xname = focus_exog
    else:
        xname = model.exog_names[focus_exog]
    ax.set_xlabel(xname, size=15)
    ax.set_ylabel(model.endog_names + " residuals", size=15)

    return fig

996 

997 

@Appender(_plot_partial_residuals_doc % {
    'extra_params_doc': "results : object\n"
                        " Results for a fitted regression model"})
def plot_partial_residuals(results, focus_exog, ax=None):
    # Plot component-plus-residual values against the focus covariate.
    model = results.model

    focus_exog, focus_col = utils.maybe_name_or_idx(focus_exog, model)

    presid = partial_resids(results, focus_exog)
    xvals = model.exog[:, focus_col]

    fig, ax = utils.create_mpl_ax(ax)
    ax.plot(xvals, presid, 'o', alpha=0.6)

    ax.set_title('Partial residuals plot', fontsize='large')

    # focus_exog may be a name or a column index at this point.
    if isinstance(focus_exog, str):
        xname = focus_exog
    else:
        xname = model.exog_names[focus_exog]
    ax.set_xlabel(xname, size=15)
    ax.set_ylabel("Component plus residual", size=15)

    return fig

1024 

1025 

@Appender(_plot_ceres_residuals_doc % {
    'extra_params_doc': "results : Results\n"
                        " Results instance of a fitted regression "
                        "model."})
def plot_ceres_residuals(results, focus_exog, frac=0.66, cond_means=None,
                         ax=None):
    # Plot CERES residuals against the focus covariate.
    model = results.model

    focus_exog, focus_col = utils.maybe_name_or_idx(focus_exog, model)

    ceres = ceres_resids(results, focus_exog, frac=frac,
                         cond_means=cond_means)
    xvals = model.exog[:, focus_col]

    fig, ax = utils.create_mpl_ax(ax)
    ax.plot(xvals, ceres, 'o', alpha=0.6)

    ax.set_title('CERES residuals plot', fontsize='large')

    ax.set_xlabel(focus_exog, size=15)
    ax.set_ylabel("Component plus residual", size=15)

    return fig

1051 

1052 

def ceres_resids(results, focus_exog, frac=0.66, cond_means=None):
    """
    Calculate the CERES residuals (Conditional Expectation Partial
    Residuals) for a fitted model.

    Parameters
    ----------
    results : model results instance
        The fitted model for which the CERES residuals are calculated.
    focus_exog : int
        The column of results.model.exog used as the 'focus variable'.
    frac : float, optional
        Lowess smoothing parameter for estimating the conditional
        means. Not used if `cond_means` is provided.
    cond_means : array_like, optional
        If provided, the columns of this array are the conditional
        means E[exog | focus exog], where exog ranges over some
        or all of the columns of exog other than focus exog. If
        this is an empty nx0 array, the conditional means are
        treated as being zero. If None, the conditional means are
        estimated.

    Returns
    -------
    An array containing the CERES residuals.

    Notes
    -----
    If `cond_means` is not provided, it is obtained by smoothing each
    column of exog (except the focus column) against the focus column.

    Currently only supports GLM, GEE, and OLS models.
    """

    model = results.model

    if not isinstance(model, (GLM, GEE, OLS)):
        raise ValueError("ceres residuals not available for %s" %
                         model.__class__.__name__)

    # Accept either a name or a column index for the focus variable.
    focus_exog, focus_col = utils.maybe_name_or_idx(focus_exog, model)

    # Indices of non-focus columns
    ix_nf = range(len(results.params))
    ix_nf = list(ix_nf)
    ix_nf.pop(focus_col)
    nnf = len(ix_nf)  # number of non-focus columns

    # Estimate the conditional means if not provided.
    if cond_means is None:

        # Below we calculate E[x | focus] where x is each column other
        # than the focus column. We do not want the intercept when we do
        # this so we remove it here.
        pexog = model.exog[:, ix_nf]
        pexog -= pexog.mean(0)
        # After centering, a constant column becomes zero; keeping only
        # directions with non-negligible singular values drops it and any
        # exactly collinear directions before smoothing.
        u, s, vt = np.linalg.svd(pexog, 0)
        ii = np.flatnonzero(s > 1e-6)
        pexog = u[:, ii]

        fcol = model.exog[:, focus_col]
        # One smoothed conditional-mean column per retained direction.
        cond_means = np.empty((len(fcol), pexog.shape[1]))
        for j in range(pexog.shape[1]):

            # Get the fitted values for column i given the other
            # columns (skip the intercept).
            y0 = pexog[:, j]

            # lowess smooth of this column against the focus column,
            # evaluated at the original (unsorted) focus values.
            cf = lowess(y0, fcol, frac=frac, return_sorted=False)

            cond_means[:, j] = cf

    # Augment the non-focus columns with the conditional-mean columns.
    new_exog = np.concatenate((model.exog[:, ix_nf], cond_means), axis=1)

    # Refit the model using the adjusted exog values
    klass = model.__class__
    init_kwargs = model._get_init_kwds()
    new_model = klass(model.endog, new_exog, **init_kwargs)
    new_result = new_model.fit()

    # The partial residual, with respect to l(x2) (notation of Cook 1998)
    presid = model.endog - new_result.fittedvalues
    if isinstance(model, (GLM, GEE)):
        # Scale by the derivative of the link at the fitted mean.
        presid *= model.family.link.deriv(new_result.fittedvalues)
    if new_exog.shape[1] > nnf:
        # Add back the contribution of the conditional-mean columns
        # (skipped when cond_means is an empty nx0 array).
        presid += np.dot(new_exog[:, nnf:], new_result.params[nnf:])

    return presid

1141 

def partial_resids(results, focus_exog):
    """
    Returns partial residuals for a fitted model with respect to a
    'focus predictor'.

    Parameters
    ----------
    results : results instance
        A fitted regression model.
    focus_exog : {int, str}
        The column index of model.exog, or a variable name, with
        respect to which the partial residuals are calculated.

    Returns
    -------
    An array of partial residuals.

    References
    ----------
    RD Cook and R Croos-Dabrera (1998). Partial residual plots in
    generalized linear models. Journal of the American Statistical
    Association, 93:442.
    """

    # TODO: could be a method of results
    # TODO: see Cook et al (1998) for a more general definition

    # The calculation follows equation (8) from Cook's paper.
    model = results.model
    resid = model.endog - results.predict()

    if isinstance(model, (GLM, GEE)):
        # Scale working residuals by the link derivative at the fit.
        resid *= model.family.link.deriv(results.fittedvalues)
    elif isinstance(model, (OLS, GLS, WLS)):
        pass  # No need to do anything
    else:
        raise ValueError("Partial residuals for '%s' not implemented."
                         % type(model))

    # Use isinstance (matches the rest of this module) rather than an
    # exact type check, so str subclasses are also accepted.
    if isinstance(focus_exog, str):
        focus_col = model.exog_names.index(focus_exog)
    else:
        focus_col = focus_exog

    # Component-plus-residual: beta_j * x_j + residual.
    focus_val = results.params[focus_col] * model.exog[:, focus_col]

    return focus_val + resid

1189 

def added_variable_resids(results, focus_exog, resid_type=None,
                          use_glm_weights=True, fit_kwargs=None):
    """
    Residualize the endog variable and a 'focus' exog variable in a
    regression model with respect to the other exog variables.

    Parameters
    ----------
    results : regression results instance
        A fitted model including the focus exog and all other
        predictors of interest.
    focus_exog : {int, str}
        The column of results.model.exog or a variable name that is
        to be residualized against the other predictors.
    resid_type : str
        The type of residuals to use for the dependent variable. If
        None, uses `resid_deviance` for GLM/GEE and `resid` otherwise.
    use_glm_weights : bool
        Only used if the model is a GLM or GEE. If True, the
        residuals for the focus predictor are computed using WLS, with
        the weights obtained from the IRLS calculations for fitting
        the GLM. If False, unweighted regression is used.
    fit_kwargs : dict, optional
        Keyword arguments to be passed to fit when refitting the
        model.

    Returns
    -------
    endog_resid : array_like
        The residuals for the original exog
    focus_exog_resid : array_like
        The residuals for the focus predictor

    Raises
    ------
    ValueError
        If the model type is unsupported, the reduced-model fit does
        not converge, or `resid_type` is not an attribute of the
        refitted results.

    Notes
    -----
    The 'focus variable' residuals are always obtained using linear
    regression.

    Currently only GLM, GEE, and OLS models are supported.
    """

    model = results.model
    if not isinstance(model, (GEE, GLM, OLS)):
        raise ValueError("model type %s not supported for added variable residuals" %
                         model.__class__.__name__)

    exog = model.exog
    endog = model.endog

    focus_exog, focus_col = utils.maybe_name_or_idx(focus_exog, model)

    focus_exog_vals = exog[:, focus_col]

    # Default residuals
    if resid_type is None:
        if isinstance(model, (GEE, GLM)):
            resid_type = "resid_deviance"
        else:
            resid_type = "resid"

    # Refit the model with the focus column removed.
    ii = range(exog.shape[1])
    ii = list(ii)
    ii.pop(focus_col)
    reduced_exog = exog[:, ii]
    start_params = results.params[ii]

    klass = model.__class__

    kwargs = model._get_init_kwds()
    new_model = klass(endog, reduced_exog, **kwargs)
    args = {"start_params": start_params}
    if fit_kwargs is not None:
        args.update(fit_kwargs)
    new_result = new_model.fit(**args)
    # OLS results have no `converged` attribute (only iterative fits such
    # as GLM/GEE do), so default to True instead of raising AttributeError.
    if not getattr(new_result, "converged", True):
        raise ValueError("fit did not converge when calculating added variable residuals")

    try:
        endog_resid = getattr(new_result, resid_type)
    except AttributeError:
        raise ValueError("'%s' residual type not available" % resid_type)

    # Residualize the focus column against the remaining predictors; use
    # the module-level OLS/WLS imports rather than re-importing linear_model.
    if isinstance(model, (GLM, GEE)) and use_glm_weights:
        weights = model.family.weights(results.fittedvalues)
        if hasattr(model, "data_weights"):
            weights = weights * model.data_weights
        lm_results = WLS(focus_exog_vals, reduced_exog, weights).fit()
    else:
        lm_results = OLS(focus_exog_vals, reduced_exog).fit()
    focus_exog_resid = lm_results.resid

    return endog_resid, focus_exog_resid