Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# -*- coding: utf-8 -*- 

2""" 

3Spline and other smoother classes for Generalized Additive Models 

4 

5Author: Luca Puggini 

6Author: Josef Perktold 

7 

8Created on Fri Jun 5 16:32:00 2015 

9""" 

10 

11# import useful only for development 

12from abc import ABCMeta, abstractmethod 

13from statsmodels.compat.python import with_metaclass 

14 

15import numpy as np 

16import pandas as pd 

17from patsy import dmatrix 

18from patsy.mgcv_cubic_splines import _get_all_sorted_knots 

19 

20from statsmodels.tools.linalg import transf_constraints 

21 

22 

23# Obtain b splines from patsy 

24 

25def _equally_spaced_knots(x, df): 

26 n_knots = df - 2 

27 x_min = x.min() 

28 x_max = x.max() 

29 knots = np.linspace(x_min, x_max, n_knots) 

30 return knots 

31 

32 

33def _R_compat_quantile(x, probs): 

34 # return np.percentile(x, 100 * np.asarray(probs)) 

35 probs = np.asarray(probs) 

36 quantiles = np.asarray([np.percentile(x, 100 * prob) 

37 for prob in probs.ravel(order="C")]) 

38 return quantiles.reshape(probs.shape, order="C") 

39 

40 

41# FIXME: is this copy/pasted? If so, why do we need it? If not, get 

42# rid of the try/except for scipy import 

43# from patsy splines.py 

44def _eval_bspline_basis(x, knots, degree, deriv='all', include_intercept=True): 

45 try: 

46 from scipy.interpolate import splev 

47 except ImportError: 

48 raise ImportError("spline functionality requires scipy") 

49 # 'knots' are assumed to be already pre-processed. E.g. usually you 

50 # want to include duplicate copies of boundary knots; you should do 

51 # that *before* calling this constructor. 

52 knots = np.atleast_1d(np.asarray(knots, dtype=float)) 

53 assert knots.ndim == 1 

54 knots.sort() 

55 degree = int(degree) 

56 x = np.atleast_1d(x) 

57 if x.ndim == 2 and x.shape[1] == 1: 

58 x = x[:, 0] 

59 assert x.ndim == 1 

60 # XX FIXME: when points fall outside of the boundaries, splev and R seem 

61 # to handle them differently. I do not know why yet. So until we understand 

62 # this and decide what to do with it, I'm going to play it safe and 

63 # disallow such points. 

64 if np.min(x) < np.min(knots) or np.max(x) > np.max(knots): 

65 raise NotImplementedError("some data points fall outside the " 

66 "outermost knots, and I'm not sure how " 

67 "to handle them. (Patches accepted!)") 

68 # Thanks to Charles Harris for explaining splev. It's not well 

69 # documented, but basically it computes an arbitrary b-spline basis 

70 # given knots and degree on some specificed points (or derivatives 

71 # thereof, but we do not use that functionality), and then returns some 

72 # linear combination of these basis functions. To get out the basis 

73 # functions themselves, we use linear combinations like [1, 0, 0], [0, 

74 # 1, 0], [0, 0, 1]. 

75 # NB: This probably makes it rather inefficient (though I have not checked 

76 # to be sure -- maybe the fortran code actually skips computing the basis 

77 # function for coefficients that are zero). 

78 # Note: the order of a spline is the same as its degree + 1. 

79 # Note: there are (len(knots) - order) basis functions. 

80 

81 k_const = 1 - int(include_intercept) 

82 n_bases = len(knots) - (degree + 1) - k_const 

83 if deriv in ['all', 0]: 

84 basis = np.empty((x.shape[0], n_bases), dtype=float) 

85 ret = basis 

86 if deriv in ['all', 1]: 

87 der1_basis = np.empty((x.shape[0], n_bases), dtype=float) 

88 ret = der1_basis 

89 if deriv in ['all', 2]: 

90 der2_basis = np.empty((x.shape[0], n_bases), dtype=float) 

91 ret = der2_basis 

92 

93 for i in range(n_bases): 

94 coefs = np.zeros((n_bases + k_const,)) 

95 # we are skipping the first column of the basis to drop constant 

96 coefs[i + k_const] = 1 

97 ii = i 

98 if deriv in ['all', 0]: 

99 basis[:, ii] = splev(x, (knots, coefs, degree)) 

100 if deriv in ['all', 1]: 

101 der1_basis[:, ii] = splev(x, (knots, coefs, degree), der=1) 

102 if deriv in ['all', 2]: 

103 der2_basis[:, ii] = splev(x, (knots, coefs, degree), der=2) 

104 

105 if deriv == 'all': 

106 return basis, der1_basis, der2_basis 

107 else: 

108 return ret 

109 

110 

111def compute_all_knots(x, df, degree): 

112 order = degree + 1 

113 n_inner_knots = df - order 

114 lower_bound = np.min(x) 

115 upper_bound = np.max(x) 

116 knot_quantiles = np.linspace(0, 1, n_inner_knots + 2)[1:-1] 

117 inner_knots = _R_compat_quantile(x, knot_quantiles) 

118 all_knots = np.concatenate(([lower_bound, upper_bound] * order, 

119 inner_knots)) 

120 return all_knots, lower_bound, upper_bound, inner_knots 

121 

122 

123def make_bsplines_basis(x, df, degree): 

124 ''' make a spline basis for x ''' 

125 

126 all_knots, _, _, _ = compute_all_knots(x, df, degree) 

127 basis, der_basis, der2_basis = _eval_bspline_basis(x, all_knots, degree) 

128 return basis, der_basis, der2_basis 

129 

130 

131def get_knots_bsplines(x=None, df=None, knots=None, degree=3, 

132 spacing='quantile', lower_bound=None, 

133 upper_bound=None, all_knots=None): 

134 """knots for use in B-splines 

135 

136 There are two main options for the knot placement 

137 

138 - quantile spacing with multiplicity of boundary knots 

139 - equal spacing extended to boundary or exterior knots 

140 

141 The first corresponds to splines as used by patsy. the second is the 

142 knot spacing for P-Splines. 

143 """ 

144 # based on patsy memorize_finish 

145 if all_knots is not None: 

146 return all_knots 

147 

148 x_min = x.min() 

149 x_max = x.max() 

150 

151 if degree < 0: 

152 raise ValueError("degree must be greater than 0 (not %r)" 

153 % (degree,)) 

154 if int(degree) != degree: 

155 raise ValueError("degree must be an integer (not %r)" 

156 % (degree,)) 

157 

158 # These are guaranteed to all be 1d vectors by the code above 

159 # x = np.concatenate(tmp["xs"]) 

160 if df is None and knots is None: 

161 raise ValueError("must specify either df or knots") 

162 order = degree + 1 

163 if df is not None: 

164 n_inner_knots = df - order 

165 if n_inner_knots < 0: 

166 raise ValueError("df=%r is too small for degree=%r; must be >= %s" 

167 % (df, degree, 

168 # We know that n_inner_knots is negative; 

169 # if df were that much larger, it would 

170 # have been zero, and things would work. 

171 df - n_inner_knots)) 

172 if knots is not None: 

173 if len(knots) != n_inner_knots: 

174 raise ValueError("df=%s with degree=%r implies %s knots, " 

175 "but %s knots were provided" 

176 % (df, degree, 

177 n_inner_knots, len(knots))) 

178 elif spacing == 'quantile': 

179 # Need to compute inner knots 

180 knot_quantiles = np.linspace(0, 1, n_inner_knots + 2)[1:-1] 

181 inner_knots = _R_compat_quantile(x, knot_quantiles) 

182 elif spacing == 'equal': 

183 # Need to compute inner knots 

184 grid = np.linspace(0, 1, n_inner_knots + 2)[1:-1] 

185 inner_knots = x_min + grid * (x_max - x_min) 

186 diff_knots = inner_knots[1] - inner_knots[0] 

187 else: 

188 raise ValueError("incorrect option for spacing") 

189 if knots is not None: 

190 inner_knots = knots 

191 if lower_bound is None: 

192 lower_bound = np.min(x) 

193 if upper_bound is None: 

194 upper_bound = np.max(x) 

195 

196 if lower_bound > upper_bound: 

197 raise ValueError("lower_bound > upper_bound (%r > %r)" 

198 % (lower_bound, upper_bound)) 

199 inner_knots = np.asarray(inner_knots) 

200 if inner_knots.ndim > 1: 

201 raise ValueError("knots must be 1 dimensional") 

202 if np.any(inner_knots < lower_bound): 

203 raise ValueError("some knot values (%s) fall below lower bound " 

204 "(%r)" 

205 % (inner_knots[inner_knots < lower_bound], 

206 lower_bound)) 

207 if np.any(inner_knots > upper_bound): 

208 raise ValueError("some knot values (%s) fall above upper bound " 

209 "(%r)" 

210 % (inner_knots[inner_knots > upper_bound], 

211 upper_bound)) 

212 

213 if spacing == "equal": 

214 diffs = np.arange(1, order + 1) * diff_knots 

215 lower_knots = inner_knots[0] - diffs[::-1] 

216 upper_knots = inner_knots[-1] + diffs 

217 all_knots = np.concatenate((lower_knots, inner_knots, upper_knots)) 

218 else: 

219 all_knots = np.concatenate(([lower_bound, upper_bound] * order, 

220 inner_knots)) 

221 all_knots.sort() 

222 

223 return all_knots 

224 

225 

226def _get_integration_points(knots, k_points=3): 

227 """add points to each subinterval defined by knots 

228 

229 inserts k_points between each two consecutive knots 

230 """ 

231 k_points = k_points + 1 

232 knots = np.unique(knots) 

233 dxi = np.arange(k_points) / k_points 

234 dxk = np.diff(knots) 

235 dx = dxk[:, None] * dxi 

236 x = np.concatenate(((knots[:-1, None] + dx).ravel(), [knots[-1]])) 

237 return x 

238 

239 

240def get_covder2(smoother, k_points=4, integration_points=None, 

241 skip_ctransf=False, deriv=2): 

242 """ 

243 Approximate integral of cross product of second derivative of smoother 

244 

245 This uses scipy.integrate simps to compute an approximation to the 

246 integral of the smoother derivative cross-product at knots plus k_points 

247 in between knots. 

248 """ 

249 from scipy.integrate import simps 

250 knots = smoother.knots 

251 x = _get_integration_points(knots, k_points=3) 

252 if integration_points is None: 

253 d2 = smoother.transform(x, deriv=deriv, skip_ctransf=skip_ctransf) 

254 else: 

255 x = integration_points 

256 covd2 = simps(d2[:, :, None] * d2[:, None, :], x, axis=0) 

257 return covd2 

258 

259 

260# TODO: this function should be deleted 

261def make_poly_basis(x, degree, intercept=True): 

262 ''' 

263 given a vector x returns poly=(1, x, x^2, ..., x^degree) 

264 and its first and second derivative 

265 ''' 

266 

267 if intercept: 

268 start = 0 

269 else: 

270 start = 1 

271 

272 nobs = len(x) 

273 basis = np.zeros(shape=(nobs, degree + 1 - start)) 

274 der_basis = np.zeros(shape=(nobs, degree + 1 - start)) 

275 der2_basis = np.zeros(shape=(nobs, degree + 1 - start)) 

276 

277 for i in range(start, degree + 1): 

278 basis[:, i - start] = x ** i 

279 der_basis[:, i - start] = i * x ** (i - 1) 

280 der2_basis[:, i - start] = i * (i - 1) * x ** (i - 2) 

281 

282 return basis, der_basis, der2_basis 

283 

284 

285# TODO: try to include other kinds of splines from patsy 

286# x = np.linspace(0, 1, 30) 

287# df = 10 

288# degree = 3 

289# from patsy.mgcv_cubic_splines import cc, cr, te 

290# all_knots, lower, upper, inner = compute_all_knots(x, df, degree) 

291# result = cc(x, df=df, knots=all_knots, lower_bound=lower, upper_bound=upper, 

292# constraints=None) 

293# 

294# import matplotlib.pyplot as plt 

295# 

296# result = np.array(result) 

297# print(result.shape) 

298# plt.plot(result.T) 

299# plt.show() 

300 

301class UnivariateGamSmoother(with_metaclass(ABCMeta)): 

302 """Base Class for single smooth component 

303 """ 

304 def __init__(self, x, constraints=None, variable_name='x'): 

305 self.x = x 

306 self.constraints = constraints 

307 self.variable_name = variable_name 

308 self.nobs, self.k_variables = len(x), 1 

309 

310 base4 = self._smooth_basis_for_single_variable() 

311 if constraints == 'center': 

312 constraints = base4[0].mean(0)[None, :] 

313 

314 if constraints is not None and not isinstance(constraints, str): 

315 ctransf = transf_constraints(constraints) 

316 self.ctransf = ctransf 

317 else: 

318 # subclasses might set ctransf directly 

319 # only used if constraints is None 

320 if not hasattr(self, 'ctransf'): 

321 self.ctransf = None 

322 

323 self.basis, self.der_basis, self.der2_basis, self.cov_der2 = base4 

324 if self.ctransf is not None: 

325 ctransf = self.ctransf 

326 # transform attributes that are not None 

327 if base4[0] is not None: 

328 self.basis = base4[0].dot(ctransf) 

329 if base4[1] is not None: 

330 self.der_basis = base4[1].dot(ctransf) 

331 if base4[2] is not None: 

332 self.der2_basis = base4[2].dot(ctransf) 

333 if base4[3] is not None: 

334 self.cov_der2 = ctransf.T.dot(base4[3]).dot(ctransf) 

335 

336 self.dim_basis = self.basis.shape[1] 

337 self.col_names = [self.variable_name + "_s" + str(i) 

338 for i in range(self.dim_basis)] 

339 

340 @abstractmethod 

341 def _smooth_basis_for_single_variable(self): 

342 return 

343 

344 

345class UnivariateGenericSmoother(UnivariateGamSmoother): 

346 """Generic single smooth component 

347 """ 

348 def __init__(self, x, basis, der_basis, der2_basis, cov_der2, 

349 variable_name='x'): 

350 self.basis = basis 

351 self.der_basis = der_basis 

352 self.der2_basis = der2_basis 

353 self.cov_der2 = cov_der2 

354 

355 super(UnivariateGenericSmoother, self).__init__( 

356 x, variable_name=variable_name) 

357 

358 def _smooth_basis_for_single_variable(self): 

359 return self.basis, self.der_basis, self.der2_basis, self.cov_der2 

360 

361 

362class UnivariatePolynomialSmoother(UnivariateGamSmoother): 

363 """polynomial single smooth component 

364 """ 

365 def __init__(self, x, degree, variable_name='x'): 

366 self.degree = degree 

367 super(UnivariatePolynomialSmoother, self).__init__( 

368 x, variable_name=variable_name) 

369 

370 def _smooth_basis_for_single_variable(self): 

371 # TODO: unclear description 

372 """ 

373 given a vector x returns poly=(1, x, x^2, ..., x^degree) 

374 and its first and second derivative 

375 """ 

376 

377 basis = np.zeros(shape=(self.nobs, self.degree)) 

378 der_basis = np.zeros(shape=(self.nobs, self.degree)) 

379 der2_basis = np.zeros(shape=(self.nobs, self.degree)) 

380 for i in range(self.degree): 

381 dg = i + 1 

382 basis[:, i] = self.x ** dg 

383 der_basis[:, i] = dg * self.x ** (dg - 1) 

384 if dg > 1: 

385 der2_basis[:, i] = dg * (dg - 1) * self.x ** (dg - 2) 

386 else: 

387 der2_basis[:, i] = 0 

388 

389 cov_der2 = np.dot(der2_basis.T, der2_basis) 

390 

391 return basis, der_basis, der2_basis, cov_der2 

392 

393 

394class UnivariateBSplines(UnivariateGamSmoother): 

395 """B-Spline single smooth component 

396 

397 This creates and holds the B-Spline basis function for one 

398 component. 

399 

400 Parameters 

401 ---------- 

402 x : ndarray, 1-D 

403 underlying explanatory variable for smooth terms. 

404 df : int 

405 numer of basis functions or degrees of freedom 

406 degree : int 

407 degree of the spline 

408 include_intercept : bool 

409 If False, then the basis functions are transformed so that they 

410 do not include a constant. This avoids perfect collinearity if 

411 a constant or several components are included in the model. 

412 constraints : {None, str, array} 

413 Constraints are used to transform the basis functions to satisfy 

414 those constraints. 

415 `constraints = 'center'` applies a linear transform to remove the 

416 constant and center the basis functions. 

417 variable_name : {None, str} 

418 The name for the underlying explanatory variable, x, used in for 

419 creating the column and parameter names for the basis functions. 

420 covder2_kwds : {None, dict} 

421 options for computing the penalty matrix from the second derivative 

422 of the spline. 

423 knot_kwds : {None, list[dict]} 

424 option for the knot selection. 

425 By default knots are selected in the same way as in patsy, however the 

426 number of knots is independent of keeping or removing the constant. 

427 Interior knot selection is based on quantiles of the data and is the 

428 same in patsy and mgcv. Boundary points are at the limits of the data 

429 range. 

430 The available options use with `get_knots_bsplines` are 

431 

432 - knots : None or array 

433 interior knots 

434 - spacing : 'quantile' or 'equal' 

435 - lower_bound : None or float 

436 location of lower boundary knots, all boundary knots are at the same 

437 point 

438 - upper_bound : None or float 

439 location of upper boundary knots, all boundary knots are at the same 

440 point 

441 - all_knots : None or array 

442 If all knots are provided, then those will be taken as given and 

443 all other options will be ignored. 

444 """ 

445 def __init__(self, x, df, degree=3, include_intercept=False, 

446 constraints=None, variable_name='x', 

447 covder2_kwds=None, **knot_kwds): 

448 self.degree = degree 

449 self.df = df 

450 self.include_intercept = include_intercept 

451 self.knots = get_knots_bsplines(x, degree=degree, df=df, **knot_kwds) 

452 self.covder2_kwds = (covder2_kwds if covder2_kwds is not None 

453 else {}) 

454 super(UnivariateBSplines, self).__init__( 

455 x, constraints=constraints, variable_name=variable_name) 

456 

457 def _smooth_basis_for_single_variable(self): 

458 basis, der_basis, der2_basis = _eval_bspline_basis( 

459 self.x, self.knots, self.degree, 

460 include_intercept=self.include_intercept) 

461 # cov_der2 = np.dot(der2_basis.T, der2_basis) 

462 

463 cov_der2 = get_covder2(self, skip_ctransf=True, 

464 **self.covder2_kwds) 

465 

466 return basis, der_basis, der2_basis, cov_der2 

467 

468 def transform(self, x_new, deriv=0, skip_ctransf=False): 

469 """create the spline basis for new observations 

470 

471 The main use of this stateful transformation is for prediction 

472 using the same specification of the spline basis. 

473 

474 Parameters 

475 ---------- 

476 x_new : ndarray 

477 observations of the underlying explanatory variable 

478 deriv : int 

479 which derivative of the spline basis to compute 

480 This is an options for internal computation. 

481 skip_ctransf : bool 

482 whether to skip the constraint transform 

483 This is an options for internal computation. 

484 

485 Returns 

486 ------- 

487 basis : ndarray 

488 design matrix for the spline basis for given ``x_new`` 

489 """ 

490 

491 if x_new is None: 

492 x_new = self.x 

493 exog = _eval_bspline_basis(x_new, self.knots, self.degree, 

494 deriv=deriv, 

495 include_intercept=self.include_intercept) 

496 

497 # ctransf does not exist yet when cov_der2 is computed 

498 ctransf = getattr(self, 'ctransf', None) 

499 if ctransf is not None and not skip_ctransf: 

500 exog = exog.dot(self.ctransf) 

501 return exog 

502 

503 

504class UnivariateCubicSplines(UnivariateGamSmoother): 

505 """Cubic Spline single smooth component 

506 

507 Cubic splines as described in the wood's book in chapter 3 

508 """ 

509 

510 def __init__(self, x, df, constraints=None, transform='domain', 

511 variable_name='x'): 

512 

513 self.degree = 3 

514 self.df = df 

515 self.transform_data_method = transform 

516 

517 self.x = x = self.transform_data(x, initialize=True) 

518 self.knots = _equally_spaced_knots(x, df) 

519 super(UnivariateCubicSplines, self).__init__( 

520 x, constraints=constraints, variable_name=variable_name) 

521 

522 def transform_data(self, x, initialize=False): 

523 tm = self.transform_data_method 

524 if tm is None: 

525 return x 

526 

527 if initialize is True: 

528 if tm == 'domain': 

529 self.domain_low = x.min(0) 

530 self.domain_upp = x.max(0) 

531 elif isinstance(tm, tuple): 

532 self.domain_low = tm[0] 

533 self.domain_upp = tm[1] 

534 self.transform_data_method = 'domain' 

535 else: 

536 raise ValueError("transform should be None, 'domain' " 

537 "or a tuple") 

538 self.domain_diff = self.domain_upp - self.domain_low 

539 

540 if self.transform_data_method == 'domain': 

541 x = (x - self.domain_low) / self.domain_diff 

542 return x 

543 else: 

544 raise ValueError("incorrect transform_data_method") 

545 

546 def _smooth_basis_for_single_variable(self): 

547 

548 basis = self._splines_x()[:, :-1] 

549 # demean except for constant, does not affect derivatives 

550 if not self.constraints == 'none': 

551 self.transf_mean = basis[:, 1:].mean(0) 

552 basis[:, 1:] -= self.transf_mean 

553 else: 

554 self.transf_mean = np.zeros(basis.shape[1]) 

555 s = self._splines_s()[:-1, :-1] 

556 if not self.constraints == 'none': 

557 ctransf = np.diag(1/np.max(np.abs(basis), axis=0)) 

558 else: 

559 ctransf = np.eye(basis.shape[1]) 

560 # use np.eye to avoid rescaling 

561 # ctransf = np.eye(basis.shape[1]) 

562 

563 if self.constraints == 'no-const': 

564 ctransf = ctransf[1:] 

565 

566 self.ctransf = ctransf 

567 

568 return basis, None, None, s 

569 

570 def _rk(self, x, z): 

571 p1 = ((z - 1 / 2) ** 2 - 1 / 12) * ((x - 1 / 2) ** 2 - 1 / 12) / 4 

572 p2 = ((np.abs(z - x) - 1 / 2) ** 4 - 

573 1 / 2 * (np.abs(z - x) - 1 / 2) ** 2 + 

574 7 / 240) / 24. 

575 return p1 - p2 

576 

577 def _splines_x(self, x=None): 

578 if x is None: 

579 x = self.x 

580 n_columns = len(self.knots) + 2 

581 nobs = x.shape[0] 

582 basis = np.ones(shape=(nobs, n_columns)) 

583 basis[:, 1] = x 

584 # for loop equivalent to outer(x, xk, fun=rk) 

585 for i, xi in enumerate(x): 

586 for j, xkj in enumerate(self.knots): 

587 s_ij = self._rk(xi, xkj) 

588 basis[i, j + 2] = s_ij 

589 return basis 

590 

591 def _splines_s(self): 

592 q = len(self.knots) + 2 

593 s = np.zeros(shape=(q, q)) 

594 for i, x1 in enumerate(self.knots): 

595 for j, x2 in enumerate(self.knots): 

596 s[i + 2, j + 2] = self._rk(x1, x2) 

597 return s 

598 

599 def transform(self, x_new): 

600 x_new = self.transform_data(x_new, initialize=False) 

601 exog = self._splines_x(x_new) 

602 exog[:, 1:] -= self.transf_mean 

603 if self.ctransf is not None: 

604 exog = exog.dot(self.ctransf) 

605 return exog 

606 

607 

608class UnivariateCubicCyclicSplines(UnivariateGamSmoother): 

609 """cyclic cubic regression spline single smooth component 

610 

611 This creates and holds the Cyclic CubicSpline basis function for one 

612 component. 

613 

614 Parameters 

615 ---------- 

616 x : ndarray, 1-D 

617 underlying explanatory variable for smooth terms. 

618 df : int 

619 numer of basis functions or degrees of freedom 

620 degree : int 

621 degree of the spline 

622 include_intercept : bool 

623 If False, then the basis functions are transformed so that they 

624 do not include a constant. This avoids perfect collinearity if 

625 a constant or several components are included in the model. 

626 constraints : {None, str, array} 

627 Constraints are used to transform the basis functions to satisfy 

628 those constraints. 

629 `constraints = 'center'` applies a linear transform to remove the 

630 constant and center the basis functions. 

631 variable_name : None or str 

632 The name for the underlying explanatory variable, x, used in for 

633 creating the column and parameter names for the basis functions. 

634 """ 

635 def __init__(self, x, df, constraints=None, variable_name='x'): 

636 self.degree = 3 

637 self.df = df 

638 self.x = x 

639 self.knots = _equally_spaced_knots(x, df) 

640 super(UnivariateCubicCyclicSplines, self).__init__( 

641 x, constraints=constraints, variable_name=variable_name) 

642 

643 def _smooth_basis_for_single_variable(self): 

644 basis = dmatrix("cc(x, df=" + str(self.df) + ") - 1", {"x": self.x}) 

645 self.design_info = basis.design_info 

646 n_inner_knots = self.df - 2 + 1 # +n_constraints 

647 # TODO: from CubicRegressionSplines class 

648 all_knots = _get_all_sorted_knots(self.x, n_inner_knots=n_inner_knots, 

649 inner_knots=None, 

650 lower_bound=None, upper_bound=None) 

651 

652 b, d = self._get_b_and_d(all_knots) 

653 s = self._get_s(b, d) 

654 

655 return basis, None, None, s 

656 

657 def _get_b_and_d(self, knots): 

658 """Returns mapping of cyclic cubic spline values to 2nd derivatives. 

659 

660 .. note:: See 'Generalized Additive Models', Simon N. Wood, 2006, 

661 pp 146-147 

662 

663 Parameters 

664 ---------- 

665 knots : ndarray 

666 The 1-d array knots used for cubic spline parametrization, 

667 must be sorted in ascending order. 

668 

669 Returns 

670 ------- 

671 b, d: ndarrays 

672 arrays for mapping cyclic cubic spline values at knots to 

673 second derivatives. 

674 penalty matrix is equal to ``s = d.T.dot(b^-1).dot(d)`` 

675 """ 

676 h = knots[1:] - knots[:-1] 

677 n = knots.size - 1 

678 

679 # b and d are defined such that the penalty matrix is equivalent to: 

680 # s = d.T.dot(b^-1).dot(d) 

681 # reference in particular to pag 146 of Wood's book 

682 b = np.zeros((n, n)) # the b matrix on page 146 of Wood's book 

683 d = np.zeros((n, n)) # the d matrix on page 146 of Wood's book 

684 

685 b[0, 0] = (h[n - 1] + h[0]) / 3. 

686 b[0, n - 1] = h[n - 1] / 6. 

687 b[n - 1, 0] = h[n - 1] / 6. 

688 

689 d[0, 0] = -1. / h[0] - 1. / h[n - 1] 

690 d[0, n - 1] = 1. / h[n - 1] 

691 d[n - 1, 0] = 1. / h[n - 1] 

692 

693 for i in range(1, n): 

694 b[i, i] = (h[i - 1] + h[i]) / 3. 

695 b[i, i - 1] = h[i - 1] / 6. 

696 b[i - 1, i] = h[i - 1] / 6. 

697 

698 d[i, i] = -1. / h[i - 1] - 1. / h[i] 

699 d[i, i - 1] = 1. / h[i - 1] 

700 d[i - 1, i] = 1. / h[i - 1] 

701 

702 return b, d 

703 

704 def _get_s(self, b, d): 

705 return d.T.dot(np.linalg.inv(b)).dot(d) 

706 

707 def transform(self, x_new): 

708 exog = dmatrix(self.design_info, {"x": x_new}) 

709 if self.ctransf is not None: 

710 exog = exog.dot(self.ctransf) 

711 return exog 

712 

713 

714class AdditiveGamSmoother(with_metaclass(ABCMeta)): 

715 """Base class for additive smooth components 

716 """ 

717 def __init__(self, x, variable_names=None, include_intercept=False, 

718 **kwargs): 

719 

720 # get pandas names before using asarray 

721 if isinstance(x, pd.DataFrame): 

722 data_names = x.columns.tolist() 

723 elif isinstance(x, pd.Series): 

724 data_names = [x.name] 

725 else: 

726 data_names = None 

727 

728 x = np.asarray(x) 

729 

730 if x.ndim == 1: 

731 self.x = x.copy() 

732 self.x.shape = (len(x), 1) 

733 else: 

734 self.x = x 

735 

736 self.nobs, self.k_variables = self.x.shape 

737 if isinstance(include_intercept, bool): 

738 self.include_intercept = [include_intercept] * self.k_variables 

739 else: 

740 self.include_intercept = include_intercept 

741 

742 if variable_names is None: 

743 if data_names is not None: 

744 self.variable_names = data_names 

745 else: 

746 self.variable_names = ['x' + str(i) 

747 for i in range(self.k_variables)] 

748 else: 

749 self.variable_names = variable_names 

750 

751 self.smoothers = self._make_smoothers_list() 

752 self.basis = np.hstack(list(smoother.basis 

753 for smoother in self.smoothers)) 

754 self.dim_basis = self.basis.shape[1] 

755 self.penalty_matrices = [smoother.cov_der2 

756 for smoother in self.smoothers] 

757 self.col_names = [] 

758 for smoother in self.smoothers: 

759 self.col_names.extend(smoother.col_names) 

760 

761 self.mask = [] 

762 last_column = 0 

763 for smoother in self.smoothers: 

764 mask = np.array([False] * self.dim_basis) 

765 mask[last_column:smoother.dim_basis + last_column] = True 

766 last_column = last_column + smoother.dim_basis 

767 self.mask.append(mask) 

768 

769 @abstractmethod 

770 def _make_smoothers_list(self): 

771 pass 

772 

773 def transform(self, x_new): 

774 """create the spline basis for new observations 

775 

776 The main use of this stateful transformation is for prediction 

777 using the same specification of the spline basis. 

778 

779 Parameters 

780 ---------- 

781 x_new: ndarray 

782 observations of the underlying explanatory variable 

783 

784 Returns 

785 ------- 

786 basis : ndarray 

787 design matrix for the spline basis for given ``x_new``. 

788 """ 

789 exog = np.hstack(list(self.smoothers[i].transform(x_new[:, i]) 

790 for i in range(self.k_variables))) 

791 return exog 

792 

793 

794class GenericSmoothers(AdditiveGamSmoother): 

795 """generic class for additive smooth components for GAM 

796 """ 

797 def __init__(self, x, smoothers): 

798 self.smoothers = smoothers 

799 super(GenericSmoothers, self).__init__(x, variable_names=None) 

800 

801 def _make_smoothers_list(self): 

802 return self.smoothers 

803 

804 

805class PolynomialSmoother(AdditiveGamSmoother): 

806 """additive polynomial components for GAM 

807 """ 

808 def __init__(self, x, degrees, variable_names=None): 

809 self.degrees = degrees 

810 super(PolynomialSmoother, self).__init__(x, 

811 variable_names=variable_names) 

812 

813 def _make_smoothers_list(self): 

814 smoothers = [] 

815 for v in range(self.k_variables): 

816 uv_smoother = UnivariatePolynomialSmoother( 

817 self.x[:, v], 

818 degree=self.degrees[v], 

819 variable_name=self.variable_names[v]) 

820 smoothers.append(uv_smoother) 

821 return smoothers 

822 

823 

824class BSplines(AdditiveGamSmoother): 

825 """additive smooth components using B-Splines 

826 

827 This creates and holds the B-Spline basis function for several 

828 components. 

829 

830 Parameters 

831 ---------- 

832 x : array_like, 1-D or 2-D 

833 underlying explanatory variable for smooth terms. 

834 If 2-dimensional, then observations should be in rows and 

835 explanatory variables in columns. 

836 df : int 

837 numer of basis functions or degrees of freedom 

838 degree : int 

839 degree of the spline 

840 include_intercept : bool 

841 If False, then the basis functions are transformed so that they 

842 do not include a constant. This avoids perfect collinearity if 

843 a constant or several components are included in the model. 

844 constraints : {None, str, array} 

845 Constraints are used to transform the basis functions to satisfy 

846 those constraints. 

847 `constraints = 'center'` applies a linear transform to remove the 

848 constant and center the basis functions. 

849 variable_names : {list[str], None} 

850 The names for the underlying explanatory variables, x used in for 

851 creating the column and parameter names for the basis functions. 

852 If ``x`` is a pandas object, then the names will be taken from it. 

853 knot_kwds : None or list of dict 

854 option for the knot selection. 

855 By default knots are selected in the same way as in patsy, however the 

856 number of knots is independent of keeping or removing the constant. 

857 Interior knot selection is based on quantiles of the data and is the 

858 same in patsy and mgcv. Boundary points are at the limits of the data 

859 range. 

860 The available options use with `get_knots_bsplines` are 

861 

862 - knots : None or array 

863 interior knots 

864 - spacing : 'quantile' or 'equal' 

865 - lower_bound : None or float 

866 location of lower boundary knots, all boundary knots are at the same 

867 point 

868 - upper_bound : None or float 

869 location of upper boundary knots, all boundary knots are at the same 

870 point 

871 - all_knots : None or array 

872 If all knots are provided, then those will be taken as given and 

873 all other options will be ignored. 

874 

875 

876 Attributes 

877 ---------- 

878 smoothers : list of univariate smooth component instances 

879 basis : design matrix, array of spline bases columns for all components 

880 penalty_matrices : list of penalty matrices, one for each smooth term 

881 dim_basis : number of columns in the basis 

882 k_variables : number of smooth components 

883 col_names : created names for the basis columns 

884 

885 There are additional attributes about the specification of the splines 

886 and some attributes mainly for internal use. 

887 

888 Notes 

889 ----- 

890 A constant in the spline basis function can be removed in two different 

891 ways. 

892 The first is by dropping one basis column and normalizing the 

893 remaining columns. This is obtained by the default 

894 ``include_intercept=False, constraints=None`` 

895 The second option is by using the centering transform which is a linear 

896 transformation of all basis functions. As a consequence of the 

897 transformation, the B-spline basis functions do not have locally bounded 

898 support anymore. This is obtained ``constraints='center'``. In this case 

899 ``include_intercept`` will be automatically set to True to avoid 

900 dropping an additional column. 

901 """ 

902 def __init__(self, x, df, degree, include_intercept=False, 

903 constraints=None, variable_names=None, knot_kwds=None): 

904 self.degrees = degree 

905 self.dfs = df 

906 self.knot_kwds = knot_kwds 

907 # TODO: move attaching constraints to super call 

908 self.constraints = constraints 

909 if constraints == 'center': 

910 include_intercept = True 

911 

912 super(BSplines, self).__init__(x, include_intercept=include_intercept, 

913 variable_names=variable_names) 

914 

915 def _make_smoothers_list(self): 

916 smoothers = [] 

917 for v in range(self.k_variables): 

918 kwds = self.knot_kwds[v] if self.knot_kwds else {} 

919 uv_smoother = UnivariateBSplines( 

920 self.x[:, v], 

921 df=self.dfs[v], degree=self.degrees[v], 

922 include_intercept=self.include_intercept[v], 

923 constraints=self.constraints, 

924 variable_name=self.variable_names[v], **kwds) 

925 smoothers.append(uv_smoother) 

926 

927 return smoothers 

928 

929 

930class CubicSplines(AdditiveGamSmoother): 

931 """additive smooth components using cubic splines as in Wood 2006. 

932 

933 Note, these splines do NOT use the same spline basis as 

934 ``Cubic Regression Splines``. 

935 """ 

936 def __init__(self, x, df, constraints='center', transform='domain', 

937 variable_names=None): 

938 self.dfs = df 

939 self.constraints = constraints 

940 self.transform = transform 

941 super(CubicSplines, self).__init__(x, constraints=constraints, 

942 variable_names=variable_names) 

943 

944 def _make_smoothers_list(self): 

945 smoothers = [] 

946 for v in range(self.k_variables): 

947 uv_smoother = UnivariateCubicSplines( 

948 self.x[:, v], df=self.dfs[v], 

949 constraints=self.constraints, 

950 transform=self.transform, 

951 variable_name=self.variable_names[v]) 

952 smoothers.append(uv_smoother) 

953 

954 return smoothers 

955 

956 

957class CyclicCubicSplines(AdditiveGamSmoother): 

958 """additive smooth components using cyclic cubic regression splines 

959 

960 This spline basis is the same as in patsy. 

961 

962 Parameters 

963 ---------- 

964 x : array_like, 1-D or 2-D 

965 underlying explanatory variable for smooth terms. 

966 If 2-dimensional, then observations should be in rows and 

967 explanatory variables in columns. 

968 df : int 

969 numer of basis functions or degrees of freedom 

970 constraints : {None, str, array} 

971 Constraints are used to transform the basis functions to satisfy 

972 those constraints. 

973 variable_names : {list[str], None} 

974 The names for the underlying explanatory variables, x used in for 

975 creating the column and parameter names for the basis functions. 

976 If ``x`` is a pandas object, then the names will be taken from it. 

977 """ 

978 def __init__(self, x, df, constraints=None, variable_names=None): 

979 self.dfs = df 

980 # TODO: move attaching constraints to super call 

981 self.constraints = constraints 

982 super(CyclicCubicSplines, self).__init__(x, 

983 variable_names=variable_names) 

984 

985 def _make_smoothers_list(self): 

986 smoothers = [] 

987 for v in range(self.k_variables): 

988 uv_smoother = UnivariateCubicCyclicSplines( 

989 self.x[:, v], 

990 df=self.dfs[v], constraints=self.constraints, 

991 variable_name=self.variable_names[v]) 

992 smoothers.append(uv_smoother) 

993 

994 return smoothers 

995 

996# class CubicRegressionSplines(BaseCubicSplines): 

997# # TODO: this class is still not tested 

998# 

999# def __init__(self, x, df=10): 

1000# import warnings 

1001# warnings.warn("This class is still not tested and it is probably" 

1002# " not working properly. " 

1003# "I suggest to use another smoother", Warning) 

1004# 

1005# super(CubicRegressionSplines, self).__init__(x, df) 

1006# 

1007# self.basis = dmatrix("cc(x, df=" + str(df) + ") - 1", {"x": x}) 

1008# n_inner_knots = df - 2 + 1 # +n_constraints 

1009# # TODO: ACcording to CubicRegressionSplines class this should be 

1010# # n_inner_knots = df - 2 

1011# all_knots = _get_all_sorted_knots(x, n_inner_knots=n_inner_knots, 

1012# inner_knots=None, 

1013# lower_bound=None, upper_bound=None) 

1014# 

1015# b, d = self._get_b_and_d(all_knots) 

1016# self.s = self._get_s(b, d) 

1017# 

1018# self.dim_basis = self.basis.shape[1] 

1019# 

1020# def _get_b_and_d(self, knots): 

1021# 

1022# h = knots[1:] - knots[:-1] 

1023# n = knots.size - 1 

1024# 

1025# # b and d are defined such that the penalty matrix is equivalent to: 

1026# # s = d.T.dot(b^-1).dot(d) 

1027# # reference in particular to pag 146 of Wood's book 

1028# b = np.zeros((n, n)) # the b matrix on page 146 of Wood's book 

1029# d = np.zeros((n, n)) # the d matrix on page 146 of Wood's book 

1030# 

1031# for i in range(n-2): 

1032# d[i, i] = 1/h[i] 

1033# d[i, i+1] = -1/h[i] - 1/h[i+1] 

1034# d[i, i+2] = 1/h[i+1] 

1035# 

1036# b[i, i] = (h[i] + h[i+1])/3 

1037# 

1038# for i in range(n-3): 

1039# b[i, i+1] = h[i+1]/6 

1040# b[i+1, i] = h[i+1]/6 

1041# 

1042# return b, d 

1043# 

1044# def _get_s(self, b, d): 

1045# 

1046# return d.T.dot(np.linalg.pinv(b)).dot(d)