Coverage for /home/martinb/.local/share/virtualenvs/camcops/lib/python3.6/site-packages/statsmodels/gam/gam_penalties.py : 18%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# -*- coding: utf-8 -*-
2"""
3Penalty classes for Generalized Additive Models
5Author: Luca Puggini
6Author: Josef Perktold
8"""
10import numpy as np
11from scipy.linalg import block_diag
12from statsmodels.base._penalties import Penalty
class UnivariateGamPenalty(Penalty):
    """Quadratic smoothness penalty for one smooth term of a GAM.

    Parameters
    ----------
    univariate_smoother : instance
        Instance of a univariate smoother or spline class. Must expose
        ``nobs``, ``dim_basis`` and the quadratic-form matrix ``cov_der2``.
    alpha : float
        Default penalty weight; ``alpha`` can also be provided to each method.
    weights:
        TODO: not used and verified, might be removed

    Attributes
    ----------
    Parameters are stored, additionally
    nobs : The number of samples used during the estimation
    n_columns : number of columns in smoother basis
    """

    def __init__(self, univariate_smoother, alpha=1, weights=1):
        self.weights = weights
        self.alpha = alpha
        self.univariate_smoother = univariate_smoother
        # cached sizes taken from the smoother
        self.nobs = self.univariate_smoother.nobs
        self.n_columns = self.univariate_smoother.dim_basis

    def func(self, params, alpha=None):
        """evaluate penalization at params

        Parameters
        ----------
        params : ndarray
            coefficients for the spline basis in the regression model
        alpha : float
            default penalty weight

        Returns
        -------
        func : float
            value of the penalty evaluated at params
        """
        if alpha is None:
            alpha = self.alpha

        # quadratic form params' C params, averaged over observations
        quad_form = params.dot(self.univariate_smoother.cov_der2.dot(params))
        return alpha * quad_form / self.nobs

    def deriv(self, params, alpha=None):
        """evaluate derivative of penalty with respect to params

        Parameters
        ----------
        params : ndarray
            coefficients for the spline basis in the regression model
        alpha : float
            default penalty weight

        Returns
        -------
        deriv : ndarray
            derivative, gradient of the penalty with respect to params
        """
        if alpha is None:
            alpha = self.alpha

        # gradient of the quadratic form: 2 * alpha * C params / nobs
        grad = 2 * alpha * np.dot(self.univariate_smoother.cov_der2, params)
        grad /= self.nobs
        return grad

    def deriv2(self, params, alpha=None):
        """evaluate second derivative of penalty with respect to params

        Parameters
        ----------
        params : ndarray
            coefficients for the spline basis in the regression model
        alpha : float
            default penalty weight

        Returns
        -------
        deriv2 : ndarray, 2-Dim
            second derivative, hessian of the penalty with respect to params
        """
        if alpha is None:
            alpha = self.alpha

        # hessian is constant in params: 2 * alpha * C / nobs
        hess = 2 * alpha * self.univariate_smoother.cov_der2
        hess /= self.nobs
        return hess

    def penalty_matrix(self, alpha=None):
        """penalty matrix for the smooth term of a GAM

        Parameters
        ----------
        alpha : list of floats or None
            penalty weights

        Returns
        -------
        penalty matrix
            square penalty matrix for quadratic penalization. The number
            of rows and columns are equal to the number of columns in the
            smooth terms, i.e. the number of parameters for this smooth
            term in the regression model
        """
        if alpha is None:
            alpha = self.alpha

        return alpha * self.univariate_smoother.cov_der2
class MultivariateGamPenalty(Penalty):
    """Additive quadratic penalty for all smooth terms of a GAM.

    Parameters
    ----------
    multivariate_smoother : instance
        instance of additive smoother or spline class
    alpha : list of float
        default penalty weight, list with length equal to the number of smooth
        terms. ``alpha`` can also be provided to each method.
    weights: array_like
        currently not used
        is a list of doubles of the same length as alpha or a list
        of ndarrays where each component has the length equal to the number
        of columns in that component
    start_idx : int
        number of parameters that come before the smooth terms. If the model
        has a linear component, then the parameters for the smooth components
        start at ``start_index``.

    Attributes
    ----------
    Parameters are stored, additionally
    nobs : The number of samples used during the estimation
    dim_basis : number of columns of additive smoother. Number of columns
        in all smoothers.
    k_variables : number of smooth terms
    k_params : total number of parameters in the regression model
    """

    def __init__(self, multivariate_smoother, alpha, weights=None,
                 start_idx=0):

        if len(multivariate_smoother.smoothers) != len(alpha):
            msg = ('all the input values should be of the same length.'
                   ' len(smoothers)=%d, len(alphas)=%d') % (
                       len(multivariate_smoother.smoothers), len(alpha))
            raise ValueError(msg)

        self.multivariate_smoother = multivariate_smoother
        self.dim_basis = self.multivariate_smoother.dim_basis
        self.k_variables = self.multivariate_smoother.k_variables
        self.nobs = self.multivariate_smoother.nobs
        self.alpha = alpha
        self.start_idx = start_idx
        # total parameter count includes any leading non-smooth parameters
        self.k_params = start_idx + self.dim_basis

        # TODO: Review this,
        if weights is None:
            # weights should have total length as params
            # but it can also be scalar in individual component
            self.weights = [1. for _ in range(self.k_variables)]
        else:
            import warnings
            warnings.warn('weights is currently ignored')
            self.weights = weights

        # one boolean selector per smooth term, True on that term's columns
        self.mask = []
        offset = start_idx
        for smoother in self.multivariate_smoother.smoothers:
            selector = np.zeros(self.k_params, dtype=np.bool_)
            selector[offset: offset + smoother.dim_basis] = True
            self.mask.append(selector)
            offset += smoother.dim_basis

        # one univariate penalty per smooth term, in term order
        self.gp = []
        for i, smoother in enumerate(self.multivariate_smoother.smoothers):
            self.gp.append(UnivariateGamPenalty(smoother,
                                                weights=self.weights[i],
                                                alpha=self.alpha[i]))

    def func(self, params, alpha=None):
        """evaluate penalization at params

        Parameters
        ----------
        params : ndarray
            coefficients in the regression model
        alpha : float or list of floats
            penalty weights

        Returns
        -------
        func : float
            value of the penalty evaluated at params
        """
        if alpha is None:
            alpha = [None] * self.k_variables

        # sum of the per-term penalties on each term's parameter slice
        total = 0
        for gp_i, mask_i, alpha_i in zip(self.gp, self.mask, alpha):
            total += gp_i.func(params[mask_i], alpha=alpha_i)

        return total

    def deriv(self, params, alpha=None):
        """evaluate derivative of penalty with respect to params

        Parameters
        ----------
        params : ndarray
            coefficients in the regression model
        alpha : list of floats or None
            penalty weights

        Returns
        -------
        deriv : ndarray
            derivative, gradient of the penalty with respect to params
        """
        if alpha is None:
            alpha = [None] * self.k_variables

        # unpenalized leading parameters contribute a zero gradient block
        pieces = [np.zeros(self.start_idx)]
        for gp_i, mask_i, alpha_i in zip(self.gp, self.mask, alpha):
            pieces.append(gp_i.deriv(params[mask_i], alpha=alpha_i))

        return np.concatenate(pieces)

    def deriv2(self, params, alpha=None):
        """evaluate second derivative of penalty with respect to params

        Parameters
        ----------
        params : ndarray
            coefficients in the regression model
        alpha : list of floats or None
            penalty weights

        Returns
        -------
        deriv2 : ndarray, 2-Dim
            second derivative, hessian of the penalty with respect to params
        """
        if alpha is None:
            alpha = [None] * self.k_variables

        # block-diagonal hessian; leading zero block for the linear part
        blocks = [np.zeros((self.start_idx, self.start_idx))]
        for gp_i, mask_i, alpha_i in zip(self.gp, self.mask, alpha):
            blocks.append(gp_i.deriv2(params[mask_i], alpha=alpha_i))

        return block_diag(*blocks)

    def penalty_matrix(self, alpha=None):
        """penalty matrix for generalized additive model

        Parameters
        ----------
        alpha : list of floats or None
            penalty weights

        Returns
        -------
        penalty matrix
            block diagonal, square penalty matrix for quadratic penalization.
            The number of rows and columns are equal to the number of
            parameters in the regression model ``k_params``.

        Notes
        -----
        statsmodels does not support backwards compatibility when keywords are
        used as positional arguments. The order of keywords might change.
        We might need to add a ``params`` keyword if the need arises.
        """
        if alpha is None:
            alpha = self.alpha

        blocks = [np.zeros((self.start_idx, self.start_idx))]
        for gp_i, alpha_i in zip(self.gp, alpha):
            blocks.append(gp_i.penalty_matrix(alpha=alpha_i))

        return block_diag(*blocks)