Coverage for /home/martinb/.local/share/virtualenvs/camcops/lib/python3.6/site-packages/statsmodels/gam/gam_cross_validation/gam_cross_validation.py : 21%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# -*- coding: utf-8 -*-
2"""
3Cross-validation classes for GAM
5Author: Luca Puggini
7"""
9from abc import ABCMeta, abstractmethod
10from statsmodels.compat.python import with_metaclass
11import itertools
12import numpy as np
13from statsmodels.gam.smooth_basis import (GenericSmoothers,
14 UnivariateGenericSmoother)
class BaseCV(with_metaclass(ABCMeta)):
    """
    BaseCV class. It computes the cross validation error of a given model.

    All the cross validation classes can be derived from this one
    (e.g. GamCV, LassoCV,...). Subclasses must implement ``_error``.

    Parameters
    ----------
    cv_iterator : object
        Object providing ``split(exog, endog, label=None)``; the result is
        iterated as ``(train_index, test_index)`` pairs.
    endog : object
        Stored as ``self.endog`` and handed to ``cv_iterator.split``.
    exog : object
        Stored as ``self.exog`` and handed to ``cv_iterator.split``.
    """

    def __init__(self, cv_iterator, endog, exog):
        self.cv_iterator = cv_iterator
        self.exog = exog
        self.endog = endog
        # TODO: cv_iterator.split only needs nobs from endog or exog
        # Materialize the folds once: ``split`` may return a one-shot
        # generator, in which case a second call to ``fit`` would iterate
        # over nothing and silently return an empty error array.
        self.train_test_cv_indices = list(
            self.cv_iterator.split(self.exog, self.endog, label=None))

    def fit(self, **kwargs):
        """Compute the error of every cross-validation fold.

        Parameters
        ----------
        **kwargs
            Forwarded unchanged to ``_error`` for each fold; these are the
            input values for the fit method of the cross-validated object.

        Returns
        -------
        ndarray
            ``np.array`` built from the per-fold values returned by
            ``_error``, in fold order.
        """
        cv_err = [self._error(train_index, test_index, **kwargs)
                  for train_index, test_index in self.train_test_cv_indices]

        return np.array(cv_err)

    @abstractmethod
    def _error(self, train_index, test_index, **kwargs):
        # train the model on the train set
        # and returns the error on the test set
        pass
def _split_train_test_smoothers(x, smoother, train_index, test_index):
    """Split smoothers in test and train sets and create GenericSmoothers.

    Note: this does not take exog_linear into account

    Parameters
    ----------
    x : indexable
        Data passed to ``GenericSmoothers``; indexed with ``train_index``
        and ``test_index``.
    smoother : object
        Container whose ``smoothers`` attribute is iterated. Each element
        must provide ``basis``, ``der_basis``, ``der2_basis``, ``cov_der2``,
        ``x`` and ``variable_name``.
    train_index, test_index : index
        Indexers selecting the training and the test observations.

    Returns
    -------
    train_multivariate_smoothers : GenericSmoothers
        Smoothers restricted to the training observations.
    test_multivariate_smoothers : GenericSmoothers
        Smoothers restricted to the test observations.
    """
    train_smoothers = []
    test_smoothers = []
    # Use a distinct loop name so the ``smoother`` argument is not shadowed.
    for uv_smoother in smoother.smoothers:
        # TODO: Double check this part. cov_der2 is calculated with all data
        # and is shared unchanged by the train and the test smoother.
        cov_der2 = uv_smoother.cov_der2

        train_smoothers.append(
            UnivariateGenericSmoother(
                uv_smoother.x[train_index],
                uv_smoother.basis[train_index],
                uv_smoother.der_basis[train_index],
                uv_smoother.der2_basis[train_index],
                cov_der2,
                uv_smoother.variable_name + ' train'))

        # The second-derivative basis of the test smoother must be taken
        # from the test rows; previously the train rows were passed here.
        test_smoothers.append(
            UnivariateGenericSmoother(
                uv_smoother.x[test_index],
                uv_smoother.basis[test_index],
                uv_smoother.der_basis[test_index],
                uv_smoother.der2_basis[test_index],
                cov_der2,
                uv_smoother.variable_name + ' test'))

    train_multivariate_smoothers = GenericSmoothers(x[train_index],
                                                    train_smoothers)
    test_multivariate_smoothers = GenericSmoothers(x[test_index],
                                                   test_smoothers)

    return train_multivariate_smoothers, test_multivariate_smoothers
class MultivariateGAMCV(BaseCV):
    """Cross-validation error of a GAM for one fixed set of penalties.

    Parameters
    ----------
    smoother : object
        Smoother providing ``x``, ``basis`` and ``smoothers``; it is split
        into train and test parts for every fold.
    alphas : object
        Passed as ``alpha`` to ``gam`` when the fold model is created.
    gam : callable
        Called as ``gam(endog, exog=..., smoother=..., alpha=...)`` to build
        the model for the training part of a fold.
    cost : callable
        Called as ``cost(endog_test, endog_est)``; its result is the fold
        error.
    endog : indexable
        Response values, indexed with the fold indices.
    exog : indexable or None
        Stored as ``exog_linear``; indexed with the fold indices unless it
        is None.
    cv_iterator : object
        Forwarded to ``BaseCV``.
    """

    def __init__(self, smoother, alphas, gam, cost, endog, exog, cv_iterator):
        self.cost = cost
        self.gam = gam
        self.smoother = smoother
        self.exog_linear = exog
        self.alphas = alphas
        self.cv_iterator = cv_iterator
        # TODO: super does not do anything with endog, exog, except get nobs
        # refactor to clean up what where `exog` and `exog_linear` is attached
        # The smoother basis (not `exog`) takes the `exog` slot of the base
        # class.
        basis = self.smoother.basis
        super(MultivariateGAMCV, self).__init__(cv_iterator, endog, basis)

    def _error(self, train_index, test_index, **kwargs):
        """Fit on the training fold and return the cost on the test fold.

        ``kwargs`` are forwarded to the ``fit`` method of the fold model.
        """
        smoother_pair = _split_train_test_smoothers(
            self.smoother.x, self.smoother, train_index, test_index)
        fold_train_smoother, fold_test_smoother = smoother_pair

        y_train = self.endog[train_index]
        y_test = self.endog[test_index]

        if self.exog_linear is None:
            linear_train = None
            linear_test = None
        else:
            linear_train = self.exog_linear[train_index]
            linear_test = self.exog_linear[test_index]

        fold_model = self.gam(y_train, exog=linear_train,
                              smoother=fold_train_smoother,
                              alpha=self.alphas)
        fold_result = fold_model.fit(**kwargs)
        # exog_linear_test and test_smoother.basis will be column_stacked
        # but not transformed in predict
        y_hat = fold_result.predict(linear_test, fold_test_smoother.basis,
                                    transform=False)

        return self.cost(y_test, y_hat)
class BasePenaltiesPathCV(with_metaclass(ABCMeta)):
    """
    Base class for cross validation over a grid of parameters.

    The best parameter is saved in alpha_cv

    This class is currently not used
    """

    def __init__(self, alphas):
        self.alphas = alphas
        # Result slots; nothing in this class fills them in.
        self.alpha_cv = None
        self.cv_error = None
        self.cv_std = None

    def plot_path(self):
        """Plot ``cv_error`` against ``alphas`` with a +/- 1.96 * cv_std band.

        Each curve is drawn twice: first with the default format, then with
        the ``'o'`` format. The mean is black, the band limits are blue.
        Returns None.
        """
        from statsmodels.graphics.utils import _import_mpl
        plt = _import_mpl()

        # First pass: default format; second pass: 'o' format.
        for fmt in ((), ('o',)):
            plt.plot(self.alphas, self.cv_error, *fmt, c='black')
            plt.plot(self.alphas, self.cv_error + 1.96 * self.cv_std, *fmt,
                     c='blue')
            plt.plot(self.alphas, self.cv_error - 1.96 * self.cv_std, *fmt,
                     c='blue')

        return
        # TODO add return
class MultivariateGAMCVPath(object):
    """k-fold cross-validation for GAM

    Warning: The API of this class is preliminary and will change.

    Parameters
    ----------
    smoother : additive smoother instance
    alphas : list of iterables
        list of alpha for smooths. The product space will be used as alpha
        grid for cross-validation
    gam : model class
        model class for creating a model with k-fold training data
    cost : function
        cost function for the prediction error
    endog : ndarray
        dependent (response) variable of the model
    exog : object
        passed as ``exog`` to ``MultivariateGAMCV`` for every grid point
    cv_iterator : instance of cross-validation iterator
    """

    def __init__(self, smoother, alphas, gam, cost, endog, exog, cv_iterator):
        self.cost = cost
        self.smoother = smoother
        self.gam = gam
        self.alphas = alphas
        # Every combination of the per-smooth alpha candidates.
        self.alphas_grid = list(itertools.product(*self.alphas))
        self.endog = endog
        self.exog = exog
        self.cv_iterator = cv_iterator
        # One mean error / standard deviation slot per grid point.
        n_grid = len(self.alphas_grid)
        self.cv_error = np.zeros(shape=(n_grid,))
        self.cv_std = np.zeros(shape=(n_grid,))
        self.alpha_cv = None

    def fit(self, **kwargs):
        """Cross-validate every grid point and store the best alpha.

        ``kwargs`` are forwarded to ``MultivariateGAMCV.fit``. Fills
        ``cv_error`` and ``cv_std`` with the mean and standard deviation of
        the fold errors, sets ``alpha_cv`` to the grid point with the
        smallest mean error, and returns ``self``.
        """
        for position, alpha_combo in enumerate(self.alphas_grid):
            fold_errors = MultivariateGAMCV(
                smoother=self.smoother,
                alphas=alpha_combo,
                gam=self.gam,
                cost=self.cost,
                endog=self.endog,
                exog=self.exog,
                cv_iterator=self.cv_iterator,
            ).fit(**kwargs)
            self.cv_error[position] = fold_errors.mean()
            self.cv_std[position] = fold_errors.std()

        best_position = np.argmin(self.cv_error)
        self.alpha_cv = self.alphas_grid[best_position]
        return self