1"""
2Multivariate Conditional and Unconditional Kernel Density Estimation
3with Mixed Data Types.
5References
6----------
7[1] Racine, J., Li, Q. Nonparametric econometrics: theory and practice.
8 Princeton University Press. (2007)
9[2] Racine, Jeff. "Nonparametric Econometrics: A Primer," Foundation
10 and Trends in Econometrics: Vol 3: No 1, pp1-88. (2008)
11 http://dx.doi.org/10.1561/0800000009
12[3] Racine, J., Li, Q. "Nonparametric Estimation of Distributions
13 with Categorical and Continuous Data." Working Paper. (2000)
14[4] Racine, J. Li, Q. "Kernel Estimation of Multivariate Conditional
15 Distributions Annals of Economics and Finance 5, 211-235 (2004)
16[5] Liu, R., Yang, L. "Kernel estimation of multivariate
17 cumulative distribution function."
18 Journal of Nonparametric Statistics (2008)
19[6] Li, R., Ju, G. "Nonparametric Estimation of Multivariate CDF
20 with Categorical and Continuous Data." Working Paper
21[7] Li, Q., Racine, J. "Cross-validated local linear nonparametric
22 regression" Statistica Sinica 14(2004), pp. 485-512
23[8] Racine, J.: "Consistent Significance Testing for Nonparametric
24 Regression" Journal of Business & Economics Statistics
25[9] Racine, J., Hart, J., Li, Q., "Testing the Significance of
26 Categorical Predictor Variables in Nonparametric Regression
27 Models", 2006, Econometric Reviews 25, 523-544
29"""
# TODO: make default behavior efficient=True above a certain n_obs
import numpy as np

from . import kernels
from ._kernel_base import GenericKDE, EstimatorSettings, gpke, \
    LeaveOneOut, _adjust_shape


__all__ = ['KDEMultivariate', 'KDEMultivariateConditional', 'EstimatorSettings']


class KDEMultivariate(GenericKDE):
    """
    Multivariate kernel density estimator.

    This density estimator can handle univariate as well as multivariate
    data, including mixed continuous / ordered discrete / unordered discrete
    data.  It also provides cross-validated bandwidth selection methods
    (least squares, maximum likelihood).

    Parameters
    ----------
    data : list of ndarrays or 2-D ndarray
        The training data for the Kernel Density Estimation, used to
        determine the bandwidth(s).  If a 2-D array, should be of shape
        (num_observations, num_variables).  If a list, each list element is
        a separate observation.
    var_type : str
        The type of the variables:

        - c : continuous
        - u : unordered (discrete)
        - o : ordered (discrete)

        The string should contain a type specifier for each variable, so for
        example ``var_type='ccuo'``.
    bw : array_like or str, optional
        If an array, it is a fixed user-specified bandwidth.  If a string,
        should be one of:

        - normal_reference: normal reference rule of thumb (default)
        - cv_ml: cross validation maximum likelihood
        - cv_ls: cross validation least squares

    defaults : EstimatorSettings instance, optional
        The default values for (efficient) bandwidth estimation.

    Attributes
    ----------
    bw : array_like
        The bandwidth parameters.

    See Also
    --------
    KDEMultivariateConditional

    Examples
    --------
    >>> import numpy as np
    >>> import statsmodels.api as sm
    >>> nobs = 300
    >>> np.random.seed(1234)  # Seed the random number generator
    >>> c1 = np.random.normal(size=(nobs, 1))
    >>> c2 = np.random.normal(2, 1, size=(nobs, 1))

    Estimate a bivariate distribution and display the bandwidth found:

    >>> dens_u = sm.nonparametric.KDEMultivariate(data=[c1, c2],
    ...     var_type='cc', bw='normal_reference')
    >>> dens_u.bw
    array([ 0.39967419, 0.38423292])
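
    A minimal sketch of evaluating the fitted estimator (assuming the
    ``dens_u`` instance created above; the values depend on the random
    data, and the point ``[0, 2]`` is an arbitrary illustration):

    >>> pdf_vals = dens_u.pdf()           # density at the training points
    >>> cdf_val = dens_u.cdf([0.0, 2.0])  # joint CDF at a single point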
    """

    def __init__(self, data, var_type, bw=None, defaults=None):
        self.var_type = var_type
        self.k_vars = len(self.var_type)
        self.data = _adjust_shape(data, self.k_vars)
        self.data_type = var_type
        self.nobs, self.k_vars = np.shape(self.data)
        if self.nobs <= self.k_vars:
            raise ValueError("The number of observations must be larger "
                             "than the number of variables.")

        defaults = EstimatorSettings() if defaults is None else defaults
        self._set_defaults(defaults)
        if not self.efficient:
            self.bw = self._compute_bw(bw)
        else:
            self.bw = self._compute_efficient(bw)

    def __repr__(self):
        """Provide something sane to print."""
        rpr = "KDE instance\n"
        rpr += "Number of variables: k_vars = " + str(self.k_vars) + "\n"
        rpr += "Number of samples: nobs = " + str(self.nobs) + "\n"
        rpr += "Variable types: " + self.var_type + "\n"
        rpr += "BW selection method: " + self._bw_method + "\n"
        return rpr

    def loo_likelihood(self, bw, func=lambda x: x):
        r"""
        Returns the leave-one-out likelihood function.

        The leave-one-out likelihood function for the unconditional KDE.

        Parameters
        ----------
        bw : array_like
            The value for the bandwidth parameter(s).
        func : callable, optional
            Function to transform the likelihood values (before summing); for
            the log likelihood, use ``func=np.log``.  Default is ``f(x) = x``.

        Notes
        -----
        The leave-one-out kernel estimator of :math:`f_{-i}` is:

        .. math:: f_{-i}(X_{i})=\frac{1}{(n-1)h}
            \sum_{j=1,j\neq i}K_{h}(X_{i},X_{j})

        where :math:`K_{h}` represents the generalized product kernel
        estimator:

        .. math:: K_{h}(X_{i},X_{j}) =
            \prod_{s=1}^{q}h_{s}^{-1}k\left(\frac{X_{is}-X_{js}}{h_{s}}\right)
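
        Examples
        --------
        A minimal sketch (assuming the fitted ``dens_u`` instance from the
        class docstring; with ``func=np.log`` this is the negative
        leave-one-out log-likelihood used as the ``cv_ml`` objective):

        >>> neg_loglik = dens_u.loo_likelihood(dens_u.bw, func=np.log)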
        """
        LOO = LeaveOneOut(self.data)
        L = 0
        for i, X_not_i in enumerate(LOO):
            f_i = gpke(bw, data=-X_not_i, data_predict=-self.data[i, :],
                       var_type=self.var_type)
            L += func(f_i)

        return -L

    def pdf(self, data_predict=None):
        r"""
        Evaluate the probability density function.

        Parameters
        ----------
        data_predict : array_like, optional
            Points to evaluate at.  If unspecified, the training data is
            used.

        Returns
        -------
        pdf_est : array_like
            Probability density function evaluated at `data_predict`.

        Notes
        -----
        The probability density is the sample average of the generalized
        product kernel estimator:

        .. math:: f(x) = n^{-1}\sum_{i=1}^{n} K_{h}(X_{i},x), \qquad
            K_{h}(X_{i},x) =
            \prod_{s=1}^{q}h_{s}^{-1}k\left(\frac{X_{is}-x_{s}}{h_{s}}\right)
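
        Examples
        --------
        A minimal sketch (assuming the fitted ``dens_u`` instance from the
        class docstring; the point ``[0, 2]`` is an arbitrary illustration):

        >>> p = dens_u.pdf([0.0, 2.0])  # density at a single point
        >>> pdf_train = dens_u.pdf()    # density at every training point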
        """
        if data_predict is None:
            data_predict = self.data
        else:
            data_predict = _adjust_shape(data_predict, self.k_vars)

        pdf_est = []
        for i in range(np.shape(data_predict)[0]):
            pdf_est.append(gpke(self.bw, data=self.data,
                                data_predict=data_predict[i, :],
                                var_type=self.var_type) / self.nobs)

        pdf_est = np.squeeze(pdf_est)
        return pdf_est

    def cdf(self, data_predict=None):
        r"""
        Evaluate the cumulative distribution function.

        Parameters
        ----------
        data_predict : array_like, optional
            Points to evaluate at.  If unspecified, the training data is
            used.

        Returns
        -------
        cdf_est : array_like
            The estimate of the cdf.

        Notes
        -----
        See https://en.wikipedia.org/wiki/Cumulative_distribution_function
        For more details on the estimation see Ref. [5] in module docstring.

        The multivariate CDF for mixed data (continuous and ordered/unordered
        discrete) is estimated by:

        .. math::

            F(x^{c},x^{d})=n^{-1}\sum_{i=1}^{n}\left[G\left(
            \frac{x^{c}-X_{i}^{c}}{h}\right)\sum_{u\leq x^{d}}
            L(X_{i}^{d},x^{d},\lambda)\right]

        where G() is the product kernel CDF estimator for the continuous
        variables and L() is that for the discrete variables.

        The bandwidth used is ``self.bw``.
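
        Examples
        --------
        A minimal sketch (assuming the fitted ``dens_u`` instance from the
        class docstring; ``[0, 2]`` is an arbitrary evaluation point):

        >>> F = dens_u.cdf([0.0, 2.0])  # P(X1 <= 0, X2 <= 2)
        >>> F_train = dens_u.cdf()      # CDF at every training point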
        """
        if data_predict is None:
            data_predict = self.data
        else:
            data_predict = _adjust_shape(data_predict, self.k_vars)

        cdf_est = []
        for i in range(np.shape(data_predict)[0]):
            cdf_est.append(gpke(self.bw, data=self.data,
                                data_predict=data_predict[i, :],
                                var_type=self.var_type,
                                ckertype="gaussian_cdf",
                                ukertype="aitchisonaitken_cdf",
                                okertype='wangryzin_cdf') / self.nobs)

        cdf_est = np.squeeze(cdf_est)
        return cdf_est

    def imse(self, bw):
        r"""
        Returns the Integrated Mean Square Error for the unconditional KDE.

        Parameters
        ----------
        bw : array_like
            The bandwidth parameter(s).

        Returns
        -------
        CV : float
            The cross-validation objective function.

        Notes
        -----
        See p. 27 in [1]_ for details on the estimation; for how to handle
        the multivariate case with mixed data types see p. 6 in [2]_.

        The formula for the cross-validation objective function is:

        .. math:: CV=\frac{1}{n^{2}}\sum_{i=1}^{n}\sum_{j=1}^{n}
            \bar{K}_{h}(X_{i},X_{j})-\frac{2}{n(n-1)}\sum_{i=1}^{n}
            \sum_{j=1,j\neq i}^{n}K_{h}(X_{i},X_{j})

        where :math:`\bar{K}_{h}` is the multivariate product convolution
        kernel (consult [2]_ for mixed data types).

        References
        ----------
        .. [1] Racine, J., Li, Q. Nonparametric econometrics: theory and
            practice. Princeton University Press. (2007)
        .. [2] Racine, J., Li, Q. "Nonparametric Estimation of Distributions
            with Categorical and Continuous Data." Working Paper. (2000)
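
        Examples
        --------
        A minimal sketch (assuming the fitted ``dens_u`` instance from the
        class docstring); this is the objective minimized by ``bw='cv_ls'``:

        >>> cv_value = dens_u.imse(dens_u.bw)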
        """
        #F = 0
        #for i in range(self.nobs):
        #    k_bar_sum = gpke(bw, data=-self.data,
        #                     data_predict=-self.data[i, :],
        #                     var_type=self.var_type,
        #                     ckertype='gauss_convolution',
        #                     okertype='wangryzin_convolution',
        #                     ukertype='aitchisonaitken_convolution')
        #    F += k_bar_sum
        ## there is a + because loo_likelihood returns the negative
        #return (F / self.nobs**2 + self.loo_likelihood(bw) * \
        #        2 / ((self.nobs) * (self.nobs - 1)))

        # The code below is equivalent to the commented-out code above.  It's
        # about 20% faster due to some code being moved outside the for-loops
        # and shared by gpke() and loo_likelihood().
        F = 0
        kertypes = dict(c=kernels.gaussian_convolution,
                        o=kernels.wang_ryzin_convolution,
                        u=kernels.aitchison_aitken_convolution)
        nobs = self.nobs
        data = -self.data
        var_type = self.var_type
        ix_cont = np.array([c == 'c' for c in var_type])
        _bw_cont_product = bw[ix_cont].prod()
        Kval = np.empty(data.shape)
        for i in range(nobs):
            for ii, vtype in enumerate(var_type):
                Kval[:, ii] = kertypes[vtype](bw[ii],
                                              data[:, ii],
                                              data[i, ii])

            dens = Kval.prod(axis=1) / _bw_cont_product
            k_bar_sum = dens.sum(axis=0)
            F += k_bar_sum  # sum of prod kernel over nobs

        kertypes = dict(c=kernels.gaussian,
                        o=kernels.wang_ryzin,
                        u=kernels.aitchison_aitken)
        LOO = LeaveOneOut(self.data)
        L = 0   # leave-one-out likelihood
        Kval = np.empty((data.shape[0] - 1, data.shape[1]))
        for i, X_not_i in enumerate(LOO):
            for ii, vtype in enumerate(var_type):
                Kval[:, ii] = kertypes[vtype](bw[ii],
                                              -X_not_i[:, ii],
                                              data[i, ii])
            dens = Kval.prod(axis=1) / _bw_cont_product
            L += dens.sum(axis=0)

        # CV objective function, eq. (2.4) of Ref. [3] in module docstring
        return (F / nobs**2 - 2 * L / (nobs * (nobs - 1)))

    def _get_class_vars_type(self):
        """Helper method to be able to pass needed vars to _compute_subset."""
        class_type = 'KDEMultivariate'
        class_vars = (self.var_type, )
        return class_type, class_vars


class KDEMultivariateConditional(GenericKDE):
    """
    Conditional multivariate kernel density estimator.

    Calculates ``P(Y_1,Y_2,...Y_n | X_1,X_2...X_m) =
    P(Y_1,Y_2,...Y_n,X_1,X_2...X_m) / P(X_1,X_2...X_m)``.
    The conditional density is by definition the ratio of the joint and
    marginal densities, see [1]_.

    Parameters
    ----------
    endog : list of ndarrays or 2-D ndarray
        The training data for the dependent variables, used to determine
        the bandwidth(s).  If a 2-D array, should be of shape
        (num_observations, num_variables).  If a list, each list element is
        a separate observation.
    exog : list of ndarrays or 2-D ndarray
        The training data for the independent variables; same shape as
        `endog`.
    dep_type : str
        The type of the dependent variables:

        - c : continuous
        - u : unordered (discrete)
        - o : ordered (discrete)

        The string should contain a type specifier for each variable, so for
        example ``dep_type='ccuo'``.
    indep_type : str
        The type of the independent variables; specified like `dep_type`.
    bw : array_like or str, optional
        If an array, it is a fixed user-specified bandwidth.  If a string,
        should be one of:

        - normal_reference: normal reference rule of thumb (default)
        - cv_ml: cross validation maximum likelihood
        - cv_ls: cross validation least squares

    defaults : EstimatorSettings instance, optional
        The default values for the efficient bandwidth estimation.

    Attributes
    ----------
    bw : array_like
        The bandwidth parameters.

    See Also
    --------
    KDEMultivariate

    References
    ----------
    .. [1] https://en.wikipedia.org/wiki/Conditional_probability_distribution

    Examples
    --------
    >>> import numpy as np
    >>> import statsmodels.api as sm
    >>> nobs = 300
    >>> c1 = np.random.normal(size=(nobs, 1))
    >>> c2 = np.random.normal(2, 1, size=(nobs, 1))

    >>> dens_c = sm.nonparametric.KDEMultivariateConditional(endog=[c1],
    ...     exog=[c2], dep_type='c', indep_type='c', bw='normal_reference')
    >>> dens_c.bw  # show computed bandwidth
    array([ 0.41223484, 0.40976931])
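
    A minimal sketch of evaluating the fitted conditional estimator
    (assuming the ``dens_c`` instance above; the evaluation points are
    arbitrary illustrations):

    >>> p = dens_c.pdf(endog_predict=[0], exog_predict=[2])  # f(y=0 | x=2)
    >>> F = dens_c.cdf(endog_predict=[0], exog_predict=[2])  # F(y<=0 | x=2)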
    """

    def __init__(self, endog, exog, dep_type, indep_type, bw,
                 defaults=None):
        self.dep_type = dep_type
        self.indep_type = indep_type
        self.data_type = dep_type + indep_type
        self.k_dep = len(self.dep_type)
        self.k_indep = len(self.indep_type)
        self.endog = _adjust_shape(endog, self.k_dep)
        self.exog = _adjust_shape(exog, self.k_indep)
        self.nobs, self.k_dep = np.shape(self.endog)
        self.data = np.column_stack((self.endog, self.exog))
        self.k_vars = np.shape(self.data)[1]
        defaults = EstimatorSettings() if defaults is None else defaults
        self._set_defaults(defaults)
        if not self.efficient:
            self.bw = self._compute_bw(bw)
        else:
            self.bw = self._compute_efficient(bw)

    def __repr__(self):
        """Provide something sane to print."""
        rpr = "KDEMultivariateConditional instance\n"
        rpr += "Number of independent variables: k_indep = " + \
               str(self.k_indep) + "\n"
        rpr += "Number of dependent variables: k_dep = " + \
               str(self.k_dep) + "\n"
        rpr += "Number of observations: nobs = " + str(self.nobs) + "\n"
        rpr += "Independent variable types: " + self.indep_type + "\n"
        rpr += "Dependent variable types: " + self.dep_type + "\n"
        rpr += "BW selection method: " + self._bw_method + "\n"
        return rpr

    def loo_likelihood(self, bw, func=lambda x: x):
        """
        Returns the leave-one-out conditional likelihood of the data.

        If `func` is not equal to the default, what's calculated is a
        function of the leave-one-out conditional likelihood.

        Parameters
        ----------
        bw : array_like
            The bandwidth parameter(s).
        func : callable, optional
            Function to transform the likelihood values (before summing); for
            the log likelihood, use ``func=np.log``.  Default is
            ``f(x) = x``.

        Returns
        -------
        L : float
            The value of the leave-one-out function for the data.

        Notes
        -----
        Similar to ``KDE.loo_likelihood``, but substitutes
        ``f(y|x) = f(x, y) / f(x)`` for ``f(x)``.
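
        Examples
        --------
        A minimal sketch (assuming the fitted ``dens_c`` instance from the
        class docstring; with ``func=np.log`` this is the negative
        leave-one-out conditional log-likelihood):

        >>> neg_loglik = dens_c.loo_likelihood(dens_c.bw, func=np.log)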
        """
        yLOO = LeaveOneOut(self.data)
        xLOO = LeaveOneOut(self.exog).__iter__()
        L = 0
        for i, Y_j in enumerate(yLOO):
            X_not_i = next(xLOO)
            f_yx = gpke(bw, data=-Y_j, data_predict=-self.data[i, :],
                        var_type=(self.dep_type + self.indep_type))
            f_x = gpke(bw[self.k_dep:], data=-X_not_i,
                       data_predict=-self.exog[i, :],
                       var_type=self.indep_type)
            f_i = f_yx / f_x
            L += func(f_i)

        return -L

    def pdf(self, endog_predict=None, exog_predict=None):
        r"""
        Evaluate the probability density function.

        Parameters
        ----------
        endog_predict : array_like, optional
            Evaluation data for the dependent variables.  If unspecified,
            the training data is used.
        exog_predict : array_like, optional
            Evaluation data for the independent variables.

        Returns
        -------
        pdf : array_like
            The value of the probability density at `endog_predict` and
            `exog_predict`.

        Notes
        -----
        The formula for the conditional probability density is:

        .. math:: f(y|x)=\frac{f(x,y)}{f(x)}

        with

        .. math:: f(x)=n^{-1}\sum_{j=1}^{n}\prod_{s=1}^{q}h_{s}^{-1}k
            \left(\frac{x_{s}-X_{js}}{h_{s}}\right)

        where :math:`k` is the appropriate kernel for each variable.
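
        Examples
        --------
        A minimal sketch (assuming the fitted ``dens_c`` instance from the
        class docstring; the evaluation points are arbitrary):

        >>> p = dens_c.pdf(endog_predict=[0], exog_predict=[2])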
        """
        if endog_predict is None:
            endog_predict = self.endog
        else:
            endog_predict = _adjust_shape(endog_predict, self.k_dep)
        if exog_predict is None:
            exog_predict = self.exog
        else:
            exog_predict = _adjust_shape(exog_predict, self.k_indep)

        pdf_est = []
        data_predict = np.column_stack((endog_predict, exog_predict))
        for i in range(np.shape(data_predict)[0]):
            f_yx = gpke(self.bw, data=self.data,
                        data_predict=data_predict[i, :],
                        var_type=(self.dep_type + self.indep_type))
            f_x = gpke(self.bw[self.k_dep:], data=self.exog,
                       data_predict=exog_predict[i, :],
                       var_type=self.indep_type)
            pdf_est.append(f_yx / f_x)

        return np.squeeze(pdf_est)

    def cdf(self, endog_predict=None, exog_predict=None):
        r"""
        Cumulative distribution function for the conditional density.

        Parameters
        ----------
        endog_predict : array_like, optional
            The evaluation dependent variables at which the cdf is
            estimated.  If not specified the training dependent variables
            are used.
        exog_predict : array_like, optional
            The evaluation independent variables at which the cdf is
            estimated.  If not specified the training independent variables
            are used.

        Returns
        -------
        cdf_est : array_like
            The estimate of the cdf.

        Notes
        -----
        For more details on the estimation see [2]_, and p. 181 in [1]_.

        The multivariate conditional CDF for mixed data (continuous and
        ordered/unordered discrete) is estimated by:

        .. math::

            F(y|x)=\frac{n^{-1}\sum_{i=1}^{n}G\left(\frac{y-Y_{i}}{h_{0}}
            \right)W_{h}(X_{i},x)}{\widehat{\mu}(x)}

        where G() is the product kernel CDF estimator for the dependent (y)
        variable(s) and W() is the product kernel estimator for the
        independent variable(s).

        References
        ----------
        .. [1] Racine, J., Li, Q. Nonparametric econometrics: theory and
            practice. Princeton University Press. (2007)
        .. [2] Liu, R., Yang, L. "Kernel estimation of multivariate
            cumulative distribution function." Journal of Nonparametric
            Statistics (2008)
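
        Examples
        --------
        A minimal sketch (assuming the fitted ``dens_c`` instance from the
        class docstring; the evaluation points are arbitrary):

        >>> F = dens_c.cdf(endog_predict=[0], exog_predict=[2])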
        """
        if endog_predict is None:
            endog_predict = self.endog
        else:
            endog_predict = _adjust_shape(endog_predict, self.k_dep)
        if exog_predict is None:
            exog_predict = self.exog
        else:
            exog_predict = _adjust_shape(exog_predict, self.k_indep)

        N_data_predict = np.shape(exog_predict)[0]
        cdf_est = np.empty(N_data_predict)
        for i in range(N_data_predict):
            mu_x = gpke(self.bw[self.k_dep:], data=self.exog,
                        data_predict=exog_predict[i, :],
                        var_type=self.indep_type) / self.nobs
            mu_x = np.squeeze(mu_x)
            cdf_endog = gpke(self.bw[0:self.k_dep], data=self.endog,
                             data_predict=endog_predict[i, :],
                             var_type=self.dep_type,
                             ckertype="gaussian_cdf",
                             ukertype="aitchisonaitken_cdf",
                             okertype='wangryzin_cdf', tosum=False)

            cdf_exog = gpke(self.bw[self.k_dep:], data=self.exog,
                            data_predict=exog_predict[i, :],
                            var_type=self.indep_type, tosum=False)
            S = (cdf_endog * cdf_exog).sum(axis=0)
            cdf_est[i] = S / (self.nobs * mu_x)

        return cdf_est

    def imse(self, bw):
        r"""
        The integrated mean square error for the conditional KDE.

        Parameters
        ----------
        bw : array_like
            The bandwidth parameter(s).

        Returns
        -------
        CV : float
            The cross-validation objective function.

        Notes
        -----
        For more details see pp. 156-166 in [1]_.  For details on how to
        handle the mixed variable types see [2]_.

        The formula for the cross-validation objective function for mixed
        variable types is:

        .. math:: CV(h,\lambda)=\frac{1}{n}\sum_{l=1}^{n}
            \frac{G_{-l}(X_{l})}{\left[\mu_{-l}(X_{l})\right]^{2}}-
            \frac{2}{n}\sum_{l=1}^{n}\frac{f_{-l}(X_{l},Y_{l})}{\mu_{-l}(X_{l})}

        where

        .. math:: G_{-l}(X_{l}) = n^{-2}\sum_{i\neq l}\sum_{j\neq l}
            K_{X_{i},X_{l}} K_{X_{j},X_{l}}K_{Y_{i},Y_{j}}^{(2)}

        and :math:`K_{X_{i},X_{l}}` is the multivariate product kernel,
        :math:`\mu_{-l}(X_{l})` is the leave-one-out estimator of the pdf,
        and :math:`K_{Y_{i},Y_{j}}^{(2)}` is the convolution kernel.

        The value of the function is minimized by the ``_cv_ls`` method of
        the `GenericKDE` class to return the bw estimates that minimize the
        distance between the estimated and "true" probability density.

        References
        ----------
        .. [1] Racine, J., Li, Q. Nonparametric econometrics: theory and
            practice. Princeton University Press. (2007)
        .. [2] Racine, J., Li, Q. "Nonparametric Estimation of Distributions
            with Categorical and Continuous Data." Working Paper. (2000)
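
        Examples
        --------
        A minimal sketch (assuming the fitted ``dens_c`` instance from the
        class docstring); this is the objective minimized by ``bw='cv_ls'``:

        >>> cv_value = dens_c.imse(dens_c.bw)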
        """
        zLOO = LeaveOneOut(self.data)
        CV = 0
        nobs = float(self.nobs)
        expander = np.ones((self.nobs - 1, 1))
        for ii, Z in enumerate(zLOO):
            X = Z[:, self.k_dep:]
            Y = Z[:, :self.k_dep]
            Ye_L = np.kron(Y, expander)
            Ye_R = np.kron(expander, Y)
            Xe_L = np.kron(X, expander)
            Xe_R = np.kron(expander, X)
            K_Xi_Xl = gpke(bw[self.k_dep:], data=Xe_L,
                           data_predict=self.exog[ii, :],
                           var_type=self.indep_type, tosum=False)
            K_Xj_Xl = gpke(bw[self.k_dep:], data=Xe_R,
                           data_predict=self.exog[ii, :],
                           var_type=self.indep_type, tosum=False)
            K2_Yi_Yj = gpke(bw[0:self.k_dep], data=Ye_L,
                            data_predict=Ye_R, var_type=self.dep_type,
                            ckertype='gauss_convolution',
                            okertype='wangryzin_convolution',
                            ukertype='aitchisonaitken_convolution',
                            tosum=False)
            G = (K_Xi_Xl * K_Xj_Xl * K2_Yi_Yj).sum() / nobs**2
            f_X_Y = gpke(bw, data=-Z, data_predict=-self.data[ii, :],
                         var_type=(self.dep_type + self.indep_type)) / nobs
            m_x = gpke(bw[self.k_dep:], data=-X,
                       data_predict=-self.exog[ii, :],
                       var_type=self.indep_type) / nobs
            CV += (G / m_x ** 2) - 2 * (f_X_Y / m_x)

        return CV / nobs

    def _get_class_vars_type(self):
        """Helper method to be able to pass needed vars to _compute_subset."""
        class_type = 'KDEMultivariateConditional'
        class_vars = (self.k_dep, self.dep_type, self.indep_type)
        return class_type, class_vars