Coverage for /home/martinb/.local/share/virtualenvs/camcops/lib/python3.6/site-packages/statsmodels/nonparametric/bandwidths.py : 32%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1import numpy as np
2from scipy.stats import scoreatpercentile as sap
4from statsmodels.compat.pandas import Substitution
5from statsmodels.sandbox.nonparametric import kernels
7def _select_sigma(X):
8 """
9 Returns the smaller of std(X, ddof=1) or normalized IQR(X) over axis 0.
11 References
12 ----------
13 Silverman (1986) p.47
14 """
15# normalize = norm.ppf(.75) - norm.ppf(.25)
16 normalize = 1.349
17# IQR = np.subtract.reduce(percentile(X, [75,25],
18# axis=axis), axis=axis)/normalize
19 IQR = (sap(X, 75) - sap(X, 25))/normalize
20 return np.minimum(np.std(X, axis=0, ddof=1), IQR)
23## Univariate Rule of Thumb Bandwidths ##
24def bw_scott(x, kernel=None):
25 """
26 Scott's Rule of Thumb
28 Parameters
29 ----------
30 x : array_like
31 Array for which to get the bandwidth
32 kernel : CustomKernel object
33 Unused
35 Returns
36 -------
37 bw : float
38 The estimate of the bandwidth
40 Notes
41 -----
42 Returns 1.059 * A * n ** (-1/5.) where ::
44 A = min(std(x, ddof=1), IQR/1.349)
45 IQR = np.subtract.reduce(np.percentile(x, [75,25]))
47 References
48 ----------
50 Scott, D.W. (1992) Multivariate Density Estimation: Theory, Practice, and
51 Visualization.
52 """
53 A = _select_sigma(x)
54 n = len(x)
55 return 1.059 * A * n ** (-0.2)
57def bw_silverman(x, kernel=None):
58 """
59 Silverman's Rule of Thumb
61 Parameters
62 ----------
63 x : array_like
64 Array for which to get the bandwidth
65 kernel : CustomKernel object
66 Unused
68 Returns
69 -------
70 bw : float
71 The estimate of the bandwidth
73 Notes
74 -----
75 Returns .9 * A * n ** (-1/5.) where ::
77 A = min(std(x, ddof=1), IQR/1.349)
78 IQR = np.subtract.reduce(np.percentile(x, [75,25]))
80 References
81 ----------
83 Silverman, B.W. (1986) `Density Estimation.`
84 """
85 A = _select_sigma(x)
86 n = len(x)
87 return .9 * A * n ** (-0.2)
90def bw_normal_reference(x, kernel=kernels.Gaussian):
91 """
92 Plug-in bandwidth with kernel specific constant based on normal reference.
94 This bandwidth minimizes the mean integrated square error if the true
95 distribution is the normal. This choice is an appropriate bandwidth for
96 single peaked distributions that are similar to the normal distribution.
98 Parameters
99 ----------
100 x : array_like
101 Array for which to get the bandwidth
102 kernel : CustomKernel object
103 Used to calculate the constant for the plug-in bandwidth.
105 Returns
106 -------
107 bw : float
108 The estimate of the bandwidth
110 Notes
111 -----
112 Returns C * A * n ** (-1/5.) where ::
114 A = min(std(x, ddof=1), IQR/1.349)
115 IQR = np.subtract.reduce(np.percentile(x, [75,25]))
116 C = constant from Hansen (2009)
118 When using a Gaussian kernel this is equivalent to the 'scott' bandwidth up
119 to two decimal places. This is the accuracy to which the 'scott' constant is
120 specified.
122 References
123 ----------
125 Silverman, B.W. (1986) `Density Estimation.`
126 Hansen, B.E. (2009) `Lecture Notes on Nonparametrics.`
127 """
128 C = kernel.normal_reference_constant
129 A = _select_sigma(x)
130 n = len(x)
131 return C * A * n ** (-0.2)
133## Plug-In Methods ##
135## Least Squares Cross-Validation ##
137## Helper Functions ##
139bandwidth_funcs = {
140 "scott": bw_scott,
141 "silverman": bw_silverman,
142 "normal_reference": bw_normal_reference,
143}
146@Substitution(", ".join(sorted(bandwidth_funcs.keys())))
147def select_bandwidth(x, bw, kernel):
148 """
149 Selects bandwidth for a selection rule bw
151 this is a wrapper around existing bandwidth selection rules
153 Parameters
154 ----------
155 x : array_like
156 Array for which to get the bandwidth
157 bw : str
158 name of bandwidth selection rule, currently supported are:
159 %s
160 kernel : not used yet
162 Returns
163 -------
164 bw : float
165 The estimate of the bandwidth
166 """
167 bw = bw.lower()
168 if bw not in bandwidth_funcs:
169 raise ValueError("Bandwidth %s not understood" % bw)
170 bandwidth = bandwidth_funcs[bw](x, kernel)
171 if np.any(bandwidth == 0):
172 # eventually this can fall back on another selection criterion.
173 err = "Selected KDE bandwidth is 0. Cannot estimate density."
174 raise RuntimeError(err)
175 else:
176 return bandwidth