Coverage for /home/martinb/.local/share/virtualenvs/camcops/lib/python3.6/site-packages/statsmodels/distributions/empirical_distribution.py : 15%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1"""
2Empirical CDF Functions
3"""
4import numpy as np
5from scipy.interpolate import interp1d
7def _conf_set(F, alpha=.05):
8 r"""
9 Constructs a Dvoretzky-Kiefer-Wolfowitz confidence band for the eCDF.
11 Parameters
12 ----------
13 F : array_like
14 The empirical distributions
15 alpha : float
16 Set alpha for a (1 - alpha) % confidence band.
18 Notes
19 -----
20 Based on the DKW inequality.
22 .. math:: P \left( \sup_x \left| F(x) - \hat(F)_n(X) \right| > \epsilon \right) \leq 2e^{-2n\epsilon^2}
24 References
25 ----------
26 Wasserman, L. 2006. `All of Nonparametric Statistics`. Springer.
27 """
28 nobs = len(F)
29 epsilon = np.sqrt(np.log(2./alpha) / (2 * nobs))
30 lower = np.clip(F - epsilon, 0, 1)
31 upper = np.clip(F + epsilon, 0, 1)
32 return lower, upper
class StepFunction(object):
    """
    A basic step function.

    Values at the ends are handled in the simplest way possible:
    everything to the left of x[0] is set to ival; everything
    to the right of x[-1] is set to y[-1].

    Parameters
    ----------
    x : array_like
        One-dimensional locations of the steps.
    y : array_like
        Values taken by the function; must have the same shape as `x`.
    ival : float
        ival is the value given to the values to the left of x[0]. Default
        is 0.
    sorted : bool
        Default is False. When False, `x` and `y` are reordered together so
        that `x` is ascending; pass True only if `x` is already sorted.
    side : {'left', 'right'}, optional
        Default is 'left'. Defines the shape of the intervals constituting the
        steps. 'right' correspond to [a, b) intervals and 'left' to (a, b].
        The value is matched case-insensitively and stored in lower case.

    Raises
    ------
    ValueError
        If `side` is not 'left' or 'right', if `x` and `y` differ in shape,
        or if they are not 1-dimensional.

    Examples
    --------
    >>> import numpy as np
    >>> from statsmodels.distributions.empirical_distribution import StepFunction
    >>>
    >>> x = np.arange(20)
    >>> y = np.arange(20)
    >>> f = StepFunction(x, y)
    >>>
    >>> print(f(3.2))
    3.0
    >>> print(f([[3.2,4.5],[24,-3.1]]))
    [[  3.   4.]
     [ 19.   0.]]
    >>> f2 = StepFunction(x, y, side='right')
    >>>
    >>> print(f(3.0))
    2.0
    >>> print(f2(3.0))
    3.0
    """

    def __init__(self, x, y, ival=0., sorted=False, side='left'):

        # Normalise once so the value validated here is exactly the value
        # later handed to np.searchsorted (which expects 'left'/'right').
        side = side.lower()
        if side not in ['right', 'left']:
            msg = "side can take the values 'right' or 'left'"
            raise ValueError(msg)
        self.side = side

        _x = np.asarray(x)
        _y = np.asarray(y)

        if _x.shape != _y.shape:
            msg = "x and y do not have the same shape"
            raise ValueError(msg)
        if len(_x.shape) != 1:
            msg = 'x and y must be 1-dimensional'
            raise ValueError(msg)

        # Prepend a sentinel step at -inf carrying `ival`, so every query
        # left of the first knot resolves to index 0.
        self.x = np.r_[-np.inf, _x]
        self.y = np.r_[ival, _y]

        if not sorted:
            asort = np.argsort(self.x)
            self.x = np.take(self.x, asort, 0)
            self.y = np.take(self.y, asort, 0)
        self.n = self.x.shape[0]

    def __call__(self, time):
        """Evaluate the step function at `time` (scalar or array_like)."""
        # searchsorted gives the insertion point; the step in force is the
        # one immediately before it.
        tind = np.searchsorted(self.x, time, self.side) - 1
        return self.y[tind]
class ECDF(StepFunction):
    """
    Empirical CDF of a sample, represented as a step function.

    Parameters
    ----------
    x : array_like
        Observations
    side : {'left', 'right'}, optional
        Default is 'right'. Defines the shape of the intervals constituting the
        steps. 'right' correspond to [a, b) intervals and 'left' to (a, b].

    Returns
    -------
    Empirical CDF as a step function.

    Examples
    --------
    >>> import numpy as np
    >>> from statsmodels.distributions.empirical_distribution import ECDF
    >>>
    >>> ecdf = ECDF([3, 3, 1, 4])
    >>>
    >>> ecdf([3, 55, 0.5, 1.5])
    array([ 0.75,  1.  ,  0.  ,  0.25])
    """

    def __init__(self, x, side='right'):
        # Work on a private, sorted copy so the caller's data is untouched.
        observations = np.array(x, copy=True)
        observations.sort()
        nobs = len(observations)
        # The i-th smallest observation lifts the CDF to i / nobs.
        heights = np.linspace(1. / nobs, 1, nobs)
        super().__init__(observations, heights, side=side, sorted=True)
        # TODO: make `step` an arg and have a linear interpolation option?
        # This is the path taken when `step` is True.
        # If `step` is False, a previous version of the code read
        #  `return interp1d(x,y,drop_errors=False,fill_values=ival)`
        # which would have raised a NameError if hit, so would need to be
        # fixed.  See GH#5701.
def monotone_fn_inverter(fn, x, vectorized=True, **keywords):
    """
    Given a monotone function fn (no checking is done to verify monotonicity)
    and a set of x values, return a linearly interpolated approximation
    to its inverse from its values on x.

    Parameters
    ----------
    fn : callable
        The function to invert. Called as ``fn(x, **keywords)`` on the whole
        array when `vectorized` is True, otherwise once per element of `x`.
    x : array_like
        Points at which `fn` is evaluated.
    vectorized : bool
        Whether `fn` accepts the full array in a single call. Default is True.
    **keywords
        Extra keyword arguments forwarded to `fn`.

    Returns
    -------
    scipy.interpolate.interp1d
        Interpolator mapping values of `fn` back to the corresponding `x`.
    """
    x = np.asarray(x)
    if vectorized:
        y = fn(x, **keywords)
    else:
        y = [fn(_x, **keywords) for _x in x]
    # Coerce in both branches: `fn` may hand back a plain sequence, which
    # cannot be indexed with the argsort result below.
    y = np.asarray(y)

    # interp1d needs its abscissa (here the function values) in order.
    a = np.argsort(y)

    return interp1d(y[a], x[a])
if __name__ == "__main__":
    # TODO: Make sure everything is correctly aligned and make a plotting
    # function
    from urllib.request import urlopen
    import matplotlib.pyplot as plt

    # Demo: plot the eCDF of the nerve data with its DKW confidence band.
    nerve_url = 'http://www.statsci.org/data/general/nerve.txt'
    # The context manager closes the HTTP response once the data is parsed.
    with urlopen(nerve_url) as response:
        nerve_data = np.loadtxt(response)
    x = nerve_data / 50.  # was in 1/50 seconds
    cdf = ECDF(x)
    x.sort()
    F = cdf(x)
    plt.step(x, F, where='post')
    lower, upper = _conf_set(F)
    plt.step(x, lower, 'r', where='post')
    plt.step(x, upper, 'r', where='post')
    plt.xlim(0, 1.5)
    plt.ylim(0, 1.05)
    # Rug marks showing where the individual observations fall.
    plt.vlines(x, 0, .05)
    plt.show()