Coverage for /home/martinb/.local/share/virtualenvs/camcops/lib/python3.6/site-packages/statsmodels/stats/tabledist.py : 34%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# -*- coding: utf-8 -*-
2"""
3Created on Sat Oct 01 20:20:16 2011
5Author: Josef Perktold
6License: BSD-3
8TODO:
9check orientation, size and alpha should be increasing for interp1d,
10but what is alpha? can be either sf or cdf probability
11change it to use one consistent notation
13check: instead of bound checking I could use the fill-value of the
14interpolators
15"""
16import numpy as np
17from scipy.interpolate import interp1d, interp2d, Rbf
19from statsmodels.tools.decorators import cache_readonly
22class TableDist(object):
23 """
24 Distribution, critical values and p-values from tables
26 currently only 1 extra parameter, e.g. sample size
28 Parameters
29 ----------
30 alpha : array_like, 1d
31 probabiliy in the table, could be either sf (right tail) or cdf (left
32 tail)
33 size : array_like, 1d
34 The sample sizes for the table
35 crit_table : array_like, 2d
36 The sample sizes in the table
37 array with critical values for sample size in rows and probability in
38 columns
39 asymptotic : callable, optional
40 Callable function with the form fn(nobs) that returns len(alpha)
41 critical values where the critical value in position i corresponds to
42 alpha[i]
43 min_nobs : int, optional
44 Minimum number of observations to use the asymptotic distribution. If
45 not provided, uses max(size).
46 max_nobs : int, optional
47 Maximum number of observations to use the tabular distribution. If not
48 provided, uses max(size)
50 Notes
51 -----
52 size and alpha must be sorted and increasing.
54 If both min_nobs and max_nobs are provided, then
55 the critical values from the tabular distribution and the asymptotic
56 distribution are linearly blended using the formula
57 :math:`w cv_a + (1-w) cv_t` where the weight is
58 :math:`w = (n - a_{min}) / (a_{max} - a_{min})`. This ensures the
59 transition between the tabular and the asymptotic critical values is
60 continuous. If these are not provided, then the asymptotic critical value
61 is used for nobs > max(size).
62 """
64 def __init__(self, alpha, size, crit_table, asymptotic=None,
65 min_nobs=None, max_nobs=None):
66 self.alpha = np.asarray(alpha)
67 if self.alpha.ndim != 1:
68 raise ValueError('alpha is not 1d')
69 elif (np.diff(self.alpha) <= 0).any():
70 raise ValueError('alpha is not sorted')
71 self.size = np.asarray(size)
72 if self.size.ndim != 1:
73 raise ValueError('size is not 1d')
74 elif (np.diff(self.size) <= 0).any():
75 raise ValueError('size is not sorted')
76 if self.size.ndim == 1:
77 if (np.diff(alpha) <= 0).any():
78 raise ValueError('alpha is not sorted')
79 self.crit_table = np.asarray(crit_table)
80 if self.crit_table.shape != (self.size.shape[0], self.alpha.shape[0]):
81 raise ValueError('crit_table must have shape'
82 '(len(size), len(alpha))')
84 self.n_alpha = len(alpha)
85 self.signcrit = np.sign(np.diff(self.crit_table, 1).mean())
86 if self.signcrit > 0: # increasing
87 self.critv_bounds = self.crit_table[:, [0, 1]]
88 else:
89 self.critv_bounds = self.crit_table[:, [1, 0]]
90 self.asymptotic = None
91 max_size = self.max_size = max(size)
93 if asymptotic is not None:
94 try:
95 cv = asymptotic(self.max_size + 1)
96 except Exception as exc:
97 raise type(exc)('Calling asymptotic(self.size+1) failed. The '
98 'error message was:'
99 '\n\n{err_msg}'.format(err_msg=exc.args[0]))
100 if len(cv) != len(alpha):
101 raise ValueError('asymptotic does not return len(alpha) '
102 'values')
103 self.asymptotic = asymptotic
105 self.min_nobs = max_size if min_nobs is None else min_nobs
106 self.max_nobs = max_size if max_nobs is None else max_nobs
107 if self.min_nobs > max_size:
108 raise ValueError('min_nobs > max(size)')
109 if self.max_nobs > max_size:
110 raise ValueError('max_nobs > max(size)')
112 @cache_readonly
113 def polyn(self):
114 polyn = [interp1d(self.size, self.crit_table[:, i])
115 for i in range(self.n_alpha)]
116 return polyn
118 @cache_readonly
119 def poly2d(self):
120 # check for monotonicity ?
121 # fix this, interp needs increasing
122 poly2d = interp2d(self.size, self.alpha, self.crit_table)
123 return poly2d
125 @cache_readonly
126 def polyrbf(self):
127 xs, xa = np.meshgrid(self.size.astype(float), self.alpha)
128 polyrbf = Rbf(xs.ravel(), xa.ravel(), self.crit_table.T.ravel(),
129 function='linear')
130 return polyrbf
132 def _critvals(self, n):
133 """
134 Rows of the table, linearly interpolated for given sample size
136 Parameters
137 ----------
138 n : float
139 sample size, second parameter of the table
141 Returns
142 -------
143 critv : ndarray, 1d
144 critical values (ppf) corresponding to a row of the table
146 Notes
147 -----
148 This is used in two step interpolation, or if we want to know the
149 critical values for all alphas for any sample size that we can obtain
150 through interpolation
151 """
152 if n > self.max_size:
153 if self.asymptotic is not None:
154 cv = self.asymptotic(n)
155 else:
156 raise ValueError('n is above max(size) and no asymptotic '
157 'distribtuion is provided')
158 else:
159 cv = ([p(n) for p in self.polyn])
160 if n > self.min_nobs:
161 w = (n - self.min_nobs) / (self.max_nobs - self.min_nobs)
162 w = min(1.0, w)
163 a_cv = self.asymptotic(n)
164 cv = w * a_cv + (1 - w) * cv
166 return cv
168 def prob(self, x, n):
169 """
170 Find pvalues by interpolation, either cdf(x)
172 Returns extreme probabilities, 0.001 and 0.2, for out of range
174 Parameters
175 ----------
176 x : array_like
177 observed value, assumed to follow the distribution in the table
178 n : float
179 sample size, second parameter of the table
181 Returns
182 -------
183 prob : array_like
184 This is the probability for each value of x, the p-value in
185 underlying distribution is for a statistical test.
186 """
187 critv = self._critvals(n)
188 alpha = self.alpha
190 if self.signcrit < 1:
191 # reverse if critv is decreasing
192 critv, alpha = critv[::-1], alpha[::-1]
194 # now critv is increasing
195 if np.size(x) == 1:
196 if x < critv[0]:
197 return alpha[0]
198 elif x > critv[-1]:
199 return alpha[-1]
200 return interp1d(critv, alpha)(x)[()]
201 else:
202 # vectorized
203 cond_low = (x < critv[0])
204 cond_high = (x > critv[-1])
205 cond_interior = ~np.logical_or(cond_low, cond_high)
207 probs = np.nan * np.ones(x.shape) # mistake if nan left
208 probs[cond_low] = alpha[0]
209 probs[cond_low] = alpha[-1]
210 probs[cond_interior] = interp1d(critv, alpha)(x[cond_interior])
212 return probs
214 def crit(self, prob, n):
215 """
216 Returns interpolated quantiles, similar to ppf or isf
218 use two sequential 1d interpolation, first by n then by prob
220 Parameters
221 ----------
222 prob : array_like
223 probabilities corresponding to the definition of table columns
224 n : int or float
225 sample size, second parameter of the table
227 Returns
228 -------
229 ppf : array_like
230 critical values with same shape as prob
231 """
232 prob = np.asarray(prob)
233 alpha = self.alpha
234 critv = self._critvals(n)
236 # vectorized
237 cond_ilow = (prob > alpha[0])
238 cond_ihigh = (prob < alpha[-1])
239 cond_interior = np.logical_or(cond_ilow, cond_ihigh)
241 # scalar
242 if prob.size == 1:
243 if cond_interior:
244 return interp1d(alpha, critv)(prob)
245 else:
246 return np.nan
248 # vectorized
249 quantile = np.nan * np.ones(prob.shape) # nans for outside
250 quantile[cond_interior] = interp1d(alpha, critv)(prob[cond_interior])
251 return quantile
253 def crit3(self, prob, n):
254 """
255 Returns interpolated quantiles, similar to ppf or isf
257 uses Rbf to interpolate critical values as function of `prob` and `n`
259 Parameters
260 ----------
261 prob : array_like
262 probabilities corresponding to the definition of table columns
263 n : int or float
264 sample size, second parameter of the table
266 Returns
267 -------
268 ppf : array_like
269 critical values with same shape as prob, returns nan for arguments
270 that are outside of the table bounds
271 """
272 prob = np.asarray(prob)
273 alpha = self.alpha
275 # vectorized
276 cond_ilow = (prob > alpha[0])
277 cond_ihigh = (prob < alpha[-1])
278 cond_interior = np.logical_or(cond_ilow, cond_ihigh)
280 # scalar
281 if prob.size == 1:
282 if cond_interior:
283 return self.polyrbf(n, prob)
284 else:
285 return np.nan
287 # vectorized
288 quantile = np.nan * np.ones(prob.shape) # nans for outside
290 quantile[cond_interior] = self.polyrbf(n, prob[cond_interior])
291 return quantile