Coverage for /home/martinb/.local/share/virtualenvs/camcops/lib/python3.6/site-packages/scipy/stats/_distn_infrastructure.py : 30%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1#
2# Author: Travis Oliphant 2002-2011 with contributions from
3# SciPy Developers 2004-2011
4#
5from scipy._lib._util import getfullargspec_no_self as _getfullargspec
7import sys
8import keyword
9import re
10import types
11import warnings
12import inspect
13from itertools import zip_longest
15from scipy._lib import doccer
16from ._distr_params import distcont, distdiscrete
17from scipy._lib._util import check_random_state
18from scipy._lib._util import _valarray as valarray
20from scipy.special import (comb, chndtr, entr, rel_entr, xlogy, ive)
22# for root finding for continuous distribution ppf, and max likelihood estimation
23from scipy import optimize
25# for functions of continuous distributions (e.g. moments, entropy, cdf)
26from scipy import integrate
28# to approximate the pdf of a continuous distribution given its cdf
29from scipy.misc import derivative
31from numpy import (arange, putmask, ravel, ones, shape, ndarray, zeros, floor,
32 logical_and, log, sqrt, place, argmax, vectorize, asarray,
33 nan, inf, isinf, NINF, empty)
35import numpy as np
37from ._constants import _XMAX
# These are the docstring parts used for substitution in specific
# distribution docstrings

docheaders = {'methods': """\nMethods\n-------\n""",
              'notes': """\nNotes\n-----\n""",
              'examples': """\nExamples\n--------\n"""}

# One reST entry per public method.  Each is later %-substituted with the
# distribution's shape-parameter string via the ``%(shapes)s`` placeholder.
_doc_rvs = """\
rvs(%(shapes)s, loc=0, scale=1, size=1, random_state=None)
    Random variates.
"""
_doc_pdf = """\
pdf(x, %(shapes)s, loc=0, scale=1)
    Probability density function.
"""
_doc_logpdf = """\
logpdf(x, %(shapes)s, loc=0, scale=1)
    Log of the probability density function.
"""
_doc_pmf = """\
pmf(k, %(shapes)s, loc=0, scale=1)
    Probability mass function.
"""
_doc_logpmf = """\
logpmf(k, %(shapes)s, loc=0, scale=1)
    Log of the probability mass function.
"""
_doc_cdf = """\
cdf(x, %(shapes)s, loc=0, scale=1)
    Cumulative distribution function.
"""
_doc_logcdf = """\
logcdf(x, %(shapes)s, loc=0, scale=1)
    Log of the cumulative distribution function.
"""
_doc_sf = """\
sf(x, %(shapes)s, loc=0, scale=1)
    Survival function (also defined as ``1 - cdf``, but `sf` is sometimes more accurate).
"""
_doc_logsf = """\
logsf(x, %(shapes)s, loc=0, scale=1)
    Log of the survival function.
"""
_doc_ppf = """\
ppf(q, %(shapes)s, loc=0, scale=1)
    Percent point function (inverse of ``cdf`` --- percentiles).
"""
_doc_isf = """\
isf(q, %(shapes)s, loc=0, scale=1)
    Inverse survival function (inverse of ``sf``).
"""
_doc_moment = """\
moment(n, %(shapes)s, loc=0, scale=1)
    Non-central moment of order n
"""
_doc_stats = """\
stats(%(shapes)s, loc=0, scale=1, moments='mv')
    Mean('m'), variance('v'), skew('s'), and/or kurtosis('k').
"""
_doc_entropy = """\
entropy(%(shapes)s, loc=0, scale=1)
    (Differential) entropy of the RV.
"""
_doc_fit = """\
fit(data)
    Parameter estimates for generic data.
    See `scipy.stats.rv_continuous.fit <https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.rv_continuous.fit.html#scipy.stats.rv_continuous.fit>`__ for detailed documentation of the
    keyword arguments.
"""
# ``%(shapes_)s`` (trailing underscore) is the variant used inside an
# ``args=(...)`` tuple; a trailing comma is appended for single-shape
# distributions in _construct_doc.
_doc_expect = """\
expect(func, args=(%(shapes_)s), loc=0, scale=1, lb=None, ub=None, conditional=False, **kwds)
    Expected value of a function (of one argument) with respect to the distribution.
"""
_doc_expect_discrete = """\
expect(func, args=(%(shapes_)s), loc=0, lb=None, ub=None, conditional=False)
    Expected value of a function (of one argument) with respect to the distribution.
"""
_doc_median = """\
median(%(shapes)s, loc=0, scale=1)
    Median of the distribution.
"""
_doc_mean = """\
mean(%(shapes)s, loc=0, scale=1)
    Mean of the distribution.
"""
_doc_var = """\
var(%(shapes)s, loc=0, scale=1)
    Variance of the distribution.
"""
_doc_std = """\
std(%(shapes)s, loc=0, scale=1)
    Standard deviation of the distribution.
"""
_doc_interval = """\
interval(alpha, %(shapes)s, loc=0, scale=1)
    Endpoints of the range that contains alpha percent of the distribution
"""
# Concatenation of all method entries, used as the "Methods" section of a
# continuous distribution's docstring.
_doc_allmethods = ''.join([docheaders['methods'], _doc_rvs, _doc_pdf,
                           _doc_logpdf, _doc_cdf, _doc_logcdf, _doc_sf,
                           _doc_logsf, _doc_ppf, _doc_isf, _doc_moment,
                           _doc_stats, _doc_entropy, _doc_fit,
                           _doc_expect, _doc_median,
                           _doc_mean, _doc_var, _doc_std, _doc_interval])
# Summary paragraph placed at the top of every continuous distribution's
# docstring (the discrete variant is derived from this below).
_doc_default_longsummary = """\
As an instance of the `rv_continuous` class, `%(name)s` object inherits from it
a collection of generic methods (see below for the full list),
and completes them with details specific for this particular distribution.
"""

_doc_default_frozen_note = """
Alternatively, the object may be called (as a function) to fix the shape,
location, and scale parameters returning a "frozen" continuous RV object:

rv = %(name)s(%(shapes)s, loc=0, scale=1)
    - Frozen RV object with the same methods but holding the given shape,
      location, and scale fixed.
"""

# Doctest-style Examples section shared by all continuous distributions;
# %(set_vals_stmt)s is filled in with shape-parameter assignments.
_doc_default_example = """\
Examples
--------
>>> from scipy.stats import %(name)s
>>> import matplotlib.pyplot as plt
>>> fig, ax = plt.subplots(1, 1)

Calculate a few first moments:

%(set_vals_stmt)s
>>> mean, var, skew, kurt = %(name)s.stats(%(shapes)s, moments='mvsk')

Display the probability density function (``pdf``):

>>> x = np.linspace(%(name)s.ppf(0.01, %(shapes)s),
...                 %(name)s.ppf(0.99, %(shapes)s), 100)
>>> ax.plot(x, %(name)s.pdf(x, %(shapes)s),
...        'r-', lw=5, alpha=0.6, label='%(name)s pdf')

Alternatively, the distribution object can be called (as a function)
to fix the shape, location and scale parameters. This returns a "frozen"
RV object holding the given parameters fixed.

Freeze the distribution and display the frozen ``pdf``:

>>> rv = %(name)s(%(shapes)s)
>>> ax.plot(x, rv.pdf(x), 'k-', lw=2, label='frozen pdf')

Check accuracy of ``cdf`` and ``ppf``:

>>> vals = %(name)s.ppf([0.001, 0.5, 0.999], %(shapes)s)
>>> np.allclose([0.001, 0.5, 0.999], %(name)s.cdf(vals, %(shapes)s))
True

Generate random numbers:

>>> r = %(name)s.rvs(%(shapes)s, size=1000)

And compare the histogram:

>>> ax.hist(r, density=True, histtype='stepfilled', alpha=0.2)
>>> ax.legend(loc='best', frameon=False)
>>> plt.show()

"""

_doc_default_locscale = """\
The probability density above is defined in the "standardized" form. To shift
and/or scale the distribution use the ``loc`` and ``scale`` parameters.
Specifically, ``%(name)s.pdf(x, %(shapes)s, loc, scale)`` is identically
equivalent to ``%(name)s.pdf(y, %(shapes)s) / scale`` with
``y = (x - loc) / scale``.
"""

_doc_default = ''.join([_doc_default_longsummary,
                        _doc_allmethods,
                        '\n',
                        _doc_default_example])

_doc_default_before_notes = ''.join([_doc_default_longsummary,
                                     _doc_allmethods])

# Master substitution dictionary for continuous distributions; consumed by
# rv_generic._construct_doc via doccer.docformat.
docdict = {
    'rvs': _doc_rvs,
    'pdf': _doc_pdf,
    'logpdf': _doc_logpdf,
    'cdf': _doc_cdf,
    'logcdf': _doc_logcdf,
    'sf': _doc_sf,
    'logsf': _doc_logsf,
    'ppf': _doc_ppf,
    'isf': _doc_isf,
    'stats': _doc_stats,
    'entropy': _doc_entropy,
    'fit': _doc_fit,
    'moment': _doc_moment,
    'expect': _doc_expect,
    'interval': _doc_interval,
    'mean': _doc_mean,
    'std': _doc_std,
    'var': _doc_var,
    'median': _doc_median,
    'allmethods': _doc_allmethods,
    'longsummary': _doc_default_longsummary,
    'frozennote': _doc_default_frozen_note,
    'example': _doc_default_example,
    'default': _doc_default,
    'before_notes': _doc_default_before_notes,
    'after_notes': _doc_default_locscale
}
# Reuse common content between continuous and discrete docs, change some
# minor bits.
docdict_discrete = docdict.copy()

docdict_discrete['pmf'] = _doc_pmf
docdict_discrete['logpmf'] = _doc_logpmf
docdict_discrete['expect'] = _doc_expect_discrete
_doc_disc_methods = ['rvs', 'pmf', 'logpmf', 'cdf', 'logcdf', 'sf', 'logsf',
                     'ppf', 'isf', 'stats', 'entropy', 'expect', 'median',
                     'mean', 'var', 'std', 'interval']
# Discrete RVs have no scale parameter: drop it from every signature line.
for obj in _doc_disc_methods:
    docdict_discrete[obj] = docdict_discrete[obj].replace(', scale=1', '')

# Discrete distributions conventionally use 'k' rather than 'x' as the
# quantile variable name.
_doc_disc_methods_err_varname = ['cdf', 'logcdf', 'sf', 'logsf']
for obj in _doc_disc_methods_err_varname:
    docdict_discrete[obj] = docdict_discrete[obj].replace('(x, ', '(k, ')

docdict_discrete.pop('pdf')
docdict_discrete.pop('logpdf')

_doc_allmethods = ''.join([docdict_discrete[obj] for obj in _doc_disc_methods])
docdict_discrete['allmethods'] = docheaders['methods'] + _doc_allmethods

docdict_discrete['longsummary'] = _doc_default_longsummary.replace(
    'rv_continuous', 'rv_discrete')

_doc_default_frozen_note = """
Alternatively, the object may be called (as a function) to fix the shape and
location parameters returning a "frozen" discrete RV object:

rv = %(name)s(%(shapes)s, loc=0)
    - Frozen RV object with the same methods but holding the given shape and
      location fixed.
"""
docdict_discrete['frozennote'] = _doc_default_frozen_note

_doc_default_discrete_example = """\
Examples
--------
>>> from scipy.stats import %(name)s
>>> import matplotlib.pyplot as plt
>>> fig, ax = plt.subplots(1, 1)

Calculate a few first moments:

%(set_vals_stmt)s
>>> mean, var, skew, kurt = %(name)s.stats(%(shapes)s, moments='mvsk')

Display the probability mass function (``pmf``):

>>> x = np.arange(%(name)s.ppf(0.01, %(shapes)s),
...               %(name)s.ppf(0.99, %(shapes)s))
>>> ax.plot(x, %(name)s.pmf(x, %(shapes)s), 'bo', ms=8, label='%(name)s pmf')
>>> ax.vlines(x, 0, %(name)s.pmf(x, %(shapes)s), colors='b', lw=5, alpha=0.5)

Alternatively, the distribution object can be called (as a function)
to fix the shape and location. This returns a "frozen" RV object holding
the given parameters fixed.

Freeze the distribution and display the frozen ``pmf``:

>>> rv = %(name)s(%(shapes)s)
>>> ax.vlines(x, 0, rv.pmf(x), colors='k', linestyles='-', lw=1,
...           label='frozen pmf')
>>> ax.legend(loc='best', frameon=False)
>>> plt.show()

Check accuracy of ``cdf`` and ``ppf``:

>>> prob = %(name)s.cdf(x, %(shapes)s)
>>> np.allclose(x, %(name)s.ppf(prob, %(shapes)s))
True

Generate random numbers:

>>> r = %(name)s.rvs(%(shapes)s, size=1000)
"""

_doc_default_discrete_locscale = """\
The probability mass function above is defined in the "standardized" form.
To shift distribution use the ``loc`` parameter.
Specifically, ``%(name)s.pmf(k, %(shapes)s, loc)`` is identically
equivalent to ``%(name)s.pmf(k - loc, %(shapes)s)``.
"""

docdict_discrete['example'] = _doc_default_discrete_example
docdict_discrete['after_notes'] = _doc_default_discrete_locscale

_doc_default_before_notes = ''.join([docdict_discrete['longsummary'],
                                     docdict_discrete['allmethods']])
docdict_discrete['before_notes'] = _doc_default_before_notes

_doc_default_disc = ''.join([docdict_discrete['longsummary'],
                             docdict_discrete['allmethods'],
                             docdict_discrete['frozennote'],
                             docdict_discrete['example']])
docdict_discrete['default'] = _doc_default_disc

# clean up all the separate docstring elements, we do not need them anymore
for obj in [s for s in dir() if s.startswith('_doc_')]:
    exec('del ' + obj)
del obj
354def _moment(data, n, mu=None):
355 if mu is None:
356 mu = data.mean()
357 return ((data - mu)**n).mean()
360def _moment_from_stats(n, mu, mu2, g1, g2, moment_func, args):
361 if (n == 0):
362 return 1.0
363 elif (n == 1):
364 if mu is None:
365 val = moment_func(1, *args)
366 else:
367 val = mu
368 elif (n == 2):
369 if mu2 is None or mu is None:
370 val = moment_func(2, *args)
371 else:
372 val = mu2 + mu*mu
373 elif (n == 3):
374 if g1 is None or mu2 is None or mu is None:
375 val = moment_func(3, *args)
376 else:
377 mu3 = g1 * np.power(mu2, 1.5) # 3rd central moment
378 val = mu3+3*mu*mu2+mu*mu*mu # 3rd non-central moment
379 elif (n == 4):
380 if g1 is None or g2 is None or mu2 is None or mu is None:
381 val = moment_func(4, *args)
382 else:
383 mu4 = (g2+3.0)*(mu2**2.0) # 4th central moment
384 mu3 = g1*np.power(mu2, 1.5) # 3rd central moment
385 val = mu4+4*mu*mu3+6*mu*mu*mu2+mu*mu*mu*mu
386 else:
387 val = moment_func(n, *args)
389 return val
392def _skew(data):
393 """
394 skew is third central moment / variance**(1.5)
395 """
396 data = np.ravel(data)
397 mu = data.mean()
398 m2 = ((data - mu)**2).mean()
399 m3 = ((data - mu)**3).mean()
400 return m3 / np.power(m2, 1.5)
403def _kurtosis(data):
404 """
405 kurtosis is fourth central moment / variance**2 - 3
406 """
407 data = np.ravel(data)
408 mu = data.mean()
409 m2 = ((data - mu)**2).mean()
410 m4 = ((data - mu)**4).mean()
411 return m4 / m2**2 - 3
# Frozen RV class
class rv_frozen(object):
    """A distribution with its shape, location and scale parameters fixed.

    Thin wrapper: each method forwards to the underlying distribution
    instance with the stored ``args``/``kwds`` appended.
    """

    def __init__(self, dist, *args, **kwds):
        self.args = args
        self.kwds = kwds

        # create a new instance
        self.dist = dist.__class__(**dist._updated_ctor_param())

        # cache the support for the given shape parameters
        shapes, _, _ = self.dist._parse_args(*args, **kwds)
        self.a, self.b = self.dist._get_support(*shapes)

    @property
    def random_state(self):
        # Delegates to the wrapped distribution's random state.
        return self.dist._random_state

    @random_state.setter
    def random_state(self, seed):
        self.dist._random_state = check_random_state(seed)

    def pdf(self, x):   # raises AttributeError in frozen discrete distribution
        return self.dist.pdf(x, *self.args, **self.kwds)

    def logpdf(self, x):
        return self.dist.logpdf(x, *self.args, **self.kwds)

    def cdf(self, x):
        return self.dist.cdf(x, *self.args, **self.kwds)

    def logcdf(self, x):
        return self.dist.logcdf(x, *self.args, **self.kwds)

    def ppf(self, q):
        return self.dist.ppf(q, *self.args, **self.kwds)

    def isf(self, q):
        return self.dist.isf(q, *self.args, **self.kwds)

    def rvs(self, size=None, random_state=None):
        # copy so the stored kwds are not mutated
        kwds = self.kwds.copy()
        kwds.update({'size': size, 'random_state': random_state})
        return self.dist.rvs(*self.args, **kwds)

    def sf(self, x):
        return self.dist.sf(x, *self.args, **self.kwds)

    def logsf(self, x):
        return self.dist.logsf(x, *self.args, **self.kwds)

    def stats(self, moments='mv'):
        kwds = self.kwds.copy()
        kwds.update({'moments': moments})
        return self.dist.stats(*self.args, **kwds)

    def median(self):
        return self.dist.median(*self.args, **self.kwds)

    def mean(self):
        return self.dist.mean(*self.args, **self.kwds)

    def var(self):
        return self.dist.var(*self.args, **self.kwds)

    def std(self):
        return self.dist.std(*self.args, **self.kwds)

    def moment(self, n):
        return self.dist.moment(n, *self.args, **self.kwds)

    def entropy(self):
        return self.dist.entropy(*self.args, **self.kwds)

    def pmf(self, k):   # raises AttributeError in frozen continuous distribution
        return self.dist.pmf(k, *self.args, **self.kwds)

    def logpmf(self, k):
        return self.dist.logpmf(k, *self.args, **self.kwds)

    def interval(self, alpha):
        return self.dist.interval(alpha, *self.args, **self.kwds)

    def expect(self, func=None, lb=None, ub=None, conditional=False, **kwds):
        # expect method only accepts shape parameters as positional args
        # hence convert self.args, self.kwds, also loc/scale
        # See the .expect method docstrings for the meaning of
        # other parameters.
        a, loc, scale = self.dist._parse_args(*self.args, **self.kwds)
        if isinstance(self.dist, rv_discrete):
            return self.dist.expect(func, a, loc, lb, ub, conditional, **kwds)
        else:
            return self.dist.expect(func, a, loc, scale, lb, ub,
                                    conditional, **kwds)

    def support(self):
        return self.dist.support(*self.args, **self.kwds)
# This should be rewritten
def argsreduce(cond, *args):
    """Return, for each of *args*, the elements (after broadcasting against
    `cond`) at the positions where ``ravel(cond)`` is true, as 1-D arrays.

    Examples
    --------
    >>> import numpy as np
    >>> rand = np.random.random_sample
    >>> A = rand((4, 5))
    >>> B = 2
    >>> C = rand((1, 5))
    >>> cond = np.ones(A.shape)
    >>> [A1, B1, C1] = argsreduce(cond, A, B, C)
    >>> B1.shape
    (20,)
    >>> cond[2,:] = 0
    >>> [A2, B2, C2] = argsreduce(cond, A, B, C)
    >>> B2.shape
    (15,)

    """
    promoted = np.atleast_1d(*args)
    # atleast_1d returns a bare array (not a list) for a single argument
    if not isinstance(promoted, list):
        promoted = [promoted, ]
    # (cond == cond) is an all-True array of cond's shape; multiplying by it
    # broadcasts each argument up to that shape before extraction.
    all_true = (cond == cond)
    return [np.extract(cond, arg * all_true) for arg in promoted]
# Source template for the per-instance argument parsers.  It is %-filled
# with the shape/loc/scale fragments and exec'd in
# rv_generic._construct_argparser, which then binds the resulting
# functions to the instance as methods.
parse_arg_template = """
def _parse_args(self, %(shape_arg_str)s %(locscale_in)s):
    return (%(shape_arg_str)s), %(locscale_out)s

def _parse_args_rvs(self, %(shape_arg_str)s %(locscale_in)s, size=None):
    return self._argcheck_rvs(%(shape_arg_str)s %(locscale_out)s, size=size)

def _parse_args_stats(self, %(shape_arg_str)s %(locscale_in)s, moments='mv'):
    return (%(shape_arg_str)s), %(locscale_out)s, moments
"""
553# Both the continuous and discrete distributions depend on ncx2.
554# The function name ncx2 is an abbreviation for noncentral chi squared.
556def _ncx2_log_pdf(x, df, nc):
557 # We use (xs**2 + ns**2)/2 = (xs - ns)**2/2 + xs*ns, and include the
558 # factor of exp(-xs*ns) into the ive function to improve numerical
559 # stability at large values of xs. See also `rice.pdf`.
560 df2 = df/2.0 - 1.0
561 xs, ns = np.sqrt(x), np.sqrt(nc)
562 res = xlogy(df2/2.0, x/nc) - 0.5*(xs - ns)**2
563 res += np.log(ive(df2, xs*ns) / 2.0)
564 return res
def _ncx2_pdf(x, df, nc):
    # Noncentral chi-squared pdf: exponentiate the numerically-stable
    # log-pdf computed above.
    return np.exp(_ncx2_log_pdf(x, df, nc))
def _ncx2_cdf(x, df, nc):
    # Noncentral chi-squared cdf via the special function chndtr.
    return chndtr(x, df, nc)
575class rv_generic(object):
576 """Class which encapsulates common functionality between rv_discrete
577 and rv_continuous.
579 """
    def __init__(self, seed=None):
        """Set up shared RV machinery; `seed` initializes the random state."""
        super(rv_generic, self).__init__()

        # figure out if _stats signature has 'moments' keyword
        sig = _getfullargspec(self._stats)
        self._stats_has_moments = ((sig.varkw is not None) or
                                   ('moments' in sig.args) or
                                   ('moments' in sig.kwonlyargs))
        self._random_state = check_random_state(seed)

        # For historical reasons, `size` was made an attribute that was read
        # inside _rvs(). The code is being changed so that 'size' is an argument
        # to self._rvs(). However some external (non-SciPy) distributions have not
        # been updated. Maintain backwards compatibility by checking if
        # the self._rvs() signature has the 'size' keyword, or a **kwarg,
        # and if not set self._size inside self.rvs() before calling self._rvs().
        argspec = inspect.getfullargspec(self._rvs)
        self._rvs_uses_size_attribute = (argspec.varkw is None and
                                         'size' not in argspec.args and
                                         'size' not in argspec.kwonlyargs)
        # Warn on first use only
        self._rvs_size_warned = False
    @property
    def random_state(self):
        """ Get or set the RandomState object for generating random variates.

        This can be either None, int, a RandomState instance, or a
        np.random.Generator instance.

        If None (or np.random), use the RandomState singleton used by np.random.
        If already a RandomState or Generator instance, use it.
        If an int, use a new RandomState instance seeded with seed.

        """
        return self._random_state
    @random_state.setter
    def random_state(self, seed):
        # Normalize `seed` to a RandomState/Generator instance.
        self._random_state = check_random_state(seed)
    def __getstate__(self):
        # Pickle as (constructor parameters, random state); see __setstate__.
        return self._updated_ctor_param(), self._random_state
    def __setstate__(self, state):
        # Rebuild the instance from the ctor params, then restore the
        # random state that was saved by __getstate__.
        ctor_param, r = state
        self.__init__(**ctor_param)
        self._random_state = r
        return self
    def _construct_argparser(
            self, meths_to_inspect, locscale_in, locscale_out):
        """Construct the parser for the shape arguments.

        Generates the argument-parsing functions dynamically (by exec'ing
        `parse_arg_template`) and attaches them to the instance.

        Is supposed to be called in __init__ of a class for each distribution.

        If self.shapes is a non-empty string, interprets it as a
        comma-separated list of shape parameters.

        Otherwise inspects the call signatures of `meths_to_inspect`
        and constructs the argument-parsing functions from these.
        In this case also sets `shapes` and `numargs`.
        """

        if self.shapes:
            # sanitize the user-supplied shapes
            if not isinstance(self.shapes, str):
                raise TypeError('shapes must be a string.')

            shapes = self.shapes.replace(',', ' ').split()

            # shape names are exec'd into source code, so they must be
            # valid, non-keyword identifiers
            for field in shapes:
                if keyword.iskeyword(field):
                    raise SyntaxError('keywords cannot be used as shapes.')
                if not re.match('^[_a-zA-Z][_a-zA-Z0-9]*$', field):
                    raise SyntaxError(
                        'shapes must be valid python identifiers')
        else:
            # find out the call signatures (_pdf, _cdf etc), deduce shape
            # arguments. Generic methods only have 'self, x', any further args
            # are shapes.
            shapes_list = []
            for meth in meths_to_inspect:
                shapes_args = _getfullargspec(meth)  # NB: does not contain self
                args = shapes_args.args[1:]          # peel off 'x', too

                if args:
                    shapes_list.append(args)

                    # *args or **kwargs are not allowed w/automatic shapes
                    if shapes_args.varargs is not None:
                        raise TypeError(
                            '*args are not allowed w/out explicit shapes')
                    if shapes_args.varkw is not None:
                        raise TypeError(
                            '**kwds are not allowed w/out explicit shapes')
                    if shapes_args.kwonlyargs:
                        raise TypeError(
                            'kwonly args are not allowed w/out explicit shapes')
                    if shapes_args.defaults is not None:
                        raise TypeError('defaults are not allowed for shapes')

            if shapes_list:
                shapes = shapes_list[0]

                # make sure the signatures are consistent
                for item in shapes_list:
                    if item != shapes:
                        raise TypeError('Shape arguments are inconsistent.')
            else:
                shapes = []

        # have the arguments, construct the method from template
        shapes_str = ', '.join(shapes) + ', ' if shapes else ''  # NB: not None
        dct = dict(shape_arg_str=shapes_str,
                   locscale_in=locscale_in,
                   locscale_out=locscale_out,
                   )
        ns = {}
        exec(parse_arg_template % dct, ns)
        # NB: attach to the instance, not class
        for name in ['_parse_args', '_parse_args_stats', '_parse_args_rvs']:
            setattr(self, name, types.MethodType(ns[name], self))

        self.shapes = ', '.join(shapes) if shapes else None
        if not hasattr(self, 'numargs'):
            # allows more general subclassing with *args
            self.numargs = len(shapes)
    def _construct_doc(self, docdict, shapes_vals=None):
        """Construct the instance docstring with string substitutions."""
        tempdict = docdict.copy()
        tempdict['name'] = self.name or 'distname'
        tempdict['shapes'] = self.shapes or ''

        if shapes_vals is None:
            shapes_vals = ()
        # example values for the shape parameters, formatted for the doctests
        vals = ', '.join('%.3g' % val for val in shapes_vals)
        tempdict['vals'] = vals

        tempdict['shapes_'] = self.shapes or ''
        if self.shapes and self.numargs == 1:
            # single shape inside args=(...) needs a trailing comma
            tempdict['shapes_'] += ','

        if self.shapes:
            tempdict['set_vals_stmt'] = '>>> %s = %s' % (self.shapes, vals)
        else:
            tempdict['set_vals_stmt'] = ''

        if self.shapes is None:
            # remove shapes from call parameters if there are none
            for item in ['default', 'before_notes']:
                tempdict[item] = tempdict[item].replace(
                    "\n%(shapes)s : array_like\n    shape parameters", "")
        # two passes: substituted text may itself contain %(...)s templates
        for i in range(2):
            if self.shapes is None:
                # necessary because we use %(shapes)s in two forms (w w/o ", ")
                self.__doc__ = self.__doc__.replace("%(shapes)s, ", "")
            try:
                self.__doc__ = doccer.docformat(self.__doc__, tempdict)
            except TypeError as e:
                raise Exception("Unable to construct docstring for distribution \"%s\": %s" % (self.name, repr(e)))

        # correct for empty shapes
        self.__doc__ = self.__doc__.replace('(, ', '(').replace(', )', ')')
    def _construct_default_doc(self, longname=None, extradoc=None,
                               docdict=None, discrete='continuous'):
        """Construct instance docstring from the default template."""
        if longname is None:
            longname = 'A'
        if extradoc is None:
            extradoc = ''
        if extradoc.startswith('\n\n'):
            # avoid a doubled blank line after the Notes header
            extradoc = extradoc[2:]
        self.__doc__ = ''.join(['%s %s random variable.' % (longname, discrete),
                                '\n\n%(before_notes)s\n', docheaders['notes'],
                                extradoc, '\n%(example)s'])
        # fill in the %(...)s placeholders just assembled above
        self._construct_doc(docdict)
    def freeze(self, *args, **kwds):
        """Freeze the distribution for the given arguments.

        Parameters
        ----------
        arg1, arg2, arg3,... : array_like
            The shape parameter(s) for the distribution.  Should include all
            the non-optional arguments, may include ``loc`` and ``scale``.

        Returns
        -------
        rv_frozen : rv_frozen instance
            The frozen distribution.

        """
        return rv_frozen(self, *args, **kwds)
    def __call__(self, *args, **kwds):
        # Calling a distribution instance freezes it; share freeze's doc.
        return self.freeze(*args, **kwds)
    __call__.__doc__ = freeze.__doc__
    # The actual calculation functions (no basic checking need be done)
    # If these are defined, the others won't be looked at.
    # Otherwise, the other set can be defined.
    def _stats(self, *args, **kwds):
        # Default: no analytic mean/variance/skew/kurtosis; callers fall
        # back on computing moments numerically.
        return None, None, None, None
    # Noncentral moments (also known as the moment about the origin).
    # Expressed in LaTeX, munp would be $\mu'_{n}$, i.e. "mu-sub-n-prime".
    # The primed mu is a widely used notation for the noncentral moment.
    def _munp(self, n, *args):
        # Silence floating point warnings from integration.
        with np.errstate(all='ignore'):
            vals = self.generic_moment(n, *args)
        return vals
    def _argcheck_rvs(self, *args, **kwargs):
        # Handle broadcasting and size validation of the rvs method.
        # Subclasses should not have to override this method.
        # The rule is that if `size` is not None, then `size` gives the
        # shape of the result (integer values of `size` are treated as
        # tuples with length 1; i.e. `size=3` is the same as `size=(3,)`.)
        #
        # `args` is expected to contain the shape parameters (if any), the
        # location and the scale in a flat tuple (e.g. if there are two
        # shape parameters `a` and `b`, `args` will be `(a, b, loc, scale)`).
        # The only keyword argument expected is 'size'.
        size = kwargs.get('size', None)
        all_bcast = np.broadcast_arrays(*args)

        def squeeze_left(a):
            # drop leading length-1 axes, e.g. (1, 1, 3) -> (3,)
            while a.ndim > 0 and a.shape[0] == 1:
                a = a[0]
            return a

        # Eliminate trivial leading dimensions.  In the convention
        # used by numpy's random variate generators, trivial leading
        # dimensions are effectively ignored.  In other words, when `size`
        # is given, trivial leading dimensions of the broadcast parameters
        # in excess of the number of dimensions in size are ignored, e.g.
        #   >>> np.random.normal([[1, 3, 5]], [[[[0.01]]]], size=3)
        #   array([ 1.00104267,  3.00422496,  4.99799278])
        # If `size` is not given, the exact broadcast shape is preserved:
        #   >>> np.random.normal([[1, 3, 5]], [[[[0.01]]]])
        #   array([[[[ 1.00862899,  3.00061431,  4.99867122]]]])
        #
        all_bcast = [squeeze_left(a) for a in all_bcast]
        bcast_shape = all_bcast[0].shape
        bcast_ndim = all_bcast[0].ndim

        if size is None:
            size_ = bcast_shape
        else:
            size_ = tuple(np.atleast_1d(size))

        # Check compatibility of size_ with the broadcast shape of all
        # the parameters.  This check is intended to be consistent with
        # how the numpy random variate generators (e.g. np.random.normal,
        # np.random.beta) handle their arguments.   The rule is that, if size
        # is given, it determines the shape of the output.  Broadcasting
        # can't change the output size.

        # This is the standard broadcasting convention of extending the
        # shape with fewer dimensions with enough dimensions of length 1
        # so that the two shapes have the same number of dimensions.
        ndiff = bcast_ndim - len(size_)
        if ndiff < 0:
            bcast_shape = (1,)*(-ndiff) + bcast_shape
        elif ndiff > 0:
            size_ = (1,)*ndiff + size_

        # This compatibility test is not standard.  In "regular" broadcasting,
        # two shapes are compatible if for each dimension, the lengths are the
        # same or one of the lengths is 1.  Here, the length of a dimension in
        # size_ must not be less than the corresponding length in bcast_shape.
        ok = all([bcdim == 1 or bcdim == szdim
                  for (bcdim, szdim) in zip(bcast_shape, size_)])
        if not ok:
            raise ValueError("size does not match the broadcast shape of "
                             "the parameters. %s, %s, %s" % (size, size_, bcast_shape))

        # last two broadcast args are always loc and scale (see caller)
        param_bcast = all_bcast[:-2]
        loc_bcast = all_bcast[-2]
        scale_bcast = all_bcast[-1]

        return param_bcast, loc_bcast, scale_bcast, size_
    ## These are the methods you must define (standard form functions)
    ## NB: generic _pdf, _logpdf, _cdf are different for
    ## rv_continuous and rv_discrete hence are defined in there
    def _argcheck(self, *args):
        """Default check for correct values on args and keywords.

        Returns condition array of 1's where arguments are correct and
        0's where they are not.

        """
        # default assumption: every shape parameter must be positive
        cond = 1
        for arg in args:
            cond = logical_and(cond, (asarray(arg) > 0))
        return cond
    def _get_support(self, *args, **kwargs):
        """Return the support of the (unscaled, unshifted) distribution.

        *Must* be overridden by distributions which have support dependent
        upon the shape parameters of the distribution.  Any such override
        *must not* set or change any of the class members, as these members
        are shared amongst all instances of the distribution.

        Parameters
        ----------
        arg1, arg2, ... : array_like
            The shape parameter(s) for the distribution (see docstring of the
            instance object for more information).

        Returns
        -------
        a, b : numeric (float, or int or +/-np.inf)
            end-points of the distribution's support for the specified
            shape parameters.
        """
        return self.a, self.b
    def _support_mask(self, x, *args):
        # Boolean mask: True where x lies in the closed support [a, b].
        a, b = self._get_support(*args)
        with np.errstate(invalid='ignore'):
            return (a <= x) & (x <= b)
    def _open_support_mask(self, x, *args):
        # Boolean mask: True where x lies in the open support (a, b).
        a, b = self._get_support(*args)
        with np.errstate(invalid='ignore'):
            return (a < x) & (x < b)
    def _rvs(self, *args, size=None, random_state=None):
        # This method must handle size being a tuple, and it must
        # properly broadcast *args and size.  size might be
        # an empty tuple, which means a scalar random variate is to be
        # generated.

        ## Use basic inverse cdf algorithm for RV generation as default.
        U = random_state.uniform(size=size)
        Y = self._ppf(U, *args)
        return Y
    def _logcdf(self, x, *args):
        # log of cdf; suppress divide-by-zero warnings where cdf == 0
        with np.errstate(divide='ignore'):
            return log(self._cdf(x, *args))
    def _sf(self, x, *args):
        # default survival function as the complement of the cdf
        return 1.0-self._cdf(x, *args)
    def _logsf(self, x, *args):
        # log of sf; suppress divide-by-zero warnings where sf == 0
        with np.errstate(divide='ignore'):
            return log(self._sf(x, *args))
    def _ppf(self, q, *args):
        # default percent point function via the vectorized numeric inverse
        return self._ppfvec(q, *args)
    def _isf(self, q, *args):
        return self._ppf(1.0-q, *args)  # use correct _ppf for subclasses
    # These are actually called, and should not be overwritten if you
    # want to keep error checking.
    def rvs(self, *args, **kwds):
        """
        Random variates of given type.

        Parameters
        ----------
        arg1, arg2, arg3,... : array_like
            The shape parameter(s) for the distribution (see docstring of the
            instance object for more information).
        loc : array_like, optional
            Location parameter (default=0).
        scale : array_like, optional
            Scale parameter (default=1).
        size : int or tuple of ints, optional
            Defining number of random variates (default is 1).
        random_state : {None, int, `~np.random.RandomState`, `~np.random.Generator`}, optional
            If `seed` is `None` the `~np.random.RandomState` singleton is used.
            If `seed` is an int, a new ``RandomState`` instance is used, seeded
            with seed.
            If `seed` is already a ``RandomState`` or ``Generator`` instance,
            then that object is used.
            Default is None.

        Returns
        -------
        rvs : ndarray or scalar
            Random variates of given `size`.

        """
        # 'discrete' is passed by rv_discrete.rvs to request int casting
        discrete = kwds.pop('discrete', None)
        rndm = kwds.pop('random_state', None)
        args, loc, scale, size = self._parse_args_rvs(*args, **kwds)
        cond = logical_and(self._argcheck(*args), (scale >= 0))
        if not np.all(cond):
            raise ValueError("Domain error in arguments.")

        if np.all(scale == 0):
            # degenerate distribution: all mass at loc
            return loc*ones(size, 'd')

        # extra gymnastics needed for a custom random_state
        if rndm is not None:
            random_state_saved = self._random_state
            random_state = check_random_state(rndm)
        else:
            random_state = self._random_state

        # Maintain backwards compatibility by setting self._size
        # for distributions that still need it.
        if self._rvs_uses_size_attribute:
            if not self._rvs_size_warned:
                warnings.warn(
                    f'The signature of {self._rvs} does not contain '
                    f'a "size" keyword. Such signatures are deprecated.',
                    np.VisibleDeprecationWarning)
                self._rvs_size_warned = True
            self._size = size
            self._random_state = random_state
            vals = self._rvs(*args)
        else:
            vals = self._rvs(*args, size=size, random_state=random_state)

        # transform from the standardized to the requested loc/scale
        vals = vals * scale + loc

        # do not forget to restore the _random_state
        if rndm is not None:
            self._random_state = random_state_saved

        # Cast to int if discrete
        if discrete:
            if size == ():
                vals = int(vals)
            else:
                vals = vals.astype(int)

        return vals
    def stats(self, *args, **kwds):
        """
        Some statistics of the given RV.

        Parameters
        ----------
        arg1, arg2, arg3,... : array_like
            The shape parameter(s) for the distribution (see docstring of the
            instance object for more information)
        loc : array_like, optional
            location parameter (default=0)
        scale : array_like, optional (continuous RVs only)
            scale parameter (default=1)
        moments : str, optional
            composed of letters ['mvsk'] defining which moments to compute:
            'm' = mean,
            'v' = variance,
            's' = (Fisher's) skew,
            'k' = (Fisher's) kurtosis.
            (default is 'mv')

        Returns
        -------
        stats : sequence
            of requested moments.

        """
        args, loc, scale, moments = self._parse_args_stats(*args, **kwds)
        # scale = 1 by construction for discrete RVs
        loc, scale = map(asarray, (loc, scale))
        args = tuple(map(asarray, args))
        # (loc == loc) is False exactly where loc is nan.
        cond = self._argcheck(*args) & (scale > 0) & (loc == loc)
        output = []
        # Invalid entries in any result keep self.badvalue.
        default = valarray(shape(cond), self.badvalue)

        # Use only entries that are valid in calculation
        if np.any(cond):
            goodargs = argsreduce(cond, *(args+(scale, loc)))
            scale, loc, goodargs = goodargs[-2], goodargs[-1], goodargs[:-2]

            # _stats may return any of mu, mu2, g1, g2 as None; missing
            # pieces are filled in below from the raw moments (_munp).
            if self._stats_has_moments:
                mu, mu2, g1, g2 = self._stats(*goodargs,
                                              **{'moments': moments})
            else:
                mu, mu2, g1, g2 = self._stats(*goodargs)
            # Pre-derive the third central moment mu3 from the skew g1
            # when g1 is available (needed by the 'k' branch below).
            if g1 is None:
                mu3 = None
            else:
                if mu2 is None:
                    mu2 = self._munp(2, *goodargs)
                if g2 is None:
                    # (mu2**1.5) breaks down for nan and inf
                    mu3 = g1 * np.power(mu2, 1.5)

            if 'm' in moments:
                if mu is None:
                    mu = self._munp(1, *goodargs)
                out0 = default.copy()
                # Transform the standardized mean: E[scale*Y + loc].
                place(out0, cond, mu * scale + loc)
                output.append(out0)

            if 'v' in moments:
                if mu2 is None:
                    # Central second moment from raw moments: mu2' - mu^2.
                    mu2p = self._munp(2, *goodargs)
                    if mu is None:
                        mu = self._munp(1, *goodargs)
                    # if mean is inf then var is also inf
                    with np.errstate(invalid='ignore'):
                        mu2 = np.where(np.isfinite(mu), mu2p - mu**2, np.inf)
                out0 = default.copy()
                # Variance scales with scale**2; loc drops out.
                place(out0, cond, mu2 * scale * scale)
                output.append(out0)

            if 's' in moments:
                if g1 is None:
                    # Skew from raw moments: mu3 / mu2^(3/2).
                    mu3p = self._munp(3, *goodargs)
                    if mu is None:
                        mu = self._munp(1, *goodargs)
                    if mu2 is None:
                        mu2p = self._munp(2, *goodargs)
                        mu2 = mu2p - mu * mu
                    with np.errstate(invalid='ignore'):
                        mu3 = (-mu*mu - 3*mu2)*mu + mu3p
                        g1 = mu3 / np.power(mu2, 1.5)
                out0 = default.copy()
                place(out0, cond, g1)
                output.append(out0)

            if 'k' in moments:
                if g2 is None:
                    # Excess kurtosis from raw moments: mu4 / mu2^2 - 3.
                    mu4p = self._munp(4, *goodargs)
                    if mu is None:
                        mu = self._munp(1, *goodargs)
                    if mu2 is None:
                        mu2p = self._munp(2, *goodargs)
                        mu2 = mu2p - mu * mu
                    if mu3 is None:
                        mu3p = self._munp(3, *goodargs)
                        with np.errstate(invalid='ignore'):
                            mu3 = (-mu * mu - 3 * mu2) * mu + mu3p
                    with np.errstate(invalid='ignore'):
                        mu4 = ((-mu**2 - 6*mu2) * mu - 4*mu3)*mu + mu4p
                        g2 = mu4 / mu2**2.0 - 3.0
                out0 = default.copy()
                place(out0, cond, g2)
                output.append(out0)
        else:  # no valid args
            output = [default.copy() for _ in moments]

        if len(output) == 1:
            # Single requested moment: return it bare, not as a tuple.
            return output[0]
        else:
            return tuple(output)
    def entropy(self, *args, **kwds):
        """
        Differential entropy of the RV.

        Parameters
        ----------
        arg1, arg2, arg3,... : array_like
            The shape parameter(s) for the distribution (see docstring of the
            instance object for more information).
        loc : array_like, optional
            Location parameter (default=0).
        scale : array_like, optional (continuous distributions only).
            Scale parameter (default=1).

        Notes
        -----
        Entropy is defined base `e`:

        >>> drv = rv_discrete(values=((0, 1), (0.5, 0.5)))
        >>> np.allclose(drv.entropy(), np.log(2.0))
        True

        """
        args, loc, scale = self._parse_args(*args, **kwds)
        # NB: for discrete distributions scale=1 by construction in _parse_args
        loc, scale = map(asarray, (loc, scale))
        args = tuple(map(asarray, args))
        # (loc == loc) is False exactly where loc is nan.
        cond0 = self._argcheck(*args) & (scale > 0) & (loc == loc)
        output = zeros(shape(cond0), 'd')
        # Invalid parameter combinations produce self.badvalue.
        place(output, (1-cond0), self.badvalue)
        # Evaluate the (vectorized) entropy only on the valid entries.
        goodargs = argsreduce(cond0, scale, *args)
        goodscale = goodargs[0]
        goodargs = goodargs[1:]
        # Change of variables: H[scale*X + loc] = H[X] + log(scale).
        place(output, cond0, self.vecentropy(*goodargs) + log(goodscale))
        return output
    def moment(self, n, *args, **kwds):
        """
        n-th order non-central moment of distribution.

        Parameters
        ----------
        n : int, n >= 1
            Order of moment.
        arg1, arg2, arg3,... : float
            The shape parameter(s) for the distribution (see docstring of the
            instance object for more information).
        loc : array_like, optional
            location parameter (default=0)
        scale : array_like, optional
            scale parameter (default=1)

        """
        args, loc, scale = self._parse_args(*args, **kwds)
        if not (self._argcheck(*args) and (scale > 0)):
            # Invalid shape parameters or non-positive scale.
            return nan
        if (floor(n) != n):
            raise ValueError("Moment must be an integer.")
        if (n < 0):
            raise ValueError("Moment must be positive.")
        mu, mu2, g1, g2 = None, None, None, None
        if (n > 0) and (n < 5):
            # For low orders prefer the (possibly exact) values from
            # _stats over generic numerical integration in _munp.
            if self._stats_has_moments:
                mdict = {'moments': {1: 'm', 2: 'v', 3: 'vs', 4: 'vk'}[n]}
            else:
                mdict = {}
            mu, mu2, g1, g2 = self._stats(*args, **mdict)
        val = _moment_from_stats(n, mu, mu2, g1, g2, self._munp, args)

        # Convert to transformed  X = L + S*Y
        # E[X^n] = E[(L+S*Y)^n] = L^n sum(comb(n, k)*(S/L)^k E[Y^k], k=0...n)
        if loc == 0:
            return scale**n * val
        else:
            # Binomial expansion; the k = n term (val) is added last.
            result = 0
            fac = float(scale) / float(loc)
            for k in range(n):
                valk = _moment_from_stats(k, mu, mu2, g1, g2, self._munp, args)
                result += comb(n, k, exact=True)*(fac**k) * valk
            result += fac**n * val
            return result * loc**n
    def median(self, *args, **kwds):
        """
        Median of the distribution.

        Parameters
        ----------
        arg1, arg2, arg3,... : array_like
            The shape parameter(s) for the distribution (see docstring of the
            instance object for more information)
        loc : array_like, optional
            Location parameter, Default is 0.
        scale : array_like, optional
            Scale parameter, Default is 1.

        Returns
        -------
        median : float
            The median of the distribution.

        See Also
        --------
        rv_discrete.ppf
            Inverse of the CDF

        """
        # By definition the median is the 0.5 quantile.
        return self.ppf(0.5, *args, **kwds)
1244 def mean(self, *args, **kwds):
1245 """
1246 Mean of the distribution.
1248 Parameters
1249 ----------
1250 arg1, arg2, arg3,... : array_like
1251 The shape parameter(s) for the distribution (see docstring of the
1252 instance object for more information)
1253 loc : array_like, optional
1254 location parameter (default=0)
1255 scale : array_like, optional
1256 scale parameter (default=1)
1258 Returns
1259 -------
1260 mean : float
1261 the mean of the distribution
1263 """
1264 kwds['moments'] = 'm'
1265 res = self.stats(*args, **kwds)
1266 if isinstance(res, ndarray) and res.ndim == 0:
1267 return res[()]
1268 return res
1270 def var(self, *args, **kwds):
1271 """
1272 Variance of the distribution.
1274 Parameters
1275 ----------
1276 arg1, arg2, arg3,... : array_like
1277 The shape parameter(s) for the distribution (see docstring of the
1278 instance object for more information)
1279 loc : array_like, optional
1280 location parameter (default=0)
1281 scale : array_like, optional
1282 scale parameter (default=1)
1284 Returns
1285 -------
1286 var : float
1287 the variance of the distribution
1289 """
1290 kwds['moments'] = 'v'
1291 res = self.stats(*args, **kwds)
1292 if isinstance(res, ndarray) and res.ndim == 0:
1293 return res[()]
1294 return res
1296 def std(self, *args, **kwds):
1297 """
1298 Standard deviation of the distribution.
1300 Parameters
1301 ----------
1302 arg1, arg2, arg3,... : array_like
1303 The shape parameter(s) for the distribution (see docstring of the
1304 instance object for more information)
1305 loc : array_like, optional
1306 location parameter (default=0)
1307 scale : array_like, optional
1308 scale parameter (default=1)
1310 Returns
1311 -------
1312 std : float
1313 standard deviation of the distribution
1315 """
1316 kwds['moments'] = 'v'
1317 res = sqrt(self.stats(*args, **kwds))
1318 return res
1320 def interval(self, alpha, *args, **kwds):
1321 """
1322 Confidence interval with equal areas around the median.
1324 Parameters
1325 ----------
1326 alpha : array_like of float
1327 Probability that an rv will be drawn from the returned range.
1328 Each value should be in the range [0, 1].
1329 arg1, arg2, ... : array_like
1330 The shape parameter(s) for the distribution (see docstring of the
1331 instance object for more information).
1332 loc : array_like, optional
1333 location parameter, Default is 0.
1334 scale : array_like, optional
1335 scale parameter, Default is 1.
1337 Returns
1338 -------
1339 a, b : ndarray of float
1340 end-points of range that contain ``100 * alpha %`` of the rv's
1341 possible values.
1343 """
1344 alpha = asarray(alpha)
1345 if np.any((alpha > 1) | (alpha < 0)):
1346 raise ValueError("alpha must be between 0 and 1 inclusive")
1347 q1 = (1.0-alpha)/2
1348 q2 = (1.0+alpha)/2
1349 a = self.ppf(q1, *args, **kwds)
1350 b = self.ppf(q2, *args, **kwds)
1351 return a, b
1353 def support(self, *args, **kwargs):
1354 """
1355 Return the support of the distribution.
1357 Parameters
1358 ----------
1359 arg1, arg2, ... : array_like
1360 The shape parameter(s) for the distribution (see docstring of the
1361 instance object for more information).
1362 loc : array_like, optional
1363 location parameter, Default is 0.
1364 scale : array_like, optional
1365 scale parameter, Default is 1.
1366 Returns
1367 -------
1368 a, b : float
1369 end-points of the distribution's support.
1371 """
1372 args, loc, scale = self._parse_args(*args, **kwargs)
1373 _a, _b = self._get_support(*args)
1374 return _a * scale + loc, _b * scale + loc
1377def _get_fixed_fit_value(kwds, names):
1378 """
1379 Given names such as `['f0', 'fa', 'fix_a']`, check that there is
1380 at most one non-None value in `kwds` associaed with those names.
1381 Return that value, or None if none of the names occur in `kwds`.
1382 As a side effect, all occurrences of those names in `kwds` are
1383 removed.
1384 """
1385 vals = [(name, kwds.pop(name)) for name in names if name in kwds]
1386 if len(vals) > 1:
1387 repeated = [name for name, val in vals]
1388 raise ValueError("fit method got multiple keyword arguments to "
1389 "specify the same fixed parameter: " +
1390 ', '.join(repeated))
1391 return vals[0][1] if vals else None
1394## continuous random variables: implement maybe later
1395##
1396## hf --- Hazard Function (PDF / SF)
1397## chf --- Cumulative hazard function (-log(SF))
1398## psf --- Probability sparsity function (reciprocal of the pdf) in
1399## units of percent-point-function (as a function of q).
1400## Also, the derivative of the percent-point function.
1402class rv_continuous(rv_generic):
1403 """
1404 A generic continuous random variable class meant for subclassing.
1406 `rv_continuous` is a base class to construct specific distribution classes
1407 and instances for continuous random variables. It cannot be used
1408 directly as a distribution.
1410 Parameters
1411 ----------
1412 momtype : int, optional
1413 The type of generic moment calculation to use: 0 for pdf, 1 (default)
1414 for ppf.
1415 a : float, optional
1416 Lower bound of the support of the distribution, default is minus
1417 infinity.
1418 b : float, optional
1419 Upper bound of the support of the distribution, default is plus
1420 infinity.
1421 xtol : float, optional
1422 The tolerance for fixed point calculation for generic ppf.
1423 badvalue : float, optional
        The value in the result arrays that indicates a value for which
        some argument restriction is violated, default is np.nan.
1426 name : str, optional
1427 The name of the instance. This string is used to construct the default
1428 example for distributions.
1429 longname : str, optional
1430 This string is used as part of the first line of the docstring returned
1431 when a subclass has no docstring of its own. Note: `longname` exists
1432 for backwards compatibility, do not use for new subclasses.
1433 shapes : str, optional
1434 The shape of the distribution. For example ``"m, n"`` for a
1435 distribution that takes two integers as the two shape arguments for all
1436 its methods. If not provided, shape parameters will be inferred from
1437 the signature of the private methods, ``_pdf`` and ``_cdf`` of the
1438 instance.
1439 extradoc : str, optional, deprecated
1440 This string is used as the last part of the docstring returned when a
1441 subclass has no docstring of its own. Note: `extradoc` exists for
1442 backwards compatibility, do not use for new subclasses.
1443 seed : {None, int, `~np.random.RandomState`, `~np.random.Generator`}, optional
1444 This parameter defines the object to use for drawing random variates.
1445 If `seed` is `None` the `~np.random.RandomState` singleton is used.
1446 If `seed` is an int, a new ``RandomState`` instance is used, seeded
1447 with seed.
1448 If `seed` is already a ``RandomState`` or ``Generator`` instance,
1449 then that object is used.
1450 Default is None.
1452 Methods
1453 -------
1454 rvs
1455 pdf
1456 logpdf
1457 cdf
1458 logcdf
1459 sf
1460 logsf
1461 ppf
1462 isf
1463 moment
1464 stats
1465 entropy
1466 expect
1467 median
1468 mean
1469 std
1470 var
1471 interval
1472 __call__
1473 fit
1474 fit_loc_scale
1475 nnlf
1476 support
1478 Notes
1479 -----
1480 Public methods of an instance of a distribution class (e.g., ``pdf``,
1481 ``cdf``) check their arguments and pass valid arguments to private,
1482 computational methods (``_pdf``, ``_cdf``). For ``pdf(x)``, ``x`` is valid
1483 if it is within the support of the distribution.
1484 Whether a shape parameter is valid is decided by an ``_argcheck`` method
1485 (which defaults to checking that its arguments are strictly positive.)
1487 **Subclassing**
1489 New random variables can be defined by subclassing the `rv_continuous` class
1490 and re-defining at least the ``_pdf`` or the ``_cdf`` method (normalized
1491 to location 0 and scale 1).
1493 If positive argument checking is not correct for your RV
1494 then you will also need to re-define the ``_argcheck`` method.
1496 For most of the scipy.stats distributions, the support interval doesn't
1497 depend on the shape parameters. ``x`` being in the support interval is
1498 equivalent to ``self.a <= x <= self.b``. If either of the endpoints of
1499 the support do depend on the shape parameters, then
1500 i) the distribution must implement the ``_get_support`` method; and
1501 ii) those dependent endpoints must be omitted from the distribution's
1502 call to the ``rv_continuous`` initializer.
1504 Correct, but potentially slow defaults exist for the remaining
1505 methods but for speed and/or accuracy you can over-ride::
1507 _logpdf, _cdf, _logcdf, _ppf, _rvs, _isf, _sf, _logsf
1509 The default method ``_rvs`` relies on the inverse of the cdf, ``_ppf``,
1510 applied to a uniform random variate. In order to generate random variates
1511 efficiently, either the default ``_ppf`` needs to be overwritten (e.g.
    if the inverse cdf can be expressed in an explicit form) or a sampling
1513 method needs to be implemented in a custom ``_rvs`` method.
1515 If possible, you should override ``_isf``, ``_sf`` or ``_logsf``.
1516 The main reason would be to improve numerical accuracy: for example,
1517 the survival function ``_sf`` is computed as ``1 - _cdf`` which can
1518 result in loss of precision if ``_cdf(x)`` is close to one.
1520 **Methods that can be overwritten by subclasses**
1521 ::
1523 _rvs
1524 _pdf
1525 _cdf
1526 _sf
1527 _ppf
1528 _isf
1529 _stats
1530 _munp
1531 _entropy
1532 _argcheck
1533 _get_support
1535 There are additional (internal and private) generic methods that can
    be useful for cross-checking and for debugging, but might not work in
    all cases when directly called.
1539 A note on ``shapes``: subclasses need not specify them explicitly. In this
1540 case, `shapes` will be automatically deduced from the signatures of the
1541 overridden methods (`pdf`, `cdf` etc).
1542 If, for some reason, you prefer to avoid relying on introspection, you can
1543 specify ``shapes`` explicitly as an argument to the instance constructor.
1546 **Frozen Distributions**
1548 Normally, you must provide shape parameters (and, optionally, location and
    scale parameters) to each call of a method of a distribution.
1551 Alternatively, the object may be called (as a function) to fix the shape,
1552 location, and scale parameters returning a "frozen" continuous RV object:
1554 rv = generic(<shape(s)>, loc=0, scale=1)
1555 `rv_frozen` object with the same methods but holding the given shape,
1556 location, and scale fixed
1558 **Statistics**
1560 Statistics are computed using numerical integration by default.
1561 For speed you can redefine this using ``_stats``:
1563 - take shape parameters and return mu, mu2, g1, g2
1564 - If you can't compute one of these, return it as None
1565 - Can also be defined with a keyword argument ``moments``, which is a
1566 string composed of "m", "v", "s", and/or "k".
1567 Only the components appearing in string should be computed and
1568 returned in the order "m", "v", "s", or "k" with missing values
1569 returned as None.
1571 Alternatively, you can override ``_munp``, which takes ``n`` and shape
1572 parameters and returns the n-th non-central moment of the distribution.
1574 Examples
1575 --------
1576 To create a new Gaussian distribution, we would do the following:
1578 >>> from scipy.stats import rv_continuous
1579 >>> class gaussian_gen(rv_continuous):
1580 ... "Gaussian distribution"
1581 ... def _pdf(self, x):
1582 ... return np.exp(-x**2 / 2.) / np.sqrt(2.0 * np.pi)
1583 >>> gaussian = gaussian_gen(name='gaussian')
1585 ``scipy.stats`` distributions are *instances*, so here we subclass
1586 `rv_continuous` and create an instance. With this, we now have
1587 a fully functional distribution with all relevant methods automagically
1588 generated by the framework.
1590 Note that above we defined a standard normal distribution, with zero mean
1591 and unit variance. Shifting and scaling of the distribution can be done
1592 by using ``loc`` and ``scale`` parameters: ``gaussian.pdf(x, loc, scale)``
1593 essentially computes ``y = (x - loc) / scale`` and
1594 ``gaussian._pdf(y) / scale``.
1596 """
1597 def __init__(self, momtype=1, a=None, b=None, xtol=1e-14,
1598 badvalue=None, name=None, longname=None,
1599 shapes=None, extradoc=None, seed=None):
1601 super(rv_continuous, self).__init__(seed)
1603 # save the ctor parameters, cf generic freeze
1604 self._ctor_param = dict(
1605 momtype=momtype, a=a, b=b, xtol=xtol,
1606 badvalue=badvalue, name=name, longname=longname,
1607 shapes=shapes, extradoc=extradoc, seed=seed)
1609 if badvalue is None:
1610 badvalue = nan
1611 if name is None:
1612 name = 'Distribution'
1613 self.badvalue = badvalue
1614 self.name = name
1615 self.a = a
1616 self.b = b
1617 if a is None:
1618 self.a = -inf
1619 if b is None:
1620 self.b = inf
1621 self.xtol = xtol
1622 self.moment_type = momtype
1623 self.shapes = shapes
1624 self._construct_argparser(meths_to_inspect=[self._pdf, self._cdf],
1625 locscale_in='loc=0, scale=1',
1626 locscale_out='loc, scale')
1628 # nin correction
1629 self._ppfvec = vectorize(self._ppf_single, otypes='d')
1630 self._ppfvec.nin = self.numargs + 1
1631 self.vecentropy = vectorize(self._entropy, otypes='d')
1632 self._cdfvec = vectorize(self._cdf_single, otypes='d')
1633 self._cdfvec.nin = self.numargs + 1
1635 self.extradoc = extradoc
1636 if momtype == 0:
1637 self.generic_moment = vectorize(self._mom0_sc, otypes='d')
1638 else:
1639 self.generic_moment = vectorize(self._mom1_sc, otypes='d')
1640 # Because of the *args argument of _mom0_sc, vectorize cannot count the
1641 # number of arguments correctly.
1642 self.generic_moment.nin = self.numargs + 1
1644 if longname is None:
1645 if name[0] in ['aeiouAEIOU']:
1646 hstr = "An "
1647 else:
1648 hstr = "A "
1649 longname = hstr + name
1651 if sys.flags.optimize < 2:
1652 # Skip adding docstrings if interpreter is run with -OO
1653 if self.__doc__ is None:
1654 self._construct_default_doc(longname=longname,
1655 extradoc=extradoc,
1656 docdict=docdict,
1657 discrete='continuous')
1658 else:
1659 dct = dict(distcont)
1660 self._construct_doc(docdict, dct.get(self.name))
1662 def _updated_ctor_param(self):
1663 """ Return the current version of _ctor_param, possibly updated by user.
1665 Used by freezing and pickling.
1666 Keep this in sync with the signature of __init__.
1667 """
1668 dct = self._ctor_param.copy()
1669 dct['a'] = self.a
1670 dct['b'] = self.b
1671 dct['xtol'] = self.xtol
1672 dct['badvalue'] = self.badvalue
1673 dct['name'] = self.name
1674 dct['shapes'] = self.shapes
1675 dct['extradoc'] = self.extradoc
1676 return dct
1678 def _ppf_to_solve(self, x, q, *args):
1679 return self.cdf(*(x, )+args)-q
    def _ppf_single(self, q, *args):
        # Invert the cdf at a scalar q: bracket a sign change of
        # cdf(x) - q, then run Brent's method inside the bracket.
        factor = 10.
        left, right = self._get_support(*args)

        if np.isinf(left):
            # Expand the left endpoint geometrically (towards -inf)
            # until cdf(left) <= q, shrinking right along the way.
            left = min(-factor, right)
            while self._ppf_to_solve(left, q, *args) > 0.:
                left, right = left * factor, left
            # left is now such that cdf(left) <= q
            # if right has changed, then cdf(right) > q

        if np.isinf(right):
            # Likewise expand the right endpoint (towards +inf)
            # until cdf(right) >= q.
            right = max(factor, left)
            while self._ppf_to_solve(right, q, *args) < 0.:
                left, right = right, right * factor
            # right is now such that cdf(right) >= q

        return optimize.brentq(self._ppf_to_solve,
                               left, right, args=(q,)+args, xtol=self.xtol)
1701 # moment from definition
1702 def _mom_integ0(self, x, m, *args):
1703 return x**m * self.pdf(x, *args)
1705 def _mom0_sc(self, m, *args):
1706 _a, _b = self._get_support(*args)
1707 return integrate.quad(self._mom_integ0, _a, _b,
1708 args=(m,)+args)[0]
1710 # moment calculated using ppf
1711 def _mom_integ1(self, q, m, *args):
1712 return (self.ppf(q, *args))**m
1714 def _mom1_sc(self, m, *args):
1715 return integrate.quad(self._mom_integ1, 0, 1, args=(m,)+args)[0]
    def _pdf(self, x, *args):
        # Default pdf: numerical derivative of the cdf (central
        # difference, 5-point stencil, fixed step dx=1e-5).
        # NOTE(review): relies on scipy.misc.derivative, which is
        # deprecated in later scipy releases -- verify before upgrading.
        return derivative(self._cdf, x, dx=1e-5, args=args, order=5)
1720 ## Could also define any of these
1721 def _logpdf(self, x, *args):
1722 return log(self._pdf(x, *args))
1724 def _cdf_single(self, x, *args):
1725 _a, _b = self._get_support(*args)
1726 return integrate.quad(self._pdf, _a, x, args=args)[0]
    def _cdf(self, x, *args):
        # Default cdf: vectorized numerical integration of the pdf
        # (_cdfvec wraps _cdf_single via np.vectorize; see __init__).
        return self._cdfvec(x, *args)
1731 ## generic _argcheck, _logcdf, _sf, _logsf, _ppf, _isf, _rvs are defined
1732 ## in rv_generic
    def pdf(self, x, *args, **kwds):
        """
        Probability density function at x of the given RV.

        Parameters
        ----------
        x : array_like
            quantiles
        arg1, arg2, arg3,... : array_like
            The shape parameter(s) for the distribution (see docstring of the
            instance object for more information)
        loc : array_like, optional
            location parameter (default=0)
        scale : array_like, optional
            scale parameter (default=1)

        Returns
        -------
        pdf : ndarray
            Probability density function evaluated at x

        """
        args, loc, scale = self._parse_args(*args, **kwds)
        x, loc, scale = map(asarray, (x, loc, scale))
        args = tuple(map(asarray, args))
        # Promote to at least float64 so integer input yields float output.
        dtyp = np.find_common_type([x.dtype, np.float64], [])
        # Standardize: work with y = (x - loc) / scale from here on.
        x = np.asarray((x - loc)/scale, dtype=dtyp)
        # cond0: valid shape parameters and positive scale.
        cond0 = self._argcheck(*args) & (scale > 0)
        # cond1: x inside the support (density is 0 outside).
        cond1 = self._support_mask(x, *args) & (scale > 0)
        cond = cond0 & cond1
        output = zeros(shape(cond), dtyp)
        # Invalid parameters or nan input map to self.badvalue.
        putmask(output, (1-cond0)+np.isnan(x), self.badvalue)
        if np.any(cond):
            # Evaluate _pdf only on the valid entries; divide by scale to
            # undo the change of variables (density transforms by 1/scale).
            goodargs = argsreduce(cond, *((x,)+args+(scale,)))
            scale, goodargs = goodargs[-1], goodargs[:-1]
            place(output, cond, self._pdf(*goodargs) / scale)
        if output.ndim == 0:
            # Unwrap 0-d array to a scalar for scalar input.
            return output[()]
        return output
    def logpdf(self, x, *args, **kwds):
        """
        Log of the probability density function at x of the given RV.

        This uses a more numerically accurate calculation if available.

        Parameters
        ----------
        x : array_like
            quantiles
        arg1, arg2, arg3,... : array_like
            The shape parameter(s) for the distribution (see docstring of the
            instance object for more information)
        loc : array_like, optional
            location parameter (default=0)
        scale : array_like, optional
            scale parameter (default=1)

        Returns
        -------
        logpdf : array_like
            Log of the probability density function evaluated at x

        """
        args, loc, scale = self._parse_args(*args, **kwds)
        x, loc, scale = map(asarray, (x, loc, scale))
        args = tuple(map(asarray, args))
        # Promote to at least float64 so integer input yields float output.
        dtyp = np.find_common_type([x.dtype, np.float64], [])
        # Standardize: y = (x - loc) / scale.
        x = np.asarray((x - loc)/scale, dtype=dtyp)
        # cond0: valid shape parameters and positive scale.
        cond0 = self._argcheck(*args) & (scale > 0)
        # cond1: x inside the support.
        cond1 = self._support_mask(x, *args) & (scale > 0)
        cond = cond0 & cond1
        output = empty(shape(cond), dtyp)
        # Outside the support the density is 0, so the log-density is -inf.
        output.fill(NINF)
        # Invalid parameters or nan input map to self.badvalue.
        putmask(output, (1-cond0)+np.isnan(x), self.badvalue)
        if np.any(cond):
            # log(pdf(y)/scale) == _logpdf(y) - log(scale).
            goodargs = argsreduce(cond, *((x,)+args+(scale,)))
            scale, goodargs = goodargs[-1], goodargs[:-1]
            place(output, cond, self._logpdf(*goodargs) - log(scale))
        if output.ndim == 0:
            # Unwrap 0-d array to a scalar for scalar input.
            return output[()]
        return output
    def cdf(self, x, *args, **kwds):
        """
        Cumulative distribution function of the given RV.

        Parameters
        ----------
        x : array_like
            quantiles
        arg1, arg2, arg3,... : array_like
            The shape parameter(s) for the distribution (see docstring of the
            instance object for more information)
        loc : array_like, optional
            location parameter (default=0)
        scale : array_like, optional
            scale parameter (default=1)

        Returns
        -------
        cdf : ndarray
            Cumulative distribution function evaluated at `x`

        """
        args, loc, scale = self._parse_args(*args, **kwds)
        x, loc, scale = map(asarray, (x, loc, scale))
        args = tuple(map(asarray, args))
        _a, _b = self._get_support(*args)
        # Promote to at least float64 so integer input yields float output.
        dtyp = np.find_common_type([x.dtype, np.float64], [])
        # Standardize: y = (x - loc) / scale.
        x = np.asarray((x - loc)/scale, dtype=dtyp)
        # cond0: valid shape parameters and positive scale.
        cond0 = self._argcheck(*args) & (scale > 0)
        # cond1: x strictly inside the support.
        cond1 = self._open_support_mask(x, *args) & (scale > 0)
        # cond2: at or above the upper endpoint, where the cdf is 1.
        cond2 = (x >= np.asarray(_b)) & cond0
        cond = cond0 & cond1
        # Entries below the support keep the default 0.
        output = zeros(shape(cond), dtyp)
        # Invalid parameters or nan input map to self.badvalue.
        place(output, (1-cond0)+np.isnan(x), self.badvalue)
        place(output, cond2, 1.0)
        if np.any(cond):  # call only if at least 1 entry
            goodargs = argsreduce(cond, *((x,)+args))
            place(output, cond, self._cdf(*goodargs))
        if output.ndim == 0:
            # Unwrap 0-d array to a scalar for scalar input.
            return output[()]
        return output
    def logcdf(self, x, *args, **kwds):
        """
        Log of the cumulative distribution function at x of the given RV.

        Parameters
        ----------
        x : array_like
            quantiles
        arg1, arg2, arg3,... : array_like
            The shape parameter(s) for the distribution (see docstring of the
            instance object for more information)
        loc : array_like, optional
            location parameter (default=0)
        scale : array_like, optional
            scale parameter (default=1)

        Returns
        -------
        logcdf : array_like
            Log of the cumulative distribution function evaluated at x

        """
        args, loc, scale = self._parse_args(*args, **kwds)
        x, loc, scale = map(asarray, (x, loc, scale))
        args = tuple(map(asarray, args))
        _a, _b = self._get_support(*args)
        # Promote to at least float64 so integer input yields float output.
        dtyp = np.find_common_type([x.dtype, np.float64], [])
        # Standardize: y = (x - loc) / scale.
        x = np.asarray((x - loc)/scale, dtype=dtyp)
        # cond0: valid shape parameters and positive scale.
        cond0 = self._argcheck(*args) & (scale > 0)
        # cond1: x strictly inside the support.
        cond1 = self._open_support_mask(x, *args) & (scale > 0)
        # cond2: at or above the upper endpoint -> cdf == 1, logcdf == 0.
        cond2 = (x >= _b) & cond0
        cond = cond0 & cond1
        output = empty(shape(cond), dtyp)
        # Below the support the cdf is 0, so the log is -inf.
        output.fill(NINF)
        # NOTE(review): ``(cond1 == cond1)`` evaluates to an all-True
        # array -- presumably kept for shape broadcasting; verify before
        # simplifying.
        place(output, (1-cond0)*(cond1 == cond1)+np.isnan(x), self.badvalue)
        place(output, cond2, 0.0)
        if np.any(cond):  # call only if at least 1 entry
            goodargs = argsreduce(cond, *((x,)+args))
            place(output, cond, self._logcdf(*goodargs))
        if output.ndim == 0:
            # Unwrap 0-d array to a scalar for scalar input.
            return output[()]
        return output
    def sf(self, x, *args, **kwds):
        """
        Survival function (1 - `cdf`) at x of the given RV.

        Parameters
        ----------
        x : array_like
            quantiles
        arg1, arg2, arg3,... : array_like
            The shape parameter(s) for the distribution (see docstring of the
            instance object for more information)
        loc : array_like, optional
            location parameter (default=0)
        scale : array_like, optional
            scale parameter (default=1)

        Returns
        -------
        sf : array_like
            Survival function evaluated at x

        """
        args, loc, scale = self._parse_args(*args, **kwds)
        x, loc, scale = map(asarray, (x, loc, scale))
        args = tuple(map(asarray, args))
        _a, _b = self._get_support(*args)
        # Promote to at least float64 so integer input yields float output.
        dtyp = np.find_common_type([x.dtype, np.float64], [])
        # Standardize: y = (x - loc) / scale.
        x = np.asarray((x - loc)/scale, dtype=dtyp)
        # cond0: valid shape parameters and positive scale.
        cond0 = self._argcheck(*args) & (scale > 0)
        # cond1: x strictly inside the support.
        cond1 = self._open_support_mask(x, *args) & (scale > 0)
        # cond2: at or below the lower endpoint, where the sf is 1.
        cond2 = cond0 & (x <= _a)
        cond = cond0 & cond1
        # Entries above the support keep the default 0.
        output = zeros(shape(cond), dtyp)
        # Invalid parameters or nan input map to self.badvalue.
        place(output, (1-cond0)+np.isnan(x), self.badvalue)
        place(output, cond2, 1.0)
        if np.any(cond):
            goodargs = argsreduce(cond, *((x,)+args))
            place(output, cond, self._sf(*goodargs))
        if output.ndim == 0:
            # Unwrap 0-d array to a scalar for scalar input.
            return output[()]
        return output
    def logsf(self, x, *args, **kwds):
        """
        Log of the survival function of the given RV.

        Returns the log of the "survival function," defined as (1 - `cdf`),
        evaluated at `x`.

        Parameters
        ----------
        x : array_like
            quantiles
        arg1, arg2, arg3,... : array_like
            The shape parameter(s) for the distribution (see docstring of the
            instance object for more information)
        loc : array_like, optional
            location parameter (default=0)
        scale : array_like, optional
            scale parameter (default=1)

        Returns
        -------
        logsf : ndarray
            Log of the survival function evaluated at `x`.

        """
        args, loc, scale = self._parse_args(*args, **kwds)
        x, loc, scale = map(asarray, (x, loc, scale))
        args = tuple(map(asarray, args))
        _a, _b = self._get_support(*args)
        # Promote to at least float64 so integer input yields float output.
        dtyp = np.find_common_type([x.dtype, np.float64], [])
        # Standardize: y = (x - loc) / scale.
        x = np.asarray((x - loc)/scale, dtype=dtyp)
        # cond0: valid shape parameters and positive scale.
        cond0 = self._argcheck(*args) & (scale > 0)
        # cond1: x strictly inside the support.
        cond1 = self._open_support_mask(x, *args) & (scale > 0)
        # cond2: at or below the lower endpoint -> sf == 1, logsf == 0.
        cond2 = cond0 & (x <= _a)
        cond = cond0 & cond1
        output = empty(shape(cond), dtyp)
        # Above the support the sf is 0, so the log is -inf.
        output.fill(NINF)
        # Invalid parameters or nan input map to self.badvalue.
        place(output, (1-cond0)+np.isnan(x), self.badvalue)
        place(output, cond2, 0.0)
        if np.any(cond):
            goodargs = argsreduce(cond, *((x,)+args))
            place(output, cond, self._logsf(*goodargs))
        if output.ndim == 0:
            # Unwrap 0-d array to a scalar for scalar input.
            return output[()]
        return output
    def ppf(self, q, *args, **kwds):
        """
        Percent point function (inverse of `cdf`) at q of the given RV.

        Parameters
        ----------
        q : array_like
            lower tail probability
        arg1, arg2, arg3,... : array_like
            The shape parameter(s) for the distribution (see docstring of the
            instance object for more information)
        loc : array_like, optional
            location parameter (default=0)
        scale : array_like, optional
            scale parameter (default=1)

        Returns
        -------
        x : array_like
            quantile corresponding to the lower tail probability q.

        """
        args, loc, scale = self._parse_args(*args, **kwds)
        q, loc, scale = map(asarray, (q, loc, scale))
        args = tuple(map(asarray, args))
        _a, _b = self._get_support(*args)
        # (loc == loc) filters out nan locations.
        cond0 = self._argcheck(*args) & (scale > 0) & (loc == loc)
        cond1 = (0 < q) & (q < 1)
        cond2 = cond0 & (q == 0)  # q == 0 maps to the lower support bound
        cond3 = cond0 & (q == 1)  # q == 1 maps to the upper support bound
        cond = cond0 & cond1
        # Start with badvalue everywhere; valid entries are filled below.
        output = valarray(shape(cond), value=self.badvalue)

        lower_bound = _a * scale + loc
        upper_bound = _b * scale + loc
        place(output, cond2, argsreduce(cond2, lower_bound)[0])
        place(output, cond3, argsreduce(cond3, upper_bound)[0])

        if np.any(cond):  # call only if at least 1 entry
            goodargs = argsreduce(cond, *((q,)+args+(scale, loc)))
            scale, loc, goodargs = goodargs[-2], goodargs[-1], goodargs[:-2]
            # Transform the standardized quantile back to original units.
            place(output, cond, self._ppf(*goodargs) * scale + loc)
        if output.ndim == 0:
            return output[()]  # scalar result for 0-d input
        return output
    def isf(self, q, *args, **kwds):
        """
        Inverse survival function (inverse of `sf`) at q of the given RV.

        Parameters
        ----------
        q : array_like
            upper tail probability
        arg1, arg2, arg3,... : array_like
            The shape parameter(s) for the distribution (see docstring of the
            instance object for more information)
        loc : array_like, optional
            location parameter (default=0)
        scale : array_like, optional
            scale parameter (default=1)

        Returns
        -------
        x : ndarray or scalar
            Quantile corresponding to the upper tail probability q.

        """
        args, loc, scale = self._parse_args(*args, **kwds)
        q, loc, scale = map(asarray, (q, loc, scale))
        args = tuple(map(asarray, args))
        _a, _b = self._get_support(*args)
        # (loc == loc) filters out nan locations.
        cond0 = self._argcheck(*args) & (scale > 0) & (loc == loc)
        cond1 = (0 < q) & (q < 1)
        cond2 = cond0 & (q == 1)  # upper tail prob 1 -> lower support bound
        cond3 = cond0 & (q == 0)  # upper tail prob 0 -> upper support bound
        cond = cond0 & cond1
        # Start with badvalue everywhere; valid entries are filled below.
        output = valarray(shape(cond), value=self.badvalue)

        lower_bound = _a * scale + loc
        upper_bound = _b * scale + loc
        place(output, cond2, argsreduce(cond2, lower_bound)[0])
        place(output, cond3, argsreduce(cond3, upper_bound)[0])

        if np.any(cond):
            goodargs = argsreduce(cond, *((q,)+args+(scale, loc)))
            scale, loc, goodargs = goodargs[-2], goodargs[-1], goodargs[:-2]
            # Transform the standardized quantile back to original units.
            place(output, cond, self._isf(*goodargs) * scale + loc)
        if output.ndim == 0:
            return output[()]  # scalar result for 0-d input
        return output
2082 def _nnlf(self, x, *args):
2083 return -np.sum(self._logpdf(x, *args), axis=0)
2085 def _unpack_loc_scale(self, theta):
2086 try:
2087 loc = theta[-2]
2088 scale = theta[-1]
2089 args = tuple(theta[:-2])
2090 except IndexError:
2091 raise ValueError("Not enough input arguments.")
2092 return loc, scale, args
2094 def nnlf(self, theta, x):
2095 '''Return negative loglikelihood function.
2097 Notes
2098 -----
2099 This is ``-sum(log pdf(x, theta), axis=0)`` where `theta` are the
2100 parameters (including loc and scale).
2101 '''
2102 loc, scale, args = self._unpack_loc_scale(theta)
2103 if not self._argcheck(*args) or scale <= 0:
2104 return inf
2105 x = asarray((x-loc) / scale)
2106 n_log_scale = len(x) * log(scale)
2107 if np.any(~self._support_mask(x, *args)):
2108 return inf
2109 return self._nnlf(x, *args) + n_log_scale
    def _nnlf_and_penalty(self, x, args):
        # Negative log-likelihood with a penalty for out-of-support data
        # and non-finite log-pdf values, so optimizers are steered back
        # toward feasible regions instead of receiving a flat `inf`.
        cond0 = ~self._support_mask(x, *args)
        n_bad = np.count_nonzero(cond0, axis=0)
        if n_bad > 0:
            # Drop out-of-support points; they are accounted for via n_bad.
            x = argsreduce(~cond0, x)[0]
        logpdf = self._logpdf(x, *args)
        finite_logpdf = np.isfinite(logpdf)
        n_bad += np.sum(~finite_logpdf, axis=0)
        if n_bad > 0:
            # Each bad point contributes a large fixed penalty term.
            penalty = n_bad * log(_XMAX) * 100
            return -np.sum(logpdf[finite_logpdf], axis=0) + penalty
        return -np.sum(logpdf, axis=0)
2124 def _penalized_nnlf(self, theta, x):
2125 ''' Return penalized negative loglikelihood function,
2126 i.e., - sum (log pdf(x, theta), axis=0) + penalty
2127 where theta are the parameters (including loc and scale)
2128 '''
2129 loc, scale, args = self._unpack_loc_scale(theta)
2130 if not self._argcheck(*args) or scale <= 0:
2131 return inf
2132 x = asarray((x-loc) / scale)
2133 n_log_scale = len(x) * log(scale)
2134 return self._nnlf_and_penalty(x, args) + n_log_scale
2136 # return starting point for fit (shape arguments + loc + scale)
2137 def _fitstart(self, data, args=None):
2138 if args is None:
2139 args = (1.0,)*self.numargs
2140 loc, scale = self._fit_loc_scale_support(data, *args)
2141 return args + (loc, scale)
    def _reduce_func(self, args, kwds):
        """
        Return the (possibly reduced) function to optimize in order to find MLE
        estimates for the .fit method.
        """
        # Convert fixed shape parameters to the standard numeric form: e.g. for
        # stats.beta, shapes='a, b'. To fix `a`, the caller can give a value
        # for `f0`, `fa` or 'fix_a'.  The following converts the latter two
        # into the first (numeric) form.
        if self.shapes:
            shapes = self.shapes.replace(',', ' ').split()
            for j, s in enumerate(shapes):
                key = 'f' + str(j)
                names = [key, 'f' + s, 'fix_' + s]
                val = _get_fixed_fit_value(kwds, names)
                if val is not None:
                    kwds[key] = val

        args = list(args)
        Nargs = len(args)
        fixedn = []
        names = ['f%d' % n for n in range(Nargs - 2)] + ['floc', 'fscale']
        x0 = []
        # Partition parameters: indices fixed via kwds go to `fixedn`,
        # free parameters become the optimizer's starting vector `x0`.
        for n, key in enumerate(names):
            if key in kwds:
                fixedn.append(n)
                args[n] = kwds.pop(key)
            else:
                x0.append(args[n])

        if len(fixedn) == 0:
            # Nothing fixed: optimize the full parameter vector directly.
            func = self._penalized_nnlf
            restore = None
        else:
            if len(fixedn) == Nargs:
                raise ValueError(
                    "All parameters fixed. There is nothing to optimize.")

            def restore(args, theta):
                # Replace with theta for all numbers not in fixedn
                # This allows the non-fixed values to vary, but
                # we still call self.nnlf with all parameters.
                i = 0
                for n in range(Nargs):
                    if n not in fixedn:
                        args[n] = theta[i]
                        i += 1
                return args

            def func(theta, x):
                # Expand the reduced vector `theta` back to full length
                # before evaluating the penalized likelihood.
                newtheta = restore(args[:], theta)
                return self._penalized_nnlf(newtheta, x)

        return x0, func, restore, args
    def fit(self, data, *args, **kwds):
        """
        Return MLEs for shape (if applicable), location, and scale
        parameters from data.

        MLE stands for Maximum Likelihood Estimate.  Starting estimates for
        the fit are given by input arguments; for any arguments not provided
        with starting estimates, ``self._fitstart(data)`` is called to generate
        such.

        One can hold some parameters fixed to specific values by passing in
        keyword arguments ``f0``, ``f1``, ..., ``fn`` (for shape parameters)
        and ``floc`` and ``fscale`` (for location and scale parameters,
        respectively).

        Parameters
        ----------
        data : array_like
            Data to use in calculating the MLEs.
        arg1, arg2, arg3,... : floats, optional
            Starting value(s) for any shape-characterizing arguments (those not
            provided will be determined by a call to ``_fitstart(data)``).
            No default value.
        kwds : floats, optional
            - `loc`: initial guess of the distribution's location parameter.
            - `scale`: initial guess of the distribution's scale parameter.

            Special keyword arguments are recognized as holding certain
            parameters fixed:

            - f0...fn : hold respective shape parameters fixed.
              Alternatively, shape parameters to fix can be specified by name.
              For example, if ``self.shapes == "a, b"``, ``fa`` and ``fix_a``
              are equivalent to ``f0``, and ``fb`` and ``fix_b`` are
              equivalent to ``f1``.

            - floc : hold location parameter fixed to specified value.

            - fscale : hold scale parameter fixed to specified value.

            - optimizer : The optimizer to use.  The optimizer must take ``func``,
              and starting position as the first two arguments,
              plus ``args`` (for extra arguments to pass to the
              function to be optimized) and ``disp=0`` to suppress
              output as keyword arguments.

        Returns
        -------
        mle_tuple : tuple of floats
            MLEs for any shape parameters (if applicable), followed by those
            for location and scale. For most random variables, shape statistics
            will be returned, but there are exceptions (e.g. ``norm``).

        Notes
        -----
        This fit is computed by maximizing a log-likelihood function, with
        penalty applied for samples outside of range of the distribution. The
        returned answer is not guaranteed to be the globally optimal MLE, it
        may only be locally optimal, or the optimization may fail altogether.
        If the data contain any of np.nan, np.inf, or -np.inf, the fit routine
        will throw a RuntimeError.

        Examples
        --------

        Generate some data to fit: draw random variates from the `beta`
        distribution

        >>> from scipy.stats import beta
        >>> a, b = 1., 2.
        >>> x = beta.rvs(a, b, size=1000)

        Now we can fit all four parameters (``a``, ``b``, ``loc`` and ``scale``):

        >>> a1, b1, loc1, scale1 = beta.fit(x)

        We can also use some prior knowledge about the dataset: let's keep
        ``loc`` and ``scale`` fixed:

        >>> a1, b1, loc1, scale1 = beta.fit(x, floc=0, fscale=1)
        >>> loc1, scale1
        (0, 1)

        We can also keep shape parameters fixed by using ``f``-keywords. To
        keep the zero-th shape parameter ``a`` equal 1, use ``f0=1`` or,
        equivalently, ``fa=1``:

        >>> a1, b1, loc1, scale1 = beta.fit(x, fa=1, floc=0, fscale=1)
        >>> a1
        1

        Not all distributions return estimates for the shape parameters.
        ``norm`` for example just returns estimates for location and scale:

        >>> from scipy.stats import norm
        >>> x = norm.rvs(a, b, size=1000, random_state=123)
        >>> loc1, scale1 = norm.fit(x)
        >>> loc1, scale1
        (0.92087172783841631, 2.0015750750324668)
        """
        Narg = len(args)
        if Narg > self.numargs:
            raise TypeError("Too many input arguments.")

        if not np.isfinite(data).all():
            raise RuntimeError("The data contains non-finite values.")

        # start holds (..., loc, scale) starting values; only the last two
        # entries are used below when loc/scale are not given explicitly.
        start = [None]*2
        if (Narg < self.numargs) or not ('loc' in kwds and
                                         'scale' in kwds):
            # get distribution specific starting locations
            start = self._fitstart(data)
            args += start[Narg:-2]
        loc = kwds.pop('loc', start[-2])
        scale = kwds.pop('scale', start[-1])
        args += (loc, scale)
        # Reduce to the free parameters only, honoring f0.../floc/fscale.
        x0, func, restore, args = self._reduce_func(args, kwds)

        optimizer = kwds.pop('optimizer', optimize.fmin)
        # convert string to function in scipy.optimize
        if not callable(optimizer) and isinstance(optimizer, str):
            if not optimizer.startswith('fmin_'):
                optimizer = "fmin_"+optimizer
            if optimizer == 'fmin_':
                optimizer = 'fmin'
            try:
                optimizer = getattr(optimize, optimizer)
            except AttributeError:
                raise ValueError("%s is not a valid optimizer" % optimizer)

        # by now kwds must be empty, since everybody took what they needed
        if kwds:
            raise TypeError("Unknown arguments: %s." % kwds)

        vals = optimizer(func, x0, args=(ravel(data),), disp=0)
        if restore is not None:
            # Re-insert the fixed parameters into the optimized vector.
            vals = restore(args, vals)
        vals = tuple(vals)
        return vals
2338 def _fit_loc_scale_support(self, data, *args):
2339 """
2340 Estimate loc and scale parameters from data accounting for support.
2342 Parameters
2343 ----------
2344 data : array_like
2345 Data to fit.
2346 arg1, arg2, arg3,... : array_like
2347 The shape parameter(s) for the distribution (see docstring of the
2348 instance object for more information).
2350 Returns
2351 -------
2352 Lhat : float
2353 Estimated location parameter for the data.
2354 Shat : float
2355 Estimated scale parameter for the data.
2357 """
2358 data = np.asarray(data)
2360 # Estimate location and scale according to the method of moments.
2361 loc_hat, scale_hat = self.fit_loc_scale(data, *args)
2363 # Compute the support according to the shape parameters.
2364 self._argcheck(*args)
2365 _a, _b = self._get_support(*args)
2366 a, b = _a, _b
2367 support_width = b - a
2369 # If the support is empty then return the moment-based estimates.
2370 if support_width <= 0:
2371 return loc_hat, scale_hat
2373 # Compute the proposed support according to the loc and scale
2374 # estimates.
2375 a_hat = loc_hat + a * scale_hat
2376 b_hat = loc_hat + b * scale_hat
2378 # Use the moment-based estimates if they are compatible with the data.
2379 data_a = np.min(data)
2380 data_b = np.max(data)
2381 if a_hat < data_a and data_b < b_hat:
2382 return loc_hat, scale_hat
2384 # Otherwise find other estimates that are compatible with the data.
2385 data_width = data_b - data_a
2386 rel_margin = 0.1
2387 margin = data_width * rel_margin
2389 # For a finite interval, both the location and scale
2390 # should have interesting values.
2391 if support_width < np.inf:
2392 loc_hat = (data_a - a) - margin
2393 scale_hat = (data_width + 2 * margin) / support_width
2394 return loc_hat, scale_hat
2396 # For a one-sided interval, use only an interesting location parameter.
2397 if a > -np.inf:
2398 return (data_a - a) - margin, 1
2399 elif b < np.inf:
2400 return (data_b - b) + margin, 1
2401 else:
2402 raise RuntimeError
2404 def fit_loc_scale(self, data, *args):
2405 """
2406 Estimate loc and scale parameters from data using 1st and 2nd moments.
2408 Parameters
2409 ----------
2410 data : array_like
2411 Data to fit.
2412 arg1, arg2, arg3,... : array_like
2413 The shape parameter(s) for the distribution (see docstring of the
2414 instance object for more information).
2416 Returns
2417 -------
2418 Lhat : float
2419 Estimated location parameter for the data.
2420 Shat : float
2421 Estimated scale parameter for the data.
2423 """
2424 mu, mu2 = self.stats(*args, **{'moments': 'mv'})
2425 tmp = asarray(data)
2426 muhat = tmp.mean()
2427 mu2hat = tmp.var()
2428 Shat = sqrt(mu2hat / mu2)
2429 Lhat = muhat - Shat*mu
2430 if not np.isfinite(Lhat):
2431 Lhat = 0
2432 if not (np.isfinite(Shat) and (0 < Shat)):
2433 Shat = 1
2434 return Lhat, Shat
2436 def _entropy(self, *args):
2437 def integ(x):
2438 val = self._pdf(x, *args)
2439 return entr(val)
2441 # upper limit is often inf, so suppress warnings when integrating
2442 _a, _b = self._get_support(*args)
2443 with np.errstate(over='ignore'):
2444 h = integrate.quad(integ, _a, _b)[0]
2446 if not np.isnan(h):
2447 return h
2448 else:
2449 # try with different limits if integration problems
2450 low, upp = self.ppf([1e-10, 1. - 1e-10], *args)
2451 if np.isinf(_b):
2452 upper = upp
2453 else:
2454 upper = _b
2455 if np.isinf(_a):
2456 lower = low
2457 else:
2458 lower = _a
2459 return integrate.quad(integ, lower, upper)[0]
    def expect(self, func=None, args=(), loc=0, scale=1, lb=None, ub=None,
               conditional=False, **kwds):
        """Calculate expected value of a function with respect to the
        distribution by numerical integration.

        The expected value of a function ``f(x)`` with respect to a
        distribution ``dist`` is defined as::

                    ub
            E[f(x)] = Integral(f(x) * dist.pdf(x)),
                    lb

        where ``ub`` and ``lb`` are arguments and ``x`` has the ``dist.pdf(x)``
        distribution. If the bounds ``lb`` and ``ub`` correspond to the
        support of the distribution, e.g. ``[-inf, inf]`` in the default
        case, then the integral is the unrestricted expectation of ``f(x)``.
        Also, the function ``f(x)`` may be defined such that ``f(x)`` is ``0``
        outside a finite interval in which case the expectation is
        calculated within the finite range ``[lb, ub]``.

        Parameters
        ----------
        func : callable, optional
            Function for which integral is calculated. Takes only one argument.
            The default is the identity mapping f(x) = x.
        args : tuple, optional
            Shape parameters of the distribution.
        loc : float, optional
            Location parameter (default=0).
        scale : float, optional
            Scale parameter (default=1).
        lb, ub : scalar, optional
            Lower and upper bound for integration. Default is set to the
            support of the distribution.
        conditional : bool, optional
            If True, the integral is corrected by the conditional probability
            of the integration interval.  The return value is the expectation
            of the function, conditional on being in the given interval.
            Default is False.

        Additional keyword arguments are passed to the integration routine.

        Returns
        -------
        expect : float
            The calculated expected value.

        Notes
        -----
        The integration behavior of this function is inherited from
        `scipy.integrate.quad`. Neither this function nor
        `scipy.integrate.quad` can verify whether the integral exists or is
        finite. For example ``cauchy(0).mean()`` returns ``np.nan`` and
        ``cauchy(0).expect()`` returns ``0.0``.

        The function is not vectorized.

        Examples
        --------

        To understand the effect of the bounds of integration consider

        >>> from scipy.stats import expon
        >>> expon(1).expect(lambda x: 1, lb=0.0, ub=2.0)
        0.6321205588285578

        This is close to

        >>> expon(1).cdf(2.0) - expon(1).cdf(0.0)
        0.6321205588285577

        If ``conditional=True``

        >>> expon(1).expect(lambda x: 1, lb=0.0, ub=2.0, conditional=True)
        1.0000000000000002

        The slight deviation from 1 is due to numerical integration.
        """
        lockwds = {'loc': loc,
                   'scale': scale}
        self._argcheck(*args)
        _a, _b = self._get_support(*args)
        if func is None:
            # Default integrand: identity, i.e. compute the mean.
            def fun(x, *args):
                return x * self.pdf(x, *args, **lockwds)
        else:
            def fun(x, *args):
                return func(x) * self.pdf(x, *args, **lockwds)
        if lb is None:
            lb = loc + _a * scale
        if ub is None:
            ub = loc + _b * scale
        if conditional:
            # Probability mass of [lb, ub], used to renormalize the result.
            invfac = (self.sf(lb, *args, **lockwds)
                      - self.sf(ub, *args, **lockwds))
        else:
            invfac = 1.0
        kwds['args'] = args
        # Silence floating point warnings from integration.
        with np.errstate(all='ignore'):
            vals = integrate.quad(fun, lb, ub, **kwds)[0] / invfac
        return vals
2565# Helpers for the discrete distributions
def _drv2_moment(self, n, *args):
    """Non-central moment of discrete distribution."""
    _a, _b = self._get_support(*args)

    def summand(x):
        # Weight each support point by its probability mass.
        return np.power(x, n) * self._pmf(x, *args)

    # Sum over the support, starting the scan at the median.
    return _expect(summand, _a, _b, self.ppf(0.5, *args), self.inc)
def _drv2_ppfsingle(self, q, *args):  # Use basic bisection algorithm
    # Scalar ppf for a discrete distribution: first expand outward from a
    # finite starting point until [a, b] brackets q under the cdf, then
    # bisect on integers until cdf(a) <= q <= cdf(b) with b <= a+1.
    _a, _b = self._get_support(*args)
    b = _b
    a = _a
    if isinf(b):            # Be sure ending point is > q
        b = int(max(100*q, 10))
        while 1:
            if b >= _b:
                qb = 1.0
                break
            qb = self._cdf(b, *args)
            if (qb < q):
                b += 10
            else:
                break
    else:
        qb = 1.0
    if isinf(a):    # be sure starting point < q
        a = int(min(-100*q, -10))
        while 1:
            if a <= _a:
                # NOTE(review): looks like this should set qa, not qb;
                # the branch appears unreachable here since _a is -inf
                # when this side runs (finite a is never <= -inf) — confirm.
                qb = 0.0
                break
            qa = self._cdf(a, *args)
            if (qa > q):
                a -= 10
            else:
                break
    else:
        qa = self._cdf(a, *args)

    while 1:
        if (qa == q):
            return a
        if (qb == q):
            return b
        if b <= a+1:
            # Bracket has collapsed to adjacent integers: pick the side
            # whose cdf first reaches q.
            if qa > q:
                return a
            else:
                return b
        c = int((a+b)/2.0)
        qc = self._cdf(c, *args)
        if (qc < q):
            if a != c:
                a = c
            else:
                raise RuntimeError('updating stopped, endless loop')
            qa = qc
        elif (qc > q):
            if b != c:
                b = c
            else:
                raise RuntimeError('updating stopped, endless loop')
            qb = qc
        else:
            return c
def entropy(pk, qk=None, base=None, axis=0):
    """Calculate the entropy of a distribution for given probability values.

    If only probabilities `pk` are given, the entropy is calculated as
    ``S = -sum(pk * log(pk), axis=axis)``.

    If `qk` is not None, then compute the Kullback-Leibler divergence
    ``S = sum(pk * log(pk / qk), axis=axis)``.

    This routine will normalize `pk` and `qk` if they don't sum to 1.

    Parameters
    ----------
    pk : sequence
        Defines the (discrete) distribution. ``pk[i]`` is the (possibly
        unnormalized) probability of event ``i``.
    qk : sequence, optional
        Sequence against which the relative entropy is computed. Should be in
        the same format as `pk`.
    base : float, optional
        The logarithmic base to use, defaults to ``e`` (natural logarithm).
    axis : int, optional
        The axis along which the entropy is calculated. Default is 0.

    Returns
    -------
    S : float
        The calculated entropy.

    Examples
    --------
    >>> from scipy.stats import entropy
    >>> entropy([1/2, 1/2], base=2)
    1.0
    >>> entropy([1/2, 1/2], qk=[9/10, 1/10])
    0.5108256237659907

    """
    pk = asarray(pk)
    # Normalize to a proper probability distribution along `axis`.
    pk = 1.0 * pk / np.sum(pk, axis=axis, keepdims=True)
    if qk is None:
        # Shannon entropy: elementwise -p*log(p), handled safely by entr.
        terms = entr(pk)
    else:
        qk = asarray(qk)
        if qk.shape != pk.shape:
            raise ValueError("qk and pk must have same shape.")
        qk = 1.0 * qk / np.sum(qk, axis=axis, keepdims=True)
        # Kullback-Leibler divergence: elementwise p*log(p/q).
        terms = rel_entr(pk, qk)
    result = np.sum(terms, axis=axis)
    if base is not None:
        # Convert from nats to the requested logarithm base.
        result /= log(base)
    return result
2701# Must over-ride one of _pmf or _cdf or pass in
2702# x_k, p(x_k) lists in initialization
2704class rv_discrete(rv_generic):
2705 """
2706 A generic discrete random variable class meant for subclassing.
2708 `rv_discrete` is a base class to construct specific distribution classes
2709 and instances for discrete random variables. It can also be used
2710 to construct an arbitrary distribution defined by a list of support
2711 points and corresponding probabilities.
2713 Parameters
2714 ----------
2715 a : float, optional
2716 Lower bound of the support of the distribution, default: 0
2717 b : float, optional
2718 Upper bound of the support of the distribution, default: plus infinity
2719 moment_tol : float, optional
2720 The tolerance for the generic calculation of moments.
2721 values : tuple of two array_like, optional
2722 ``(xk, pk)`` where ``xk`` are integers and ``pk`` are the non-zero
2723 probabilities between 0 and 1 with ``sum(pk) = 1``. ``xk``
2724 and ``pk`` must have the same shape.
2725 inc : integer, optional
2726 Increment for the support of the distribution.
2727 Default is 1. (other values have not been tested)
2728 badvalue : float, optional
2729 The value in a result arrays that indicates a value that for which
2730 some argument restriction is violated, default is np.nan.
2731 name : str, optional
2732 The name of the instance. This string is used to construct the default
2733 example for distributions.
2734 longname : str, optional
2735 This string is used as part of the first line of the docstring returned
2736 when a subclass has no docstring of its own. Note: `longname` exists
2737 for backwards compatibility, do not use for new subclasses.
2738 shapes : str, optional
2739 The shape of the distribution. For example "m, n" for a distribution
2740 that takes two integers as the two shape arguments for all its methods
2741 If not provided, shape parameters will be inferred from
2742 the signatures of the private methods, ``_pmf`` and ``_cdf`` of
2743 the instance.
2744 extradoc : str, optional
2745 This string is used as the last part of the docstring returned when a
2746 subclass has no docstring of its own. Note: `extradoc` exists for
2747 backwards compatibility, do not use for new subclasses.
2748 seed : {None, int, `~np.random.RandomState`, `~np.random.Generator`}, optional
2749 This parameter defines the object to use for drawing random variates.
2750 If `seed` is `None` the `~np.random.RandomState` singleton is used.
2751 If `seed` is an int, a new ``RandomState`` instance is used, seeded
2752 with seed.
2753 If `seed` is already a ``RandomState`` or ``Generator`` instance,
2754 then that object is used.
2755 Default is None.
2757 Methods
2758 -------
2759 rvs
2760 pmf
2761 logpmf
2762 cdf
2763 logcdf
2764 sf
2765 logsf
2766 ppf
2767 isf
2768 moment
2769 stats
2770 entropy
2771 expect
2772 median
2773 mean
2774 std
2775 var
2776 interval
2777 __call__
2778 support
2781 Notes
2782 -----
2784 This class is similar to `rv_continuous`. Whether a shape parameter is
2785 valid is decided by an ``_argcheck`` method (which defaults to checking
2786 that its arguments are strictly positive.)
2787 The main differences are:
2789 - the support of the distribution is a set of integers
2790 - instead of the probability density function, ``pdf`` (and the
2791 corresponding private ``_pdf``), this class defines the
2792 *probability mass function*, `pmf` (and the corresponding
2793 private ``_pmf``.)
2794 - scale parameter is not defined.
2796 To create a new discrete distribution, we would do the following:
2798 >>> from scipy.stats import rv_discrete
2799 >>> class poisson_gen(rv_discrete):
2800 ... "Poisson distribution"
2801 ... def _pmf(self, k, mu):
2802 ... return exp(-mu) * mu**k / factorial(k)
2804 and create an instance::
2806 >>> poisson = poisson_gen(name="poisson")
2808 Note that above we defined the Poisson distribution in the standard form.
2809 Shifting the distribution can be done by providing the ``loc`` parameter
2810 to the methods of the instance. For example, ``poisson.pmf(x, mu, loc)``
2811 delegates the work to ``poisson._pmf(x-loc, mu)``.
2813 **Discrete distributions from a list of probabilities**
2815 Alternatively, you can construct an arbitrary discrete rv defined
2816 on a finite set of values ``xk`` with ``Prob{X=xk} = pk`` by using the
2817 ``values`` keyword argument to the `rv_discrete` constructor.
2819 Examples
2820 --------
2822 Custom made discrete distribution:
2824 >>> from scipy import stats
2825 >>> xk = np.arange(7)
2826 >>> pk = (0.1, 0.2, 0.3, 0.1, 0.1, 0.0, 0.2)
2827 >>> custm = stats.rv_discrete(name='custm', values=(xk, pk))
2828 >>>
2829 >>> import matplotlib.pyplot as plt
2830 >>> fig, ax = plt.subplots(1, 1)
2831 >>> ax.plot(xk, custm.pmf(xk), 'ro', ms=12, mec='r')
2832 >>> ax.vlines(xk, 0, custm.pmf(xk), colors='r', lw=4)
2833 >>> plt.show()
2835 Random number generation:
2837 >>> R = custm.rvs(size=100)
2839 """
2840 def __new__(cls, a=0, b=inf, name=None, badvalue=None,
2841 moment_tol=1e-8, values=None, inc=1, longname=None,
2842 shapes=None, extradoc=None, seed=None):
2844 if values is not None:
2845 # dispatch to a subclass
2846 return super(rv_discrete, cls).__new__(rv_sample)
2847 else:
2848 # business as usual
2849 return super(rv_discrete, cls).__new__(cls)
    def __init__(self, a=0, b=inf, name=None, badvalue=None,
                 moment_tol=1e-8, values=None, inc=1, longname=None,
                 shapes=None, extradoc=None, seed=None):

        super(rv_discrete, self).__init__(seed)

        # cf generic freeze
        self._ctor_param = dict(
            a=a, b=b, name=name, badvalue=badvalue,
            moment_tol=moment_tol, values=values, inc=inc,
            longname=longname, shapes=shapes, extradoc=extradoc, seed=seed)

        if badvalue is None:
            badvalue = nan
        self.badvalue = badvalue
        self.a = a                  # lower bound of the support
        self.b = b                  # upper bound of the support
        self.moment_tol = moment_tol
        self.inc = inc              # support increment (normally 1)
        # Vectorized fallbacks built from the scalar implementations.
        self._cdfvec = vectorize(self._cdf_single, otypes='d')
        self.vecentropy = vectorize(self._entropy)
        self.shapes = shapes

        if values is not None:
            # __new__ dispatches `values=...` construction to rv_sample, so
            # reaching here with values set indicates constructor misuse.
            raise ValueError("rv_discrete.__init__(..., values != None, ...)")

        self._construct_argparser(meths_to_inspect=[self._pmf, self._cdf],
                                  locscale_in='loc=0',
                                  # scale=1 for discrete RVs
                                  locscale_out='loc, 1')

        # nin correction needs to be after we know numargs
        # correct nin for generic moment vectorization
        _vec_generic_moment = vectorize(_drv2_moment, otypes='d')
        _vec_generic_moment.nin = self.numargs + 2
        self.generic_moment = types.MethodType(_vec_generic_moment, self)

        # correct nin for ppf vectorization
        _vppf = vectorize(_drv2_ppfsingle, otypes='d')
        _vppf.nin = self.numargs + 2
        self._ppfvec = types.MethodType(_vppf, self)

        # now that self.numargs is defined, we can adjust nin
        self._cdfvec.nin = self.numargs + 1

        self._construct_docstrings(name, longname, extradoc)
2898 def _construct_docstrings(self, name, longname, extradoc):
2899 if name is None:
2900 name = 'Distribution'
2901 self.name = name
2902 self.extradoc = extradoc
2904 # generate docstring for subclass instances
2905 if longname is None:
2906 if name[0] in ['aeiouAEIOU']:
2907 hstr = "An "
2908 else:
2909 hstr = "A "
2910 longname = hstr + name
2912 if sys.flags.optimize < 2:
2913 # Skip adding docstrings if interpreter is run with -OO
2914 if self.__doc__ is None:
2915 self._construct_default_doc(longname=longname,
2916 extradoc=extradoc,
2917 docdict=docdict_discrete,
2918 discrete='discrete')
2919 else:
2920 dct = dict(distdiscrete)
2921 self._construct_doc(docdict_discrete, dct.get(self.name))
2923 # discrete RV do not have the scale parameter, remove it
2924 self.__doc__ = self.__doc__.replace(
2925 '\n scale : array_like, '
2926 'optional\n scale parameter (default=1)', '')
2928 def _updated_ctor_param(self):
2929 """ Return the current version of _ctor_param, possibly updated by user.
2931 Used by freezing and pickling.
2932 Keep this in sync with the signature of __init__.
2933 """
2934 dct = self._ctor_param.copy()
2935 dct['a'] = self.a
2936 dct['b'] = self.b
2937 dct['badvalue'] = self.badvalue
2938 dct['moment_tol'] = self.moment_tol
2939 dct['inc'] = self.inc
2940 dct['name'] = self.name
2941 dct['shapes'] = self.shapes
2942 dct['extradoc'] = self.extradoc
2943 return dct
2945 def _nonzero(self, k, *args):
2946 return floor(k) == k
2948 def _pmf(self, k, *args):
2949 return self._cdf(k, *args) - self._cdf(k-1, *args)
2951 def _logpmf(self, k, *args):
2952 return log(self._pmf(k, *args))
2954 def _cdf_single(self, k, *args):
2955 _a, _b = self._get_support(*args)
2956 m = arange(int(_a), k+1)
2957 return np.sum(self._pmf(m, *args), axis=0)
2959 def _cdf(self, x, *args):
2960 k = floor(x)
2961 return self._cdfvec(k, *args)
2963 # generic _logcdf, _sf, _logsf, _ppf, _isf, _rvs defined in rv_generic
    def rvs(self, *args, **kwargs):
        """
        Random variates of given type.

        Parameters
        ----------
        arg1, arg2, arg3,... : array_like
            The shape parameter(s) for the distribution (see docstring of the
            instance object for more information).
        loc : array_like, optional
            Location parameter (default=0).
        size : int or tuple of ints, optional
            Defining number of random variates (Default is 1).  Note that `size`
            has to be given as keyword, not as positional argument.
        random_state : {None, int, `~np.random.RandomState`, `~np.random.Generator`}, optional
            This parameter defines the object to use for drawing random
            variates.
            If `random_state` is `None` the `~np.random.RandomState` singleton
            is used.
            If `random_state` is an int, a new ``RandomState`` instance is used,
            seeded with random_state.
            If `random_state` is already a ``RandomState`` or ``Generator``
            instance, then that object is used.
            Default is None.

        Returns
        -------
        rvs : ndarray or scalar
            Random variates of given `size`.

        """
        # Flag the shared rv_generic implementation to treat the variates
        # as discrete (integer support, no scale parameter).
        kwargs['discrete'] = True
        return super(rv_discrete, self).rvs(*args, **kwargs)
2999 def pmf(self, k, *args, **kwds):
3000 """
3001 Probability mass function at k of the given RV.
3003 Parameters
3004 ----------
3005 k : array_like
3006 Quantiles.
3007 arg1, arg2, arg3,... : array_like
3008 The shape parameter(s) for the distribution (see docstring of the
3009 instance object for more information)
3010 loc : array_like, optional
3011 Location parameter (default=0).
3013 Returns
3014 -------
3015 pmf : array_like
3016 Probability mass function evaluated at k
3018 """
3019 args, loc, _ = self._parse_args(*args, **kwds)
3020 k, loc = map(asarray, (k, loc))
3021 args = tuple(map(asarray, args))
3022 _a, _b = self._get_support(*args)
3023 k = asarray((k-loc))
3024 cond0 = self._argcheck(*args)
3025 cond1 = (k >= _a) & (k <= _b) & self._nonzero(k, *args)
3026 cond = cond0 & cond1
3027 output = zeros(shape(cond), 'd')
3028 place(output, (1-cond0) + np.isnan(k), self.badvalue)
3029 if np.any(cond):
3030 goodargs = argsreduce(cond, *((k,)+args))
3031 place(output, cond, np.clip(self._pmf(*goodargs), 0, 1))
3032 if output.ndim == 0:
3033 return output[()]
3034 return output
3036 def logpmf(self, k, *args, **kwds):
3037 """
3038 Log of the probability mass function at k of the given RV.
3040 Parameters
3041 ----------
3042 k : array_like
3043 Quantiles.
3044 arg1, arg2, arg3,... : array_like
3045 The shape parameter(s) for the distribution (see docstring of the
3046 instance object for more information).
3047 loc : array_like, optional
3048 Location parameter. Default is 0.
3050 Returns
3051 -------
3052 logpmf : array_like
3053 Log of the probability mass function evaluated at k.
3055 """
3056 args, loc, _ = self._parse_args(*args, **kwds)
3057 k, loc = map(asarray, (k, loc))
3058 args = tuple(map(asarray, args))
3059 _a, _b = self._get_support(*args)
3060 k = asarray((k-loc))
3061 cond0 = self._argcheck(*args)
3062 cond1 = (k >= _a) & (k <= _b) & self._nonzero(k, *args)
3063 cond = cond0 & cond1
3064 output = empty(shape(cond), 'd')
3065 output.fill(NINF)
3066 place(output, (1-cond0) + np.isnan(k), self.badvalue)
3067 if np.any(cond):
3068 goodargs = argsreduce(cond, *((k,)+args))
3069 place(output, cond, self._logpmf(*goodargs))
3070 if output.ndim == 0:
3071 return output[()]
3072 return output
3074 def cdf(self, k, *args, **kwds):
3075 """
3076 Cumulative distribution function of the given RV.
3078 Parameters
3079 ----------
3080 k : array_like, int
3081 Quantiles.
3082 arg1, arg2, arg3,... : array_like
3083 The shape parameter(s) for the distribution (see docstring of the
3084 instance object for more information).
3085 loc : array_like, optional
3086 Location parameter (default=0).
3088 Returns
3089 -------
3090 cdf : ndarray
3091 Cumulative distribution function evaluated at `k`.
3093 """
3094 args, loc, _ = self._parse_args(*args, **kwds)
3095 k, loc = map(asarray, (k, loc))
3096 args = tuple(map(asarray, args))
3097 _a, _b = self._get_support(*args)
3098 k = asarray((k-loc))
3099 cond0 = self._argcheck(*args)
3100 cond1 = (k >= _a) & (k < _b)
3101 cond2 = (k >= _b)
3102 cond = cond0 & cond1
3103 output = zeros(shape(cond), 'd')
3104 place(output, (1-cond0) + np.isnan(k), self.badvalue)
3105 place(output, cond2*(cond0 == cond0), 1.0)
3107 if np.any(cond):
3108 goodargs = argsreduce(cond, *((k,)+args))
3109 place(output, cond, np.clip(self._cdf(*goodargs), 0, 1))
3110 if output.ndim == 0:
3111 return output[()]
3112 return output
3114 def logcdf(self, k, *args, **kwds):
3115 """
3116 Log of the cumulative distribution function at k of the given RV.
3118 Parameters
3119 ----------
3120 k : array_like, int
3121 Quantiles.
3122 arg1, arg2, arg3,... : array_like
3123 The shape parameter(s) for the distribution (see docstring of the
3124 instance object for more information).
3125 loc : array_like, optional
3126 Location parameter (default=0).
3128 Returns
3129 -------
3130 logcdf : array_like
3131 Log of the cumulative distribution function evaluated at k.
3133 """
3134 args, loc, _ = self._parse_args(*args, **kwds)
3135 k, loc = map(asarray, (k, loc))
3136 args = tuple(map(asarray, args))
3137 _a, _b = self._get_support(*args)
3138 k = asarray((k-loc))
3139 cond0 = self._argcheck(*args)
3140 cond1 = (k >= _a) & (k < _b)
3141 cond2 = (k >= _b)
3142 cond = cond0 & cond1
3143 output = empty(shape(cond), 'd')
3144 output.fill(NINF)
3145 place(output, (1-cond0) + np.isnan(k), self.badvalue)
3146 place(output, cond2*(cond0 == cond0), 0.0)
3148 if np.any(cond):
3149 goodargs = argsreduce(cond, *((k,)+args))
3150 place(output, cond, self._logcdf(*goodargs))
3151 if output.ndim == 0:
3152 return output[()]
3153 return output
3155 def sf(self, k, *args, **kwds):
3156 """
3157 Survival function (1 - `cdf`) at k of the given RV.
3159 Parameters
3160 ----------
3161 k : array_like
3162 Quantiles.
3163 arg1, arg2, arg3,... : array_like
3164 The shape parameter(s) for the distribution (see docstring of the
3165 instance object for more information).
3166 loc : array_like, optional
3167 Location parameter (default=0).
3169 Returns
3170 -------
3171 sf : array_like
3172 Survival function evaluated at k.
3174 """
3175 args, loc, _ = self._parse_args(*args, **kwds)
3176 k, loc = map(asarray, (k, loc))
3177 args = tuple(map(asarray, args))
3178 _a, _b = self._get_support(*args)
3179 k = asarray(k-loc)
3180 cond0 = self._argcheck(*args)
3181 cond1 = (k >= _a) & (k < _b)
3182 cond2 = (k < _a) & cond0
3183 cond = cond0 & cond1
3184 output = zeros(shape(cond), 'd')
3185 place(output, (1-cond0) + np.isnan(k), self.badvalue)
3186 place(output, cond2, 1.0)
3187 if np.any(cond):
3188 goodargs = argsreduce(cond, *((k,)+args))
3189 place(output, cond, np.clip(self._sf(*goodargs), 0, 1))
3190 if output.ndim == 0:
3191 return output[()]
3192 return output
3194 def logsf(self, k, *args, **kwds):
3195 """
3196 Log of the survival function of the given RV.
3198 Returns the log of the "survival function," defined as 1 - `cdf`,
3199 evaluated at `k`.
3201 Parameters
3202 ----------
3203 k : array_like
3204 Quantiles.
3205 arg1, arg2, arg3,... : array_like
3206 The shape parameter(s) for the distribution (see docstring of the
3207 instance object for more information).
3208 loc : array_like, optional
3209 Location parameter (default=0).
3211 Returns
3212 -------
3213 logsf : ndarray
3214 Log of the survival function evaluated at `k`.
3216 """
3217 args, loc, _ = self._parse_args(*args, **kwds)
3218 k, loc = map(asarray, (k, loc))
3219 args = tuple(map(asarray, args))
3220 _a, _b = self._get_support(*args)
3221 k = asarray(k-loc)
3222 cond0 = self._argcheck(*args)
3223 cond1 = (k >= _a) & (k < _b)
3224 cond2 = (k < _a) & cond0
3225 cond = cond0 & cond1
3226 output = empty(shape(cond), 'd')
3227 output.fill(NINF)
3228 place(output, (1-cond0) + np.isnan(k), self.badvalue)
3229 place(output, cond2, 0.0)
3230 if np.any(cond):
3231 goodargs = argsreduce(cond, *((k,)+args))
3232 place(output, cond, self._logsf(*goodargs))
3233 if output.ndim == 0:
3234 return output[()]
3235 return output
3237 def ppf(self, q, *args, **kwds):
3238 """
3239 Percent point function (inverse of `cdf`) at q of the given RV.
3241 Parameters
3242 ----------
3243 q : array_like
3244 Lower tail probability.
3245 arg1, arg2, arg3,... : array_like
3246 The shape parameter(s) for the distribution (see docstring of the
3247 instance object for more information).
3248 loc : array_like, optional
3249 Location parameter (default=0).
3251 Returns
3252 -------
3253 k : array_like
3254 Quantile corresponding to the lower tail probability, q.
3256 """
3257 args, loc, _ = self._parse_args(*args, **kwds)
3258 q, loc = map(asarray, (q, loc))
3259 args = tuple(map(asarray, args))
3260 _a, _b = self._get_support(*args)
3261 cond0 = self._argcheck(*args) & (loc == loc)
3262 cond1 = (q > 0) & (q < 1)
3263 cond2 = (q == 1) & cond0
3264 cond = cond0 & cond1
3265 output = valarray(shape(cond), value=self.badvalue, typecode='d')
3266 # output type 'd' to handle nin and inf
3267 place(output, (q == 0)*(cond == cond), _a-1 + loc)
3268 place(output, cond2, _b + loc)
3269 if np.any(cond):
3270 goodargs = argsreduce(cond, *((q,)+args+(loc,)))
3271 loc, goodargs = goodargs[-1], goodargs[:-1]
3272 place(output, cond, self._ppf(*goodargs) + loc)
3274 if output.ndim == 0:
3275 return output[()]
3276 return output
3278 def isf(self, q, *args, **kwds):
3279 """
3280 Inverse survival function (inverse of `sf`) at q of the given RV.
3282 Parameters
3283 ----------
3284 q : array_like
3285 Upper tail probability.
3286 arg1, arg2, arg3,... : array_like
3287 The shape parameter(s) for the distribution (see docstring of the
3288 instance object for more information).
3289 loc : array_like, optional
3290 Location parameter (default=0).
3292 Returns
3293 -------
3294 k : ndarray or scalar
3295 Quantile corresponding to the upper tail probability, q.
3297 """
3298 args, loc, _ = self._parse_args(*args, **kwds)
3299 q, loc = map(asarray, (q, loc))
3300 args = tuple(map(asarray, args))
3301 _a, _b = self._get_support(*args)
3302 cond0 = self._argcheck(*args) & (loc == loc)
3303 cond1 = (q > 0) & (q < 1)
3304 cond2 = (q == 1) & cond0
3305 cond = cond0 & cond1
3307 # same problem as with ppf; copied from ppf and changed
3308 output = valarray(shape(cond), value=self.badvalue, typecode='d')
3309 # output type 'd' to handle nin and inf
3310 place(output, (q == 0)*(cond == cond), _b)
3311 place(output, cond2, _a-1)
3313 # call place only if at least 1 valid argument
3314 if np.any(cond):
3315 goodargs = argsreduce(cond, *((q,)+args+(loc,)))
3316 loc, goodargs = goodargs[-1], goodargs[:-1]
3317 # PB same as ticket 766
3318 place(output, cond, self._isf(*goodargs) + loc)
3320 if output.ndim == 0:
3321 return output[()]
3322 return output
3324 def _entropy(self, *args):
3325 if hasattr(self, 'pk'):
3326 return entropy(self.pk)
3327 else:
3328 _a, _b = self._get_support(*args)
3329 return _expect(lambda x: entr(self.pmf(x, *args)),
3330 _a, _b, self.ppf(0.5, *args), self.inc)
3332 def expect(self, func=None, args=(), loc=0, lb=None, ub=None,
3333 conditional=False, maxcount=1000, tolerance=1e-10, chunksize=32):
3334 """
3335 Calculate expected value of a function with respect to the distribution
3336 for discrete distribution by numerical summation.
3338 Parameters
3339 ----------
3340 func : callable, optional
3341 Function for which the expectation value is calculated.
3342 Takes only one argument.
3343 The default is the identity mapping f(k) = k.
3344 args : tuple, optional
3345 Shape parameters of the distribution.
3346 loc : float, optional
3347 Location parameter.
3348 Default is 0.
3349 lb, ub : int, optional
3350 Lower and upper bound for the summation, default is set to the
3351 support of the distribution, inclusive (``ul <= k <= ub``).
3352 conditional : bool, optional
3353 If true then the expectation is corrected by the conditional
3354 probability of the summation interval. The return value is the
3355 expectation of the function, `func`, conditional on being in
3356 the given interval (k such that ``ul <= k <= ub``).
3357 Default is False.
3358 maxcount : int, optional
3359 Maximal number of terms to evaluate (to avoid an endless loop for
3360 an infinite sum). Default is 1000.
3361 tolerance : float, optional
3362 Absolute tolerance for the summation. Default is 1e-10.
3363 chunksize : int, optional
3364 Iterate over the support of a distributions in chunks of this size.
3365 Default is 32.
3367 Returns
3368 -------
3369 expect : float
3370 Expected value.
3372 Notes
3373 -----
3374 For heavy-tailed distributions, the expected value may or may not exist,
3375 depending on the function, `func`. If it does exist, but the sum converges
3376 slowly, the accuracy of the result may be rather low. For instance, for
3377 ``zipf(4)``, accuracy for mean, variance in example is only 1e-5.
3378 increasing `maxcount` and/or `chunksize` may improve the result, but may
3379 also make zipf very slow.
3381 The function is not vectorized.
3383 """
3384 if func is None:
3385 def fun(x):
3386 # loc and args from outer scope
3387 return (x+loc)*self._pmf(x, *args)
3388 else:
3389 def fun(x):
3390 # loc and args from outer scope
3391 return func(x+loc)*self._pmf(x, *args)
3392 # used pmf because _pmf does not check support in randint and there
3393 # might be problems(?) with correct self.a, self.b at this stage maybe
3394 # not anymore, seems to work now with _pmf
3396 self._argcheck(*args) # (re)generate scalar self.a and self.b
3397 _a, _b = self._get_support(*args)
3398 if lb is None:
3399 lb = _a
3400 else:
3401 lb = lb - loc # convert bound for standardized distribution
3402 if ub is None:
3403 ub = _b
3404 else:
3405 ub = ub - loc # convert bound for standardized distribution
3406 if conditional:
3407 invfac = self.sf(lb-1, *args) - self.sf(ub, *args)
3408 else:
3409 invfac = 1.0
3411 # iterate over the support, starting from the median
3412 x0 = self.ppf(0.5, *args)
3413 res = _expect(fun, lb, ub, x0, self.inc, maxcount, tolerance, chunksize)
3414 return res / invfac
3417def _expect(fun, lb, ub, x0, inc, maxcount=1000, tolerance=1e-10,
3418 chunksize=32):
3419 """Helper for computing the expectation value of `fun`."""
3421 # short-circuit if the support size is small enough
3422 if (ub - lb) <= chunksize:
3423 supp = np.arange(lb, ub+1, inc)
3424 vals = fun(supp)
3425 return np.sum(vals)
3427 # otherwise, iterate starting from x0
3428 if x0 < lb:
3429 x0 = lb
3430 if x0 > ub:
3431 x0 = ub
3433 count, tot = 0, 0.
3434 # iterate over [x0, ub] inclusive
3435 for x in _iter_chunked(x0, ub+1, chunksize=chunksize, inc=inc):
3436 count += x.size
3437 delta = np.sum(fun(x))
3438 tot += delta
3439 if abs(delta) < tolerance * x.size:
3440 break
3441 if count > maxcount:
3442 warnings.warn('expect(): sum did not converge', RuntimeWarning)
3443 return tot
3445 # iterate over [lb, x0)
3446 for x in _iter_chunked(x0-1, lb-1, chunksize=chunksize, inc=-inc):
3447 count += x.size
3448 delta = np.sum(fun(x))
3449 tot += delta
3450 if abs(delta) < tolerance * x.size:
3451 break
3452 if count > maxcount:
3453 warnings.warn('expect(): sum did not converge', RuntimeWarning)
3454 break
3456 return tot
3459def _iter_chunked(x0, x1, chunksize=4, inc=1):
3460 """Iterate from x0 to x1 in chunks of chunksize and steps inc.
3462 x0 must be finite, x1 need not be. In the latter case, the iterator is
3463 infinite.
3464 Handles both x0 < x1 and x0 > x1. In the latter case, iterates downwards
3465 (make sure to set inc < 0.)
3467 >>> [x for x in _iter_chunked(2, 5, inc=2)]
3468 [array([2, 4])]
3469 >>> [x for x in _iter_chunked(2, 11, inc=2)]
3470 [array([2, 4, 6, 8]), array([10])]
3471 >>> [x for x in _iter_chunked(2, -5, inc=-2)]
3472 [array([ 2, 0, -2, -4])]
3473 >>> [x for x in _iter_chunked(2, -9, inc=-2)]
3474 [array([ 2, 0, -2, -4]), array([-6, -8])]
3476 """
3477 if inc == 0:
3478 raise ValueError('Cannot increment by zero.')
3479 if chunksize <= 0:
3480 raise ValueError('Chunk size must be positive; got %s.' % chunksize)
3482 s = 1 if inc > 0 else -1
3483 stepsize = abs(chunksize * inc)
3485 x = x0
3486 while (x - x1) * inc < 0:
3487 delta = min(stepsize, abs(x - x1))
3488 step = delta * s
3489 supp = np.arange(x, x + step, inc)
3490 x += step
3491 yield supp
class rv_sample(rv_discrete):
    """A 'sample' discrete distribution defined by the support and values.

    The ctor ignores most of the arguments, only needs the `values` argument.
    """
    def __init__(self, a=0, b=inf, name=None, badvalue=None,
                 moment_tol=1e-8, values=None, inc=1, longname=None,
                 shapes=None, extradoc=None, seed=None):
        # Skip rv_discrete.__init__ and run rv_generic's initializer only;
        # this subclass builds its own state from `values` below.
        super(rv_discrete, self).__init__(seed)

        if values is None:
            raise ValueError("rv_sample.__init__(..., values=None,...)")

        # cf generic freeze: keep the ctor arguments so the instance can be
        # re-created by freezing/pickling (see _updated_ctor_param).
        self._ctor_param = dict(
            a=a, b=b, name=name, badvalue=badvalue,
            moment_tol=moment_tol, values=values, inc=inc,
            longname=longname, shapes=shapes, extradoc=extradoc, seed=seed)

        if badvalue is None:
            badvalue = nan
        self.badvalue = badvalue
        self.moment_tol = moment_tol
        self.inc = inc
        self.shapes = shapes
        # Entropy of a finite sample needs no numeric summation machinery.
        self.vecentropy = self._entropy

        xk, pk = values

        # Validate that (xk, pk) describe a proper probability mass function.
        if np.shape(xk) != np.shape(pk):
            raise ValueError("xk and pk must have the same shape.")
        if np.less(pk, 0.0).any():
            raise ValueError("All elements of pk must be non-negative.")
        if not np.allclose(np.sum(pk), 1):
            raise ValueError("The sum of provided pk is not 1.")

        # Store the support sorted ascending, with pk reordered to match.
        indx = np.argsort(np.ravel(xk))
        self.xk = np.take(np.ravel(xk), indx, 0)
        self.pk = np.take(np.ravel(pk), indx, 0)
        self.a = self.xk[0]
        self.b = self.xk[-1]

        # Cumulative probabilities, used by _cdf and _ppf below.
        self.qvals = np.cumsum(self.pk, axis=0)

        self.shapes = ' '  # bypass inspection
        self._construct_argparser(meths_to_inspect=[self._pmf],
                                  locscale_in='loc=0',
                                  # scale=1 for discrete RVs
                                  locscale_out='loc, 1')

        self._construct_docstrings(name, longname, extradoc)

    def _get_support(self, *args):
        """Return the support of the (unscaled, unshifted) distribution.

        Parameters
        ----------
        arg1, arg2, ... : array_like
            The shape parameter(s) for the distribution (see docstring of the
            instance object for more information).

        Returns
        -------
        a, b : numeric (float, or int or +/-np.inf)
            end-points of the distribution's support.
        """
        return self.a, self.b

    def _pmf(self, x):
        # For each entry of x: the matching pk where x equals a support
        # point, otherwise np.select's default of 0.
        return np.select([x == k for k in self.xk],
                         [np.broadcast_arrays(p, x)[0] for p in self.pk], 0)

    def _cdf(self, x):
        # Index of the first support point strictly greater than x, minus
        # one; requires x to be at least 1-d (see _rvs below).
        xx, xxk = np.broadcast_arrays(x[:, None], self.xk)
        indx = np.argmax(xxk > xx, axis=-1) - 1
        return self.qvals[indx]

    def _ppf(self, q):
        # Smallest support point whose cumulative probability reaches q.
        qq, sqq = np.broadcast_arrays(q[..., None], self.qvals)
        indx = argmax(sqq >= qq, axis=-1)
        return self.xk[indx]

    def _rvs(self, size=None, random_state=None):
        # Need to define it explicitly, otherwise .rvs() with size=None
        # fails due to explicit broadcasting in _ppf
        U = random_state.uniform(size=size)
        if size is None:
            # _ppf needs at least a 1-d input; wrap and unwrap the scalar.
            U = np.array(U, ndmin=1)
            Y = self._ppf(U)[0]
        else:
            Y = self._ppf(U)
        return Y

    def _entropy(self):
        # Shannon entropy computed directly from the finite weight vector.
        return entropy(self.pk)

    def generic_moment(self, n):
        """Compute the n-th (non-central) moment by direct summation."""
        n = asarray(n)
        return np.sum(self.xk**n[np.newaxis, ...] * self.pk, axis=0)
3595def _check_shape(argshape, size):
3596 """
3597 This is a utility function used by `_rvs()` in the class geninvgauss_gen.
3598 It compares the tuple argshape to the tuple size.
3600 Parameters
3601 ----------
3602 argshape : tuple of integers
3603 Shape of the arguments.
3604 size : tuple of integers or integer
3605 Size argument of rvs().
3607 Returns
3608 -------
3609 The function returns two tuples, scalar_shape and bc.
3611 scalar_shape : tuple
3612 Shape to which the 1-d array of random variates returned by
3613 _rvs_scalar() is converted when it is copied into the
3614 output array of _rvs().
3616 bc : tuple of booleans
3617 bc is an tuple the same length as size. bc[j] is True if the data
3618 associated with that index is generated in one call of _rvs_scalar().
3620 """
3621 scalar_shape = []
3622 bc = []
3623 for argdim, sizedim in zip_longest(argshape[::-1], size[::-1],
3624 fillvalue=1):
3625 if sizedim > argdim or (argdim == sizedim == 1):
3626 scalar_shape.append(sizedim)
3627 bc.append(True)
3628 else:
3629 bc.append(False)
3630 return tuple(scalar_shape[::-1]), tuple(bc[::-1])
def get_distribution_names(namespace_pairs, rv_base_class):
    """
    Collect names of statistical distributions and their generators.

    Parameters
    ----------
    namespace_pairs : sequence
        A snapshot of (name, value) pairs in the namespace of a module.
    rv_base_class : class
        The base class of random variable generator classes in a module.

    Returns
    -------
    distn_names : list of strings
        Names of the statistical distributions.
    distn_gen_names : list of strings
        Names of the generators of the statistical distributions.
        Note that these are not simply the names of the statistical
        distributions, with a _gen suffix added.

    """
    # Underscore-prefixed names are private and never part of the public API.
    # The endswith('_gen') check also guards issubclass() against non-class
    # values, mirroring the order of the original checks.
    distn_names = [name for name, value in namespace_pairs
                   if not name.startswith('_')
                   and isinstance(value, rv_base_class)]
    distn_gen_names = [name for name, value in namespace_pairs
                       if not name.startswith('_')
                       and name.endswith('_gen')
                       and issubclass(value, rv_base_class)]
    return distn_names, distn_gen_names