1 """
2 Probability density functions.
3
4 This module defines L{AbstractDensity}: a common interface for all PDFs.
5 Each L{AbstractDensity} describes a specific type of probability distribution,
6 for example L{Normal} is an implementation of the Gaussian distribution:
7
8 >>> pdf = Normal(mu=10, sigma=1.1)
9 >>> pdf.mu, pdf['sigma']
10 (10.0, 1.1)
11
12 Every PDF provides an implementation of the L{AbstractDensity.evaluate}
13 method, which evaluates the PDF for a list of input data points:
14
15 >>> pdf.evaluate([10, 9, 11, 12])
16 array([ 0.3626748 , 0.2399147 , 0.2399147 , 0.06945048])
17
18 PDF instances also behave like functions:
19
20 >>> pdf(data) # the same as pdf.evaluate(data)
21
22 Some L{AbstractDensity} implementations may support drawing random numbers from
23 the distribution (or raise an exception otherwise):
24
25 >>> pdf.random(2)
26 array([ 9.86257083, 9.73760515])
27
28 Each implementation of L{AbstractDensity} may support an arbitrary number of estimators,
29 used to estimate and re-initialize the PDF parameters from a set of observed data
30 points:
31
32 >>> pdf.estimate([5, 5, 10, 10])
33 >>> pdf.mu, pdf.sigma
34 (7.5, 2.5)
35 >>> pdf.estimator
36 <csb.statistics.pdf.GaussianMLEstimator>
37
38 Estimators implement the L{AbstractEstimator} interface. They are treated as
39 pluggable tools, which can be exchanged through the L{AbstractDensity.estimator}
40 property (you could create, initialize and plug your own estimator as well).
41 This is a classic Strategy pattern.
42 """
43
44 import numpy.random
45 import scipy.special
46 import csb.core
47
48 from abc import ABCMeta, abstractmethod
49 from csb.core import OrderedDict
50
51 from csb.numeric import log, exp, psi, inv_psi, EULER_MASCHERONI
52 from scipy.special import gammaln
53 from numpy import array, fabs, power, sqrt, pi, mean, median, clip
58
61
73
76
78 """
79 Density parameter estimation strategy.
80 """
81
82 __metaclass__ = ABCMeta
83
84 @abstractmethod
86 """
87 Estimate the parameters of the distribution from sample C{data}.
88
89 @param context: context distribution
90 @type context: L{AbstractDensity}
91 @param data: sample values
92 @type data: array
93
94 @return: a new distribution, initialized with the estimated parameters
95 @rtype: L{AbstractDensity}
96
97 @raise EstimationFailureError: if estimation is not possible
98 """
99 pass
100
102 """
103 Does not estimate anything.
104 """
106 raise NotImplementedError()
107
118
129
143
163
165
166 - def __init__(self, minbeta=0.5, maxbeta=8.0, step=0.1):
173
175
176 pdf = GeneralizedNormal(1, 1, 1)
177 data = array(data)
178 logl = []
179
180 for beta in numpy.arange(self._minbeta, self._maxbeta, self._step):
181
182 self.update(pdf, data, beta)
183
184 l = pdf.log_prob(data).sum()
185 logl.append([beta, l])
186
187 logl = numpy.array(logl)
188
189
190 beta = logl[ numpy.argmax(logl[:, 1]) ][0]
191 self.update(pdf, data, beta)
192
193 return pdf
194
202
203 - def update(self, pdf, data, beta):
212
220
222
227
229
230 log_p = numpy.mean(log(data), 0)
231
232 e = numpy.mean(data, 0)
233 v = numpy.mean(data ** 2, 0)
234 q = (e[0] - v[0]) / (v[0] - e[0] ** 2)
235
236 a = e * q
237 y = a * 0
238 k = 0
239 while(sum(abs(y - a)) > self.tol and k < self.n_iter):
240 y = psi(sum(a)) + log_p
241 a = numpy.array(list(map(inv_psi, y)))
242 k += 1
243
244 return Dirichlet(a)
245
256
267
270 """
271 Defines the interface and common operations for all probability density
272 functions.
273
274 Subclasses must complete the implementation by implementing the
275 L{AbstractDensity.log_prob} method. Subclasses may also--but
276 are not obliged to--override the L{AbstractDensity.random} method. If
277 any of the density parameters need validation, subclasses are expected to
278 override the L{AbstractDensity._validate} method and raise
279 L{ParameterValueError} on validation failure. Note that implementing
280 parameter validation in property setters has almost no effect and is
281 discouraged.
282 """
283
284 __metaclass__ = ABCMeta
285
286
293
295
296 if param in self._params:
297 return self._params[param]
298 else:
299 raise ParameterNotFoundError(param)
300
313
314 @property
316 return self._estimator
317 @estimator.setter
319 if not isinstance(strategy, AbstractEstimator):
320 raise TypeError(strategy)
321 self._estimator = strategy
322
325
331
333 """
334 Register a new parameter name.
335 """
336 if name not in self._params:
337 self._params[name] = None
338
340 """
341 Parameter value validation hook.
342 @raise ParameterValueError: on failed validation (value not accepted)
343 """
344 pass
345
348
350
351 for p, v in zip(self.parameters, values):
352 self[p] = v
353
354 for p in named_params:
355 self[p] = named_params[p]
356
357 @property
359 """
360 Get a tuple of all distribution parameter names.
361 """
362 return tuple(self._params)
363
364 @abstractmethod
366 """
367 Evaluate the logarithm of the probability of observing values C{x}.
368
369 @param x: values
370 @type x: array
371 @rtype: array
372 """
373 pass
374
376 """
377 Evaluate the probability of observing values C{x}.
378
379 @param x: values
380 @type x: array
381 @rtype: array
382 """
383 x = numpy.array(x)
384 return exp(self.log_prob(x))
385
387 """
388 Generate random samples from the probability distribution.
389
390 @param size: number of values to sample
391 @type size: int
392 """
393 raise NotImplementedError()
394
420
422
432
437
438 @property
441 @b.setter
442 - def b(self, value):
444
445 @property
448 @mu.setter
449 - def mu(self, value):
451
458
465
466 -class Normal(AbstractDensity):
467
477
478 @property
481 @mu.setter
482 - def mu(self, value):
484
485 @property
488 @sigma.setter
490 self['sigma'] = value
491
498
505
507
517
522
523 @property
526 @mu.setter
527 - def mu(self, value):
529
530 @property
533 @shape.setter
535 self['shape'] = value
536
538
539 mu = self.mu
540 scale = self.shape
541 x = numpy.array(x)
542
543 if numpy.min(x) <= 0:
544 raise ValueError('InverseGaussian is defined for x > 0')
545
546 y = -0.5 * scale * (x - mu) ** 2 / (mu ** 2 * x)
547 z = 0.5 * (log(scale) - log(2 * pi * x ** 3))
548 return z + y
549
550
565
567
568 - def __init__(self, mu=0, alpha=1, beta=1):
578
583
584 @property
587 @mu.setter
588 - def mu(self, value):
590
591 @property
594 @alpha.setter
596 self['alpha'] = value
597
598 @property
601 @beta.setter
602 - def beta(self, value):
604
612
614
624
629
630 @property
633 @a.setter
634 - def a(self, value):
636
637 @property
640 @b.setter
641 - def b(self, value):
643
644 @property
647 @p.setter
648 - def p(self, value):
650
652
653 a = self['a']
654 b = self['b']
655 p = self['p']
656
657 lz = 0.5 * p * (log(a) - log(b)) - log(2 * scipy.special.kv(p, sqrt(a * b)))
658
659 return lz + (p - 1) * log(x) - 0.5 * (a * x + b / x)
660
693
694 -class Gamma(AbstractDensity):
695
704
709
710 @property
713 @alpha.setter
715 self['alpha'] = value
716
717 @property
720 @beta.setter
721 - def beta(self, value):
723
725
726 a, b = self['alpha'], self['beta']
727
728 return a * log(b) - gammaln(clip(a, 1e-308, 1e308)) + \
729 (a - 1) * log(clip(x, 1e-308, 1e308)) - b * x
730
733
735
744
749
750 @property
753 @alpha.setter
755 self['alpha'] = value
756
757 @property
760 @beta.setter
761 - def beta(self, value):
763
767
770
772
773 - def __init__(self, mu=numpy.zeros(2), sigma=numpy.eye(2)):
777
780
782
783 from numpy.linalg import det
784
785 mu = self.mu
786 S = self.sigma
787 D = len(mu)
788 q = self.__q(x)
789 return -0.5 * (D * log(2 * pi) + log(abs(det(S)))) - 0.5 * q ** 2
790
792 from numpy import sum, dot, reshape
793 from numpy.linalg import inv
794
795 mu = self.mu
796 S = self.sigma
797
798 return sqrt(clip(sum(reshape((x - mu) * dot(x - mu, inv(S).T.squeeze()), (-1, len(mu))), -1), 0., 1e308))
799
801 """
802 Return the distribution along the dimensions C{dims},
803 conditioned on the values of C{x} in the remaining dimensions.
804
805 @param x: conditional values
806 @param dims: new dimensions
807 """
808 from numpy import take, dot
809 from numpy.linalg import inv
810
811 dims2 = [i for i in range(self['mu'].shape[0]) if not i in dims]
812
813 mu1 = take(self['mu'], dims)
814 mu2 = take(self['mu'], dims2)
815
816
817 x2 = take(x, dims2)
818
819 A = take(take(self['Sigma'], dims, 0), dims, 1)
820 B = take(take(self['Sigma'], dims2, 0), dims2, 1)
821 C = take(take(self['Sigma'], dims, 0), dims2, 1)
822
823 mu = mu1 + dot(C, dot(inv(B), x2 - mu2))
824 Sigma = A - dot(C, dot(inv(B), C.T))
825
826 return MultivariateGaussian((mu, Sigma))
827
829
837
838 @property
841
842 @alpha.setter
844 self['alpha'] = numpy.ravel(value)
845
851
854
857
866
871
872 @property
875 @mu.setter
876 - def mu(self, value):
878
879 @property
882 @beta.setter
883 - def beta(self, value):
885
893
900
922