1 """
2 Probability density functions.
3
4 This module defines L{AbstractDensity}: a common interface for all PDFs.
5 Each L{AbstractDensity} describes a specific type of probability distribution,
6 for example L{Normal} is an implementation of the Gaussian distribution:
7
8 >>> pdf = Normal(mu=10, sigma=1.1)
9 >>> pdf.mu, pdf['sigma']
(10.0, 1.1)
11
12 Every PDF provides an implementation of the L{AbstractDensity.evaluate}
13 method, which evaluates the PDF for a list of input data points:
14
15 >>> pdf.evaluate([10, 9, 11, 12])
16 array([ 0.3626748 , 0.2399147 , 0.2399147 , 0.06945048])
17
18 PDF instances also behave like functions:
19
20 >>> pdf(data) # the same as pdf.evaluate(data)
21
22 Some L{AbstractDensity} implementations may support drawing random numbers from
23 the distribution (or raise an exception otherwise):
24
25 >>> pdf.random(2)
26 array([ 9.86257083, 9.73760515])
27
Each implementation of L{AbstractDensity} may support a number of estimators,
29 used to estimate and re-initialize the PDF parameters from a set of observed data
30 points:
31
32 >>> pdf.estimate([5, 5, 10, 10])
33 >>> pdf.mu, pdf.sigma
34 (7.5, 2.5)
35 >>> pdf.estimator
36 <csb.statistics.pdf.GaussianMLEstimator>
37
38 Estimators implement the L{AbstractEstimator} interface. They are treated as
39 pluggable tools, which can be exchanged through the L{AbstractDensity.estimator}
40 property (you could create, initialize and plug your own estimator as well).
41 This is a classic Strategy pattern.
42 """
43
44 import numpy.random
45 import scipy.special
46 import csb.core
47
48 from abc import ABCMeta, abstractmethod
49 from csb.core import OrderedDict
50
51 from csb.numeric import log, exp, psi, inv_psi
52 from scipy.special import gammaln
53 from numpy import array, fabs, power, sqrt, pi, mean, median, clip
58
61
70
73 """
74 Density parameter estimation strategy.
75 """
76
77 __metaclass__ = ABCMeta
78
79 @abstractmethod
81 """
Estimate the parameters of the distribution from sample C{data}.
83
84 @param context: context distribution
85 @type context: L{AbstractDensity}
86 @param data: sample values
87 @type data: array
88
89 @return: a new distribution, initialized with the estimated parameters
90 @rtype: L{AbstractDensity}
91 """
92 pass
93
95 """
96 Does not estimate anything.
97 """
99 raise NotImplementedError()
100
111
122
123 -class InverseGammaPosteriorSampler(AbstractEstimator):
124 """
125 Density parameter estimation based on adaptive rejection sampling
126 """
127 pass
128
148
150
151 - def __init__(self, minbeta=0.5, maxbeta=8.0, step=0.1):
158
160
161 pdf = GeneralizedNormal(1, 1, 1)
162 data = array(data)
163 logl = []
164
165 for beta in numpy.arange(self._minbeta, self._maxbeta, self._step):
166
167 self.update(pdf, data, beta)
168
169 l = pdf.log_prob(data).sum()
170 logl.append([beta, l])
171
172 logl = numpy.array(logl)
173
174
175 beta = logl[ numpy.argmax(logl[:, 1]) ][0]
176 self.update(pdf, data, beta)
177
178 return pdf
179
187
188 - def update(self, pdf, data, beta):
197
205
208
213
215
216 log_p = numpy.mean(log(data), 0)
217
218 e = numpy.mean(data, 0)
219 v = numpy.mean(data ** 2, 0)
220 q = (e[0] - v[0]) / (v[0] - e[0] ** 2)
221
222 a = e * q
223 y = a * 0
224 k = 0
225 while(sum(abs(y - a)) > self.tol and k < self.n_iter):
226 y = psi(sum(a)) + log_p
227 a = numpy.array(list(map(inv_psi, y)))
228 k += 1
229
230 return Dirichlet(a)
231
234 """
235 Defines the interface and common operations for all probability density
236 functions.
237
238 Subclasses must complete the implementation by implementing the
239 L{AbstractDensity.log_prob} method. Subclasses could also consider--but
240 are not obliged to--override the L{AbstractDensity.random} method.
241 """
242
243 __metaclass__ = ABCMeta
244
245
252
254
255 if param in self._params:
256 return self._params[param]
257 else:
258 raise ParameterNotFoundError(param)
259
272
273 @property
275 return self._estimator
276 @estimator.setter
278 if not isinstance(strategy, AbstractEstimator):
279 raise TypeError(strategy)
280 self._estimator = strategy
281
284
290
292 """
293 Register a new parameter name.
294 """
295 if name not in self._params:
296 self._params[name] = None
297
299 """
300 Parameter value validation hook.
301 """
302 pass
303
306
308
309 for p, v in zip(self.parameters, values):
310 self[p] = v
311
312 for p in named_params:
313 self[p] = named_params[p]
314
315 @property
317 """
318 Get a list of all distribution parameter names.
319 """
320 return tuple(self._params)
321
322 @abstractmethod
324 """
325 Evaluate the logarithm of the probability of observing values C{x}.
326
327 @param x: values
328 @type x: array
329 @rtype: array
330 """
331 pass
332
334 """
335 Evaluate the probability of observing values C{x}.
336
337 @param x: values
338 @type x: array
339 @rtype: array
340 """
341 x = numpy.array(x)
342 return exp(self.log_prob(x))
343
345 """
346 Generate random samples from the probability distribution.
347
348 @param size: number of values to sample
349 @type size: int
350 """
351 raise NotImplementedError()
352
354 """
355 Estimate and load the parameters of the distribution from sample C{data}
356 using the current L{AbstractEstimator} strategy.
357
358 @param data: sample values
359 @type data: array
360
361 @raise NotImplementedError: when no estimator is available for this
362 distribution
363 """
364 pdf = self.estimator.estimate(self, data)
365
366 try:
367 for param in pdf.parameters:
368 self[param] = pdf[param]
369
370 except ParameterNotFoundError:
371 raise IncompatibleEstimatorError(self.estimator)
372
374
384
389
390 @property
393 @b.setter
394 - def b(self, value):
396
397 @property
400 @mu.setter
401 - def mu(self, value):
403
410
417
418 -class Normal(AbstractDensity):
419
429
430 @property
433 @mu.setter
434 - def mu(self, value):
436
437 @property
440 @sigma.setter
442 self['sigma'] = value
443
450
457
459
469
470 @property
473
474 @mu.setter
475 - def mu(self, value):
476 if value <= 0.:
477 raise ValueError("Mean mu should be greater than 0")
478 self['mu'] = value
479
480 @property
483
484 @llambda.setter
486 if value <= 0.:
487 raise ValueError("Shape Parameter lambda should be greater than 0")
488 self['llambda'] = value
489
491
492 mu = self.mu
493 _lambda = self.llambda
494
495 y = -0.5 * _lambda * (x - mu) ** 2 / (mu ** 2 * x)
496 z = 0.5 * (log(_lambda) - log(2 * pi * x ** 3))
497 return z + y
498
499
501
502 mu = self.mu
503 _lambda = self.llambda
504
505 mu_2l = mu / _lambda / 2.
506 Y = numpy.random.standard_normal(size)
507 Y = mu * Y ** 2
508 X = mu + mu_2l * (Y - sqrt(4 * _lambda * Y + Y ** 2))
509 U = numpy.random.random(size)
510
511 m = numpy.less_equal(U, mu / (mu + X))
512
513 return m * X + (1 - m) * mu ** 2 / X
514
516
527
528 @property
531 @mu.setter
532 - def mu(self, value):
534
535 @property
538 @alpha.setter
540 self['alpha'] = value
541
542 @property
545 @beta.setter
546 - def beta(self, value):
548
556
558
568
569 @property
572
573 @a.setter
574 - def a(self, value):
575 if value <= 0:
576 raise ValueError("Parameter a is nonnegative")
577 else:
578 self['a'] = value
579
580 @property
583
584 @b.setter
585 - def b(self, value):
586 if value <= 0:
587 raise ValueError("Parameter b is nonnegative")
588 else:
589 self['b'] = value
590
591 @property
594
595 @p.setter
596 - def p(self, value):
597 if value <= 0:
598 raise ValueError("Parameter p is nonnegative")
599 else:
600 self['p'] = value
601
603
604 a = self['a']
605 b = self['b']
606 p = self['p']
607
608 lz = 0.5 * p * (log(a) - log(b)) - log(2 * scipy.special.kv(p, sqrt(a * b)))
609
610 return lz + (p - 1) * log(x) - 0.5 * (a * x + b / x)
611
613
614 from csb.statistics.rand import inv_gaussian
615
616 rvs = []
617 burnin = 10
618 a = self['a']
619 b = self['b']
620 p = self['p']
621
622 s = a * 0. + 1.
623
624 if p < 0:
625 a, b = b, a
626
627 if size == None:
628 size = 1
629 for i in range(int(size)):
630 for j in range(burnin):
631
632 l = b + 2 * s
633 m = sqrt(l / a)
634
635 x = inv_gaussian(m, l, shape=m.shape)
636 s = numpy.random.gamma(abs(p) + 0.5, x)
637
638 if p >= 0:
639 rvs.append(x)
640 else:
641 rvs.append(1 / x)
642
643 return numpy.array(rvs)
644
645 -class Gamma(AbstractDensity):
646
655
656 @property
659 @alpha.setter
661 self['alpha'] = value
662
663 @property
666
667 @beta.setter
668 - def beta(self, value):
670
672
673 a, b = self['alpha'], self['beta']
674
675 return a * log(b) - gammaln(clip(a, 1e-308, 1e308)) + \
676 (a - 1) * log(clip(x, 1e-308, 1e308)) - b * x
677
680
682
691
692 @property
695
696 @alpha.setter
698 self['alpha'] = value
699
700 @property
703
704 @beta.setter
705 - def beta(self, value):
707
711
714
716
717 - def __init__(self, mu=numpy.zeros(2), sigma=numpy.eye(2)):
721
724
726
727 from numpy.linalg import det
728
729 mu = self.mu
730 S = self.sigma
731 D = len(mu)
732 q = self.__q(x)
733 return -0.5 * (D * log(2 * pi) + log(abs(det(S)))) - 0.5 * q ** 2
734
736 from numpy import sum, dot, reshape
737 from numpy.linalg import inv
738
739 mu = self.mu
740 S = self.sigma
741
742 return sqrt(clip(sum(reshape((x - mu) * dot(x - mu, inv(S).T.squeeze()), (-1, len(mu))), -1), 0., 1e308))
743
745 """
746 Returns the distribution along the dimensions
747 dims conditioned on x
748
749 @param x: conditional values
750 @param dims: new dimensions
751 """
752 from numpy import take, dot
753 from numpy.linalg import inv
754
755 dims2 = [i for i in range(self['mu'].shape[0]) if not i in dims]
756
757 mu1 = take(self['mu'], dims)
758 mu2 = take(self['mu'], dims2)
759
760
761 x2 = take(x, dims2)
762
763 A = take(take(self['Sigma'], dims, 0), dims, 1)
764 B = take(take(self['Sigma'], dims2, 0), dims2, 1)
765 C = take(take(self['Sigma'], dims, 0), dims2, 1)
766
767 mu = mu1 + dot(C, dot(inv(B), x2 - mu2))
768 Sigma = A - dot(C, dot(inv(B), C.T))
769
770 return MultivariateGaussian((mu, Sigma))
771
773
781
782 @property
785
786 @alpha.setter
788 self['alpha'] = numpy.ravel(value)
789
795
798