Package csb :: Package statistics :: Package pdf
[frames | no frames]

Source Code for Package csb.statistics.pdf

  1  """ 
  2  Probability density functions. 
  3   
  4  This module defines L{AbstractDensity}: a common interface for all PDFs. 
  5  Each L{AbstractDensity} describes a specific type of probability distribution, 
  6  for example L{Normal} is an implementation of the Gaussian distribution: 
  7   
  8      >>> pdf = Normal(mu=10, sigma=1.1) 
  9      >>> pdf.mu, pdf['sigma'] 
 10      (10.0, 1.1) 
 11   
 12  Every PDF provides an implementation of the L{AbstractDensity.evaluate}  
 13  method, which evaluates the PDF for a list of input data points: 
 14   
 15      >>> pdf.evaluate([10, 9, 11, 12]) 
 16      array([ 0.3626748 ,  0.2399147 ,  0.2399147 ,  0.06945048]) 
 17       
 18  PDF instances also behave like functions: 
 19       
 20      >>> pdf(data)    # the same as pdf.evaluate(data) 
 21       
 22  Some L{AbstractDensity} implementations may support drawing random numbers from 
 23  the distribution (or raise an exception otherwise): 
 24   
 25      >>> pdf.random(2) 
 26      array([ 9.86257083,  9.73760515]) 
 27       
 28  Each implementation of L{AbstractDensity} may support any number of estimators, 
 29  used to estimate and re-initialize the PDF parameters from a set of observed data 
 30  points: 
 31   
 32      >>> pdf.estimate([5, 5, 10, 10]) 
 33      >>> pdf.mu, pdf.sigma 
 34      (7.5, 2.5) 
 35      >>> pdf.estimator 
 36      <csb.statistics.pdf.GaussianMLEstimator> 
 37       
 38  Estimators implement the L{AbstractEstimator} interface. They are treated as 
 39  pluggable tools, which can be exchanged through the L{AbstractDensity.estimator} 
 40  property (you could create, initialize and plug your own estimator as well). 
 41  This is a classic Strategy pattern.   
 42  """ 
 43   
 44  import numpy.random 
 45  import scipy.special 
 46  import csb.core 
 47   
 48  from abc import ABCMeta, abstractmethod 
 49  from csb.core import OrderedDict 
 50   
 51  from csb.numeric import log, exp, psi, inv_psi 
 52  from scipy.special import gammaln 
 53  from numpy import array, fabs, power, sqrt, pi, mean, median, clip 
class IncompatibleEstimatorError(TypeError):
    """
    Signals that an estimator produced parameters which cannot be loaded
    into the density it was asked to estimate.
    """
58
class ParameterNotFoundError(AttributeError):
    """
    Signals access to a parameter name which is not registered with a density.
    """
61
class ParameterValueError(ValueError):
    """
    Signals an invalid value for a density parameter.

    @ivar param: name of the offending parameter
    @ivar value: the rejected value
    """

    def __init__(self, param, value):
        """
        @param param: parameter name
        @param value: rejected parameter value
        """
        # Both items also become the exception's C{args}.
        super(ParameterValueError, self).__init__(param, value)

        self.param, self.value = param, value
70
class AbstractEstimator(object):
    """
    Strategy interface for estimating the parameters of a density.
    """

    __metaclass__ = ABCMeta

    @abstractmethod
    def estimate(self, context, data):
        """
        Estimate the parameters of the distribution from sample C{data}.

        @param context: context distribution
        @type context: L{AbstractDensity}
        @param data: sample values
        @type data: array

        @return: a new distribution, initialized with the estimated parameters
        @rtype: L{AbstractDensity}
        """
        return None
93
class NullEstimator(AbstractEstimator):
    """
    Placeholder strategy: performs no estimation at all.
    """

    def estimate(self, context, data):
        """
        Always fail, because this strategy cannot estimate anything.

        @param context: context distribution (ignored)
        @param data: sample values (ignored)

        @raise NotImplementedError: unconditionally
        """
        raise NotImplementedError()
100
class LaplaceMLEstimator(AbstractEstimator):
    """
    Estimates L{Laplace} parameters: the location is the sample median and
    the scale is the mean absolute deviation from that median.
    """

    def estimate(self, context, data):
        """
        @param context: context distribution (not used)
        @type context: L{AbstractDensity}
        @param data: sample values
        @type data: array

        @return: a new Laplace distribution with the estimated parameters
        @rtype: L{Laplace}
        """
        sample = array(data)

        location = median(sample)
        scale = mean(fabs(sample - location))

        return Laplace(scale, location)
111
class GaussianMLEstimator(AbstractEstimator):
    """
    Estimates L{Normal} parameters: the sample mean and the root of the
    mean squared deviation from it.
    """

    def estimate(self, context, data):
        """
        @param context: context distribution (not used)
        @type context: L{AbstractDensity}
        @param data: sample values
        @type data: array

        @return: a new normal distribution with the estimated parameters
        @rtype: L{Normal}
        """
        sample = array(data)

        center = mean(sample)
        squared_dev = (sample - center) ** 2
        spread = sqrt(mean(squared_dev))

        return Normal(center, spread)
122
class InverseGammaPosteriorSampler(AbstractEstimator):
    """
    Density parameter estimation based on adaptive rejection sampling.

    The class body defines no members of its own.
    """
128
class GammaMLEstimator(AbstractEstimator):
    """
    Iterative estimator for the C{alpha} and C{beta} parameters of L{Gamma}.

    @ivar n_iter: number of refinement iterations applied to C{alpha}
    """

    def __init__(self):
        super(GammaMLEstimator, self).__init__()
        self.n_iter = 1000

    def estimate(self, context, data):
        """
        @param context: context distribution (not used)
        @type context: L{AbstractDensity}
        @param data: sample values
        @type data: array

        @return: a new gamma distribution with the estimated parameters
        @rtype: L{Gamma}
        """
        sample_mean = mean(data)
        mean_of_logs = mean(log(data))
        log_of_mean = log(sample_mean)

        # starting guess for alpha
        alpha = 0.5 / (log_of_mean - mean_of_logs)

        # constant part of the update; only log(alpha) changes between steps
        offset = mean_of_logs - log_of_mean

        for dummy in range(self.n_iter):
            alpha = inv_psi(offset + log(alpha))

        return Gamma(alpha, alpha / sample_mean)
148
class GenNormalBruteForceEstimator(AbstractEstimator):
    """
    Grid-search estimator for L{GeneralizedNormal}: every C{beta} on a regular
    grid is tried and the one giving the highest total log-probability of the
    data is kept.
    """

    def __init__(self, minbeta=0.5, maxbeta=8.0, step=0.1):
        """
        @param minbeta: first C{beta} value on the grid
        @type minbeta: float
        @param maxbeta: upper bound of the grid (passed to C{numpy.arange})
        @type maxbeta: float
        @param step: grid spacing
        @type step: float
        """
        self._minbeta = minbeta
        self._maxbeta = maxbeta
        self._step = step

        super(GenNormalBruteForceEstimator, self).__init__()

    def estimate(self, context, data):
        """
        @param context: context distribution (not used)
        @type context: L{AbstractDensity}
        @param data: sample values
        @type data: array

        @return: a new distribution, initialized with the best parameters found
        @rtype: L{GeneralizedNormal}
        """
        pdf = GeneralizedNormal(1, 1, 1)
        data = array(data)
        logl = []

        for beta in numpy.arange(self._minbeta, self._maxbeta, self._step):

            self.update(pdf, data, beta)

            total = pdf.log_prob(data).sum()
            logl.append([beta, total])

        logl = numpy.array(logl)

        # optimal parameters: the grid row with the largest log-likelihood
        beta = logl[numpy.argmax(logl[:, 1])][0]
        self.update(pdf, data, beta)

        return pdf

    def estimate_with_fixed_beta(self, data, beta):
        """
        Compute C{mu} and C{alpha} for a given C{beta}.

        @param data: sample values
        @type data: array
        @param beta: fixed value of the C{beta} parameter
        @type beta: float

        @return: the pair (mu, alpha)
        @rtype: tuple
        """
        # Accept plain sequences too: the arithmetic below needs an array.
        data = array(data)

        mu = median(data)
        v = mean((data - mu) ** 2)
        alpha = sqrt(v * exp(gammaln(1. / beta) - gammaln(3. / beta)))

        return mu, alpha

    def update(self, pdf, data, beta):
        """
        Load C{beta} and the matching C{mu} and C{alpha} into C{pdf}.

        @param pdf: distribution to modify in place
        @type pdf: L{GeneralizedNormal}
        @param data: sample values
        @type data: array
        @param beta: value of the C{beta} parameter
        @type beta: float

        @return: the same C{pdf} instance
        @rtype: L{GeneralizedNormal}
        """
        mu, alpha = self.estimate_with_fixed_beta(data, beta)

        pdf.mu = mu
        pdf.alpha = alpha
        pdf.beta = beta

        return pdf
197
class MultivariateGaussianMLEstimator(AbstractEstimator):
    """
    Estimates L{MultivariateGaussian} parameters from the column-wise mean
    of the data and C{numpy.cov} of the transposed data.
    """

    def __init__(self):
        # NOTE(review): the constructor body was absent from the listing;
        # restored as the plain base-class call used by the sibling estimators.
        super(MultivariateGaussianMLEstimator, self).__init__()

    def estimate(self, context, data):
        """
        @param context: context distribution (not used)
        @type context: L{AbstractDensity}
        @param data: sample values, one observation per row
        @type data: array

        @return: a new distribution, initialized with the estimated parameters
        @rtype: L{MultivariateGaussian}
        """
        # Plain nested sequences have no C{.T}; convert before transposing.
        data = numpy.asarray(data)

        return MultivariateGaussian(numpy.mean(data, 0), numpy.cov(data.T))
205
class DirichletEstimator(AbstractEstimator):
    """
    Iterative estimator for the C{alpha} vector of L{Dirichlet}.

    @ivar n_iter: maximum number of iterations
    @ivar tol: stop once the summed absolute change of C{alpha} between two
               consecutive iterations does not exceed this value
    """

    def __init__(self):
        super(DirichletEstimator, self).__init__()
        self.n_iter = 1000
        self.tol = 1e-5

    def estimate(self, context, data):
        """
        @param context: context distribution (not used)
        @type context: L{AbstractDensity}
        @param data: sample values, one observation per row
        @type data: array

        @return: a new distribution, initialized with the estimated parameters
        @rtype: L{Dirichlet}
        """
        # C{data ** 2} below needs an array, not a plain sequence.
        data = numpy.asarray(data)

        log_p = numpy.mean(log(data), 0)

        # starting point derived from the first and second moments
        e = numpy.mean(data, 0)
        v = numpy.mean(data ** 2, 0)
        q = (e[0] - v[0]) / (v[0] - e[0] ** 2)

        a = e * q

        for dummy in range(self.n_iter):
            y = psi(sum(a)) + log_p
            updated = numpy.array(list(map(inv_psi, y)))

            # Convergence is judged on successive alpha estimates. Comparing
            # the digamma values y against alpha (as was done before) can
            # never drop below tol, so the loop always ran n_iter times.
            converged = sum(abs(updated - a)) <= self.tol
            a = updated

            if converged:
                break

        return Dirichlet(a)
231
class AbstractDensity(object):
    """
    Common interface and shared machinery of all probability density
    functions.

    Concrete densities have to provide L{AbstractDensity.log_prob}. They may
    additionally override L{AbstractDensity.random}, but this is optional.
    """

    __metaclass__ = ABCMeta

    def __init__(self):

        self._params = OrderedDict()
        self._estimator = None

        # goes through the property setter, so the type check applies
        self.estimator = NullEstimator()

    def __getitem__(self, param):

        if param not in self._params:
            raise ParameterNotFoundError(param)

        return self._params[param]

    def __setitem__(self, param, value):

        if param not in self._params:
            raise ParameterNotFoundError(param)

        # iterables are stored as arrays, everything else as a float
        if csb.core.iterable(value):
            value = array(value)
        else:
            value = float(value)

        self._validate(param, value)
        self._params[param] = value

    @property
    def estimator(self):
        return self._estimator

    @estimator.setter
    def estimator(self, strategy):
        if isinstance(strategy, AbstractEstimator):
            self._estimator = strategy
        else:
            raise TypeError(strategy)

    def __call__(self, x):
        return self.evaluate(x)

    def __str__(self):
        pairs = ('{0}={1}'.format(key, val) for key, val in self._params.items())
        return '{0}({1})'.format(self.__class__.__name__, ', '.join(pairs))

    def _register(self, name):
        """
        Register a new parameter name.
        """
        if name in self._params:
            return
        self._params[name] = None

    def _validate(self, param, value):
        """
        Parameter value validation hook.
        """
        return None

    def get_params(self):
        values = []
        for name in self.parameters:
            values.append(self._params[name])
        return values

    def set_params(self, *values, **named_params):

        # positional values follow the registration order of the parameters
        for name, value in zip(self.parameters, values):
            self[name] = value

        for name in named_params:
            self[name] = named_params[name]

    @property
    def parameters(self):
        """
        Get a list of all distribution parameter names.
        """
        return tuple(self._params)

    @abstractmethod
    def log_prob(self, x):
        """
        Evaluate the logarithm of the probability of observing values C{x}.

        @param x: values
        @type x: array
        @rtype: array
        """
        return None

    def evaluate(self, x):
        """
        Evaluate the probability of observing values C{x}.

        @param x: values
        @type x: array
        @rtype: array
        """
        points = numpy.array(x)
        return exp(self.log_prob(points))

    def random(self, size=None):
        """
        Generate random samples from the probability distribution.

        @param size: number of values to sample
        @type size: int
        """
        raise NotImplementedError()

    def estimate(self, data):
        """
        Estimate and load the parameters of the distribution from sample C{data}
        using the current L{AbstractEstimator} strategy.

        @param data: sample values
        @type data: array

        @raise NotImplementedError: when no estimator is available for this
                                    distribution
        @raise IncompatibleEstimatorError: when the estimated distribution has a
                                           parameter this density does not know
        """
        fitted = self.estimator.estimate(self, data)

        try:
            for name in fitted.parameters:
                self[name] = fitted[name]

        except ParameterNotFoundError:
            raise IncompatibleEstimatorError(self.estimator)
372
class Laplace(AbstractDensity):
    """
    Laplace density with scale C{b} and location C{mu}.
    """

    def __init__(self, b, mu):
        """
        @param b: scale parameter
        @type b: float
        @param mu: location parameter
        @type mu: float
        """
        super(Laplace, self).__init__()

        for name in ('b', 'mu'):
            self._register(name)

        self.set_params(b=b, mu=mu)
        self.estimator = LaplaceMLEstimator()

    def _validate(self, param, value):
        # only the scale is constrained: negative values are rejected
        if param == 'b' and value < 0:
            raise ParameterValueError(param, value)

    @property
    def b(self):
        return self['b']

    @b.setter
    def b(self, value):
        self['b'] = value

    @property
    def mu(self):
        return self['mu']

    @mu.setter
    def mu(self, value):
        self['mu'] = value

    def log_prob(self, x):
        """
        Evaluate the logarithm of the density at C{x}.

        @param x: values
        @type x: array
        @rtype: array
        """
        scale, location = self.b, self.mu

        log_norm = log(1 / (2. * scale))
        return log_norm - fabs(x - location) / scale

    def random(self, size=None):
        """
        Draw samples using C{numpy.random.laplace}.

        @param size: number of values to sample
        @type size: int
        """
        return numpy.random.laplace(self.mu, self.b, size)
417
class Normal(AbstractDensity):
    """
    Gaussian density with mean C{mu} and standard deviation C{sigma}.
    """

    def __init__(self, mu=0, sigma=1):
        """
        @param mu: mean
        @type mu: float
        @param sigma: standard deviation
        @type sigma: float
        """
        super(Normal, self).__init__()

        for name in ('mu', 'sigma'):
            self._register(name)

        self.set_params(mu=mu, sigma=sigma)
        self.estimator = GaussianMLEstimator()

    @property
    def mu(self):
        return self['mu']

    @mu.setter
    def mu(self, value):
        self['mu'] = value

    @property
    def sigma(self):
        return self['sigma']

    @sigma.setter
    def sigma(self, value):
        self['sigma'] = value

    def log_prob(self, x):
        """
        Evaluate the logarithm of the density at C{x}.

        @param x: values
        @type x: array
        @rtype: array
        """
        center = self.mu
        variance = self.sigma ** 2

        log_norm = log(1.0 / sqrt(2 * pi * variance))
        return log_norm - (x - center) ** 2 / (2 * variance)

    def random(self, size=None):
        """
        Draw samples using C{numpy.random.normal}.

        @param size: number of values to sample
        @type size: int
        """
        return numpy.random.normal(self.mu, self.sigma, size)
457
class InverseGaussian(AbstractDensity):
    """
    Inverse Gaussian density with mean C{mu} and shape C{llambda}.
    """

    def __init__(self, mu=1., llambda=1.):
        """
        @param mu: mean
        @type mu: float
        @param llambda: shape parameter
        @type llambda: float
        """
        super(InverseGaussian, self).__init__()

        self._register('mu')
        self._register('llambda')

        self.set_params(mu=mu, llambda=llambda)
        # Assign the strategy property. Assigning to C{self.estimate} would
        # replace the inherited estimate() method with a non-callable object.
        self.estimator = NullEstimator()

    @property
    def mu(self):
        return self['mu']

    @mu.setter
    def mu(self, value):
        if value <= 0.:
            raise ValueError("Mean mu should be greater than 0")
        self['mu'] = value

    @property
    def llambda(self):
        # return the shape parameter itself, not the mean
        return self['llambda']

    @llambda.setter
    def llambda(self, value):
        if value <= 0.:
            raise ValueError("Shape Parameter lambda should be greater than 0")
        self['llambda'] = value

    def log_prob(self, x):
        """
        Evaluate the logarithm of the density at C{x}.

        @param x: values
        @type x: array
        @rtype: array
        """
        mu = self.mu
        _lambda = self.llambda

        # exponent term
        y = -0.5 * _lambda * (x - mu) ** 2 / (mu ** 2 * x)
        # log of the normalizing factor sqrt(lambda / (2 pi x^3))
        z = 0.5 * (log(_lambda) - log(2 * pi * x ** 3))
        return z + y

    def random(self, size=None):
        """
        Draw samples by transforming standard normal and uniform variates.

        @param size: number of values to sample
        @type size: int
        """
        mu = self.mu
        _lambda = self.llambda

        mu_2l = mu / _lambda / 2.
        Y = numpy.random.standard_normal(size)
        Y = mu * Y ** 2
        X = mu + mu_2l * (Y - sqrt(4 * _lambda * Y + Y ** 2))
        U = numpy.random.random(size)

        # keep X where U <= mu / (mu + X), otherwise use mu^2 / X
        m = numpy.less_equal(U, mu / (mu + X))

        return m * X + (1 - m) * mu ** 2 / X
514
class GeneralizedNormal(AbstractDensity):
    """
    Generalized normal density with parameters C{mu}, C{alpha} and C{beta}.
    """

    def __init__(self, mu, alpha, beta):
        """
        @param mu: value of the C{mu} parameter
        @type mu: float
        @param alpha: value of the C{alpha} parameter
        @type alpha: float
        @param beta: value of the C{beta} parameter
        @type beta: float
        """
        super(GeneralizedNormal, self).__init__()

        for name in ('mu', 'alpha', 'beta'):
            self._register(name)

        self.set_params(mu=mu, alpha=alpha, beta=beta)
        self.estimator = GenNormalBruteForceEstimator()

    @property
    def mu(self):
        return self['mu']

    @mu.setter
    def mu(self, value):
        self['mu'] = value

    @property
    def alpha(self):
        return self['alpha']

    @alpha.setter
    def alpha(self, value):
        self['alpha'] = value

    @property
    def beta(self):
        return self['beta']

    @beta.setter
    def beta(self, value):
        self['beta'] = value

    def log_prob(self, x):
        """
        Evaluate the logarithm of the density at C{x}.

        @param x: values
        @type x: array
        @rtype: array
        """
        center, width, shape = self.mu, self.alpha, self.beta

        log_norm = log(shape / (2.0 * width)) - gammaln(1. / shape)
        return log_norm - power(fabs(x - center) / width, shape)
556
class GeneralizedInverseGaussian(AbstractDensity):
    """
    Generalized inverse Gaussian density with parameters C{a}, C{b} and C{p}.
    """

    def __init__(self, a=1., b=1., p=1.):
        """
        @param a: value of the C{a} parameter
        @type a: float
        @param b: value of the C{b} parameter
        @type b: float
        @param p: value of the C{p} parameter
        @type p: float
        """
        super(GeneralizedInverseGaussian, self).__init__()

        self._register('a')
        self._register('b')
        self._register('p')
        self.set_params(a=a, b=b, p=p)

        self.estimator = NullEstimator()

    @property
    def a(self):
        return self['a']

    @a.setter
    def a(self, value):
        # zero is rejected as well, so the requirement is strict positivity
        if value <= 0:
            raise ValueError("Parameter a must be positive")
        else:
            self['a'] = value

    @property
    def b(self):
        return self['b']

    @b.setter
    def b(self, value):
        if value <= 0:
            raise ValueError("Parameter b must be positive")
        else:
            self['b'] = value

    @property
    def p(self):
        return self['p']

    @p.setter
    def p(self, value):
        # NOTE(review): random() has a branch for p < 0, which this setter
        # never lets through; confirm whether negative p should be allowed.
        if value <= 0:
            raise ValueError("Parameter p must be positive")
        else:
            self['p'] = value

    def log_prob(self, x):
        """
        Evaluate the logarithm of the density at C{x}.

        @param x: values
        @type x: array
        @rtype: array
        """
        a = self['a']
        b = self['b']
        p = self['p']

        # log of the normalizing constant
        lz = 0.5 * p * (log(a) - log(b)) - log(2 * scipy.special.kv(p, sqrt(a * b)))

        return lz + (p - 1) * log(x) - 0.5 * (a * x + b / x)

    def random(self, size=None):
        """
        Draw samples by alternating inverse-Gaussian and gamma draws. Each
        returned value is taken after a fixed number of such alternations.

        @param size: number of values to sample (a single value if None)
        @type size: int

        @return: the sampled values
        @rtype: array
        """
        from csb.statistics.rand import inv_gaussian

        rvs = []
        burnin = 10
        a = self['a']
        b = self['b']
        p = self['p']

        s = a * 0. + 1.

        # for negative p, sample with swapped a and b and invert the result
        if p < 0:
            a, b = b, a

        if size is None:
            size = 1

        for dummy_i in range(int(size)):
            for dummy_j in range(burnin):

                l = b + 2 * s
                m = sqrt(l / a)

                x = inv_gaussian(m, l, shape=m.shape)
                s = numpy.random.gamma(abs(p) + 0.5, x)

            if p >= 0:
                rvs.append(x)
            else:
                rvs.append(1 / x)

        return numpy.array(rvs)
644
class Gamma(AbstractDensity):
    """
    Gamma density with shape C{alpha} and rate C{beta}.
    """

    def __init__(self, alpha=1, beta=1):
        """
        @param alpha: shape parameter
        @type alpha: float
        @param beta: rate parameter
        @type beta: float
        """
        super(Gamma, self).__init__()

        for name in ('alpha', 'beta'):
            self._register(name)

        self.set_params(alpha=alpha, beta=beta)
        self.estimator = GammaMLEstimator()

    @property
    def alpha(self):
        return self['alpha']

    @alpha.setter
    def alpha(self, value):
        self['alpha'] = value

    @property
    def beta(self):
        return self['beta']

    @beta.setter
    def beta(self, value):
        self['beta'] = value

    def log_prob(self, x):
        """
        Evaluate the logarithm of the density at C{x}.

        @param x: values
        @type x: array
        @rtype: array
        """
        shape = self['alpha']
        rate = self['beta']

        # arguments are clipped to [1e-308, 1e308] before gammaln and log
        log_gamma = gammaln(clip(shape, 1e-308, 1e308))
        log_x = log(clip(x, 1e-308, 1e308))

        return shape * log(rate) - log_gamma + (shape - 1) * log_x - rate * x

    def random(self, size=None):
        """
        Draw samples using C{numpy.random.gamma} with scale C{1 / beta}.

        @param size: number of values to sample
        @type size: int
        """
        scale = 1 / self['beta']
        return numpy.random.gamma(self['alpha'], scale, size)
680
class InverseGamma(AbstractDensity):
    """
    Inverse gamma density with parameters C{alpha} and C{beta}.
    """

    def __init__(self, alpha=1, beta=1):
        """
        @param alpha: value of the C{alpha} parameter
        @type alpha: float
        @param beta: value of the C{beta} parameter
        @type beta: float
        """
        super(InverseGamma, self).__init__()

        for name in ('alpha', 'beta'):
            self._register(name)

        self.set_params(alpha=alpha, beta=beta)
        self.estimator = NullEstimator()

    @property
    def alpha(self):
        return self['alpha']

    @alpha.setter
    def alpha(self, value):
        self['alpha'] = value

    @property
    def beta(self):
        return self['beta']

    @beta.setter
    def beta(self, value):
        self['beta'] = value

    def log_prob(self, x):
        """
        Evaluate the logarithm of the density at C{x}.

        @param x: values
        @type x: array
        @rtype: array
        """
        shape = self['alpha']
        scale = self['beta']

        log_norm = shape * log(scale) - gammaln(shape)
        return log_norm - (shape + 1) * log(x) - scale / x

    def random(self, size=None):
        """
        Draw samples as reciprocals of C{numpy.random.gamma} variates.

        @param size: number of values to sample
        @type size: int
        """
        gamma_draws = numpy.random.gamma(self['alpha'], 1 / self['beta'], size)
        return 1. / gamma_draws
714
class MultivariateGaussian(Normal):
    """
    Multivariate Gaussian density with mean vector C{mu} and covariance
    matrix C{sigma}.
    """

    def __init__(self, mu=numpy.zeros(2), sigma=numpy.eye(2)):
        """
        @param mu: mean vector
        @type mu: array
        @param sigma: covariance matrix
        @type sigma: array
        """
        # NOTE(review): the constructor body was absent from the listing. It is
        # reconstructed from the pattern of the sibling densities: initialize
        # the base class, then plug in the matching estimator. Confirm against
        # the original file.
        super(MultivariateGaussian, self).__init__(mu, sigma)
        self.estimator = MultivariateGaussianMLEstimator()

    def random(self, size=None):
        """
        Draw samples using C{numpy.random.multivariate_normal}.

        @param size: number of values to sample
        @type size: int
        """
        return numpy.random.multivariate_normal(self.mu, self.sigma, size)

    def log_prob(self, x):
        """
        Evaluate the logarithm of the density at C{x}.

        @param x: values
        @type x: array
        @rtype: array
        """
        from numpy.linalg import det

        mu = self.mu
        S = self.sigma
        D = len(mu)
        q = self.__q(x)
        return -0.5 * (D * log(2 * pi) + log(abs(det(S)))) - 0.5 * q ** 2

    def __q(self, x):
        """
        Square root of the quadratic form (x - mu) S^-1 (x - mu), clipped at
        zero from below, computed for each row of C{x}.
        """
        from numpy import sum, dot, reshape
        from numpy.linalg import inv

        mu = self.mu
        S = self.sigma

        return sqrt(clip(sum(reshape((x - mu) * dot(x - mu, inv(S).T.squeeze()), (-1, len(mu))), -1), 0., 1e308))

    def conditional(self, x, dims):
        """
        Returns the distribution along the dimensions
        dims conditioned on x

        @param x: conditional values
        @param dims: new dimensions

        @return: the conditional distribution over C{dims}
        @rtype: L{MultivariateGaussian}
        """
        from numpy import take, dot
        from numpy.linalg import inv

        # the remaining dimensions are the ones being conditioned on
        dims2 = [i for i in range(self['mu'].shape[0]) if i not in dims]

        mu1 = take(self['mu'], dims)
        mu2 = take(self['mu'], dims2)

        x2 = take(x, dims2)

        # The covariance is registered as 'sigma'; the key 'Sigma' used
        # before is not a registered parameter and raised
        # ParameterNotFoundError.
        A = take(take(self['sigma'], dims, 0), dims, 1)
        B = take(take(self['sigma'], dims2, 0), dims2, 1)
        C = take(take(self['sigma'], dims, 0), dims2, 1)

        mu = mu1 + dot(C, dot(inv(B), x2 - mu2))
        Sigma = A - dot(C, dot(inv(B), C.T))

        # mean and covariance are two separate constructor arguments
        return MultivariateGaussian(mu, Sigma)
771
class Dirichlet(AbstractDensity):
    """
    Dirichlet density with concentration vector C{alpha}.
    """

    def __init__(self, alpha):
        """
        @param alpha: concentration parameters
        @type alpha: array
        """
        super(Dirichlet, self).__init__()

        self._register('alpha')

        self.set_params(alpha=alpha)
        self.estimator = DirichletEstimator()

    @property
    def alpha(self):
        return self['alpha']

    @alpha.setter
    def alpha(self, value):
        # stored flattened
        flat = numpy.ravel(value)
        self['alpha'] = flat

    def log_prob(self, x):
        """
        Evaluate the logarithm of the density at C{x}.

        @param x: values
        @type x: array
        @rtype: array
        """
        #TODO check whether x is in the probability simplex
        conc = self.alpha

        log_norm = gammaln(sum(conc)) - sum(gammaln(conc))
        weighted = numpy.dot((conc - 1).T, log(x).T)

        return log_norm + weighted

    def random(self, size=None):
        """
        Draw samples using C{numpy.random.mtrand.dirichlet}.

        @param size: number of values to sample
        @type size: int
        """
        return numpy.random.mtrand.dirichlet(self.alpha, size)
798