Package csb :: Package statistics :: Module scalemixture
[frames] | [no frames]

Source Code for Module csb.statistics.scalemixture

  1  """ 
  2  Approximation of a distribution as a mixture of gaussians with a zero mean but different sigmas 
  3  """ 
  4   
  5  import numpy.random 
  6   
  7  import csb.core 
  8  import csb.statistics.ars 
  9  import csb.statistics.rand 
 10   
 11  from abc import abstractmethod, ABCMeta 
 12   
 13  from csb.numeric import log, exp, approx_psi, inv_psi, d_approx_psi 
 14  from scipy.special import  psi, kve 
 15  from csb.statistics import harmonic_mean, geometric_mean 
 16  from csb.statistics.pdf import AbstractEstimator, AbstractDensity, Gamma, InverseGamma, NullEstimator 
def inv_digamma_minus_log(y, tol=1e-10, n_iter=100):
    """
    Solve y = psi(alpha) - log(alpha) for alpha by fixed point
    iteration.

    @param y: target value; the equation only has solutions for y < 0
    @param tol: convergence tolerance on the residual
    @param n_iter: maximum number of iterations
    @return: the (positive) solution alpha
    """
    # Starting guess: an asymptotic inversion for moderate y, otherwise a
    # tiny positive value.
    if y >= -log(6.):
        alpha = 1 / (2 * (1 - exp(y)))
    else:
        alpha = 1.e-10

    steps = 0
    while steps < n_iter:
        residual = approx_psi(alpha) - log(alpha) - y
        if abs(residual) < tol:
            break
        # Newton-type update on the approximate digamma; the abs() keeps the
        # iterate in the positive domain.
        alpha -= alpha * residual / (alpha * d_approx_psi(alpha) - 1)
        alpha = abs(alpha)
        steps += 1

    return alpha
35
class ScaleMixturePriorEstimator(AbstractEstimator):
    """
    Abstract estimator of the parameters of a L{ScaleMixture} prior.

    Besides estimating the prior parameters, a subclass also provides,
    via L{get_scale_estimator}, the matching estimator for the scales of
    the mixture itself.
    """

    __metaclass__ = ABCMeta

    @abstractmethod
    def get_scale_estimator(self):
        """
        Return an appropriate estimator for the scales of the mixture
        distribution under this prior.
        """
        pass
51
class ScaleMixturePrior(object):
    """
    Prior on the scales of a L{ScaleMixture}, which determines how the scales
    are estimated.
    """

    def __init__(self, *args):
        super(ScaleMixturePrior, self).__init__(*args)
        self._scale = None
        # BUG FIX: _estimator was never initialized, so reading the
        # `estimator` property before assigning one raised AttributeError.
        # Start both estimators with the no-op NullEstimator.
        self._estimator = NullEstimator()
        self._scale_estimator = NullEstimator()

    @property
    def scale_estimator(self):
        # Estimator used for the scales of the owning L{ScaleMixture}.
        return self._scale_estimator

    @property
    def estimator(self):
        # Estimator used for the parameters of this prior.
        return self._estimator

    @estimator.setter
    def estimator(self, strategy):
        if not isinstance(strategy, AbstractEstimator):
            raise TypeError(strategy)
        self._estimator = strategy
        # A prior-aware estimator also knows how the scales should be
        # estimated; any other estimator disables scale estimation.
        if isinstance(strategy, ScaleMixturePriorEstimator):
            self._scale_estimator = strategy.get_scale_estimator()
        else:
            self._scale_estimator = NullEstimator()
81
class ScaleMixture(AbstractDensity):
    """
    Robust probabilistic superposition and comparison of protein structures
    Martin Mechelke and Michael Habeck

    Representation of a distribution as a mixture of gaussians with a mean of
    zero and different inverse variances/scales. The number of scales equals
    the number of datapoints.

    The underlying family is determined by a prior L{ScaleMixturePrior} on the
    scales. Choosing a L{GammaPrior} results in a Student-t posterior, whereas an
    L{InvGammaPrior} leads to a K-distribution as posterior.
    """

    def __init__(self, scales=None, prior=None, d=3):
        """
        @param scales: initial scales, one per datapoint (defaults to [1., 1.])
        @param prior: optional L{ScaleMixturePrior} on the scales
        @param d: dimensionality of the data points
        """
        super(ScaleMixture, self).__init__()

        self._register('scales')

        # BUG FIX: the default was a mutable module-level numpy array shared
        # across calls; build a fresh default per instance instead.
        if scales is None:
            scales = numpy.array([1., 1.])

        self._d = d
        self.set_params(scales=scales)
        self._prior = prior

        if self._prior is not None:
            self.estimator = self._prior.scale_estimator

    @property
    def scales(self):
        return numpy.squeeze(self['scales'])

    @scales.setter
    def scales(self, value):
        # Strings are sequences too, so exclude them explicitly.
        if not isinstance(value, csb.core.string) and \
               isinstance(value, (numpy.ndarray, list, tuple)):
            self['scales'] = numpy.array(value)
        else:
            raise ValueError("numpy array expected")

    @property
    def prior(self):
        return self._prior

    @prior.setter
    def prior(self, value):
        if not isinstance(value, ScaleMixturePrior):
            raise TypeError(value)
        self._prior = value
        self.estimator = self._prior.scale_estimator

    def log_prob(self, x):
        """
        Log-probability of C{x} under the scale mixture, summed over the
        mixture components in log space.
        """
        from csb.numeric import log_sum_exp

        dim = self._d
        s = self.scales

        # Gaussian log-density with zero mean and precision s, one column
        # per scale.
        log_p = numpy.squeeze(-numpy.multiply.outer(x * x, 0.5 * s)) + \
                numpy.squeeze(dim * 0.5 * (log(s) - log(2 * numpy.pi)))

        if self._prior is not None:
            log_p += numpy.squeeze(self._prior.log_prob(s))
        return log_sum_exp(log_p.T, 0)

    def random(self, shape=None):
        """
        Draw random variates by picking a scale uniformly at random and
        multiplying it with a standard normal variate.
        """
        s = self.scales

        if shape is None:
            return numpy.random.normal() * s[numpy.random.randint(len(s))]

        else:
            nrv = numpy.random.normal(size=shape)
            indices = numpy.random.randint(len(s), size=shape)

            return s[indices] * nrv
160
class ARSPosteriorAlpha(csb.statistics.ars.LogProb):
    """
    This class represents the posterior distribution of the alpha parameter
    of the Gamma and Inverse Gamma prior, and allows sampling using adaptive
    rejection sampling L{ARS}.
    """

    def __init__(self, a, b, n):
        # a, b: linear coefficients of the log-posterior in alpha and
        # log(alpha); n: number of data points (cast to float for the
        # arithmetic below).
        self.a = float(a)
        self.b = float(b)
        self.n = float(n)

    def __call__(self, x):
        # Returns a tuple (log-probability, derivative) at x, as required
        # by the ARS machinery. The clip guards gammaln against 0/overflow.
        from scipy.special import gammaln

        return self.a * x - \
               self.n * gammaln(numpy.clip(x, 1e-308, 1e308)) + \
               self.b * log(x), \
               self.a - self.n * psi(x) + self.b / x

    def initial_values(self, tol=1e-10):
        """
        Generate initial values by doing fixed point
        iterations to solve for alpha
        """
        n, a, b = self.n, self.a, self.b

        # With b ~ 0 the stationarity condition reduces to psi(alpha) = a/n,
        # which can be inverted directly.
        if abs(b) < 1e-10:
            alpha = inv_psi(a / n)
        else:
            alpha = 1.

        z = tol + 1.

        while abs(z) > tol:

            # Residual of the stationarity condition; clips keep the
            # division away from 0 and infinity.
            z = n * psi(alpha) - \
                b / numpy.clip(alpha, 1e-300, 1e300) - a

            alpha -= z / (n * d_approx_psi(alpha) - b
                          / (alpha ** 2 + 1e-300))
            alpha = numpy.clip(alpha, 1e-100, 1e300)

        # Return a bracket (below, above) around the mode plus the mode
        # itself, as starting abscissae for ARS.
        return numpy.clip(alpha - 1 / (n + 1e-300), 1e-100, 1e300), \
               alpha + 1 / (n + 1e-300), alpha
209
class GammaPosteriorSampler(ScaleMixturePriorEstimator):
    """
    Gibbs sampler for the parameters (alpha, beta) of a L{GammaPrior}.
    """

    def __init__(self):
        super(GammaPosteriorSampler, self).__init__()
        # Number of Gibbs sweeps over (alpha, beta); only the last sample
        # is kept.
        self.n_samples = 2

    def get_scale_estimator(self):
        return GammaScaleSampler()

    def estimate(self, context, data):
        """
        Generate samples from the posterior of alpha and beta.

        For beta the posterior is a gamma distribution and analytically
        accessible.

        The posterior of alpha can not be expressed analytically and is
        approximated using adaptive rejection sampling.
        """
        pdf = GammaPrior()

        ## sufficient statistics
        a = numpy.mean(data)
        b = exp(numpy.mean(log(data)))
        v = numpy.std(data) ** 2
        n = len(data)

        # Moment-matched starting point for the Gibbs sweep.
        beta = a / v
        alpha = beta * a
        samples = []

        for _i in range(self.n_samples):

            ## sample beta from Gamma distribution
            # CONSISTENCY FIX: use the public hyper_beta property instead of
            # the private attribute _hyper_beta (hyper_alpha below already
            # goes through the property).
            beta = numpy.random.gamma(n * alpha + context.hyper_beta.alpha,
                                      1 / (n * a + context.hyper_beta.beta))

            ## sample alpha with ARS
            logp = ARSPosteriorAlpha(n * log(beta * b)
                                     - context.hyper_alpha.beta,
                                     context.hyper_alpha.alpha - 1., n)
            ars = csb.statistics.ars.ARS(logp)
            ars.initialize(logp.initial_values()[:2], z0=0.)
            alpha = ars.sample()

            if alpha is None:
                raise ValueError("ARS failed")

            samples.append((alpha, beta))

        pdf.alpha, pdf.beta = samples[-1]

        return pdf
265
class GammaPosteriorMAP(ScaleMixturePriorEstimator):
    """
    MAP estimator of the parameters of a L{GammaPrior}.
    """

    def __init__(self):
        super(GammaPosteriorMAP, self).__init__()

    def get_scale_estimator(self):
        return GammaScaleMAP()

    def estimate(self, context, data):
        """
        Estimate alpha and beta from their posterior
        """
        mean_scale = data[0].mean()
        mean_log_scale = data[1].mean()

        # inv_digamma_minus_log solves y = psi(alpha) - log(alpha); clip y
        # into the negative range where a solution exists.
        y = numpy.clip(mean_log_scale - log(mean_scale) - 1.,
                       -1e308, -1e-300)
        alpha = abs(inv_digamma_minus_log(y))

        pdf = GammaPrior()
        pdf.alpha, pdf.beta = alpha, alpha / mean_scale
        return pdf
292
class InvGammaPosteriorSampler(ScaleMixturePriorEstimator):
    """
    Gibbs sampler for the parameters (alpha, beta) of an L{InvGammaPrior}.
    """

    def __init__(self):
        super(InvGammaPosteriorSampler, self).__init__()
        # Number of Gibbs sweeps over (alpha, beta); only the last sample
        # is kept.
        self.n_samples = 2

    def get_scale_estimator(self):
        return InvGammaScaleSampler()

    def estimate(self, context, data):
        """
        Generate samples from the posterior of alpha and beta.

        For beta the posterior is a gamma distribution and analytically
        accessible.

        The posterior of alpha can not be expressed analytically and is
        approximated using adaptive rejection sampling.
        """
        # BUG FIX: this estimator parameterizes an inverse gamma prior, so
        # it must return an InvGammaPrior (the original returned GammaPrior).
        pdf = InvGammaPrior()

        ## sufficient statistics
        h = harmonic_mean(numpy.clip(data, 1e-308, 1e308))
        g = geometric_mean(numpy.clip(data, 1e-308, 1e308))

        n = len(data)

        samples = []

        # Moment-matched starting point for the Gibbs sweep.
        a = numpy.mean(1 / data)
        v = numpy.std(1 / data) ** 2

        beta = a / v
        alpha = beta * a

        for _i in range(self.n_samples):

            ## sample alpha with ARS
            logp = ARSPosteriorAlpha(n * (log(beta) - log(g))
                                     - context.hyper_alpha.beta,
                                     context.hyper_alpha.alpha - 1., n)
            ars = csb.statistics.ars.ARS(logp)
            ars.initialize(logp.initial_values()[:2], z0=0.)

            # BUG FIX: the original applied numpy.abs() to the sample
            # before checking for None, so a failed draw raised TypeError
            # instead of the intended ValueError.
            alpha = ars.sample()

            if alpha is None:
                raise ValueError("Sampling failed")

            alpha = numpy.abs(alpha)

            ## sample beta from Gamma distribution
            beta = numpy.random.gamma(n * alpha + context.hyper_beta.alpha,
                                      1 / (n / h + context.hyper_beta.beta))

            samples.append((alpha, beta))

        pdf.alpha, pdf.beta = samples[-1]
        return pdf
356
class InvGammaPosteriorMAP(ScaleMixturePriorEstimator):
    """
    MAP estimator of the parameters of an L{InvGammaPrior}.
    """

    def __init__(self):
        super(InvGammaPosteriorMAP, self).__init__()

    def get_scale_estimator(self):
        return InvGammaScaleMAP()

    def estimate(self, context, data):
        """
        Estimate alpha and beta of an inverse gamma prior from the
        posterior mode.

        (DOC FIX: the original docstring was copied from the sampler and
        spoke of generating samples; this estimator is deterministic.)
        """
        # BUG FIX: must return an InvGammaPrior, not a GammaPrior, since it
        # parameterizes the inverse gamma family.
        pdf = InvGammaPrior()

        y = log(data).mean() - log((data ** -1).mean())

        # inv_digamma_minus_log solves y = psi(alpha) - log(alpha); clip y
        # into the negative range where a solution exists.
        alpha = inv_digamma_minus_log(numpy.clip(y,
                                                 - 1e308,
                                                 - 1e-300))
        alpha = abs(alpha)

        beta = numpy.clip(alpha /
                          (data ** (-1)).mean(),
                          1e-100, 1e100)

        pdf.alpha, pdf.beta = alpha, beta
        return pdf
392
class GammaScaleSampler(AbstractEstimator):
    """
    Sample the scales given the data
    """

    def estimate(self, context, data):
        # context is the owning L{ScaleMixture}; its prior supplies the
        # gamma parameters for the conditional posterior of the scales.
        pdf = ScaleMixture()
        alpha = context.prior.alpha
        beta = context.prior.beta
        d = context._d

        # Promote 1-D data (per-point residuals) to a column vector.
        if len(data.shape) == 1:
            data = data[:, numpy.newaxis]

        # Conjugate gamma posterior parameters per data point.
        # NOTE(review): `len(data.shape)` is the array rank (2 after the
        # promotion above), and `data.sum(-1) ** 2` squares the row sum —
        # this matches element-wise squaring only when data has one column;
        # confirm intended data layout against the callers.
        a = alpha + 0.5 * d * len(data.shape)
        b = beta + 0.5 * data.sum(-1) ** 2

        # Draw the scales and clip to avoid degenerate values downstream.
        s = numpy.clip(numpy.random.gamma(a, 1. / b), 1e-20, 1e10)
        pdf.scales = s

        # Update the prior parameters from the freshly drawn scales.
        context.prior.estimate(s)
        pdf.prior = context.prior

        return pdf
419
class GammaScaleMAP(AbstractEstimator):
    """
    MAP estimator of the scales
    """

    def estimate(self, context, data):
        # context is the owning L{ScaleMixture}; its prior supplies the
        # gamma parameters for the conditional posterior of the scales.
        pdf = ScaleMixture()
        alpha = context.prior.alpha
        beta = context.prior.beta
        d = context._d

        # Promote 1-D data (per-point residuals) to a column vector.
        if len(data.shape) == 1:
            data = data[:, numpy.newaxis]

        # Conjugate gamma posterior parameters per data point.
        # NOTE(review): `len(data.shape)` is the array rank and
        # `data.sum(-1) ** 2` squares the row sum; see GammaScaleSampler —
        # confirm intended data layout against the callers.
        a = alpha + 0.5 * d * len(data.shape)
        b = beta + 0.5 * data.sum(-1) ** 2

        # Posterior mean of the scale and expectation of its logarithm
        # (psi is the digamma function).
        s = a / b
        log_s = psi(a) - log(b)

        pdf.scales = s
        # The prior update consumes both sufficient statistics.
        context.prior.estimate([s, log_s])
        pdf.prior = context.prior

        return pdf
446
class InvGammaScaleSampler(AbstractEstimator):
    """
    Sample the scales given the data
    """

    def estimate(self, context, data):
        # context is the owning L{ScaleMixture}; its prior supplies the
        # inverse-gamma parameters for the conditional posterior.
        pdf = ScaleMixture()
        alpha = context.prior.alpha
        beta = context.prior.beta
        d = context._d

        # Promote 1-D data (per-point residuals) to a column vector.
        if len(data.shape) == 1:
            data = data[:, numpy.newaxis]

        # Parameters of the generalized inverse Gaussian conditional;
        # the 1e-5 offset keeps `a` strictly positive for zero residuals.
        p = -alpha + 0.5 * d
        b = 2 * beta
        a = 1e-5 + data.sum(-1) ** 2

        # Draw the scales and clip to avoid degenerate values downstream.
        s = csb.statistics.rand.gen_inv_gaussian(a, b, p)
        s = numpy.clip(s, 1e-300, 1e300)

        pdf.scales = s
        # Update the prior parameters from the freshly drawn scales.
        context.prior.estimate(s)
        pdf.prior = context.prior

        return pdf
475
class InvGammaScaleMAP(AbstractEstimator):
    """
    MAP estimator of the scales
    """
    def estimate(self, context, data):
        # context is the owning L{ScaleMixture}; its prior supplies the
        # inverse-gamma parameters for the conditional posterior.
        pdf = ScaleMixture()
        alpha = context.prior.alpha
        beta = context.prior.beta
        d = context._d

        # Promote 1-D data (per-point residuals) to a column vector.
        if len(data.shape) == 1:
            data = data[:, numpy.newaxis]

        # Parameters of the generalized inverse Gaussian conditional;
        # the 1e-5 offset keeps `a` strictly positive for zero residuals.
        p = -alpha + 0.5 * d
        b = 2 * beta
        a = 1e-5 + data.sum(-1) ** 2

        # Point estimate via a ratio of modified Bessel functions; kve is
        # exponentially scaled, and the scaling factors cancel in the ratio.
        s = numpy.clip((numpy.sqrt(b) * kve(p + 1, numpy.sqrt(a * b)))
                        / (numpy.sqrt(a) * kve(p, numpy.sqrt(a * b))),
                        1e-10, 1e10)
        pdf.scales = s
        # Update the prior parameters from the estimated scales.
        context.prior.estimate(s)
        pdf.prior = context.prior

        return pdf
503
class GammaPrior(ScaleMixturePrior, Gamma):
    """
    Gamma prior on mixture weights including a weak gamma prior on its parameter.
    """

    def __init__(self, alpha=1., beta=1., hyper_alpha=(4., 1.),
                 hyper_beta=(2., 1.)):
        """
        @param alpha: shape parameter of the gamma prior
        @param beta: rate parameter of the gamma prior
        @param hyper_alpha: (alpha, beta) pair of the hyper prior on alpha
        @param hyper_beta: (alpha, beta) pair of the hyper prior on beta
        """
        super(GammaPrior, self).__init__(alpha, beta)

        # BUG FIX: the hyper parameters are (alpha, beta) pairs; the
        # original passed element [0] twice, silently discarding the second
        # element of each tuple.
        self._hyper_alpha = Gamma(hyper_alpha[0], hyper_alpha[1])
        self._hyper_beta = Gamma(hyper_beta[0], hyper_beta[1])
        self.estimator = GammaPosteriorSampler()

    @property
    def hyper_beta(self):
        return self._hyper_beta

    @hyper_beta.setter
    def hyper_beta(self, value):
        if isinstance(value, AbstractDensity):
            self._hyper_beta = value
        else:
            raise ValueError(value)

    @property
    def hyper_alpha(self):
        return self._hyper_alpha

    @hyper_alpha.setter
    def hyper_alpha(self, value):
        if isinstance(value, AbstractDensity):
            # BUG FIX: the original assigned to self._hyper_beta here,
            # so setting hyper_alpha clobbered the beta hyper prior.
            self._hyper_alpha = value
        else:
            raise ValueError(value)

    def log_prob(self, x):
        """
        Log-probability of C{x}, including the hyper-prior terms for the
        current alpha and beta parameters.
        """
        a, b = self['alpha'], self['beta']

        l_a = self._hyper_alpha(a)
        l_b = self._hyper_beta(b)

        return super(GammaPrior, self).log_prob(x) + l_a + l_b
551
class InvGammaPrior(ScaleMixturePrior, InverseGamma):
    """
    Inverse Gamma prior on mixture weights including a weak gamma
    prior on its parameter.
    """

    def __init__(self, alpha=1., beta=1., hyper_alpha=(4., 1.),
                 hyper_beta=(2., 1.)):
        """
        @param alpha: shape parameter of the inverse gamma prior
        @param beta: scale parameter of the inverse gamma prior
        @param hyper_alpha: (alpha, beta) pair of the hyper prior on alpha
        @param hyper_beta: (alpha, beta) pair of the hyper prior on beta
        """
        super(InvGammaPrior, self).__init__(alpha, beta)

        # BUG FIX: the hyper parameters are (alpha, beta) pairs; the
        # original passed element [0] twice, silently discarding the second
        # element of each tuple.
        self._hyper_alpha = Gamma(hyper_alpha[0], hyper_alpha[1])
        self._hyper_beta = Gamma(hyper_beta[0], hyper_beta[1])
        self.estimator = InvGammaPosteriorSampler()

    @property
    def hyper_beta(self):
        return self._hyper_beta

    @hyper_beta.setter
    def hyper_beta(self, value):
        if isinstance(value, AbstractDensity):
            self._hyper_beta = value
        else:
            raise ValueError(value)

    @property
    def hyper_alpha(self):
        return self._hyper_alpha

    @hyper_alpha.setter
    def hyper_alpha(self, value):
        if isinstance(value, AbstractDensity):
            # BUG FIX: the original assigned to self._hyper_beta here,
            # so setting hyper_alpha clobbered the beta hyper prior.
            self._hyper_alpha = value
        else:
            raise ValueError(value)

    def log_prob(self, x):
        """
        Log-probability of C{x}, including the hyper-prior terms for the
        current alpha and beta parameters.
        """
        a, b = self['alpha'], self['beta']

        l_a = self._hyper_alpha(a)
        l_b = self._hyper_beta(b)

        return super(InvGammaPrior, self).log_prob(x) + l_a + l_b
601