
Source Code for Module csb.statistics.scalemixture

"""
Approximation of a distribution as a mixture of Gaussians with zero mean but different sigmas
"""

import numpy.random

import csb.core
import csb.statistics.ars
import csb.statistics.rand

from csb.core import typedproperty, validatedproperty
from abc import abstractmethod, ABCMeta

from csb.numeric import log, exp, approx_psi, inv_psi, d_approx_psi
from scipy.special import psi, kve
from csb.statistics import harmonic_mean, geometric_mean
from csb.statistics.pdf import AbstractEstimator, AbstractDensity, Gamma, InverseGamma, NullEstimator


def inv_digamma_minus_log(y, tol=1e-10, n_iter=100):
    """
    Solve y = psi(alpha) - log(alpha) for alpha by fixed point
    iteration.
    """
    if y >= -log(6.):
        x = 1 / (2 * (1 - exp(y)))
    else:
        x = 1.e-10
    for _i in range(n_iter):
        z = approx_psi(x) - log(x) - y
        if abs(z) < tol:
            break
        x -= x * z / (x * d_approx_psi(x) - 1)
        x = abs(x)
    return x
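
# A minimal round-trip check for inv_digamma_minus_log (an illustrative sketch
# using only names already imported above). Note that the fixed point iteration
# inverts approx_psi rather than the exact digamma, so the round trip is exact
# only with respect to that approximation:
#
#     alpha = 2.5
#     y = approx_psi(alpha) - log(alpha)
#     assert abs(inv_digamma_minus_log(y) - alpha) < 1e-6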


class ScaleMixturePriorEstimator(AbstractEstimator):
    """
    Estimator of the parameters of a L{ScaleMixturePrior}. It also provides
    the estimator for the scales of a L{ScaleMixture} under that prior.
    """

    __metaclass__ = ABCMeta

    @abstractmethod
    def get_scale_estimator(self):
        """
        Return an appropriate estimator for the scales of the mixture distribution
        under this prior.
        """
        pass


class ScaleMixturePrior(object):
    """
    Prior on the scales of a L{ScaleMixture}, which determines how the scales
    are estimated.
    """

    def __init__(self, *args):
        super(ScaleMixturePrior, self).__init__(*args)
        self._scale = None
        self._scale_estimator = NullEstimator()

    @property
    def scale_estimator(self):
        return self._scale_estimator

    @property
    def estimator(self):
        return self._estimator

    @estimator.setter
    def estimator(self, strategy):
        if not isinstance(strategy, AbstractEstimator):
            raise TypeError(strategy)
        self._estimator = strategy
        if isinstance(strategy, ScaleMixturePriorEstimator):
            self._scale_estimator = strategy.get_scale_estimator()
        else:
            self._scale_estimator = NullEstimator()


class ScaleMixture(AbstractDensity):
    """
    Robust probabilistic superposition and comparison of protein structures
    (Martin Mechelke and Michael Habeck).

    Representation of a distribution as a mixture of Gaussians with a mean of
    zero and different inverse variances/scales. The number of scales equals
    the number of datapoints.

    The underlying family is determined by a prior L{ScaleMixturePrior} on the
    scales. Choosing a L{GammaPrior} results in a Student-t posterior, whereas an
    L{InvGammaPrior} leads to a K-distribution as posterior.
    """

    def __init__(self, scales=numpy.array([1., 1.]), prior=None, d=3):

        super(ScaleMixture, self).__init__()

        self._register('scales')

        self._d = d
        self.set_params(scales=scales)
        self._prior = prior

        if self._prior is not None:
            self.estimator = self._prior.scale_estimator

    @property
    def scales(self):
        return numpy.squeeze(self['scales'])

    @scales.setter
    def scales(self, value):
        if not isinstance(value, csb.core.string) and \
               isinstance(value, (numpy.ndarray, list, tuple)):
            self['scales'] = numpy.array(value)
        else:
            raise ValueError("numpy array expected")

    @property
    def prior(self):
        return self._prior

    @prior.setter
    def prior(self, value):
        if not isinstance(value, ScaleMixturePrior):
            raise TypeError(value)
        self._prior = value
        self.estimator = self._prior.scale_estimator

    def log_prob(self, x):
        from csb.numeric import log_sum_exp

        dim = self._d
        s = self.scales

        log_p = numpy.squeeze(-numpy.multiply.outer(x * x, 0.5 * s)) + \
                numpy.squeeze(dim * 0.5 * (log(s) - log(2 * numpy.pi)))

        if self._prior is not None:
            log_p += numpy.squeeze(self._prior.log_prob(s))
        return log_sum_exp(log_p.T, 0)

    def random(self, shape=None):

        s = self.scales

        if shape is None:
            return numpy.random.normal() * s[numpy.random.randint(len(s))]

        else:
            #n = s.shape[0]
            nrv = numpy.random.normal(size=shape)
            indices = numpy.random.randint(len(s), size=shape)

            return s[indices] * nrv
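
# Illustrative usage sketch (assumes `x` is a hypothetical 1-D numpy array of
# observations; the GammaPrior used here is defined further below in this module):
#
#     mixture = ScaleMixture(scales=numpy.ones(len(x)), prior=GammaPrior(), d=3)
#     log_p = mixture.log_prob(x)          # mixture log-density at each datum
#     draws = mixture.random(shape=10000)  # random variates from the scale mixture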


class ARSPosteriorAlpha(csb.statistics.ars.LogProb):
    """
    This class represents the posterior distribution of the alpha parameter
    of the Gamma and Inverse Gamma prior, and allows sampling using adaptive
    rejection sampling L{ARS}.
    """

    def __init__(self, a, b, n):

        self.a = float(a)
        self.b = float(b)
        self.n = float(n)

    def __call__(self, x):

        from scipy.special import gammaln

        return self.a * x - \
               self.n * gammaln(numpy.clip(x, 1e-308, 1e308)) + \
               self.b * log(x), \
               self.a - self.n * psi(x) + self.b / x

    def initial_values(self, tol=1e-10):
        """
        Generate initial values by doing fixed point
        iterations to solve for alpha.
        """
        n, a, b = self.n, self.a, self.b

        if abs(b) < 1e-10:
            alpha = inv_psi(a / n)
        else:
            alpha = 1.

        z = tol + 1.

        while abs(z) > tol:

            z = n * psi(alpha) - \
                b / numpy.clip(alpha, 1e-300, 1e300) - a

            alpha -= z / (n * d_approx_psi(alpha) - b
                          / (alpha ** 2 + 1e-300))
            alpha = numpy.clip(alpha, 1e-100, 1e300)

        return numpy.clip(alpha - 1 / (n + 1e-300), 1e-100, 1e300), \
               alpha + 1 / (n + 1e-300), alpha
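
# The tuple returned by ARSPosteriorAlpha.__call__ is (log-probability, gradient).
# A quick finite-difference sanity check of that gradient (a sketch with
# arbitrary illustrative parameters a=10, b=2, n=5):
#
#     logp = ARSPosteriorAlpha(10., 2., 5.)
#     f0, g0 = logp(1.5)
#     f1, _ = logp(1.5 + 1e-6)
#     assert abs((f1 - f0) / 1e-6 - g0) < 1e-3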


class GammaPosteriorSampler(ScaleMixturePriorEstimator):

    def __init__(self):
        super(GammaPosteriorSampler, self).__init__()
        self.n_samples = 2

    def get_scale_estimator(self):
        return GammaScaleSampler()

    def estimate(self, context, data):
        """
        Generate samples from the posterior of alpha and beta.

        For beta the posterior is a gamma distribution and analytically
        accessible.

        The posterior of alpha cannot be expressed analytically and is
        approximated using adaptive rejection sampling.
        """
        pdf = GammaPrior()

        ## sufficient statistics

        a = numpy.mean(data)
        b = exp(numpy.mean(log(data)))
        v = numpy.std(data) ** 2
        n = len(data)

        beta = a / v
        alpha = beta * a
        samples = []

        for _i in range(self.n_samples):

            ## sample beta from Gamma distribution
            beta = numpy.random.gamma(n * alpha + context._hyper_beta.alpha,
                                      1 / (n * a + context._hyper_beta.beta))

            ## sample alpha with ARS
            logp = ARSPosteriorAlpha(n * log(beta * b)
                                     - context.hyper_alpha.beta,
                                     context.hyper_alpha.alpha - 1., n)
            ars = csb.statistics.ars.ARS(logp)
            ars.initialize(logp.initial_values()[:2], z0=0.)
            alpha = ars.sample()

            if alpha is None:
                raise ValueError("ARS failed")

            samples.append((alpha, beta))

        pdf.alpha, pdf.beta = samples[-1]

        return pdf


class GammaPosteriorMAP(ScaleMixturePriorEstimator):

    def __init__(self):
        super(GammaPosteriorMAP, self).__init__()

    def get_scale_estimator(self):
        return GammaScaleMAP()

    def estimate(self, context, data):
        """
        Estimate alpha and beta from their posterior.
        """

        pdf = GammaPrior()

        s = data[0].mean()
        y = data[1].mean() - log(s) - 1.

        alpha = abs(inv_digamma_minus_log(numpy.clip(y, -1e308, -1e-300)))
        beta = alpha / s

        pdf.alpha, pdf.beta = alpha, beta
        return pdf


class InvGammaPosteriorSampler(ScaleMixturePriorEstimator):

    def __init__(self):
        super(InvGammaPosteriorSampler, self).__init__()
        self.n_samples = 2

    def get_scale_estimator(self):
        return InvGammaScaleSampler()

    def estimate(self, context, data):
        """
        Generate samples from the posterior of alpha and beta.

        For beta the posterior is a gamma distribution and analytically
        accessible.

        The posterior of alpha cannot be expressed analytically and is
        approximated using adaptive rejection sampling.
        """
        pdf = GammaPrior()

        ## sufficient statistics

        h = harmonic_mean(numpy.clip(data, 1e-308, 1e308))
        g = geometric_mean(numpy.clip(data, 1e-308, 1e308))

        n = len(data)

        samples = []

        a = numpy.mean(1 / data)
        v = numpy.std(1 / data) ** 2

        beta = a / v
        alpha = beta * a

        for _i in range(self.n_samples):

            ## sample alpha with ARS
            logp = ARSPosteriorAlpha(n * (log(beta) - log(g)) - context.hyper_alpha.beta,
                                     context.hyper_alpha.alpha - 1., n)
            ars = csb.statistics.ars.ARS(logp)
            ars.initialize(logp.initial_values()[:2], z0=0.)

            alpha = ars.sample()

            if alpha is None:
                raise ValueError("Sampling failed")

            alpha = numpy.abs(alpha)

            ## sample beta from Gamma distribution
            beta = numpy.random.gamma(n * alpha + context.hyper_beta.alpha,
                                      1 / (n / h + context.hyper_beta.beta))

            samples.append((alpha, beta))

        pdf.alpha, pdf.beta = samples[-1]
        return pdf


class InvGammaPosteriorMAP(ScaleMixturePriorEstimator):

    def __init__(self):
        super(InvGammaPosteriorMAP, self).__init__()

    def get_scale_estimator(self):
        return InvGammaScaleMAP()

    def estimate(self, context, data):
        """
        Estimate alpha and beta from their posterior.
        """
        pdf = GammaPrior()

        y = log(data).mean() - log((data ** -1).mean())

        alpha = inv_digamma_minus_log(numpy.clip(y, -1e308, -1e-300))
        alpha = abs(alpha)

        beta = numpy.clip(alpha / (data ** (-1)).mean(), 1e-100, 1e100)

        pdf.alpha, pdf.beta = alpha, beta
        return pdf


class GammaScaleSampler(AbstractEstimator):
    """
    Sample the scales given the data.
    """

    def estimate(self, context, data):
        pdf = ScaleMixture()
        alpha = context.prior.alpha
        beta = context.prior.beta
        d = context._d

        if len(data.shape) == 1:
            data = data[:, numpy.newaxis]

        a = alpha + 0.5 * d * len(data.shape)
        b = beta + 0.5 * data.sum(-1) ** 2

        s = numpy.clip(numpy.random.gamma(a, 1. / b), 1e-20, 1e10)
        pdf.scales = s

        context.prior.estimate(s)
        pdf.prior = context.prior

        return pdf


class GammaScaleMAP(AbstractEstimator):
    """
    MAP estimator of the scales.
    """

    def estimate(self, context, data):
        pdf = ScaleMixture()
        alpha = context.prior.alpha
        beta = context.prior.beta
        d = context._d

        if len(data.shape) == 1:
            data = data[:, numpy.newaxis]

        a = alpha + 0.5 * d * len(data.shape)
        b = beta + 0.5 * data.sum(-1) ** 2

        s = a / b
        log_s = psi(a) - log(b)

        pdf.scales = s
        context.prior.estimate([s, log_s])
        pdf.prior = context.prior

        return pdf


class InvGammaScaleSampler(AbstractEstimator):
    """
    Sample the scales given the data.
    """

    def estimate(self, context, data):
        pdf = ScaleMixture()
        alpha = context.prior.alpha
        beta = context.prior.beta
        d = context._d

        if len(data.shape) == 1:
            data = data[:, numpy.newaxis]

        p = -alpha + 0.5 * d
        b = 2 * beta
        a = 1e-5 + data.sum(-1) ** 2

        s = csb.statistics.rand.gen_inv_gaussian(a, b, p)
        s = numpy.clip(s, 1e-300, 1e300)

        pdf.scales = s
        context.prior.estimate(s)
        pdf.prior = context.prior

        return pdf


class InvGammaScaleMAP(AbstractEstimator):
    """
    MAP estimator of the scales.
    """

    def estimate(self, context, data):
        pdf = ScaleMixture()
        alpha = context.prior.alpha
        beta = context.prior.beta
        d = context._d

        if len(data.shape) == 1:
            data = data[:, numpy.newaxis]

        p = -alpha + 0.5 * d
        b = 2 * beta
        a = 1e-5 + data.sum(-1) ** 2

        s = numpy.clip((numpy.sqrt(b) * kve(p + 1, numpy.sqrt(a * b)))
                       / (numpy.sqrt(a) * kve(p, numpy.sqrt(a * b))),
                       1e-10, 1e10)
        pdf.scales = s
        context.prior.estimate(s)
        pdf.prior = context.prior

        return pdf


class GammaPrior(ScaleMixturePrior, Gamma):
    """
    Gamma prior on the mixture weights (scales), including weak gamma priors
    on its parameters.
    """

    def __init__(self, alpha=1., beta=1., hyper_alpha=(4., 1.),
                 hyper_beta=(2., 1.)):

        super(GammaPrior, self).__init__(alpha, beta)

        self._hyper_alpha = Gamma(*hyper_alpha)
        self._hyper_beta = Gamma(*hyper_beta)
        self.estimator = GammaPosteriorSampler()

    @typedproperty(AbstractDensity)
    def hyper_beta():
        pass

    @typedproperty(AbstractDensity)
    def hyper_alpha():
        pass

    def log_prob(self, x):

        a, b = self['alpha'], self['beta']

        l_a = self._hyper_alpha(a)
        l_b = self._hyper_beta(b)

        return super(GammaPrior, self).log_prob(x) + l_a + l_b
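
# The estimator attached to a prior decides how alpha and beta are updated and
# which scale estimator a ScaleMixture will use. A minimal sketch of switching
# from the default Gibbs sampler to the MAP update (`scales` is a hypothetical
# array of initial scales):
#
#     prior = GammaPrior(alpha=1., beta=1.)
#     prior.estimator = GammaPosteriorMAP()   # scale_estimator becomes GammaScaleMAP()
#     mixture = ScaleMixture(scales=scales, prior=prior, d=3)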


class InvGammaPrior(ScaleMixturePrior, InverseGamma):
    """
    Inverse gamma prior on the mixture weights (scales), including weak gamma
    priors on its parameters.
    """

    def __init__(self, alpha=1., beta=1., hyper_alpha=(4., 1.),
                 hyper_beta=(2., 1.)):

        super(InvGammaPrior, self).__init__(alpha, beta)

        self._hyper_alpha = Gamma(*hyper_alpha)
        self._hyper_beta = Gamma(*hyper_beta)
        self.estimator = InvGammaPosteriorSampler()

    @typedproperty(AbstractDensity)
    def hyper_beta():
        pass

    @typedproperty(AbstractDensity)
    def hyper_alpha():
        pass

    def log_prob(self, x):

        a, b = self['alpha'], self['beta']

        l_a = self._hyper_alpha(a)
        l_b = self._hyper_beta(b)

        return super(InvGammaPrior, self).log_prob(x) + l_a + l_b