Package BIP :: Package Bayes :: Module Melding
[hide private]
[frames | no frames]

Source Code for Module BIP.Bayes.Melding

  1  # -*- coding:utf-8 -*- 
  2  #----------------------------------------------------------------------------- 
  3  # Name:        Melding.py 
  4  # Purpose:     The Bayesian melding Class provides 
  5  #                   uncertainty analyses for deterministic models. 
  6  # 
  7  # Author:      Flávio Codeço Coelho 
  8  # 
  9  # Created:     2003/08/10 
 10  # Copyright:   (c) 2003-2008 by the Author 
 11  # Licence:     GPL 
 12  #----------------------------------------------------------------------------- 
 13  from numpy.core.records import recarray 
 14  #import psyco 
 15  #psyco.full() 
 16  import sys, os 
 17  import cPickle as CP 
 18  import like 
 19  import pylab as P 
 20  import scipy.stats.kde as kde 
 21  from scipy import stats 
 22  import numpy 
 23  from numpy import * 
 24  from time import time 
 25  from numpy.random import normal, randint,  random 
 26  #from BIP.Viz.realtime import RTplot 
 27  import lhs 
 28  if sys.version.startswith('2.5'): 
 29      from processing import Pool 
 30  else: 
 31      from multiprocessing import Pool 
 32   
 33  __docformat__ = "restructuredtext en" 
 34   
 35   
class Meld:
    """
    Bayesian Melding class.

    Holds prior samples for the model inputs (theta) and outputs (phi),
    runs the deterministic model over the theta priors and resamples theta
    (through SIR or ABC) to obtain posterior samples.
    """
    def __init__(self, K, L, model, ntheta, nphi, alpha = 0.5, verbose = False ):
        """
        Initializes the Melding class.

        :Parameters:
            - `K`: Number of replicates of the model run. Also determines the prior sample size.
            - `L`: Number of samples from the Posterior distributions. Usually 10% of K.
            - `model`: Callable taking theta as argument and returning phi = M(theta).
            - `ntheta`: Number of inputs to the model (parameters).
            - `nphi`: Number of outputs of the model (State-variables)
            - `alpha`: pooling weight of the user-provided phi priors.
            - `verbose`: if True, progress messages are printed during the runs.
        """
        self.K = K
        self.L = L
        self.verbose = verbose
        self.model = model
        self.likelist = [] #list of likelihoods
        self.q1theta = recarray(K,formats=['f8']*ntheta) #Theta Priors (record array)
        self.post_theta = recarray(L,formats=['f8']*ntheta) #Theta Posteriors (record array)
        self.q2phi = recarray(K,formats=['f8']*nphi) #Phi Priors (record array)
        self.phi = recarray(K,formats=['f8']*nphi) #Phi model-induced Priors (record array)
        self.q2type = [] #list of distribution types
        self.post_phi = recarray(L,formats=['f8']*nphi) #Phi Posteriors (record array)
        self.ntheta = ntheta
        self.nphi = nphi
        self.alpha = alpha #pooling weight of user-provided phi priors
        self.done_running = False

    @staticmethod
    def _submit(pool, func, args, callback=None):
        """
        Schedules func(*args) on `pool` and returns the async result.
        """
        # multiprocessing.Pool spells the method apply_async, while the
        # `processing` package imported on Python 2.5 spells it applyAsync.
        schedule = getattr(pool, 'apply_async', None) or pool.applyAsync
        return schedule(func, args, callback=callback)

    @staticmethod
    def _load(path):
        """
        Unpickles and returns the object stored in `path`.
        """
        # NOTE(review): pickle must only be used on files this class wrote
        # itself; never point it at untrusted data.
        handle = open(path, 'rb')
        try:
            return CP.load(handle)
        finally:
            handle.close()

    @staticmethod
    def _dump(obj, path):
        """
        Pickles `obj` into `path`, closing the file afterwards.
        """
        handle = open(path, 'wb')
        try:
            CP.dump(obj, handle)
        finally:
            handle.close()

    @staticmethod
    def _clear_temp():
        """
        Removes the temporary save files, if they were created.
        """
        for path in ('phi.temp', 'q1theta'):
            if os.path.exists(path):
                os.unlink(path)

    def _kde_sample(self, data, limits):
        """
        Returns self.K values drawn from a gaussian kernel density estimate
        of `data`, discarding draws outside the closed interval `limits`.
        """
        ll, ul = limits[0], limits[1]
        density = stats.gaussian_kde(data) # built once, reused for every batch
        smp = []
        while len(smp) < self.K:
            smp += [x for x in density.resample(self.K)[0] if x >= ll and x <= ul]
        return array(smp[:self.K])

    def setPhi(self, names, dists=[stats.norm], pars=[(0, 1)], limits=[(-5,5)]):
        """
        Setup the models Outputs, or Phi, and generate the samples from prior distributions
        needed for the melding replicates.

        :Parameters:
            - `names`: list of string with the names of the variables.
            - `dists`: is a list of RNG from scipy.stats
            - `pars`: is a list of tuples of variables for each prior distribution, respectively.
            - `limits`: lower and upper limits on the support of variables.

        Raises ValueError if the number of names differs from self.nphi.
        """
        if len(names) != self.nphi:
            raise ValueError("Number of names(%s) does not match the number of output variables(%s)."%(len(names),self.nphi))
        self.q2phi.dtype.names = tuple(names)
        self.phi.dtype.names = tuple(names)
        self.post_phi.dtype.names = tuple(names)
        self.plimits = limits
        for n,d,p in zip(names,dists,pars):
            self.q2phi[n] = lhs.lhs(d,p,self.K)
            self.q2type.append(d.name)

    def setTheta(self, names, dists=[stats.norm], pars=[(0, 1)]):
        """
        Setup the models inputs and generate the samples from prior distributions
        needed for the melding replicates.
        If a file named 'q1theta' exists in the working directory, the priors
        are loaded from it instead of being sampled.

        :Parameters:
            - `names`: list of string with the names of the parameters.
            - `dists`: is a list of RNG from scipy.stats
            - `pars`: is a list of tuples of parameters for each prior distribution, respectively.
        """
        self.q1theta.dtype.names = tuple(names)
        self.post_theta.dtype.names = tuple(names)
        if os.path.exists('q1theta'):
            self.q1theta = self._load('q1theta')
        else:
            for n,d,p in zip(names,dists,pars):
                self.q1theta[n] = lhs.lhs(d,p,self.K)

    def setThetaFromData(self,names,data, limits):
        """
        Setup the model inputs and set the prior distributions from the vectors
        in data.
        This method is to be used when the prior distributions are available in
        the form of a sample from an empirical distribution such as a bayesian
        posterior.
        In order to expand the samples provided, K samples are generated from a
        kernel density estimate of the original sample.
        If a file named 'q1theta' exists in the working directory, the priors
        are loaded from it instead.

        :Parameters:
            - `names`: list of string with the names of the parameters.
            - `data`: list of vectors. Samples of a proposed distribution
            - `limits`: List of (min,max) tuples for each theta to make sure samples are not generated outside these limits.
        """
        self.q1theta.dtype.names = tuple(names)
        self.post_theta.dtype.names = tuple(names)
        if os.path.exists('q1theta'):
            self.q1theta = self._load('q1theta')
        else:
            for n,d,lim in zip(names,data,limits):
                self.q1theta[n] = self._kde_sample(d, lim)

    def setPhiFromData(self,names,data,limits):
        """
        Setup the model outputs and set their prior distributions from the
        vectors in data.
        This method is to be used when the prior distributions are available in
        the form of a sample from an empirical distribution such as a bayesian
        posterior.
        In order to expand the samples provided, K samples are generated from a
        kernel density estimate of the original sample.

        :Parameters:
            - `names`: list of string with the names of the variables.
            - `data`: list of vectors. Samples of the proposed distribution.
            - `limits`: list of tuples (ll,ul),lower and upper limits on the support of variables.
        """
        self.q2phi.dtype.names = tuple(names)
        self.phi.dtype.names = tuple(names)
        self.post_phi.dtype.names = tuple(names)
        self.limits = limits
        self.plimits = limits # same attribute name that setPhi stores
        # Each variable is paired with its own limits; the previous code
        # indexed `limits` with an undefined counter.
        for n,d,lim in zip(names,data,limits):
            self.q2phi[n] = self._kde_sample(d, lim)
            self.q2type.append('empirical')

    def addData(self, data, model, limits,l=1024, **kwargs):
        """
        Calculates the likelihood functions of the dataset presented and add to
        self.likelist
        Likelihood function is a vector of length l, scaled so its maximum is 1.

        :Parameters:
            - `data`: vector containing observations on a given variable.
            - `model`: string with the name of the distribution of the variable
            - `limits`: (ll,ul) tuple with lower and upper limits for the variable
            - `l`: Length (resolution) of the likelihood vector
            - `pars`: (keyword) extra parameters, required by the 'beta' model.

        Raises ValueError for an unknown distribution name.
        """
        n = len(data) # Number of data points
        data = array(data)
        (ll,ul) = limits #limits for the parameter space
        step = (ul-ll)/float(l)
        grid = arange(ll,ul,step) # support on which the likelihood is evaluated

        if model == 'normal': # In this case, L is a function of the mean. SD is set to the SD(data)
            prec = 1/std(data) # value handed to like.Normal as its third argument
            res = [exp(like.Normal(data,mu,prec)) for mu in grid]
        elif model == 'exponential':
            res = [lamb**n*exp(-lamb*sum(data)) for lamb in grid]
        elif model == 'beta':
            # NOTE(review): the likelihood does not depend on the grid value,
            # so the resulting vector is constant -- confirm the intent.
            res = [exp(like.Beta(data,*kwargs['pars'])) for i in grid]
        elif model == 'bernoulli':
            if ll<0 or ul>1:
                print("Parameter p of the bernoulli is out of range[0,1]")
            res = [exp(like.Bernoulli(data,p)) for p in grid]
        elif model == 'poisson':
            res = [exp(like.Poisson(data,lb)) for lb in grid]
        elif model == 'lognormal':
            prec = 1/std(data)
            res = [exp(like.Lognormal(data,mu,prec)) for mu in grid]
        else:
            raise ValueError('Invalid distribution type. Valid distributions: normal, lognormal, exponential, beta, bernoulli and poisson')
        res = array(res)
        lik = res/max(res) # Likelihood function
        if model == 'normal':
            print("%s %s" % (max(lik), min(lik)))
        self.likelist.append(lik)
        return lik

    def run(self,*args):
        """
        Runs the model through the Melding inference.
        model is a callable which return the output of the deterministic model,
        i.e. the model itself.
        The model is run self.K times to obtain phi = M(theta).
        """
        for i in range(self.K):
            theta = [self.q1theta[n][i] for n in self.q1theta.dtype.names]
            # Each result used to be awaited right after submission to a pool
            # attribute that was never created; calling the model directly
            # yields the same values.
            self.phi[i] = self.model(*theta)[-1] #phi is the last point in the simulation
        self.done_running = True

    def getPosteriors(self,t=1):
        """
        Updates the posteriors of the model's output for the last t time steps.
        Returns two record arrays:
        - The posteriors of the Theta
        - the posterior of Phi last t values of time-series. self.L by `t` arrays.
        Returns None when no inference run has completed yet.

        :Parameters:
            - `t`: length of the posterior time-series to return.
        """
        if not self.done_running:
            return
        if t > 1:
            self.post_phi = recarray((self.L,t),formats=['f8']*self.nphi)
            self.post_phi.dtype.names = self.phi.dtype.names
        def cb(r):
            '''
            callback function for the asynchronous model runs
            '''
            if t == 1:
                self.post_phi[r[1]] = r[0][-1]
            else:
                self.post_phi[r[1]]= [tuple(l) for l in r[0][-t:]]
        po = Pool()
        #random indices for the marginal posteriors of theta
        pti = lhs.lhs(stats.randint,(0,self.L),siz=(self.ntheta,self.L))
        for i in range(self.L):#Monte Carlo with values of the posterior of Theta
            # int() because the sampled indices may come back as floats
            theta = [self.post_theta[n][int(pti[j,i])] for j,n in enumerate(self.post_theta.dtype.names)]
            self._submit(po, enumRun, (self.model,theta,i), callback=cb)
            if i%100 == 0 and self.verbose:
                print("==> L = %s"%i)
        po.close()
        po.join()
        return self.post_theta, self.post_phi

    def filtM(self,cond,x,limits):
        '''
        Multiple condition filtering.
        Remove values in x[i], if corresponding values in
        cond[i] are less than limits[i][0] or greater than
        limits[i][1].

        :Parameters:
            - `cond`: is an array of conditions.
            - `limits`: is a list of tuples (ll,ul) with length equal to number of lines in `cond` and `x`.
            - `x`: array to be filtered.
        '''
        # Deconstruct the record array, if necessary.
        names = []
        if isinstance(cond, recarray):
            names = list(cond.dtype.names)
            cond = [cond[v] for v in cond.dtype.names]
            x = [x[v] for v in x.dtype.names]

        cond = array(cond)
        cnd = ones(cond.shape[1],int)
        for i,j in zip(cond,limits):
            ll = j[0]
            ul = j[1]
            cnd = cnd & less(i,ul) & greater(i,ll)
        f = compress(cnd,x, axis=1)

        if names:#Reconstruct the record array
            r = recarray((1,f.shape[1]),formats=['f8']*len(names),names=names)
            for i,n in enumerate(names):
                r[n]=f[i]
            f=r

        return f

    def basicfit(self,s1,s2):
        '''
        Calculates a basic fitness calculation between a model-
        generated time series and a observed time series.
        Returns the mean, over the observed variables, of the root mean
        square error between the two series.

        :Parameters:
            - `s1`: model-generated time series. record array.
            - `s2`: observed time series. dictionary with keys matching names of s1
        '''
        fit = []
        for k in s2.keys():
            observed = asarray(s2[k])
            # Empty or all-zero series are treated as "no observations";
            # asarray lets plain lists through the same test as arrays.
            if observed.size == 0 or (not observed.any()):
                continue #no observations for this variable
            e = sqrt(mean((s1[k]-observed)**2.))
            fit.append(e)

        return mean(fit) #mean RMS error

    def logPooling(self,phi):
        """
        Returns the probability associated with each phi[i]
        on the pooled pdf of phi and q2phi.

        :Parameters:
            - `phi`: prior of Phi induced by the model and q1theta.
        """
        # Estimating the multivariate joint probability densities
        phidens = stats.gaussian_kde(array([phi[n][:,-1] for n in phi.dtype.names]))
        q2dens = stats.gaussian_kde(array([self.q2phi[n] for n in self.q2phi.dtype.names]))
        # Determining the pooled probabilities for each phi[i]
        lastp = array([list(phi[i,-1]) for i in range(self.K)])
        qtilphi = (phidens.evaluate(lastp.T)**(1-self.alpha))*q2dens.evaluate(lastp.T)**self.alpha
        return qtilphi/sum(qtilphi)

    def abcRun(self,fitfun=None, data={}, t=1,savetemp=False):
        """
        Runs the model for inference through Approximate Bayes Computation
        techniques. This method should be used as an alternative to the sir.

        :Parameters:
            - `fitfun`: Callable which will return the goodness of fit of the model to data as a number between 0-1, with 1 meaning perfect fit
            - `t`: number of time steps to retain at the end of the of the model run for fitting purposes.
            - `data`: dict containing observed time series (lists of length t) of the state variables. This dict must have as many items the number of state variables, with labels matching variables names. Unobserved variables must have an empty list as value.
            - `savetemp`: Should temp results be saved. Useful for long runs. Allows for resuming the simulation from the last save.
        """
        if not fitfun:
            fitfun = self.basicfit
        if savetemp:
            self._dump(self.q1theta, 'q1theta')
        # Running the model ==========================
        if os.path.exists('phi.temp'):
            phi,j = self._load('phi.temp')
        else:
            j=0
            phi = recarray((self.K,t),formats=['f8']*self.nphi, names = self.phi.dtype.names)
        for i in range(j,self.K):
            theta = [self.q1theta[n][i] for n in self.q1theta.dtype.names]
            phi[i]= [tuple(l) for l in self.model(*theta)[-t:]] #phi is the last t points in the simulation
            if i%100 == 0:
                print("==> K = %s"%i)
                if savetemp:
                    self._dump((phi,i), 'phi.temp')
        if savetemp: #If all replicates are done, clear temporary save files.
            self._clear_temp()

        print("==> Done Running the K replicates\n")
        qtilphi = self.logPooling(phi) #vector with probability of each phi[i] belonging to qtilphi
        qtilphi = nan_to_num(qtilphi)
        print('max(qtilphi):  %s' % max(qtilphi))
        # calculate weights
        w = array([fitfun(phi[i],data) for i in range(phi.shape[0])])
        w = w/sum(w)
        w = 1-w
        print("w= %s %s %s" % (w, mean(w), var(w)))
        print("")
        print('qtilphi= %s' % qtilphi)
        # Resampling Thetas
        w = nan_to_num(w)
        w = array(w)*qtilphi
        w = w/sum(w)
        w = nan_to_num(w)
        print('max(w):  %s' % max(w))
        if sum(w) == 0.0:
            sys.exit('Resampling weights are all zero, please check your model or data.')
        wmax = max(w)
        j = 0
        while j < self.L: # Extract L samples from q1theta
            i=randint(0,w.size)# Random position of w and q1theta
            # Scaling by max(w), as sir() does, keeps the acceptance
            # probability proportional to w[i] with far fewer rejections.
            if random()*wmax <= w[i]:
                self.post_theta[j] = self.q1theta[i]# retain the sample according with resampling prob.
                j+=1

        self.done_running = True

    def sir(self, data={}, t=1,tau=0.1, nopool=False,savetemp=False):
        """
        Run the model output through the Sampling-Importance-Resampling algorithm.
        Returns 1 if successful or 0 if not.

        :Parameters:
            - `data`: observed time series on the model's output
            - `t`: length of the observed time series
            - `tau`: Precision of the Normal likelihood function
            - `nopool`: True if no priors on the outputs are available. Leads to faster calculations
            - `savetemp`: Boolean. create a temp file?
        """
        phi = self.runModel(savetemp,t)
        # Do Log Pooling
        if nopool:
            qtilphi = ones(self.K)
        else:
            t0 = time()
            qtilphi = self.logPooling(phi) #vector with probability of each phi[i] belonging to qtilphi
            print("==> Done Running the Log Pooling (took %s seconds)\n"%(time()-t0))
            qtilphi = nan_to_num(qtilphi)
            print('max(qtilphi):  %s' % max(qtilphi))
            if sum(qtilphi)==0:
                print('Pooled prior on ouputs is null, please check your priors, and try again.')
                return 0

        # Variables with observations; the selection does not depend on the
        # replicate, so it is made once.
        observed = []
        for n in data.keys():
            if isinstance(data[n],list) and data[n] == []:
                continue #no observations for this variable
            elif isinstance(data[n],numpy.ndarray) and (not data[n].any()):
                continue #no observations for this variable
            observed.append(n)

        # Calculating the likelihood of each phi[i] considering the observed data
        lik = zeros(self.K)
        t0=time()
        for i in range(self.K):
            l=1
            for n in observed:
                l *= prod([exp(like.Normal(data[n][m], j,tau)) for m,j in enumerate(phi[n][i])])
            lik[i]=l
        print("==> Done Calculating Likelihoods (took %s seconds)"%(time()-t0))
        print("==> Likelihood ratio of best run/worst run: %s"%(max(lik)/min(lik),))
        # Calculating the weights
        w = nan_to_num(qtilphi*lik)
        w = nan_to_num(w/sum(w))

        if sum(w) == 0.0:
            print('Resampling weights are all zero, please check your model or data, and try again.\n')
            print('==> Likelihood (min,mean,max):  %s %s %s' % (min(lik),mean(lik),max(lik)))
            return 0

        wmax = max(w)
        j = 0
        t0 = time()
        while j < self.L: # Extract L samples from q1theta
            i=randint(0,w.size)# Random position of w and q1theta
            if random()*wmax<= w[i]:
                self.post_theta[j] = self.q1theta[i]# retain the sample according with resampling prob.
                j+=1
        self.done_running = True
        print("==> Done Resampling priors (took %s seconds)"%(time()-t0))
        return 1

    def runModel(self,savetemp,t=1):
        '''
        Handles running the model self.K times keeping a temporary savefile for
        resuming calculation in case of interruption.
        Returns a (K, t) record array with the last t points of each run.

        :Parameters:
            - `savetemp`: Boolean. create a temp file?
            - `t`: number of time steps, at the end of each run, to keep.
        '''
        if savetemp:
            self._dump(self.q1theta, 'q1theta')
        # Running the model ==========================
        if os.path.exists('phi.temp'):
            phi,j = self._load('phi.temp')
        else:
            j=0
            phi = recarray((self.K,t),formats=['f8']*self.nphi, names = self.phi.dtype.names)
        def cb(r):
            '''
            callback function for the asynchronous model runs
            '''
            if t == 1:
                phi[r[1]] = (r[0][-1],)
            else:
                phi[r[1]] = [tuple(l) for l in r[0][-t:]]# #phi is the last t points in the simulation

        po = Pool()
        t0=time()
        for i in range(j,self.K):
            theta = [self.q1theta[n][i] for n in self.q1theta.dtype.names]
            self._submit(po, enumRun, (self.model,theta,i), callback=cb)
            if i%100 == 0:
                if self.verbose:
                    print("==> K = %s"%i)
                if savetemp:
                    # NOTE(review): jobs complete asynchronously, so the saved
                    # index counts submissions, not finished runs -- confirm
                    # before relying on resume.
                    self._dump((phi,i), 'phi.temp')
        po.close()
        po.join()
        if savetemp: #If all replicates are done, clear temporary save files.
            self._clear_temp()
        print("==> Done Running the K replicates (took %s seconds)\n"%(time()-t0))

        return phi
def enumRun(model,theta,k):
    """
    Calls `model` with the items of `theta` as positional arguments and
    returns the pair (model output, k), so the caller can tell which
    replicate the output belongs to.
    """
    return (model(*theta), k)
def model(r, p0, n=1):
    """
    Model (r,p0, n=1)
    Example model for testing purposes: simulates the population dynamic
    model P(t) = r*P(t-1) for n time steps, starting from the initial
    population size p0.
    Returns a float vector with the n simulated population sizes.
    """
    trajectory = zeros(n, float) # output vector, one entry per time step
    population = p0
    for step in range(n):
        trajectory[step] = r*population
        population = trajectory[step]
    return trajectory
def Run(k):
    """
    Run (k)
    Draw k samples of Theta from its prior distribution, run the model with it
    and obtain phi = M(theta). For testing purposes only.

    Returns (phi, q1theta): phi holds the last point of each simulation and
    q1theta is the tuple (r, p0) of prior samples.
    """
    #---q1theta---------------------------------------------------------------------
    #---Priors for the theta (model parameters)--------------------
    r = lhs.lhs(stats.uniform, [2, 4], k)
    p0 = lhs.lhs(stats.uniform,[0,5],k)
    q1theta = (r, p0)
    #-------------------------------------------------------------------------------
    phi=zeros(k, float)
    po = Pool()
    # multiprocessing.Pool spells the method apply_async, while the
    # `processing` package imported on Python 2.5 spells it applyAsync.
    submit = getattr(po, 'apply_async', None) or po.applyAsync
    try:
        # Submit every replicate before collecting, so the runs can overlap.
        jobs = [submit(model,(r[i], p0[i])) for i in range(k)]
        for i,job in enumerate(jobs):
            phi[i] = job.get()[-1] # Sets phi[i] to the last point of the simulation
    finally:
        # The pool was never released before.
        po.close()
        po.join()

    return phi, q1theta
def KDE(x, limits=('',''), res=1024.):
    """
    KDE(x)
    performs a kernel density estimate using the scipy gaussian density
    if (ll,ul), enforce limits for the distribution's support.

    :Parameters:
        - `x`: sample to estimate the density from.
        - `limits`: (ll,ul) tuple, passed positionally. The default ('','') means no limits, in which case the range of `x` is used.
        - `res`: number of steps into which the support is divided.

    Returns a dictionary with the density under 'y' and the points where it
    was evaluated under 'x'.
    """
    ll, ul = limits
    # Only the '' / None placeholders mean "no limits". Testing the truth
    # value of ll would also discard a legitimate lower limit of 0.
    bounded = ll is not None and not isinstance(ll, str)
    if bounded:
        rn=arange(ll,ul,(ul-ll)/float(res))
        est = stats.gaussian_kde(asarray(x).ravel()).evaluate(rn)
    else:
        ll = min(x)
        ul = max(x)
        rn=arange(ll,ul,(ul-ll)/float(res))
        est = stats.gaussian_kde(x).evaluate(rn)
        print('No - KDE')
    return {'y':est,'x':rn}
def Likeli(data, dist, limits,**kwargs):
    """
    Generates the likelihood function of data given dist.
    limits is a tuple setting the interval of the parameter space that will
    be used as the support for the Likelihood function.
    returns a vector (1024 elements), scaled so that its maximum is 1.

    Valid values for dist: 'normal', 'lognormal', 'exponential', 'bernoulli'
    and 'poisson'. Any other value raises ValueError.
    """
    n = len(data) # Number of data points
    data = array(data)
    (ll,ul) = limits #limits for the parameter space
    step = (ul-ll)/1024.
    grid = arange(ll,ul,step) # support on which the likelihood is evaluated

    if dist == 'normal': # In this case, L is a function of the mean. SD is set to the SD(data)
        prec = 1/std(data) # value handed to like.Normal as its third argument
        res = [exp(like.Normal(data,mu,prec)) for mu in grid]
    elif dist == 'exponential':
        res = [lamb**n*exp(-lamb*sum(data)) for lamb in grid]
    elif dist == 'bernoulli':
        if ll<0 or ul>1:
            print("Parameter p of the bernoulli is out of range[0,1]")
        res = [exp(like.Bernoulli(data,p)) for p in grid]
    elif dist == 'poisson':
        res = [exp(like.Poisson(data,lb)) for lb in grid]
    elif dist == 'lognormal':
        prec = 1/std(data)
        res = [exp(like.Lognormal(data,mu,prec)) for mu in grid]
    else:
        # Previously a message was printed and the function then failed on an
        # unbound variable; fail with an explicit error instead.
        raise ValueError('Invalid distribution type. Valid distributions: normal, lognormal, exponential, bernoulli and poisson')
    res = array(res)
    lik = res/max(res) # Likelihood function
    if dist == 'normal':
        print("%s %s" % (max(lik), min(lik)))
    return lik
def Filt(cond, x, limits):
    """
    filtering out Out-of-boundary thetas and phis.
    for single output models.

    :Parameters:
        - `cond`: vector over which the conditional operations will be applied.
        - `x`: vector or matrix of data. matrices are filtered line by line. A tuple of vectors is stacked into a matrix first.
        - `limits`: (ll,ul) tuple, passed positionally, with the pre-model boundaries of phi.

    Returns the entries (or columns) of x whose corresponding cond value lies
    strictly between ll and ul.
    """
    ll, ul = limits
    cond = array(cond).ravel()
    if isinstance(x,tuple):
        l = len(x)
        x = array(x)
        # Integer division: array shapes must be integers.
        x.shape = (l,x.size//l)
    keep = less(cond,ul) & greater(cond,ll)
    # Matrices are filtered along their columns; anything one-dimensional is
    # filtered directly. This replaces a bare except around the column case.
    if ndim(x) > 1:
        return compress(keep,x, axis=1)
    return compress(keep,x)
def FiltM(cond,x,limits):
    """
    Multiple condition filtering.
    for multiple output models

    :Parameters:
        - `cond`: array of condition vectors, one per line.
        - `x`: matrix to be filtered along its columns.
        - `limits`: list of tuples (ll,ul) with the length of cond.

    Returns the columns of x for which every line of cond lies strictly
    between its own limits.
    """
    rows = array(cond)
    keep = ones(rows.shape[1],int) # 1 while a column satisfies every condition
    for row,bounds in zip(rows,limits):
        lower = bounds[0]
        upper = bounds[1]
        keep = keep & less(row,upper) & greater(row,lower)
    return compress(keep,x, axis=1)
def SIR(alpha,q2phi,limits,q2type,q1theta, phi,L, lik=None):
    """
    Sampling Importance Resampling.

    :Parameters:
        - `alpha`: pooling weight;
        - `q2phi`: premodel of phi(tuple of vectors);
        - `limits`: limits for q2phi (list/tuple of tuples);
        - `q2type`: dist. type of q2phi (list of strings);
        - `q1theta`: premodel dists of thetas (tuple);
        - `phi`: model output (tuple of vectors);
        - `L`: size of the resample.
        - `lik`: list of likelihoods available

    Returns the tuple (w, qtiltheta, qtilphi, q1est), or None when no model
    output falls inside the limits.
    """
    no = len(phi) #Number of output variables

    ##==On Uniform Priors we have to trim the density borders========================
    ## The Density estimation with a gaussian kernel, extends beyond the limits of
    ## an uniform distribution, due to this fact, we clip the ends of the kde
    ## output in order to avoid artifacts.
    ##===============================================================================
    def density(sample, idx):
        # Kernel density of `sample`, bounded only for uniform priors.
        if q2type[idx] == 'uniform':
            return KDE(sample,tuple(limits[idx]))
        return KDE(sample)

    q2phi = [density(q2phi[i], i) for i in range(no)]
    #---filtering out Out-of-boundary thetas and phis-------------------------------
    print("shape de q1theta[0]:  %s" % (q1theta[0].shape,))
    phi_filt = FiltM(phi,phi,limits) #filter Phis
    if not phi_filt.any():
        print("Due to bad specification of the prior distributions or of the model\nthe inference can't continue. please verify that your priors include at least\npart of the range of the output variables.")
        return None
    #Remove thetas that generate out-of-bound phis for every phi
    q1theta_filt = FiltM(phi,array(q1theta),limits)
    phi_filt = array(phi_filt)
    # TODO: check to see if thetas or phis get empty due to bad priors!!!!

    #---Calculate Kernel Density of the filtered phis-------------------------------
    q1est = [density(phi_filt[i], i) for i in range(no)]

    ##==============================================================================
    ##Now, the two priors for Phi q2phi (derived from prior information and q1est
    ##(generated by the model from the q1theta(priors on the inputs)), are pooled.
    ##The pooling is done by logarithmic pooling using alpha as a weighting factor.
    ##The higher the value of alpha the more weight is given to q1est.
    ##==============================================================================
    #---Calculating the pooled prior of Phi-----------------------------------------
    qtilphi = array([(array(q2phi[i]['y'])**(1-alpha))*(array(q1est[i]['y'])**alpha) for i in range(no)])
    #---Calculating first term of the weight expression-----------------------------
    # TODO: Consider having a different alpha for each phi
    #pairwise pooling of the phis and q2phis
    firstterm = [(array(q2phi[i]['y'])/array(q1est[i]['y']))**(1-alpha) for i in range(no)]
    #---Weights---------------------------------------------------------------------
    if not lik:
        w = firstterm #---- without likelihoods -----#
    else:
        if len(lik)>1:
            prodlik = prod(array(lik),axis=0)
        else:
            #only one likelihood function
            prodlik = lik[0]
        w = [term*prodlik for term in firstterm]
    ##========Link weights with each phi[i]=========================================
    ## The weight vector (w) to be used in the resampling of the thetas is calculated
    ## from operations on densities. Consequently,its values are associated with
    ## values on the support of Phi, not with the actual Phi[i] as output by the
    ## model. Thus, its is necessary to recover the association between
    ## the Phi[i] (the outputs of each model run), and the weights
    ## associated with them. For that, the support for phi is divided into 1024 bins
    ## (the length of the weight vector), and the filtered Phi[i] are assigned to these bins
    ## according to their value. A new weight vector(wi) is then created in which
    ## the elements of w are positioned according to the position of the Phi[i]
    ## to which it corresponds.
    ##==============================================================================
    wi = []
    for i in range(no):
        (ll,ul) = limits[i] #limits of phi
        step = (ul-ll)/1024.
        bin_bound = arange(ll,ul,step) # Bin boundaries of the weight vector
        phi_bins = searchsorted(bin_bound, phi_filt[i]) # bin of each phi
        # searchsorted returns 1 as the index for the first bin, not 0.
        # The weights are looked up in the same loop that computes the bins,
        # instead of through a lambda bound to another loop's index.
        wi.append([w[i][b-1] for b in phi_bins])
    wi = mean(array(wi),axis=0) #ATTENTION: Should this be averaged?

    ##========Resampling q1theta=====================================================
    ## Here, the filtered q1theta are resampled according to the weight vector.
    ## L values are generated as indices to the weight vector wi(resamples) and used to resample
    ## the parameters.
    ##===============================================================================
    # A given value is going to be resampled if random() < wi
    # A column of q1theta_filt is extracted for each value in resamples
    q = [0]*L
    wi = nan_to_num(array(wi))
    print(sum(wi))
    if sum(wi) == 0:
        sys.exit('Resampling weights are all zero, please check your model or data.')
    j = 0
    while j < L: # Extract L samples from q1theta_filt
        i=randint(0,wi.size)# Random position of wi and q1theta_filt
        if random()<= wi[i]:
            q[j]=q1theta_filt[:,i]# retain the sample according with resampling prob.
            j+=1
    # q is a list of arrays which is converted to an array and then transposed.
    qtiltheta = transpose(array(q))
    return (w, qtiltheta, qtilphi, q1est)
840 841 842 843 # TODO: Implement calculation of Bayes factors! 844 #------------------------------------------------------------------------------- 845 ##==MAIN======================================================================== 846 #------------------------------------------------------------------------------- 847
def plotRaHist(arr):
    '''
    Plots a record array
    as a panel of histograms, one per field, laid out in two columns.

    :Parameters:
        - `arr`: record array; every field named in arr.dtype.names is plotted.
    '''
    nv = len(arr.dtype.names)
    # Floor division keeps the number of rows an integer under true division.
    nrows = nv//2+1
    P.figure()
    for i,n in enumerate(arr.dtype.names):
        P.subplot(nrows,2,i+1)
        P.hist(arr[n],bins=50, normed=1, label=n)
        # NOTE(review): the original nesting was lost in extraction; the
        # legend is drawn per subplot so that every label is shown -- confirm.
        P.legend()
def main():
    """
    testing function: runs the example model through the function based
    melding workflow (Likeli, Run, SIR) and plots priors and posteriors.
    """
    start = time()
    k = 20000 # Number of model runs
    L = 2000
    ll, ul = 6, 9
    data = normal(7.5,1,400)
    lik = [Likeli(data,'normal',(ll,ul))] #list of likelihoods

    q2phi = lhs.lhs(stats.uniform, (ll, ul), k)

    phi, q1theta = Run(k) # Runs the model
    print(len(q1theta))
    #---Restricting the range of phi------------------------------------------------
    w, post_theta, qtilphi, q1est = SIR(0.5,[q2phi],[(ll,ul)], ['uniform'],q1theta, [phi],L, lik)
    print("out of SIR")
    print(post_theta.shape)
    #--generating the posterior of phi----------------------------------------------
    r = randint(0,len(post_theta[0]),L) #random index for the marginal posterior of r
    p = randint(0,len(post_theta[1]),L) #random index for the marginal posterior of p0
    post_phi = zeros(L,float) #initializing post_phi
    for i in range(L): #Monte Carlo with values of the posterior of Theta
        post_phi[i] = model(post_theta[0][r[i]],post_theta[1][p[i]])[-1]

    end = time()
    print("%s  seconds" % (end-start))
    #---Plotting with matplotlib----------------------------------------------------
    P.figure(1)
    posterior_panels = (
        (411, post_theta[0], r'$\pi^{[r]}(\theta)$'),
        (412, post_theta[1], r'$\pi^{[P_0]}(\theta)$'),
        (413, post_phi, r'$\pi^{[P]}(\phi)$'),
    )
    for position,sample,label in posterior_panels:
        P.subplot(position)
        P.hist(sample,bins=50)
        P.ylabel(label,fontsize=18)
        if position == 411: # the figure title goes on the top panel
            P.title('Posteriors and weight vector')
    P.subplot(414)
    P.plot(w)
    P.ylabel(r'$W_i$', fontsize=12)

    P.figure(2)
    prior_panels = (
        (411, q1theta[0], r'$\theta r$'),
        (412, phi, r'$\phi$'),
        (413, q1theta[1], r'$\theta p_0$'),
        (414, q2phi, r'$q_2 \phi$'),
    )
    for position,sample,label in prior_panels:
        P.subplot(position)
        P.hist(sample,bins=50)
        P.ylabel(label,fontsize=18)
        if position == 411: # the figure title goes on the top panel
            P.title('Priors')
    P.show()
def main2():
    """
    testing function: runs the example model through the Meld class using
    the SIR algorithm and plots the posteriors of theta and phi.
    """
    t_start = time()
    melder = Meld(K=10000,L=2000,model=model, ntheta=2,nphi=1,verbose=True)
    melder.setTheta(['r','p0'],[stats.uniform,stats.uniform],[(2,4),(0,5)])
    melder.setPhi(['p'],[stats.uniform],[(6,9)],[(6,9)])
    melder.sir(data ={'p':[7.5]} )
    post_theta,post_phi = melder.getPosteriors()
    t_end = time()
    for posterior in (post_theta,post_phi):
        plotRaHist(posterior)
    P.show()
    print("%s  seconds" % (t_end-t_start))

if __name__ == '__main__':
    # main()
    main2()