Package BIP :: Package Bayes :: Module Melding
[hide private]
[frames] | no frames]

Source Code for Module BIP.Bayes.Melding

  1  # -*- coding:utf-8 -*- 
  2  #----------------------------------------------------------------------------- 
  3  # Name:        Melding.py 
  4  # Purpose:     The Bayesian melding Class provides 
  5  #              uncertainty analyses for simulation models. 
  6  # 
  7  # Author:      Flávio Codeço Coelho 
  8  # 
  9  # Created:     2003/08/10 
 10  # Copyright:   (c) 2003-2009 by the Author 
 11  # Licence:     GPL v3 
 12  #----------------------------------------------------------------------------- 
 13  from numpy.core.records import recarray 
 14  try: 
 15      import psyco 
 16      psyco.full() 
 17  except: 
 18      pass 
 19  import sys 
 20  import os 
 21  import copy 
 22  import cPickle as CP 
 23  import like 
 24  import pylab as P 
 25  from scipy.stats.kde import gaussian_kde 
 26  from scipy.linalg import LinAlgError 
 27  from scipy import stats 
 28  import numpy 
 29  from numpy import array, nan_to_num, zeros, product, exp, ones,mean, var,sqrt,floor 
 30  from time import time 
 31  from numpy.random import normal, randint,  random, seed 
 32  import PlotMeld as PM 
 33  try: 
 34      from BIP.Viz.realtime import RTplot 
 35      Viz=True 
 36  except: 
 37      Viz=False 
 38      print r"""Please install Gnuplot-py to enable realtime visualization. 
 39      http://gnuplot-py.sourceforge.net/ 
 40      """ 
 41  import lhs 
 42   
 43  from multiprocessing import Pool 
 44  if Viz: 
 45      dtplot = RTplot();phiplot = RTplot();thplot = RTplot() 
 46   
 47  __docformat__ = "restructuredtext en" 
 48   
49 -class FitModel:
50 """ 51 Fit a model to data generating 52 Bayesian posterior distributions of input and 53 outputs of the model. 54 """
55 - def __init__(self, K,L,model, ntheta, nphi,inits,tf,phinames,thetanames,wl=None,nw=1,verbose=False):
56 """ 57 Initialize the model fitter. 58 59 :Parameters: 60 - `K`: Number of samples from the priors. On MCMC also the number of samples of the posterior. 61 - `L`: Number of samples of the posteriors. Only used on SIR and ABC methods. 62 - `model`: Callable (function) returning the output of the model, from a set of parameter values received as argument. 63 - `ntheta`: Number of parameters included in the inference. 64 - `nphi`: Number of outputs of the model. 65 - `inits`: inits initial values for the model's variables. 66 - `tf`: Length of the simulation, in units of time. 67 - `phinames`: List of names (strings) with names of the model's variables 68 - `thetanames`: List of names (strings) with names of parameters included on the inference. 69 - `wl`: window lenght length of the inference window. 70 - `nw`: Number of windows to analyze on iterative inference mode 71 - `verbose`: Verbose output if True. 72 """ 73 self.Me = Meld(K=K,L=L,model=model,ntheta=3,nphi=5,verbose=verbose) 74 self.inits = inits 75 self.tf = tf 76 self.model = model 77 self.nphi = nphi 78 self.ntheta = ntheta 79 self.phinames = phinames 80 self.thetanames = thetanames 81 self.wl = wl 82 self.nw = nw 83 self.done_running = False 84 self.prior_set = False
85
86 - def set_priors(self,pdists,ppars,tdists,tpars):
87 """ 88 Set the prior distributions for Phi and Theta 89 90 :Parameters: 91 - `pdists`: distributions for the output variables. For example: [scipy.stats.uniform,scipy.stats.norm] 92 - `ppars`: paramenters for the distributions in pdists. For example: [(0,1),(0,1)] 93 - `tdists`: same as pdists, but for input parameters (Theta). 94 - `tpars`: same as ppars, but for tdists. 95 """ 96 self.pdists = pdists 97 self.ppars = ppars 98 self.tdists = tdists 99 self.tpars = tpars 100 self.prior_set = True
101
102 - def _init_priors(self, prior=None):
103 """ 104 """ 105 if prior['theta'] and prior['phi']: 106 self.Me.setThetaFromData(names_theta,prior['theta'],[(0,.9),(0,.0001),(0,1)]) 107 self.Me.setPhiFromData(names_phi,prior['phi'],[(0,1)]*self.nphi) 108 else: 109 print "++++>" 110 self.Me.setTheta(self.thetanames,[stats.uniform]*nt,[(0.001,.2),(0,.0001),(0.1,.5)]) 111 self.Me.setPhi(self.phinames,[stats.uniform]*np,[(0,1)]*self.nphi,[(0,1)]*self.nphi)
112 - def do_inference(self, prior, data, predlen, method):
113 """ 114 """ 115 self._init_priors(prior) 116 succ=0 117 att = 1 118 if method == "SIR": 119 while not succ: #run sir Until is able to get a fit 120 print 'attempt #',att 121 succ = self.Me.sir(data=data,variance=1e-6,nopool=True,t=tf) 122 att += 1 123 pt,series = self.Me.getPosteriors(t=tf) 124 elif method == "MCMC": 125 while not succ: #run sir Until is able to get a fitd == "mcmc": 126 print 'attempt #',att 127 succ = self.Me.mcmc_run(data,t=tf,likvariance=.5e-7,burnin=1000) 128 pt,series = self.Me.getPosteriors(t=tf) 129 130 elif method == "ABC": 131 Me.abcRun(data=data,fitfun=fitInc,t=105) 132 pt,series = Me.getPosteriors(t=tf)
133
134 - def run(self, data,method,monitor=False):
135 """ 136 Fit the model against data 137 138 :Parameters: 139 - `data`: dictionary with variable names and observed series, as Key and value respectively. 140 - `method`: Inference method: "ABC", "SIR" or "MCMC" 141 """ 142 if not self.prior_set: return 143 if monitor: 144 self._monitor_setup() 145 start = time.time() 146 d = data 147 prior = {'theta':[],'phi':[]} 148 os.system('rm wres_*') 149 wl = self.wl 150 for w in range(self.nw): 151 print '==> Window # %s of %s!'%(w,nw) 152 self.tf=wl 153 d2 = {} 154 for k,v in d.items():#Slicing data to the current window 155 d2[k] = v[w*wl:w*wl+wl] 156 print v.shape 157 if w==0: 158 self.inits[2] = d2['I'][0] 159 self.inits[0] += 1-sum(self.inits) #adjusting sunceptibles 160 pt,pp,series,predseries,att = self.do_inference(data=d2, prior=prior,predlen=wl, method=method) 161 f = open('wres_%s'%w,'w') 162 #save weekly posteriors of theta and phi, posteriors of series, data (d) and predictions(z) 163 cPickle.dump((pt,pp,series,d,predseries, att*K),f) 164 f.close() 165 prior = {'theta':[],'phi':[]} 166 for n in pt.dtype.names: 167 prior['theta'].append(pt[n]) 168 #beta,alpha,sigma,Ri = median(pt.beta),median(pt.alpha),median(pt.sigma),median(pt.Ri 169 for n in pp.dtype.names: 170 #print compress(isinf(pp[n]),pp[n]) 171 prior['phi'].append(pp[n]) 172 if monitor: 173 self._monitor_plot(series,prior) 174 print "time: %s seconds"%(time.time()-start) 175 self.done_running = True
176 177
178 - def _monitor_setup(self):
179 """ 180 Sets up realtime plotting of inference 181 """ 182 self.hst = RTplot() #realtime display of theta histograms 183 self.hsp = RTplot()#realtime display of phi histograms 184 self.ser = RTplot()#realtime display of phi time series
185 - def _monitor_plot(self, series, prior ):
186 """ 187 Plots real time data 188 """ 189 self.ser.plotlines(d2[r'I'],style='points', names=['Obs. I']) 190 i5 = array([stats.scoreatpercentile(t,2.5) for t in series[r'I'].T]) 191 i95 = array([stats.scoreatpercentile(t,97.5) for t in series[r'I'].T]) 192 diff = abs(series.I[:,-1]-(d2['I'][-1])) 193 initind = diff.tolist().index(min(diff)) 194 self.ser.plotlines(data=series[r'I'][initind], names=["Best run's I"]) 195 self.ser.plotlines(data=i5, names=['2.5%']) 196 self.ser.plotlines(data=i95, names=['97.5%'],title='Week %s'%(w+1)) 197 self.hst.plothist(data=array(prior['theta']),names=list(self.thetanames),title='Week %s'%(w+1)) 198 self.ser.clearFig() 199 self.hst.clearFig() 200 self.hsp.clearFig()
201
202 - def plot_results(self, names=[]):
203 """ 204 Plot the final results of the inference 205 """ 206 if not self.done_running: 207 return 208 pt,pp,series,predseries = self._read_results() 209 PM.plot_par_series(range(len(pt)),pt) 210 PM.plot_par_violin(range(len(pt)),pt) 211 P.figure() 212 PM.predNewCases(obs,predseries,nc,fig,names,ws) 213 plot_series2(range(nc*ws),obs,series,names=names) 214 plot_series2(range(nc*ws),obs,predseries,names=names,tit='Predicted vs. Observed series',lag=True) 215 P.show()
216
217 - def _read_results(self):
218 """ 219 read results from disk 220 """ 221 pt,pp,series,predseries = [],[],[],[] 222 for w in range(self.nw): 223 fn = 'wres_%s'%w 224 print fn 225 f = open(fn,'r') 226 a,b,c,obs,pred, samples = cPickle.load(f) 227 f.close() 228 229 pt.append(a) 230 pp.append(b) 231 series.append(c) 232 predseries.append(pred) 233 return pt,pp,series,predseries
234 235
236 -class Meld:
237 """ 238 Bayesian Melding class 239 """
240 - def __init__(self, K, L, model, ntheta, nphi, alpha = 0.5, verbose = False, viz=False ):
241 """ 242 Initializes the Melding class. 243 244 :Parameters: 245 - `K`: Number of replicates of the model run. Also determines the prior sample size. 246 - `L`: Number of samples from the Posterior distributions. Usually 10% of K. 247 - `model`: Callable taking theta as argument and returning phi = M(theta). 248 - `ntheta`: Number of inputs to the model (parameters). 249 - `nphi`: Number of outputs of the model (State-variables) 250 - `verbose`: Boolean: whether to show more information about the computations 251 - `viz`: Boolean. Wether to show graphical outputs of the fitting process 252 """ 253 self.K = K 254 self.L = L 255 self.verbose = verbose 256 self.model = model 257 self.likelist = [] #list of likelihoods 258 self.q1theta = recarray(K,formats=['f8']*ntheta) #Theta Priors (record array) 259 self.post_theta = recarray(L,formats=['f8']*ntheta) #Theta Posteriors (record array) 260 self.q2phi = recarray(K,formats=['f8']*nphi) #Phi Priors (record array) 261 self.phi = recarray(K,formats=['f8']*nphi) #Phi model-induced Priors (record array) 262 self.q2type = [] #list of distribution types 263 self.post_phi = recarray(L,formats=['f8']*nphi) #Phi Posteriors (record array) 264 self.ntheta = ntheta 265 self.nphi = nphi 266 self.thetapars = [] 267 self.phipars = [] 268 self.alpha = alpha #pooling weight of user-provided phi priors 269 self.done_running = False 270 self.theta_dists = {}#parameterized rngs for each parameter 271 self.phi_dists = {}#parameterized rngs for each variable 272 self.proposal_variance = 0.0000001 273 self.adaptscalefactor = 1 #adaptive variance. Used my metropolis hastings 274 self.salt_band = 0.1 275 if Viz: #Gnuplot installed 276 self.viz = viz 277 else: 278 self.viz = False
279 # self.po = Pool() #pool of processes for parallel processing 280
281 - def setPhi(self, names, dists=[stats.norm], pars=[(0, 1)], limits=[(-5,5)]):
282 """ 283 Setup the models Outputs, or Phi, and generate the samples from prior distributions 284 needed for the melding replicates. 285 286 :Parameters: 287 - `names`: list of string with the names of the variables. 288 - `dists`: is a list of RNG from scipy.stats 289 - `pars`: is a list of tuples of variables for each prior distribution, respectively. 290 - `limits`: lower and upper limits on the support of variables. 291 """ 292 if len(names) != self.nphi: 293 raise ValueError("Number of names(%s) does not match the number of output variables(%s)."%(len(names),self.nphi)) 294 self.q2phi.dtype.names = names 295 self.phi.dtype.names = names 296 self.post_phi.dtype.names = names 297 self.plimits = limits 298 self.phipars = pars 299 for n,d,p in zip(names,dists,pars): 300 self.q2phi[n] = lhs.lhs(d,p,self.K).ravel() 301 self.q2type.append(d.name) 302 self.phi_dists[n]=d(*p)
303 304 305
306 - def setTheta(self, names, dists=[stats.norm], pars=[(0, 1)]):
307 """ 308 Setup the models inputs and generate the samples from prior distributions 309 needed for the dists the melding replicates. 310 311 :Parameters: 312 - `names`: list of string with the names of the parameters. 313 - `dists`: is a list of RNG from scipy.stats 314 - `pars`: is a list of tuples of parameters for each prior distribution, respectivelydists 315 """ 316 self.q1theta.dtype.names = names 317 self.post_theta.dtype.names = names 318 self.thetapars = pars 319 if os.path.exists('q1theta'): 320 self.q1theta = CP.load(open('q1theta','r')) 321 else: 322 for n,d,p in zip(names,dists,pars): 323 self.q1theta[n] = lhs.lhs(d,p,self.K).ravel() 324 self.theta_dists[n]=d(*p).rvs
325
326 - def add_salt(self,dataset,band):
327 """ 328 Adds a few extra uniformly distributed data 329 points beyond the dataset range. 330 This is done by adding from a uniform dist. 331 332 :Parameters: 333 - `dataset`: vector of data 334 - `band`: Fraction of range to extend [0,1[ 335 336 :Returns: 337 Salted dataset. 338 """ 339 dmax = max(dataset) 340 dmin = min(dataset) 341 drange = dmax-dmin 342 hb = drange*band/2. 343 d = numpy.concatenate((dataset,stats.uniform(dmin-hb,dmax-dmin+hb).rvs(self.K*.05))) 344 return d
345
346 - def setThetaFromData(self,names,data, limits):
347 """ 348 Setup the model inputs and set the prior distributions from the vectors 349 in data. 350 This method is to be used when the prior distributions are available in 351 the form of a sample from an empirical distribution such as a bayesian 352 posterior. 353 In order to expand the samples provided, K samples are generated from a 354 kernel density estimate of the original sample. 355 356 :Parameters: 357 - `names`: list of string with the names of the parameters. 358 - `data`: list of vectors. Samples of a proposed distribution 359 - `limits`: List of (min,max) tuples for each theta to make sure samples are not generated outside these limits. 360 """ 361 self.q1theta.dtype.names = names 362 self.post_theta.dtype.names = names 363 tlimits = dict(zip(names,limits)) 364 class Proposal: 365 def __init__(self,name, dist): 366 self.name = name 367 self.dist = dist
368 def __call__(self): 369 smp = -numpy.inf 370 while not (smp>=tlimits[self.name][0] and smp<=tlimits[self.name][1]): 371 smp = self.dist.resample(1)[0][0] 372 return smp
373 374 if os.path.exists('q1theta'): 375 self.q1theta = CP.load(open('q1theta','r')) 376 else: 377 for n,d,lim in zip(names,data,limits): 378 smp = [] 379 #add some points uniformly across the support 380 #to help MCMC to escape from prior bounds 381 salted = self.add_salt(d,self.salt_band) 382 383 dist = gaussian_kde(salted) 384 while len(smp)<self.K: 385 smp += [x for x in dist.resample(self.K)[0] if x >= lim[0] and x <= lim[1]] 386 #print self.q1theta[n].shape, array(smp[:self.K]).shape 387 self.q1theta[n] = array(smp[:self.K]) 388 self.theta_dists[n] = Proposal(copy.deepcopy(n),copy.deepcopy(dist)) 389
390 - def setPhiFromData(self,names,data,limits):
391 """ 392 Setup the model outputs and set their prior distributions from the 393 vectors in data. 394 This method is to be used when the prior distributions are available in 395 the form of a sample from an empirical distribution such as a bayesian 396 posterior. 397 In order to expand the samples provided, K samples are generated from a 398 kernel density estimate of the original sample. 399 400 :Parameters: 401 - `names`: list of string with the names of the variables. 402 - `data`: list of vectors. Samples of the proposed distribution. 403 - `limits`: list of tuples (ll,ul),lower and upper limits on the support of variables. 404 """ 405 self.q2phi.dtype.names = names 406 self.phi.dtype.names = names 407 self.post_phi.dtype.names = names 408 self.limits = limits 409 for n,d in zip(names,data): 410 i = 0 411 smp = [] 412 while len(smp)<self.K: 413 try: 414 smp += [x for x in gaussian_kde(d).resample(self.K)[0] if x >= limits[i][0] and x <= limits[i][1]] 415 except: 416 #d is has no variation, i.e., all elements are the same 417 #print d 418 #raise LinAlgError, "Singular matrix" 419 smp = ones(self.K)*d[0] #in this case return a constant sample 420 self.q2phi[n] = array(smp[:self.K]) 421 self.q2type.append('empirical') 422 i += 1
423 #self.q2phi = self.filtM(self.q2phi, self.q2phi, limits) 424 425
426 - def run(self,*args):
427 """ 428 Runs the model through the Melding inference.model 429 model is a callable which return the output of the deterministic model, 430 i.e. the model itself. 431 The model is run self.K times to obtain phi = M(theta). 432 """ 433 434 for i in xrange(self.K): 435 theta = [self.q1theta[n][i] for n in self.q1theta.dtype.names] 436 r = self.po.apply_async(self.model, theta) 437 self.phi[i]= r.get()[-1]#self.model(*theta)[-1] #phi is the last point in the simulation 438 439 self.done_running = True
440
441 - def getPosteriors(self,t):
442 """ 443 Updates the posteriors of the model's output for the last t time steps. 444 Returns two record arrays: 445 - The posteriors of the Theta 446 - the posterior of Phi last t values of time-series. self.L by `t` arrays. 447 448 :Parameters: 449 - `t`: length of the posterior time-series to return. 450 """ 451 if not self.done_running: 452 return 453 if t > 1: 454 self.post_phi = recarray((self.L,t),formats=['f8']*self.nphi) 455 self.post_phi.dtype.names = self.phi.dtype.names 456 def cb(r): 457 ''' 458 callback function for the asynchronous model runs. 459 r: tuple with results of simulation (results, run#) 460 ''' 461 if t == 1: 462 self.post_phi[r[1]] = (r[0][-1],) 463 #self.post_phi[r[1]]= [tuple(l) for l in r[0][-t:]] 464 else: 465 self.post_phi[r[1]]= [tuple(l) for l in r[0][-t:]]
466 po = Pool() 467 #random indices for the marginal posteriors of theta 468 pti = lhs.lhs(stats.randint,(0,self.L),siz=(self.ntheta,self.L)) 469 for i in xrange(self.L):#Monte Carlo with values of the posterior of Theta 470 theta = [self.post_theta[n][pti[j,i]] for j,n in enumerate(self.post_theta.dtype.names)] 471 po.apply_async(enumRun, (self.model,theta,i), callback=cb) 472 # r = po.apply_async(self.model,theta) 473 # if t == 1: 474 # self.post_phi[i] = r.get()[-1] 475 # else: 476 # self.post_phi[i]= [tuple(l) for l in r.get()[-t:]] 477 if i%100 == 0 and self.verbose: 478 print "==> L = %s\r"%i 479 480 po.close() 481 po.join() 482 return self.post_theta, self.post_phi 483
484 - def filtM(self,cond,x,limits):
485 ''' 486 Multiple condition filtering. 487 Remove values in x[i], if corresponding values in 488 cond[i] are less than limits[i][0] or greater than 489 limits[i][1]. 490 491 :Parameters: 492 - `cond`: is an array of conditions. 493 - `limits`: is a list of tuples (ll,ul) with length equal to number of lines in `cond` and `x`. 494 - `x`: array to be filtered. 495 ''' 496 # Deconstruct the record array, if necessary. 497 names = [] 498 if isinstance(cond, recarray): 499 names = list(cond.dtype.names) 500 cond = [cond[v] for v in cond.dtype.names] 501 x = [x[v] for v in x.dtype.names] 502 503 cond = array(cond) 504 cnd = ones(cond.shape[1],int) 505 for i,j in zip(cond,limits): 506 ll = j[0] 507 ul = j[1] 508 #print cond.shape,cnd.shape,i.shape,ll,ul 509 cnd = cnd & less(i,ul) & greater(i,ll) 510 f = compress(cnd,x, axis=1) 511 512 if names:#Reconstruct the record array 513 r = recarray((1,f.shape[1]),formats=['f8']*len(names),names=names) 514 for i,n in enumerate(names): 515 r[n]=f[i] 516 f=r 517 518 return f
519
520 - def basicfit(self,s1,s2):
521 ''' 522 Calculates a basic fitness calculation between a model- 523 generated time series and a observed time series. 524 it uses a normalized RMS variation. 525 526 :Parameters: 527 - `s1`: model-generated time series. record array. 528 - `s2`: observed time series. dictionary with keys matching names of s1 529 530 :Return: 531 Root mean square deviation between ´s1´ and ´s2´. 532 ''' 533 fit = [] 534 for k in s2.keys(): 535 if s2[k] == [] or (not s2[k].any()): 536 continue #no observations for this variable 537 e = numpy.sqrt(mean((s1[k]-s2[k])**2.)) 538 fit.append(e) #min to guarantee error is bounded to (0,1) 539 540 return mean(fit) #mean r-squared
541 542
543 - def logPooling(self,phi):
544 """ 545 Returns the probability associated with each phi[i] 546 on the pooled pdf of phi and q2phi. 547 548 :Parameters: 549 - `phi`: prior of Phi induced by the model and q1theta. 550 """ 551 552 # Estimating the multivariate joint probability densities 553 phidens = gaussian_kde(array([phi[n][:,-1] for n in phi.dtype.names])) 554 555 q2dens = gaussian_kde(array([self.q2phi[n] for n in self.q2phi.dtype.names])) 556 # Determining the pooled probabilities for each phi[i] 557 # qtilphi = zeros(self.K) 558 lastp = array([list(phi[i,-1]) for i in xrange(self.K)]) 559 # print lastp,lastp.shape 560 qtilphi = (phidens.evaluate(lastp.T)**(1-self.alpha))*q2dens.evaluate(lastp.T)**self.alpha 561 return qtilphi/sum(qtilphi)
562
563 - def abcRun(self,fitfun=None, data={}, t=1,nopool=False,savetemp=False):
564 """ 565 Runs the model for inference through Approximate Bayes Computation 566 techniques. This method should be used as an alternative to the sir. 567 568 :Parameters: 569 - `fitfun`: Callable which will return the goodness of fit of the model to data as a number between 0-1, with 1 meaning perfect fit 570 - `t`: number of time steps to retain at the end of the of the model run for fitting purposes. 571 - `data`: dict containing observed time series (lists of length t) of the state variables. This dict must have as many items the number of state variables, with labels matching variables names. Unorbserved variables must have an empty list as value. 572 - `savetemp`: Should temp results be saved. Useful for long runs. Alows for resuming the simulation from last sa 573 """ 574 seed() 575 if not fitfun: 576 fitfun = self.basicfit 577 if savetemp: 578 CP.dump(self.q1theta,open('q1theta','w')) 579 # Running the model ========================== 580 phi = self.runModel(savetemp,t) 581 582 print "==> Done Running the K replicates\n" 583 # Do Log Pooling 584 if nopool: 585 qtilphi = ones(self.K) 586 else: 587 t0 = time() 588 qtilphi = self.logPooling(phi) #vector with probability of each phi[i] belonging to qtilphi 589 print "==> Done Running the Log Pooling (took %s seconds)\n"%(time()-t0) 590 qtilphi = nan_to_num(qtilphi) 591 #print 'max(qtilphi): ', max(qtilphi) 592 if sum(qtilphi)==0: 593 print 'Pooled prior on ouputs is null, please check your priors, and try again.' 594 return 0 595 # 596 # calculate weights 597 w = [fitfun(phi[i],data) for i in xrange(phi.shape[0])] 598 w /=sum(w) 599 w = 1-w 600 #print "w=",w, mean(w), var(w) 601 # print 602 # print 'qtilphi=',qtilphi 603 # Resampling Thetas 604 w = nan_to_num(w) 605 w = array(w)*qtilphi 606 w /=sum(w) 607 w = nan_to_num(w) 608 print 'max(w): %s\nmean(w): %s\nvar(w): %s'%(max(w), mean(w), var(w)) 609 if sum(w) == 0.0: 610 print 'Resampling weights are all zero, please check your model or data.' 611 return 0 612 t0 = time() 613 j = 0 614 while j < self.L: # Extract L samples from q1theta 615 i=randint(0,w.size)# Random position of w and q1theta 616 if random()<= w[i]: 617 self.post_theta[j] = self.q1theta[i]# retain the sample according with resampling prob. 618 j+=1 619 print "==> Done Resampling (L=%s) priors (took %s seconds)"%(self.L,(time()-t0)) 620 621 self.done_running = True 622 return 1
623
624 - def imp_sample(self,n,data, w):
625 """ 626 Importance sampling 627 628 :Returns: 629 returns a sample of size n 630 """ 631 #sanitizing weights 632 print "Starting importance Sampling" 633 w /=sum(w) 634 w = nan_to_num(w) 635 j=0 636 k=0 637 smp = copy.deepcopy(data[:n]) 638 while j < n: 639 i = randint(0,w.size)# Random position of w 640 if random() <= w[i]: 641 smp[j] = data[j] 642 j += 1 643 644 k+=1 645 print "Done imp samp." 646 return smp
647
648 - def mcmc_run(self, data, t=1, likvariance=10,burnin=100, nopool=False, method="MH" ):
649 """ 650 MCMC based fitting 651 652 :Parameters: 653 - `data`: observed time series on the model's output 654 - `t`: length of the observed time series 655 - `likvariance`: variance of the Normal likelihood function 656 - `nopool`: True if no priors on the outputs are available. Leads to faster calculations 657 - `method`: Step method. defaults to Metropolis hastings 658 """ 659 self.phi = recarray((self.K,t),formats=['f8']*self.nphi, names = self.phi.dtype.names) 660 if method == "MH": 661 self.mh(self.K,t,data,like.Normal,likvariance,burnin) 662 else: 663 sys.exit("Invalid MCMC method!\nTry 'MH'.") 664 self.done_running = 1 665 return 1
666 667
668 - def mh(self, n,t, data, likfun,variance, burnin):
669 """ 670 Simple Metropolis hastings. 671 Assumes symmetric proposal distributions. 672 673 :Parameters: 674 - `n`: number of samples to obtain 675 - `t`: Length of time-series 676 - `data`: Observation dictionary {'variable':[time-series]} 677 - `likfun`: likelihood function to use 678 - `variance`: variance of the likelihood function 679 - `burnin`: number of samples to discard at the beginning of the chain. 680 """ 681 def tune(ar): 682 if ar<0.05: 683 self.adaptscalefactor *= 0.5 684 elif ar <0.2: 685 self.adaptscalefactor *= 0.9 686 elif ar >0.9: 687 self.adaptscalefactor *= 10 688 elif ar >0.75: 689 self.adaptscalefactor *= 2 690 elif ar >0.5: 691 self.adaptscalefactor *= 1.1
692 ptheta = recarray((self.K),formats=['f8']*self.ntheta, names = self.post_theta.dtype.names) 693 i=0;j=0;rej=0 #total samples,accepted samples, rejected proposals 694 last_lik = None 695 liklist = [] 696 while j < n: 697 #generating proposals 698 pvar = self.proposal_variance*self.adaptscalefactor 699 theta = [self.theta_dists[dist]() for dist in self.q1theta.dtype.names] 700 prop = self.model(*theta) 701 if t == 1: 702 prop=[(v,) for v in prop] 703 #calculate likelihoods 704 lik=0 705 for k in xrange(self.nphi): #loop on series 706 if self.q2phi.dtype.names[k] not in data: 707 continue#Only calculate liks of series for which we have data 708 obs = data[self.q2phi.dtype.names[k]] 709 for p in xrange(t): #Loop on time 710 lik += likfun(obs[p],prop[p][k],1./variance) 711 #store samples 712 if last_lik == None: #on first sample 713 last_lik = lik 714 continue 715 716 if not (lik-last_lik) > numpy.log(1): 717 if not numpy.log(random())< lik-last_lik: 718 rej +=1 #adjust rejection counter 719 i +=1 720 #print theta 721 continue 722 if i >= burnin:#store only after burnin samples 723 self.phi[j] = prop[0] if t==1 else [tuple(point) for point in prop] 724 ptheta[j] = tuple(theta) 725 liklist.append(lik) 726 j += 1 #update good sample counter 727 last_lik = lik 728 i+=1 729 if i%n ==0: 730 print "==>",n,"\r" 731 732 self.post_theta = self.imp_sample(self.L,ptheta,liklist) 733 ar = (i-rej)/float(i) 734 if ar > 0.9: 735 if self.salt_band < 0: 736 self.salt_band = 0.05 737 elif self.salt_band < 30: 738 self.salt_band *= 10 739 elif ar > 0.7: 740 self.salt_band *= 2 741 elif ar < 0.4: 742 self.salt_band = 0.001 #no more expansion 743 print "Total steps(i): ",i,"rej:",rej, "j:",j 744 print ">>> Salt-Band: %s"%self.salt_band 745 print ">>> Acceptance rate: %s"%ar 746 747 return 1 748 749 750 751
752 - def sir(self, data={}, t=1,variance=0.1, nopool=False,savetemp=False):
753 """ 754 Run the model output through the Sampling-Importance-Resampling algorithm. 755 Returns 1 if successful or 0 if not. 756 757 :Parameters: 758 - `data`: observed time series on the model's output 759 - `t`: length of the observed time series 760 - `variance`: variance of the Normal likelihood function 761 - `nopool`: True if no priors on the outputs are available. Leads to faster calculations 762 - `savetemp`: Boolean. create a temp file? 763 """ 764 seed() 765 phi = self.runModel(savetemp,t) 766 # Do Log Pooling 767 if nopool: 768 qtilphi = ones(self.K) 769 else: 770 t0 = time() 771 qtilphi = self.logPooling(phi) #vector with probability of each phi[i] belonging to qtilphi 772 print "==> Done Running the Log Pooling (took %s seconds)\n"%(time()-t0) 773 qtilphi = nan_to_num(qtilphi) 774 print 'max(qtilphi): ', max(qtilphi) 775 if sum(qtilphi)==0: 776 print 'Pooled prior on ouputs is null, please check your priors, and try again.' 777 return 0 778 779 # Calculating the likelihood of each phi[i] considering the observed data 780 lik = zeros(self.K) 781 t0=time() 782 # po = Pool() 783 for i in xrange(self.K): 784 l=1 785 for n in data.keys(): 786 if isinstance(data[n],list) and data[n] == []: 787 continue #no observations for this variable 788 elif isinstance(data[n],numpy.ndarray) and (not data[n].any()): 789 continue #no observations for this variable 790 p = phi[n] 791 # liklist=[po.apply_async(like.Normal,(data[n][m], j, tau)) for m,j in enumerate(p[i])] 792 # l=product([p.get() for p in liklist]) 793 l *= product([exp(like.Normal(data[n][m], j,1./(variance))) for m,j in enumerate(p[i])]) 794 #l += sum([like.Normal(data[n][m], j,1./(tau*j+.0001)) for m,j in enumerate(p[i])]) 795 796 lik[i]=l 797 # po.close() 798 # po.join() 799 if self.viz: 800 dtplot.clearFig();phiplot.clearFig();thplot.clearFig() 801 dtplot.gp.xlabel('observed') 802 dtplot.gp.ylabel('simulated') 803 obs = [];sim =[] 804 for n in data.keys(): 805 obs.append(data[n]) 806 sim.append(phi[n].mean(axis=0).tolist()) 807 dtplot.scatter(array(obs),array(sim),names=data.keys(),title='fit') 808 phiplot.plotlines(array(sim),names=data.keys(),title='Model Output') 809 thplot.plothist(self.q1theta, title='Input parameters',names=self.q1theta.dtype.names) 810 print "==> Done Calculating Likelihoods (took %s seconds)"%(time()-t0) 811 lr = nan_to_num(max(lik)/min(lik)) 812 print '==> Likelihood (min,mean,max,sum): ',min(lik),mean(lik),max(lik), sum(lik) 813 print "==> Likelihood ratio of best run/worst run: %s"%(lr,) 814 # Calculating the weights 815 w = nan_to_num(qtilphi*lik) 816 w = nan_to_num(w/sum(w)) 817 818 if not sum(w) == 0.0: 819 j = 0 820 t0 = time() 821 maxw = 0;minw = max(w) #keep track of goodness of fit of phi 822 while j < self.L: # Extract L samples from q1theta 823 i=randint(0,w.size)# Random position of w and q1theta 824 if random()*max(w)<= w[i]: 825 self.post_theta[j] = self.q1theta[i]# retain the sample according with resampling prob. 826 maxw = max(maxw,w[i]) 827 minw = min(minw,w[i]) 828 j+=1 829 if not j%100 and self.verbose: 830 print j, "of %s"%self.L 831 self.done_running = True 832 print "==> Done Resampling (L=%s) priors (took %s seconds)"%(self.L,(time()-t0)) 833 wr = maxw/minw 834 print "==> Likelihood ratio of best/worst retained runs: %s"%(wr,) 835 if wr == 1: 836 print "==> Flat likelihood, trying again..." 837 return 0 838 print "==> Improvement: %s percent"%(100-100*wr/lr,) 839 else: 840 print 'Resampling weights are all zero, please check your model or data, and try again.\n' 841 print '==> Likelihood (min,mean,max): ',min(lik),mean(lik),max(lik) 842 print '==> RMS deviation of outputs: %s'%(self.basicfit(phi, data),) 843 return 0 844 return 1
845
846 - def runModel(self,savetemp,t=1, k=None):
847 ''' 848 Handles running the model k times keeping a temporary savefile for 849 resuming calculation in case of interruption. 850 851 :Parameters: 852 - `savetemp`: Boolean. create a temp file? 853 ''' 854 if savetemp: 855 CP.dump(self.q1theta,open('q1theta','w')) 856 if not k: 857 k = self.K 858 # Running the model ========================== 859 860 861 if os.path.exists('phi.temp'): 862 self.phi,j = CP.load(open('phi.temp','r')) 863 else: 864 j=0 865 self.phi = recarray((k,t),formats=['f8']*self.nphi, names = self.phi.dtype.names) 866 def cb(r): 867 ''' 868 callback function for the asynchronous model runs 869 ''' 870 if t == 1: 871 self.phi[r[1]] = (r[0][-1],) 872 else: 873 self.phi[r[1]] = [tuple(l) for l in r[0][-t:]]# #phi is the last t points in the simulation
874 875 po = Pool() 876 t0=time() 877 for i in xrange(j,k): 878 theta = [self.q1theta[n][i] for n in self.q1theta.dtype.names] 879 r = po.apply_async(enumRun,(self.model,theta,i),callback=cb) 880 # r = po.apply_async(self.model,theta) 881 # if t == 1: 882 # phi[i] = (r.get()[-1],) 883 # else: 884 # phi[i] = [tuple(l) for l in r.get()[-t:]]# #phi is the last t points in the simulation 885 if i%100 == 0 and self.verbose: 886 print "==> K = %s"%i 887 if savetemp: 888 CP.dump((self.phi,i),open('phi.temp','w')) 889 if savetemp: #If all replicates are done, clear temporary save files. 890 os.unlink('phi.temp') 891 os.unlink('q1theta') 892 po.close() 893 po.join() 894 print "==> Done Running the K (%s) replicates (took %s seconds)\n"%(k,(time()-t0)) 895 896 return self.phi 897 898
899 -def enumRun(model,theta,k):
900 """ 901 Returns model results plus run number. 902 903 :Parameters: 904 - `model`: model callable 905 - `theta`: model input list 906 - `k`: run number 907 908 :Return: 909 - res: result list 910 - `k`: run number 911 """ 912 res =model(*theta) 913 return (res,k)
914 915 916
917 -def model(r, p0, n=1):
918 """ 919 Model (r,p0, n=1) 920 Simulates the Population dynamic Model (PDM) Pt = rP0 921 for n time steps. 922 P0 is the initial population size. 923 Example model for testing purposes. 924 """ 925 # print "oi" 926 Pt = zeros(n, float) # initialize the output vector 927 P = p0 928 for i in xrange(n): 929 Pt[i] = r*P 930 P = Pt[i] 931 932 return Pt
933 934
935 -def plotRaHist(arr):
936 ''' 937 Plots a record array 938 as a panel of histograms 939 ''' 940 nv = len(arr.dtype.names) 941 fs = (numpy.ceil(numpy.sqrt(nv)),numpy.floor(numpy.sqrt(nv))+1) #figure size 942 P.figure() 943 for i,n in enumerate(arr.dtype.names): 944 P.subplot(floor(sqrt(nv)),floor(sqrt(nv))+1,i+1) 945 P.hist(arr[n],bins=50, normed=1, label=n) 946 P.legend()
947 948
949 -def main2():
950 start = time() 951 #Me = Meld(K=5000,L=1000,model=model, ntheta=2,nphi=1,verbose=False,viz=False) 952 Me.setTheta(['r','p0'],[stats.uniform,stats.uniform],[(2,4),(0,5)]) 953 Me.setPhi(['p'],[stats.uniform],[(6,9)],[(6,9)]) 954 #Me.add_data(normal(7.5,1,400),'normal',(6,9)) 955 #Me.run() 956 Me.sir(data ={'p':[7.5]} ) 957 pt,pp = Me.getPosteriors(1) 958 end = time() 959 print end-start, ' seconds' 960 plotRaHist(pt) 961 plotRaHist(pp) 962 P.show()
963 964
965 -def mh_test():
966 start = time() 967 #Me = Meld(K=5000,L=1000,model=model, ntheta=2,nphi=1,verbose=False,viz=False) 968 Me.setTheta(['r','p0'],[stats.uniform,stats.uniform],[(2,4),(0,5)]) 969 Me.setPhi(['p'],[stats.uniform],[(6,9)],[(6,9)]) 970 #Me.add_data(normal(7.5,1,400),'normal',(6,9)) 971 #Me.run() 972 Me.mcmc_run(data ={'p':[7.5]},burnin=1000) 973 pt,pp = Me.getPosteriors(1) 974 end = time() 975 print end-start, ' seconds' 976 plotRaHist(pt) 977 plotRaHist(pp) 978 P.show()
979 980 981 if __name__ == '__main__': 982 Me = Meld(K=5000,L=1000,model=model, ntheta=2,nphi=1,verbose=False,viz=False) 983 mh_test() 984 #main2() 985