1
2
3
4
5
6
7
8
9
10
11
12
13 from numpy.core.records import recarray
14 try:
15 import psyco
16 psyco.full()
17 except:
18 pass
19 import sys
20 import os
21 import cPickle as CP
22 import like
23 import pylab as P
24 from scipy.stats.kde import gaussian_kde
25 from scipy import stats
26 import numpy
27 from numpy import array, nan_to_num, zeros, product, exp, ones,mean, var
28 from time import time
29 from numpy.random import normal, randint, random, seed
30 try:
31 from BIP.Viz.realtime import RTplot
32 Viz=True
33 except:
34 Viz=False
35 print r"""Please install Gnuplot-py to enable realtime visualization.
36 http://gnuplot-py.sourceforge.net/
37 """
38 import lhs
39
40 from multiprocessing import Pool
41
42 __docformat__ = "restructuredtext en"
43
44
46 """
47 Bayesian Melding class
48 """
def __init__(self, K, L, model, ntheta, nphi, alpha = 0.5, verbose = False, viz=False ):
    """
    Initializes the Melding class.

    :Parameters:
        - `K`: Number of replicates of the model run. Also determines the prior sample size.
        - `L`: Number of samples from the Posterior distributions. Usually 10% of K.
        - `model`: Callable taking theta as argument and returning phi = M(theta).
        - `ntheta`: Number of inputs to the model (parameters).
        - `nphi`: Number of outputs of the model (State-variables)
        - `alpha`: Pooling weight. `logPooling` raises the q2phi density to `alpha` and the model-induced density to `1 - alpha`.
        - `verbose`: Boolean: whether to show more information about the computations
        - `viz`: Boolean. Whether to show graphical outputs of the fitting process. Ignored (forced to False) when the module-level `Viz` flag is False.
    """
    # One 8-byte float column per parameter / per state variable.
    theta_formats = ['f8'] * ntheta
    phi_formats = ['f8'] * nphi

    # Plain configuration.
    self.K, self.L = K, L
    self.ntheta, self.nphi = ntheta, nphi
    self.model = model
    self.alpha = alpha
    self.verbose = verbose

    # Bookkeeping containers.
    self.likelist = []
    self.q2type = []
    self.done_running = False

    # Record arrays: K rows for the priors, L rows for the posteriors.
    self.q1theta = recarray(K, formats=theta_formats)
    self.post_theta = recarray(L, formats=theta_formats)
    self.q2phi = recarray(K, formats=phi_formats)
    self.phi = recarray(K, formats=phi_formats)
    self.post_phi = recarray(L, formats=phi_formats)

    # Realtime plotting is only honoured when RTplot could be imported.
    self.viz = viz if Viz else False
81
82
def setPhi(self, names, dists=[stats.norm], pars=[(0, 1)], limits=[(-5,5)]):
    """
    Setup the models Outputs, or Phi, and generate the samples from prior distributions
    needed for the melding replicates.

    :Parameters:
        - `names`: list of string with the names of the variables.
        - `dists`: is a list of RNG from scipy.stats
        - `pars`: is a list of tuples of variables for each prior distribution, respectively.
        - `limits`: lower and upper limits on the support of variables. Only stored (as `self.plimits`); not used for sampling here.

    :Raises:
        `ValueError` if `names`, `dists` or `pars` do not each hold one
        entry per output variable.
    """
    if len(names) != self.nphi:
        raise ValueError("Number of names(%s) does not match the number of output variables(%s)."%(len(names),self.nphi))
    # zip() below would silently stop at the shortest list and leave the
    # remaining q2phi columns holding uninitialised memory.
    if len(dists) != len(names) or len(pars) != len(names):
        raise ValueError("Number of dists(%s) and pars(%s) must match the number of names(%s)."%(len(dists),len(pars),len(names)))
    # NOTE: the list defaults are shared between calls; they are never mutated here.
    self.q2phi.dtype.names = names
    self.phi.dtype.names = names
    self.post_phi.dtype.names = names
    self.plimits = limits
    for n, d, p in zip(names, dists, pars):
        # K prior draws per variable, flattened to one column.
        self.q2phi[n] = lhs.lhs(d, p, self.K).ravel()
        self.q2type.append(d.name)
103
104
105
def setTheta(self, names, dists=[stats.norm], pars=[(0, 1)]):
    """
    Setup the models inputs and generate the samples from prior distributions
    needed for the melding replicates.

    If a file called 'q1theta' exists in the current directory, the prior
    sample is loaded from it instead of being drawn again.

    :Parameters:
        - `names`: list of string with the names of the parameters.
        - `dists`: is a list of RNG from scipy.stats
        - `pars`: is a list of tuples of parameters for each prior distribution, respectively

    :Raises:
        `ValueError` if the number of names differs from `self.ntheta`, or
        (when sampling) if `dists`/`pars` do not hold one entry per name.
    """
    if len(names) != self.ntheta:
        raise ValueError("Number of names(%s) does not match the number of parameters(%s)."%(len(names),self.ntheta))
    self.q1theta.dtype.names = names
    self.post_theta.dtype.names = names
    if os.path.exists('q1theta'):
        # NOTE(review): unpickling executes arbitrary code; only resume from
        # a 'q1theta' file this program wrote itself.
        # Text mode is kept on purpose: the file is written with mode 'w'.
        with open('q1theta', 'r') as saved:
            self.q1theta = CP.load(saved)
        return
    # zip() below would silently stop at the shortest list and leave the
    # remaining q1theta columns holding uninitialised memory.
    if len(dists) != len(names) or len(pars) != len(names):
        raise ValueError("Number of dists(%s) and pars(%s) must match the number of names(%s)."%(len(dists),len(pars),len(names)))
    for n, d, p in zip(names, dists, pars):
        self.q1theta[n] = lhs.lhs(d, p, self.K).ravel()
123
# NOTE(review): the `def` line of this method is missing from the listing.
# The parameters come from the docstring and body; the method name is
# presumed -- confirm against the upstream source.
def setThetaFromData(self, names, data, limits):
    """
    Setup the model inputs and set the prior distributions from the vectors
    in data.
    This method is to be used when the prior distributions are available in
    the form of a sample from an empirical distribution such as a bayesian
    posterior.
    In order to expand the samples provided, K samples are generated from a
    kernel density estimate of the original sample.

    If a file called 'q1theta' exists in the current directory, the prior
    sample is loaded from it instead of being generated.

    :Parameters:
        - `names`: list of string with the names of the parameters.
        - `data`: list of vectors. Samples of a proposed distribution
        - `limits`: List of (min,max) tuples for each theta to make sure samples are not generated outside these limits.

    Draws are repeated until K in-range values are collected, so limits that
    exclude (almost) all of the estimated density make this loop run for a
    very long time.
    """
    self.q1theta.dtype.names = names
    self.post_theta.dtype.names = names
    if os.path.exists('q1theta'):
        # NOTE(review): unpickling executes arbitrary code; only resume from
        # a 'q1theta' file this program wrote itself.
        with open('q1theta', 'r') as saved:
            self.q1theta = CP.load(saved)
        return
    for i, (n, d) in enumerate(zip(names, data)):
        ll, ul = limits[i][0], limits[i][1]
        # The estimate depends only on `d`: build it once, not once per
        # rejection round.  Errors now propagate with their traceback
        # instead of being swallowed and turned into sys.exit().
        kde = gaussian_kde(d)
        smp = []
        while len(smp) < self.K:
            draw = kde.resample(self.K)[0]
            smp.extend(draw[(draw >= ll) & (draw <= ul)])
        self.q1theta[n] = array(smp[:self.K])
156
157
# NOTE(review): the `def` line of this method is missing from the listing.
# The parameters come from the docstring and body; the method name is
# presumed -- confirm against the upstream source.
def setPhiFromData(self, names, data, limits):
    """
    Setup the model outputs and set their prior distributions from the
    vectors in data.
    This method is to be used when the prior distributions are available in
    the form of a sample from an empirical distribution such as a bayesian
    posterior.
    In order to expand the samples provided, K samples are generated from a
    kernel density estimate of the original sample.

    :Parameters:
        - `names`: list of string with the names of the variables.
        - `data`: list of vectors. Samples of the proposed distribution.
        - `limits`: list of tuples (ll,ul),lower and upper limits on the support of variables.

    Draws are repeated until K in-range values are collected, so limits that
    exclude (almost) all of the estimated density make this loop run for a
    very long time.
    """
    self.q2phi.dtype.names = names
    self.phi.dtype.names = names
    self.post_phi.dtype.names = names
    self.limits = limits
    # The index used to be reset to 0 inside the loop, so every variable was
    # filtered with limits[0]; enumerate keeps variable and limits in step.
    for i, (n, d) in enumerate(zip(names, data)):
        ll, ul = limits[i][0], limits[i][1]
        kde = gaussian_kde(d)  # depends only on d: build once per variable
        smp = []
        while len(smp) < self.K:
            draw = kde.resample(self.K)[0]
            smp.extend(draw[(draw >= ll) & (draw <= ul)])
        self.q2phi[n] = array(smp[:self.K])
        self.q2type.append('empirical')
185
186
187
def run(self,*args):
    """
    Runs the model through the Melding inference.
    `self.model` is a callable which returns the output of the deterministic
    model, i.e. the model itself.
    The model is run self.K times to obtain phi = M(theta); the last element
    of each result is stored in `self.phi`.

    :Parameters:
        - `args`: accepted for backward compatibility; not used.
    """
    # The visible __init__ never creates `self.po`.  Use it when some other
    # code has attached a pool, otherwise work with a private one.
    pool = getattr(self, 'po', None)
    own_pool = pool is None
    if own_pool:
        pool = Pool()
    names = self.q1theta.dtype.names
    try:
        # Submit every replicate first so the workers can overlap, then
        # collect in submission order so row i still belongs to draw i.
        pending = [pool.apply_async(self.model, [self.q1theta[n][i] for n in names])
                   for i in range(self.K)]
        for i, job in enumerate(pending):
            self.phi[i] = job.get()[-1]
    finally:
        if own_pool:
            pool.close()
            pool.join()

    self.done_running = True
202
def getPosteriors(self,t=1):
    """
    Updates the posteriors of the model's output for the last t time steps.
    Returns two record arrays:
    - The posteriors of the Theta
    - the posterior of Phi last t values of time-series. self.L by `t` arrays.

    Returns None when no inference run has completed yet
    (`self.done_running` is False).

    :Parameters:
        - `t`: length of the posterior time-series to return.
    """
    if not self.done_running:
        return
    if t > 1:
        self.post_phi = recarray((self.L,t),formats=['f8']*self.nphi)
        self.post_phi.dtype.names = self.phi.dtype.names

    def cb(r):
        '''
        callback function for the asynchronous model runs.
        r: tuple with results of simulation (results, run#)
        '''
        if t == 1:
            self.post_phi[r[1]] = (r[0][-1],)
        else:
            self.post_phi[r[1]] = [tuple(l) for l in r[0][-t:]]

    po = Pool()
    try:
        # One column of row indices into post_theta per posterior run.
        pti = lhs.lhs(stats.randint,(0,self.L),siz=(self.ntheta,self.L))
        names = self.post_theta.dtype.names
        for i in range(self.L):
            theta = [self.post_theta[n][pti[j,i]] for j,n in enumerate(names)]
            po.apply_async(enumRun, (self.model,theta,i), callback=cb)
            if i%100 == 0 and self.verbose:
                print("==> L = %s"%i)
    finally:
        # Always release the worker processes, even if submission failed.
        po.close()
        po.join()
    return self.post_theta, self.post_phi
244
def filtM(self,cond,x,limits):
    '''
    Multiple condition filtering.
    Keeps only the columns of `x` for which every row `cond[i]` lies
    strictly between limits[i][0] and limits[i][1] (values equal to a
    limit are removed).

    :Parameters:
        - `cond`: is an array of conditions.
        - `limits`: is a list of tuples (ll,ul) with length equal to number of lines in `cond` and `x`.
        - `x`: array to be filtered.

    :Return:
        The filtered array. When `cond` is a record array, `x` is read as one
        as well and the result is a record array of shape (1, n_kept) carrying
        the field names of `cond`.
    '''
    names = []
    if isinstance(cond, recarray):
        names = list(cond.dtype.names)
        cond = [cond[v] for v in cond.dtype.names]
        x = [x[v] for v in x.dtype.names]

    cond = array(cond)
    keep = ones(cond.shape[1], int)
    for row, lim in zip(cond, limits):
        ll = lim[0]
        ul = lim[1]
        # less/greater/compress are not among the names imported at the top
        # of the file; reach them through the numpy module instead.
        keep = keep & numpy.less(row, ul) & numpy.greater(row, ll)
    f = numpy.compress(keep, x, axis=1)

    if names:
        r = recarray((1,f.shape[1]),formats=['f8']*len(names),names=names)
        for i,n in enumerate(names):
            r[n] = f[i]
        f = r

    return f
280
# NOTE(review): the `def` line is missing from the listing; it is restored
# from the call `self.basicfit(phi, data)` and the documented parameters.
def basicfit(self, s1, s2):
    '''
    Calculates a basic fitness calculation between a model-
    generated time series and a observed time series.
    It uses the root mean square deviation, averaged over the observed
    variables.

    :Parameters:
        - `s1`: model-generated time series. record array.
        - `s2`: observed time series. dictionary with keys matching names of s1

    :Return:
        Mean over variables of the root mean square deviation between `s1`
        and `s2`. Variables whose observations are empty or all zero are
        skipped; NaN is returned when nothing is left to compare.
    '''
    fit = []
    for k in s2.keys():
        # Observations may be plain lists (as in the module's own example) or
        # arrays; normalise first so that .any() is always available.
        obs = numpy.asarray(s2[k])
        if obs.size == 0 or not obs.any():
            continue  # no observations for this variable
        fit.append(numpy.sqrt(mean((s1[k] - obs) ** 2.)))

    if not fit:
        return numpy.nan
    return mean(fit)
302
303
# NOTE(review): the `def` line is missing from the listing; it is restored
# from the call `self.logPooling(phi)` and the documented parameter.
def logPooling(self, phi):
    """
    Returns the probability associated with each phi[i]
    on the pooled pdf of phi and q2phi.

    The pooled density is the model-induced density raised to
    `1 - self.alpha` times the q2phi density raised to `self.alpha`,
    evaluated at the last time step of every replicate and normalised to
    sum to one.

    :Parameters:
        - `phi`: prior of Phi induced by the model and q1theta.
    """
    # Kernel density estimates: one row per variable.
    induced_cols = array([phi[name][:, -1] for name in phi.dtype.names])
    induced_kde = gaussian_kde(induced_cols)

    prior_cols = array([self.q2phi[name] for name in self.q2phi.dtype.names])
    prior_kde = gaussian_kde(prior_cols)

    # Evaluation points: the last time step of each of the K replicates,
    # transposed to the (variables, points) layout used above.
    points = array([list(phi[row, -1]) for row in range(self.K)]).T

    induced_part = induced_kde.evaluate(points) ** (1 - self.alpha)
    prior_part = prior_kde.evaluate(points) ** self.alpha
    pooled = induced_part * prior_part
    return pooled / sum(pooled)
323
def abcRun(self,fitfun=None, data={}, t=1,nopool=False,savetemp=False):
    """
    Runs the model for inference through Approximate Bayes Computation
    techniques. This method should be used as an alternative to the sir.
    Returns 1 if successful or 0 if not.

    :Parameters:
        - `fitfun`: Callable `fitfun(phi_i, data)` scoring one replicate. Defaults to `self.basicfit`. NOTE(review): the scores are normalised and then used as `1 - score`, i.e. lower scores get higher resampling weight -- confirm this is the intended convention.
        - `t`: number of time steps to retain at the end of the model run for fitting purposes.
        - `data`: dict containing observed time series (lists of length t) of the state variables. This dict must have as many items as the number of state variables, with labels matching variable names. Unobserved variables must have an empty list as value.
        - `nopool`: True if no priors on the outputs are available; the log pooling step is skipped.
        - `savetemp`: Should temp results be saved. Useful for long runs.
    """
    seed()
    if not fitfun:
        fitfun = self.basicfit
    if savetemp:
        with open('q1theta', 'w') as out:
            CP.dump(self.q1theta, out)

    phi = self.runModel(savetemp,t)

    print("==> Done Running the K replicates\n")

    if nopool:
        qtilphi = ones(self.K)
    else:
        t0 = time()
        qtilphi = self.logPooling(phi)  # probability of each phi[i] under the pooled prior
        print("==> Done Running the Log Pooling (took %s seconds)\n"%(time()-t0))
        qtilphi = nan_to_num(qtilphi)

    if sum(qtilphi)==0:
        print('Pooled prior on ouputs is null, please check your priors, and try again.')
        return 0

    # Build a real array: the in-place division below is not defined for a
    # plain list and used to work only when fitfun returned numpy scalars.
    w = array([fitfun(phi[i],data) for i in range(phi.shape[0])], dtype=float)
    w /= w.sum()
    w = 1-w
    w = nan_to_num(w)
    w = w*qtilphi
    w /= w.sum()
    w = nan_to_num(w)
    print('max(w): %s\nmean(w): %s\nvar(w): %s'%(max(w), mean(w), var(w)))

    if w.sum() == 0.0:
        print('Resampling weights are all zero, please check your model or data.')
        return 0

    t0 = time()
    # Rejection sampling with acceptance proportional to w.  Scaling the
    # uniform draw by max(w), as sir() does, keeps the same target
    # distribution but avoids rejecting almost every proposal once w is
    # normalised over K entries.
    wmax = w.max()
    j = 0
    while j < self.L:
        i = randint(0,w.size)
        if random()*wmax <= w[i]:
            self.post_theta[j] = self.q1theta[i]
            j += 1
    print("==> Done Resampling (L=%s) priors (took %s seconds)"%(self.L,(time()-t0)))

    self.done_running = True
    return 1
391
def sir(self, data={}, t=1,tau=0.1, nopool=False,savetemp=False):
    """
    Run the model output through the Sampling-Importance-Resampling algorithm.
    Returns 1 if successful or 0 if not.

    :Parameters:
        - `data`: observed time series on the model's output. Dict keyed by variable name; variables whose value is an empty list or an all-zero array are treated as unobserved.
        - `t`: length of the observed time series
        - `tau`: Precision of the Normal likelihood function
        - `nopool`: True if no priors on the outputs are available. Leads to faster calculations
        - `savetemp`: Boolean. create a temp file?
    """
    seed()
    phi = self.runModel(savetemp,t)

    if nopool:
        qtilphi = ones(self.K)
    else:
        t0 = time()
        qtilphi = self.logPooling(phi)  # probability of each phi[i] under the pooled prior
        print("==> Done Running the Log Pooling (took %s seconds)\n"%(time()-t0))
        qtilphi = nan_to_num(qtilphi)
        print('max(qtilphi):  %s'%(max(qtilphi),))
        if sum(qtilphi)==0:
            print('Pooled prior on ouputs is null, please check your priors, and try again.')
            return 0

    # Which variables carry observations does not depend on the replicate,
    # so decide it once instead of K times.
    observed = []
    for n in data.keys():
        if isinstance(data[n],list) and data[n] == []:
            continue  # no observations for this variable
        elif isinstance(data[n],numpy.ndarray) and (not data[n].any()):
            continue  # no observations for this variable
        observed.append(n)

    lik = zeros(self.K)
    t0 = time()
    for i in range(self.K):
        l = 1
        for n in observed:
            p = phi[n]
            # like.Normal is exponentiated here, so it is presumably a
            # log-likelihood -- TODO confirm against the `like` module.
            l *= numpy.prod([exp(like.Normal(data[n][m], j,1./(tau))) for m,j in enumerate(p[i])])
        lik[i] = l

    if self.viz:
        # dtplot / phiplot / thplot are module-level RTplot instances.
        dtplot.clearFig();phiplot.clearFig();thplot.clearFig()
        dtplot.gp.xlabel('observed')
        dtplot.gp.ylabel('simulated')
        obs = [];sim = []
        for n in data.keys():
            obs.append(data[n])
            sim.append(phi[n].mean(axis=0).tolist())
        dtplot.scatter(array(obs),array(sim),names=data.keys(),title='fit')
        phiplot.plotlines(array(sim),names=data.keys(),title='Model Output')

    print("==> Done Calculating Likelihoods (took %s seconds)"%(time()-t0))
    lr = nan_to_num(max(lik)/min(lik))
    print('==> Likelihood (min,mean,max,sum):  %s %s %s %s'%(min(lik),mean(lik),max(lik),sum(lik)))
    print("==> Likelihood ratio of best run/worst run: %s"%(lr,))

    # Importance weights: pooled prior times likelihood, normalised.
    w = nan_to_num(qtilphi*lik)
    w = nan_to_num(w/sum(w))

    if sum(w) == 0.0:
        print('Resampling weights are all zero, please check your model or data, and try again.\n')
        print('==> Likelihood (min,mean,max):  %s %s %s'%(min(lik),mean(lik),max(lik)))
        print('==> RMS deviation of outputs: %s'%(self.basicfit(phi, data),))
        return 0

    t0 = time()
    # w is fixed during resampling: take its maximum once rather than
    # scanning all K weights on every proposal.
    wmax = max(w)
    maxw = 0;minw = wmax
    j = 0
    while j < self.L:
        i = randint(0,w.size)
        if random()*wmax <= w[i]:
            self.post_theta[j] = self.q1theta[i]
            maxw = max(maxw,w[i])
            minw = min(minw,w[i])
            j += 1
            if not j%100 and self.verbose:
                print("%s of %s"%(j,self.L))
    self.done_running = True
    print("==> Done Resampling (L=%s) priors (took %s seconds)"%(self.L,(time()-t0)))
    wr = maxw/minw
    print("==> Likelihood ratio of best/worst retained runs: %s"%(wr,))
    if wr == 1:
        print("==> Flat likelihood, trying again...")
        return 0
    print("==> Improvement: %s percent"%(100-100*wr/lr,))
    return 1
486
# NOTE(review): the `def` line is missing from the listing; it is restored
# from the calls `self.runModel(savetemp, t)`.  The default for `t` is an
# assumption -- confirm against the upstream source.
def runModel(self, savetemp, t=1):
    '''
    Handles running the model self.K times keeping a temporary savefile for
    resuming calculation in case of interruption.

    :Parameters:
        - `savetemp`: Boolean. create a temp file?
        - `t`: number of trailing time steps of each model run to keep.

    :Return:
        Record array of shape (self.K, t) with the model outputs.
    '''
    if savetemp:
        with open('q1theta', 'w') as out:
            CP.dump(self.q1theta, out)

    # Resume from a previous, interrupted run when a checkpoint exists.
    if os.path.exists('phi.temp'):
        # NOTE(review): unpickling executes arbitrary code; only resume from
        # a 'phi.temp' file this program wrote itself.
        with open('phi.temp', 'r') as saved:
            phi, j = CP.load(saved)
    else:
        j = 0
        phi = recarray((self.K,t),formats=['f8']*self.nphi, names = self.phi.dtype.names)

    def cb(r):
        '''
        callback function for the asynchronous model runs
        r: tuple (results, run#)
        '''
        if t == 1:
            phi[r[1]] = (r[0][-1],)
        else:
            phi[r[1]] = [tuple(l) for l in r[0][-t:]]

    po = Pool()
    t0 = time()
    try:
        for i in range(j,self.K):
            theta = [self.q1theta[n][i] for n in self.q1theta.dtype.names]
            po.apply_async(enumRun,(self.model,theta,i),callback=cb)
            if i%100 == 0:
                if self.verbose:
                    print("==> K = %s"%i)
                if savetemp:
                    # NOTE(review): `i` counts submitted jobs; some of them
                    # may not have reported back yet when this is written.
                    with open('phi.temp', 'w') as out:
                        CP.dump((phi,i), out)
    finally:
        # Wait for the workers before touching the temp files, and release
        # them even if submission failed.
        po.close()
        po.join()
    if savetemp:
        # All replicates are done: clear the temporary save files.
        for tmp in ('phi.temp', 'q1theta'):
            if os.path.exists(tmp):
                os.unlink(tmp)
    print("==> Done Running the K (%s) replicates (took %s seconds)\n"%(self.K,(time()-t0)))

    return phi
# NOTE(review): the `def` line is missing from the listing; it is restored
# from the calls `enumRun(self.model, theta, i)` and the documented parameters.
def enumRun(model, theta, k):
    """
    Returns model results plus run number.

    Defined at module level so it can be handed to `Pool.apply_async`; the
    run number lets the callback store the result in the right row.

    :Parameters:
        - `model`: model callable
        - `theta`: model input list
        - `k`: run number

    :Return:
        - res: result list
        - `k`: run number
    """
    res = model(*theta)
    return (res, k)
551
# NOTE(review): the `def` line is missing from the listing; the signature is
# restored from the first line of the docstring.
def model(r, p0, n=1):
    """
    Model (r,p0, n=1)
    Simulates the Population dynamic Model (PDM) Pt = rP0
    for n time steps.
    P0 is the initial population size.
    Example model for testing purposes.

    :Return:
        Array of length n with the population after each step.
    """
    Pt = zeros(n, float)
    P = p0
    for i in range(n):
        Pt[i] = r*P
        P = Pt[i]  # the next step grows from the value just stored

    return Pt
568
569
# NOTE(review): the `def` line is missing from the listing; it is restored
# from the calls `plotRaHist(pt)` / `plotRaHist(pp)`.
def plotRaHist(arr):
    '''
    Plots a record array
    as a panel of histograms, one subplot per field, laid out in two columns.
    '''
    nv = len(arr.dtype.names)
    # Floor division keeps the row count an integer on Python 2 and 3.
    nrows = nv//2 + 1
    P.figure()
    for i,n in enumerate(arr.dtype.names):
        P.subplot(nrows,2,i+1)
        # NOTE(review): `normed` was removed from recent matplotlib releases
        # (replaced by `density`); kept for the matplotlib this file targets.
        P.hist(arr[n],bins=50, normed=1, label=n)
        P.legend()
582
583
# NOTE(review): the `def` line is missing from the listing; the name is
# restored from the `main2()` call in the `__main__` guard.
def main2():
    """
    Example run: fits the example `model` with the SIR algorithm to a single
    observation of 'p', plots histograms of both posteriors and reports the
    elapsed time.
    """
    start = time()
    Me = Meld(K=10000,L=2000,model=model, ntheta=2,nphi=1,verbose=False,viz=False)
    Me.setTheta(['r','p0'],[stats.uniform,stats.uniform],[(2,4),(0,5)])
    Me.setPhi(['p'],[stats.uniform],[(6,9)],[(6,9)])

    Me.sir(data ={'p':[7.5]} )
    pt,pp = Me.getPosteriors()
    end = time()
    plotRaHist(pt)
    plotRaHist(pp)
    P.show()
    print('%s  seconds'%(end-start,))
# Realtime plot windows used by Meld.sir when visualization is enabled.
if Viz:
    dtplot = RTplot()
    phiplot = RTplot()
    thplot = RTplot()

# Run the example when the module is executed as a script.
if __name__ == '__main__':
    main2()
603