1
2
3
4
5
6
7
8
9
10
11
12
13 from numpy.core.records import recarray
14 try:
15 import psyco
16 psyco.full()
17 except:
18 pass
19 import sys
20 import os
21 import cPickle as CP
22 import like
23 import pylab as P
24 from scipy.stats.kde import gaussian_kde
25 from scipy.linalg import LinAlgError
26 from scipy import stats
27 import numpy
28 from numpy import array, nan_to_num, zeros, product, exp, ones,mean, var
29 from time import time
30 from numpy.random import normal, randint, random, seed
31 try:
32 from BIP.Viz.realtime import RTplot
33 Viz=True
34 except:
35 Viz=False
36 print r"""Please install Gnuplot-py to enable realtime visualization.
37 http://gnuplot-py.sourceforge.net/
38 """
39 import lhs
40
41 from multiprocessing import Pool
42 if Viz:
43 dtplot = RTplot();phiplot = RTplot();thplot = RTplot()
44
45 __docformat__ = "restructuredtext en"
46
47
49 """
50 Bayesian Melding class
51 """
def __init__(self, K, L, model, ntheta, nphi, alpha=0.5, verbose=False, viz=False):
    """
    Allocate the containers used by the melding procedure.

    :Parameters:
        - `K`: Number of replicates of the model run. Also the size of the prior samples.
        - `L`: Number of samples kept for the posterior distributions.
        - `model`: Callable taking theta as argument and returning phi = M(theta).
        - `ntheta`: Number of inputs to the model (parameters).
        - `nphi`: Number of outputs of the model (state variables).
        - `alpha`: Stored as `self.alpha`; defaults to 0.5.
        - `verbose`: Boolean. Whether to show more information about the computations.
        - `viz`: Boolean. Whether to show graphical output of the fitting process.
          Only honoured when the module-level flag `Viz` is true.
    """
    theta_formats = ['f8'] * ntheta
    phi_formats = ['f8'] * nphi

    # Plain settings.
    self.K = K
    self.L = L
    self.ntheta = ntheta
    self.nphi = nphi
    self.alpha = alpha
    self.verbose = verbose
    self.model = model

    # Bookkeeping lists and state flag.
    self.likelist = []
    self.q2type = []
    self.done_running = False

    # Record arrays: K rows for priors/model output, L rows for posteriors.
    self.q1theta = recarray(K, formats=theta_formats)
    self.post_theta = recarray(L, formats=theta_formats)
    self.q2phi = recarray(K, formats=phi_formats)
    self.phi = recarray(K, formats=phi_formats)
    self.post_phi = recarray(L, formats=phi_formats)

    # Visualisation is forced off when the plotting backend failed to import.
    self.viz = viz if Viz else False
84
85
def setPhi(self, names, dists=[stats.norm], pars=[(0, 1)], limits=[(-5,5)]):
    """
    Set up the model outputs (Phi) and draw the prior samples needed for
    the melding replicates.

    :Parameters:
        - `names`: list of strings with the names of the variables.
        - `dists`: list of scipy.stats distributions, one per name.
        - `pars`: list of parameter tuples, one per distribution.
        - `limits`: lower and upper limits on the support of the variables.

    :Raises:
        `ValueError` if `names` does not have `self.nphi` entries, or if
        `dists`/`pars` do not have one entry per name.
    """
    if len(names) != self.nphi:
        raise ValueError("Number of names(%s) does not match the number of output variables(%s)."%(len(names),self.nphi))
    # zip() would silently stop at the shortest list and leave the remaining
    # columns of q2phi holding whatever memory the recarray was allocated with.
    if len(dists) != len(names) or len(pars) != len(names):
        raise ValueError("Expected %s distributions and parameter tuples, got %s and %s."
                         % (len(names), len(dists), len(pars)))

    self.q2phi.dtype.names = names
    self.phi.dtype.names = names
    self.post_phi.dtype.names = names
    # Copy so the instance never aliases the shared default-argument list.
    self.plimits = list(limits)
    for n, d, p in zip(names, dists, pars):
        self.q2phi[n] = lhs.lhs(d, p, self.K).ravel()
        self.q2type.append(d.name)
106
107
108
def setTheta(self, names, dists=[stats.norm], pars=[(0, 1)]):
    """
    Set up the model inputs and draw the prior samples needed for the
    melding replicates.

    If a file called 'q1theta' exists in the working directory, the prior
    sample is loaded from it instead of being drawn again.

    :Parameters:
        - `names`: list of strings with the names of the parameters.
        - `dists`: list of scipy.stats distributions, one per name.
        - `pars`: list of parameter tuples, one per distribution.

    :Raises:
        `ValueError` if `names` does not have `self.ntheta` entries, or if a
        new sample must be drawn and `dists`/`pars` do not have one entry
        per name.
    """
    if len(names) != self.ntheta:
        raise ValueError("Number of names(%s) does not match the number of parameters(%s)."
                         % (len(names), self.ntheta))
    resume = os.path.exists('q1theta')
    # zip() would silently stop at the shortest list and leave the remaining
    # columns of q1theta uninitialised.
    if not resume and (len(dists) != len(names) or len(pars) != len(names)):
        raise ValueError("Expected %s distributions and parameter tuples, got %s and %s."
                         % (len(names), len(dists), len(pars)))

    self.q1theta.dtype.names = names
    self.post_theta.dtype.names = names
    if resume:
        # NOTE(review): unpickling executes arbitrary code; only resume from
        # a 'q1theta' file written by this program.
        # Mode 'r' is kept to match the text-mode writers elsewhere in the file.
        with open('q1theta', 'r') as fobj:
            self.q1theta = CP.load(fobj)
    else:
        for n, d, p in zip(names, dists, pars):
            self.q1theta[n] = lhs.lhs(d, p, self.K).ravel()
126
128 """
129 Setup the model inputs and set the prior distributions from the vectors
130 in data.
131 This method is to be used when the prior distributions are available in
132 the form of a sample from an empirical distribution such as a bayesian
133 posterior.
134 In order to expand the samples provided, K samples are generated from a
135 kernel density estimate of the original sample.
136
137 :Parameters:
138 - `names`: list of string with the names of the parameters.
139 - `data`: list of vectors. Samples of a proposed distribution
140 - `limits`: List of (min,max) tuples for each theta to make sure samples are not generated outside these limits.
141 """
142 self.q1theta.dtype.names = names
143 self.post_theta.dtype.names = names
144 if os.path.exists('q1theta'):
145 self.q1theta = CP.load(open('q1theta','r'))
146 else:
147 i = 0
148 for n,d in zip(names,data):
149 smp = []
150 while len(smp)<self.K:
151 smp += [x for x in gaussian_kde(d).resample(self.K)[0] if x >= limits[i][0] and x <= limits[i][1]]
152
153 self.q1theta[n] = array(smp[:self.K])
154 i += 1
155
156
158 """
159 Setup the model outputs and set their prior distributions from the
160 vectors in data.
161 This method is to be used when the prior distributions are available in
162 the form of a sample from an empirical distribution such as a bayesian
163 posterior.
164 In order to expand the samples provided, K samples are generated from a
165 kernel density estimate of the original sample.
166
167 :Parameters:
168 - `names`: list of string with the names of the variables.
169 - `data`: list of vectors. Samples of the proposed distribution.
170 - `limits`: list of tuples (ll,ul),lower and upper limits on the support of variables.
171 """
172 self.q2phi.dtype.names = names
173 self.phi.dtype.names = names
174 self.post_phi.dtype.names = names
175 self.limits = limits
176 for n,d in zip(names,data):
177 i = 0
178 smp = []
179 while len(smp)<self.K:
180 try:
181 smp += [x for x in gaussian_kde(d).resample(self.K)[0] if x >= limits[i][0] and x <= limits[i][1]]
182 except:
183
184
185
186 smp = ones(self.K)*d[0]
187 self.q2phi[n] = array(smp[:self.K])
188 self.q2type.append('empirical')
189 i += 1
190
191
192
def run(self,*args):
    """
    Run the model once for each of the self.K prior samples.

    For every row i of self.q1theta the model is called with that row's
    values as positional arguments, and the last element of its result is
    stored in self.phi[i]. Sets self.done_running when finished.

    A worker pool is used only if the instance has a `po` attribute;
    otherwise the model is called directly. The pooled path waited for
    each result before submitting the next one, so both paths evaluate
    the runs one at a time.

    :Parameters:
        - `args`: ignored.
    """
    # `po` is not created by __init__, so it must not be assumed to exist.
    pool = getattr(self, 'po', None)
    names = self.q1theta.dtype.names
    for i in range(self.K):
        theta = [self.q1theta[n][i] for n in names]
        if pool is None:
            result = self.model(*theta)
        else:
            result = pool.apply_async(self.model, theta).get()
        self.phi[i] = result[-1]

    self.done_running = True
207
def getPosteriors(self,t):
    """
    Re-run the model on draws from the posterior of Theta and collect the
    last `t` steps of each run as the posterior of Phi.

    Returns None if the inference has not been run yet
    (self.done_running is false). Otherwise returns a tuple
    (self.post_theta, self.post_phi).

    :Parameters:
        - `t`: length of the posterior time-series to return.
    """
    if not self.done_running:
        return

    if t > 1:
        # One row per posterior sample, one column per retained time step.
        self.post_phi = recarray((self.L, t), formats=['f8'] * self.nphi)
        self.post_phi.dtype.names = self.phi.dtype.names

    def store_result(r):
        '''
        Callback for the asynchronous model runs.
        r: tuple with results of simulation (results, run#)
        '''
        series, run_no = r[0], r[1]
        if t == 1:
            self.post_phi[run_no] = (series[-1],)
        else:
            self.post_phi[run_no] = [tuple(step) for step in series[-t:]]

    workers = Pool()
    # Row indices into post_theta, drawn separately for each parameter.
    pti = lhs.lhs(stats.randint, (0, self.L), siz=(self.ntheta, self.L))
    theta_names = self.post_theta.dtype.names
    for i in range(self.L):
        theta = []
        for j, n in enumerate(theta_names):
            theta.append(self.post_theta[n][pti[j, i]])
        workers.apply_async(enumRun, (self.model, theta, i), callback=store_result)
        if self.verbose and i % 100 == 0:
            print("==> L = %s" % i)

    workers.close()
    workers.join()
    return self.post_theta, self.post_phi
250
def filtM(self,cond,x,limits):
    '''
    Multiple condition filtering.

    Keeps column k of `x` only when, for every row i, cond[i][k] lies
    strictly between limits[i][0] and limits[i][1]. Values equal to a
    limit are removed.

    :Parameters:
        - `cond`: array of conditions (one row per variable), or a record array.
        - `x`: array to be filtered, with the same layout as `cond`.
        - `limits`: list of tuples (ll,ul), one per row of `cond`.

    :Return:
        The filtered array. When `cond` is a record array, a record array of
        shape (1, n_kept) whose field names are taken from `cond` and whose
        values are taken from the fields of `x`.
    '''
    names = []
    if isinstance(cond, numpy.recarray):
        names = list(cond.dtype.names)
        cond = [cond[v] for v in cond.dtype.names]
        x = [x[v] for v in x.dtype.names]

    cond = numpy.array(cond)
    keep = numpy.ones(cond.shape[1], dtype=bool)
    for row, bounds in zip(cond, limits):
        ll = bounds[0]
        ul = bounds[1]
        # Called through the numpy module: less/greater/compress are not
        # among the names this file imports from numpy.
        keep &= numpy.less(row, ul) & numpy.greater(row, ll)
    f = numpy.compress(keep, x, axis=1)

    if names:
        r = numpy.recarray((1, f.shape[1]), formats=['f8'] * len(names), names=names)
        for i, n in enumerate(names):
            r[n] = f[i]
        f = r

    return f
286
288 '''
289 Calculates a basic fitness calculation between a model-
290 generated time series and a observed time series.
291 it uses a normalized RMS variation.
292
293 :Parameters:
294 - `s1`: model-generated time series. record array.
295 - `s2`: observed time series. dictionary with keys matching names of s1
296
297 :Return:
298 Root mean square deviation between ´s1´ and ´s2´.
299 '''
300 fit = []
301 for k in s2.keys():
302 if s2[k] == [] or (not s2[k].any()):
303 continue
304 e = numpy.sqrt(mean((s1[k]-s2[k])**2.))
305 fit.append(e)
306
307 return mean(fit)
308
309
311 """
312 Returns the probability associated with each phi[i]
313 on the pooled pdf of phi and q2phi.
314
315 :Parameters:
316 - `phi`: prior of Phi induced by the model and q1theta.
317 """
318
319
320 phidens = gaussian_kde(array([phi[n][:,-1] for n in phi.dtype.names]))
321
322 q2dens = gaussian_kde(array([self.q2phi[n] for n in self.q2phi.dtype.names]))
323
324
325 lastp = array([list(phi[i,-1]) for i in xrange(self.K)])
326
327 qtilphi = (phidens.evaluate(lastp.T)**(1-self.alpha))*q2dens.evaluate(lastp.T)**self.alpha
328 return qtilphi/sum(qtilphi)
329
def abcRun(self,fitfun=None, data={}, t=1,nopool=False,savetemp=False):
    """
    Runs the model for inference through Approximate Bayes Computation
    techniques. This method should be used as an alternative to the sir.

    Returns 1 if the posterior of theta was resampled, or 0 if the pooled
    prior or the resampling weights are all zero.

    :Parameters:
        - `fitfun`: Callable fitfun(phi_i, data) returning a non-negative misfit
          for one model run; larger values get a smaller resampling weight.
          Defaults to self.basicfit.
        - `data`: dict containing observed time series (lists of length t) of
          the state variables, with labels matching the variable names.
          It is passed unchanged to `fitfun` and never modified here.
        - `t`: number of time steps to retain at the end of the model run
          for fitting purposes.
        - `nopool`: if True, skip the log pooling and use a flat pooled prior.
        - `savetemp`: Should temporary results be saved. Useful for long runs;
          it is forwarded to self.runModel.
    """
    seed()
    if not fitfun:
        fitfun = self.basicfit
    if savetemp:
        with open('q1theta', 'w') as fobj:
            CP.dump(self.q1theta, fobj)

    phi = self.runModel(savetemp, t)
    print("==> Done Running the K replicates\n")

    if nopool:
        qtilphi = ones(self.K)
    else:
        t0 = time()
        qtilphi = self.logPooling(phi)
        print("==> Done Running the Log Pooling (took %s seconds)\n" % (time() - t0))
        qtilphi = nan_to_num(qtilphi)
        if sum(qtilphi) == 0:
            print('Pooled prior on ouputs is null, please check your priors, and try again.')
            return 0

    # Float array up front: avoids relying on list/numpy coercion and on
    # integer division when fitfun returns integers.
    w = array([fitfun(phi[i], data) for i in range(phi.shape[0])], dtype=float)
    w /= w.sum()
    w = 1 - w          # small misfit -> large weight
    w = nan_to_num(w)
    w = w * qtilphi
    w /= sum(w)
    w = nan_to_num(w)
    print('max(w): %s\nmean(w): %s\nvar(w): %s' % (max(w), mean(w), var(w)))

    if sum(w) == 0.0:
        print('Resampling weights are all zero, please check your model or data.')
        return 0

    t0 = time()
    # Rejection sampling scaled by the largest weight, as in sir(). The
    # accepted draws follow the same distribution as before, but far fewer
    # candidates are rejected than when comparing against weights of order 1/K.
    wmax = max(w)
    j = 0
    while j < self.L:
        i = randint(0, w.size)
        if random() * wmax <= w[i]:
            self.post_theta[j] = self.q1theta[i]
            j += 1
    print("==> Done Resampling (L=%s) priors (took %s seconds)" % (self.L, (time() - t0)))

    self.done_running = True
    return 1
397
def sir(self, data={}, t=1,variance=0.1, nopool=False,savetemp=False):
    """
    Run the model output through the Sampling-Importance-Resampling algorithm.
    Returns 1 if successful or 0 if not (null pooled prior, all-zero
    weights, or a flat likelihood among the retained runs).

    :Parameters:
        - `data`: observed time series on the model's output, keyed by
          variable name. Empty lists and all-zero arrays are skipped.
        - `t`: length of the observed time series
        - `variance`: variance of the Normal likelihood function
        - `nopool`: True if no priors on the outputs are available. Leads to faster calculations
        - `savetemp`: Boolean. create a temp file?
    """
    seed()
    phi = self.runModel(savetemp, t)

    if nopool:
        qtilphi = ones(self.K)
    else:
        t0 = time()
        qtilphi = self.logPooling(phi)
        print("==> Done Running the Log Pooling (took %s seconds)\n" % (time() - t0))
        qtilphi = nan_to_num(qtilphi)
        print('max(qtilphi):  %s' % (max(qtilphi),))
        if sum(qtilphi) == 0:
            print('Pooled prior on ouputs is null, please check your priors, and try again.')
            return 0

    # Likelihood of each of the K runs: product over variables and time steps.
    lik = zeros(self.K)
    t0 = time()
    for i in range(self.K):
        l = 1
        for n in data.keys():
            if isinstance(data[n], list) and data[n] == []:
                continue
            elif isinstance(data[n], numpy.ndarray) and (not data[n].any()):
                continue
            p = phi[n]
            # numpy.prod instead of the `product` alias, which NumPy 2.0 removed.
            l *= numpy.prod([numpy.exp(like.Normal(data[n][m], j, 1. / (variance)))
                             for m, j in enumerate(p[i])])
        lik[i] = l

    if self.viz:
        dtplot.clearFig();phiplot.clearFig();thplot.clearFig()
        dtplot.gp.xlabel('observed')
        dtplot.gp.ylabel('simulated')
        obs = [];sim = []
        for n in data.keys():
            obs.append(data[n])
            sim.append(phi[n].mean(axis=0).tolist())
        dtplot.scatter(array(obs), array(sim), names=data.keys(), title='fit')
        phiplot.plotlines(array(sim), names=data.keys(), title='Model Output')
        thplot.plothist(self.q1theta, title='Input parameters', names=self.q1theta.dtype.names)
    print("==> Done Calculating Likelihoods (took %s seconds)" % (time() - t0))
    lr = nan_to_num(max(lik) / min(lik))
    print('==> Likelihood (min,mean,max,sum):  %s %s %s %s' % (min(lik), mean(lik), max(lik), sum(lik)))
    print("==> Likelihood ratio of best run/worst run: %s" % (lr,))

    # Importance weights: pooled prior times likelihood, normalised.
    w = nan_to_num(qtilphi * lik)
    w = nan_to_num(w / sum(w))

    if sum(w) == 0.0:
        print('Resampling weights are all zero, please check your model or data, and try again.\n')
        print('==> Likelihood (min,mean,max):  %s %s %s' % (min(lik), mean(lik), max(lik)))
        print('==> RMS deviation of outputs: %s' % (self.basicfit(phi, data),))
        return 0

    j = 0
    t0 = time()
    # The largest weight is loop-invariant; compute it once instead of on
    # every candidate draw.
    wmax = max(w)
    maxw = 0
    minw = wmax
    while j < self.L:
        i = randint(0, w.size)
        if random() * wmax <= w[i]:
            self.post_theta[j] = self.q1theta[i]
            maxw = max(maxw, w[i])
            minw = min(minw, w[i])
            j += 1
            if not j % 100 and self.verbose:
                print("%s of %s" % (j, self.L))
    self.done_running = True
    print("==> Done Resampling (L=%s) priors (took %s seconds)" % (self.L, (time() - t0)))
    wr = maxw / minw
    print("==> Likelihood ratio of best/worst retained runs: %s" % (wr,))
    if wr == 1:
        print("==> Flat likelihood, trying again...")
        return 0
    print("==> Improvement: %s percent" % (100 - 100 * wr / lr,))
    return 1
492
494 '''
495 Handles running the model self.K times keeping a temporary savefile for
496 resuming calculation in case of interruption.
497
498 :Parameters:
499 - `savetemp`: Boolean. create a temp file?
500 '''
501 if savetemp:
502 CP.dump(self.q1theta,open('q1theta','w'))
503
504
505
506 if os.path.exists('phi.temp'):
507 self.phi,j = CP.load(open('phi.temp','r'))
508 else:
509 j=0
510 self.phi = recarray((self.K,t),formats=['f8']*self.nphi, names = self.phi.dtype.names)
511 def cb(r):
512 '''
513 callback function for the asynchronous model runs
514 '''
515 if t == 1:
516 self.phi[r[1]] = (r[0][-1],)
517 else:
518 self.phi[r[1]] = [tuple(l) for l in r[0][-t:]]
519
520 po = Pool()
521 t0=time()
522 for i in xrange(j,self.K):
523 theta = [self.q1theta[n][i] for n in self.q1theta.dtype.names]
524 r = po.apply_async(enumRun,(self.model,theta,i),callback=cb)
525
526
527
528
529
530 if i%100 == 0 and self.verbose:
531 print "==> K = %s"%i
532 if savetemp:
533 CP.dump((self.phi,i),open('phi.temp','w'))
534 if savetemp:
535 os.unlink('phi.temp')
536 os.unlink('q1theta')
537 po.close()
538 po.join()
539 print "==> Done Running the K (%s) replicates (took %s seconds)\n"%(self.K,(time()-t0))
540
541 return self.phi
543 """
544 Returns model results plus run number.
545
546 :Parameters:
547 - `model`: model callable
548 - `theta`: model input list
549 - `k`: run number
550
551 :Return:
552 - res: result list
553 - `k`: run number
554 """
555 res =model(*theta)
556 return (res,k)
557
559 """
560 Model (r,p0, n=1)
561 Simulates the Population dynamic Model (PDM) Pt = rP0
562 for n time steps.
563 P0 is the initial population size.
564 Example model for testing purposes.
565 """
566
567 Pt = zeros(n, float)
568 P = p0
569 for i in xrange(n):
570 Pt[i] = r*P
571 P = Pt[i]
572
573 return Pt
574
575
577 '''
578 Plots a record array
579 as a panel of histograms
580 '''
581 nv = len(arr.dtype.names)
582 fs = (numpy.ceil(numpy.sqrt(nv)),numpy.floor(numpy.sqrt(nv))+1)
583 P.figure()
584 for i,n in enumerate(arr.dtype.names):
585 P.subplot(nv/2+1,2,i+1)
586 P.hist(arr[n],bins=50, normed=1, label=n)
587 P.legend()
588
589
591 start = time()
592 Me = Meld(K=5000,L=1000,model=model, ntheta=2,nphi=1,verbose=False,viz=False)
593 Me.setTheta(['r','p0'],[stats.uniform,stats.uniform],[(2,4),(0,5)])
594 Me.setPhi(['p'],[stats.uniform],[(6,9)],[(6,9)])
595
596
597 Me.sir(data ={'p':[7.5]} )
598 pt,pp = Me.getPosteriors(1)
599 end = time()
600 plotRaHist(pt)
601 plotRaHist(pp)
602 P.show()
603 print end-start, ' seconds'
604
605 if __name__ == '__main__':
606
607 main2()
608