Package BIP :: Package Bayes :: Module lhs
[hide private]
[frames] | no frames]

Source Code for Module BIP.Bayes.lhs

  1  #!/usr/bin/python 
  2  # -*- coding:utf-8 -*- 
  3  #----------------------------------------------------------------------------- 
  4  # Name:        lhs.py 
  5  # Project:  Bayesian-Inference 
  6  # Purpose:      
  7  # 
  8  # Author:      Flávio Codeço Coelho<fccoelho@gmail.com> 
  9  # 
 10  # Created:     2008-11-26 
 11  # Copyright:   (c) 2008 by the Author 
 12  # Licence:     GPL 
 13  #----------------------------------------------------------------------------- 
 14  __docformat__ = "restructuredtext en" 
 15  #from pylab import plot, figure,hist,show, savefig, legend 
 16  import scipy.stats as stats 
 17  import numpy 
 18  from numpy.linalg import cholesky,inv 
 19   
def lhsFromSample(sample, siz=100):
    """
    Latin Hypercube Sample from a set of values.
    For univariate distributions only.

    The percentile range [0, 100] is split into as many equal-width strata
    as requested samples; one percentile is drawn uniformly inside each
    stratum and mapped to the empirical quantile of `sample`.

    :Parameters:
        - `sample`: list, tuple or numpy array holding the observed values
        - `siz`: Number or shape tuple for the output sample

    :Returns:
        numpy array with `siz` elements (reshaped to `siz` when it is a
        tuple or list), in random stratum order.

    :Raises:
        TypeError if `sample` is not a list, tuple or numpy array.
    """
    #TODO: add support to correlation restricted multivariate samples
    if not isinstance(sample, (list, tuple, numpy.ndarray)):
        raise TypeError('sample is not a list, tuple or numpy vector')
    is_shape = isinstance(siz, (tuple, list))
    # numpy.product was removed in NumPy 2.0; numpy.prod is the stable name.
    n = int(numpy.prod(siz)) if is_shape else siz
    width = 100. / n
    # Multiplying an integer range avoids the accumulated rounding of a
    # float-step arange, which can produce n+1 strata.
    lower = numpy.arange(n) * width
    numpy.random.shuffle(lower)
    # One uniform draw inside each stratum, capped so that rounding can
    # never push a percentile past 100.
    smp = numpy.minimum(
        lower + numpy.random.uniform(0., width, size=lower.shape), 100.)
    # Passing the whole percentile array sorts `sample` only once.
    v = numpy.asarray(stats.scoreatpercentile(sample, smp))
    if is_shape:
        v = v.reshape(siz)
    return v


def lhsFromDensity(kde, siz=100):
    '''
    LHS sampling from a variable's Kernel density estimate.

    A plain random sample with as many points as requested is first drawn
    from the density and then fed to `lhsFromSample`, which stratifies it.

    :Parameters:
        - `kde`: scipy.stats.gaussian_kde object
        - `siz`: Number or shape tuple for the output sample

    :Returns:
        numpy array with `siz` elements (reshaped to `siz` when it is a
        tuple or list).

    :Raises:
        TypeError if `kde` is not a gaussian_kde instance.
    '''
    # Only `scipy.stats` is imported (as `stats`); the bare name `scipy`
    # is not available in this module.
    if not isinstance(kde, stats.gaussian_kde):
        raise TypeError("kde is not a density object")
    is_shape = isinstance(siz, (tuple, list))
    # n must be defined for scalar sizes too, not only for shape tuples.
    n = int(numpy.prod(siz)) if is_shape else siz
    # resample() returns a 2-D (dimensions, n) array; flatten it so that
    # lhsFromSample receives a plain vector.
    # NOTE(review): for a multivariate kde this mixes the dimensions --
    # the function is presumably meant for univariate densities only.
    s = numpy.ravel(kde.resample(n))
    v = lhsFromSample(s, n)
    if is_shape:
        v = v.reshape(siz)
    return v


def lhs(dist, parms, siz=100, noCorrRestr=False, corrmat=None):
    '''
    Latin Hypercube sampling of any distribution.
    dist is a scipy.stats random number generator
    such as stats.norm, stats.beta, etc
    parms is a tuple with the parameters needed for
    the specified distribution.

    :Parameters:
        - `dist`: random number generator from scipy.stats module or a list of them.
        - `parms`: tuple of parameters as required for dist, or a list of them.
        - `siz`: number or shape tuple for the output sample
        - `noCorrRestr`: if True, no correlation restriction is imposed between variables
        - `corrmat`: correlation matrix forwarded to `rank_restr`

    :Returns:
        A single array when one distribution is given, otherwise a list
        with one array per distribution.

    :Raises:
        - ValueError if `dist` and `parms` are sequences of different length.
        - TypeError if any element of `dist` is not a scipy.stats distribution.
    '''
    if not isinstance(dist, (list, tuple)):
        dists = [dist]
        parms = [parms]
    else:
        # Explicit raise instead of assert: asserts vanish under `python -O`.
        if len(dist) != len(parms):
            raise ValueError('dist and parms must have the same length')
        dists = dist
    # Validate everything up front so that no work is done on bad input.
    for d in dists:
        if not isinstance(d, (stats.rv_discrete, stats.rv_continuous)):
            raise TypeError('dist is not a scipy.stats distribution object')
    is_shape = isinstance(siz, (tuple, list))
    n = int(numpy.prod(siz)) if is_shape else siz
    indices = rank_restr(nvars=len(dists), smp=siz,
                         noCorrRestr=noCorrRestr, Corrmat=corrmat)
    # Mid-stratum probabilities; identical for every variable.
    perc = numpy.arange(1., n + 1) / (n + 1)
    smplist = []
    for j, d in enumerate(dists):
        #force type to float for sage compatibility
        pars = tuple(float(k) for k in parms[j])
        v = d(*pars).ppf(perc)
        # Ranks are 1-based (and may be floats); convert them to 0-based
        # integer positions. An integer array works as an index on both
        # Python 2 and 3, unlike the iterator returned by map() on Python 3.
        index = numpy.asarray(indices[j]).ravel().astype(int) - 1
        v = v[index]
        if is_shape:
            v = v.reshape(siz)
        smplist.append(v)
    if len(dists) == 1:
        return smplist[0]
    return smplist


def rank_restr(nvars=4, smp=100, noCorrRestr=False, Corrmat=None):
    """
    Returns the indices for sampling variables with
    the desired correlation structure.

    :Parameters:
        - `nvars`: number of variables
        - `smp`: number of samples (a shape tuple/list is reduced to its product)
        - `noCorrRestr`: No correlation restriction if True
        - `Corrmat`: Correlation matrix. If None, assure uncorrelated samples.

    :Returns:
        List with `nvars` arrays, each holding the 1-based ranks
        (1..smp) that order the samples of one variable.

    :Raises:
        TypeError if `Corrmat` is not an `nvars` x `nvars` matrix.
    """
    if isinstance(smp, (tuple, list)):
        smp = int(numpy.prod(smp))
    if noCorrRestr or nvars == 1:
        # A random permutation uses every stratum exactly once; drawing
        # ranks with replacement would repeat some strata and skip others.
        return [numpy.random.permutation(smp) + 1 for _ in range(nvars)]
    # `is None` is required: `== None` on an array compares elementwise.
    if Corrmat is None:
        C = numpy.identity(nvars)
    else:
        C = numpy.asarray(Corrmat, dtype=float)
        if C.shape != (nvars, nvars):
            raise TypeError('Correlation matrix must be of rank %s'%nvars)
    # Normal scores of the mid-stratum probabilities, independently
    # shuffled for each variable (one row per variable).
    scores = stats.norm().ppf(numpy.arange(1., smp + 1) / (smp + 1.))
    S = numpy.array([numpy.random.permutation(scores) for _ in range(nvars)])
    P = cholesky(C)
    Q = cholesky(numpy.corrcoef(S))
    # Remove the sample correlation of the shuffled scores (inverse of Q)
    # and impose the target one (P); rows are samples, columns variables.
    Final = numpy.dot(numpy.dot(S.T, inv(Q).T), P.T)
    return [stats.rankdata(Final[:, i]) for i in range(nvars)]


if __name__ == '__main__':
    import pylab as P
    dist = stats.norm
    #dist = stats.beta
    pars = (50, 1)
    #pars = (1,5) #beta
    b = lhs(dist, pars, 1000)
    cm = numpy.array([[1, .8], [.8, 1]])
    c = lhs([dist, dist], [pars, pars], 2000, False, cm)
    print("%s %s" % (stats.pearsonr(c[0], c[1]), stats.spearmanr(c[0], c[1])))
    P.scatter(c[0], c[1])
    print("%s %s" % (c[0].shape, c[1].shape))
    n = dist(*pars).rvs(size=20)
    # NOTE(review): 19 variables with only 17 samples -- presumably a
    # stress test of the correlation restriction; confirm it is intended.
    lhs([stats.norm]*19, [(0, 1)]*19, 17, False, numpy.identity(19))
    P.show()


#TODO: Extend lhsFromSample to allow multivariate correlated sampling