1
2
3
4
5
6
7
8
9
10
11
12
13
14 __docformat__ = "restructuredtext en"
15
16 import scipy.stats as stats
17 import numpy
18 from numpy.linalg import cholesky,inv
19
21 """
22 Latin Hypercube Sample from a set of values.
23 For univariate distributions only
24
25 :Parameters:
26 - `sample`: list, tuple or array
27 - `siz`: Number or shape tuple for the output sample
28 """
29
30 if not isinstance(sample, (list,tuple,numpy.ndarray)):
31 raise TypeError('sample is not a list, tuple or numpy vector')
32 n = siz
33 if isinstance(siz,(tuple,list)):
34 n=numpy.product(siz)
35 perc = numpy.arange(0,100.,100./n)
36 numpy.random.shuffle(perc)
37 smp = [stats.uniform(i,100./n).rvs() for i in perc]
38 v = numpy.array([stats.scoreatpercentile(sample,p) for p in smp])
39 if isinstance(siz,(tuple,list)):
40 v.shape = siz
41 return v
42
44 '''
45 LHS sampling from a variable's Kernel density estimate.
46
47 :Parameters:
48 - `kde`: scipy.stats.kde.gaussian_kde object
49 - `siz`: Number or shape tuple for the output sample
50 '''
51 if not isinstance(kde,scipy.stats.kde.gaussian_kde):
52 raise TypeError("kde is not a density object")
53 if isinstance(siz,(tuple,list)):
54 n=numpy.product(siz)
55 s = kde.resample(n)
56 v = lhsFromSample(s,n)
57 if isinstance(siz,(tuple,list)):
58 v.shape = siz
59 return v
60
61
def lhs(dist, parms, siz=100, noCorrRestr=False, corrmat=None):
    '''
    Latin Hypercube sampling of any distribution.

    `dist` is a scipy.stats random number generator such as stats.norm,
    stats.beta, etc., and `parms` is a tuple with the parameters needed for
    the specified distribution. Lists of distributions and of parameter
    tuples may be given to sample several variables at once.

    For every distribution the quantile function (``ppf``) is evaluated at
    the ``n`` equally spaced probabilities ``i/(n+1)``, ``i = 1..n``, and
    the resulting values are reordered with the 1-based ranks returned by
    `rank_restr`.

    :Parameters:
        - `dist`: random number generator from scipy.stats module or a list of them.
        - `parms`: tuple of parameters as required for dist, or a list of them.
        - `siz`: number or shape tuple for the output sample
        - `noCorrRestr`: forwarded to `rank_restr`; if True no correlation
          restriction is applied.
        - `corrmat`: correlation matrix forwarded to `rank_restr`.

    :Returns:
        A single numpy array when one distribution is given, otherwise a
        list with one array per distribution. Arrays have shape `siz` when
        `siz` is a tuple or list, and length `siz` otherwise.

    :Raises:
        - `TypeError`: if an element of `dist` is not a scipy.stats
          distribution object.
        - `AssertionError`: if `dist` and `parms` are sequences of
          different lengths.
    '''
    if not isinstance(dist, (list, tuple)):
        dists = [dist]
        parms = [parms]
    else:
        assert len(dist) == len(parms)
        dists = dist

    # Total number of points; a shape tuple is flattened to its product.
    shaped = isinstance(siz, (tuple, list))
    n = int(numpy.prod(siz)) if shaped else siz

    indices = rank_restr(nvars=len(dists), smp=siz,
                         noCorrRestr=noCorrRestr, Corrmat=corrmat)

    # Probabilities strictly inside (0, 1); identical for every variable,
    # so they are computed once instead of once per distribution.
    perc = numpy.arange(1., n + 1) / (n + 1)

    smplist = []
    for j, d in enumerate(dists):
        if not isinstance(d, (stats.rv_discrete, stats.rv_continuous)):
            raise TypeError('dist is not a scipy.stats distribution object')

        pars = tuple(float(k) for k in parms[j])
        v = d(*pars).ppf(perc)

        # Ranks are 1-based and may be floats; convert them to 0-based
        # integer positions. An integer array is used (rather than map())
        # so that the fancy indexing also works on Python 3.
        index = (numpy.asarray(indices[j]) - 1).astype(int)
        v = v[index]
        if shaped:
            v = v.reshape(siz)
        smplist.append(v)

    if len(dists) == 1:
        return smplist[0]
    return smplist
102
def rank_restr(nvars=4, smp=100, noCorrRestr=False, Corrmat=None):
    """
    Returns the indices for sampling variables with
    the desired correlation structure.

    Without a correlation restriction (or with a single variable) each
    variable gets an independent random permutation of the ranks
    ``1..smp``. Otherwise independently shuffled normal scores are
    decorrelated with the inverse Cholesky factor of their sample
    correlation, the Cholesky factor of the target correlation is applied,
    and the rows are converted to ranks.

    :Parameters:
        - `nvars`: number of variables
        - `smp`: number of samples (a shape tuple/list is reduced to its product)
        - `noCorrRestr`: No correlation restriction if True
        - `Corrmat`: Correlation matrix. If None, assure uncorrelated samples.

    :Returns:
        A list of `nvars` arrays of length `smp` holding 1-based ranks.

    :Raises:
        - `TypeError`: if `Corrmat` is not an `nvars` x `nvars` matrix.
        - `numpy.linalg.LinAlgError`: raised by the Cholesky factorisation
          when a correlation matrix is not positive definite.
    """
    if isinstance(smp, (tuple, list)):
        smp = int(numpy.prod(smp))

    if noCorrRestr or nvars == 1:
        # A permutation (sampling without replacement) keeps the Latin
        # hypercube property: every rank, hence every stratum, is used
        # exactly once per variable.
        ranks = numpy.arange(1, smp + 1)
        return [numpy.random.permutation(ranks) for _ in range(nvars)]

    if Corrmat is None:
        C = numpy.identity(nvars)
    else:
        C = numpy.asarray(Corrmat, dtype=float)
        if C.shape != (nvars, nvars):
            raise TypeError('Correlation matrix must be of rank %s' % nvars)

    # Normal scores at the probabilities i/(smp+1), shuffled independently
    # for each variable (one row per variable).
    s0 = numpy.arange(1., smp + 1) / (smp + 1.)
    scores = stats.norm().ppf(s0)
    S = numpy.array([numpy.random.permutation(scores) for _ in range(nvars)])

    P = cholesky(C)                   # factor of the target correlation
    Q = cholesky(numpy.corrcoef(S))   # factor of the sample correlation

    # Remove the spurious sample correlation (inv(Q)), then impose the
    # target one (P). Rows are samples, columns are variables.
    Final = numpy.dot(numpy.dot(S.T, inv(Q).T), P.T)
    return [stats.rankdata(Final[:, i]) for i in range(nvars)]
141
if __name__ == '__main__':
    # Demo / smoke test: draws univariate and correlated bivariate normal
    # samples, prints their correlations and shapes, and plots the pair.
    import pylab as P
    dist = stats.norm

    pars = (50, 1)

    b = lhs(dist, pars, 1000)
    cm = numpy.array([[1, .8], [.8, 1]])
    c = lhs([dist, dist], [pars, pars], 2000, False, cm)
    # Single-argument print() calls behave the same on Python 2 and 3.
    print("%s %s" % (stats.pearsonr(c[0], c[1]), stats.spearmanr(c[0], c[1])))

    P.scatter(c[0], c[1])

    print("%s %s" % (c[0].shape, c[1].shape))
    n = dist(*pars).rvs(size=20)

    # NOTE(review): 19 variables with only 17 samples; the sample
    # correlation matrix cannot have full rank here, so this call looks
    # like it is expected to fail inside rank_restr - confirm intent.
    lhs([stats.norm] * 19, [(0, 1)] * 19, 17, False, numpy.identity(19))
    P.show()
163
164
165
166