Coverage for contextualized/helpers/simulation.py: 0%
109 statements
« prev ^ index » next coverage.py v7.4.4, created at 2024-04-18 16:32 -0400
« prev ^ index » next coverage.py v7.4.4, created at 2024-04-18 16:32 -0400
1"""
2Helper functions for simulations and experiments.
3"""
4import numpy as np
5from sklearn.decomposition import PCA
class GaussianSimulator:
    """
    Generate samples from Gaussian distributions with known parameters.

    Parameterizes ``k`` Gaussians over ``p`` variables, pairs each one with a
    context vector, and derives per-Gaussian variances, regression
    coefficients and squared correlations from the covariance matrices.

    Note: construction reseeds NumPy's *global* random state with ``seed``.

    Args:
        p: Number of variables in each Gaussian.
        k: Number of Gaussians.
        c: Context dimension. Replaced by ``p * (p + 1)`` when
            ``ctype == "self"``.
        ctype: How contexts are built.
            ``"uniform"``: every context is drawn from U[0, 1).
            ``"pca"``: the flattened (sigma, mu) parameters projected onto
            ``c`` principal components.
            ``"self"``: the flattened (sigma, mu) parameters themselves.
            Any other value leaves the contexts as all zeros.
        seed: Seed for the global NumPy RNG. Drawn at random when None.
        sigmas: Optional covariance matrices, array-like indexed as
            ``sigmas[i]`` -> (p, p). Generated randomly when None.
        mus: Optional mean vectors, array-like indexed as ``mus[i]`` -> (p,).
            Generated randomly when None.

    Attributes:
        vars: (k, p) diagonal of each covariance matrix.
        betas: (k, p, p) with ``betas[i][a, b] = sigma[a, b] / var[a]``.
        rhos: (k, p, p) squared Pearson correlation coefficients.
        contexts: (k, c) context vector of each Gaussian.
    """

    def __init__(self, p, k, c, ctype="self", seed=None, sigmas=None, mus=None):
        self.seed = seed if seed is not None else np.random.randint(10**9)
        np.random.seed(self.seed)
        self.ctype = ctype
        # Distribution generation parameters
        self.p = p
        self.k = k
        self.c = c
        # Distribution parameters
        self.sigmas = sigmas
        self.mus = mus
        self.vars = None
        self.betas = None
        self.rhos = None  # rho^2, Pearson's correlation coefficient squared
        self.contexts = None
        self._build()

    def _build(self):
        """
        Generate parameters for k p-variate gaussians with context
        """
        new_mus = self.mus is None
        new_sigmas = self.sigmas is None
        # Coerce user-supplied parameters so nested lists work as well as
        # arrays; np.asarray returns an existing ndarray unchanged (no copy).
        self.mus = np.zeros((self.k, self.p)) if new_mus else np.asarray(self.mus)
        self.sigmas = (
            np.zeros((self.k, self.p, self.p))
            if new_sigmas
            else np.asarray(self.sigmas)
        )
        self.contexts = np.zeros((self.k, self.c))
        self.vars = np.zeros((self.k, self.p))
        self.betas = np.zeros((self.k, self.p, self.p))
        self.rhos = np.zeros((self.k, self.p, self.p))

        # Parameterize Gaussian models. The mean and covariance draws stay
        # interleaved per Gaussian so a given seed keeps producing the same
        # parameters.
        for i in range(self.k):
            if new_mus:
                self.mus[i] = np.random.uniform(-self.p, self.p, self.p)
            # TODO: generate sigma using eigen decomposition
            if new_sigmas:
                root = np.random.random((self.p, self.p)) * 2 - 1
                # root @ root.T is symmetric positive semi-definite
                self.sigmas[i] = root @ root.T
            sigma = self.sigmas[i]
            variances = sigma.diagonal()
            self.vars[i] = variances
            # beta[a, b] = beta_{a-->b} = cov(a, b) / var(a)
            self.betas[i] = sigma / variances[:, np.newaxis]
            # rho^2[a, b] = cov(a, b)^2 / (var(a) * var(b))
            self.rhos[i] = np.power(sigma, 2) / np.outer(variances, variances)

        # Build contexts
        if self.ctype == "uniform":
            for i in range(self.k):
                self.contexts[i] = np.random.random((self.c,))
        elif self.ctype in ("pca", "self"):
            # Flatten each Gaussian into one row: every covariance row is
            # followed by the matching mean entry, p * (p + 1) values in all.
            gaussian_reps = np.concatenate(
                (self.sigmas, self.mus[:, :, np.newaxis]), axis=-1
            ).reshape((self.k, self.p * (self.p + 1)))
            if self.ctype == "pca":
                self.contexts = PCA(n_components=self.c).fit_transform(gaussian_reps)
            else:
                self.contexts = gaussian_reps
                self.c = self.contexts.shape[-1]

    def gen_samples(self, k_n):
        """
        Generate full datasets of samples

        Args:
            k_n: Number of samples to draw from each of the k Gaussians.

        Returns:
            Tuple ``(C, X)``: ``C`` is (k * k_n, c) with each Gaussian's
            context repeated for its samples, ``X`` is (k * k_n, p) with the
            samples. Rows are grouped by Gaussian, in order.
        """
        # Sample each distribution
        n = self.k * k_n
        C = np.zeros((n, self.c))
        X = np.zeros((n, self.p))
        for i in range(self.k):
            rows = slice(i * k_n, (i + 1) * k_n)
            C[rows] = self.contexts[i]
            X[rows] = np.random.multivariate_normal(self.mus[i], self.sigmas[i], k_n)
        return C, X
def f_context(z):
    """
    Default context function: return ``z`` plus small Gaussian noise.

    Args:
        z: Array of latent values.

    Returns:
        Array shaped like ``z`` holding ``z`` plus noise drawn from a normal
        distribution with mean 0 and standard deviation 0.01. The noise comes
        from NumPy's global random state.
    """
    return z + np.random.normal(0, 0.01, z.shape)
def f_sigma(z):
    """
    Default covariance function: build a covariance matrix from ``z``.

    ``z`` is stacked into ``z.shape[-1]`` rows and added to its own
    transpose; for a 1-D ``z`` this gives entries ``z[i] + z[j]``. The
    returned matrix is the Gram matrix of that sum, which is symmetric
    positive semi-definite by construction.
    """
    stacked = np.tile(z, (z.shape[-1], 1))
    pair_sums = stacked + np.transpose(stacked)
    # TODO: maybe make this more controlled
    return np.matmul(np.transpose(pair_sums), pair_sums)
def f_mu(z):
    """
    Default mean function: return an all-zero array shaped like ``z``.

    The values of ``z`` are ignored; only ``z.shape`` is used.
    """
    return np.zeros(z.shape)
class GraphicalSimulator:
    """
    Simulate data from a graphical model (C <- Z -> Sigma/Mu -> X) with known
    correlation.

    A latent matrix ``Z`` of shape (k, p) is drawn from U[-1, 1). Each row is
    mapped through ``f_context``, ``f_sigma`` and ``f_mu`` to obtain the
    context, covariance and mean of one p-variate Gaussian. Construction
    reseeds NumPy's global random state with ``seed``.
    """

    def __init__(
        self, k, p, f_context=f_context, f_sigma=f_sigma, f_mu=f_mu, seed=None
    ):
        if seed is None:
            seed = np.random.randint(1e9)
        self.seed = seed
        np.random.seed(self.seed)

        # Sizes: p variables, k distributions, context dimension equal to p
        self.p = p
        self.k = k
        self.c = p

        # Maps from a latent row to context / covariance / mean
        self.f_context = f_context
        self.f_sigma = f_sigma
        self.f_mu = f_mu

        # Latent variables, then placeholders that _build() fills in
        self.Z = np.random.uniform(-1, 1, (k, p))
        self.contexts = self.sigmas = self.mus = None
        self.vars = self.betas = None
        self.rhos = None  # rho^2, Pearson's correlation coefficient squared
        self._build()

    def _build(self):
        """
        Derive the parameters of all k p-variate Gaussians from the latent Z
        """
        n_models, dim = self.k, self.p
        self.contexts = np.zeros((n_models, dim))
        self.sigmas = np.zeros((n_models, dim, dim))
        self.mus = np.zeros((n_models, dim))
        self.vars = np.zeros((n_models, dim))
        self.betas = np.zeros((n_models, dim, dim))
        self.rhos = np.zeros((n_models, dim, dim))

        for idx in range(n_models):
            latent = self.Z[idx]
            # Call order is kept (context, sigma, mu): the callables may
            # consume the global random state.
            self.contexts[idx] = self.f_context(latent)
            self.sigmas[idx] = self.f_sigma(latent)
            self.mus[idx] = self.f_mu(latent)

            cov = self.sigmas[idx]
            self.vars[idx] = cov.diagonal()
            variances = self.vars[idx]
            # beta[a, b] = beta_{a-->b} = cov(a, b) / var(a)
            self.betas[idx] = cov / variances[:, np.newaxis]
            # rho^2[a, b] = cov(a, b)^2 / (var(a) * var(b))
            self.rhos[idx] = np.power(cov, 2) / np.outer(variances, variances)

    def gen_samples(self, k_n):
        """
        Draw k_n samples from every distribution.

        Returns ``(C, X)``: contexts of shape (k * k_n, c) and samples of
        shape (k * k_n, p), with rows grouped by distribution in order.
        """
        total = self.k * k_n
        C = np.zeros((total, self.c))
        X = np.zeros((total, self.p))
        for idx in range(self.k):
            draws = np.random.multivariate_normal(
                self.mus[idx], self.sigmas[idx], k_n
            )
            block = slice(idx * k_n, (idx + 1) * k_n)
            C[block] = self.contexts[idx]
            X[block] = draws
        return C, X