Coverage for contextualized/helpers/simulation.py: 0%

109 statements  

« prev     ^ index     » next       coverage.py v7.4.4, created at 2024-04-18 16:32 -0400

1""" 

2Helper functions for simulations and experiments. 

3""" 

4import numpy as np 

5from sklearn.decomposition import PCA 

6 

7 

class GaussianSimulator:
    """
    Generate samples from Gaussian distributions with known parameters.

    ``k`` Gaussians over ``p`` variables are parameterized (randomly, unless
    ``mus`` / ``sigmas`` are supplied) and each one is paired with a context
    vector built according to ``ctype``.

    Args:
        p: Number of variables in every Gaussian.
        k: Number of Gaussians.
        c: Context dimension. Replaced by ``p * (p + 1)`` when
            ``ctype == "self"``.
        ctype: How contexts are built. ``"uniform"`` draws each context from
            ``np.random.random``; ``"pca"`` projects the flattened
            ``[sigma | mu]`` parameters onto ``c`` principal components;
            ``"self"`` uses the flattened ``[sigma | mu]`` parameters directly.
        seed: Seed passed to ``np.random.seed``. Drawn from the global NumPy
            RNG when ``None``.
        sigmas: Optional array-like of covariance matrices, indexed as
            ``sigmas[i]`` -> ``(p, p)``. Randomly generated when ``None``.
        mus: Optional array-like of mean vectors, indexed as
            ``mus[i]`` -> ``(p,)``. Randomly generated when ``None``.

    Raises:
        ValueError: If ``ctype`` is not one of the supported context types.

    Note:
        Construction reseeds the *global* NumPy random state.
    """

    _CONTEXT_TYPES = ("uniform", "pca", "self")

    def __init__(self, p, k, c, ctype="self", seed=None, sigmas=None, mus=None):
        # Reject unsupported context types up front: previously they fell
        # through every branch and silently produced all-zero contexts.
        if ctype not in self._CONTEXT_TYPES:
            raise ValueError(
                f"Unknown ctype {ctype!r}; expected one of {self._CONTEXT_TYPES}"
            )
        self.seed = seed if seed is not None else np.random.randint(10**9)
        np.random.seed(self.seed)
        self.ctype = ctype
        # Distribution generation parameters
        self.p = p
        self.k = k
        self.c = c
        # Distribution parameters
        self.sigmas = sigmas
        self.mus = mus
        self.vars = None
        self.betas = None
        self.rhos = None  # rho^2, Pearson's correlation coefficient squared
        self.contexts = None
        self._build()

    def _build(self):
        """
        Generate parameters for k p-variate gaussians with context
        """
        new_mus = self.mus is None
        new_sigmas = self.sigmas is None
        # Accept any array-like for user-supplied parameters (e.g. nested
        # lists); ndarrays pass through np.asarray unchanged.
        self.mus = np.zeros((self.k, self.p)) if new_mus else np.asarray(self.mus)
        self.sigmas = (
            np.zeros((self.k, self.p, self.p))
            if new_sigmas
            else np.asarray(self.sigmas)
        )
        self.vars = np.zeros((self.k, self.p))
        self.betas = np.zeros((self.k, self.p, self.p))
        self.rhos = np.zeros((self.k, self.p, self.p))

        # Parameterize Gaussian models. The mu/sigma draws stay interleaved
        # per Gaussian so a given seed keeps producing the same parameters.
        for i in range(self.k):
            if new_mus:
                self.mus[i] = np.random.uniform(-self.p, self.p, self.p)
            # TODO: generate sigma using eigen decomposition
            if new_sigmas:
                factor = np.random.random((self.p, self.p)) * 2 - 1
                self.sigmas[i] = factor @ factor.T  # M @ M.T is symmetric PSD
            self.vars[i] = self.sigmas[i].diagonal()
            row_var = self.vars[i][:, np.newaxis]  # row_var[a, b] = var[a]
            col_var = self.vars[i][np.newaxis, :]  # col_var[a, b] = var[b]
            self.betas[i] = self.sigmas[i] / row_var  # beta[i,j] = beta_{i-->j}
            self.rhos[i] = np.power(self.sigmas[i], 2) / (row_var * col_var)

        self._build_contexts()

    def _build_contexts(self):
        """Fill ``self.contexts`` (and update ``self.c``) according to ``self.ctype``."""
        if self.ctype == "uniform":
            # A single (k, c) draw consumes the RNG stream in the same order
            # as k consecutive draws of length c.
            self.contexts = np.random.random((self.k, self.c))
        else:
            # Flatten each Gaussian to [sigma | mu]: (k, p, p + 1) -> (k, p * (p + 1))
            gaussian_reps = np.concatenate(
                (self.sigmas, self.mus[:, :, np.newaxis]), axis=-1
            ).reshape((self.k, self.p * (self.p + 1)))
            if self.ctype == "pca":
                self.contexts = PCA(n_components=self.c).fit_transform(gaussian_reps)
            else:  # "self"
                self.contexts = gaussian_reps
        # Only differs from the requested c for ctype == "self".
        self.c = self.contexts.shape[-1]

    def gen_samples(self, k_n):
        """
        Generate full datasets of samples

        Args:
            k_n: Number of samples to draw from each of the k Gaussians.

        Returns:
            Tuple ``(C, X)``: ``C`` has shape ``(k * k_n, c)`` and ``X`` has
            shape ``(k * k_n, p)``. Rows ``i * k_n`` to ``(i + 1) * k_n - 1``
            hold the context of, and samples from, Gaussian ``i``.
        """
        # Sample each distribution
        n = self.k * k_n
        C = np.zeros((n, self.c))
        X = np.zeros((n, self.p))
        for i in range(self.k):
            rows = slice(i * k_n, (i + 1) * k_n)
            C[rows] = self.contexts[i]  # broadcast one context over k_n rows
            X[rows] = np.random.multivariate_normal(self.mus[i], self.sigmas[i], k_n)
        return C, X

86 

87 

def f_context(z):
    """
    Default context function: return ``z`` plus Gaussian noise.

    The noise is drawn from the global NumPy RNG with mean 0 and standard
    deviation 0.01, one value per element of ``z``.

    Args:
        z: Array exposing ``.shape``.

    Returns:
        ``z + noise``, where ``noise`` has the same shape as ``z``.
    """
    return z + np.random.normal(0, 0.01, z.shape)

89 

90 

def f_sigma(z):
    """
    Default covariance function: build a square matrix from the vector ``z``.

    ``z`` is stacked ``z.shape[-1]`` times, the stack is added to its own
    transpose, and the Gram matrix of that sum is returned.
    """
    dim = z.shape[-1]
    stacked = np.tile(z, (dim, 1))
    pairwise_sums = stacked.T + stacked
    # TODO: maybe make this more controlled
    return pairwise_sums.T @ pairwise_sums  # from spectral decomposition

97 

98 

def f_mu(z):
    """
    Default mean function: return an all-zero array with the shape of ``z``.

    Args:
        z: Array exposing ``.shape``; its values are ignored.

    Returns:
        ``np.zeros(z.shape)``.
    """
    return np.zeros(z.shape)

100 

101 

class GraphicalSimulator:
    """
    Generate samples from a Graphical model (C <- Z -> Sigma/Mu -> X) with known correlation

    A latent matrix ``Z`` of shape ``(k, p)`` is drawn uniformly from
    ``[-1, 1)``. Row ``i`` of ``Z`` is passed through ``f_context``,
    ``f_sigma`` and ``f_mu`` to obtain the context, covariance and mean of
    the i-th Gaussian. Construction reseeds the global NumPy random state.
    """

    def __init__(
        self, k, p, f_context=f_context, f_sigma=f_sigma, f_mu=f_mu, seed=None
    ):
        if seed is None:
            seed = np.random.randint(1e9)
        self.seed = seed
        np.random.seed(self.seed)
        # Distribution generation parameters
        self.p, self.k = p, k
        self.c = p  # contexts have the same dimension as the latent Z rows
        self.f_context, self.f_sigma, self.f_mu = f_context, f_sigma, f_mu
        # Distribution parameters
        self.Z = np.random.uniform(-1, 1, (k, p))
        self.contexts = self.sigmas = self.mus = None
        self.vars = self.betas = None
        self.rhos = None  # rho^2, Pearson's correlation coefficient squared
        self._build()

    def _build(self):
        """
        Generate parameters for k p-variate gaussians with context
        """
        vector_shape = (self.k, self.p)
        matrix_shape = (self.k, self.p, self.p)
        self.contexts = np.zeros(vector_shape)
        self.mus = np.zeros(vector_shape)
        self.vars = np.zeros(vector_shape)
        self.sigmas = np.zeros(matrix_shape)
        self.betas = np.zeros(matrix_shape)
        self.rhos = np.zeros(matrix_shape)
        # Parameterize Gaussian models, one latent row at a time; the
        # context/sigma/mu call order is kept in case the functions draw
        # random numbers.
        for idx in range(self.k):
            latent = self.Z[idx]
            self.contexts[idx] = self.f_context(latent)
            self.sigmas[idx] = self.f_sigma(latent)
            self.mus[idx] = self.f_mu(latent)
            cov = self.sigmas[idx]
            self.vars[idx] = cov.diagonal()
            row_var = self.vars[idx][:, np.newaxis]  # row_var[a, b] = var[a]
            col_var = self.vars[idx][np.newaxis, :]  # col_var[a, b] = var[b]
            self.betas[idx] = cov / row_var  # beta[i,j] = beta_{i-->j}
            self.rhos[idx] = np.power(cov, 2) / (row_var * col_var)

    def gen_samples(self, k_n):
        """
        Generate full datasets of samples

        Draws ``k_n`` samples from each of the k Gaussians and returns
        ``(C, X)``: ``C`` has shape ``(k * k_n, c)`` and ``X`` has shape
        ``(k * k_n, p)``, with the rows of Gaussian ``i`` stored contiguously
        starting at row ``i * k_n``.
        """
        # Sample each distribution
        total = self.k * k_n
        C = np.zeros((total, self.c))
        X = np.zeros((total, self.p))
        for idx in range(self.k):
            start, stop = idx * k_n, (idx + 1) * k_n
            draws = np.random.multivariate_normal(
                self.mus[idx], self.sigmas[idx], k_n
            )
            C[start:stop] = self.contexts[idx]
            X[start:stop] = draws
        return C, X