Coverage for intelligence_toolkit/tests/unit/graph/test_graph_encoder_embed.py: 100%

113 statements  

« prev     ^ index     » next       coverage.py v7.10.7, created at 2025-10-16 13:41 -0300

1# Copyright (c) 2024 Microsoft Corporation. All rights reserved. 

2# Licensed under the MIT license. See LICENSE file in the project. 

3# 

4import numpy as np 

5import pytest 

6from scipy import sparse 

7 

8from intelligence_toolkit.graph.graph_encoder_embed import GraphEncoderEmbed 

9 

10 

@pytest.fixture
def graph_encoder():
    """Return a GraphEncoderEmbed instance for the tests that request it."""
    encoder = GraphEncoderEmbed()
    return encoder

14 

15 

@pytest.fixture
def simple_adjacency_matrix():
    """Return a 4x4 float32 CSR adjacency matrix describing a 4-node graph."""
    dense_rows = [
        [0, 1, 1, 0],
        [1, 0, 1, 1],
        [1, 1, 0, 1],
        [0, 1, 1, 0],
    ]
    dense = np.array(dense_rows, dtype=np.float32)
    return sparse.csr_matrix(dense)

25 

26 

@pytest.fixture
def simple_labels():
    """Return a (4, 1) label array: nodes 0-1 in class 0, nodes 2-3 in class 1."""
    label_column = np.array(
        [
            [0],
            [0],
            [1],
            [1],
        ]
    )
    return label_column

31 

32 

def test_graph_encoder_embed_initialization():
    """Check that GraphEncoderEmbed can be constructed with no arguments."""
    instance = GraphEncoderEmbed()
    assert instance is not None

36 

37 

def test_basic_embedding(graph_encoder, simple_adjacency_matrix, simple_labels):
    """Check the shapes returned by Basic() and that W is a sparse matrix."""
    node_count = 4
    expected_shape = (4, 2)  # 4 nodes, 2 classes

    embedding, weights = graph_encoder.Basic(
        simple_adjacency_matrix, simple_labels, node_count
    )

    assert embedding.shape == expected_shape
    assert weights.shape == expected_shape
    assert sparse.issparse(weights)

48 

49 

def test_basic_embedding_values(graph_encoder, simple_adjacency_matrix, simple_labels):
    """Check that each node's weight in its own class column equals 0.5.

    The expected weight is 1/nk, where nk is the number of nodes in the class;
    both classes in the labels used here contain two nodes.
    """
    node_count = 4
    _, weights = graph_encoder.Basic(
        simple_adjacency_matrix, simple_labels, node_count
    )
    dense_weights = weights.toarray()

    # (node, class) pairs: nodes 0-1 belong to class 0, nodes 2-3 to class 1.
    for node, label in ((0, 0), (1, 0), (2, 1), (3, 1)):
        assert dense_weights[node, label] == 0.5

61 

62 

def test_basic_with_unlabeled_nodes(graph_encoder, simple_adjacency_matrix):
    """Check that a node labelled -1 gets an all-zero row in W."""
    node_count = 4
    # The last node carries -1, i.e. it is left unlabeled.
    partial_labels = np.array([[0], [0], [1], [-1]])

    _, weights = graph_encoder.Basic(
        simple_adjacency_matrix, partial_labels, node_count
    )

    unlabeled_row = weights.toarray()[3, :]
    assert np.all(unlabeled_row == 0)

72 

73 

def test_diagonal(graph_encoder, simple_adjacency_matrix):
    """Check that the matrix returned by Diagonal() has ones on its diagonal."""
    node_count = 4
    augmented = graph_encoder.Diagonal(simple_adjacency_matrix, node_count)

    main_diagonal = np.diag(augmented.toarray())
    assert np.all(main_diagonal == 1)

81 

82 

def test_laplacian(graph_encoder, simple_adjacency_matrix):
    """Check the shape, sparsity and symmetry of the Laplacian() output."""
    node_count = 4
    normalized = graph_encoder.Laplacian(simple_adjacency_matrix, node_count)

    assert normalized.shape == (4, 4)
    assert sparse.issparse(normalized)

    # The input adjacency matrix is symmetric; normalization should keep it so.
    dense = normalized.toarray()
    assert np.allclose(dense, dense.T)

96 

97 

def test_correlation(graph_encoder):
    """Check that every row of the Correlation() output has Euclidean norm 1."""
    rows = np.array([[3, 4], [1, 0], [0, 1]], dtype=np.float32)
    embedding = sparse.csr_matrix(rows)

    normalized = graph_encoder.Correlation(embedding)

    norms = np.linalg.norm(normalized.toarray(), axis=1)
    assert np.allclose(norms, 1.0)

112 

113 

def test_correlation_with_zero_rows(graph_encoder):
    """Check that an all-zero input row is still all zero after Correlation()."""
    rows = np.array(
        [
            [3, 4],
            [0, 0],  # the all-zero row under test
            [1, 1],
        ],
        dtype=np.float32,
    )
    normalized = graph_encoder.Correlation(sparse.csr_matrix(rows))

    # NOTE(review): the implementation presumably relies on nan_to_num to
    # absorb the division by a zero norm; that is not visible from this test.
    zero_row = normalized.toarray()[1, :]
    assert np.all(zero_row == 0)

127 

128 

def test_edge_list_size_s2(graph_encoder):
    """Check that a two-column edge list is reported as "S2"."""
    two_column_edges = np.array([[0, 1], [1, 2], [2, 3]])
    assert graph_encoder.edge_list_size(two_column_edges) == "S2"

135 

136 

def test_edge_list_size_s3(graph_encoder):
    """Check that a three-column edge list is reported as "S3"."""
    three_column_edges = np.array([[0, 1, 0.5], [1, 2, 0.8], [2, 3, 1.0]])
    assert graph_encoder.edge_list_size(three_column_edges) == "S3"

143 

144 

def test_edge_to_sparse_s2(graph_encoder):
    """Check that an "S2" edge list becomes a sparse 3x3 matrix with 1 per edge."""
    edges = np.array([[0, 1], [1, 2], [2, 0]])
    node_count = 3

    adjacency = graph_encoder.Edge_to_Sparse(edges, node_count, "S2")

    assert adjacency.shape == (3, 3)
    assert sparse.issparse(adjacency)

    # Each listed (source, target) pair should hold the value 1.
    dense = adjacency.toarray()
    for source, target in ((0, 1), (1, 2), (2, 0)):
        assert dense[source, target] == 1

160 

161 

def test_edge_to_sparse_s3(graph_encoder):
    """Check that the third-column weights of an "S3" edge list are kept."""
    edges = np.array([[0, 1, 0.5], [1, 2, 0.8], [2, 0, 1.2]])
    node_count = 3

    adjacency = graph_encoder.Edge_to_Sparse(edges, node_count, "S3")

    # Each (source, target) cell should hold the weight given for that edge.
    dense = adjacency.toarray()
    for source, target, weight in ((0, 1, 0.5), (1, 2, 0.8), (2, 0, 1.2)):
        assert np.isclose(dense[source, target], weight)

173 

174 

def test_run_with_edge_list(graph_encoder):
    """Check the shapes returned by run() for a weighted edge-list input."""
    # Three-column (weighted) format, used so edge_list_size detection works.
    weighted_edges = np.array([[0, 1, 1.0], [1, 2, 1.0], [2, 0, 1.0]])
    node_labels = np.array([[0], [1], [0]])
    node_count = 3

    embedding, weights = graph_encoder.run(
        weighted_edges, node_labels, node_count, EdgeList=True
    )

    expected_shape = (3, 2)  # 3 nodes, 2 classes
    assert embedding.shape == expected_shape
    assert weights.shape == expected_shape

186 

187 

def test_run_with_all_options(graph_encoder):
    """Check that run() completes with every option enabled and Z is 3x2."""
    weighted_edges = np.array([[0, 1, 1.0], [1, 2, 1.0], [2, 0, 1.0]])
    node_labels = np.array([[0], [1], [0]])
    node_count = 3

    every_option = {
        "EdgeList": True,
        "DiagA": True,
        "Laplacian": True,
        "Correlation": True,
    }
    # Only the embedding is inspected; the weight matrix is discarded.
    embedding, _ = graph_encoder.run(
        weighted_edges, node_labels, node_count, **every_option
    )

    assert embedding.shape == (3, 2)

205 

206 

def test_run_without_correlation(graph_encoder, simple_adjacency_matrix, simple_labels):
    """Check the embedding shape when run() is called with Correlation=False.

    Only the shape is verified here; whether the rows are normalized is not
    checked by this test.
    """
    node_count = 4

    embedding, _ = graph_encoder.run(
        simple_adjacency_matrix,
        simple_labels,
        node_count,
        EdgeList=False,
        Correlation=False,
    )

    assert embedding.shape == (4, 2)

220 

221 

def test_basic_with_multiple_classes(graph_encoder, simple_adjacency_matrix):
    """Check that Basic() returns 4x3 outputs when the labels span 3 classes."""
    node_count = 4
    three_class_labels = np.array([[0], [1], [2], [0]])

    embedding, weights = graph_encoder.Basic(
        simple_adjacency_matrix, three_class_labels, node_count
    )

    expected_shape = (4, 3)  # 4 nodes, 3 classes
    assert embedding.shape == expected_shape
    assert weights.shape == expected_shape

230 

231 

def test_diagonal_preserves_sparsity(graph_encoder):
    """Check that Diagonal() keeps the matrix sparse while adding entries.

    The input has four stored entries, none of them on the main diagonal, so
    a result that gained diagonal entries must store strictly more values.
    """
    # Very sparse matrix with an empty main diagonal.
    X = sparse.csr_matrix(
        np.array(
            [
                [0, 0, 1, 0],
                [0, 0, 0, 1],
                [1, 0, 0, 0],
                [0, 1, 0, 0],
            ],
            dtype=np.float32,
        )
    )

    X_diag = graph_encoder.Diagonal(X, 4)

    # Should still be sparse.
    assert sparse.issparse(X_diag)

    # Strict inequality: `>=` would also pass if Diagonal() returned its
    # input unchanged, which is not what this test is meant to accept.
    assert X_diag.nnz > X.nnz