Coverage for src/driada/network/matrix_utils.py: 84.16%

202 statements  

« prev     ^ index     » next       coverage.py v7.9.2, created at 2025-07-25 15:40 +0300

1import numpy as np 

2import copy 

3import scipy.sparse as sp 

4 

5 

6def _plain_bfs(adj, source): 

7 ''' 

8 adapted from networkx.algorithms.components.connected._plain_bfs 

9 

10 Args: 

11 adj: 

12 source: 

13 

14 Returns: 

15 

16 ''' 

17 

18 n = adj.shape[0] 

19 seen = {source} 

20 nextlevel = [source] 

21 while nextlevel: 

22 thislevel = nextlevel 

23 nextlevel = [] 

24 for v in thislevel: 

25 for w in get_neighbors_from_adj(adj, v): 

26 if w not in seen: 

27 seen.add(w) 

28 nextlevel.append(w) 

29 if len(seen) == n: 

30 return seen 

31 return seen 

32 

33 

34def get_neighbors_from_adj(a, node): 

35 inds = a[[node], :].nonzero()[1] 

36 return inds 

37 

38 

39def get_ccs_from_adj(adj): 

40 seen = set() 

41 for v in range(adj.shape[0]): 

42 if v not in seen: 

43 c = _plain_bfs(adj, v) 

44 seen.update(c) 

45 yield c 

46 

47 

48def get_sccs_from_adj(adj): 

49 ''' 

50 adapted from networkx.algorithms.components.strongly_connected.strongly_connected_components 

51 Args: 

52 adj: 

53 

54 Returns: 

55 

56 ''' 

57 

58 all_nodes = range(adj.shape[0]) 

59 preorder = {} 

60 lowlink = {} 

61 scc_found = set() 

62 scc_queue = [] 

63 i = 0 # Preorder counter 

64 neighbors = {v: iter(get_neighbors_from_adj(adj, v)) for v in all_nodes} 

65 for source in all_nodes: 

66 if source not in scc_found: 

67 queue = [source] 

68 while queue: 

69 v = queue[-1] 

70 if v not in preorder: 

71 i = i + 1 

72 preorder[v] = i 

73 done = True 

74 for w in neighbors[v]: 

75 if w not in preorder: 

76 queue.append(w) 

77 done = False 

78 break 

79 if done: 

80 lowlink[v] = preorder[v] 

81 for w in get_neighbors_from_adj(adj, v): 

82 if w not in scc_found: 

83 if preorder[w] > preorder[v]: 

84 lowlink[v] = min([lowlink[v], lowlink[w]]) 

85 else: 

86 lowlink[v] = min([lowlink[v], preorder[w]]) 

87 queue.pop() 

88 if lowlink[v] == preorder[v]: 

89 scc = {v} 

90 while scc_queue and preorder[scc_queue[-1]] > preorder[v]: 

91 k = scc_queue.pop() 

92 scc.add(k) 

93 scc_found.update(scc) 

94 yield scc 

95 else: 

96 scc_queue.append(v) 

97 

98 

99def get_giant_cc_from_adj(adj): 

100 connected_components = sorted(get_ccs_from_adj(adj), key=len, reverse=True) 

101 gcc = np.array(list(connected_components[0])) 

102 gcc_adj = adj[gcc, :].tocsc()[:, gcc].tocsr() 

103 

104 # mapping of new nodes to old ones 

105 node_mapping = dict(zip(range(len(gcc)), gcc)) 

106 

107 return gcc_adj, node_mapping 

108 

109 

110def get_giant_scc_from_adj(adj): 

111 connected_components = sorted(get_sccs_from_adj(adj), key=len, reverse=True) 

112 gscc = np.array(list(connected_components[0])) 

113 gscc_adj = adj[gscc, :].tocsc()[:, gscc].tocsr() 

114 

115 # mapping of new nodes to old ones 

116 node_mapping = dict(zip(range(len(gscc)), gscc)) 

117 

118 return gscc_adj, node_mapping 

119 

120 

121def assign_random_weights(A): 

122 X = np.random.random(size=(A.shape[0], A.shape[0])) 

123 W = np.multiply(X, A) 

124 return (W + W.T) / 2 

125 

126 

127# TODO: refactor to sparse format 

128def turn_to_partially_directed(mat, directed=0.0, weighted=0): 

129 if not isinstance(mat, np.ndarray): 

130 raise Exception('Wrong input parsed to turn_to_directed function!') 

131 

132 A = copy.deepcopy(mat) 

133 

134 if directed == 0.0 or directed is None: 

135 if not weighted: 

136 a = A.astype(bool) 

137 else: 

138 a = A.astype(float) 

139 

140 return sp.csr_array(a) 

141 

142 np.fill_diagonal(A, 0) 

143 rows, cols = A.nonzero() 

144 edgeset = set(zip(rows, cols)) 

145 upper = np.array([l for l in edgeset if l[0] < l[1]]) 

146 dircount = 0 

147 

148 random_tosses = np.random.random(len(upper)) 

149 condition1 = (random_tosses >= directed / 2.0) & (random_tosses < directed) 

150 condition2 = (random_tosses <= directed / 2.0) & (random_tosses < directed) 

151 indices_where_upper_is_removed = np.where(condition1 == True)[0] 

152 indices_where_lower_is_removed = np.where(condition2 == True)[0] 

153 

154 u_xdata = [u[0] for u in upper[indices_where_upper_is_removed]] 

155 u_ydata = [u[1] for u in upper[indices_where_upper_is_removed]] 

156 A[u_xdata, u_ydata] = 0 

157 

158 l_xdata = [u[1] for u in upper[indices_where_lower_is_removed]] 

159 l_ydata = [u[0] for u in upper[indices_where_lower_is_removed]] 

160 A[l_xdata, l_ydata] = 0 

161 

162 ''' 

163 for i in range(double_edges): 

164 toss = random.random() 

165 if toss < directed: # this means double edge will be reduced to single randomly 

166 dircount += 1 

167 if toss >= directed/2.: 

168 A[upper_right[i]] = 0#A[upper_right[i][::-1]] + 0#.1*np.random.random() 

169 else: 

170 A[upper_right[i][::-1]] = 0#A[upper_right[i]] + 0#.1*np.random.random() 

171 ''' 

172 

173 #a = sp.csr_array(A) 

174 # get_symmetry_index(a) 

175 return A 

176 

177 

178def get_symmetry_index(a): 

179 a = a.astype(bool) 

180 symmetrized = a + a.T 

181 

182 difference = symmetrized.astype(int) - a.astype(int) 

183 difference.eliminate_zeros() 

184 symm_index = 1 - difference.nnz / symmetrized.nnz * 2 

185 # symm_index is 1 for a symmetrix matrix and 0 for an asymmetric one 

186 return symm_index 

187 

188 

189def symmetric_component(A, is_weighted): 

190 a = A.astype(bool).todense() 

191 symm_mask = np.bitwise_and(a, a.T) 

192 if not is_weighted: 

193 return symm_mask 

194 

195 return np.multiply(symm_mask, A.todense()) 

196 

197 

198def non_symmetric_component(A, is_weighted): 

199 return A.astype(float) - symmetric_component(A, is_weighted).astype(float) 

200 

201 

202def remove_duplicates(coo): 

203 # this function removes duplicate entries from a coo-format adjacency matrix 

204 # duplicates are discarded as the data is always the same: 

205 # coo[i,j] = val1, coo[i,j] = val2 ==> val1 = val2 

206 

207 dok = sp.dok_matrix((coo.shape), dtype=coo.dtype) 

208 dok._update(zip(zip(coo.row, coo.col), coo.data)) 

209 return dok.tocoo() 

210 

211 

212def adj_input_to_csr_sparse_matrix(a): 

213 if isinstance(a, np.ndarray): 

214 adj = sp.csr_array(a) 

215 elif a.format in ['csr', 'csc']: 

216 adj = a 

217 elif a.format == 'coo': 

218 adj = remove_duplicates(a) 

219 else: 

220 raise Exception('Wrong input parsed to preprocess_adj_matrix function:', type(a)) 

221 

222 return sp.csr_array(adj) 

223 

224 

225def remove_selfloops_from_adj(a): 

226 if a.trace() != 0: 

227 a = adj_input_to_csr_sparse_matrix(a) 

228 anew = a.copy() 

229 anew.setdiag(0) 

230 anew.eliminate_zeros() 

231 return anew 

232 else: 

233 return a 

234 

235 

236def remove_isolates_from_adj(a): 

237 a = adj_input_to_csr_sparse_matrix(a) 

238 

239 in_degrees = np.array(a.astype(bool).astype(int).sum(axis=1)) # .flatten().ravel() 

240 out_degrees = np.array(a.astype(bool).astype(int).sum(axis=0)) # .flatten().ravel() 

241 

242 indices = np.where(in_degrees + out_degrees > 0)[0] 

243 cleared_matrix = a[indices, :].tocsc()[:, indices].tocsr() 

244 

245 # mapping of new nodes to old ones 

246 node_mapping = dict(zip(range(len(indices)), indices)) 

247 

248 return cleared_matrix, node_mapping 

249 

250 

251def sausage_index(A, nn): 

252 A = A.astype(bool).astype(int) 

253 sausage_edges = 0 

254 for i in range(nn): 

255 sausage_edges += sum(np.diag(A, k=i)) 

256 

257 si = sausage_edges / (np.sum(A) / 2) 

258 print('sausage edges:', sausage_edges) 

259 print('other edges:', np.sum(A) / 2 - sausage_edges) 

260 print('sausage index=', si) 

261 

262 

263#TODO: create separate branches for np and sparse matrices for further functions 

264def get_laplacian(A): 

265 A = A.astype(float) 

266 out_degrees = np.array(A.sum(axis=0)).ravel() 

267 D = sp.spdiags(out_degrees, [0], A.shape[0], A.shape[0], format='csr') 

268 L = D - A 

269 return L 

270 

271 

272def get_inv_sqrt_diag_matrix(a): 

273 n = a.shape[0] 

274 A = sp.csr_array(a) 

275 out_degrees = np.array(A.sum(axis=0)).ravel() 

276 diags_sqrt = 1.0 / np.sqrt(out_degrees) 

277 

278 diags_sqrt[np.isinf(diags_sqrt)] = 0 

279 DH = sp.spdiags(diags_sqrt, [0], n, n, format='csr') 

280 return DH 

281 

282 

283def get_norm_laplacian(a): 

284 if get_symmetry_index(a) != 1: 

285 raise Exception('Cannot construct normalized laplacian matrix from a non-hermitian adjacency matrix') 

286 

287 n = a.shape[0] 

288 A = sp.csr_array(a) 

289 DH = get_inv_sqrt_diag_matrix(A) 

290 matrix = sp.eye(n, dtype=float) - DH.dot(A.dot(DH)) 

291 return matrix 

292 

293 

294def get_inv_diag_matrix(a): 

295 n = a.shape[0] 

296 A = sp.csr_array(a) 

297 out_degrees = np.array(A.sum(axis=0)).ravel() 

298 invdiags = 1.0 / out_degrees 

299 

300 invdiags[np.isinf(invdiags)] = 0 

301 Dinv = sp.spdiags(invdiags, [0], n, n, format='csr') 

302 return Dinv 

303 

304 

305def get_rw_laplacian(a): 

306 n = a.shape[0] 

307 T = get_trans_matrix(a) 

308 matrix = sp.eye(n, dtype=float) - T 

309 return matrix 

310 

311 

312def get_trans_matrix(a): 

313 A = sp.csr_array(a) 

314 Dinv = get_inv_diag_matrix(a) 

315 T = Dinv.dot(A) 

316 return T