Coverage for .tox/p311/lib/python3.10/site-packages/scicom/utilities/prune.py: 19%

57 statements  

« prev     ^ index     » next       coverage.py v7.4.4, created at 2024-04-16 09:50 +0200

1import os 

2import numpy as np 

3import pandas as pd 

4import igraph as ig 

5from tqdm import tqdm 

6 

7 

class PruneNetwork:
    """Create statistics for communication networks by deletion.

    For a given dataset with sender and receiver information,
    create a weighted network with igraph. For a given number
    of iterations, deletion amounts, and deletion types, the
    algorithm then generates network statistics for randomly
    sampled subnetworks.
    """

    # Names of the per-edge sampling-weight columns that
    # generatePruningParameters produces (one column per deletion type).
    _WEIGHT_COLUMNS = ('degree', 'unif', 'log_normal', 'exp', 'beta')

    def __init__(self, dataframe):
        """Store the input dataframe as ``self.inputDF``."""
        self.inputDF = dataframe

    def makeNet(self, dataframe):
        """Create network from dataframe.

        Assumes the existence of sender, receiver and step
        column names. Rows are counted per (sender, receiver)
        pair and that count is handed to igraph as the third
        tuple element with ``weights=True``.

        Returns the directed igraph graph built from these tuples.
        """
        edges = dataframe[['sender', 'receiver', 'step']]
        edgeCounts = edges.groupby(
            ['sender', 'receiver']
        ).size().reset_index(name='Count')
        return ig.Graph.TupleList(
            edgeCounts.itertuples(index=False), directed=True, weights=True
        )

    def netStats(self, G):
        """Generate network statistics.

        Any statistic calculated on the full
        network can be added in principle.
        Currently implemented are:
            average relative degree,
            density
            transitivity
            cohesion
            average path length
            modularity

        Returns a one-row dataframe with one column per statistic.
        """
        numVs = len(G.vs)
        # Degree of every vertex (mode="all") relative to the vertex count.
        avgRelDeg = np.mean([deg / numVs for deg in G.degree(mode="all")])
        density = G.density()
        transitivity = G.transitivity_undirected()
        cohesion = G.cohesion()
        avgPathLen = G.average_path_length()
        # Modularity of the partition returned by G.components().
        modularity = G.modularity(G.components())
        return pd.DataFrame(
            [
                {
                    'avg_relative_degree': avgRelDeg,
                    'avg_path_length': avgPathLen,
                    'density': density,
                    'transitivity': transitivity,
                    'cohesion': cohesion,
                    'modularity': modularity,
                }
            ]
        )

    def generatePruningParameters(self, G):
        """Generate a random set of pruning weights.

        Every vertex gets its degree plus one random draw from a
        uniform, log-normal, exponential and beta distribution. The
        weight of an edge is the product of the values of its two end
        points, normalised so that each weight column sums to one.

        Returns a dataframe with one row per edge and the columns
        sender, receiver, degree, unif, log_normal, exp and beta.
        """
        nodes = G.get_vertex_dataframe()
        id2name = nodes['name'].to_dict()
        numVs = len(G.vs)
        # The draws are made in the same order as before (unif, log_normal,
        # exp, beta) so a caller seeding np.random gets the same values.
        nodeParams = pd.DataFrame(
            {
                'ids': nodes.index,
                'degree': G.degree(),
                'unif': np.random.uniform(0, 1, numVs),
                'log_normal': np.random.lognormal(0, 1, numVs),
                'exp': np.random.exponential(1, numVs),
                'beta': np.random.beta(a=2, b=3, size=numVs),
            }
        )

        # Attach the vertex values twice: the source side ends up in the
        # ``*_x`` columns, the target side in the ``*_y`` columns.
        edgeParams = G.get_edge_dataframe()[["source", "target"]].merge(
            nodeParams, left_on='source', right_on='ids'
        ).merge(
            nodeParams, left_on='target', right_on='ids'
        )
        for col in self._WEIGHT_COLUMNS:
            srcVals = edgeParams[f'{col}_x']
            tgtVals = edgeParams[f'{col}_y']
            # Dividing by the dot product makes the column sum to one.
            edgeParams[col] = srcVals * tgtVals / np.dot(srcVals, tgtVals)

        # Translate vertex ids back to the vertex names.
        edgeParams['sender'] = edgeParams['source'].map(id2name)
        edgeParams['receiver'] = edgeParams['target'].map(id2name)
        return edgeParams[['sender', 'receiver', *self._WEIGHT_COLUMNS]]

    def deleteFromNetwork(
        self,
        iterations=10,
        delAmounts=(0.1, 0.25, 0.5, 0.75, 0.9),
        delTypes=("degree", "unif", "log_normal", "exp", "beta")
    ):
        """Run the deletion by sampling.

        The first result row holds the statistics of the full network
        (delVal 0, delType 'NA', delIteration 0). Each iteration then
        draws a fresh set of pruning weights and, for every combination
        of deletion amount and deletion type, samples a share of
        ``1 - amount`` of the input rows weighted by that type, rebuilds
        the network from the sample and records its statistics.

        iterations: number of times new pruning weights are drawn.
        delAmounts: shares of rows to delete.
        delTypes: names of the weight columns used for sampling.

        Returns the concatenated statistics with the additional
        columns delVal, delType and delIteration.
        """
        # Materialise once so that one-shot iterables survive every iteration.
        amounts = tuple(delAmounts)
        kinds = tuple(delTypes)

        fullNet = self.makeNet(self.inputDF)
        results = [
            self.netStats(fullNet).assign(
                delVal=0, delType='NA', delIteration=0
            )
        ]
        for idx in range(1, iterations + 1):
            prunVals = self.generatePruningParameters(fullNet)
            # Join on the edge end points only. An input column that shares
            # its name with a weight column is renamed with ``_input``
            # instead of silently becoming an additional join key.
            tempDF = self.inputDF.merge(
                prunVals,
                on=['sender', 'receiver'],
                suffixes=('_input', None),
            )
            numRows = len(tempDF)
            for val in amounts:
                keepRows = round(numRows * (1 - val))
                for deltype in kinds:
                    delDF = tempDF.sample(keepRows, weights=deltype)
                    delStats = self.netStats(self.makeNet(delDF)).assign(
                        delVal=val, delType=deltype, delIteration=idx
                    )
                    results.append(delStats)
        return pd.concat(results)

149