Coverage for .tox/p311/lib/python3.10/site-packages/scicom/utilities/prune.py: 19%
57 statements
« prev ^ index » next coverage.py v7.4.4, created at 2024-04-16 09:50 +0200
1import os
2import numpy as np
3import pandas as pd
4import igraph as ig
5from tqdm import tqdm
class PruneNetwork():
    """Create statistics for communication networks by deletion.

    For a given dataset with sender and receiver information,
    create a weighted network with igraph. For a given number
    of iterations, deletion amounts, and deletion types, the
    algorithm then generates network statistics for randomly
    sampled subnetworks.
    """

    # Per-edge sampling-weight columns, in the order they are drawn and
    # returned by ``generatePruningParameters``.
    _WEIGHT_COLUMNS = ("degree", "unif", "log_normal", "exp", "beta")

    def __init__(self, dataframe: pd.DataFrame) -> None:
        """Store the input data.

        Args:
            dataframe: Table of communication events. ``makeNet`` reads
                the columns ``sender``, ``receiver`` and ``step``.
        """
        self.inputDF = dataframe

    def makeNet(self, dataframe: pd.DataFrame) -> "ig.Graph":
        """Create network from dataframe.

        Assumes the existence of sender, receiver and step
        column names. Rows are grouped by (sender, receiver); the
        group size is passed to igraph as the edge weight.

        Args:
            dataframe: Table with ``sender``, ``receiver`` and ``step``
                columns.

        Returns:
            A directed, weighted igraph graph with one edge per distinct
            (sender, receiver) pair.
        """
        edge_counts = (
            dataframe[["sender", "receiver", "step"]]
            .groupby(["sender", "receiver"])
            .size()
            .reset_index(name="Count")
        )
        return ig.Graph.TupleList(
            edge_counts.itertuples(index=False),
            directed=True,
            weights=True,
        )

    def netStats(self, G: "ig.Graph") -> pd.DataFrame:
        """Generate network statistics.

        Any statistic calculated on the full
        network can be added in principle.
        Currently implemented are:
            average relative degree,
            density
            transitivity
            cohesion
            average path length
            modularity

        Args:
            G: Graph to describe.

        Returns:
            A one-row DataFrame with the columns ``avg_relative_degree``,
            ``avg_path_length``, ``density``, ``transitivity``,
            ``cohesion`` and ``modularity``.
        """
        num_vertices = len(G.vs)
        # Degree of each vertex relative to the number of vertices.
        relative_degrees = np.asarray(G.degree(mode="all")) / num_vertices
        stats = {
            "avg_relative_degree": np.mean(relative_degrees),
            "avg_path_length": G.average_path_length(),
            "density": G.density(),
            "transitivity": G.transitivity_undirected(),
            "cohesion": G.cohesion(),
            # Modularity of the partition returned by G.components().
            "modularity": G.modularity(G.components()),
        }
        return pd.DataFrame([stats])

    def generatePruningParameters(self, G: "ig.Graph") -> pd.DataFrame:
        """Generate a random set of pruning weights.

        Every vertex gets its degree plus one random draw from each of a
        uniform(0, 1), log-normal(0, 1), exponential(1) and beta(2, 3)
        distribution. The weight of an edge is the product of the values
        of its two endpoints, divided by the sum of these products over
        all edges, so each weight column sums to one.

        Args:
            G: Graph whose vertices carry a ``name`` attribute.

        Returns:
            DataFrame with one row per edge and the columns ``sender``,
            ``receiver``, ``degree``, ``unif``, ``log_normal``, ``exp``
            and ``beta``.
        """
        nodes = G.get_vertex_dataframe()
        num_vertices = len(G.vs)
        # Draw order is kept stable so seeded runs stay reproducible.
        node_weights = pd.DataFrame(
            {
                "ids": nodes.index,
                "degree": G.degree(),
                "unif": np.random.uniform(0, 1, num_vertices),
                "log_normal": np.random.lognormal(0, 1, num_vertices),
                "exp": np.random.exponential(1, num_vertices),
                "beta": np.random.beta(a=2, b=3, size=num_vertices),
            }
        )

        # Attach the source-vertex values (suffix ``_x``) and the
        # target-vertex values (suffix ``_y``) to every edge.
        edge_weights = (
            G.get_edge_dataframe()[["source", "target"]]
            .merge(node_weights, left_on="source", right_on="ids")
            .merge(node_weights, left_on="target", right_on="ids")
        )
        for column in self._WEIGHT_COLUMNS:
            at_source = edge_weights[f"{column}_x"]
            at_target = edge_weights[f"{column}_y"]
            edge_weights[column] = (
                at_source * at_target / np.dot(at_source, at_target)
            )

        # Translate vertex ids back to the names used in the input data.
        vertex_names = nodes["name"]
        edge_weights["sender"] = edge_weights["source"].map(vertex_names)
        edge_weights["receiver"] = edge_weights["target"].map(vertex_names)
        return edge_weights[["sender", "receiver", *self._WEIGHT_COLUMNS]]

    def deleteFromNetwork(
        self,
        iterations=10,
        delAmounts=(0.1, 0.25, 0.5, 0.75, 0.9),
        delTypes=("degree", "unif", "log_normal", "exp", "beta")
    ):
        """Run the deletion by sampling.

        The statistics of the full network are recorded first. Then, for
        every iteration, a fresh set of pruning weights is generated and,
        for every combination of deletion amount and deletion type, a
        weighted sample of the input rows is turned into a network whose
        statistics are recorded.

        Args:
            iterations: Number of times new pruning weights are drawn.
            delAmounts: Fractions of rows to delete; a sample keeps
                ``round(len(rows) * (1 - amount))`` rows.
            delTypes: Names of the weight columns used for sampling. Rows
                with a larger weight are more likely to be kept.

        Returns:
            DataFrame with one row of network statistics per sampled
            network, labelled by ``delVal``, ``delType`` and
            ``delIteration``. The first row describes the full network
            (``delVal`` 0, ``delType`` 'NA', ``delIteration`` 0).
        """
        # Materialise once: a one-shot iterable (e.g. a generator) would
        # otherwise be exhausted after the first pass of the nested loops.
        amounts = list(delAmounts)
        types = list(delTypes)

        fullNet = self.makeNet(self.inputDF)
        results = [
            self.netStats(fullNet).assign(
                delVal=0, delType="NA", delIteration=0
            )
        ]

        for idx in range(1, iterations + 1):
            prunVals = self.generatePruningParameters(fullNet)
            # Join on the edge endpoints only. Input columns that share a
            # name with a weight column are renamed instead of silently
            # becoming additional join keys.
            tempDF = self.inputDF.merge(
                prunVals,
                on=["sender", "receiver"],
                suffixes=("_input", None),
            )
            for val in amounts:
                keep = round(len(tempDF) * (1 - val))
                for deltype in types:
                    delDF = tempDF.sample(keep, weights=deltype)
                    delStats = self.netStats(self.makeNet(delDF))
                    results.append(
                        delStats.assign(
                            delVal=val, delType=deltype, delIteration=idx
                        )
                    )
        return pd.concat(results)