Coverage for .tox/p311/lib/python3.10/site-packages/scicom/utilities/statistics.py: 100%
57 statements
« prev ^ index » next coverage.py v7.4.4, created at 2024-04-26 18:34 +0200
« prev ^ index » next coverage.py v7.4.4, created at 2024-04-26 18:34 +0200
1"""Prune a network."""
2import igraph as ig
3import numpy as np
4import pandas as pd
class PruneNetwork:
    """Create statistics for communication networks by deletion.

    For a given dataset with sender and receiver information,
    create a weighted network with igraph. For a given number
    of iterations, deletion amounts, and deletion types, the
    algorithm then generates network statistics for randomly
    sampled subnetworks.
    """

    # Names of the per-edge sampling weight columns produced by
    # generatePruningParameters; each one is a valid deletion type.
    _WEIGHT_COLUMNS = ("degree", "unif", "log_normal", "exp", "beta")

    def __init__(self, dataframe: pd.DataFrame) -> None:
        """Initialize pruning.

        The dataframe is stored as-is; it has to provide the columns
        "sender", "receiver" and "step" that makeNet selects.
        """
        self.inputDF = dataframe

    def makeNet(self, dataframe: pd.DataFrame) -> ig.Graph:
        """Create network from dataframe.

        Assumes the existence of sender, receiver and step
        column names. Rows are grouped by (sender, receiver) and the
        number of rows per pair is passed to igraph as third tuple
        element, with weights enabled, to build a directed graph.
        """
        pair_counts = (
            dataframe[["sender", "receiver", "step"]]
            .groupby(["sender", "receiver"])
            .size()
            .reset_index(name="Count")
        )
        return ig.Graph.TupleList(
            pair_counts.itertuples(index=False), directed=True, weights=True,
        )

    def netStats(self, G: ig.Graph) -> pd.DataFrame:
        """Generate network statistics.

        Any statistic calculated on the full
        network can be added in principle.
        Currently implemented are:
            average relative degree,
            density
            transitivity
            cohesion
            average path length
            modularity

        Returns a single-row dataframe with one column per statistic.
        """
        numVs = len(G.vs)
        # Degree of every vertex relative to the number of vertices.
        relative_degrees = np.asarray(G.degree(mode="all")) / numVs
        return pd.DataFrame(
            [
                {
                    "avg_relative_degree": np.mean(relative_degrees),
                    "avg_path_length": G.average_path_length(),
                    "density": G.density(),
                    "transitivity": G.transitivity_undirected(),
                    "cohesion": G.cohesion(),
                    # Modularity of the partition given by G.components().
                    "modularity": G.modularity(G.components()),
                },
            ],
        )

    def generatePruningParameters(self, G: ig.Graph) -> pd.DataFrame:
        """Generate a random set of pruning weights.

        Every vertex gets its degree plus one draw each from a uniform,
        log-normal, exponential and beta distribution. For every edge
        the values of its two end points are multiplied and divided by
        the dot product over all edges, so that each weight column sums
        to one.

        Returns a dataframe with one row per edge and the columns
        "sender", "receiver", "degree", "unif", "log_normal", "exp"
        and "beta".
        """
        nodes = G.get_vertex_dataframe()
        id2name = nodes["name"].to_dict()
        numVs = len(G.vs)
        rng = np.random.default_rng()
        node_parameter = pd.DataFrame(
            {
                "ids": nodes.index,
                "degree": G.degree(),
                "unif": rng.uniform(0, 1, numVs),
                "log_normal": rng.lognormal(0, 1, numVs),
                "exp": rng.exponential(1, numVs),
                "beta": rng.beta(a=2, b=3, size=numVs),
            },
        )
        # Attach the vertex values of both end points to each edge; the
        # double merge yields "<name>_x" (source) and "<name>_y" (target).
        del_parameter = (
            G.get_edge_dataframe()[["source", "target"]]
            .merge(node_parameter, left_on="source", right_on="ids")
            .merge(node_parameter, left_on="target", right_on="ids")
        )
        for column in self._WEIGHT_COLUMNS:
            source_vals = del_parameter[f"{column}_x"]
            target_vals = del_parameter[f"{column}_y"]
            del_parameter[column] = (
                source_vals * target_vals / np.dot(source_vals, target_vals)
            )
        # Translate igraph vertex ids back to the original node names.
        del_parameter["sender"] = del_parameter["source"].map(id2name)
        del_parameter["receiver"] = del_parameter["target"].map(id2name)
        return del_parameter[["sender", "receiver", *self._WEIGHT_COLUMNS]]

    def deleteFromNetwork(
        self,
        iterations: int = 10,
        delAmounts: tuple = (0.1, 0.25, 0.5, 0.75, 0.9),
        delTypes: tuple = ("degree", "unif", "log_normal", "exp", "beta"),
    ) -> pd.DataFrame:
        """Run the deletion by sampling.

        The first result row holds the statistics of the full network
        (delVal=0, delType="NA", delIteration=0). In each iteration a
        fresh set of pruning weights is drawn, and for every combination
        of deletion amount and deletion type a share of (1 - amount) of
        the rows is sampled, weighted by the column named by the
        deletion type. Statistics of the resulting network are recorded
        together with delVal, delType and delIteration.

        Returns the concatenation of all single-row statistics frames.
        """
        fullNet = self.makeNet(self.inputDF)
        results = [
            self.netStats(fullNet).assign(
                delVal=0, delType="NA", delIteration=0,
            ),
        ]
        for idx in range(1, iterations + 1):
            prunVals = self.generatePruningParameters(fullNet)
            # Join explicitly on the edge end points. Without `on`, pandas
            # joins on every shared column name, so an input column that
            # happens to be called like a weight column would be used as
            # a join key and silently empty the result.
            tempDF = self.inputDF.merge(prunVals, on=["sender", "receiver"])
            for val in delAmounts:
                keep = round(len(tempDF) * (1 - val))
                for deltype in delTypes:
                    delDF = tempDF.sample(keep, weights=deltype)
                    delStats = self.netStats(self.makeNet(delDF)).assign(
                        delVal=val, delType=deltype, delIteration=idx,
                    )
                    results.append(delStats)
        return pd.concat(results)
def prune(
    modelparameters: dict,
    network: tuple,
    columns: list,
    iterations: int = 10,
    delAmounts: tuple = (0.1, 0.25, 0.5, 0.75, 0.9),
    delTypes: tuple = ("degree", "unif"),
) -> pd.DataFrame:
    """Generate pruned networks from input.

    Assumes existence of columns "sender", "receiver", and "step".

    The network records are turned into a dataframe labelled with
    `columns`, handed to PruneNetwork.deleteFromNetwork together with
    the iteration and deletion settings, and every entry of
    `modelparameters` is added to the result as a column.
    """
    edge_frame = pd.DataFrame(network, columns=columns)
    stats = PruneNetwork(edge_frame).deleteFromNetwork(
        iterations=iterations,
        delAmounts=delAmounts,
        delTypes=delTypes,
    )
    return stats.assign(**modelparameters)