Coverage for /home/deng/Projects/metatree_drawer/treeprofiler_algo/pastml/pastml/annotation.py: 12%
75 statements
« prev ^ index » next coverage.py v7.2.7, created at 2024-03-21 09:19 +0100
« prev ^ index » next coverage.py v7.2.7, created at 2024-03-21 09:19 +0100
1import logging
3import pandas as pd
4import numpy as np
def get_min_forest_stats(forest):
    """
    Computes a minimal set of branch-length statistics over all the trees of a forest.

    Every node yielded by ``tree.traverse()`` is visited once. A node whose ``dist``
    is falsy (zero or ``None``) is counted as a zero-branch node and contributes
    nothing to the total length.

    :param forest: iterable of trees; each tree must provide ``traverse()`` yielding
        nodes that expose ``dist`` and ``is_leaf``
    :return: list ``[avg_len, num_nodes, num_tips, len_sum]``, where ``avg_len`` is the
        total branch length divided by the number of nodes with a non-zero branch
        (0 if there are no such nodes), ``num_nodes`` is the number of visited nodes,
        ``num_tips`` is the number of leaves and ``len_sum`` is the total branch length
    """
    len_sum = 0
    num_zero_nodes = 0
    num_tips = 0
    num_nodes = 0

    for tree in forest:
        for node in tree.traverse():
            num_nodes += 1

            # Only add real lengths: a falsy dist may be None, which cannot be summed.
            if node.dist:
                len_sum += node.dist
            else:
                num_zero_nodes += 1

            # is_leaf may be a plain attribute/property or a method depending on the
            # tree implementation, so accept both spellings.
            is_leaf = node.is_leaf
            if callable(is_leaf):
                is_leaf = is_leaf()
            if is_leaf:
                num_tips += 1

    # Guard against an empty forest or a forest made of zero-length branches only.
    num_nonzero_nodes = num_nodes - num_zero_nodes
    avg_len = len_sum / num_nonzero_nodes if num_nonzero_nodes > 0 else 0
    return [avg_len, num_nodes, num_tips, len_sum]
class ForestStats(object):
    """
    Summary statistics of a forest, computed once at construction time.

    Attributes set by the constructor:
        avg_nonzero_brlen, num_nodes, num_tips, forest_length -- the four values
            returned by get_forest_stats(forest), in that order;
        num_trees -- len(forest).
    """

    def __init__(self, forest):
        """
        :param forest: collection of trees; it is passed to get_forest_stats
            and must also support len()
        """
        avg_brlen, n_nodes, n_tips, total_length = get_forest_stats(forest)
        self.avg_nonzero_brlen = avg_brlen
        self.num_nodes = n_nodes
        self.num_tips = n_tips
        self.forest_length = total_length
        self.num_trees = len(forest)
def get_forest_stats(forest):
    """
    Computes branch-length statistics over all the trees of a forest and logs them
    at DEBUG level to the 'pastml' logger.

    A node whose ``dist`` is falsy (zero or ``None``) is treated as a zero-length
    branch: it is counted, but excluded from the sums, minima, maxima and averages.

    :param forest: iterable of trees; each tree must provide ``traverse()`` yielding
        nodes that expose ``dist``, ``children`` and ``is_leaf``
    :return: list ``[avg_len, num_nodes, num_tips, forest_length]``, where ``avg_len``
        is the average length of the non-zero branches (0 if there are none),
        ``num_nodes`` is the number of visited nodes, ``num_tips`` is the number of
        leaves and ``forest_length`` is the sum of all tip and internal branch lengths
    """
    len_sum_ext = 0
    len_sum_int = 0
    num_zero_nodes = 0
    max_polynomy = 0
    max_len_ext = 0
    max_len_int = 0
    min_len_ext = np.inf
    min_len_int = np.inf
    num_tips = 0
    num_nodes = 0
    num_zero_tips = 0

    for tree in forest:
        for node in tree.traverse():
            num_nodes += 1
            max_polynomy = max(len(node.children), max_polynomy)

            if not node.dist:
                num_zero_nodes += 1

            # is_leaf may be a plain attribute/property or a method depending on the
            # tree implementation; a bound method is always truthy, so call it if needed.
            is_leaf = node.is_leaf
            if callable(is_leaf):
                is_leaf = is_leaf()

            if is_leaf:
                num_tips += 1
                if node.dist:
                    min_len_ext = min(node.dist, min_len_ext)
                    len_sum_ext += node.dist
                    max_len_ext = max(max_len_ext, node.dist)
                else:
                    num_zero_tips += 1
            elif node.dist:
                min_len_int = min(node.dist, min_len_int)
                len_sum_int += node.dist
                max_len_int = max(max_len_int, node.dist)

    # Denominators count only the branches of non-zero length; each average
    # falls back to 0 when its category is empty.
    num_nonzero_nodes = num_nodes - num_zero_nodes
    num_nonzero_tips = num_tips - num_zero_tips
    num_nonzero_int = num_nodes - num_tips - num_zero_nodes + num_zero_tips

    avg_len = (len_sum_ext + len_sum_int) / num_nonzero_nodes if num_nonzero_nodes > 0 else 0
    avg_len_ext = len_sum_ext / num_nonzero_tips if num_nonzero_tips > 0 else 0
    avg_len_int = len_sum_int / num_nonzero_int if num_nonzero_int > 0 else 0

    logging.getLogger('pastml').debug('\n=============TREE STATISTICS===================\n'
                                      '\tnumber of tips:\t{}\n'
                                      '\tnumber of zero-branch tips:\t{}\n'
                                      '\tnumber of internal nodes:\t{}\n'
                                      '\tmax number of children per node:\t{}\n'
                                      '\tmax tip branch length:\t{:.5f}\n'
                                      '\tmax internal branch length:\t{:.5f}\n'
                                      '\tmin non-zero tip branch length:\t{:.5f}\n'
                                      '\tmin non-zero internal branch length:\t{:.5f}\n'
                                      '\tavg non-zero tip branch length:\t{:.5f}\n'
                                      '\tavg non-zero internal branch length:\t{:.5f}\n'
                                      '\tavg non-zero branch length:\t{:.5f}.'
                                      .format(num_tips,
                                              num_zero_tips,
                                              num_nodes - num_tips,
                                              max_polynomy,
                                              max_len_ext,
                                              max_len_int,
                                              min_len_ext,
                                              min_len_int,
                                              avg_len_ext,
                                              avg_len_int,
                                              avg_len))
    return [avg_len, num_nodes, num_tips, len_sum_ext + len_sum_int]
def df2gdf(df):
    """
    Groups the rows of a dataframe by index value and collects, for every column,
    the set of distinct values found in each group.

    Note that missing values in df are replaced with '' in place,
    i.e. the dataframe passed by the caller is modified.

    :param df: pandas.DataFrame whose index may contain repeated values
    :return: pandas.DataFrame with the same columns as df and one row per distinct
        index value; each cell is a set of the non-null, non-empty values
        of that column among the rows sharing the index value
    """

    def _distinct_values(values):
        # Keep only informative values: skip nulls and empty strings.
        kept = set()
        for value in values:
            if not pd.isnull(value) and value != '':
                kept.add(value)
        return kept

    df.fillna('', inplace=True)
    grouped = df.groupby(df.index)
    result = pd.DataFrame(columns=df.columns)
    for column in df.columns:
        result[column] = grouped[column].apply(_distinct_values)
    return result
def preannotate_forest(forest, df=None, gdf=None):
    """
    Transfers the annotations of a grouped dataframe onto the nodes of a forest.

    Each node is looked up by its name in the index of gdf. If the name is present,
    node.add_props is called with the corresponding row as keyword arguments
    (column name -> value); otherwise node.del_prop is called for every column of gdf.

    :param forest: iterable of trees; each tree must provide traverse('postorder')
        yielding nodes that expose name, add_props and del_prop
    :param df: dataframe to be converted with df2gdf; only used when gdf is None
    :param gdf: grouped dataframe indexed by node name; built from df when None
    :return: tuple (gdf.columns, gdf)
    """
    if gdf is None:
        gdf = df2gdf(df)

    known_names = gdf.index
    columns = gdf.columns
    for tree in forest:
        for node in tree.traverse('postorder'):
            if node.name not in known_names:
                # No annotation row for this node: clear every annotated column on it.
                for column in columns:
                    node.del_prop(column)
                continue
            node.add_props(**gdf.loc[node.name, :].to_dict())
    return gdf.columns, gdf