Coverage for /home/deng/Projects/metatree_drawer/treeprofiler_algo/pastml/pastml/annotation.py: 12%

75 statements  

« prev     ^ index     » next       coverage.py v7.2.7, created at 2024-03-21 09:19 +0100

1import logging 

2 

3import pandas as pd 

4import numpy as np 

5 

6 

def get_min_forest_stats(forest):
    """
    Computes minimal branch-length statistics over all the nodes of all the trees in a forest.

    A node whose branch length is falsy (0 or None) is counted as a zero-branch node
    and contributes nothing to the total length.

    :param forest: iterable of trees; each tree provides traverse() yielding nodes
        that have a `dist` attribute and an `is_leaf` attribute or method
    :return: list [average non-zero branch length, number of nodes, number of tips, total branch length];
        the average is 0 when there is no non-zero branch (e.g. an empty forest)
    """
    len_sum = 0
    num_zero_nodes = 0
    num_tips = 0
    num_nodes = 0

    for tree in forest:
        for node in tree.traverse():
            num_nodes += 1

            # Only add truthy lengths: a None length would make the sum fail,
            # and a zero length does not change it anyway.
            if node.dist:
                len_sum += node.dist
            else:
                num_zero_nodes += 1

            # Both spellings (node.is_leaf() and node.is_leaf) are used in this module,
            # so accept a method as well as a plain attribute/property.
            is_leaf = node.is_leaf
            if callable(is_leaf):
                is_leaf = is_leaf()
            if is_leaf:
                num_tips += 1

    num_nonzero_nodes = num_nodes - num_zero_nodes
    # Same convention as get_forest_stats: no non-zero branches -> average of 0 instead of a ZeroDivisionError.
    avg_len = len_sum / num_nonzero_nodes if num_nonzero_nodes > 0 else 0
    return [avg_len, num_nodes, num_tips, len_sum]

27 

28 

class ForestStats(object):
    """
    Summary statistics of a forest, computed once at construction time.

    Attributes set by the constructor:
        avg_nonzero_brlen, num_nodes, num_tips, forest_length -- the four values
            returned by get_forest_stats(forest), in that order;
        num_trees -- len(forest).
    """

    def __init__(self, forest):
        """
        :param forest: collection of trees; it is passed to get_forest_stats and must support len()
        """
        stats = get_forest_stats(forest)
        (self.avg_nonzero_brlen,
         self.num_nodes,
         self.num_tips,
         self.forest_length) = stats
        self.num_trees = len(forest)

34 

35 

def get_forest_stats(forest):
    """
    Computes branch-length statistics over all the nodes of all the trees in a forest
    and logs them at DEBUG level to the 'pastml' logger.

    A node whose branch length is falsy (0 or None) is counted as a zero-branch node
    and is excluded from the sums, minima, maxima and averages.

    :param forest: iterable of trees; each tree provides traverse() yielding nodes
        that have `dist`, `children` and an `is_leaf` attribute or method
    :return: list [average non-zero branch length, number of nodes, number of tips, total branch length];
        the average is 0 when there is no non-zero branch
    """
    len_sum_ext = 0
    len_sum_int = 0
    num_zero_nodes = 0
    max_polynomy = 0
    max_len_ext = 0
    max_len_int = 0
    min_len_ext = np.inf
    min_len_int = np.inf
    num_tips = 0
    num_nodes = 0
    num_zero_tips = 0
    num_nonzero_int = 0

    for tree in forest:
        for node in tree.traverse():
            num_nodes += 1
            max_polynomy = max(len(node.children), max_polynomy)

            dist = node.dist
            if not dist:
                num_zero_nodes += 1

            # Both spellings (node.is_leaf() and node.is_leaf) are used in this module.
            # Testing a bound method for truth would count every node as a tip,
            # so call it when it is callable.
            is_leaf = node.is_leaf
            if callable(is_leaf):
                is_leaf = is_leaf()

            if is_leaf:
                num_tips += 1
                if dist:
                    min_len_ext = min(dist, min_len_ext)
                    len_sum_ext += dist
                    max_len_ext = max(max_len_ext, dist)
                else:
                    num_zero_tips += 1
            elif dist:
                num_nonzero_int += 1
                min_len_int = min(dist, min_len_int)
                len_sum_int += dist
                max_len_int = max(max_len_int, dist)

    len_sum = len_sum_ext + len_sum_int
    num_nonzero_nodes = num_nodes - num_zero_nodes
    num_nonzero_tips = num_tips - num_zero_tips

    # Each average falls back to 0 when its denominator would be 0.
    avg_len = len_sum / num_nonzero_nodes if num_nonzero_nodes > 0 else 0
    avg_len_ext = len_sum_ext / num_nonzero_tips if num_nonzero_tips > 0 else 0
    avg_len_int = len_sum_int / num_nonzero_int if num_nonzero_int > 0 else 0

    logger = logging.getLogger('pastml')
    # Skip building the report string when nobody is listening at DEBUG level.
    if logger.isEnabledFor(logging.DEBUG):
        logger.debug('\n=============TREE STATISTICS===================\n'
                     '\tnumber of tips:\t{}\n'
                     '\tnumber of zero-branch tips:\t{}\n'
                     '\tnumber of internal nodes:\t{}\n'
                     '\tmax number of children per node:\t{}\n'
                     '\tmax tip branch length:\t{:.5f}\n'
                     '\tmax internal branch length:\t{:.5f}\n'
                     '\tmin non-zero tip branch length:\t{:.5f}\n'
                     '\tmin non-zero internal branch length:\t{:.5f}\n'
                     '\tavg non-zero tip branch length:\t{:.5f}\n'
                     '\tavg non-zero internal branch length:\t{:.5f}\n'
                     '\tavg non-zero branch length:\t{:.5f}.'
                     .format(num_tips,
                             num_zero_tips,
                             num_nodes - num_tips,
                             max_polynomy,
                             max_len_ext,
                             max_len_int,
                             min_len_ext,
                             min_len_int,
                             avg_len_ext,
                             avg_len_int,
                             avg_len))
    return [avg_len, num_nodes, num_tips, len_sum]

102 

103 

def df2gdf(df):
    """
    Groups the rows of a data frame by index value and collects, for each column,
    the set of non-empty values found in each group.

    Note that missing values of the input data frame are replaced by '' in place.

    :param df: pandas.DataFrame whose index values identify the groups
    :return: pandas.DataFrame with the same columns as df, one row per distinct index value,
        each cell being a set of the values that are neither null nor ''
    """
    def _non_empty(values):
        # Keep only the values that are neither null nor the empty string.
        kept = set()
        for value in values:
            if not pd.isnull(value) and value != '':
                kept.add(value)
        return kept

    df.fillna('', inplace=True)
    grouped = df.groupby(df.index)
    gdf = pd.DataFrame(columns=df.columns)
    for column in df.columns:
        gdf[column] = grouped[column].apply(_non_empty)
    return gdf

111 

112 

def preannotate_forest(forest, df=None, gdf=None):
    """
    Sets the column values of a grouped data frame as properties on the matching tree nodes.

    A node whose name is found in the index of the grouped data frame receives one property
    per column (via add_props); on any other node those properties are removed (via del_prop).

    :param forest: iterable of trees; each tree provides traverse('postorder') yielding nodes
        that have `name`, add_props(**props) and del_prop(name)
    :param df: pandas.DataFrame to be grouped with df2gdf; only used when gdf is None
    :param gdf: already grouped pandas.DataFrame indexed by node name (optional)
    :return: tuple (columns of the grouped data frame, grouped data frame)
    """
    gdf = df2gdf(df) if gdf is None else gdf
    columns = gdf.columns
    for tree in forest:
        for node in tree.traverse('postorder'):
            if node.name not in gdf.index:
                # No annotation for this node: clear the column properties.
                for column in columns:
                    node.del_prop(column)
                continue
            node.add_props(**gdf.loc[node.name, :].to_dict())
    return columns, gdf