Coverage for test_prune.py: 99%

109 statements  

« prev     ^ index     » next       coverage.py v7.2.7, created at 2024-03-21 09:19 +0100

1 

2import sys 

3import os 

4from io import StringIO 

5import unittest 

6 

7sys.path.insert(0, os.path.abspath(os.path.dirname(__file__) + '/..')) 

8 

9#from collections import namedtuple 

10from tempfile import NamedTemporaryFile 

11 

12from treeprofiler import tree_annotate 

13from treeprofiler.src import utils 

14 

class TestPrune(unittest.TestCase):
    """Check ``utils.conditional_prune`` against annotated trees.

    Every test annotates a small four-leaf tree (leaves A, B, D, E) from a
    temporary TSV file, prunes it with a single condition string and compares
    the serialized result with a hard-coded expected newick string.  The
    cases cover categorical, numerical and list-valued columns so that the
    handling of each data type is exercised.

    The expected strings show that summary annotations on the remaining
    internal nodes keep the values computed before pruning.
    """

    # Tree with named internal nodes, shared by tests 00-04.
    _NAMED_TREE = "(A:1,(B:1,(E:1,D:1)Internal_1:0.5)Internal_2:0.5)Root;"

    # TSV fixtures: one categorical, one numerical and one list-valued column.
    _CATEGORICAL_TSV = b'#name\talphabet_type\nA\tvowel\nB\tconsonant\nD\tconsonant\nE\tvowel\n'
    _NUMERICAL_TSV = b'#name\tcol1\nA\t1\nB\t2\nD\t3\nE\t4\n'
    _LIST_TSV = b'#name\tlist_data\nA\ta,b,c\nB\tc,d\nD\ta,c,d,e\nE\te,d,b\n'

    # Properties written into the serialized tree for each kind of fixture.
    _CATEGORICAL_PROPS = ['alphabet_type', 'alphabet_type_counter']
    _NUMERICAL_PROPS = ['col1', 'col1_avg', 'col1_sum', 'col1_max', 'col1_min', 'col1_std']
    _LIST_PROPS = ['list_data', 'list_data_counter']

    def _assert_pruned(self, newick: str, annotation: bytes, condition: str,
                       props: list, expected_tree: str) -> None:
        """Annotate *newick*, prune it by *condition* and compare the output.

        Args:
            newick: Newick string of the tree to load.
            annotation: Raw TSV content written to a temporary metadata file.
            condition: Single prune condition handed to
                ``utils.conditional_prune``.
            props: Property names to include when serializing the result.
            expected_tree: Exact newick string the pruned tree must produce.
        """
        parser = utils.get_internal_parser("name")
        test_tree = utils.ete4_parse(newick)

        # parse_csv receives the file *name*, so it has to run while the
        # temporary file still exists, i.e. inside the ``with`` block.
        with NamedTemporaryFile(suffix='.tsv') as f_annotation:
            f_annotation.write(annotation)
            f_annotation.flush()
            metadata_dict, node_props, columns, prop2type = tree_annotate.parse_csv(
                [f_annotation.name])

        # Only the annotated tree is needed; the prop2type mapping returned
        # by parse_csv is the one passed on to conditional_prune.
        test_tree_annotated, _ = tree_annotate.run_tree_annotate(
            test_tree,
            metadata_dict=metadata_dict, node_props=node_props,
            columns=columns, prop2type=prop2type)

        pruned_tree = utils.conditional_prune(test_tree_annotated, [condition], prop2type)

        self.assertEqual(
            pruned_tree.write(props=props, parser=parser, format_root_node=True),
            expected_tree)

    def test_pruned_by_00(self):
        """"contains" on the leaf name: leaf A is absent from the result."""
        expected_tree = '((B:1[&&NHX:alphabet_type=consonant],(E:1[&&NHX:alphabet_type=vowel],D:1[&&NHX:alphabet_type=consonant])Internal_1:0.5[&&NHX:alphabet_type_counter=consonant--1||vowel--1])Internal_2:0.5[&&NHX:alphabet_type_counter=consonant--2||vowel--1])Root[&&NHX:alphabet_type_counter=consonant--2||vowel--2];'
        self._assert_pruned(
            self._NAMED_TREE, self._CATEGORICAL_TSV,
            "name contains A",
            self._CATEGORICAL_PROPS, expected_tree)

    def test_pruned_by_01(self):
        """"=" on a categorical leaf property: the vowel leaves A and E are absent."""
        expected_tree = '((B:1[&&NHX:alphabet_type=consonant],(D:1[&&NHX:alphabet_type=consonant])Internal_1:0.5[&&NHX:alphabet_type_counter=consonant--1||vowel--1])Internal_2:0.5[&&NHX:alphabet_type_counter=consonant--2||vowel--1])Root[&&NHX:alphabet_type_counter=consonant--2||vowel--2];'
        self._assert_pruned(
            self._NAMED_TREE, self._CATEGORICAL_TSV,
            "alphabet_type=vowel",
            self._CATEGORICAL_PROPS, expected_tree)

    def test_pruned_by_02(self):
        """"<" on a categorical counter of internal nodes: Internal_1 is absent."""
        expected_tree = '(A:1[&&NHX:alphabet_type=vowel],(B:1[&&NHX:alphabet_type=consonant])Internal_2:0.5[&&NHX:alphabet_type_counter=consonant--2||vowel--1])Root[&&NHX:alphabet_type_counter=consonant--2||vowel--2];'
        self._assert_pruned(
            self._NAMED_TREE, self._CATEGORICAL_TSV,
            "alphabet_type_counter:consonant < 2",
            self._CATEGORICAL_PROPS, expected_tree)

    def test_pruned_by_03(self):
        """"<" on a numerical leaf property: leaves A (1) and B (2) are absent."""
        expected_tree = '(((E:1[&&NHX:col1=4.0],D:1[&&NHX:col1=3.0])Internal_1:0.5[&&NHX:col1_avg=3.5:col1_sum=7.0:col1_max=4.0:col1_min=3.0:col1_std=0.5])Internal_2:0.5[&&NHX:col1_avg=3.0:col1_sum=9.0:col1_max=4.0:col1_min=2.0:col1_std=1.0])Root[&&NHX:col1_avg=2.5:col1_sum=10.0:col1_max=4.0:col1_min=1.0:col1_std=1.6666666666666667];'
        self._assert_pruned(
            self._NAMED_TREE, self._NUMERICAL_TSV,
            "col1 < 3",
            self._NUMERICAL_PROPS, expected_tree)

    def test_pruned_by_04(self):
        """"<" on a numerical summary of internal nodes: only A is left under Root."""
        expected_tree = '(A:1[&&NHX:col1=1.0])Root[&&NHX:col1_avg=2.5:col1_sum=10.0:col1_max=4.0:col1_min=1.0:col1_std=1.6666666666666667];'
        self._assert_pruned(
            self._NAMED_TREE, self._NUMERICAL_TSV,
            "col1_avg < 3.5",
            self._NUMERICAL_PROPS, expected_tree)

    def test_pruned_by_05(self):
        """"contains" on a list-valued leaf property: leaves A and D are absent.

        The input tree has unnamed internal nodes; the expected output refers
        to them as N4 and N5.
        """
        expected_tree = '((B:1[&&NHX:list_data=c|d],(E:1[&&NHX:list_data=e|d|b])N4:0.5[&&NHX:list_data_counter=a--1||b--1||c--1||d--2||e--2])N5:0.5[&&NHX:list_data_counter=a--1||b--1||c--2||d--3||e--2])Root[&&NHX:list_data_counter=a--2||b--2||c--3||d--3||e--2];'
        self._assert_pruned(
            "(A:1,(B:1,(E:1,D:1):0.5):0.5)Root;", self._LIST_TSV,
            'list_data contains a',
            self._LIST_PROPS, expected_tree)

    def test_pruned_by_06(self):
        """"<" on a list counter of internal nodes: only A is left under Root.

        The input tree has no node names besides the leaves; the expected
        output nevertheless labels the root as Root.
        """
        expected_tree = '(A:1[&&NHX:list_data=a|b|c])Root[&&NHX:list_data_counter=a--2||b--2||c--3||d--3||e--2];'
        self._assert_pruned(
            "(A:1,(B:1,(E:1,D:1):0.5):0.5);", self._LIST_TSV,
            'list_data_counter:a<2',
            self._LIST_PROPS, expected_tree)

192 

# Run the suite with unittest's command-line runner when this module is
# executed directly rather than imported.
if __name__ == '__main__':
    unittest.main()

195#pytest.main(['-v'])