Coverage for test_prune.py: 99%
109 statements
« prev ^ index » next coverage.py v7.2.7, created at 2024-03-21 09:19 +0100
« prev ^ index » next coverage.py v7.2.7, created at 2024-03-21 09:19 +0100
2import sys
3import os
4from io import StringIO
5import unittest
7sys.path.insert(0, os.path.abspath(os.path.dirname(__file__) + '/..'))
9#from collections import namedtuple
10from tempfile import NamedTemporaryFile
12from treeprofiler import tree_annotate
13from treeprofiler.src import utils
class TestPrune(unittest.TestCase):
    """Exercise ``utils.conditional_prune`` on categorical, numerical and list data.

    Every test follows the same pipeline: parse a Newick string, annotate the
    tree from a small TSV table, prune it with one condition and compare the
    serialized result with a hard-coded Newick/NHX string.  The pipeline lives
    in ``_prune_to_newick`` so each test only states its inputs and expectation.

    As the expected strings show, summary properties on the surviving internal
    nodes (``*_counter``, ``*_avg``, ...) still carry the values computed on
    the full tree; pruning does not recompute them.
    """

    # Four-leaf tree whose internal nodes are explicitly named.
    NAMED_TREE = "(A:1,(B:1,(E:1,D:1)Internal_1:0.5)Internal_2:0.5)Root;"

    # Annotation tables, written verbatim to a temporary .tsv file.
    ALPHABET_TSV = b'#name\talphabet_type\nA\tvowel\nB\tconsonant\nD\tconsonant\nE\tvowel\n'
    NUMERIC_TSV = b'#name\tcol1\nA\t1\nB\t2\nD\t3\nE\t4\n'
    LIST_TSV = b'#name\tlist_data\nA\ta,b,c\nB\tc,d\nD\ta,c,d,e\nE\te,d,b\n'

    def _prune_to_newick(self, newick, annotation_tsv, props, condition_inputs):
        """Annotate ``newick`` from ``annotation_tsv``, prune it and serialize it.

        Args:
            newick: Newick string handed to ``utils.ete4_parse``.
            annotation_tsv: Bytes of the tab-separated metadata table.
            props: Property names to include when writing the pruned tree.
            condition_inputs: List of condition strings for
                ``utils.conditional_prune``.

        Returns:
            The pruned tree written with the "name" internal parser and
            ``format_root_node=True``.
        """
        parser = utils.get_internal_parser("name")
        tree = utils.ete4_parse(newick)

        # parse_csv receives the file *name*, so the data must be flushed to
        # disk and the temporary file must still exist while it is read.
        with NamedTemporaryFile(suffix='.tsv') as f_annotation:
            f_annotation.write(annotation_tsv)
            f_annotation.flush()
            metadata_dict, node_props, columns, prop2type = tree_annotate.parse_csv(
                [f_annotation.name])

        # The second return value (the annotated prop2type) is not needed:
        # pruning is driven by the prop2type obtained from parse_csv.
        annotated_tree, _ = tree_annotate.run_tree_annotate(
            tree,
            metadata_dict=metadata_dict, node_props=node_props,
            columns=columns, prop2type=prop2type)

        pruned_tree = utils.conditional_prune(annotated_tree, condition_inputs, prop2type)
        return pruned_tree.write(props=props, parser=parser, format_root_node=True)

    def test_pruned_by_00(self):
        # "contains" on the leaf name: leaf A is expected to be removed.
        expected_tree = '((B:1[&&NHX:alphabet_type=consonant],(E:1[&&NHX:alphabet_type=vowel],D:1[&&NHX:alphabet_type=consonant])Internal_1:0.5[&&NHX:alphabet_type_counter=consonant--1||vowel--1])Internal_2:0.5[&&NHX:alphabet_type_counter=consonant--2||vowel--1])Root[&&NHX:alphabet_type_counter=consonant--2||vowel--2];'
        result = self._prune_to_newick(
            self.NAMED_TREE, self.ALPHABET_TSV,
            props=['alphabet_type', 'alphabet_type_counter'],
            condition_inputs=["name contains A"])
        self.assertEqual(result, expected_tree)

    def test_pruned_by_01(self):
        # Equality on a categorical leaf property: the vowel leaves (A and E)
        # are expected to be removed.
        expected_tree = '((B:1[&&NHX:alphabet_type=consonant],(D:1[&&NHX:alphabet_type=consonant])Internal_1:0.5[&&NHX:alphabet_type_counter=consonant--1||vowel--1])Internal_2:0.5[&&NHX:alphabet_type_counter=consonant--2||vowel--1])Root[&&NHX:alphabet_type_counter=consonant--2||vowel--2];'
        result = self._prune_to_newick(
            self.NAMED_TREE, self.ALPHABET_TSV,
            props=['alphabet_type', 'alphabet_type_counter'],
            condition_inputs=["alphabet_type=vowel"])
        self.assertEqual(result, expected_tree)

    def test_pruned_by_02(self):
        # Comparison on one entry of a categorical counter at internal nodes:
        # Internal_1 (consonant--1) is expected to be removed with its leaves.
        expected_tree = '(A:1[&&NHX:alphabet_type=vowel],(B:1[&&NHX:alphabet_type=consonant])Internal_2:0.5[&&NHX:alphabet_type_counter=consonant--2||vowel--1])Root[&&NHX:alphabet_type_counter=consonant--2||vowel--2];'
        result = self._prune_to_newick(
            self.NAMED_TREE, self.ALPHABET_TSV,
            props=['alphabet_type', 'alphabet_type_counter'],
            condition_inputs=["alphabet_type_counter:consonant < 2"])
        self.assertEqual(result, expected_tree)

    def test_pruned_by_03(self):
        # Numerical operator on a leaf property: leaves with col1 < 3
        # (A and B) are expected to be removed.
        expected_tree = '(((E:1[&&NHX:col1=4.0],D:1[&&NHX:col1=3.0])Internal_1:0.5[&&NHX:col1_avg=3.5:col1_sum=7.0:col1_max=4.0:col1_min=3.0:col1_std=0.5])Internal_2:0.5[&&NHX:col1_avg=3.0:col1_sum=9.0:col1_max=4.0:col1_min=2.0:col1_std=1.0])Root[&&NHX:col1_avg=2.5:col1_sum=10.0:col1_max=4.0:col1_min=1.0:col1_std=1.6666666666666667];'
        result = self._prune_to_newick(
            self.NAMED_TREE, self.NUMERIC_TSV,
            props=['col1', 'col1_avg', 'col1_sum', 'col1_max', 'col1_min', 'col1_std'],
            condition_inputs=["col1 < 3"])
        self.assertEqual(result, expected_tree)

    def test_pruned_by_04(self):
        # Numerical operator on an internal-node summary: Internal_2
        # (col1_avg=3.0) is expected to be removed, leaving only A under Root.
        expected_tree = '(A:1[&&NHX:col1=1.0])Root[&&NHX:col1_avg=2.5:col1_sum=10.0:col1_max=4.0:col1_min=1.0:col1_std=1.6666666666666667];'
        result = self._prune_to_newick(
            self.NAMED_TREE, self.NUMERIC_TSV,
            props=['col1', 'col1_avg', 'col1_sum', 'col1_max', 'col1_min', 'col1_std'],
            condition_inputs=["col1_avg < 3.5"])
        self.assertEqual(result, expected_tree)

    def test_pruned_by_05(self):
        # "contains" on a list-typed leaf property: leaves whose list holds
        # "a" (A and D) are expected to be removed.  The internal nodes are
        # unnamed in the input; the expected output refers to them as N4/N5.
        expected_tree = '((B:1[&&NHX:list_data=c|d],(E:1[&&NHX:list_data=e|d|b])N4:0.5[&&NHX:list_data_counter=a--1||b--1||c--1||d--2||e--2])N5:0.5[&&NHX:list_data_counter=a--1||b--1||c--2||d--3||e--2])Root[&&NHX:list_data_counter=a--2||b--2||c--3||d--3||e--2];'
        result = self._prune_to_newick(
            "(A:1,(B:1,(E:1,D:1):0.5):0.5)Root;", self.LIST_TSV,
            props=['list_data', 'list_data_counter'],
            condition_inputs=['list_data contains a'])
        self.assertEqual(result, expected_tree)

    def test_pruned_by_06(self):
        # Comparison on one entry of a list counter at internal nodes: only A
        # is expected to remain.  The input tree has no root name, yet the
        # expected output labels the root "Root".
        expected_tree = '(A:1[&&NHX:list_data=a|b|c])Root[&&NHX:list_data_counter=a--2||b--2||c--3||d--3||e--2];'
        result = self._prune_to_newick(
            "(A:1,(B:1,(E:1,D:1):0.5):0.5);", self.LIST_TSV,
            props=['list_data', 'list_data_counter'],
            condition_inputs=['list_data_counter:a<2'])
        self.assertEqual(result, expected_tree)
# Run the suite with unittest when this file is executed directly.
if __name__ == "__main__":
    unittest.main()
    # Alternative runner, kept disabled: pytest.main(['-v'])