Coverage for test_annotate.py: 99%
256 statements
« prev ^ index » next coverage.py v7.2.7, created at 2024-03-21 09:19 +0100
« prev ^ index » next coverage.py v7.2.7, created at 2024-03-21 09:19 +0100
2import sys
3import os
4import tarfile
5from io import StringIO, BytesIO
6import unittest
8sys.path.insert(0, os.path.abspath(os.path.dirname(__file__) + '/..'))
10#from collections import namedtuple
11from tempfile import NamedTemporaryFile, TemporaryDirectory
13from treeprofiler import tree_annotate
14from treeprofiler.src import utils
15import time
class TestAnnotate(unittest.TestCase):
    """Tests for ``tree_annotate.parse_csv`` / ``tree_annotate.run_tree_annotate``.

    Each test parses a small Newick tree, annotates it from TSV metadata and
    compares the serialized (NHX-annotated) Newick output with a recorded
    expected string.
    """

    # Newick fixtures shared by several tests.
    _NAMED_TREE = "(A:1,(B:1,(E:1,D:1)Internal_1:0.5)Internal_2:0.5)Root;"
    _UNNAMED_TREE = "(A:1,(B:1,(E:1,D:1):0.5):0.5);"

    # TSV metadata fixtures shared by several tests.
    _ALPHABET_TSV = b'#name\talphabet_type\nA\tvowel\nB\tconsonant\nD\tconsonant\nE\tvowel\n'
    _COL1_TSV = b'#name\tcol1\nA\t1\nB\t2\nD\t3\nE\t4\n'

    @staticmethod
    def _annotate(tree, tsv_content, **annotate_kwargs):
        """Annotate *tree* with the TSV metadata given as bytes.

        Writes *tsv_content* to a temporary ``.tsv`` file, parses it with
        ``tree_annotate.parse_csv`` and forwards the parsed metadata plus any
        extra keyword arguments to ``tree_annotate.run_tree_annotate``.

        Returns the annotated tree (first element of the result tuple).
        """
        with NamedTemporaryFile(suffix='.tsv') as f_annotation:
            f_annotation.write(tsv_content)
            # Flush so parse_csv, which reopens the file by name, sees the data.
            f_annotation.flush()
            metadata_dict, node_props, columns, prop2type = tree_annotate.parse_csv([f_annotation.name])

            annotated_tree, _annotated_prop2type = tree_annotate.run_tree_annotate(
                tree,
                metadata_dict=metadata_dict, node_props=node_props,
                columns=columns, prop2type=prop2type,
                **annotate_kwargs)
        return annotated_tree

    def test_annotate_01(self):
        # basic annotate categorical data
        test_tree = utils.ete4_parse('(a:1);')

        test_tree_annotated = self._annotate(test_tree, b'#name\tfruit\na\tapple')

        expected_tree = '(a:1[&&NHX:fruit=apple]);'
        self.assertEqual(test_tree_annotated.write(props=None), expected_tree)

    def test_annotate_02(self):
        # internal_nodes annotation categorical data
        internal_parser = "name"
        parser = utils.get_internal_parser(internal_parser)
        test_tree = utils.ete4_parse(self._NAMED_TREE, internal_parser=internal_parser)

        test_tree_annotated = self._annotate(test_tree, self._ALPHABET_TSV)

        expected_tree_no_root = '(A:1[&&NHX:alphabet_type=vowel],(B:1[&&NHX:alphabet_type=consonant],(E:1[&&NHX:alphabet_type=vowel],D:1[&&NHX:alphabet_type=consonant])Internal_1:0.5[&&NHX:alphabet_type_counter=consonant--1||vowel--1])Internal_2:0.5[&&NHX:alphabet_type_counter=consonant--2||vowel--1]);'
        expected_tree_with_root = '(A:1[&&NHX:alphabet_type=vowel],(B:1[&&NHX:alphabet_type=consonant],(E:1[&&NHX:alphabet_type=vowel],D:1[&&NHX:alphabet_type=consonant])Internal_1:0.5[&&NHX:alphabet_type_counter=consonant--1||vowel--1])Internal_2:0.5[&&NHX:alphabet_type_counter=consonant--2||vowel--1])Root[&&NHX:alphabet_type_counter=consonant--2||vowel--2];'

        self.assertEqual(test_tree_annotated.write(props=None, parser=parser), expected_tree_no_root)
        self.assertEqual(test_tree_annotated.write(props=None, parser=parser, format_root_node=True), expected_tree_with_root)

    def test_annotate_03(self):
        # basic annotate numerical data
        test_tree = utils.ete4_parse('(a:1);')

        test_tree_annotated = self._annotate(test_tree, b'#name\tannotate_03\na\t2')

        expected_tree = '(a:1[&&NHX:annotate_03=2.0]);'
        self.assertEqual(test_tree_annotated.write(props=None), expected_tree)

    def test_annotate_04(self):
        # internal_nodes annotation numerical data
        parser = utils.get_internal_parser("name")
        test_tree = utils.ete4_parse(self._NAMED_TREE)

        test_tree_annotated = self._annotate(test_tree, self._COL1_TSV, threads=4)

        props = ['col1', 'col1_sum', 'col1_max', 'col1_min', 'col1_std', 'col1_avg']
        expected_tree_no_root = '(A:1[&&NHX:col1=1.0],(B:1[&&NHX:col1=2.0],(E:1[&&NHX:col1=4.0],D:1[&&NHX:col1=3.0])Internal_1:0.5[&&NHX:col1_sum=7.0:col1_max=4.0:col1_min=3.0:col1_std=0.5:col1_avg=3.5])Internal_2:0.5[&&NHX:col1_sum=9.0:col1_max=4.0:col1_min=2.0:col1_std=1.0:col1_avg=3.0]);'
        expected_tree_with_root = '(A:1[&&NHX:col1=1.0],(B:1[&&NHX:col1=2.0],(E:1[&&NHX:col1=4.0],D:1[&&NHX:col1=3.0])Internal_1:0.5[&&NHX:col1_sum=7.0:col1_max=4.0:col1_min=3.0:col1_std=0.5:col1_avg=3.5])Internal_2:0.5[&&NHX:col1_sum=9.0:col1_max=4.0:col1_min=2.0:col1_std=1.0:col1_avg=3.0])Root[&&NHX:col1_sum=10.0:col1_max=4.0:col1_min=1.0:col1_std=1.6666666666666667:col1_avg=2.5];'

        self.assertEqual(test_tree_annotated.write(props=props, parser=parser), expected_tree_no_root)
        self.assertEqual(test_tree_annotated.write(props=props, parser=parser, format_root_node=True), expected_tree_with_root)

    def test_annotate_05(self):
        # test num_stat none and counter_stat none
        # internal_nodes annotation categorical data
        parser = utils.get_internal_parser("name")
        test_tree = utils.ete4_parse(self._NAMED_TREE)

        test_tree_annotated = self._annotate(
            test_tree,
            b'#name\tcol1\talphabet_type\nA\t1\tvowel\nB\t2\tconsonant\nD\t3\tconsonant\nE\t4\tvowel\n',
            counter_stat='none', num_stat='none')

        props = ["alphabet_type", "col1"]
        expected_tree_no_root = '(A:1[&&NHX:alphabet_type=vowel:col1=1.0],(B:1[&&NHX:alphabet_type=consonant:col1=2.0],(E:1[&&NHX:alphabet_type=vowel:col1=4.0],D:1[&&NHX:alphabet_type=consonant:col1=3.0])Internal_1:0.5)Internal_2:0.5);'
        expected_tree_with_root = '(A:1[&&NHX:alphabet_type=vowel:col1=1.0],(B:1[&&NHX:alphabet_type=consonant:col1=2.0],(E:1[&&NHX:alphabet_type=vowel:col1=4.0],D:1[&&NHX:alphabet_type=consonant:col1=3.0])Internal_1:0.5)Internal_2:0.5)Root;'

        self.assertEqual(test_tree_annotated.write(props=props, parser=parser), expected_tree_no_root)
        self.assertEqual(test_tree_annotated.write(props=props, parser=parser, format_root_node=True), expected_tree_with_root)

    def test_annotate_06(self):
        # assign internal node name
        # internal_nodes annotation categorical data
        parser = utils.get_internal_parser("name")
        test_tree = utils.ete4_parse(self._UNNAMED_TREE)

        expected_tree_no_root = '(A:1[&&NHX:alphabet_type=vowel],(B:1[&&NHX:alphabet_type=consonant],(E:1[&&NHX:alphabet_type=vowel],D:1[&&NHX:alphabet_type=consonant])N4:0.5[&&NHX:alphabet_type_counter=consonant--1||vowel--1])N5:0.5[&&NHX:alphabet_type_counter=consonant--2||vowel--1]);'
        expected_tree_with_root = '(A:1[&&NHX:alphabet_type=vowel],(B:1[&&NHX:alphabet_type=consonant],(E:1[&&NHX:alphabet_type=vowel],D:1[&&NHX:alphabet_type=consonant])N4:0.5[&&NHX:alphabet_type_counter=consonant--1||vowel--1])N5:0.5[&&NHX:alphabet_type_counter=consonant--2||vowel--1])Root[&&NHX:alphabet_type_counter=consonant--2||vowel--2];'

        test_tree_annotated = self._annotate(test_tree, self._ALPHABET_TSV, threads=4)

        self.assertEqual(test_tree_annotated.write(props=None, parser=parser), expected_tree_no_root)
        self.assertEqual(test_tree_annotated.write(props=None, parser=parser, format_root_node=True), expected_tree_with_root)

    def test_annotate_07(self):
        # internal_nodes annotation boolean data
        parser = utils.get_internal_parser("name")
        test_tree = utils.ete4_parse(self._UNNAMED_TREE)

        expected_tree_no_root = '(A:1[&&NHX:bool_type=True],(B:1[&&NHX:bool_type=False],(E:1[&&NHX:bool_type=False],D:1[&&NHX:bool_type=True])N4:0.5[&&NHX:bool_type_counter=False--1||True--1])N5:0.5[&&NHX:bool_type_counter=False--2||True--1]);'
        expected_tree_with_root = '(A:1[&&NHX:bool_type=True],(B:1[&&NHX:bool_type=False],(E:1[&&NHX:bool_type=False],D:1[&&NHX:bool_type=True])N4:0.5[&&NHX:bool_type_counter=False--1||True--1])N5:0.5[&&NHX:bool_type_counter=False--2||True--1])Root[&&NHX:bool_type_counter=False--2||True--2];'

        test_tree_annotated = self._annotate(
            test_tree, b'#name\tbool_type\nA\tTrue\nB\tFalse\nD\tTrue\nE\tFalse\n')

        self.assertEqual(test_tree_annotated.write(props=None, parser=parser), expected_tree_no_root)
        self.assertEqual(test_tree_annotated.write(props=None, parser=parser, format_root_node=True), expected_tree_with_root)

    def test_annotate_08(self):
        # internal_nodes annotation list data
        parser = utils.get_internal_parser("name")
        test_tree = utils.ete4_parse(self._UNNAMED_TREE)

        expected_tree_no_root = '(A:1[&&NHX:list_data=a|b|c],(B:1[&&NHX:list_data=c|d],(E:1[&&NHX:list_data=e|d|b],D:1[&&NHX:list_data=a|c|d|e])N4:0.5[&&NHX:list_data_counter=a--1||b--1||c--1||d--2||e--2])N5:0.5[&&NHX:list_data_counter=a--1||b--1||c--2||d--3||e--2]);'
        expected_tree_with_root = '(A:1[&&NHX:list_data=a|b|c],(B:1[&&NHX:list_data=c|d],(E:1[&&NHX:list_data=e|d|b],D:1[&&NHX:list_data=a|c|d|e])N4:0.5[&&NHX:list_data_counter=a--1||b--1||c--1||d--2||e--2])N5:0.5[&&NHX:list_data_counter=a--1||b--1||c--2||d--3||e--2])Root[&&NHX:list_data_counter=a--2||b--2||c--3||d--3||e--2];'

        test_tree_annotated = self._annotate(
            test_tree, b'#name\tlist_data\nA\ta,b,c\nB\tc,d\nD\ta,c,d,e\nE\te,d,b\n')

        self.assertEqual(test_tree_annotated.write(props=None, parser=parser), expected_tree_no_root)
        self.assertEqual(test_tree_annotated.write(props=None, parser=parser, format_root_node=True), expected_tree_with_root)

    # NOTE(review): tests 09 and 10 below are disabled in the original file and
    # are kept as-is; the reason they were disabled is not visible here.
    # def test_annotate_09():
    #     # specify datatype of each column
    #     internal_parser = "name"
    #     parser = utils.get_internal_parser(internal_parser)
    #
    #     test_tree = utils.ete4_parse("(A:1,(B:1,(E:1,D:1)Internal_1:0.5)Internal_2:0.5)Root;")
    #
    #     with NamedTemporaryFile(suffix='.tsv') as f_annotation:
    #         f_annotation.write(b'#name\tcol1\tcol2\tcol3\tcol4\nA\tvowel\t1\tTrue\ta,b,c\nB\tconsonant\t2\tFalse\tc,d\nD\tconsonant\t3\tTrue\ta,c,d,e\nE\tvowel\t4\tFalse\te,d,b\n')
    #         f_annotation.flush()
    #
    #         metadata_dict, node_props, columns, prop2type = tree_annotate.parse_csv([f_annotation.name])
    #
    #     text_prop = ['col1']
    #     num_prop = ['col2']
    #     bool_prop = ['col3']
    #     multiple_text_prop = ['col4']
    #
    #     test_tree_annotated, annotated_prop2type = tree_annotate.run_tree_annotate(test_tree,
    #         metadata_dict=metadata_dict, node_props=node_props,
    #         text_prop=text_prop, multiple_text_prop=multiple_text_prop,
    #         num_prop=num_prop, bool_prop=bool_prop,
    #         columns=columns, prop2type=prop2type)
    #
    #     props = ['col1', 'col2', 'col3', 'col4', 'col1_counter', 'col4_counter', 'col3_counter', 'col2_avg', 'col2_sum', 'col2_max', 'col2_min', 'col2_std']
    #     expected_tree = '(A:1[&&NHX:col1=vowel:col2=1.0:col3=True:col4=a|b|c],(B:1[&&NHX:col1=consonant:col2=2.0:col3=False:col4=c|d],(E:1[&&NHX:col1=vowel:col2=4.0:col3=False:col4=e|d|b],D:1[&&NHX:col1=consonant:col2=3.0:col3=True:col4=a|c|d|e])Internal_1:0.5[&&NHX:col1_counter=consonant--1||vowel--1:col4_counter=a--1||b--1||c--1||d--2||e--2:col3_counter=False--1||True--1:col2_avg=3.5:col2_sum=7.0:col2_max=4.0:col2_min=3.0:col2_std=0.5])Internal_2:0.5[&&NHX:col1_counter=consonant--2||vowel--1:col4_counter=a--1||b--1||c--2||d--3||e--2:col3_counter=False--2||True--1:col2_avg=3.0:col2_sum=9.0:col2_max=4.0:col2_min=2.0:col2_std=1.0])Root[&&NHX:col1_counter=consonant--2||vowel--2:col4_counter=a--2||b--2||c--3||d--3||e--2:col3_counter=False--2||True--2:col2_avg=2.5:col2_sum=10.0:col2_max=4.0:col2_min=1.0:col2_std=1.6666666666666667];'
    #     assert test_tree_annotated.write(props=props, parser=parser, format_root_node=True) == expected_tree

    # def test_annotate_10():
    #     # specify datatype of each column index
    #     internal_parser = "name"
    #     parser = utils.get_internal_parser(internal_parser)
    #
    #     test_tree = utils.ete4_parse("(A:1,(B:1,(E:1,D:1)Internal_1:0.5)Internal_2:0.5)Root;")
    #
    #     with NamedTemporaryFile(suffix='.tsv') as f_annotation:
    #         f_annotation.write(b'#name\tcol1\tcol2\tcol3\tcol4\nA\tvowel\t1\tTrue\ta,b,c\nB\tconsonant\t2\tFalse\tc,d\nD\tconsonant\t3\tTrue\ta,c,d,e\nE\tvowel\t4\tFalse\te,d,b\n')
    #         f_annotation.flush()
    #
    #         metadata_dict, node_props, columns, prop2type = tree_annotate.parse_csv([f_annotation.name])
    #
    #     text_prop_idx = '1'
    #     num_prop_idx = '2'
    #     bool_prop_idx = '3'
    #     multiple_text_prop = ['col4']
    #
    #     test_tree_annotated, annotated_prop2type = tree_annotate.run_tree_annotate(test_tree,
    #         metadata_dict=metadata_dict, node_props=node_props,
    #         text_prop_idx=text_prop_idx, multiple_text_prop=multiple_text_prop,
    #         num_prop_idx=num_prop_idx, bool_prop_idx=bool_prop_idx,
    #         columns=columns, prop2type=prop2type)
    #     props = ['col1', 'col2', 'col3', 'col4', 'col1_counter', 'col4_counter', 'col3_counter', 'col2_avg', 'col2_sum', 'col2_max', 'col2_min', 'col2_std']
    #
    #     expected_tree = '(A:1[&&NHX:col1=vowel:col2=1.0:col3=True:col4=a|b|c],(B:1[&&NHX:col1=consonant:col2=2.0:col3=False:col4=c|d],(E:1[&&NHX:col1=vowel:col2=4.0:col3=False:col4=e|d|b],D:1[&&NHX:col1=consonant:col2=3.0:col3=True:col4=a|c|d|e])Internal_1:0.5[&&NHX:col1_counter=consonant--1||vowel--1:col4_counter=a--1||b--1||c--1||d--2||e--2:col3_counter=False--1||True--1:col2_avg=3.5:col2_sum=7.0:col2_max=4.0:col2_min=3.0:col2_std=0.5])Internal_2:0.5[&&NHX:col1_counter=consonant--2||vowel--1:col4_counter=a--1||b--1||c--2||d--3||e--2:col3_counter=False--2||True--1:col2_avg=3.0:col2_sum=9.0:col2_max=4.0:col2_min=2.0:col2_std=1.0])Root[&&NHX:col1_counter=consonant--2||vowel--2:col4_counter=a--2||b--2||c--3||d--3||e--2:col3_counter=False--2||True--2:col2_avg=2.5:col2_sum=10.0:col2_max=4.0:col2_min=1.0:col2_std=1.6666666666666667];'
    #     assert test_tree_annotated.write(props=props, parser=parser, format_root_node=True) ==expected_tree

    def test_annotate_11(self):
        # specify datatype of each column index range
        parser = utils.get_internal_parser("name")
        test_tree = utils.ete4_parse(self._NAMED_TREE)

        text_prop_idx = ['[1-2]']
        num_prop_idx = ['[3-4]']
        bool_prop_idx = ['[5-6]']
        # NOTE(review): 'col7' here and 'col6_counter' in `props` below match no
        # header of the TSV (headers are col01..col07). The recorded expected
        # string depends on these exact values (it has no col06_counter entry),
        # so they are left unchanged -- TODO confirm whether 'col07' and
        # 'col06_counter' were intended.
        multiple_text_prop = ['col7']

        test_tree_annotated = self._annotate(
            test_tree,
            b'#name\tcol01\tcol02\tcol03\tcol04\tcol05\tcol06\tcol07\nA\tvowel\tvowel\t1\t1\tTrue\tTrue\ta,b,c\nB\tconsonant\tconsonant\t2\t2\tFalse\tFalse\tc,d\nD\tconsonant\tconsonant\t3\t3\tTrue\tTrue\ta,c,d,e\nE\tvowel\tvowel\t4\t4\tFalse\tFalse\te,d,b\n',
            text_prop_idx=text_prop_idx, multiple_text_prop=multiple_text_prop,
            num_prop_idx=num_prop_idx, bool_prop_idx=bool_prop_idx)

        props = ['col01', 'col02', 'col03', 'col04', 'col05', 'col06', 'col07', 'col01_counter',
                 'col02_counter', 'col07_counter', 'col05_counter', 'col6_counter', 'col03_avg', 'col03_sum',
                 'col03_max', 'col03_min', 'col03_std', 'col04_avg', 'col04_sum', 'col04_max', 'col04_min', 'col04_std']
        expected_tree = '(A:1[&&NHX:col01=vowel:col02=vowel:col03=1.0:col04=1.0:col05=True:col06=True:col07=a|b|c],(B:1[&&NHX:col01=consonant:col02=consonant:col03=2.0:col04=2.0:col05=False:col06=False:col07=c|d],(E:1[&&NHX:col01=vowel:col02=vowel:col03=4.0:col04=4.0:col05=False:col06=False:col07=e|d|b],D:1[&&NHX:col01=consonant:col02=consonant:col03=3.0:col04=3.0:col05=True:col06=True:col07=a|c|d|e])Internal_1:0.5[&&NHX:col01_counter=consonant--1||vowel--1:col02_counter=consonant--1||vowel--1:col07_counter=a--1||b--1||c--1||d--2||e--2:col05_counter=False--1||True--1:col03_avg=3.5:col03_sum=7.0:col03_max=4.0:col03_min=3.0:col03_std=0.5:col04_avg=3.5:col04_sum=7.0:col04_max=4.0:col04_min=3.0:col04_std=0.5])Internal_2:0.5[&&NHX:col01_counter=consonant--2||vowel--1:col02_counter=consonant--2||vowel--1:col07_counter=a--1||b--1||c--2||d--3||e--2:col05_counter=False--2||True--1:col03_avg=3.0:col03_sum=9.0:col03_max=4.0:col03_min=2.0:col03_std=1.0:col04_avg=3.0:col04_sum=9.0:col04_max=4.0:col04_min=2.0:col04_std=1.0])Root[&&NHX:col01_counter=consonant--2||vowel--2:col02_counter=consonant--2||vowel--2:col07_counter=a--2||b--2||c--3||d--3||e--2:col05_counter=False--2||True--2:col03_avg=2.5:col03_sum=10.0:col03_max=4.0:col03_min=1.0:col03_std=1.6666666666666667:col04_avg=2.5:col04_sum=10.0:col04_max=4.0:col04_min=1.0:col04_std=1.6666666666666667];'
        self.assertEqual(test_tree_annotated.write(props=props, parser=parser, format_root_node=True), expected_tree)

    def test_annotate_12(self):
        # Missing values and unmapped leaves should both be treated the same as none.
        # r'^(?:\W+|none|None|null|NaN|)$'
        parser = utils.get_internal_parser("name")
        test_tree = utils.ete4_parse(self._NAMED_TREE)

        # metadata with missing categorical data (every leaf listed)
        test_tree_annotated_1 = self._annotate(
            test_tree, b'#name\talphabet_type\nA\tnone\nB\t-\nD\t\nE\tvowel\n',
            counter_stat='raw')

        # metadata with unmapped categorical data (leaf B absent from the table)
        test_tree_annotated_2 = self._annotate(
            test_tree, b'#name\talphabet_type\nA\tnone\nD\t\nE\tvowel\n',
            counter_stat='raw')

        expected_tree = '(A:1[&&NHX:alphabet_type=NaN],(B:1[&&NHX:alphabet_type=NaN],(E:1[&&NHX:alphabet_type=vowel],D:1[&&NHX:alphabet_type=NaN])Internal_1:0.5[&&NHX:alphabet_type_counter=NaN--1||vowel--1])Internal_2:0.5[&&NHX:alphabet_type_counter=NaN--2||vowel--1])Root[&&NHX:alphabet_type_counter=NaN--3||vowel--1];'

        self.assertEqual(test_tree_annotated_1.write(props=None, parser=parser, format_root_node=True), expected_tree)
        self.assertEqual(test_tree_annotated_2.write(props=None, parser=parser, format_root_node=True), expected_tree)

    def test_annotate_13(self):
        # test relative on categorical, boolean and list
        parser = utils.get_internal_parser("name")
        test_tree = utils.ete4_parse(self._NAMED_TREE)

        test_tree_annotated_13 = self._annotate(
            test_tree,
            b'#name\tCol1\tCol2\tCol3\nA\tvowel\tTrue\ta,b,c\nB\tconsonant\tFalse\tc,d\nD\tconsonant\tTrue\ta,c,d,e\nE\tvowel\tFalse\te,d,b\n',
            counter_stat='relative')

        props = ['Col1', 'Col2', 'Col3', 'Col1_counter', 'Col2_counter', 'Col3_counter']
        expected_tree_13 = '(A:1[&&NHX:Col1=vowel:Col2=True:Col3=a|b|c],(B:1[&&NHX:Col1=consonant:Col2=False:Col3=c|d],(E:1[&&NHX:Col1=vowel:Col2=False:Col3=e|d|b],D:1[&&NHX:Col1=consonant:Col2=True:Col3=a|c|d|e])Internal_1:0.5[&&NHX:Col1_counter=consonant--0.50||vowel--0.50:Col2_counter=False--0.50||True--0.50:Col3_counter=a--0.14||b--0.14||c--0.14||d--0.29||e--0.29])Internal_2:0.5[&&NHX:Col1_counter=consonant--0.67||vowel--0.33:Col2_counter=False--0.67||True--0.33:Col3_counter=a--0.11||b--0.11||c--0.22||d--0.33||e--0.22])Root[&&NHX:Col1_counter=consonant--0.50||vowel--0.50:Col2_counter=False--0.50||True--0.50:Col3_counter=a--0.17||b--0.17||c--0.25||d--0.25||e--0.17];'
        self.assertEqual(test_tree_annotated_13.write(props=props, parser=parser, format_root_node=True), expected_tree_13)

    def test_annotate_14_a(self):
        # test different numerical stats: num_stat='all'
        parser = utils.get_internal_parser("name")
        test_tree = utils.ete4_parse(self._NAMED_TREE)

        test_tree_annotated_all = self._annotate(
            test_tree, self._COL1_TSV, num_stat='all', column2method={})

        props = ['col1', 'col1_sum', 'col1_max', 'col1_min', 'col1_std', 'col1_avg']
        expected_tree_all = '(A:1[&&NHX:col1=1.0],(B:1[&&NHX:col1=2.0],(E:1[&&NHX:col1=4.0],D:1[&&NHX:col1=3.0])Internal_1:0.5[&&NHX:col1_sum=7.0:col1_max=4.0:col1_min=3.0:col1_std=0.5:col1_avg=3.5])Internal_2:0.5[&&NHX:col1_sum=9.0:col1_max=4.0:col1_min=2.0:col1_std=1.0:col1_avg=3.0])Root[&&NHX:col1_sum=10.0:col1_max=4.0:col1_min=1.0:col1_std=1.6666666666666667:col1_avg=2.5];'

        self.assertEqual(test_tree_annotated_all.write(props=props, parser=parser, format_root_node=True), expected_tree_all)

    def test_annotate_14_b(self):
        # test different numerical stats: num_stat='sum'
        parser = utils.get_internal_parser("name")
        test_tree = utils.ete4_parse(self._NAMED_TREE)

        test_tree_annotated_sum = self._annotate(
            test_tree, self._COL1_TSV, num_stat='sum', column2method={})

        expected_tree = '(A:1[&&NHX:col1=1.0],(B:1[&&NHX:col1=2.0],(E:1[&&NHX:col1=4.0],D:1[&&NHX:col1=3.0])Internal_1:0.5[&&NHX:col1_sum=7.0])Internal_2:0.5[&&NHX:col1_sum=9.0])Root[&&NHX:col1_sum=10.0];'

        self.assertEqual(test_tree_annotated_sum.write(props=None, parser=parser, format_root_node=True), expected_tree)

    def test_annotate_14_c(self):
        # test different numerical stats: num_stat='avg'
        parser = utils.get_internal_parser("name")
        test_tree = utils.ete4_parse(self._NAMED_TREE)

        test_tree_annotated_avg = self._annotate(
            test_tree, self._COL1_TSV, num_stat='avg', column2method={})

        expected_tree_avg = '(A:1[&&NHX:col1=1.0],(B:1[&&NHX:col1=2.0],(E:1[&&NHX:col1=4.0],D:1[&&NHX:col1=3.0])Internal_1:0.5[&&NHX:col1_avg=3.5])Internal_2:0.5[&&NHX:col1_avg=3.0])Root[&&NHX:col1_avg=2.5];'
        self.assertEqual(test_tree_annotated_avg.write(props=None, parser=parser, format_root_node=True), expected_tree_avg)

    def test_annotate_14_d(self):
        # test different numerical stats: num_stat='max'
        parser = utils.get_internal_parser("name")
        test_tree = utils.ete4_parse(self._NAMED_TREE)

        test_tree_annotated_max = self._annotate(
            test_tree, self._COL1_TSV, num_stat='max', column2method={})

        expected_tree_max = '(A:1[&&NHX:col1=1.0],(B:1[&&NHX:col1=2.0],(E:1[&&NHX:col1=4.0],D:1[&&NHX:col1=3.0])Internal_1:0.5[&&NHX:col1_max=4.0])Internal_2:0.5[&&NHX:col1_max=4.0])Root[&&NHX:col1_max=4.0];'
        self.assertEqual(test_tree_annotated_max.write(props=None, parser=parser, format_root_node=True), expected_tree_max)

    def test_annotate_14_e(self):
        # test different numerical stats: num_stat='min'
        parser = utils.get_internal_parser("name")
        test_tree = utils.ete4_parse(self._NAMED_TREE)

        test_tree_annotated_min = self._annotate(
            test_tree, self._COL1_TSV, num_stat='min', column2method={})

        expected_tree_min = '(A:1[&&NHX:col1=1.0],(B:1[&&NHX:col1=2.0],(E:1[&&NHX:col1=4.0],D:1[&&NHX:col1=3.0])Internal_1:0.5[&&NHX:col1_min=3.0])Internal_2:0.5[&&NHX:col1_min=2.0])Root[&&NHX:col1_min=1.0];'

        self.assertEqual(test_tree_annotated_min.write(props=None, parser=parser, format_root_node=True), expected_tree_min)

    def test_annotate_14_f(self):
        # test different numerical stats: num_stat='std'
        parser = utils.get_internal_parser("name")
        test_tree = utils.ete4_parse(self._NAMED_TREE)

        test_tree_annotated_std = self._annotate(
            test_tree, self._COL1_TSV, num_stat='std', column2method={})

        expected_tree_std = '(A:1[&&NHX:col1=1.0],(B:1[&&NHX:col1=2.0],(E:1[&&NHX:col1=4.0],D:1[&&NHX:col1=3.0])Internal_1:0.5[&&NHX:col1_std=0.5])Internal_2:0.5[&&NHX:col1_std=1.0])Root[&&NHX:col1_std=1.6666666666666667];'

        self.assertEqual(test_tree_annotated_std.write(props=None, parser=parser, format_root_node=True), expected_tree_std)

    def test_annotate_tar(self):
        # test if can read tar.gz file
        test_tree = utils.ete4_parse('(a);')

        with TemporaryDirectory() as temp_dir:
            # Two metadata tables, each contributing one column for leaf 'a'.
            file1_path = os.path.join(temp_dir, 'metadata1.tsv')
            with open(file1_path, 'w') as file1:
                file1.write('#name\tcol1\na\tapple')

            file2_path = os.path.join(temp_dir, 'metadata2.tsv')
            with open(file2_path, 'w') as file2:
                file2.write('#name\tcol2\na\t3')

            with NamedTemporaryFile(suffix='.tar.gz') as temp_tar:
                tar_path = temp_tar.name

                # Create a tarfile and add the files from the temporary directory
                with tarfile.open(tar_path, 'w:gz') as tar:
                    tar.add(file1_path, arcname='metadata1.tsv')
                    tar.add(file2_path, arcname='metadata2.tsv')

                metadata_dict, node_props, columns, prop2type = tree_annotate.parse_csv([tar_path])

                test_tree_annotated, _annotated_prop2type = tree_annotate.run_tree_annotate(
                    test_tree,
                    metadata_dict=metadata_dict, node_props=node_props, column2method={},
                    columns=columns, prop2type=prop2type)

        props = ['col1', 'col2']
        expected_tree = '(a:1[&&NHX:col1=apple:col2=3.0]);'
        self.assertEqual(test_tree_annotated.write(props=props), expected_tree)

    def test_internal_parser_01(self):
        # internal node labels parsed as names
        parser = 'name'
        test_tree = utils.ete4_parse(self._NAMED_TREE, internal_parser=parser)
        expected_tree_parser_1 = "(A:1,(B:1,(E:1,D:1)Internal_1:0.5)Internal_2:0.5)Root;"
        expected_tree_parser_0 = "(A:1,(B:1,(E:1,D:1):0.5[&&NHX:name=Internal_1]):0.5[&&NHX:name=Internal_2]);"

        self.assertEqual(test_tree.write(props=None, parser=1, format_root_node=True), expected_tree_parser_1)
        self.assertEqual(test_tree.write(props=None, parser=0), expected_tree_parser_0)

    def test_internal_parser_02(self):
        # internal node labels parsed as support values
        parser = 'support'
        test_tree = utils.ete4_parse("(A:1,(B:1,(E:1,D:1)1:0.5)1:0.5);", internal_parser=parser)
        expected_tree_parser_0 = "(A:1,(B:1,(E:1,D:1)1:0.5)1:0.5);"
        expected_tree_parser_1 = "(A:1,(B:1,(E:1,D:1):0.5[&&NHX:support=1.0]):0.5[&&NHX:support=1.0]);"

        self.assertEqual(test_tree.write(props=None, parser=1, format_root_node=True), expected_tree_parser_1)
        self.assertEqual(test_tree.write(props=None, parser=0), expected_tree_parser_0)
# Run the whole suite when this file is executed directly as a script.
if __name__ == '__main__':
    unittest.main()