Coverage for test_annotate.py: 99%

256 statements  

« prev     ^ index     » next       coverage.py v7.2.7, created at 2024-03-21 09:19 +0100

1 

2import sys 

3import os 

4import tarfile 

5from io import StringIO, BytesIO 

6import unittest 

7 

# Make the project root (the parent of this tests directory) importable so
# that `treeprofiler` resolves when the tests run from a source checkout.
# os.path.join is used instead of string concatenation: when __file__ has no
# directory component, dirname() is '' and '' + '/..' would resolve to the
# filesystem root rather than the parent of the current directory.
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), os.pardir)))

9 

10#from collections import namedtuple 

11from tempfile import NamedTemporaryFile, TemporaryDirectory 

12 

13from treeprofiler import tree_annotate 

14from treeprofiler.src import utils 

15import time 

16 

class TestAnnotate(unittest.TestCase):
    """Tests for ``tree_annotate.parse_csv`` / ``tree_annotate.run_tree_annotate``
    and for ``utils.ete4_parse``.

    Each annotation test writes a small TSV metadata table to a temporary
    file, parses it, annotates a newick tree with it and compares the
    serialized tree against an expected newick/NHX string.
    """

    # Newick fixtures shared by several tests.
    NAMED_TREE = "(A:1,(B:1,(E:1,D:1)Internal_1:0.5)Internal_2:0.5)Root;"
    UNNAMED_TREE = "(A:1,(B:1,(E:1,D:1):0.5):0.5);"

    # TSV fixtures shared by several tests.
    ALPHABET_TSV = b'#name\talphabet_type\nA\tvowel\nB\tconsonant\nD\tconsonant\nE\tvowel\n'
    NUMERIC_TSV = b'#name\tcol1\nA\t1\nB\t2\nD\t3\nE\t4\n'

    # ------------------------------------------------------------------
    # helpers
    # ------------------------------------------------------------------
    @staticmethod
    def _parse_tsv(content):
        """Write *content* (bytes) to a temporary ``.tsv`` file and parse it.

        Returns the 4-tuple produced by ``tree_annotate.parse_csv``:
        ``(metadata_dict, node_props, columns, prop2type)``.
        """
        with NamedTemporaryFile(suffix='.tsv') as tsv_file:
            tsv_file.write(content)
            # Flush so the data is on disk before parse_csv reopens the file by name.
            tsv_file.flush()
            # Must happen inside the ``with``: the file is removed on exit.
            return tree_annotate.parse_csv([tsv_file.name])

    @staticmethod
    def _run_annotate(tree, parsed, **options):
        """Annotate *tree* with already parsed metadata and return the tree.

        *parsed* is the 4-tuple returned by ``tree_annotate.parse_csv``.
        Extra keyword *options* are forwarded to ``run_tree_annotate``.
        The second value returned by ``run_tree_annotate`` is discarded,
        as no test inspects it.
        """
        metadata_dict, node_props, columns, prop2type = parsed
        annotated_tree, _annotated_prop2type = tree_annotate.run_tree_annotate(
            tree,
            metadata_dict=metadata_dict, node_props=node_props,
            columns=columns, prop2type=prop2type,
            **options)
        return annotated_tree

    def _annotate(self, tree, content, **options):
        """Parse the TSV bytes *content* and annotate *tree* with it."""
        return self._run_annotate(tree, self._parse_tsv(content), **options)

    def _check_num_stat(self, num_stat, expected, props=None):
        """Annotate the named tree with ``NUMERIC_TSV`` using *num_stat* and
        compare the root-formatted output against *expected*."""
        parser = utils.get_internal_parser("name")
        tree = utils.ete4_parse(self.NAMED_TREE)
        annotated = self._annotate(tree, self.NUMERIC_TSV,
                                   num_stat=num_stat, column2method={})
        self.assertEqual(
            annotated.write(props=props, parser=parser, format_root_node=True),
            expected)

    # ------------------------------------------------------------------
    # annotation tests
    # ------------------------------------------------------------------
    def test_annotate_01(self):
        """Basic annotation with categorical data on a single leaf."""
        tree = utils.ete4_parse('(a:1);')
        annotated = self._annotate(tree, b'#name\tfruit\na\tapple')

        expected_tree = '(a:1[&&NHX:fruit=apple]);'
        self.assertEqual(annotated.write(props=None), expected_tree)

    def test_annotate_02(self):
        """Categorical data: internal nodes of a named tree get counters."""
        internal_parser = "name"
        parser = utils.get_internal_parser(internal_parser)
        tree = utils.ete4_parse(self.NAMED_TREE, internal_parser=internal_parser)

        annotated = self._annotate(tree, self.ALPHABET_TSV)

        expected_tree_no_root = '(A:1[&&NHX:alphabet_type=vowel],(B:1[&&NHX:alphabet_type=consonant],(E:1[&&NHX:alphabet_type=vowel],D:1[&&NHX:alphabet_type=consonant])Internal_1:0.5[&&NHX:alphabet_type_counter=consonant--1||vowel--1])Internal_2:0.5[&&NHX:alphabet_type_counter=consonant--2||vowel--1]);'
        expected_tree_with_root = '(A:1[&&NHX:alphabet_type=vowel],(B:1[&&NHX:alphabet_type=consonant],(E:1[&&NHX:alphabet_type=vowel],D:1[&&NHX:alphabet_type=consonant])Internal_1:0.5[&&NHX:alphabet_type_counter=consonant--1||vowel--1])Internal_2:0.5[&&NHX:alphabet_type_counter=consonant--2||vowel--1])Root[&&NHX:alphabet_type_counter=consonant--2||vowel--2];'

        self.assertEqual(annotated.write(props=None, parser=parser), expected_tree_no_root)
        self.assertEqual(annotated.write(props=None, parser=parser, format_root_node=True), expected_tree_with_root)

    def test_annotate_03(self):
        """Basic annotation with numerical data on a single leaf."""
        tree = utils.ete4_parse('(a:1);')
        annotated = self._annotate(tree, b'#name\tannotate_03\na\t2')

        expected_tree = '(a:1[&&NHX:annotate_03=2.0]);'
        # assertEqual instead of a bare assert: not stripped under -O and
        # reports a diff on failure, like the other tests in this class.
        self.assertEqual(annotated.write(props=None), expected_tree)

    def test_annotate_04(self):
        """Numerical data: internal nodes get sum/max/min/std/avg (threads=4)."""
        parser = utils.get_internal_parser("name")
        tree = utils.ete4_parse(self.NAMED_TREE)

        annotated = self._annotate(tree, self.NUMERIC_TSV, threads=4)

        props = ['col1', 'col1_sum', 'col1_max', 'col1_min', 'col1_std', 'col1_avg']
        expected_tree_no_root = '(A:1[&&NHX:col1=1.0],(B:1[&&NHX:col1=2.0],(E:1[&&NHX:col1=4.0],D:1[&&NHX:col1=3.0])Internal_1:0.5[&&NHX:col1_sum=7.0:col1_max=4.0:col1_min=3.0:col1_std=0.5:col1_avg=3.5])Internal_2:0.5[&&NHX:col1_sum=9.0:col1_max=4.0:col1_min=2.0:col1_std=1.0:col1_avg=3.0]);'
        expected_tree_with_root = '(A:1[&&NHX:col1=1.0],(B:1[&&NHX:col1=2.0],(E:1[&&NHX:col1=4.0],D:1[&&NHX:col1=3.0])Internal_1:0.5[&&NHX:col1_sum=7.0:col1_max=4.0:col1_min=3.0:col1_std=0.5:col1_avg=3.5])Internal_2:0.5[&&NHX:col1_sum=9.0:col1_max=4.0:col1_min=2.0:col1_std=1.0:col1_avg=3.0])Root[&&NHX:col1_sum=10.0:col1_max=4.0:col1_min=1.0:col1_std=1.6666666666666667:col1_avg=2.5];'

        self.assertEqual(annotated.write(props=props, parser=parser), expected_tree_no_root)
        self.assertEqual(annotated.write(props=props, parser=parser, format_root_node=True), expected_tree_with_root)

    def test_annotate_05(self):
        """counter_stat='none' and num_stat='none': internal nodes carry no summaries."""
        parser = utils.get_internal_parser("name")
        tree = utils.ete4_parse(self.NAMED_TREE)

        annotated = self._annotate(
            tree,
            b'#name\tcol1\talphabet_type\nA\t1\tvowel\nB\t2\tconsonant\nD\t3\tconsonant\nE\t4\tvowel\n',
            counter_stat='none', num_stat='none')

        props = ["alphabet_type", "col1"]
        expected_tree_no_root = '(A:1[&&NHX:alphabet_type=vowel:col1=1.0],(B:1[&&NHX:alphabet_type=consonant:col1=2.0],(E:1[&&NHX:alphabet_type=vowel:col1=4.0],D:1[&&NHX:alphabet_type=consonant:col1=3.0])Internal_1:0.5)Internal_2:0.5);'
        expected_tree_with_root = '(A:1[&&NHX:alphabet_type=vowel:col1=1.0],(B:1[&&NHX:alphabet_type=consonant:col1=2.0],(E:1[&&NHX:alphabet_type=vowel:col1=4.0],D:1[&&NHX:alphabet_type=consonant:col1=3.0])Internal_1:0.5)Internal_2:0.5)Root;'

        # The expected strings were previously defined but unused, with the
        # same literals repeated inline in bare asserts.
        self.assertEqual(annotated.write(props=props, parser=parser), expected_tree_no_root)
        self.assertEqual(annotated.write(props=props, parser=parser, format_root_node=True), expected_tree_with_root)

    def test_annotate_06(self):
        """Categorical data on a tree without internal names (threads=4).

        The expected output shows the internal nodes named N4/N5 and the
        root named Root after annotation.
        """
        parser = utils.get_internal_parser("name")
        tree = utils.ete4_parse(self.UNNAMED_TREE)

        annotated = self._annotate(tree, self.ALPHABET_TSV, threads=4)

        expected_tree_no_root = '(A:1[&&NHX:alphabet_type=vowel],(B:1[&&NHX:alphabet_type=consonant],(E:1[&&NHX:alphabet_type=vowel],D:1[&&NHX:alphabet_type=consonant])N4:0.5[&&NHX:alphabet_type_counter=consonant--1||vowel--1])N5:0.5[&&NHX:alphabet_type_counter=consonant--2||vowel--1]);'
        expected_tree_with_root = '(A:1[&&NHX:alphabet_type=vowel],(B:1[&&NHX:alphabet_type=consonant],(E:1[&&NHX:alphabet_type=vowel],D:1[&&NHX:alphabet_type=consonant])N4:0.5[&&NHX:alphabet_type_counter=consonant--1||vowel--1])N5:0.5[&&NHX:alphabet_type_counter=consonant--2||vowel--1])Root[&&NHX:alphabet_type_counter=consonant--2||vowel--2];'

        self.assertEqual(annotated.write(props=None, parser=parser), expected_tree_no_root)
        self.assertEqual(annotated.write(props=None, parser=parser, format_root_node=True), expected_tree_with_root)

    def test_annotate_07(self):
        """Boolean data: internal nodes get True/False counters."""
        parser = utils.get_internal_parser("name")
        tree = utils.ete4_parse(self.UNNAMED_TREE)

        annotated = self._annotate(
            tree, b'#name\tbool_type\nA\tTrue\nB\tFalse\nD\tTrue\nE\tFalse\n')

        expected_tree_no_root = '(A:1[&&NHX:bool_type=True],(B:1[&&NHX:bool_type=False],(E:1[&&NHX:bool_type=False],D:1[&&NHX:bool_type=True])N4:0.5[&&NHX:bool_type_counter=False--1||True--1])N5:0.5[&&NHX:bool_type_counter=False--2||True--1]);'
        expected_tree_with_root = '(A:1[&&NHX:bool_type=True],(B:1[&&NHX:bool_type=False],(E:1[&&NHX:bool_type=False],D:1[&&NHX:bool_type=True])N4:0.5[&&NHX:bool_type_counter=False--1||True--1])N5:0.5[&&NHX:bool_type_counter=False--2||True--1])Root[&&NHX:bool_type_counter=False--2||True--2];'

        self.assertEqual(annotated.write(props=None, parser=parser), expected_tree_no_root)
        self.assertEqual(annotated.write(props=None, parser=parser, format_root_node=True), expected_tree_with_root)

    def test_annotate_08(self):
        """List data (comma-separated cells): internal nodes get element counters."""
        parser = utils.get_internal_parser("name")
        tree = utils.ete4_parse(self.UNNAMED_TREE)

        annotated = self._annotate(
            tree, b'#name\tlist_data\nA\ta,b,c\nB\tc,d\nD\ta,c,d,e\nE\te,d,b\n')

        expected_tree_no_root = '(A:1[&&NHX:list_data=a|b|c],(B:1[&&NHX:list_data=c|d],(E:1[&&NHX:list_data=e|d|b],D:1[&&NHX:list_data=a|c|d|e])N4:0.5[&&NHX:list_data_counter=a--1||b--1||c--1||d--2||e--2])N5:0.5[&&NHX:list_data_counter=a--1||b--1||c--2||d--3||e--2]);'
        expected_tree_with_root = '(A:1[&&NHX:list_data=a|b|c],(B:1[&&NHX:list_data=c|d],(E:1[&&NHX:list_data=e|d|b],D:1[&&NHX:list_data=a|c|d|e])N4:0.5[&&NHX:list_data_counter=a--1||b--1||c--1||d--2||e--2])N5:0.5[&&NHX:list_data_counter=a--1||b--1||c--2||d--3||e--2])Root[&&NHX:list_data_counter=a--2||b--2||c--3||d--3||e--2];'

        self.assertEqual(annotated.write(props=None, parser=parser), expected_tree_no_root)
        self.assertEqual(annotated.write(props=None, parser=parser, format_root_node=True), expected_tree_with_root)

    # NOTE: tests 09 and 10 below were already disabled (commented out) in
    # this file; they are kept verbatim for reference.
    #
    # def test_annotate_09():
    #     # specify datatype of each column
    #     internal_parser = "name"
    #     parser = utils.get_internal_parser(internal_parser)
    #
    #     test_tree = utils.ete4_parse("(A:1,(B:1,(E:1,D:1)Internal_1:0.5)Internal_2:0.5)Root;")
    #
    #     with NamedTemporaryFile(suffix='.tsv') as f_annotation:
    #         f_annotation.write(b'#name\tcol1\tcol2\tcol3\tcol4\nA\tvowel\t1\tTrue\ta,b,c\nB\tconsonant\t2\tFalse\tc,d\nD\tconsonant\t3\tTrue\ta,c,d,e\nE\tvowel\t4\tFalse\te,d,b\n')
    #         f_annotation.flush()
    #
    #         metadata_dict, node_props, columns, prop2type = tree_annotate.parse_csv([f_annotation.name])
    #
    #     text_prop = ['col1']
    #     num_prop = ['col2']
    #     bool_prop = ['col3']
    #     multiple_text_prop = ['col4']
    #
    #     test_tree_annotated, annotated_prop2type = tree_annotate.run_tree_annotate(test_tree,
    #         metadata_dict=metadata_dict, node_props=node_props,
    #         text_prop=text_prop, multiple_text_prop=multiple_text_prop,
    #         num_prop=num_prop, bool_prop=bool_prop,
    #         columns=columns, prop2type=prop2type)
    #
    #     props = ['col1', 'col2', 'col3', 'col4', 'col1_counter', 'col4_counter', 'col3_counter', 'col2_avg', 'col2_sum', 'col2_max', 'col2_min', 'col2_std']
    #     expected_tree = '(A:1[&&NHX:col1=vowel:col2=1.0:col3=True:col4=a|b|c],(B:1[&&NHX:col1=consonant:col2=2.0:col3=False:col4=c|d],(E:1[&&NHX:col1=vowel:col2=4.0:col3=False:col4=e|d|b],D:1[&&NHX:col1=consonant:col2=3.0:col3=True:col4=a|c|d|e])Internal_1:0.5[&&NHX:col1_counter=consonant--1||vowel--1:col4_counter=a--1||b--1||c--1||d--2||e--2:col3_counter=False--1||True--1:col2_avg=3.5:col2_sum=7.0:col2_max=4.0:col2_min=3.0:col2_std=0.5])Internal_2:0.5[&&NHX:col1_counter=consonant--2||vowel--1:col4_counter=a--1||b--1||c--2||d--3||e--2:col3_counter=False--2||True--1:col2_avg=3.0:col2_sum=9.0:col2_max=4.0:col2_min=2.0:col2_std=1.0])Root[&&NHX:col1_counter=consonant--2||vowel--2:col4_counter=a--2||b--2||c--3||d--3||e--2:col3_counter=False--2||True--2:col2_avg=2.5:col2_sum=10.0:col2_max=4.0:col2_min=1.0:col2_std=1.6666666666666667];'
    #     assert test_tree_annotated.write(props=props, parser=parser, format_root_node=True) == expected_tree
    #
    # def test_annotate_10():
    #     # specify datatype of each column index
    #     internal_parser = "name"
    #     parser = utils.get_internal_parser(internal_parser)
    #
    #     test_tree = utils.ete4_parse("(A:1,(B:1,(E:1,D:1)Internal_1:0.5)Internal_2:0.5)Root;")
    #
    #     with NamedTemporaryFile(suffix='.tsv') as f_annotation:
    #         f_annotation.write(b'#name\tcol1\tcol2\tcol3\tcol4\nA\tvowel\t1\tTrue\ta,b,c\nB\tconsonant\t2\tFalse\tc,d\nD\tconsonant\t3\tTrue\ta,c,d,e\nE\tvowel\t4\tFalse\te,d,b\n')
    #         f_annotation.flush()
    #
    #         metadata_dict, node_props, columns, prop2type = tree_annotate.parse_csv([f_annotation.name])
    #
    #     text_prop_idx = '1'
    #     num_prop_idx = '2'
    #     bool_prop_idx = '3'
    #     multiple_text_prop = ['col4']
    #
    #     test_tree_annotated, annotated_prop2type = tree_annotate.run_tree_annotate(test_tree,
    #         metadata_dict=metadata_dict, node_props=node_props,
    #         text_prop_idx=text_prop_idx, multiple_text_prop=multiple_text_prop,
    #         num_prop_idx=num_prop_idx, bool_prop_idx=bool_prop_idx,
    #         columns=columns, prop2type=prop2type)
    #     props = ['col1', 'col2', 'col3', 'col4', 'col1_counter', 'col4_counter', 'col3_counter', 'col2_avg', 'col2_sum', 'col2_max', 'col2_min', 'col2_std']
    #
    #     expected_tree = '(A:1[&&NHX:col1=vowel:col2=1.0:col3=True:col4=a|b|c],(B:1[&&NHX:col1=consonant:col2=2.0:col3=False:col4=c|d],(E:1[&&NHX:col1=vowel:col2=4.0:col3=False:col4=e|d|b],D:1[&&NHX:col1=consonant:col2=3.0:col3=True:col4=a|c|d|e])Internal_1:0.5[&&NHX:col1_counter=consonant--1||vowel--1:col4_counter=a--1||b--1||c--1||d--2||e--2:col3_counter=False--1||True--1:col2_avg=3.5:col2_sum=7.0:col2_max=4.0:col2_min=3.0:col2_std=0.5])Internal_2:0.5[&&NHX:col1_counter=consonant--2||vowel--1:col4_counter=a--1||b--1||c--2||d--3||e--2:col3_counter=False--2||True--1:col2_avg=3.0:col2_sum=9.0:col2_max=4.0:col2_min=2.0:col2_std=1.0])Root[&&NHX:col1_counter=consonant--2||vowel--2:col4_counter=a--2||b--2||c--3||d--3||e--2:col3_counter=False--2||True--2:col2_avg=2.5:col2_sum=10.0:col2_max=4.0:col2_min=1.0:col2_std=1.6666666666666667];'
    #     assert test_tree_annotated.write(props=props, parser=parser, format_root_node=True) ==expected_tree

    def test_annotate_11(self):
        """Specify the datatype of columns through column index ranges."""
        parser = utils.get_internal_parser("name")
        tree = utils.ete4_parse(self.NAMED_TREE)

        # NOTE(review): 'col7' (below) and 'col6_counter' (in props) do not
        # match any column of the fixture header (col01..col07). The expected
        # string is consistent with the values as written (it contains no
        # col06_counter), so they are left unchanged -- confirm whether
        # 'col07' / 'col06_counter' were intended.
        annotated = self._annotate(
            tree,
            b'#name\tcol01\tcol02\tcol03\tcol04\tcol05\tcol06\tcol07\nA\tvowel\tvowel\t1\t1\tTrue\tTrue\ta,b,c\nB\tconsonant\tconsonant\t2\t2\tFalse\tFalse\tc,d\nD\tconsonant\tconsonant\t3\t3\tTrue\tTrue\ta,c,d,e\nE\tvowel\tvowel\t4\t4\tFalse\tFalse\te,d,b\n',
            text_prop_idx=['[1-2]'],
            num_prop_idx=['[3-4]'],
            bool_prop_idx=['[5-6]'],
            multiple_text_prop=['col7'])

        props = ['col01', 'col02', 'col03', 'col04', 'col05', 'col06', 'col07', 'col01_counter',
                 'col02_counter', 'col07_counter', 'col05_counter', 'col6_counter', 'col03_avg', 'col03_sum',
                 'col03_max', 'col03_min', 'col03_std', 'col04_avg', 'col04_sum', 'col04_max', 'col04_min', 'col04_std']
        expected_tree = '(A:1[&&NHX:col01=vowel:col02=vowel:col03=1.0:col04=1.0:col05=True:col06=True:col07=a|b|c],(B:1[&&NHX:col01=consonant:col02=consonant:col03=2.0:col04=2.0:col05=False:col06=False:col07=c|d],(E:1[&&NHX:col01=vowel:col02=vowel:col03=4.0:col04=4.0:col05=False:col06=False:col07=e|d|b],D:1[&&NHX:col01=consonant:col02=consonant:col03=3.0:col04=3.0:col05=True:col06=True:col07=a|c|d|e])Internal_1:0.5[&&NHX:col01_counter=consonant--1||vowel--1:col02_counter=consonant--1||vowel--1:col07_counter=a--1||b--1||c--1||d--2||e--2:col05_counter=False--1||True--1:col03_avg=3.5:col03_sum=7.0:col03_max=4.0:col03_min=3.0:col03_std=0.5:col04_avg=3.5:col04_sum=7.0:col04_max=4.0:col04_min=3.0:col04_std=0.5])Internal_2:0.5[&&NHX:col01_counter=consonant--2||vowel--1:col02_counter=consonant--2||vowel--1:col07_counter=a--1||b--1||c--2||d--3||e--2:col05_counter=False--2||True--1:col03_avg=3.0:col03_sum=9.0:col03_max=4.0:col03_min=2.0:col03_std=1.0:col04_avg=3.0:col04_sum=9.0:col04_max=4.0:col04_min=2.0:col04_std=1.0])Root[&&NHX:col01_counter=consonant--2||vowel--2:col02_counter=consonant--2||vowel--2:col07_counter=a--2||b--2||c--3||d--3||e--2:col05_counter=False--2||True--2:col03_avg=2.5:col03_sum=10.0:col03_max=4.0:col03_min=1.0:col03_std=1.6666666666666667:col04_avg=2.5:col04_sum=10.0:col04_max=4.0:col04_min=1.0:col04_std=1.6666666666666667];'
        self.assertEqual(annotated.write(props=props, parser=parser, format_root_node=True), expected_tree)

    def test_annotate_12(self):
        """Missing and unmapped values are both expected to serialize as NaN.

        Pattern quoted in the original comment for values treated as missing:
        r'^(?:\\W+|none|None|null|NaN|)$'
        """
        parser = utils.get_internal_parser("name")
        tree = utils.ete4_parse(self.NAMED_TREE)

        # Metadata with missing categorical data ('none', '-', empty cell).
        annotated_missing = self._annotate(
            tree, b'#name\talphabet_type\nA\tnone\nB\t-\nD\t\nE\tvowel\n',
            counter_stat='raw')

        # Metadata with unmapped categorical data (leaf B has no row at all).
        # Both annotations run on the same tree object, in this order, before
        # either result is serialized.
        annotated_unmapped = self._annotate(
            tree, b'#name\talphabet_type\nA\tnone\nD\t\nE\tvowel\n',
            counter_stat='raw')

        expected_tree = '(A:1[&&NHX:alphabet_type=NaN],(B:1[&&NHX:alphabet_type=NaN],(E:1[&&NHX:alphabet_type=vowel],D:1[&&NHX:alphabet_type=NaN])Internal_1:0.5[&&NHX:alphabet_type_counter=NaN--1||vowel--1])Internal_2:0.5[&&NHX:alphabet_type_counter=NaN--2||vowel--1])Root[&&NHX:alphabet_type_counter=NaN--3||vowel--1];'

        self.assertEqual(annotated_missing.write(props=None, parser=parser, format_root_node=True), expected_tree)
        self.assertEqual(annotated_unmapped.write(props=None, parser=parser, format_root_node=True), expected_tree)

    def test_annotate_13(self):
        """counter_stat='relative' on categorical, boolean and list columns."""
        parser = utils.get_internal_parser("name")
        tree = utils.ete4_parse(self.NAMED_TREE)

        annotated = self._annotate(
            tree,
            b'#name\tCol1\tCol2\tCol3\nA\tvowel\tTrue\ta,b,c\nB\tconsonant\tFalse\tc,d\nD\tconsonant\tTrue\ta,c,d,e\nE\tvowel\tFalse\te,d,b\n',
            counter_stat='relative')

        props = ['Col1', 'Col2', 'Col3', 'Col1_counter', 'Col2_counter', 'Col3_counter']
        expected_tree = '(A:1[&&NHX:Col1=vowel:Col2=True:Col3=a|b|c],(B:1[&&NHX:Col1=consonant:Col2=False:Col3=c|d],(E:1[&&NHX:Col1=vowel:Col2=False:Col3=e|d|b],D:1[&&NHX:Col1=consonant:Col2=True:Col3=a|c|d|e])Internal_1:0.5[&&NHX:Col1_counter=consonant--0.50||vowel--0.50:Col2_counter=False--0.50||True--0.50:Col3_counter=a--0.14||b--0.14||c--0.14||d--0.29||e--0.29])Internal_2:0.5[&&NHX:Col1_counter=consonant--0.67||vowel--0.33:Col2_counter=False--0.67||True--0.33:Col3_counter=a--0.11||b--0.11||c--0.22||d--0.33||e--0.22])Root[&&NHX:Col1_counter=consonant--0.50||vowel--0.50:Col2_counter=False--0.50||True--0.50:Col3_counter=a--0.17||b--0.17||c--0.25||d--0.25||e--0.17];'
        self.assertEqual(annotated.write(props=props, parser=parser, format_root_node=True), expected_tree)

    def test_annotate_14_a(self):
        """num_stat='all': every numerical summary is written."""
        self._check_num_stat(
            'all',
            '(A:1[&&NHX:col1=1.0],(B:1[&&NHX:col1=2.0],(E:1[&&NHX:col1=4.0],D:1[&&NHX:col1=3.0])Internal_1:0.5[&&NHX:col1_sum=7.0:col1_max=4.0:col1_min=3.0:col1_std=0.5:col1_avg=3.5])Internal_2:0.5[&&NHX:col1_sum=9.0:col1_max=4.0:col1_min=2.0:col1_std=1.0:col1_avg=3.0])Root[&&NHX:col1_sum=10.0:col1_max=4.0:col1_min=1.0:col1_std=1.6666666666666667:col1_avg=2.5];',
            props=['col1', 'col1_sum', 'col1_max', 'col1_min', 'col1_std', 'col1_avg'])

    def test_annotate_14_b(self):
        """num_stat='sum': only the sum is written on internal nodes."""
        self._check_num_stat(
            'sum',
            '(A:1[&&NHX:col1=1.0],(B:1[&&NHX:col1=2.0],(E:1[&&NHX:col1=4.0],D:1[&&NHX:col1=3.0])Internal_1:0.5[&&NHX:col1_sum=7.0])Internal_2:0.5[&&NHX:col1_sum=9.0])Root[&&NHX:col1_sum=10.0];')

    def test_annotate_14_c(self):
        """num_stat='avg': only the average is written on internal nodes."""
        self._check_num_stat(
            'avg',
            '(A:1[&&NHX:col1=1.0],(B:1[&&NHX:col1=2.0],(E:1[&&NHX:col1=4.0],D:1[&&NHX:col1=3.0])Internal_1:0.5[&&NHX:col1_avg=3.5])Internal_2:0.5[&&NHX:col1_avg=3.0])Root[&&NHX:col1_avg=2.5];')

    def test_annotate_14_d(self):
        """num_stat='max': only the maximum is written on internal nodes."""
        self._check_num_stat(
            'max',
            '(A:1[&&NHX:col1=1.0],(B:1[&&NHX:col1=2.0],(E:1[&&NHX:col1=4.0],D:1[&&NHX:col1=3.0])Internal_1:0.5[&&NHX:col1_max=4.0])Internal_2:0.5[&&NHX:col1_max=4.0])Root[&&NHX:col1_max=4.0];')

    def test_annotate_14_e(self):
        """num_stat='min': only the minimum is written on internal nodes."""
        self._check_num_stat(
            'min',
            '(A:1[&&NHX:col1=1.0],(B:1[&&NHX:col1=2.0],(E:1[&&NHX:col1=4.0],D:1[&&NHX:col1=3.0])Internal_1:0.5[&&NHX:col1_min=3.0])Internal_2:0.5[&&NHX:col1_min=2.0])Root[&&NHX:col1_min=1.0];')

    def test_annotate_14_f(self):
        """num_stat='std': only the std value is written on internal nodes."""
        self._check_num_stat(
            'std',
            '(A:1[&&NHX:col1=1.0],(B:1[&&NHX:col1=2.0],(E:1[&&NHX:col1=4.0],D:1[&&NHX:col1=3.0])Internal_1:0.5[&&NHX:col1_std=0.5])Internal_2:0.5[&&NHX:col1_std=1.0])Root[&&NHX:col1_std=1.6666666666666667];')

    def test_annotate_tar(self):
        """Metadata can be read from a .tar.gz archive holding several TSV files."""
        tree = utils.ete4_parse('(a);')

        # Archive member name -> file content, added to the archive in this order.
        members = [
            ('metadata1.tsv', '#name\tcol1\na\tapple'),
            ('metadata2.tsv', '#name\tcol2\na\t3'),
        ]

        with TemporaryDirectory() as temp_dir:
            member_paths = []
            for member_name, text in members:
                member_path = os.path.join(temp_dir, member_name)
                with open(member_path, 'w') as member_file:
                    member_file.write(text)
                member_paths.append((member_path, member_name))

            with NamedTemporaryFile(suffix='.tar.gz') as temp_tar:
                tar_path = temp_tar.name

                # Create the archive and add the files from the temporary directory.
                with tarfile.open(tar_path, 'w:gz') as tar:
                    for member_path, member_name in member_paths:
                        tar.add(member_path, arcname=member_name)

                # Parse while the archive still exists on disk.
                parsed = tree_annotate.parse_csv([tar_path])

        annotated = self._run_annotate(tree, parsed, column2method={})

        props = ['col1', 'col2']
        expected_tree = '(a:1[&&NHX:col1=apple:col2=3.0]);'
        self.assertEqual(annotated.write(props=props), expected_tree)

    # ------------------------------------------------------------------
    # internal parser tests
    # ------------------------------------------------------------------
    def test_internal_parser_01(self):
        """Tree parsed with internal_parser='name', written with parser=1 and parser=0."""
        tree = utils.ete4_parse(self.NAMED_TREE, internal_parser='name')

        expected_tree_parser_1 = "(A:1,(B:1,(E:1,D:1)Internal_1:0.5)Internal_2:0.5)Root;"
        expected_tree_parser_0 = "(A:1,(B:1,(E:1,D:1):0.5[&&NHX:name=Internal_1]):0.5[&&NHX:name=Internal_2]);"

        self.assertEqual(tree.write(props=None, parser=1, format_root_node=True), expected_tree_parser_1)
        self.assertEqual(tree.write(props=None, parser=0), expected_tree_parser_0)

    def test_internal_parser_02(self):
        """Tree parsed with internal_parser='support', written with parser=1 and parser=0."""
        tree = utils.ete4_parse("(A:1,(B:1,(E:1,D:1)1:0.5)1:0.5);", internal_parser='support')

        expected_tree_parser_0 = "(A:1,(B:1,(E:1,D:1)1:0.5)1:0.5);"
        expected_tree_parser_1 = "(A:1,(B:1,(E:1,D:1):0.5[&&NHX:support=1.0]):0.5[&&NHX:support=1.0]);"

        self.assertEqual(tree.write(props=None, parser=1, format_root_node=True), expected_tree_parser_1)
        self.assertEqual(tree.write(props=None, parser=0), expected_tree_parser_0)

532 

# Run the whole test module when it is executed directly as a script.
if __name__ == "__main__":
    unittest.main()