Coverage for /home/deng/Projects/metatree_drawer/metatreedrawer/treeprofiler/tree_plot.py: 24%

1172 statements  

« prev     ^ index     » next       coverage.py v7.2.7, created at 2024-08-07 10:33 +0200

1#!/usr/bin/env python 

2import numbers 

3import math 

4import sys 

5import os 

6import argparse 

7import csv 

8 

9from collections import defaultdict 

10from collections import OrderedDict 

11from collections import Counter 

12from itertools import islice 

13from io import StringIO 

14import matplotlib.pyplot as plt 

15import numpy as np 

16 

17from ete4.parser.newick import NewickError 

18from ete4 import Tree, PhyloTree 

19from ete4 import GTDBTaxa 

20from ete4 import NCBITaxa 

21from ete4.smartview import TreeStyle, NodeStyle, TreeLayout 

22from treeprofiler.tree_image import get_image 

23from treeprofiler.layouts import ( 

24 text_layouts, taxon_layouts, staple_layouts, 

25 conditional_layouts, seq_layouts, profile_layouts, phylosignal_layouts) 

26 

27import treeprofiler.src.utils as utils 

28from treeprofiler.tree_annotate import can_convert_to_bool 

29 

# Fixed palette of 100 hex colour codes. Values are handed out from this
# list when a layout needs one distinct colour per category (for example
# via utils.assign_color_to_values in the taxonomy layouts).
paired_color = [
    "#9a312f", "#9b57d0", "#f8ce9a", "#f16017",
    "#28fef9", "#53707a", "#213b07", "#b5e5ac",
    "#9640b2", "#a9bd10", "#69e42b", "#b44d67",
    "#b110c1", "#0b08a3", "#d07671", "#29e23b",
    "#3f2bf4", "#9b2a08", "#b42b94", "#77566a",
    "#2dfee7", "#046904", "#e2835d", "#53db2b",
    "#0b97e9", "#e0f6e9", "#ba46d1", "#4aba53",
    "#d4d6db", "#7a5d7c", "#4b100e", "#9e6373",
    "#5f4945", "#7e057a", "#f8e372", "#209f87",
    "#383f59", "#9d59e9", "#40c9fb", "#4cfc8b",
    "#d94769", "#20feba", "#c53238", "#068b02",
    "#6b4c93", "#f1968e", "#86d720", "#076fa6",
    "#0dbcfe", "#4d74b2", "#7b3dd2", "#286d26",
    "#a0faca", "#97505d", "#159e7a", "#fc05df",
    "#5df454", "#9160e1", "#c2eb5e", "#304fce",
    "#033379", "#54770f", "#271211", "#ab8479",
    "#37d9a0", "#f12205", "#cdd7e2", "#578f56",
    "#5ad9be", "#8596e9", "#c999ee", "#5f6b8a",
    "#f5c3a1", "#8e0603", "#cc21cf", "#65e7d0",
    "#97b3b6", "#d6220c", "#29c1e1", "#a30139",
    "#c9a619", "#a19410", "#da874f", "#64246d",
    "#66f35d", "#b8366c", "#116c95", "#bd851a",
    "#27f7cb", "#512ca4", "#60e72e", "#d1941c",
    "#1045a8", "#c1b03a", "#0c62a5", "#7ac9b2",
    "#6bb9bd", "#cb30eb", "#26bad0", "#d9e557",
]

# One-line description of the plotting command
# (presumably shown in the CLI help -- confirm against the caller).
DESC = "plot tree"

51 

def poplulate_plot_args(plot_args_p):
    """Register the tree-plotting command-line options on ``plot_args_p``.

    Three argument groups are added, in this order: conditional display
    options, per-property layout options, and visualisation/output options.
    Nothing is parsed here; the parser is only configured.

    Args:
        plot_args_p: An ``argparse.ArgumentParser`` (or any object exposing
            ``add_argument_group``) that receives the options.

    Returns:
        None. The parser is modified in place.
    """

    def add_prop_list_options(target, options):
        # Every entry becomes a "one or more property names" option.
        for flag, help_text in options:
            target.add_argument(flag, nargs='+', required=False, help=help_text)

    def add_switches(target, options):
        # Every entry becomes a boolean switch that is False unless given.
        for flag, help_text in options:
            target.add_argument(flag, default=False, action='store_true',
                                help=help_text)

    # ---- Conditional display -------------------------------------------
    group = plot_args_p.add_argument_group(
        title='Conditional display arguments',
        description="Conditional display parameters")
    group.add_argument(
        '--internal-plot-measure',
        default='avg',
        choices=['sum', 'avg', 'max', 'min', 'std', 'none'],
        type=str,
        required=False,
        help="statistic measures to be shown in numerical layout for internal nodes, [default: avg]")
    # action='append': the flag may be repeated; each use adds one condition.
    group.add_argument(
        '--collapsed-by',
        type=str,
        required=False,
        action='append',
        help='target tree nodes collapsed by customized conditions')
    group.add_argument(
        '--highlighted-by',
        type=str,
        required=False,
        action='append',
        help='target tree nodes highlighted by customized conditions')

    # ---- Property layouts ----------------------------------------------
    group = plot_args_p.add_argument_group(
        title="Properties' layout arguments",
        description="Prop layout parameters")
    group.add_argument(
        '--column-width',
        type=int,
        default=20,
        help="customize column width of each layout.[default: 20]")
    group.add_argument(
        '--barplot-width',
        type=float,
        default=200,
        help="customize barplot width of barplot layout.[default: 200]")
    group.add_argument(
        '--barplot-scale',
        type=str,
        default=None,
        help="find the barplot column as scale anchor.[default: None]")
    # FileType('r'): argparse opens the file itself and stores the handle.
    group.add_argument(
        '--color-config',
        type=argparse.FileType('r'),
        default=None,
        help="Path to the file to find the color for each variables. [default: None]")
    group.add_argument(
        '-s', '--config-sep',
        default='\t',
        help="column separator of color table [default: \\t]")
    group.add_argument(
        '--padding-x',
        type=int,
        default=1,
        help="customize horizontal column padding distance of each layout.[default: 1]")
    group.add_argument(
        '--padding-y',
        type=int,
        default=0,
        help="customize vertical padding distance of each layout.[default: 0]")

    # Order matters: it is the order in which options appear in --help.
    add_prop_list_options(group, (
        ('--acr-discrete-layout',
         "<prop1> <prop2> names of properties which need to be plot as acr-discrete-layout"),
        ('--acr-continuous-layout',
         "<prop1> <prop2> names of properties which need to be plot as acr-continuous-layout"),
        ('--ls-layout',
         "<prop1> <prop2> names of properties which need to be plot as ls-layout"),
        ('--binary-layout',
         "<prop1> <prop2> names of properties which need to be plot as binary-layout which highlights the postives"),
        ('--binary-aggregate-layout',
         "<prop1> <prop2> names of properties which need to be plot as binary-aggregate-layout which highlights the postives"),
        ('--binary-unicolor-layout',
         "<prop1> <prop2> names of properties which need to be plot as binary-layout which highlights the postives"),
        ('--binary-unicolor-aggregate-layout',
         "<prop1> <prop2> names of properties which need to be plot as binary-aggregate-layout which highlights the postives"),
        ('--colorbranch-layout',
         "<prop1> <prop2> names of properties where branches will be colored based on different values."),
        ('--label-layout',
         "<prop1> <prop2> names of properties where values will be displayed on the aligned panel."),
        ('--rectangle-layout',
         "<prop1> <prop2> names of properties where values will be label as rectangular color block on the aligned panel."),
        ('--bubble-layout',
         "(experimental) <prop1> <prop2> names of properties which need to be plot as bubble-layout"),
        ('--background-layout',
         "<prop1> <prop2> names of properties where values will be label as rectangular color block on the aligned panel."),
        ('--piechart-layout',
         "<prop1> <prop2> names of properties whose internal nodes need to be plot as piechart-layout."),
        ('--heatmap-layout',
         "<prop1> <prop2> names of numerical properties which need to be read as heatmap-layout"),
        ('--heatmap-mean-layout',
         "<prop1> <prop2> names of numerical properties which need to be read as heatmap-layout"),
        ('--heatmap-zscore-layout',
         "<prop1> <prop2> names of numerical properties which need to be read as heatmap-layout"),
        ('--barplot-layout',
         "<prop1> <prop2> names of numerical properties which need to be read as barplot_layouts"),
    ))

    add_switches(group, (
        ('--taxonclade-layout',
         "Activate taxonclade_layout which clades will be colored based on taxonomy of each node."),
        ('--taxonrectangle-layout',
         "Activate taxonrectangle-layout which taxonomy of each node will be display as rectangular blocks in aligned panel."),
        ('--taxoncollapse-layout',
         "Activate taxoncollapse-layout which taxonomy of each node will be display as rectangular blocks in aligned panel."),
        ('--emapper-layout',
         "Activate emapper_layout which display all the annotation from EggNOG-mapper."),
        ('--domain-layout',
         "Activate domain_layout which display protein domain annotation in sequence."),
        ('--alignment-layout',
         "Display Multiple Sequence Alignment layout in aligned panel."),
    ))

    add_prop_list_options(group, (
        ('--profiling-layout',
         "<prop1> <prop2> names of properties which need to be convert to presence-absence profiling matrix of each value"),
        ('--categorical-matrix-layout',
         "<prop1> <prop2> names which need to be plot as categorical_matrix_layout for categorical values"),
        ('--numerical-matrix-layout',
         "numerical matrix that take into account ALL values into gradient from white to red. <prop1> <prop2> names which need to be plot as numerical_matrix_layout for numerical values "),
        ('--binary-matrix-layout',
         "names of properties which need to be plot as binary-matrix which highlights the postives"),
    ))

    # ---- Visualisation / output ----------------------------------------
    group = plot_args_p.add_argument_group(
        title='Visualizing output arguments',
        description="Visualizing output parameters")

    # NOTE: these use store_false on purpose-looking inverted names: the
    # attribute is True by default and becomes False when the flag is given
    # (presumably consumed as "show ..." values by the caller -- confirm).
    group.add_argument('--hide-leaf-name', action='store_false',
                       help='Hide the leaf names in the tree view.')
    group.add_argument('--hide-branch-support', action='store_false',
                       help='Hide the branch support values in the tree view.')
    group.add_argument('--hide-branch-distance', action='store_false',
                       help='Hide the branch distances in the tree view.')

    # Also store_false: args.verbose is True unless --verbose is passed.
    # NOTE(review): looks like it is meant to feed a "quiet" setting -- confirm.
    group.add_argument(
        '--verbose',
        action="store_false",
        required=False,
        help="show detail on prompt when visualizing taget tree.")
    # type=int so that a user-supplied port has the same type as the
    # integer default (previously type=str produced a str only when given).
    group.add_argument(
        '--port',
        type=int,
        default=5000,
        help="run interactive session on custom port.[default: 5000]")
    group.add_argument(
        '--render',
        action="store_true",
        required=False,
        help="output as svg")
    group.add_argument(
        '--out-colordict',
        action="store_true",
        required=False,
        help="print color dictionary of each property")

289 

290 

291### visualize tree 

292def run(args): 

293 global prop2type, properties, tree 

294 node_props=[] 

295 properties = {} 

296 rank2values = {} 

297 total_color_dict = [] 

298 layouts = [] 

299 level = 1 # level 1 is the leaf name 

300 

301 # checking file and output exists 

302 if not os.path.exists(args.tree): 

303 raise FileNotFoundError(f"Input tree {args.tree} does not exist.") 

304 

305 # parsing tree 

306 try: 

307 tree, eteformat_flag = utils.validate_tree(args.tree, args.input_type, args.internal) 

308 except utils.TreeFormatError as e: 

309 print(e) 

310 sys.exit(1) 

311 

312 # resolve polytomy 

313 if args.resolve_polytomy: 

314 tree.resolve_polytomy() 

315 

316 #rest_prop = [] 

317 if args.prop2type: 

318 prop2type = {} 

319 with open(args.prop2type, 'r') as f: 

320 for line in f: 

321 line = line.rstrip() 

322 prop, value = line.split('\t') 

323 prop2type[prop] = eval(value) 

324 

325 popup_prop_keys = list(prop2type.keys()) 

326 

327 else: 

328 prop2type = {# start with leaf name 

329 'name':str, 

330 'dist':float, 

331 'support':float, 

332 'rank': str, 

333 'sci_name': str, 

334 'taxid': str, 

335 'lineage':str, 

336 'named_lineage': str, 

337 'evoltype': str, 

338 'dup_sp': str, 

339 'dup_percent': float, 

340 'lca':str 

341 } 

342 popup_prop_keys = list(prop2type.keys()) 

343 

344 if eteformat_flag: 

345 for path, node in tree.iter_prepostorder(): 

346 prop2type.update(get_prop2type(node)) 

347 

348 

349 # elif args.input_type == 'newick': 

350 # popup_prop_keys = list(prop2type.keys())  

351 

352 # collapse tree by condition  

353 if args.collapsed_by: # need to be wrap with quotes 

354 condition_strings = args.collapsed_by 

355 for condition in condition_strings: 

356 c_layout = TreeLayout(name='Collapsed_by_'+condition, \ 

357 ns=conditional_layouts.collapsed_by_layout(condition, prop2type = prop2type, level=level)) 

358 layouts.append(c_layout) 

359 

360 # label node by condition 

361 if args.highlighted_by: # need to be wrap with quotes 

362 condition_strings = args.highlighted_by 

363 for condition in condition_strings: 

364 s_layout = TreeLayout(name='Highlighted_by_'+condition, \ 

365 ns=conditional_layouts.highlight_layout(condition, prop2type = prop2type, level=level)) 

366 layouts.append(s_layout) 

367 

368 #### Layouts settings #### 

369 # numerical representative mearsure  

370 internal_num_rep = args.internal_plot_measure 

371 

372 # color configuration 

373 color_config = {} 

374 if args.color_config: 

375 color_config = read_config_to_dict(args.color_config, delimiter=args.config_sep) 

376 

377 # Get the input arguments in order 

378 input_order = [] 

379 for arg in sys.argv[1:]: 

380 if arg.startswith('-') and arg.endswith('layout'): 

381 input_order.append(arg[2:]) 

382 else: 

383 continue 

384 

385 visualized_props = [] 

386 for layout in input_order: 

387 if layout == 'acr-discrete-layout': 

388 acr_discrete_layouts, level, color_dict = get_acr_discrete_layouts(tree, args.acr_discrete_layout, level, prop2type=prop2type, column_width=args.column_width, padding_x=args.padding_x, padding_y=args.padding_y, color_config=color_config) 

389 layouts.extend(acr_discrete_layouts) 

390 total_color_dict.append(color_dict) 

391 visualized_props.extend(args.acr_discrete_layout) 

392 

393 #delta statistic  

394 for suffix in ['delta', 'pval']: 

395 visualized_props.extend([utils.add_suffix(prop, suffix) for prop in args.acr_discrete_layout]) 

396 

397 if layout == 'acr-continuous-layout': 

398 acr_continuous_layouts = get_acr_continuous_layouts(tree, args.acr_continuous_layout, level, prop2type=prop2type, padding_x=args.padding_x, padding_y=args.padding_y) 

399 layouts.extend(acr_continuous_layouts) 

400 visualized_props.extend(args.acr_continuous_layout) 

401 

402 if layout == 'ls-layout': 

403 ls_layouts, ls_props = get_ls_layouts(tree, args.ls_layout, level, prop2type=prop2type, padding_x=args.padding_x, padding_y=args.padding_y, color_config=color_config) 

404 layouts.extend(ls_layouts) 

405 visualized_props.extend(args.ls_layout) 

406 visualized_props.extend(ls_props) 

407 

408 # if layout == 'heatmap-layout': 

409 # numerical_props = args.heatmap_layout 

410 # norm_method = 'min-max' 

411 # heatmap_layouts, level = get_heatmap_matrix_layouts(layout, numerical_props, norm_method, internal_num_rep, color_config, args, level) 

412 # layouts.extend(heatmap_layouts) 

413 

414 # if layout == 'heatmap-mean-layout': 

415 # numerical_props = args.heatmap_mean_layout 

416 # norm_method = 'mean' 

417 # heatmap_layouts, level = get_heatmap_matrix_layouts(layout, numerical_props, norm_method, internal_num_rep, color_config, args, level) 

418 # layouts.extend(heatmap_layouts) 

419 

420 # if layout == 'heatmap-zscore-layout': 

421 # numerical_props = args.heatmap_zscore_layout 

422 # norm_method = 'zscore' 

423 # heatmap_layouts, level = get_heatmap_matrix_layouts(layout, numerical_props, norm_method, internal_num_rep, color_config, args, level) 

424 # layouts.extend(heatmap_layouts) 

425 

426 if layout == 'heatmap-layout': 

427 heatmap_layouts, level = get_heatmap_layouts(tree, args.heatmap_layout, level, column_width=args.column_width, padding_x=args.padding_x, padding_y=args.padding_y, internal_rep=internal_num_rep, color_config=color_config, norm_method='min-max') 

428 layouts.extend(heatmap_layouts) 

429 visualized_props.extend(args.heatmap_layout) 

430 

431 if layout == 'heatmap-mean-layout': 

432 heatmap_mean_layouts, level = get_heatmap_layouts(tree, args.heatmap_mean_layout, level, column_width=args.column_width, padding_x=args.padding_x, padding_y=args.padding_y, internal_rep=internal_num_rep, color_config=color_config, norm_method='mean') 

433 layouts.extend(heatmap_mean_layouts) 

434 visualized_props.extend(args.heatmap_mean_layout) 

435 

436 if layout == 'heatmap-zscore-layout': 

437 heatmap_zscore_layouts, level = get_heatmap_layouts(tree, args.heatmap_zscore_layout, level, column_width=args.column_width, padding_x=args.padding_x, padding_y=args.padding_y, internal_rep=internal_num_rep, color_config=color_config, norm_method='zscore') 

438 layouts.extend(heatmap_zscore_layouts) 

439 visualized_props.extend(args.heatmap_zscore_layout) 

440 

441 if layout == 'label-layout': 

442 label_layouts, level, color_dict = get_label_layouts(tree, args.label_layout, level, prop2type=prop2type, column_width=args.column_width, padding_x=args.padding_x, padding_y=args.padding_y, color_config=color_config) 

443 layouts.extend(label_layouts) 

444 total_color_dict.append(color_dict) 

445 visualized_props.extend(args.label_layout) 

446 

447 if layout == 'colorbranch-layout': 

448 categorical_props = [prop for prop in args.colorbranch_layout if prop2type[prop] in [str, list, bool]] 

449 if categorical_props: 

450 colorbranch_layouts, level, color_dict = get_colorbranch_layouts(tree, categorical_props, level, prop2type=prop2type, column_width=args.column_width, padding_x=args.padding_x, padding_y=args.padding_y, color_config=color_config) 

451 layouts.extend(colorbranch_layouts) 

452 total_color_dict.append(color_dict) 

453 visualized_props.extend(categorical_props) 

454 #visualized_props.extend([utils.add_suffix(prop, 'counter') for prop in args.piechart_layout]) 

455 

456 numerical_props = [prop for prop in args.colorbranch_layout if prop2type[prop] in [float, int]] 

457 if numerical_props: 

458 branchscore_layouts = get_branchscore_layouts(tree, numerical_props, 

459 prop2type, padding_x=args.padding_x, padding_y=args.padding_y, 

460 internal_rep=internal_num_rep, color_config=color_config) 

461 layouts.extend(branchscore_layouts) 

462 visualized_props.extend(numerical_props) 

463 

464 if layout == 'bubble-layout': 

465 bubble_layouts, level = get_bubble_layouts(tree, args.bubble_layout, level=level, prop2type=prop2type, padding_x=args.padding_x, padding_y=args.padding_y, internal_rep=internal_num_rep, color_config=color_config) 

466 layouts.extend(bubble_layouts) 

467 visualized_props.extend(args.bubble_layout) 

468 

469 if layout == "piechart-layout": 

470 piechart_layouts = get_piechart_layouts(tree, args.piechart_layout, prop2type=prop2type, padding_x=args.padding_x, padding_y=args.padding_y, color_config=color_config) 

471 layouts.extend(piechart_layouts) 

472 visualized_props.extend(args.piechart_layout) 

473 visualized_props.extend([utils.add_suffix(prop, 'counter') for prop in args.piechart_layout]) 

474 

475 if layout == 'rectangle-layout': 

476 rectangle_layouts, level, color_dict = get_rectangle_layouts(tree, args.rectangle_layout, level, prop2type=prop2type, column_width=args.column_width, padding_x=args.padding_x, padding_y=args.padding_y, color_config=color_config) 

477 layouts.extend(rectangle_layouts) 

478 total_color_dict.append(color_dict) 

479 visualized_props.extend(args.rectangle_layout) 

480 visualized_props.extend([utils.add_suffix(prop, 'counter') for prop in args.rectangle_layout]) 

481 

482 if layout == 'background-layout': 

483 background_layouts, level, color_dict = get_background_layouts(tree, args.background_layout, level, prop2type=prop2type, column_width=args.column_width, padding_x=args.padding_x, padding_y=args.padding_y, color_config=color_config) 

484 layouts.extend(background_layouts) 

485 total_color_dict.append(color_dict) 

486 visualized_props.extend(args.background_layout) 

487 visualized_props.extend([utils.add_suffix(prop, 'counter') for prop in args.background_layout]) 

488 

489 if layout == 'binary-layout': 

490 binary_layouts, level, color_dict = get_binary_layouts(tree, args.binary_layout, level, prop2type=prop2type, column_width=args.column_width, reverse=False, padding_x=args.padding_x, padding_y=args.padding_y, color_config=color_config, same_color=False, aggregate=False) 

491 layouts.extend(binary_layouts) 

492 total_color_dict.append(color_dict) 

493 visualized_props.extend(args.binary_layout) 

494 

495 if layout == 'binary-aggregate-layout': 

496 binary_aggregate_layouts, level, color_dict = get_binary_layouts(tree, args.binary_aggregate_layout, level, prop2type=prop2type, column_width=args.column_width, reverse=False, padding_x=args.padding_x, padding_y=args.padding_y, color_config=color_config, same_color=False, aggregate=True) 

497 layouts.extend(binary_aggregate_layouts) 

498 total_color_dict.append(color_dict) 

499 visualized_props.extend(args.binary_aggregate_layout) 

500 

501 if layout == 'binary-unicolor-layout': 

502 binary2_layouts, level, color_dict = get_binary_layouts(tree, args.binary_unicolor_layout, level, prop2type=prop2type, column_width=args.column_width, reverse=False, padding_x=args.padding_x, padding_y=args.padding_y, color_config=color_config, same_color=True, aggregate=False) 

503 layouts.extend(binary2_layouts) 

504 total_color_dict.append(color_dict) 

505 visualized_props.extend(args.binary_unicolor_layout) 

506 

507 if layout == 'binary-unicolor-aggregate-layout': 

508 binary2_aggregate_layouts, level, color_dict = get_binary_layouts(tree, args.binary_unicolor_aggregate_layout, level, prop2type=prop2type, column_width=args.column_width, reverse=False, padding_x=args.padding_x, padding_y=args.padding_y, color_config=color_config, same_color=True, aggregate=True) 

509 layouts.extend(binary2_aggregate_layouts) 

510 total_color_dict.append(color_dict) 

511 visualized_props.extend(args.binary_unicolor_aggregate_layout) 

512 

513 # if layout == 'revbinary-layout': 

514 # revbinary_layouts, level, color_dict = get_binary_layouts(tree, args.revbinary_layout, level,  

515 # prop2type=prop2type, column_width=args.column_width, reverse=True,  

516 # padding_x=args.padding_x, padding_y=args.padding_y) 

517 # layouts.extend(revbinary_layouts) 

518 # total_color_dict.append(color_dict) 

519 # visualized_props.extend(args.revbinary_layout) 

520 

521 # if layout == 'revbinary-unicolor-layout': 

522 # revbinary2_layouts, level, color_dict = get_binary_layouts(tree, args.revbinary_unicolor_layout, level,  

523 # prop2type=prop2type, column_width=args.column_width, reverse=True,  

524 # padding_x=args.padding_x, padding_y=args.padding_y) 

525 # layouts.extend(revbinary2_layouts) 

526 # total_color_dict.append(color_dict) 

527 # visualized_props.extend(args.revbinary_unicolor_layout) 

528 

529 if layout == 'barplot-layout': 

530 barplot_layouts, level, color_dict = get_barplot_layouts(tree, args.barplot_layout, level, 

531 prop2type, column_width=args.barplot_width, padding_x=args.padding_x, padding_y=args.padding_y, 

532 internal_rep=internal_num_rep, anchor_column=args.barplot_scale, color_config=color_config) 

533 layouts.extend(barplot_layouts) 

534 total_color_dict.append(color_dict) 

535 visualized_props.extend(args.barplot_layout) 

536 

537 if layout == "branchscore-layout": 

538 branchscore_layouts = get_branchscore_layouts(tree, args.branchscore_layout, prop2type, padding_x=args.padding_x, padding_y=args.padding_y, internal_rep='avg') 

539 layouts.extend(branchscore_layouts) 

540 visualized_props.extend(args.branchscore_layout) 

541 

542 if layout == 'alignment-layout': 

543 #fasta_file = args.alignment_layout 

544 lengh = len(max(utils.tree_prop_array(tree, 'alignment'),key=len)) 

545 aln_layout = seq_layouts.LayoutAlignment(name='Alignment_layout', 

546 alignment_prop='alignment', column=level, scale_range=lengh, 

547 summarize_inner_nodes=True) 

548 layouts.append(aln_layout) 

549 

550 if layout == 'domain-layout': 

551 domain_layout = seq_layouts.LayoutDomain(name="Domain_layout", prop='dom_arq') 

552 layouts.append(domain_layout) 

553 

554 # presence-absence profiling based on categorical data 

555 if layout == 'profiling-layout': 

556 profiling_props = args.profiling_layout 

557 for profiling_prop in profiling_props: 

558 

559 matrix, value2color, all_profiling_values = multiple2matrix(tree, profiling_prop) 

560 matrix_layout = profile_layouts.LayoutPropsMatrixBinary(name=f"Profiling_{all_profiling_values}", 

561 matrix=matrix, matrix_props=all_profiling_values, value_range=[0,1], 

562 value_color=value2color, column=level, poswidth=args.column_width) 

563 

564 level += 1 

565 layouts.append(matrix_layout) 

566 

567 # presence-absence profiling based on list data 

568 # if layout == 'multi-profiling-layout': 

569 # profiling_props = args.multi_profiling_layout 

570 # for profiling_prop in profiling_props: 

571 # # matrix, all_values = multiple2profile(tree, profiling_prop) # create mimic msa 

572 # # profile_layout = profile_layouts.LayoutProfile(name=f'Profiling_{profiling_prop}',  

573 # # mode='profiles', alignment=matrix, seq_format='profiles', profiles=all_values,  

574 # # column=level, summarize_inner_nodes=False,  

575 # # poswidth=args.column_width) 

576 

577 # # matrix, value2color, all_values = multiple2matrix(tree, profiling_prop) 

578 # # matrix_layout = profile_layouts.LayoutPropsMatrixOld(name=f"Profiling_{profiling_prop}", 

579 # # matrix=matrix, matrix_type='categorical', matrix_props=all_values, 

580 # # value_color=value2color, column=level, poswidth=args.column_width) 

581 

582 # matrix, value2color, all_profiling_values = multiple2matrix(tree, profiling_prop) 

583 # matrix_layout = profile_layouts.LayoutPropsMatrixBinary(name=f"Profiling_{all_profiling_values}", 

584 # matrix=matrix, matrix_props=all_profiling_values, value_range=[0,1], 

585 # value_color=value2color, column=level, poswidth=args.column_width) 

586 

587 # level += 1 

588 # layouts.append(matrix_layout) 

589 

590 # categorical matrix 

591 if layout == 'categorical-matrix-layout': 

592 categorical_props = args.categorical_matrix_layout 

593 # matrix, value2color = str2matrix(tree, categorical_props) 

594 # all_values = list(value2color.keys()) 

595 # matrix_layout = profile_layouts.LayoutPropsMatrix(name=f'Categorical_matrix_{categorical_props}',  

596 # matrix_type='categorical', alignment=matrix, matrix_props=categorical_props,  

597 # profiles=all_values, column=level, summarize_inner_nodes=False, value_color=value2color, 

598 # poswidth=args.column_width) 

599 

600 # drawing as array in matrix 

601 matrix, value2color = categorical2matrix(tree, categorical_props, color_config=color_config) 

602 matrix_layout = profile_layouts.LayoutPropsMatrixOld(name=f"Categorical_matrix_{categorical_props}", 

603 matrix=matrix, matrix_type='categorical', matrix_props=categorical_props, 

604 value_color=value2color, column=level, poswidth=args.column_width) 

605 

606 level += 1 

607 layouts.append(matrix_layout) 

608 

609 # numerical matrix 

610 if layout == 'numerical-matrix-layout': 

611 numerical_props = args.numerical_matrix_layout 

612 # matrix, value2color = float2matrix(tree, numerical_props, count_negative=True) 

613 # all_values = list(value2color.keys()) 

614 # min_val, max_val = min(all_values), max(all_values) 

615 # matrix_layout = profile_layouts.LayoutPropsMatrix(name=f'Numerical_matrix_{numerical_props}',  

616 # matrix_type='numerical', alignment=matrix, matrix_props=numerical_props,  

617 # profiles=all_values, column=level, summarize_inner_nodes=False,  

618 # value_range = [min_val, max_val], value_color=value2color, 

619 # poswidth=args.column_width) 

620 

621 # if is list, it should provide more than one matrix 

622 matrix, minval, maxval, value2color, results_list, list_props, single_props = numerical2matrix(tree, 

623 numerical_props, count_negative=True, internal_num_rep=internal_num_rep, 

624 color_config=color_config, norm_method='min-max') 

625 

626 if list_props: 

627 index_map = {value: idx for idx, value in enumerate(numerical_props)} 

628 sorted_list_props = sorted(list_props, key=lambda x: index_map[x]) 

629 for list_prop in sorted_list_props: 

630 matrix, minval, maxval, value2color = results_list[list_prop] 

631 matrix_layout = profile_layouts.LayoutPropsMatrixOld(name=f"Numerical_matrix_{list_prop}", 

632 matrix=matrix, matrix_type='numerical', matrix_props=[list_prop], is_list=True, 

633 value_color=value2color, value_range=[minval, maxval], column=level, 

634 poswidth=args.column_width) 

635 

636 level += 1 

637 layouts.append(matrix_layout) 

638 

639 if single_props: 

640 index_map = {value: idx for idx, value in enumerate(numerical_props)} 

641 sorted_single_props = sorted(single_props, key=lambda x: index_map[x]) 

642 matrix_layout = profile_layouts.LayoutPropsMatrixOld(name=f"Numerical_matrix_{sorted_single_props}", 

643 matrix=matrix, matrix_type='numerical', matrix_props=sorted_single_props, is_list=False, 

644 value_color=value2color, value_range=[minval, maxval], column=level, 

645 poswidth=args.column_width) 

646 

647 level += 1 

648 layouts.append(matrix_layout) 

649 

650 

651 if layout == 'binary-matrix-layout': 

652 binary_props = args.binary_matrix_layout 

653 matrix, value2color, is_list = binary2matrix(tree, binary_props, color_config=color_config) 

654 all_values = list(value2color.keys()) 

655 

656 matrix_layout = profile_layouts.LayoutPropsMatrixBinary(name=f"Binary_matrix_{binary_props}", 

657 matrix=matrix, matrix_props=binary_props, value_range=[0,1], 

658 value_color=value2color, column=level, poswidth=args.column_width) 

659 

660 level += 1 

661 layouts.append(matrix_layout) 

662 

663 if layout == "taxoncollapse-layout": 

664 taxon_color_dict = {} 

665 taxa_layouts = [] 

666 

667 # generate a rank2values dict for pre taxonomic annotated tree 

668 if not rank2values: 

669 rank2values = defaultdict(list) 

670 for n in tree.traverse(): 

671 if n.props.get('lca'): 

672 lca_dict = utils.string_to_dict(n.props.get('lca')) 

673 for rank, sci_name in lca_dict.items(): 

674 rank2values[rank].append(sci_name) 

675 

676 current_rank = n.props.get('rank') 

677 if current_rank and current_rank != 'Unknown': 

678 rank2values[current_rank].append(n.props.get('sci_name','')) 

679 else: 

680 pass 

681 

682 # assign color for each value of each rank 

683 for rank, value in sorted(rank2values.items()): 

684 value = list(set(value)) 

685 color_dict = utils.assign_color_to_values(value, paired_color) 

686 taxa_layout = taxon_layouts.TaxaCollapse(name = "TaxaCollapse_"+rank, rank=rank, rect_width=args.column_width, color_dict=color_dict, column=level) 

687 taxa_layouts.append(taxa_layout) 

688 

689 layouts = layouts + taxa_layouts 

690 level += 1 

691 

692 # emapper layout  

693 if args.emapper_layout: 

694 text_props = [ 

695 'seed_ortholog', 

696 'max_annot_lvl', 

697 'COG_category', 

698 'Description', 

699 'Preferred_name', 

700 ] 

701 #label_layouts, level, _ = get_layouts(tree, text_props, 'rectangular', level, 'counter', prop2type=prop2type) 

702 label_layouts, level, _ = get_rectangle_layouts(tree, text_props, level, prop2type=prop2type, column_width=args.column_width) 

703 layouts.extend(label_layouts) 

704 

705 num_props = [ 

706 #'evalue', 

707 'score' 

708 ] 

709 #barplot_layouts, level, _ = get_layouts(tree, num_props, 'barplot', level, internal_num_rep, prop2type=prop2type) 

710 barplot_layouts, level, _ = get_barplot_layouts(tree, num_props, level, prop2type, column_width=args.barplot_width, internal_rep=internal_num_rep) 

711 layouts.extend(barplot_layouts) 

712 

713 multiple_text_props = [ 

714 'eggNOG_OGs', #28PAR@1|root,2QVY3@2759|Eukaryota 

715 'GOs', #GO:0000002,GO:0000003 

716 'KEGG_ko', #ko:K04451,ko:K10148 

717 'KEGG_Pathway', #ko01522,ko01524 

718 'KEGG_Module', #M00118  

719 'KEGG_Reaction', #R00497 

720 'KEGG_rclass', #RC00141 

721 'EC', #1.18.6.1,1.3.7.14,1.3.7.15 

722 'BRITE', #ko00000,ko00001,ko03000 

723 'KEGG_TC', #3.A.1.133.1  

724 

725 # Domains 

726 'CAZy', 

727 'BiGG_Reaction', 

728 'PFAMs' 

729 ] 

730 

731 for multiple_text_prop in multiple_text_props: 

732 matrix, value2color, all_profiling_values = multiple2matrix(tree, multiple_text_prop) 

733 multiple_text_prop_layout = profile_layouts.LayoutPropsMatrixBinary(name=f"Profiling_{multiple_text_prop}", 

734 matrix=matrix, matrix_props=all_profiling_values, value_range=[0,1], 

735 active=False, 

736 value_color=value2color, column=level, poswidth=args.column_width) 

737 

738 # matrix, all_values = multiple2profile(tree, multiple_text_prop) 

739 # multiple_text_prop_layout = profile_layouts.LayoutProfile( 

740 # name="Profiling_"+multiple_text_prop,  

741 # mode='profiles',  

742 # alignment=matrix,  

743 # profiles=all_values,  

744 # active=False, 

745 # column=level) 

746 

747 level += 1 

748 layouts.append(multiple_text_prop_layout) 

749 

750 # Taxa layouts 

751 if args.taxonclade_layout or args.taxonrectangle_layout: 

752 taxon_color_dict = {} 

753 taxa_layouts = [] 

754 

755 # generate a rank2values dict for pre taxonomic annotated tree 

756 if not rank2values: 

757 rank2values = defaultdict(list) 

758 for n in tree.traverse(): 

759 if n.props.get('rank') and n.props.get('rank') != 'Unknown': 

760 rank = n.props.get('rank') 

761 rank2values[rank].append(n.props.get('sci_name','')) 

762 else: 

763 pass 

764 

765 

766 # assign color for each value of each rank 

767 for rank, value in sorted(rank2values.items()): 

768 value = list(set(value)) 

769 color_dict = utils.assign_color_to_values(value, paired_color) 

770 if args.taxonclade_layout: 

771 taxa_layout = taxon_layouts.TaxaClade(name='TaxaClade_'+rank, level=level, rank = rank, color_dict=color_dict) 

772 taxa_layouts.append(taxa_layout) 

773 

774 if args.taxonrectangle_layout: 

775 taxa_layout = taxon_layouts.TaxaRectangular(name = "TaxaRect_"+rank, rank=rank, rect_width=args.column_width, color_dict=color_dict, column=level) 

776 taxa_layouts.append(taxa_layout) 

777 #level += 1 

778 

779 # if args.taxoncollapse_layout: 

780 # taxa_layout = taxon_layouts.TaxaCollapse(name = "TaxaCollapse_"+rank, rank=rank, rect_width=args.column_width, color_dict=color_dict, column=level) 

781 # taxa_layouts.append(taxa_layout) 

782 

783 taxon_color_dict[rank] = color_dict 

784 

785 #taxa_layouts.append(taxon_layouts.TaxaRectangular(name = "Last Common Ancester", color_dict=taxon_color_dict, column=level)) 

786 taxa_layouts.append(taxon_layouts.LayoutSciName(name = 'Taxa Scientific name', color_dict=taxon_color_dict)) 

787 taxa_layouts.append(taxon_layouts.LayoutEvolEvents(name='Taxa Evolutionary events', prop="evoltype", 

788 speciation_color="blue", 

789 duplication_color="red", node_size = 3, 

790 legend=True)) 

791 layouts = layouts + taxa_layouts 

792 level += 1 

793 total_color_dict.append(taxon_color_dict) 

794 

795 #### prune at the last step in case of losing leaves information 

796 # prune tree by rank 

797 if args.rank_limit: 

798 tree = utils.taxatree_prune(tree, rank_limit=args.rank_limit) 

799 

800 # prune tree by condition  

801 if args.pruned_by: # need to be wrap with quotes 

802 condition_strings = args.pruned_by 

803 tree = utils.conditional_prune(tree, condition_strings, prop2type) 

804 

805 #### Output ##### 

806 popup_prop_keys.extend(list(set(visualized_props))) 

807 popup_prop_keys = tuple(popup_prop_keys) 

808 

809 if args.out_colordict: 

810 wrtie_color(total_color_dict) 

811 if args.render: 

812 file_path = "tree-1.svg" 

813 get_image(tree, layouts, args.port, os.path.abspath(file_path)) 

814 else: 

815 tree.explore(keep_server=True, compress=False, quiet=args.verbose, 

816 layouts=layouts, port=args.port, include_props=sorted(popup_prop_keys), 

817 show_leaf_name=args.hide_leaf_name, show_branch_support=args.hide_branch_support, 

818 show_branch_length=args.hide_branch_distance) 

819 

def wrtie_color(color_dict):
    """
    Write the collected color assignments to ``color_dict.txt``.

    The file is created (or overwritten) in the current working directory.

    :param color_dict: Iterable of dictionaries.  Each dictionary maps a
        property name either to a single color or to a nested mapping of
        ``{value: color}``.
    :return: None.

    Output format, one record per property, records separated by an empty
    line::

        PROPERTY<TAB>name
        COLOR<TAB>color            (single-color property)

        PROPERTY<TAB>name
        VAR<TAB>value              (repeated for every value)
        COLOR<TAB>color
    """
    with open('color_dict.txt', 'w') as f:
        for sub_dict in color_dict:
            for key, value in sub_dict.items():
                f.write(f'PROPERTY\t{key}\n')
                # isinstance() also accepts dict subclasses (OrderedDict,
                # defaultdict), which an exact type comparison would have
                # treated as a single color.
                if isinstance(value, dict):
                    for sub_k, sub_v in value.items():
                        # f-strings keep non-string values (floats, bools)
                        # from raising TypeError on concatenation.
                        f.write(f'VAR\t{sub_k}\n')
                        f.write(f'COLOR\t{sub_v}\n')
                else:
                    f.write(f'COLOR\t{value}\n')
                f.write('\n')

834 

def read_config_to_dict(file_obj, delimiter):
    """
    Read a color configuration file into a nested dictionary.

    Blank lines and lines starting with ``#`` are ignored.  The first
    remaining line is the header; it must contain the columns ``PROP``,
    ``VALUE`` and ``COLOR`` and may contain ``CONDITION``, e.g.::

        PROP,VALUE,COLOR,CONDITION
        random_type,low,green,
        sample1,10,red,color_max

    :param file_obj: Seekable text file object holding the configuration.
    :param delimiter: Delimiter used in the configuration file.
    :return: ``{prop: {"value2color": {value: color},
        "detail2color": {condition: (color, value)}}}`` where ``condition``
        is lower-cased.  An empty dictionary is returned when the file has
        no header line (empty or comment-only file).
    :raises KeyError: If a row cannot be mapped to the ``PROP``, ``VALUE``
        and ``COLOR`` columns.
    """
    config_dict = {}
    # Reset file pointer to start, in case it's been accessed before
    file_obj.seek(0)
    filtered_lines = [
        line for line in file_obj.readlines()
        if line.strip() and not line.strip().startswith('#')
    ]

    reader = csv.reader(filtered_lines, delimiter=delimiter)
    # A default avoids leaking StopIteration when nothing is left after
    # filtering out comments and blank lines.
    headers = next(reader, None)
    if headers is None:
        return config_dict

    for row in reader:
        # Map the headers to the row values
        row_dict = dict(zip(headers, row))

        prop = row_dict['PROP']
        value = row_dict['VALUE']
        color = row_dict['COLOR']
        detail = row_dict.get('CONDITION')

        # Initialize property if not present
        entry = config_dict.setdefault(prop, {"value2color": {}, "detail2color": {}})

        # A row may feed both mappings: the condition keeps (color, value),
        # and any non-empty value also gets a direct value -> color entry.
        if detail:
            entry["detail2color"][detail.lower()] = (color, value)
        if value:
            entry["value2color"][value] = color

    return config_dict

876 

def get_acr_discrete_layouts(tree, props, level, prop2type, column_width=70, padding_x=1, padding_y=0, color_config=None):
    """
    Build one ``LayoutACRDiscrete`` per property.

    :param tree: Tree whose property values are collected with
        ``utils.tree_prop_array``.
    :param props: Property names to visualize.
    :param level: Column index of the first layout; incremented per property.
    :param prop2type: Mapping of property name to its type; properties typed
        as ``list`` have their items flattened before colors are assigned.
    :param column_width: Width passed to each layout.
    :param padding_x: Horizontal padding passed to each layout.
    :param padding_y: Vertical padding passed to each layout.
    :param color_config: Optional ``{prop: {"value2color": {...}}}`` mapping.
    :return: ``(layouts, level, prop_color_dict)``; ``prop_color_dict`` is
        returned empty by this function.
    """
    prop_color_dict = {}
    layouts = []
    for prop in props:
        if prop2type and prop2type.get(prop) == list:
            leaf_values = list(map(list, set(map(tuple, utils.tree_prop_array(tree, prop)))))
            prop_values = [val for sublist in leaf_values for val in sublist]
        else:
            prop_values = sorted(list(set(utils.tree_prop_array(tree, prop))))

        color_dict = {}  # key = value, value = color id
        prop_config = color_config.get(prop) if color_config else None
        if prop_config:
            configured = prop_config.get('value2color')
            if configured:
                # Work on a copy: the fallback colors added below must not
                # leak into the caller's color_config.
                color_dict = dict(configured)

                # Give every value without a configured color a fallback one
                additional_values = set(prop_values) - set(color_dict)
                if additional_values:
                    additional_colors = utils.assign_color_to_values(sorted(additional_values), paired_color)
                    color_dict.update(additional_colors)
        else:
            # normal text prop
            color_dict = utils.assign_color_to_values(prop_values, paired_color)

        layout = phylosignal_layouts.LayoutACRDiscrete(
            name='acr_'+prop, column=level,
            color_dict=color_dict, acr_prop=prop, width=column_width,
            padding_x=padding_x, padding_y=padding_y)
        layouts.append(layout)
        level += 1
    return layouts, level, prop_color_dict

909 

def get_acr_continuous_layouts(tree, props, level, prop2type, padding_x=1, padding_y=0):
    """
    Build one ``LayoutACRContinuous`` per property.

    Every distinct non-NaN value of a property is mapped to the color of the
    nearest of the evenly spaced anchor points between the property's minimum
    and maximum, using a 20-step 'jet' gradient.

    :param tree: Tree whose numeric property values are collected with
        ``utils.tree_prop_array``.
    :param props: Property names to visualize.
    :param level: Column index passed to every layout (not incremented here).
    :param prop2type: Not used by this function.
    :param padding_x: Not used by this function.
    :param padding_y: Not used by this function.
    :return: List of layouts, one per property.
    """
    palette = utils.build_color_gradient(20, colormap_name='jet')
    n_steps = len(palette)
    result = []
    for prop in props:
        distinct = sorted(set(utils.tree_prop_array(tree, prop, numeric=True)))
        numeric = np.array(distinct).astype('float64')
        numeric = numeric[~np.isnan(numeric)]
        minval = numeric.min()
        maxval = numeric.max()
        anchors = np.linspace(minval, maxval, n_steps)
        # Palette entries are addressed starting at 1, hence the +1.
        value2color = {
            val: palette[np.abs(anchors - val).argmin() + 1]
            for val in numeric
        }
        result.append(
            phylosignal_layouts.LayoutACRContinuous(
                name='acr_'+prop,
                column=level,
                color_dict=value2color,
                score_prop=prop,
                value_range=[minval, maxval],
                color_range=[palette[20], palette[10], palette[1]],
            )
        )
    return result

928 

def get_ls_layouts(tree, props, level, prop2type, padding_x=1, padding_y=0, color_config=None):
    """
    Build the lineage-specificity layouts for each property.

    For every property three ``LayoutBranchScore`` layouts are created (for
    the ``prec``, ``sens`` and ``f1`` suffixed properties, scored on a fixed
    0..1 range) followed by one ``LayoutLineageSpecific`` layout for the
    ``ls_clade`` suffixed property.

    :param tree: Tree whose numeric property values are collected with
        ``utils.tree_prop_array``.
    :param props: Base property names.
    :param level: Not used by this function.
    :param prop2type: Not used by this function.
    :param padding_x: Not used by this function.
    :param padding_y: Not used by this function.
    :param color_config: Optional ``{prop: {"value2color": {...},
        "detail2color": {...}}}`` mapping.  ``value2color`` keys must be
        convertible to ``float``; ``detail2color`` may define ``color_min``,
        ``color_max`` and ``color_mid``.
    :return: ``(layouts, ls_props)`` where ``ls_props`` lists the suffixed
        property names in the same order as ``layouts``.
    """
    def detail_color(detail2color, key, default):
        """Return the color stored under ``key``, or ``default`` if unset."""
        entry = detail2color.get(key)
        if not entry:
            return default
        # detail2color entries are (color, value) tuples (this is how
        # get_branchscore_layouts unpacks them); a bare color string is
        # tolerated as well.
        if isinstance(entry, (tuple, list)):
            return entry[0] or default
        return entry

    precision_suffix = "prec"
    sensitivity_suffix = "sens"
    f1_suffix = "f1"
    ls_clade_suffix = "ls_clade"
    ls_clade_props = [utils.add_suffix(prop, ls_clade_suffix) for prop in props]
    lsprop2color = utils.assign_color_to_values(ls_clade_props, paired_color)

    layouts = []
    ls_props = []
    for prop in props:
        value2color = {}
        prop_config = color_config.get(prop) if color_config else None
        if prop_config is not None:
            # Explicit value -> color pairs take precedence over the gradient.
            explicit = prop_config.get('value2color') or {}
            value2color = {float(key): color for key, color in explicit.items()}

            # The gradient must stay indexable by 1..20; a dict view of the
            # configured colors (as used previously) is not subscriptable.
            detail2color = prop_config.get('detail2color') or {}
            min_color = detail_color(detail2color, 'color_min', 'white')
            max_color = detail_color(detail2color, 'color_max', 'red')
            mid_color = detail_color(detail2color, 'color_mid', None)
            gradientscolor = utils.build_custom_gradient(20, min_color, max_color, mid_color)
        else:
            gradientscolor = utils.build_color_gradient(20, colormap_name='bwr')

        minval, maxval = 0, 1
        index_values = np.linspace(minval, maxval, len(gradientscolor))
        color_range = [gradientscolor[20], gradientscolor[10], gradientscolor[1]]

        for suffix in [precision_suffix, sensitivity_suffix, f1_suffix]:
            ls_prop = utils.add_suffix(prop, suffix)

            # get value
            internalnode_all_values = np.array(sorted(list(set(utils.tree_prop_array(tree, ls_prop, numeric=True))))).astype('float64')
            all_values = internalnode_all_values[~np.isnan(internalnode_all_values)]
            for search_value in all_values:
                if search_value not in value2color:
                    index = np.abs(index_values - search_value).argmin()+1
                    value2color[search_value] = gradientscolor[index]

            if suffix != "f1":
                # precision / sensitivity layouts are created inactive
                layout = staple_layouts.LayoutBranchScore(
                    name='ls_'+ls_prop,
                    color_dict=value2color, score_prop=ls_prop, value_range=[minval, maxval],
                    color_range=color_range,
                    show_score=True, active=False)
            else:
                layout = staple_layouts.LayoutBranchScore(
                    name='ls_'+ls_prop,
                    color_dict=value2color, score_prop=ls_prop, value_range=[minval, maxval],
                    color_range=color_range,
                    show_score=True)

            layouts.append(layout)
            ls_props.append(ls_prop)

        ls_clade_prop = utils.add_suffix(prop, ls_clade_suffix)
        ls_clade_layout = phylosignal_layouts.LayoutLineageSpecific(
            name=f'Linear Specific Clade {prop}',
            ls_prop=ls_clade_prop, color=lsprop2color[ls_clade_prop])

        layouts.append(ls_clade_layout)
        ls_props.append(ls_clade_prop)

    return layouts, ls_props

999 

def get_piechart_layouts(tree, props, prop2type, padding_x=1, padding_y=0, radius=20, color_config=None):
    """
    Build one ``LayoutPiechart`` per property.

    Colors come from ``color_config[prop]['value2color']`` when the property
    is configured; otherwise every observed value gets a color from the
    module palette.  A configured property without ``value2color`` gets an
    empty color mapping.

    :param tree: Tree whose property values are collected with
        ``utils.tree_prop_array``.
    :param props: Property names to visualize.
    :param prop2type: Mapping of property name to type; ``list`` typed
        properties have their items flattened.
    :param padding_x: Not used by this function.
    :param padding_y: Not used by this function.
    :param radius: Radius passed to each layout.
    :param color_config: Optional per-property color configuration.
    :return: List of layouts, one per property.
    """
    def observed_values(prop):
        # Distinct values of the property; list-valued entries are flattened.
        raw = utils.tree_prop_array(tree, prop)
        if prop2type and prop2type.get(prop) == list:
            unique_lists = list(map(list, set(map(tuple, raw))))
            return [item for group in unique_lists for item in group]
        return sorted(list(set(raw)))

    pies = []
    for prop in props:
        color_dict = {}
        if color_config and color_config.get(prop):
            configured = color_config.get(prop).get('value2color')
            if configured:
                color_dict = configured
        else:
            color_dict = utils.assign_color_to_values(observed_values(prop), paired_color)
        pies.append(
            text_layouts.LayoutPiechart(name='Piechart_'+prop, color_dict=color_dict, text_prop=prop, radius=radius)
        )
    return pies

1018 

def get_label_layouts(tree, props, level, prop2type, column_width=70, padding_x=1, padding_y=0, color_config=None):
    """
    Build one ``LayoutText`` per property, each in its own column.

    :param tree: Tree whose property values are collected with
        ``utils.tree_prop_array``.
    :param props: Property names to visualize.
    :param level: Column index of the first layout; incremented per property.
    :param prop2type: Mapping of property name to type; ``list`` typed
        properties have their items flattened.
    :param column_width: Width passed to each layout.
    :param padding_x: Horizontal padding passed to each layout.
    :param padding_y: Vertical padding passed to each layout.
    :param color_config: Optional per-property color configuration; its
        ``value2color`` mapping is used as-is when present.
    :return: ``(layouts, level, prop_color_dict)``; ``prop_color_dict`` is
        returned empty by this function.
    """
    prop_color_dict = {}
    built = []
    for prop in props:
        has_config = bool(color_config and color_config.get(prop))
        if has_config:
            # Configured property: take the user mapping, or nothing at all.
            user_colors = color_config.get(prop).get('value2color')
            color_dict = user_colors if user_colors else {}
        else:
            values_array = utils.tree_prop_array(tree, prop)
            if prop2type and prop2type.get(prop) == list:
                distinct_lists = list(map(list, set(map(tuple, values_array))))
                prop_values = [item for entry in distinct_lists for item in entry]
            else:
                prop_values = sorted(list(set(values_array)))
            color_dict = utils.assign_color_to_values(prop_values, paired_color)

        built.append(
            text_layouts.LayoutText(
                name='Label_'+prop,
                column=level,
                color_dict=color_dict,
                text_prop=prop,
                width=column_width,
                padding_x=padding_x,
                padding_y=padding_y,
            )
        )
        level += 1
    return built, level, prop_color_dict

1041 

def get_colorbranch_layouts(tree, props, level, prop2type, column_width=70, padding_x=1, padding_y=0, color_config=None):
    """
    Build one ``LayoutColorbranch`` per property, each in its own column.

    :param tree: Tree whose property values are collected with
        ``utils.tree_prop_array``.
    :param props: Property names to visualize.
    :param level: Column index of the first layout; incremented per property.
    :param prop2type: Mapping of property name to type; ``list`` typed
        properties have their items flattened.
    :param column_width: Width passed to each layout.
    :param padding_x: Horizontal padding passed to each layout.
    :param padding_y: Vertical padding passed to each layout.
    :param color_config: Optional per-property color configuration.
    :return: ``(layouts, level, prop_color_dict)``; ``prop_color_dict`` is
        returned empty by this function.
    """
    def palette_colors(prop):
        # Assign palette colors to every distinct value of the property.
        collected = utils.tree_prop_array(tree, prop)
        if prop2type and prop2type.get(prop) == list:
            groups = list(map(list, set(map(tuple, collected))))
            flat = [member for group in groups for member in group]
        else:
            flat = sorted(list(set(collected)))
        return utils.assign_color_to_values(flat, paired_color)

    prop_color_dict = {}
    layouts = []
    for prop in props:
        color_dict = {}  # key = value, value = color id
        if color_config and color_config.get(prop):
            # Values missing from the configured mapping are NOT given
            # fallback colors here.
            mapping = color_config.get(prop).get('value2color')
            if mapping:
                color_dict = mapping
        else:
            color_dict = palette_colors(prop)

        branch_layout = text_layouts.LayoutColorbranch(
            name='Colorbranch_'+prop, column=level,
            color_dict=color_dict, text_prop=prop, width=column_width,
            padding_x=padding_x, padding_y=padding_y,
        )
        layouts.append(branch_layout)
        level += 1
    return layouts, level, prop_color_dict

1076 

def get_rectangle_layouts(tree, props, level, prop2type, column_width=70, padding_x=1, padding_y=0, color_config=None):
    """
    Build one ``LayoutRect`` per property, each in its own column.

    :param tree: Tree whose property values are collected with
        ``utils.tree_prop_array``.
    :param props: Property names to visualize.
    :param level: Column index of the first layout; incremented per property.
    :param prop2type: Mapping of property name to type; ``list`` typed
        properties have their items flattened.
    :param column_width: Width passed to each layout.
    :param padding_x: Horizontal padding passed to each layout.
    :param padding_y: Vertical padding passed to each layout.
    :param color_config: Optional per-property color configuration; its
        ``value2color`` mapping is used as-is when present.
    :return: ``(layouts, level, prop_color_dict)``; ``prop_color_dict`` is
        returned empty by this function.
    """
    prop_color_dict = {}
    rect_layouts = []
    for prop in props:
        color_dict = {}  # value -> color
        configured_prop = color_config and color_config.get(prop)
        if configured_prop:
            chosen = color_config.get(prop).get('value2color')
            if chosen:
                color_dict = chosen
        else:
            is_list_prop = prop2type and prop2type.get(prop) == list
            if is_list_prop:
                tuples_seen = set(map(tuple, utils.tree_prop_array(tree, prop)))
                as_lists = list(map(list, tuples_seen))
                prop_values = [element for row in as_lists for element in row]
            else:
                prop_values = sorted(list(set(utils.tree_prop_array(tree, prop))))

            # plain text property: colors come from the module palette
            color_dict = utils.assign_color_to_values(prop_values, paired_color)

        rect = text_layouts.LayoutRect(
            name='Rectangular_'+prop, column=level,
            color_dict=color_dict, text_prop=prop,
            width=column_width, padding_x=padding_x, padding_y=padding_y,
        )
        rect_layouts.append(rect)
        level += 1
    return rect_layouts, level, prop_color_dict

1101 

def get_background_layouts(tree, props, level, prop2type, column_width, padding_x=1, padding_y=0, color_config=None):
    """
    Build one ``LayoutBackground`` per property, each in its own column.

    :param tree: Tree whose property values are collected with
        ``utils.tree_prop_array``.
    :param props: Property names to visualize.
    :param level: Column index of the first layout; incremented per property.
    :param prop2type: Mapping of property name to type; ``list`` typed
        properties have their items flattened.
    :param column_width: Width passed to each layout (required).
    :param padding_x: Horizontal padding passed to each layout.
    :param padding_y: Vertical padding passed to each layout.
    :param color_config: Optional per-property color configuration; its
        ``value2color`` mapping is used as-is when present.
    :return: ``(layouts, level, prop_color_dict)``; ``prop_color_dict`` is
        returned empty by this function.
    """
    prop_color_dict = {}
    backgrounds = []
    for prop in props:
        if color_config and color_config.get(prop):
            # Configured property: user mapping or an empty mapping.
            from_config = color_config.get(prop).get('value2color')
            color_dict = from_config if from_config else {}
        else:
            source_values = utils.tree_prop_array(tree, prop)
            if prop2type and prop2type.get(prop) == list:
                unique_rows = list(map(list, set(map(tuple, source_values))))
                prop_values = []
                for row in unique_rows:
                    prop_values.extend(row)
            else:
                prop_values = sorted(list(set(source_values)))
            # plain text property: colors come from the module palette
            color_dict = utils.assign_color_to_values(prop_values, paired_color)

        backgrounds.append(
            text_layouts.LayoutBackground(
                name='Background_'+prop,
                column=level,
                width=column_width,
                color_dict=color_dict,
                text_prop=prop,
                padding_x=padding_x,
                padding_y=padding_y,
            )
        )
        level += 1
    return backgrounds, level, prop_color_dict

1128 

1129 

def get_binary_layouts(tree, props, level, prop2type, column_width=70, reverse=False, padding_x=1, padding_y=0, color_config=None, same_color=False, aggregate=False):
    """
    Build one ``LayoutBinary`` per boolean property, each in its own column.

    :param tree: Tree whose leaf property values are collected with
        ``utils.tree_prop_array(..., leaf_only=True)``.
    :param props: Property names to visualize.
    :param level: Column index of the first layout; incremented per property.
    :param prop2type: Not used by this function.
    :param column_width: Width passed to each layout.
    :param reverse: When true, layouts are named ``ReverseBinary_<prop>`` and
        ``utils.find_bool_representations`` is called with ``rep=False``.
    :param padding_x: Horizontal padding passed to each layout.
    :param padding_y: Vertical padding passed to non-reverse layouts.
    :param color_config: Optional per-property color configuration; the color
        stored for ``True`` in ``value2color`` is used.
    :param same_color: Use ``#ff0000`` for every unconfigured property.
    :param aggregate: Passed through to each layout.
    :return: ``(layouts, level, prop_color_dict)``; ``prop_color_dict`` is
        returned empty by this function.
    :raises ValueError: If a property's values cannot be read as booleans.
    """
    prop_color_dict = {}
    layouts = []

    for prop in props:
        prop_values = utils.tree_prop_array(tree, prop, leaf_only=True)

        if not reverse:
            max_count = utils.find_bool_representations(prop_values)
        else:
            max_count = utils.find_bool_representations(prop_values, rep=False)

        # Distinct values are enough to decide whether the trait is binary
        prop_values = sorted(set(prop_values))
        if not can_convert_to_bool(prop_values):
            raise ValueError(f"Property {prop} is not binary trait.")

        configured = None
        if color_config and color_config.get(prop):
            configured = color_config.get(prop).get('value2color')

        if configured:
            color_dict = configured
            if can_convert_to_bool(color_dict.keys()):
                # NOTE(review): eval() runs text taken from the color config
                # file; acceptable only while that file is trusted input.
                color_dict = {eval(k): v for k, v in color_dict.items()}
            color = color_dict.get(True, "#ff0000")  # get true color
            if color_dict.get(False):
                print("Warning: False color is not supported in binary layout. Ignored. ")
        # A property configured without a usable value2color falls through
        # to the default colors, so `color` is always bound for this prop
        # instead of being undefined or left over from the previous one.
        elif same_color:
            color = "#ff0000"
        elif level >= len(paired_color):
            color = utils.random_color(h=None)
        else:
            color = paired_color[level]

        if not reverse:
            layout = conditional_layouts.LayoutBinary('Binary_'+prop, level, bool_prop=prop, color=color, width=column_width, padding_x=padding_x, padding_y=padding_y, reverse=reverse, aggregate=aggregate, max_count=max_count)
        else:
            # NOTE(review): the reverse layout gets no color and a fixed
            # padding_y of 0, as in the original code; confirm this is intended.
            layout = conditional_layouts.LayoutBinary('ReverseBinary_'+prop, level, bool_prop=prop, width=column_width, padding_x=padding_x, padding_y=0, reverse=reverse, aggregate=aggregate, max_count=max_count)

        layouts.append(layout)
        level += 1
    return layouts, level, prop_color_dict

1177 

def get_branchscore_layouts(tree, props, prop2type, padding_x=1, padding_y=0, internal_rep='avg', color_config=None):
    """
    Build one ``LayoutBranchScore`` per numeric property.

    Leaf values of ``prop`` and internal-node values of the property suffixed
    with ``internal_rep`` are pooled; every distinct non-NaN value is mapped
    to the nearest step of a 20-step gradient unless the color configuration
    already assigns it a color.

    :param tree: Tree whose numeric property values are collected with
        ``utils.tree_prop_array``.
    :param props: Property names to visualize.
    :param prop2type: Not used by this function.
    :param padding_x: Not used by this function.
    :param padding_y: Not used by this function.
    :param internal_rep: Suffix naming the internal-node representation.
    :param color_config: Optional per-property color configuration.
    :return: List of layouts, one per property.
    """

    def parse_color_config(prop, color_config, minval, maxval):
        # Resolve gradient colors, explicit value colors and the value range
        # for one property; configured entries override the given defaults.
        settings = color_config.get(prop, {})

        explicit = settings.get('value2color', {})
        value2color = {float(key): value for key, value in explicit.items()} if explicit else {}

        details = settings.get('detail2color', {})
        cfg_min_color, cfg_min_val = details.get('color_min', (None, None))
        cfg_max_color, cfg_max_val = details.get('color_max', (None, None))
        cfg_mid_color, cfg_mid_val = details.get('color_mid', (None, None))

        max_color = cfg_max_color if cfg_max_color else 'red'
        min_color = cfg_min_color if cfg_min_color else 'white'
        mid_color = cfg_mid_color if cfg_mid_color else None

        if cfg_min_val:
            minval = float(cfg_min_val)
        if cfg_max_val:
            maxval = float(cfg_max_val)

        gradient = utils.build_custom_gradient(20, min_color, max_color, mid_color)
        return gradient, value2color, minval, maxval

    def numeric_values(name):
        # Sorted distinct values of a property as a float64 array.
        distinct = sorted(list(set(utils.tree_prop_array(tree, name, numeric=True))))
        return np.array(distinct).astype('float64')

    score_layouts = []

    for prop in props:
        # Leaf values first, then the internal-node representation
        leaf_all_values = numeric_values(prop)
        internal_prop = utils.add_suffix(prop, internal_rep)
        internalnode_all_values = numeric_values(internal_prop)

        pooled = np.concatenate((leaf_all_values, internalnode_all_values))
        pooled = pooled[~np.isnan(pooled)]
        value2color = {}
        minval, maxval = pooled.min(), pooled.max()

        if color_config and color_config.get(prop) is not None:
            gradientscolor, value2color, minval, maxval = parse_color_config(prop, color_config, minval, maxval)
        else:
            gradientscolor = utils.build_color_gradient(20, colormap_name='jet')

        # Preload the gradient color of every value that has no explicit one
        steps = np.linspace(minval, maxval, len(gradientscolor))
        for score in pooled:
            if score in value2color:
                continue
            nearest = np.abs(steps - score).argmin() + 1
            value2color[score] = gradientscolor[nearest]

        score_layouts.append(
            staple_layouts.LayoutBranchScore(
                name='BranchScore_' + prop,
                color_dict=value2color,
                score_prop=prop,
                internal_rep=internal_rep,
                value_range=[minval, maxval],
                color_range=[gradientscolor[20], gradientscolor[10], gradientscolor[1]],
            )
        )

    return score_layouts

1257 

def get_barplot_layouts(tree, props, level, prop2type, column_width=70, padding_x=1, padding_y=0, internal_rep='avg', anchor_column=None, color_config=None, paired_color=[]):
    """
    Build one ``LayoutBarplot`` per numeric property, each in its own column.

    :param tree: Tree whose property values are collected with
        ``utils.tree_prop_array``.
    :param props: Property names to visualize.
    :param level: Column index of the first layout; incremented per property.
    :param prop2type: Not used by this function.
    :param column_width: Base width of a barplot column.
    :param padding_x: Horizontal padding; multiplied by 10 for the layout.
    :param padding_y: Not used by this function.
    :param internal_rep: Suffix naming the internal-node representation.
    :param anchor_column: Optional property whose maximum value scales the
        width of every barplot relative to ``column_width``.
    :param color_config: Optional color configuration, looked up by property
        name and, failing that, by ``"name"``.
    :param paired_color: Not used; bar colors come from the module-level
        ``paired_color`` palette (see ``get_barplot_color``).
    :return: ``(layouts, level, prop_color_dict)`` where ``prop_color_dict``
        maps each property drawn with a default color to that color.
    """
    def get_barplot_color(level):
        """Determines the color for the barplot based on the level and available paired colors."""
        # The enclosing parameter of the same name shadows the module
        # palette; `global` makes this helper read the module-level one.
        global paired_color
        # `>=`: level == len(paired_color) is already past the last index.
        if level >= len(paired_color):
            return utils.random_color(h=None)
        return paired_color[level]

    def process_prop_values(tree, prop):
        """Extracts and processes property values, excluding NaNs."""
        prop_values = np.array(list(set(utils.tree_prop_array(tree, prop)))).astype('float64')
        return prop_values[~np.isnan(prop_values)]

    def calculate_column_width(prop_values, anchormax=None):
        """Calculates new column width based on property values and optional anchormax."""
        # Without any values there is nothing to scale by.
        if anchormax is not None and prop_values.size:
            return prop_values.max() / (anchormax / column_width)
        return column_width

    def configure_layout(prop, new_column_width, color_dict, color_prop, size_prop, barplot_color=None, size_range=None):
        """Configures and returns the layout for the current property."""
        layout_params = {
            'name': f'Barplot_{prop}',
            'prop': prop,
            'width': new_column_width,
            'color': None if color_dict else barplot_color,
            'colors': color_dict,
            'color_prop': color_prop,
            'size_prop': size_prop,
            'column': level,
            'internal_rep': internal_rep,
            'padding_x': padding_x * 10,
            'size_range': [] if size_range is None else size_range,
        }
        # Pass either the single color or the per-value colors, never both.
        if color_dict is None:
            del layout_params['colors']
            del layout_params['color_prop']
        else:
            del layout_params['color']
        return staple_layouts.LayoutBarplot(**layout_params)

    prop_color_dict = {}
    layouts = []
    barplot_minval = 0

    # Initialize anchor column values if provided
    anchormax = None
    if anchor_column:
        anchor_column_values = process_prop_values(tree, anchor_column)
        if anchor_column_values.size:
            anchormax = anchor_column_values.max()

    for prop in props:
        prop_values = process_prop_values(tree, prop)
        # max() of an empty array raises ValueError; with no values the
        # size range is simply left empty below.
        maxval = prop_values.max() if prop_values.size else None
        size_prop = prop if prop_values.any() else f"{prop}_{internal_rep}"
        new_column_width = calculate_column_width(prop_values, anchormax)
        barplot_color = get_barplot_color(level)

        # Determine color configuration if available
        if color_config and (color_config.get(prop) or color_config.get("name")):
            color_dict = color_config.get(prop, color_config.get("name")).get('value2color')
            color_prop = prop if color_config.get(prop) else "name"
            if color_prop != "name":
                # Convert all keys in color_dict to float
                try:
                    color_dict = {float(key): value for key, value in color_dict.items()}
                except ValueError:
                    # Best effort: keep the original keys when they are not numeric.
                    print(f"Warning: Unable to convert all keys to float for property '{prop}'.")
        else:
            # Apply default color logic
            color_dict = None
            color_prop = None
            prop_color_dict[prop] = barplot_color

        # Configure and add layout
        if maxval and maxval > barplot_minval:
            size_range = [barplot_minval, maxval]
        else:
            size_range = []
        layout = configure_layout(prop, new_column_width, color_dict, color_prop, size_prop, barplot_color, size_range)
        layouts.append(layout)
        level += 1

    return layouts, level, prop_color_dict

1347 

def get_bubble_layouts(tree, props, level, prop2type, padding_x=0, padding_y=0, internal_rep='avg', color_config=None, paired_color=[]):
    """
    Build one ``LayoutBubble`` per numeric property, each in its own column.

    Bubble sizes are scaled against the largest absolute non-NaN value of
    the property, with a maximum radius of 15.

    :param tree: Tree whose property values are collected with
        ``utils.tree_prop_array``.
    :param props: Property names to visualize.
    :param level: Column index of the first layout; incremented per property.
    :param prop2type: Not used by this function.
    :param padding_x: Horizontal padding passed to each layout.
    :param padding_y: Vertical padding passed to each layout.
    :param internal_rep: Not used for the layout itself.
    :param color_config: Not used by this function.
    :param paired_color: Not used by this function.
    :return: ``(layouts, level)``.
    """
    max_radius = 15
    bubbles = []

    for prop in props:
        # Distinct values as floats, NaNs dropped
        distinct = list(set(utils.tree_prop_array(tree, prop)))
        numeric = np.array(distinct).astype('float64')
        numeric = numeric[~np.isnan(numeric)]

        abs_maxval = np.abs(numeric).max()

        bubbles.append(
            staple_layouts.LayoutBubble(
                name=f'Bubble_{prop}',
                column=level,
                prop=prop,
                max_radius=max_radius,
                abs_maxval=abs_maxval,
                padding_x=padding_x,
                padding_y=padding_y,
            )
        )
        level += 1

    return bubbles, level

1373 

def get_heatmap_layouts(tree, props, level, column_width=70, padding_x=1, padding_y=0, internal_rep='avg', color_config=None, norm_method='min-max'):
    """Build one ``LayoutHeatmap`` per numerical property.

    For every property, the distinct numeric values reported by
    ``utils.tree_prop_array`` for the property itself and for its
    ``internal_rep``-suffixed counterpart are pooled, NaNs are dropped, and
    each remaining value is normalised with ``norm_method`` and mapped to
    the nearest entry of a colour gradient.

    Args:
        tree: Tree handed to ``utils.tree_prop_array``.
        props: Names of the numerical properties to draw.
        level: Column of the first layout; incremented once per property.
        column_width: Forwarded to the layout as ``width``.
        padding_x: Forwarded to the layout.
        padding_y: Forwarded to the layout.
        internal_rep: Suffix (via ``utils.add_suffix``) selecting the
            internal-node representative property.
        color_config: Optional mapping ``prop -> {'value2color': ...,
            'detail2color': ...}`` overriding colours and value bounds.
        norm_method: ``'min-max'``, ``'mean'`` or ``'zscore'``.

    Returns:
        ``(layouts, level)``: the created layouts and the next free column.

    Raises:
        ValueError: if a property has no numeric value at all, or if
            ``norm_method`` is unsupported and a value needs a gradient
            colour.
    """
    default_nan_color = '#EBEBEB'
    # Interval of the normalised scale that each method is binned over.
    index_ranges = {'min-max': (0, 1), 'mean': (-1, 1), 'zscore': (-3, 3)}

    def min_max_normalize(value, minval, maxval):
        span = maxval - minval
        return 0 if span == 0 else (value - minval) / span

    def mean_normalize(value, mean_val, minval, maxval):
        span = maxval - minval
        return 0 if span == 0 else (value - mean_val) / span

    def z_score_normalize(value, mean_val, std_val):
        return 0 if std_val == 0 else (value - mean_val) / std_val

    def is_given(raw):
        # A numeric 0 is a legitimate bound, so truthiness must not be used.
        return raw is not None and raw != ''

    def parse_color_config(prop, color_config, minval, maxval):
        """Read colours and optional value bounds configured for ``prop``."""
        prop_config = color_config.get(prop) or {}
        color_dict = prop_config.get('value2color') or {}
        value2color = {float(key): value for key, value in color_dict.items()}

        detail2color = prop_config.get('detail2color') or {}
        min_color, min_bound = detail2color.get('color_min', (None, None))
        max_color, max_bound = detail2color.get('color_max', (None, None))
        mid_color, _ = detail2color.get('color_mid', (None, None))
        nan_color, _ = detail2color.get('color_nan', (None, None))

        if is_given(min_bound):
            minval = float(min_bound)
        if is_given(max_bound):
            maxval = float(max_bound)

        gradientscolor = utils.build_custom_gradient(
            20, min_color or 'white', max_color or 'red', mid_color or None)
        return gradientscolor, value2color, minval, maxval, nan_color or default_nan_color

    def numeric_values(prop_name):
        raw = utils.tree_prop_array(tree, prop_name, numeric=True)
        return np.array(sorted(set(raw))).astype('float64')

    layouts = []
    for prop in props:
        value2color = {}
        leaf_all_values = numeric_values(prop)
        internal_prop = utils.add_suffix(prop, internal_rep)
        internalnode_all_values = numeric_values(internal_prop)
        prop_all_values = np.concatenate((leaf_all_values, internalnode_all_values))
        prop_all_values = prop_all_values[~np.isnan(prop_all_values)]

        if prop_all_values.size == 0:
            # np.min would raise an opaque "zero-size array" ValueError here.
            raise ValueError(
                f"No numeric values found for property '{prop}'; cannot build a heatmap.")

        minval, maxval = np.min(prop_all_values), np.max(prop_all_values)
        mean_val = np.mean(prop_all_values)
        std_val = np.std(prop_all_values)

        if color_config and color_config.get(prop) is not None:
            gradientscolor, value2color, minval, maxval, nan_color = parse_color_config(
                prop, color_config, minval, maxval)
        else:
            gradientscolor = None
            nan_color = default_nan_color

        if not gradientscolor:
            # Sequential map for min-max, diverging map for "mean"/"zscore".
            colormap_name = "Reds" if norm_method == 'min-max' else "coolwarm"
            gradientscolor = utils.build_color_gradient(20, colormap_name=colormap_name)

        # The bin positions depend only on the method, so compute them once.
        index_range = index_ranges.get(norm_method)
        index_values = None
        if index_range is not None:
            index_values = np.linspace(index_range[0], index_range[1], len(gradientscolor))

        # NaNs were filtered out above, so every remaining value is finite.
        for search_value in prop_all_values:
            search_value = float(search_value)
            if search_value in value2color:
                continue  # explicitly configured colours win
            if index_values is None:
                raise ValueError("Unsupported normalization method.")
            if norm_method == "min-max":
                normalized_value = min_max_normalize(search_value, minval, maxval)
            elif norm_method == "mean":
                normalized_value = mean_normalize(search_value, mean_val, minval, maxval)
            else:  # "zscore"
                normalized_value = z_score_normalize(search_value, mean_val, std_val)
            # +1: the gradient is looked up with 1-based keys here
            # (presumably how utils builds it -- TODO confirm).
            index = np.abs(index_values - normalized_value).argmin() + 1
            value2color[search_value] = gradientscolor.get(index, "")

        layout = staple_layouts.LayoutHeatmap(
            name=f'Heatmap_{prop}_{norm_method}', column=level,
            width=column_width, padding_x=padding_x, padding_y=padding_y,
            internal_rep=internal_rep, heatmap_prop=prop, maxval=maxval, minval=minval,
            value_color=value2color, value_range=[minval, maxval], color_range=gradientscolor,
            absence_color=nan_color)

        layouts.append(layout)
        level += 1

    return layouts, level

1490 

1491 

1492# def get_heatmap_layouts(tree, props, level, column_width=70, padding_x=1, padding_y=0, internal_rep='avg', color_config=None, norm_method='min-max'): 

1493# layouts = [] 

1494# all_prop_values = [list(set(utils.tree_prop_array(tree, prop))) for prop in props] 

1495# all_prop_values = np.array(utils.flatten(all_prop_values)).astype('float64') 

1496 

1497# for prop in props: 

1498# if color_config and color_config.get(prop) is not None: 

1499# prop_config = color_config[prop] 

1500 

1501# color_dict = {} 

1502 

1503# # First, try to use value2color mappings if they exist and are applicable 

1504# if 'value2color' in prop_config and prop_config['value2color']: 

1505# color_dict = prop_config['value2color'] 

1506# sorted_color_dict = {float(key): value for key, value in color_dict.items()} 

1507# gradientscolor = sorted_color_dict.values() 

1508# elif 'detail2color' in prop_config and prop_config['detail2color']: 

1509# min_color = prop_config['detail2color'].get('color_min', 'white') 

1510# max_color = prop_config['detail2color'].get('color_max', 'red') 

1511# mid_color = prop_config['detail2color'].get('color_mid', None) 

1512# gradientscolor = utils.build_custom_gradient(20, min_color, max_color, mid_color) 

1513# else: 

1514# if norm_method == 'min-max': 

1515# gradientscolor = utils.build_color_gradient(20, colormap_name="Reds") 

1516# else: # "mean" "zscore" 

1517# gradientscolor = utils.build_color_gradient(20, colormap_name="coolwarm") 

1518 

1519# minval, maxval = all_prop_values.min(), all_prop_values.max() 

1520# mean_val = all_prop_values.mean() 

1521# std_val = all_prop_values.std() 

1522 

1523# layout = staple_layouts.LayoutHeatmapOld(name='Heatmap_'+prop, column=level,  

1524# width=column_width, padding_x=padding_x, padding_y=padding_y, \ 

1525# internal_rep=internal_rep, prop=prop, maxval=maxval, minval=minval,\ 

1526# mean_val=mean_val, std_val=std_val, \ 

1527# color_dict=gradientscolor, norm_method=norm_method) 

1528# layouts.append(layout)  

1529# level += 1 

1530# return layouts, level 

1531 

def get_heatmap_matrix_layouts(layout_name, numerical_props, norm_method, internal_num_rep, color_config, args, level):
    """Build matrix heatmap layouts for a group of numerical properties.

    Scalar properties share a single ``LayoutPropsMatrixOld``; when
    ``numerical2matrix`` reports list-valued properties, one layout is
    created per such property instead.

    Args:
        layout_name: Unused; kept for interface compatibility.
        numerical_props: Property names handed to ``numerical2matrix``.
        norm_method: Normalisation method forwarded to ``numerical2matrix``.
        internal_num_rep: Internal-node representative suffix.
        color_config: Optional colour configuration.
        args: Object providing ``column_width``.
        level: Column of the first layout; incremented per layout created.

    Returns:
        ``(layouts, level)``: the created layouts and the next free column.
    """
    def build_layout(name, matrix, matrix_props, is_list, value_color, value_range, column):
        # Single construction site for both the scalar and list cases.
        return profile_layouts.LayoutPropsMatrixOld(
            name=name,
            matrix=matrix,
            matrix_type='numerical',
            matrix_props=matrix_props,
            is_list=is_list,
            value_color=value_color,
            value_range=value_range,
            column=column,
            poswidth=args.column_width)

    layouts = []
    # NOTE(review): `tree` is not a parameter of this function, so it is
    # resolved as a module-level name -- confirm one exists or thread it in.
    # numerical2matrix returns seven values; the trailing set of scalar
    # property names is not needed here.
    (matrix, minval, maxval, value2color,
     results_list, list_props, _single_props) = numerical2matrix(
        tree,
        numerical_props,
        count_negative=True,
        internal_num_rep=internal_num_rep,
        color_config=color_config,
        norm_method=norm_method)

    if not list_props:
        layouts.append(build_layout(
            f"Heatmap_{numerical_props}", matrix, numerical_props, False,
            value2color, [minval, maxval], level))
        level += 1
    else:
        for list_prop in list(list_props):
            matrix, minval, maxval, value2color = results_list[list_prop]
            layouts.append(build_layout(
                f"Heatmap_{list_prop}", matrix, list_prop, True,
                value2color, [minval, maxval], level))
            level += 1
    return layouts, level

1571 

def get_prop2type(node):
    """Infer a coarse type (float, list or str) for each property of a node.

    The '_speciesFunction' entry, when present, is removed from
    ``node.props`` in place before the properties are inspected. Properties
    whose value equals the string 'NaN' are left out of the result.

    Returns:
        dict mapping property name to ``float`` (any number), ``list``
        (exact list instances) or ``str`` (everything else).
    """
    props = node.props
    props.pop('_speciesFunction', None)

    def infer(value):
        if isinstance(value, numbers.Number):
            return float
        return list if type(value) is list else str

    return {
        name: infer(value)
        for name, value in props.items()
        if value != 'NaN'
    }

1587 

def categorical2matrix(tree, profiling_props, dtype=str, color_config=None):
    """
    Build a leaf-by-property matrix of categorical values plus a colour map.

    Input:
        tree: A tree structure with nodes, each having properties.
        profiling_props: A list of property names to be processed for each leaf in the tree.
        dtype: Kept for backward compatibility; values are stored unchanged
            whatever it is, so every row has one entry per property.
        color_config: Optional colour configuration; the '*' entry's
            'value2color' mapping is used when present.

    Output:
        A dictionary mapping each leaf name to its row of values, with "NaN"
        standing in for missing properties.
        A dictionary mapping values to colours; "NaN" maps to light grey.
        The caller's color_config is never modified.
    """
    absence_value = "NaN"
    absence_color = "#EBEBEB"
    matrix_prop = '*'  # special case for matrix

    leaf2matrix = {}
    for node in tree.traverse():
        if not node.is_leaf:
            continue
        row = []
        for profiling_prop in profiling_props:
            val = node.props.get(profiling_prop)
            # Always emit exactly one cell per property to keep columns aligned.
            row.append(absence_value if val is None else val)
        leaf2matrix[node.name] = row

    # get color
    matrix_config = color_config.get(matrix_prop) if color_config else None
    configured_colors = matrix_config.get('value2color') if matrix_config else None

    if configured_colors:
        # Copy so that adding the absence entry does not leak into the config.
        value2color = dict(configured_colors)
        value2color[absence_value] = absence_color
    else:
        # No usable explicit mapping: assign palette colours automatically.
        all_values = sorted(set(utils.flatten(list(leaf2matrix.values()))))
        value2color = utils.assign_color_to_values(all_values, paired_color)
        if absence_value in value2color:
            value2color[absence_value] = absence_color

    return leaf2matrix, value2color

1627 

def numerical2matrix(tree, profiling_props, count_negative=True, internal_num_rep=None, color_config=None, norm_method='min-max'):
    """
    Collect numerical properties into per-node rows and derive value colours.

    Input:
        tree: A tree structure with nodes, each having properties.
        profiling_props: A list of property names to be processed for each node in the tree.
        count_negative: When False, negative values are excluded from the
            value range and drawn with the absence colour.
        internal_num_rep: Suffix (via utils.add_suffix) of the property read
            on internal nodes; the string 'none' skips internal nodes.
        color_config: Optional colour configuration. The '*' entry is applied
            first, then each property's own entry overrides it.
        norm_method: 'min-max', 'mean' or 'zscore'.

    Output (7-tuple):
        node2matrix_single: node name -> row of scalar values (None if missing).
        minval_single, maxval_single, value2color_single: range and colour
            map of the scalar values, or None when no leaf has a scalar value.
        results_list: prop -> (matrix, minval, maxval, value2color) for
            list-valued props (all None for the others), or None when there
            are no list-valued props.
        list_props: set of props holding list values.
        single_props: set of props holding scalar values on at least one leaf.

    Raises:
        ValueError: if norm_method is unsupported and a value needs a
            gradient colour.
    """
    default_nan_color = '#EBEBEB'
    all_props_wildcard = '*'
    # Interval of the normalised scale that each method is binned over.
    index_ranges = {'min-max': (0, 1), 'mean': (-1, 1), 'zscore': (-3, 3)}

    def min_max_normalize(value, minval, maxval):
        span = maxval - minval
        return 0 if span == 0 else (value - minval) / span

    def mean_normalize(value, mean_val, minval, maxval):
        span = maxval - minval
        return 0 if span == 0 else (value - mean_val) / span

    def z_score_normalize(value, mean_val, std_val):
        return 0 if std_val == 0 else (value - mean_val) / std_val

    def is_given(raw):
        # A numeric 0 is a legitimate bound, so truthiness must not be used.
        return raw is not None and raw != ''

    def is_missing(value):
        return value is None or math.isnan(value)

    def parse_color_config(color_config, profiling_props, all_props_wildcard, minval, maxval):
        """Apply the wildcard entry, then each property's entry, in order."""
        gradientscolor = None
        nan_color = default_nan_color
        max_color = 'red'
        min_color = 'white'
        mid_color = None
        value2color = {}

        for config_key in [all_props_wildcard, *(profiling_props or ())]:
            prop_config = color_config.get(config_key)
            if prop_config is None:
                continue
            if prop_config.get('value2color'):
                value2color = {float(key): value for key, value in prop_config['value2color'].items()}
            detail2color = prop_config.get('detail2color')
            if not detail2color:
                continue

            new_min_color, new_min_val = detail2color.get('color_min', (None, None))
            new_max_color, new_max_val = detail2color.get('color_max', (None, None))
            new_mid_color, _ = detail2color.get('color_mid', (None, None))
            new_nan_color, _ = detail2color.get('color_nan', (None, None))

            # Later entries only override what they actually specify.
            max_color = new_max_color or max_color
            min_color = new_min_color or min_color
            mid_color = new_mid_color or mid_color
            nan_color = new_nan_color or nan_color
            if is_given(new_min_val):
                minval = float(new_min_val)
            if is_given(new_max_val):
                maxval = float(new_max_val)

            gradientscolor = utils.build_custom_gradient(20, min_color, max_color, mid_color)

        return value2color, gradientscolor, minval, maxval, nan_color

    def process_color_configuration(node2matrix, profiling_props=None):
        """Return (minval, maxval, value2color) for the rows of one matrix."""
        value2color = {}
        all_values_raw = list({value for row in node2matrix.values() for value in row})

        # Values that define the colour scale; sorted to keep the statistics
        # independent of set iteration order.
        scale_values = sorted(
            value for value in all_values_raw
            if not is_missing(value) and (count_negative or value >= 0))
        if scale_values:
            minval, maxval = scale_values[0], scale_values[-1]
            mean_val = np.mean(scale_values)
            std_val = np.std(scale_values)
        else:
            # Nothing to scale: every value below ends up with the absence
            # colour, so the statistics are never used.
            minval = maxval = mean_val = std_val = None

        if color_config:
            value2color, gradientscolor, minval, maxval, nan_color = parse_color_config(
                color_config, profiling_props, all_props_wildcard, minval, maxval)
        else:
            gradientscolor = None
            nan_color = default_nan_color

        if not gradientscolor:
            # Sequential map for min-max, diverging map for "mean"/"zscore".
            colormap_name = "Reds" if norm_method == 'min-max' else "coolwarm"
            gradientscolor = utils.build_color_gradient(20, colormap_name=colormap_name)

        index_range = index_ranges.get(norm_method)
        index_values = None
        if index_range is not None:
            index_values = np.linspace(index_range[0], index_range[1], len(gradientscolor))

        for search_value in all_values_raw:
            if is_missing(search_value):
                value2color[search_value] = nan_color
                continue
            search_value = float(search_value)
            if search_value in value2color:
                continue  # explicitly configured colours win
            if not count_negative and search_value < 0:
                value2color[search_value] = nan_color
                continue
            if index_values is None:
                raise ValueError("Unsupported normalization method.")
            if norm_method == "min-max":
                normalized_value = min_max_normalize(search_value, minval, maxval)
            elif norm_method == "mean":
                normalized_value = mean_normalize(search_value, mean_val, minval, maxval)
            else:  # "zscore"
                normalized_value = z_score_normalize(search_value, mean_val, std_val)
            # +1: the gradient is looked up with 1-based keys here
            # (presumably how utils builds it -- TODO confirm).
            index = np.abs(index_values - normalized_value).argmin() + 1
            value2color[search_value] = gradientscolor.get(index, "")
        return minval, maxval, value2color

    node2matrix_single = {}
    node2matrix_list = {prop: {} for prop in profiling_props}

    single_props = set()
    list_props = set()

    for node in tree.traverse():
        single_row = node2matrix_single[node.name] = []
        for profiling_prop in profiling_props:
            if node.is_leaf:
                lookup_prop = profiling_prop
            elif internal_num_rep != 'none':
                lookup_prop = utils.add_suffix(profiling_prop, internal_num_rep)
            else:
                continue  # internal nodes are not represented

            prop_value = node.props.get(lookup_prop)
            if prop_value is None:
                single_row.append(None)
                node2matrix_list[profiling_prop].setdefault(node.name, []).append(None)
            elif isinstance(prop_value, list):
                list_props.add(profiling_prop)
                node2matrix_list[profiling_prop][node.name] = list(map(float, prop_value))
            else:
                if node.is_leaf:
                    # Only leaves decide whether a prop counts as scalar.
                    single_props.add(profiling_prop)
                single_row.append(float(prop_value))

    # Process single values
    if single_props:
        minval_single, maxval_single, value2color_single = process_color_configuration(
            node2matrix_single, profiling_props)
    else:
        minval_single, maxval_single, value2color_single = None, None, None

    if list_props:
        # Process list values for each profiling_prop
        results_list = {}
        for prop in profiling_props:
            if prop in list_props:
                minval_list, maxval_list, value2color_list = process_color_configuration(node2matrix_list[prop])
                results_list[prop] = (node2matrix_list[prop], minval_list, maxval_list, value2color_list)
            else:
                results_list[prop] = (None, None, None, None)
    else:
        results_list = None

    return node2matrix_single, minval_single, maxval_single, value2color_single, results_list, list_props, single_props

1863 

1864# def _numerical2matrix(tree, profiling_props, count_negative=True, internal_num_rep=None, color_config=None): 

1865# """ 

1866# Input: 

1867# tree: A tree structure with nodes, each having properties. 

1868# profiling_props: A list of property names to be processed for each leaf in the tree. 

1869 

1870# Output: 

1871# A dictionary of matrix representation of the tree leaves and their properties. 

1872# A sorted dictionary mapping property values to their corresponding colors. 

1873# """ 

1874# is_list = False 

1875# node2matrix = {} 

1876# for node in tree.traverse(): 

1877# node2matrix[node.name] = [] 

1878# for profiling_prop in profiling_props: 

1879# if node.is_leaf: 

1880# prop_value = node.props.get(profiling_prop)  

1881# if prop_value is not None:  

1882# if isinstance(prop_value, list): # Check if the property value is a list 

1883# is_list = True # Set is_array to True upon finding the first list 

1884 

1885# for array_element in prop_value: 

1886# node2matrix[node.name].append(float(array_element)) 

1887 

1888# # prop_value = list(map(float, prop_value)) 

1889# # node2matrix[node.name].append(prop_value) 

1890 

1891# else: # If not a list, directly handle the single value case 

1892# node2matrix[node.name].append(float(prop_value)) 

1893 

1894# else: # If prop_value is None, append None 

1895# node2matrix[node.name].append(None) 

1896# else: 

1897# if internal_num_rep != 'none': 

1898# representative_prop = utils.add_suffix(profiling_prop, internal_num_rep) 

1899# prop_value = node.props.get(representative_prop) 

1900# if prop_value is not None:  

1901# if isinstance(prop_value, list): # Check if the property value is a list 

1902# is_list = True # Set is_array to True upon finding the first list 

1903# for array_element in prop_value: 

1904# node2matrix[node.name].append(float(array_element)) 

1905# # prop_value = list(map(float, prop_value)) 

1906# # node2matrix[node.name].append(prop_value) 

1907# else: # If not a list, directly handle the single value case 

1908# node2matrix[node.name].append(float(prop_value)) 

1909 

1910# else: # If prop_value is None, append None 

1911# node2matrix[node.name].append(None) 

1912 

1913# #get color 

1914# negative_color = 'black' 

1915# value2color = {} 

1916# matrix_prop = '-' # special case for matrix 

1917# if color_config and color_config.get(matrix_prop) is not None: 

1918# prop_config = color_config[matrix_prop] 

1919 

1920# # First, try to use value2color mappings if they exist and are applicable 

1921# if 'value2color' in prop_config and prop_config['value2color']: 

1922# value2color = prop_config['value2color'] 

1923# value2color = {float(key): value for key, value in value2color.items()} 

1924# #gradientscolor = list(sorted_value2color.values()) 

1925 

1926# if 'detail2color' in prop_config and prop_config['detail2color']: 

1927# min_color = prop_config['detail2color'].get('color_min', 'white') 

1928# max_color = prop_config['detail2color'].get('color_max', 'red') 

1929# mid_color = prop_config['detail2color'].get('color_mid', None) 

1930# gradientscolor = utils.build_custom_gradient(20, min_color, max_color, mid_color) 

1931 

1932# else: 

1933# gradientscolor = utils.build_color_gradient(20, colormap_name='Reds') 

1934 

1935# # everything 

1936# all_values_raw = list(set(utils.flatten([sublist for sublist in node2matrix.values()]))) 

1937# all_values = sorted(list(filter(lambda x: x is not None and not math.isnan(x), all_values_raw))) 

1938 

1939# if not count_negative: 

1940# # remove negative values 

1941# positive_values = sorted(list(filter(lambda x: x is not None and not math.isnan(x) and x >= 0, all_values))) 

1942# minval, maxval = min(positive_values), max(positive_values) 

1943# else: 

1944# minval, maxval = min(all_values), max(all_values) 

1945 

1946# num = len(gradientscolor) 

1947# index_values = np.linspace(minval, maxval, num) 

1948 

1949# for search_value in all_values: 

1950# if search_value not in value2color: 

1951# if not count_negative and search_value < 0: 

1952# value2color[search_value] = negative_color 

1953# # elif search_value == 0: 

1954# # value2color[search_value] = color_0 

1955# else: 

1956# index = np.abs(index_values - search_value).argmin()+1 

1957# value2color[search_value] = gradientscolor[index] 

1958 

1959# return node2matrix, minval, maxval, value2color, is_list 

1960 

def binary2matrix(tree, profiling_props, color_config=None):
    """
    Encode boolean properties as numbers and map those numbers to colours.

    Input:
        tree: A tree structure with nodes, each having properties.
        profiling_props: A list of property names to be processed for each node in the tree.
        color_config: Optional colour configuration; 'value2color' of the
            '*' entry and of each property entry is honoured, later entries
            replacing earlier ones.

    Output:
        A dictionary mapping node names to rows: 1/0 (or None when missing)
        for leaves, and the value of utils.counter2ratio for internal nodes
        that carry a "counter" property.
        A dictionary mapping the values found to colours.
        A flag telling whether any leaf held a list value.
    """
    bool_codes = {True: 1, False: 0}
    wildcard = '*'
    found_list = False
    node2matrix = {}

    def encode(raw):
        return bool_codes.get(utils.str2bool(raw))

    for node in tree.traverse():
        row = node2matrix[node.name] = []
        for prop_name in profiling_props:
            if not node.is_leaf:
                # Internal nodes: parse the counter into a True/Total ratio.
                counter_prop = utils.add_suffix(prop_name, "counter")
                if node.props.get(counter_prop):
                    row.append(utils.counter2ratio(node, counter_prop))
                continue

            raw_value = node.props.get(prop_name)
            if raw_value is None:
                row.append(None)
            elif isinstance(raw_value, list):
                found_list = True
                for element in raw_value:
                    row.append(encode(element))
            else:
                row.append(encode(raw_value))

    value2color = {}
    if color_config:
        for config_key in (wildcard, *(profiling_props or ())):
            prop_config = color_config.get(config_key)
            if prop_config is None:
                continue
            configured = prop_config.get('value2color')
            if configured:
                value2color = {float(key): color for key, color in configured.items()}

    gradientscolor = utils.build_color_gradient(20, colormap_name='Reds')

    # Colour every remaining value on the fixed 0..1 scale.
    distinct_values = set(utils.flatten(list(node2matrix.values())))
    finite_values = sorted(
        value for value in distinct_values
        if value is not None and not math.isnan(value))
    index_values = np.linspace(0, 1, len(gradientscolor))
    for search_value in finite_values:
        if search_value in value2color:
            continue
        index = np.abs(index_values - search_value).argmin() + 1
        value2color[search_value] = gradientscolor[index]

    return node2matrix, value2color, found_list

2026 

def float2matrix(tree, profiling_props, count_negative=True):
    """
    Encode numerical leaf properties as a FASTA-like text of gradient codes.

    Input:
        tree: A tree structure with nodes, each having properties.
        profiling_props: A list of property names to be processed for each leaf in the tree.
        count_negative: When False, negative values are left out of the
            value range and written as 'x'.

    Output:
        A fasta text output of matrix representation of the tree leaves and their properties in color codes.
        Values are binned linearly between the smallest and largest value
        onto the letters 'a'..'t'; missing (None) and NaN values are '-'.
        A sorted dictionary mapping property values to their corresponding codes.
    """
    gradients = list('abcdefghijklmnopqrst')  # white to red
    absence_color = '-'
    negative_color = 'x'

    def to_floats(value):
        """Return the property value as a flat list of floats/None."""
        items = value if isinstance(value, list) else [value]
        return [float(item) if item is not None else None for item in items]

    def is_missing(value):
        return value is None or math.isnan(value)

    leaf2matrix = {}
    for node in tree.traverse():
        if node.is_leaf:
            row = []
            for prop in profiling_props:
                row.extend(to_floats(node.props.get(prop)))
            leaf2matrix[node.name] = row

    # Values that define the gradient scale.
    scale_values = [
        value
        for row in leaf2matrix.values()
        for value in row
        if not is_missing(value) and (count_negative or value >= 0)
    ]
    # With nothing to scale, every cell is absent or negative, so the bins
    # are never consulted.
    bins = None
    if scale_values:
        bins = np.linspace(min(scale_values), max(scale_values), len(gradients))

    value2color = {}
    records = []
    for leaf, row in leaf2matrix.items():
        codes = []
        for search_value in row:
            if is_missing(search_value):
                # NaN cannot be binned (argmin over NaN distances returns 0).
                codes.append(absence_color)
                continue
            if not count_negative and search_value < 0:
                code = negative_color
            else:
                code = gradients[np.abs(bins - search_value).argmin()]
            codes.append(code)
            value2color[search_value] = code
        records.append('\n>' + leaf + '\n' + ''.join(codes))

    matrix = ''.join(records)
    sorted_value2color = OrderedDict(sorted(value2color.items()))
    return matrix, sorted_value2color

2095 

def str2matrix(tree, profiling_props):
    """
    Encode categorical leaf properties as a FASTA-like text of letter codes.

    Input:
        tree: A tree structure with nodes, each having properties.
        profiling_props: A list of property names to be processed for each leaf in the tree.

    Output:
        A fasta text output of matrix representation of the tree leaves and their properties in color codes.
        Missing properties (None) are written as '-'; the value 'NaN' is 'G'.
        A dictionary mapping property values to their letter codes. Codes
        are assigned in sorted value order and wrap around once the
        alphabet is exhausted.
    """
    aa = [
        'A', 'R', 'N',
        'D', 'C', 'Q',
        'E', 'H',
        'I', 'S', 'K',
        'M', 'F', 'P',
        'L', 'T', 'W',
        'Z', 'V', 'B',
        'Y', 'X',
    ]
    absence_color = 'G'

    leaf2matrix = {
        node.name: [node.props.get(prop) for prop in profiling_props]
        for node in tree.traverse()
        if node.is_leaf
    }

    # None cannot be ordered against strings, so missing entries are kept
    # out of the sort and rendered with the '-' fallback below.
    all_values = sorted({
        value
        for row in leaf2matrix.values()
        for value in row
        if value is not None
    })

    value2color = {}
    for i, val in enumerate(all_values):
        if val != 'NaN':
            value2color[val] = aa[i % len(aa)]  # Use modulo to avoid out-of-range errors
        else:
            value2color[val] = absence_color

    matrix = ''.join(
        '\n>' + leaf + '\n' + ''.join(value2color.get(item, '-') for item in row)
        for leaf, row in leaf2matrix.items()
    )

    return matrix, value2color

2135 

def single2matrix(tree, profiling_prop):
    """Expand one categorical property into a presence/absence matrix.

    Leaves carrying the property get a 1/0 row with one column per distinct
    value; internal nodes carrying the "counter" property get the row
    returned by ``utils.categorical2ratio``. Nodes with neither are left out.

    Returns:
        ``(node2matrix, value2color, all_categorical_values)`` where 1 maps
        to red, 0 to light grey and other values to a "Reds" gradient entry
        chosen on a 0..1 scale.
    """
    presence_color = '#E60A0A'  # red
    absence_color = '#EBEBEB'   # lightgrey

    distinct = set(utils.flatten(utils.tree_prop_array(tree, profiling_prop)))
    # 'NaN' sorts first, the remaining values in natural order.
    all_categorical_values = sorted(list(distinct), key=lambda x: (x != 'NaN', x))

    node2matrix = {}
    for node in tree.traverse():
        if node.is_leaf:
            # Leaf node processing for presence/absence
            leaf_value = node.props.get(profiling_prop)
            if leaf_value:
                node2matrix[node.name] = [
                    1 if candidate == leaf_value else 0
                    for candidate in all_categorical_values
                ]
            continue

        # Internal node processing: ratio of each value among descendants
        counter_prop = utils.add_suffix(profiling_prop, "counter")
        if node.props.get(counter_prop):
            node2matrix[node.name] = utils.categorical2ratio(
                node, counter_prop, all_categorical_values)

    # get color
    gradientscolor = utils.build_color_gradient(20, colormap_name='Reds')
    value2color = {1: presence_color, 0: absence_color}

    cell_values = set(utils.flatten(list(node2matrix.values())))
    finite_values = sorted(
        value for value in cell_values
        if value is not None and not math.isnan(value))
    index_values = np.linspace(0, 1, len(gradientscolor))  # binary value 0 to 1
    for search_value in finite_values:
        if search_value in value2color:
            continue
        index = np.abs(index_values - search_value).argmin() + 1
        value2color[search_value] = gradientscolor[index]

    return node2matrix, value2color, all_categorical_values

2170 

2171# def single2matrix(tree, profiling_prop): 

2172# precence_color = '#E60A0A' # #E60A0A red 

2173# absence_color = '#EBEBEB' # #EBEBEB lightgrey 

2174# all_values = sorted(list(set(utils.flatten(utils.tree_prop_array(tree, profiling_prop)))), key=lambda x: (x != 'NaN', x)) 

2175# leaf2matrix = {} 

2176# for leaf in tree.leaves(): 

2177# leaf2matrix[leaf.name] = [] 

2178# for val in all_values: 

2179# if val == leaf.props.get(profiling_prop): 

2180# leaf2matrix[leaf.name].append(1) 

2181# else: 

2182# leaf2matrix[leaf.name].append(0) 

2183# value2color = {1: precence_color, 0: absence_color} 

2184# return leaf2matrix, value2color, all_values 

2185 

def single2profile(tree, profiling_prop):
    """Encode a single-valued categorical property as a FASTA-like profile.

    Every leaf yields one record of the form ``"\n>" + leaf.name + "\n" + row``
    where ``row`` has one character per known category: ``'p'`` when the
    category equals the leaf's value and ``'z'`` otherwise.  Leaves whose
    property is missing or falsy get a row made only of ``'z'``.

    Unlike the previous implementation, a leaf without a value no longer
    appends an extra trailing newline, so all records are formatted the
    same way and no blank lines appear between records.

    Args:
        tree: Tree object exposing ``leaves()``; leaves expose ``name``
            and ``props``.
        profiling_prop: Name of the property to profile.

    Returns:
        A tuple ``(matrix, all_values)``: the profile text and the sorted
        list of categories (``'NaN'`` first) giving the column order.
    """
    all_values = sorted(
        set(utils.flatten(utils.tree_prop_array(tree, profiling_prop))),
        key=lambda x: (x != 'NaN', x))

    # Symbols only; presumably rendered downstream as red ('p') and
    # light grey ('z') -- TODO confirm against the profile layout.
    presence = 'p'
    absence = 'z'
    blank_row = absence * len(all_values)

    # Collect records and join once instead of growing a string with +=.
    records = []
    for leaf in tree.leaves():
        leaf_value = leaf.props.get(profiling_prop)  # read once per leaf
        if leaf_value:
            row = ''.join(
                presence if val == leaf_value else absence
                for val in all_values)
        else:
            row = blank_row
        records.append('\n>' + leaf.name + '\n' + row)

    return ''.join(records), all_values

2202 

def multiple2matrix(tree, profiling_prop):
    """Build a per-node matrix for a multi-valued categorical property.

    Leaves that carry a truthy value for ``profiling_prop`` get a 1/0 row
    (1 where the category is contained in the leaf's value, tested with
    ``in``).  Internal nodes that carry a truthy ``<profiling_prop>``
    "counter" property (name built by ``utils.add_suffix``) get the row
    returned by ``utils.categorical2ratio``.  Other nodes are omitted.

    Args:
        tree: Tree object exposing ``traverse()``; nodes expose ``is_leaf``,
            ``name`` and ``props``.
        profiling_prop: Name of the property to profile.

    Returns:
        A tuple ``(node2matrix, value2color, all_categorical_values)``:
        rows keyed by node name, a mapping from matrix value to colour,
        and the sorted list of categories (``'NaN'`` first).
    """
    presence_hex = '#E60A0A'  # red
    absence_hex = '#EBEBEB'  # light grey

    # Column order of the matrix: distinct values, 'NaN' placeholder first.
    distinct = set(utils.flatten(utils.tree_prop_array(tree, profiling_prop)))
    all_categorical_values = sorted(distinct, key=lambda item: (item != 'NaN', item))

    # NOTE(review): rows are keyed by node.name, so nodes sharing a name
    # overwrite each other -- confirm names are unique upstream.
    node2matrix = {}
    for node in tree.traverse():
        if node.is_leaf:
            # Leaf: membership of every known category in the leaf's value.
            # NOTE(review): if the value is a plain string, `in` performs a
            # substring test -- presumably the value is a list; verify.
            leaf_values = node.props.get(profiling_prop)
            if leaf_values:
                node2matrix[node.name] = [
                    1 if candidate in leaf_values else 0
                    for candidate in all_categorical_values
                ]
            continue

        # Internal node: per-category values derived from its counter
        # property (presumably ratios in [0, 1] -- TODO confirm against
        # utils.categorical2ratio).
        counter_prop = utils.add_suffix(profiling_prop, "counter")
        if node.props.get(counter_prop):
            node2matrix[node.name] = utils.categorical2ratio(
                node, counter_prop, all_categorical_values)

    # Exact 1 and 0 keep fixed colours; anything else is mapped onto the
    # nearest step of a 20-step 'Reds' gradient.
    gradientscolor = utils.build_color_gradient(20, colormap_name='Reds')
    value2color = {1: presence_hex, 0: absence_hex}

    observed = set(utils.flatten(list(node2matrix.values())))
    usable = sorted(
        cell for cell in observed
        if cell is not None and not math.isnan(cell)
    )
    steps = np.linspace(0, 1, len(gradientscolor))
    for cell in usable:
        if cell in value2color:
            continue
        # The +1 presumably matches 1-based keys in the gradient returned
        # by utils.build_color_gradient -- TODO confirm.
        nearest = np.abs(steps - cell).argmin() + 1
        value2color[cell] = gradientscolor[nearest]

    return node2matrix, value2color, all_categorical_values

2234 

2235# def multiple2matrix(tree, profiling_prop): 

2236# precence_color = '#E60A0A' # #E60A0A red 

2237# absence_color = '#EBEBEB' # #EBEBEB lightgrey 

2238# all_values = sorted(list(set(utils.flatten(utils.tree_prop_array(tree, profiling_prop)))), key=lambda x: (x != 'NaN', x)) 

2239# leaf2matrix = {} 

2240# for leaf in tree.leaves(): 

2241# leaf2matrix[leaf.name] = [] 

2242# for val in all_values: 

2243# if val in leaf.props.get(profiling_prop): 

2244# leaf2matrix[leaf.name].append(1) 

2245# else: 

2246# leaf2matrix[leaf.name].append(0) 

2247# value2color = {1: precence_color, 0: absence_color} 

2248# return leaf2matrix, value2color, all_values 

2249 

2250 

def multiple2profile(tree, profiling_prop):
    """Encode a multi-valued categorical property as a FASTA-like profile.

    Every leaf yields one record of the form ``"\n>" + leaf.name + "\n" + row``
    where ``row`` has one character per known category: ``'p'`` when the
    category is contained in the leaf's value (tested with ``in``) and
    ``'z'`` otherwise.  Leaves whose property is missing or falsy get a
    row made only of ``'z'``.

    Unlike the previous implementation, a leaf without a value no longer
    appends an extra trailing newline, so all records are formatted the
    same way and no blank lines appear between records.

    Args:
        tree: Tree object exposing ``leaves()``; leaves expose ``name``
            and ``props``.
        profiling_prop: Name of the property to profile.

    Returns:
        A tuple ``(matrix, all_values)``: the profile text and the sorted
        list of categories (``'NaN'`` first) giving the column order.
    """
    all_values = sorted(
        set(utils.flatten(utils.tree_prop_array(tree, profiling_prop))),
        key=lambda x: (x != 'NaN', x))

    # Symbols only; presumably rendered downstream as red ('p') and
    # light grey ('z') -- TODO confirm against the profile layout.
    presence = 'p'
    absence = 'z'
    blank_row = absence * len(all_values)

    # Collect records and join once instead of growing a string with +=.
    records = []
    for leaf in tree.leaves():
        # NOTE(review): if the value is a plain string, `in` performs a
        # substring test -- presumably the value is a list; verify.
        leaf_values = leaf.props.get(profiling_prop)  # read once per leaf
        if leaf_values:
            row = ''.join(
                presence if val in leaf_values else absence
                for val in all_values)
        else:
            row = blank_row
        records.append('\n>' + leaf.name + '\n' + row)

    return ''.join(records), all_values

2267 

def barplot_width_type(value):
    """Convert a command-line string into a bar-plot width.

    Args:
        value: Text to convert; ``'none'`` in any letter case (surrounding
            whitespace ignored) means "no width".

    Returns:
        ``None`` when ``value`` spells "none", otherwise ``float(value)``.

    Raises:
        argparse.ArgumentTypeError: If ``value`` is neither "none" nor
            parseable as a float.
    """
    # float() already ignores surrounding whitespace, so treat the
    # 'none' keyword the same way for consistency.
    if value.strip().lower() == 'none':
        return None
    try:
        return float(value)
    except ValueError as err:
        # Chain the original error so the cause is not lost.
        raise argparse.ArgumentTypeError(
            "Value must be an float or 'None'.") from err

2274 raise argparse.ArgumentTypeError("Value must be an float or 'None'.")