Coverage for /home/deng/Projects/ete4/hackathon/ete4/ete4/phylo/phylotree.py: 16%
360 statements
« prev ^ index » next coverage.py v7.2.7, created at 2024-08-07 10:27 +0200
« prev ^ index » next coverage.py v7.2.7, created at 2024-08-07 10:27 +0200
1"""
2This module defines the PhyloTree class to manage phylogenetic trees.
3It inherits from Tree and adds some special features to the node
4instances.
5"""
7import sys
8import re
9import warnings
10import itertools
11from collections import defaultdict
12from ete4 import Tree, SeqGroup, NCBITaxa, GTDBTaxa
13from .reconciliation import get_reconciled_tree
14from . import spoverlap
16__all__ = ["PhyloTree"]
def is_dup(n):
    """Return True if node *n* carries the property evoltype == "D"."""
    evoltype = n.props.get("evoltype")
    return evoltype == "D"
def get_subtrees(tree, full_copy=False, properties=None, newick_only=False):
    """Return (number of trees, number of duplications, species-tree iterator).

    The two counts come from calc_subtrees(). The third element is the
    generator built by _get_subtrees(): no species tree is actually
    computed until that iterator is first advanced, so callers can look
    at the counts and discard cases that would produce an unmanageable
    number of species trees without paying for them.

    The original author notes that this iterative approach was the most
    efficient one among several recursive and iterative alternatives
    tried, and that it copes with very large gene trees.

    :param tree: Gene tree whose duplication nodes are marked with the
        property evoltype == "D".
    :param full_copy: Forwarded unchanged to _get_subtrees().
    :param properties: Names of the properties to map onto the
        resulting species trees.
    :param newick_only: If True, the iterator yields newick strings
        instead of tree objects.
    """
    counts = calc_subtrees(tree)
    ntrees, ndups = counts
    sptree_iterator = _get_subtrees(tree, full_copy, properties, newick_only)
    return ntrees, ndups, sptree_iterator
41def _get_subtrees(tree, full_copy=False, properties=None, newick_only=False):
42 # First I need to precalculate all the species trees in tuple (newick) format
43 nid = 0
44 n2nid = {}
45 nid2node = {}
46 n2subtrees = defaultdict(list)
47 for n in tree.traverse("postorder"):
48 n2nid[n] = nid
49 nid2node[nid] = n
50 nid += 1
51 if n.children:
52 if is_dup(n):
53 subtrees = []
54 for ch in n.children:
55 subtrees.extend(n2subtrees[n2nid[ch]])
56 else:
57 subtrees = tuple([val for val in
58 itertools.product(n2subtrees[n2nid[n.children[0]]],
59 n2subtrees[n2nid[n.children[1]]])])
60 else:
61 subtrees = tuple([n2nid[n]])
63 n2subtrees[n2nid[n]] = subtrees
64 for ch in n.children:
65 del n2subtrees[n2nid[ch]]
67 sp_trees = n2subtrees[n2nid[tree]]
69 # Second, I yield a tree per iteration in newick or ETE format
70 properties = set(properties) if properties else set()
71 properties.update(["name"])
73 def _nodereplacer(match):
74 pre, b, post = match.groups()
75 pre = '' if not pre else pre
76 post = '' if not post else post
77 node = nid2node[int(b)]
78 fstring = ""
79 if properties:
80 fstring = "".join(["[&&NHX:",
81 ':'.join(["%s=%s" %(p, node.props.get(p))
82 for p in properties if node.props.get(p)])
83 , "]"])
85 return ''.join([pre, node.name, fstring, post])
87 if newick_only:
88 id_match = re.compile(r"([^0-9])?(\d+)([^0-9])?")
89 for nw in sp_trees:
90 yield re.sub(id_match, _nodereplacer, str(nw)+";")
91 else:
92 for nw in sp_trees:
93 # I take advantage from the fact that I generated the subtrees
94 # using tuples, so str representation is actually a newick :)
95 t = PhyloTree(str(nw)+";")
96 # Map properties from original tree
97 for leaf in t.leaves():
98 _nid = int(leaf.name)
99 for p in properties:
100 leaf.add_prop(p, getattr(nid2node[_nid], p))
101 yield t
def calc_subtrees(tree):
    """Return the number of species trees and of duplications in *tree*.

    The count corresponds to the trees that the TreeKO algorithm would
    produce: a leaf counts as one tree, a duplication node adds up the
    counts of its children, and any other internal node multiplies the
    counts of its first two children.
    """
    counts = {}
    ndups = 0
    for node in tree.traverse("postorder"):
        kids = node.children
        if not kids:
            counts[node] = 1
        elif is_dup(node):
            ndups += 1
            counts[node] = sum(counts[ch] for ch in kids)
        else:
            counts[node] = counts[kids[0]] * counts[kids[1]]
    return counts[tree], ndups
def iter_sptrees(sptrees, nid2node, properties=None, newick_only=False):
    """Load and map species trees encoded as tuples of node ids.

    This is a generator: nothing is computed until it is first advanced.

    :param sptrees: Iterable of species trees in tuple format (nested
        tuples of numeric node ids, or a bare id for a single leaf).
    :param nid2node: Dictionary translating node ids into the original
        nodes.
    :param properties: Names of the properties to copy from the original
        nodes. "name" is always included.
    :param newick_only: If True, yield newick strings with NHX
        annotations; otherwise yield PhyloTree instances.
    """
    properties = set(properties) if properties else set()
    properties.update(["name"])
    # Sorted once, so that annotations always come out in the same order
    # (iterating over the set directly gives an arbitrary order).
    ordered_props = sorted(properties)

    def _node_value(node, prop):
        # Prefer the attribute (it covers computed ones such as "name"),
        # and fall back to the props dictionary instead of failing when
        # the value is only stored there.
        try:
            return getattr(node, prop)
        except AttributeError:
            return node.props.get(prop)

    def _nodereplacer(match):
        pre, b, post = match.groups()
        node = nid2node[int(b)]
        fields = ["%s=%s" % (p, node.props.get(p))
                  for p in ordered_props if node.props.get(p)]
        # NHX fields are separated by ":".
        fstring = "".join(["[&&NHX:", ':'.join(fields), "]"])
        # The groups around the id are optional, so they can be None.
        return ''.join([pre or '', node.name, fstring, post or ''])

    if newick_only:
        # The characters around the id are optional, so that a tree made
        # of a single id (like "3;") is translated too.
        id_match = re.compile(r"([^0-9])?(\d+)([^0-9])?")
        for nw in sptrees:
            yield re.sub(id_match, _nodereplacer, str(nw)+";")
    else:
        for nw in sptrees:
            # The subtrees are encoded as tuples, so their str
            # representation is actually a newick.
            t = PhyloTree(str(nw)+";")
            # Map properties from the original tree.
            for leaf in t.leaves():
                original = nid2node[int(leaf.name)]
                for p in ordered_props:
                    leaf.add_prop(p, _node_value(original, p))
            yield t
def _get_subtrees_recursive(node, full_copy=True):
    """Return the list of species trees found under *node* (recursive).

    The tree is modified while working: the duplication nodes found
    under *node* are detached from it and never re-attached.

    :param node: Node whose duplication nodes are marked with the
        property evoltype == "D" (see is_dup()).
    :param full_copy: If True, each species tree is a copy made with
        node.copy(). If False, each one is whatever node.write()
        returns.
    """
    # A duplication node just contributes the species trees of each of
    # its children.
    if is_dup(node):
        sp_trees = []
        for ch in node.children:
            sp_trees.extend(_get_subtrees_recursive(ch, full_copy=full_copy))
        return sp_trees

    # Save a list of the duplication nodes under the current node. The
    # traversal uses is_dup as is_leaf_fn, so it is also expected to
    # return real leaves, which are filtered out here.
    dups = []
    for _n in node.leaves(is_leaf_fn=is_dup):
        if is_dup(_n):
            dups.append(_n)

    if dups:
        # Detach the inner duplication nodes, remembering their anchor
        # point.
        subtrees = []
        for dp in dups:
            # The real node to attach the sibling subtrees to.
            anchor = dp.up
            dp.detach()

            duptrees = []
            # Get all the sibling species trees in each side of the
            # duplication. Each subtree points to its anchor.
            for ch in dp.children:
                for subt in _get_subtrees_recursive(ch, full_copy=full_copy):
                    if not full_copy:
                        # Without full copies the recursive call returns
                        # the output of node.write(), so a node is
                        # rebuilt from it.
                        subt = node.__class__(subt)
                    subt.up = anchor
                    duptrees.append(subt)

            # All the possible species trees under this duplication.
            subtrees.append(duptrees)

        # Generate all the combinations of subtrees in sibling
        # duplications.
        sp_trees = []
        for comb in itertools.product(*subtrees):
            # Each subtree is attached to its anchor point, and then a
            # copy of the resulting species tree is made.
            for subt in comb:
                if subt.up:
                    subt.up.children.append(subt)
                else:
                    sp_trees.append(subt)
            if full_copy:
                # Hide the parent while copying (presumably so that only
                # this subtree gets copied; confirm against Tree.copy).
                back_up = node.up
                node.up = None
                _node = node.copy()
                node.up = back_up
            else:
                # NOTE(review): PhyloTree.write() in this file takes
                # "props" and "parser", not "properties" and "format";
                # confirm that this call works for the node class used.
                _node = node.write(format=9, properties=["name", "evoltype"])
            sp_trees.append(_node)
            # Clear the current node: remove the subtrees just attached.
            # NOTE(review): this dereferences subt.up for every subtree,
            # also for those that took the "else" branch above.
            for subt in comb:
                subt.up.children.pop(-1)
    else:
        # No duplications below: the node itself is the only species
        # tree.
        if full_copy:
            back_up = node.up
            node.up = None
            _node = node.copy()
            node.up = back_up
        else:
            # NOTE(review): same keyword mismatch as in the call above.
            _node = node.write(format=9, properties=["name", "evoltype"])
        sp_trees = [_node]

    return sp_trees
def get_subparts(n):
    """Return the list of subtrees after splitting *n* at its duplications.

    The tree is modified in place (nodes are detached and deleted), so
    callers are expected to pass a copy, as PhyloTree.split_by_dups()
    does. Original leaves are recognized by having a "_leaf" attribute,
    which split_by_dups() sets before calling this function.
    """
    subtrees = []
    if is_dup(n):
        # A duplication node is dropped: each child is detached and
        # split on its own.
        # NOTE(review): the rest of this file uses "n.children"; confirm
        # that get_children() exists in the node class.
        for ch in n.get_children():
            ch.detach()
            subtrees.extend(get_subparts(ch))
    else:
        # Collect the duplication nodes under n. The traversal uses
        # is_dup as is_leaf_fn, so it is also expected to return real
        # leaves, which are filtered out here.
        to_visit = []
        for _n in n.leaves(is_leaf_fn=is_dup):
            if is_dup(_n):
                to_visit.append(_n)

        for _n in to_visit:
            _n.detach()

        # After detaching, some nodes may be left with a single child,
        # or with no children without being original leaves.
        freaks = [_n for _n in n.descendants() if
                  len(_n.children)==1 or (not hasattr(_n, "_leaf") and not _n.children)]
        for s in freaks:
            s.delete(prevent_nondicotomic=True)

        # Clean node structure to prevent nodes with only one child.
        while len(n.children) == 1:
            n = n.children[0]
            n.detach()

        # Keep what is left, unless it is an empty node that was not an
        # original leaf.
        if not n.children and not hasattr(n, "_leaf"):
            pass
        else:
            subtrees.append(n)

        # Finally, split each of the detached duplication nodes.
        for _n in to_visit:
            subtrees.extend(get_subparts(_n))

    return subtrees
class PhyloTree(Tree):
    """
    Class to store a phylogenetic tree.

    Extends the standard :class:`Tree` instance by adding
    specific properties and methods to work with phylogenetic trees.
    """

    def __init__(self, newick=None, children=None, alignment=None,
                 alg_format="fasta", sp_naming_function=None,
                 parser=None):
        """
        :param newick: If not None, initializes the tree from a newick,
            which can be a string or file object containing it.
        :param children: If not None, the children to add to this node.
        :param alignment: File containing a multiple sequence alignment.
        :param alg_format: "fasta", "phylip" or "iphylip" (interleaved).
        :param parser: Parser to read the newick.
        :param sp_naming_function: Function that gets a node name and
            returns the species name (see
            :func:`PhyloTree.set_species_naming_function`). It is only
            applied when a newick is given. If None, no naming function
            is set, and the species of a node is read from its
            "species" property.
        """
        super().__init__(data=newick, children=children, parser=parser)

        # This will be only executed after reading the whole tree,
        # because the argument 'alignment' is not passed to the
        # PhyloTree constructor during parsing.
        if alignment:
            self.link_to_alignment(alignment, alg_format)

        if newick:
            self.set_species_naming_function(sp_naming_function)

    @property
    def species(self):
        """Species of this node.

        If a species naming function is set, it is called with the node
        name (or with the node itself if that call fails). Otherwise,
        the value of the "species" property is returned.
        """
        sp_function = self.props.get('_speciesFunction')
        if not sp_function:
            return self.props.get('species')

        if 'species' in self.props:
            warnings.warn('Ambiguous species: both species and _speciesFunction '
                          'defined. You can remove "species" from this node.')
        try:
            return sp_function(self.name)
        except Exception:
            # The function may expect the node instead of its name.
            return sp_function(self)

    @species.setter
    def species(self, value):
        assert self.props.get('_speciesFunction') is None, \
            ('Species naming function present, cannot set species manually. '
             'Maybe call set_species_naming_function() first?')
        self.props['species'] = value

    def __repr__(self):
        return "PhyloTree '%s' (%s)" % (self.name, hex(self.__hash__()))

    def write(self, outfile=None, props=(), parser=None,
              format_root_node=False, is_leaf_fn=None):
        """Write the tree like the parent class does.

        The only difference is made here: if props is None, all the
        properties found in the tree whose name does not start with "_"
        are used (sorted by name).
        """
        if props is None:
            props = sorted(set(p for node in self.traverse()
                               for p in node.props if not p.startswith('_')))
        return super().write(outfile, props, parser, format_root_node, is_leaf_fn)

    def set_species_naming_function(self, fn):
        """Set the function used to get the species from the node's name.

        :param fn: Function that takes a nodename and returns the species
            name. If None, the function is removed from all the nodes.

        Example of a parsing function::

          def parse_sp_name(node_name):
              return node_name.split("_")[1]
          tree.set_species_naming_function(parse_sp_name)
        """
        for n in self.traverse():
            if fn is not None:
                n.props['_speciesFunction'] = fn
            else:
                n.props.pop('_speciesFunction', None)

    def link_to_alignment(self, alignment, alg_format="fasta", **kwargs):
        """Add to each node the property "sequence" from the alignment.

        Sequences are looked up by node name. A warning is printed to
        stderr with the number of leaves that could not be found.

        :param alignment: A SeqGroup, or anything that SeqGroup() can
            be built from.
        :param alg_format: Format passed to SeqGroup() when building it.
        :param kwargs: Extra arguments passed to SeqGroup().
        """
        missing_leaves = []
        missing_internal = []
        if isinstance(alignment, SeqGroup):
            alg = alignment
        else:
            alg = SeqGroup(alignment, format=alg_format, **kwargs)

        for n in self.traverse():
            try:
                n.add_prop("sequence", alg.get_seq(n.name))
            except KeyError:
                if n.is_leaf:
                    missing_leaves.append(n.name)
                else:
                    missing_internal.append(n.name)

        # Only missing leaves are reported. Missing internal nodes are
        # collected but deliberately not shown.
        if len(missing_leaves) > 0:
            print("Warnning: [%d] terminal nodes could not be found in the alignment." %\
                  len(missing_leaves), file=sys.stderr)

    def get_species(self):
        """ Returns the set of species covered by its partition. """
        return {l.species for l in self.leaves()}

    def iter_species(self):
        """ Returns an iterator over the species grouped by this node. """
        spcs = set()
        for l in self.leaves():
            if l.species not in spcs:
                spcs.add(l.species)
                yield l.species

    def get_age(self, species2age):
        """
        Implements the phylostratigrafic method described in:

        Huerta-Cepas, J., & Gabaldon, T. (2011). Assigning duplication events to
        relative temporal scales in genome-wide studies. Bioinformatics, 27(1),
        38-45.

        :param species2age: Dictionary translating species into ages.
        :returns: The maximum age among the species under this node.
        """
        return max(species2age[sp] for sp in self.get_species())

    def reconcile(self, species_tree):
        """ Returns the reconcilied topology with the provided species
        tree, and a list of evolutionary events inferred from such
        reconciliation. """
        return get_reconciled_tree(self, species_tree, [])

    def get_my_evol_events(self, sos_thr=0.0):
        """Return list of duplication and speciation events involving this node.

        Scanned nodes are also labeled internally as dup=True|False.
        You can access these labels using ``node.dup``.

        The algorithm scans all nodes from the given leafName to the
        root. Nodes are assumed to be duplications when a species
        overlap is found between its child linages. The method is
        described in more detail in:

        :Citation:
          *The Human Phylome*. T. Genome Biol. 2007;8(6):R109.
        """
        return spoverlap.get_evol_events_from_leaf(self, sos_thr=sos_thr)

    def get_descendant_evol_events(self, sos_thr=0.0):
        """ Returns a list of all duplication and speciation
        events detected after this node. Nodes are assumed to be
        duplications when a species overlap is found between its child
        linages. Method is described more detail in:

        "The Human Phylome." Huerta-Cepas J, Dopazo H, Dopazo J, Gabaldon
        T. Genome Biol. 2007;8(6):R109.
        """
        return spoverlap.get_evol_events_from_root(self, sos_thr=sos_thr)

    def get_farthest_oldest_leaf(self, species2age, is_leaf_fn=None):
        """Return the farthest oldest leaf to the current one.

        It requires an species2age dictionary with the age estimation
        for all species.

        :param None is_leaf_fn: A pointer to a function that receives
            a node instance as unique argument and returns True or
            False. It can be used to dynamically collapse nodes, so
            they are seen as leaves.
        """
        root = self.root
        outgroup_dist = 0
        outgroup_node = self
        outgroup_age = 0

        for leaf in root.leaves(is_leaf_fn=is_leaf_fn):
            if leaf.get_age(species2age) > outgroup_age:
                # Older than anything seen so far.
                outgroup_dist = leaf.get_distance(self, leaf)
                outgroup_node = leaf
                outgroup_age = species2age[leaf.get_species().pop()]
            elif leaf.get_age(species2age) == outgroup_age:
                # Same age: keep the farthest one.
                dist = leaf.get_distance(self, leaf)
                if dist > outgroup_dist:
                    outgroup_dist = dist
                    outgroup_node = leaf
                    outgroup_age = species2age[leaf.get_species().pop()]
        return outgroup_node

    def get_farthest_oldest_node(self, species2age):
        """Return the farthest oldest node (leaf or internal).

        The difference with get_farthest_oldest_leaf() is that in this
        function internal nodes grouping seqs from the same species
        are collapsed.
        """
        # A custom is_leaf() function is used to collapse the nodes
        # that group seqs from the same species.
        is_leaf = lambda node: len(node.get_species())==1
        return self.get_farthest_oldest_leaf(species2age, is_leaf_fn=is_leaf)

    def get_age_balanced_outgroup(self, species2age):
        """
        .. versionadded:: 2.2

        Returns the node better balance current tree structure
        according to the topological age of the different leaves and
        internal node sizes.

        :param species2age: A dictionary translating from leaf names
            into a topological age.

        .. warning: This is currently an experimental method!!

        """
        root = self
        all_seqs = set(self.leaf_names())
        outgroup_dist = 0
        best_balance = max(species2age.values())
        outgroup_node = self
        outgroup_size = 0

        # The species naming function of this node is used to translate
        # leaf names into species.
        sp_function = self.props.get('_speciesFunction')

        for leaf in root.descendants():
            leaf_seqs = set(leaf.leaf_names())
            size = len(leaf_seqs)

            leaf_species = [sp_function(s) for s in leaf_seqs]
            out_species = [sp_function(s) for s in all_seqs-leaf_seqs]

            leaf_age_min = min([species2age[sp] for sp in leaf_species])
            out_age_min = min([species2age[sp] for sp in out_species])
            leaf_age_max = max([species2age[sp] for sp in leaf_species])
            out_age_max = max([species2age[sp] for sp in out_species])
            leaf_age = leaf_age_max - leaf_age_min
            out_age = out_age_max - out_age_min

            age_inbalance = abs(out_age - leaf_age)

            # Ties in balance are solved by size, and then by distance.
            update = False
            if age_inbalance < best_balance:
                update = True
            elif age_inbalance == best_balance:
                if size > outgroup_size:
                    update = True
                elif size == outgroup_size:
                    dist = self.get_distance(self, leaf)
                    outgroup_dist = self.get_distance(self, outgroup_node)
                    if dist > outgroup_dist:
                        update = True

            if update:
                best_balance = age_inbalance
                outgroup_node = leaf
                outgroup_size = size

        return outgroup_node

    def get_speciation_trees(self, map_properties=None, autodetect_duplications=True,
                             newick_only=False, prop='species'):
        """Return number of species trees, of duplications, and an iterator.

        Calculates all possible species trees contained within a
        duplicated gene family tree as described in `Treeko
        <http://treeko.cgenomics.org>`_ (see `Marcet and Gabaldon,
        2011 <http://www.ncbi.nlm.nih.gov/pubmed/21335609>`_ ).

        :param map_properties: List of properties that should be
            mapped from the original gene family tree to each species
            tree subtree.
        :param autodetect_duplications: If True, duplication nodes
            will be automatically detected using the Species Overlap
            algorithm (:func:`PhyloTree.get_descendant_evol_events`).
            If False, duplication nodes within the original tree are
            expected to contain the property "evoltype='D'".
        :param newick_only: If True, the iterator yields newick
            strings instead of trees.
        :param prop: Name of the property used to group the leaves
            when detecting duplications.
        """
        t = self
        if autodetect_duplications:
            n2content = t.get_cached_content()
            n2species = t.get_cached_content(prop)
            for node in n2content:
                # A node is a duplication if its children share species.
                sp_subtotal = sum([len(n2species[_ch]) for _ch in node.children])
                if len(n2species[node]) > 1 and len(n2species[node]) != sp_subtotal:
                    node.props['evoltype'] = 'D'

        sp_trees = get_subtrees(t, properties=map_properties, newick_only=newick_only)

        return sp_trees

    def __get_speciation_trees_recursive(self, autodetect_duplications=True):
        """Return number of species trees, 0, and the list of species trees.

        NOTE: This function is experimental and for testing.

        :param autodetect_duplications: If True, duplication nodes are
            detected by species overlap. If False, they are expected to
            contain the property "evoltype='D'" already.
        """
        t = self.copy()
        if autodetect_duplications:
            dups = 0
            n2content = t.get_cached_content()
            n2species = t.get_cached_content('species')

            for node in n2content:
                sp_subtotal = sum([len(n2species[_ch]) for _ch in node.children])
                if len(n2species[node]) > 1 and len(n2species[node]) != sp_subtotal:
                    node.props['evoltype'] = 'D'
                    dups += 1
                elif node.is_leaf:
                    node._leaf = True
        else:
            for node in t.leaves():
                node._leaf = True
        subtrees = _get_subtrees_recursive(t)
        return len(subtrees), 0, subtrees

    def split_by_dups(self, autodetect_duplications=True):
        """Return the list of subtrees when splitting by its duplication nodes.

        :param True autodetect_duplications: If True, duplication
            nodes will be automatically detected using the Species
            Overlap algorithm
            (:func:`PhyloTree.get_descendant_evol_events`). If False,
            duplication nodes within the original tree are expected to
            contain the property "evoltype=D".
        """
        try:
            t = self.copy()
        except Exception:
            t = self.copy("deepcopy")

        if autodetect_duplications:
            n2content = t.get_cached_content()
            n2species = t.get_cached_content('species')

            for node in n2content:
                sp_subtotal = sum([len(n2species[_ch]) for _ch in node.children])
                if len(n2species[node]) > 1 and len(n2species[node]) != sp_subtotal:
                    node.props['evoltype'] = 'D'
                elif node.is_leaf:
                    # Mark the original leaves, so that get_subparts()
                    # can tell them from emptied internal nodes.
                    node._leaf = True
        else:
            for node in t.leaves():
                node._leaf = True
        sp_trees = get_subparts(t)
        return sp_trees

    def collapse_lineage_specific_expansions(self, species=None, return_copy=True):
        """ Converts lineage specific expansion nodes into a single
        tip node (arbitrarily chosen from tips within the expansion).

        :param None species: If supplied, only expansions matching the
           species criteria will be pruned. When None, all expansions
           within the tree will be processed.
        :param return_copy: If True, work on (and return) a deep copy
           of the tree. If False, modify this tree.

        If the whole tree is an expansion, the chosen tip is returned.
        """
        if species and isinstance(species, (list, tuple)):
            species = set(species)
        elif species and (not isinstance(species, (set, frozenset))):
            raise TypeError("species argument should be a set (preferred), list or tuple")

        prunned = self.copy("deepcopy") if return_copy else self
        n2sp = prunned.get_cached_content('species')
        n2leaves = prunned.get_cached_content()
        # An expansion groups more than one leaf, all from one species.
        is_expansion = lambda n: (len(n2sp[n])==1 and len(n2leaves[n])>1
                                  and (species is None or species & n2sp[n]))
        for n in prunned.leaves(is_leaf_fn=is_expansion):
            repre = list(n2leaves[n])[0]
            repre.detach()
            if n is not prunned:
                # Replace the expansion with its representative.
                n.up.add_child(repre)
                n.detach()
            else:
                return repre

        return prunned

    def annotate_ncbi_taxa(self, taxid_attr='species', tax2name=None, tax2track=None, tax2rank=None, dbfile=None, ignore_unclassified=False):
        """Add NCBI taxonomy annotation to all descendant nodes. Leaf nodes are
        expected to contain a property (species, by default) encoding a valid
        taxid number.

        All descendant nodes (including internal nodes) are annotated with the
        following new features:

        `Node.spname`: scientific species name as encoded in the NCBI taxonomy database

        `Node.named_lineage`: the NCBI lineage track using scientific names

        `Node.taxid`: NCBI taxid number

        `Node.lineage`: same as named_lineage but using taxid codes.

        Note that for internal nodes, NCBI information will refer to the first
        common lineage of the grouped species.

        :param name taxid_attr: the name of the feature that should be used to access the taxid number associated to each node.

        :param None tax2name: A dictionary where keys are taxid
            numbers and values are their translation into NCBI
            scientific name. Its use is optional and allows to avoid
            database queries when annotating many trees containing the
            same set of taxids.

        :param None tax2track: A dictionary where keys are taxid
            numbers and values are their translation into NCBI lineage
            tracks (taxids). Its use is optional and allows to avoid
            database queries when annotating many trees containing the
            same set of taxids.

        :param None tax2rank: A dictionary where keys are taxid
            numbers and values are their translation into NCBI rank
            name. Its use is optional and allows to avoid database
            queries when annotating many trees containing the same set
            of taxids.

        :param None dbfile : If provided, the provided file will be
            used as a local copy of the NCBI taxonomy database.

        :param ignore_unclassified: Passed unchanged to
            NCBITaxa.annotate_tree().

        :returns: tax2name (a dictionary translating taxid numbers
            into scientific name), tax2lineage (a dictionary
            translating taxid numbers into their corresponding NCBI
            lineage track) and tax2rank (a dictionary translating
            taxid numbers into rank names).

        """

        ncbi = NCBITaxa(dbfile=dbfile)
        return ncbi.annotate_tree(self, taxid_attr=taxid_attr, tax2name=tax2name, tax2track=tax2track, tax2rank=tax2rank, ignore_unclassified=ignore_unclassified)

    def annotate_gtdb_taxa(self, taxid_attr='species', tax2name=None, tax2track=None, tax2rank=None, dbfile=None, ignore_unclassified=False):
        """Add GTDB taxonomy annotation to the tree.

        It takes the same arguments as annotate_ncbi_taxa(), and
        returns whatever GTDBTaxa.annotate_tree() returns.
        """
        gtdb = GTDBTaxa(dbfile=dbfile)
        return gtdb.annotate_tree(self, taxid_attr=taxid_attr, tax2name=tax2name, tax2track=tax2track, tax2rank=tax2rank, ignore_unclassified=ignore_unclassified)

    def ncbi_compare(self, autodetect_duplications=True, cached_content=None):
        """Call NCBITaxa.get_broken_branches() on this tree.

        If some species appears more than once among the leaves, it is
        called on each of the species trees of this tree instead. The
        results of those calls are not returned.

        :param autodetect_duplications: Passed to get_speciation_trees().
        :param cached_content: Cached content of the tree. If not
            given, it is computed with get_cached_content().
        """
        if not cached_content:
            cached_content = self.get_cached_content()
        cached_species = {n.props.get('species') for n in cached_content[self]}

        if len(cached_species) != len(cached_content[self]):
            ntrees, ndups, target_trees = self.get_speciation_trees(
                autodetect_duplications=autodetect_duplications,
                map_properties=["taxid"])
        else:
            target_trees = [self]

        ncbi = NCBITaxa()
        for t in target_trees:
            ncbi.get_broken_branches(t, cached_content)