Coverage for /home/deng/Projects/ete4/hackathon/ete4/ete4/phylo/reconciliation.py: 7%
103 statements
« prev ^ index » next coverage.py v7.2.7, created at 2024-03-21 09:19 +0100
« prev ^ index » next coverage.py v7.2.7, created at 2024-03-21 09:19 +0100
1import copy
2from .evolevents import EvolEvent
def _build_evol_event(etype, node):
    """Create the EvolEvent describing the split below a binary *node*.

    Leaf names are materialised into lists so the event attributes can be
    read more than once, whether ``leaf_names()`` returns a list or a
    lazy iterator.

    :argument etype: "D" for a duplication, "S" for a speciation
    :argument node: gene tree node with exactly two children

    :returns: the populated EvolEvent
    """
    first_names = list(node.children[0].leaf_names())
    second_names = list(node.children[1].leaf_names())

    event = EvolEvent()
    event.etype = etype
    event.inparalogs = list(first_names)
    # The second partition is recorded as out-paralogs for duplications
    # and as orthologs for speciations.
    if etype == "D":
        event.outparalogs = list(second_names)
    else:
        event.orthologs = list(second_names)
    event.in_seqs = first_names
    event.out_seqs = second_names
    return event


def get_reconciled_tree(node, sptree, events):
    """Return the reconciled gene tree for a provided species topology.

    The gene tree is visited recursively in post-order: both children are
    reconciled first, and their species sets decide whether the current
    node is a duplication ("D", the sets overlap) or a speciation ("S").
    The input *node* is tagged with the same "evoltype" property as the
    node that is returned.

    :argument node: gene tree node to reconcile
    :argument sptree: species tree providing the expected topology
    :argument events: list to which one EvolEvent is appended for every
      internal node visited

    :returns: tuple ``(reconciled_node, events)``
    :raises ValueError: if a node has a number of children other than 0 or 2
    """
    if len(node.children) == 0:
        # Leaves need no reconciliation; hand back an independent copy.
        return copy.deepcopy(node), events

    if len(node.children) != 2:
        raise ValueError("Algorithm can only work with binary trees.")

    # Children are reconciled first, so their topology can be trusted
    # when this node is processed (recursive post-order).
    morphed_childs = [get_reconciled_tree(ch, sptree, events)[0]
                      for ch in node.children]

    sp_child_0 = morphed_childs[0].get_species()
    sp_child_1 = morphed_childs[1].get_species()
    all_species = sp_child_1 | sp_child_0

    # Topology expected by the observed species; needed by both branches.
    template = _get_expected_topology(sptree, all_species)

    if sp_child_0 & sp_child_1:
        # Shared species at both sides: duplication. Each child is
        # reconciled against the full template independently.
        newnode = copy.deepcopy(node)
        newnode.up = None
        newnode.remove_children()
        newmorphed0, _ = _replace_on_template(template, morphed_childs[0])
        newmorphed1, _ = _replace_on_template(template, morphed_childs[1])
        newnode.add_child(newmorphed0)
        newnode.add_child(newmorphed1)
        newnode.add_prop("evoltype", "D")
        node.add_prop("evoltype", "D")
        events.append(_build_evol_event("D", node))
        return newnode, events

    # Otherwise it is a speciation: both partitions are placed on a
    # single copy of the template, one after the other.
    template, _ = _replace_on_template(template, morphed_childs[0])
    template, _ = _replace_on_template(template, morphed_childs[1])
    template.add_prop("evoltype", "S")
    node.add_prop("evoltype", "S")
    events.append(_build_evol_event("S", node))
    return template, events
def _replace_on_template(orig_template, node):
    """Graft a copy of *node* onto a copy of *orig_template*.

    The template partition that is replaced is the smallest subtree,
    found by climbing from one leaf, whose leaf names cover every species
    of *node*. Neither argument is modified; both are deep-copied.

    :argument orig_template: expected topology to graft onto
    :argument node: partition to place on the template

    :returns: tuple ``(tree, grafted)``. ``grafted`` is the copy of
      *node*. ``tree`` is the modified template copy, or ``grafted``
      itself when the matched partition is the template root.
    """
    working = copy.deepcopy(orig_template)

    wanted = node.get_species()
    seed_name = list(wanted)[0]  # any species of the node is a valid start

    # Start at the template leaf named after the seed species ...
    candidates = working.search_nodes(children=[], name=seed_name)
    anchor = list(candidates)[0]
    # ... and climb until the partition covers all species of the node.
    while wanted - set(anchor.leaf_names()):
        anchor = anchor.up

    graft = copy.deepcopy(node)
    host = anchor.up
    if host is None:
        # The node spans the whole template: it replaces it entirely.
        return graft, graft

    host.remove_child(anchor)
    host.add_child(graft)
    return working, graft
88def _get_expected_topology(t, species):
89 missing_sp = set(species) - set(t.leaf_names())
90 if missing_sp:
91 raise KeyError("* The following species are not contained in the species tree: "+ ','.join(missing_sp) )
93 node = list(t.search_nodes(children=[], name=list(species)[0]))[0]
95 sps = set(species)
96 while sps-set(node.leaf_names()) != set([]):
97 node = node.up
98 template = copy.deepcopy(node)
99 # make get_species() to work
100 #template._speciesFunction = _get_species_on_TOL
101 template.set_species_naming_function(_get_species_on_TOL)
102 template.detach()
103 for n in [template]+list(template.descendants()):
104 n.add_prop("evoltype", "L")
105 n.dist = 1
106 return template
108def _get_species_on_TOL(name):
109 return name
def get_reconciled_tree_zmasek(gtree, sptree, inplace=False):
    """
    Reconciles the gene tree with the species tree
    using Zmasek and Eddy's algorithm. Details can be
    found in the paper:

    Christian M. Zmasek, Sean R. Eddy: A simple algorithm
    to infer gene duplication and speciation events on a
    gene tree. Bioinformatics 17(9): 821-828 (2001)

    Every internal node of the gene tree gets an "evoltype" property:
    "D" (duplication) when the node maps to the same species tree node
    as one of its children, "S" (speciation) otherwise. The temporary
    mapping property "M" is removed before returning or raising.

    :argument gtree: gene tree (PhyloTree instance)

    :argument sptree: species tree (PhyloTree instance)

    :argument False inplace: if True, the provided gene tree instance is
      modified. Otherwise a reconciled copy of the gene tree is returned.

    :returns: reconciled gene tree

    :raises KeyError: if the gene tree contains species that are missing
      from the species tree
    :raises ValueError: if an internal node does not have exactly two
      children
    """
    def cleanup(tree):
        # Drop the temporary mapping from every node.
        for node in tree.traverse():
            node.del_prop("M")

    if not inplace:
        gtree = gtree.copy('deepcopy')

    # check for missing species
    missing_sp = gtree.get_species() - sptree.get_species()
    if missing_sp:
        raise KeyError("* The following species are not contained in the species tree: "+ ', '.join(missing_sp))

    # initialization: species name -> leaf of the species tree
    sp2node = {leaf.species: leaf for leaf in sptree.leaves()}

    try:
        # set/compute the mapping function M(g) for the
        # leaf nodes in the gene tree (see paper for details)
        for leaf in gtree.leaves():
            leaf.add_prop("M", sp2node[leaf.species])

        # visit each internal node in the gene tree
        # and detect its event (duplication or speciation)
        for node in gtree.traverse(strategy="postorder"):
            if len(node.children) == 0:
                continue  # nothing to do for leaf nodes

            if len(node.children) != 2:
                raise ValueError("Algorithm can only work with binary trees.")

            # NOTE(review): relies on the "M" property being readable as
            # an attribute, exactly as the previous implementation did.
            left, right = node.children
            lca = left.M.get_common_ancestor(right.M)  # LCA in the species tree
            node.add_prop("M", lca)

            # Duplication when a child maps to the very same species
            # tree node as its parent (identity, not equality).
            is_duplication = left.M is lca or right.M is lca
            node.add_prop("evoltype", "D" if is_duplication else "S")
    finally:
        # Never leave the temporary mapping behind, whatever the outcome.
        cleanup(gtree)

    return gtree