1 __VERSION__="ete2-2.0rev89"
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25 from evolevents import EvolEvent
26
27 __all__ = ["get_evol_events_from_leaf", "get_evol_events_from_root"]
28
def get_evol_events_from_leaf(node, sos_thr=0.0):
    """Return the duplication and speciation events that involve a leaf.

    Method: the algorithm scans all nodes from the given seed node up to
    the root of its tree.  At every ancestor, the species browsed so far
    are compared with the species found in the sister lineages, and a
    species overlap score (``sos``) is computed as the size of their
    intersection divided by the size of their union.  Ancestors are
    assumed to be duplications when a species overlap is found between
    their child lineages.  The method is described in more detail in:

    "The Human Phylome." Huerta-Cepas J, Dopazo H, Dopazo J, Gabaldon
    T. Genome Biol. 2007;8(6):R109.

    Side effects: the "evoltype" feature is first deleted from every
    node of the tree.  Each ancestor for which an event is reported is
    then labelled through ``add_feature("evoltype", "D")`` (duplication)
    or ``add_feature("evoltype", "S")`` (speciation).

    Arguments:
      node: seed node.  Its ``name`` and ``species`` attributes are
        read, and the tree is walked through ``up``, ``get_sisters()``,
        ``get_leaves()`` and ``get_distance()``.
      sos_thr: species overlap score threshold.  A score above it is a
        duplication; a score equal to or below it is a speciation.

    Returns:
      A list of EvolEvent objects ordered from the seed towards the
      root.  Every event carries ``fam_size`` (number of leaves sharing
      the seed species), ``seed``, ``sos``, ``outgroup`` (name of the
      root child holding fewer leaf names, the first child on ties),
      ``in_seqs``, ``out_seqs``, ``inparalogs``, ``node``, ``etype``,
      ``outparalogs`` and ``orthologs``.

    Raises:
      TypeError: if the root does not have exactly two children.
    """
    # NOTE(review): the `def` line was not visible in the reviewed
    # source; the name comes from __all__ and the parameters from the
    # names the body reads.  Confirm the `sos_thr` default.
    root = node.get_tree_root()

    # A rooted tree must split into exactly two lineages at its root.
    outgroups = root.get_children()
    if len(outgroups) != 2:
        raise TypeError("Tree is not rooted")

    # The root child with fewer distinct leaf names is taken as outgroup.
    o1 = set(n.name for n in outgroups[0].get_leaves())
    o2 = set(n.name for n in outgroups[1].get_leaves())
    if len(o2) < len(o1):
        smaller_outg = outgroups[1]
    else:
        smaller_outg = outgroups[0]

    all_events = []
    current = node
    ref_spcs = node.species
    browsed_spcs = set([current.species])
    browsed_leaves = set([current])

    # Family size: number of leaves belonging to the seed species.
    fam_size = len([n for n in root.get_leaves() if n.species == ref_spcs])

    # Remove labels left behind by a previous scan.
    for n in root.get_descendants() + [root]:
        n.del_feature("evoltype")

    while current.up:
        # Collect the leaves of the sister lineages and add up their
        # distances to the current node.
        d = 0
        sister_leaves = set()
        for sister in current.get_sisters():
            for leaf in sister.get_leaves():
                d += current.get_distance(leaf)
                sister_leaves.add(leaf)

        # Leaves that were already browsed do not count as new sisters.
        sister_leaves = sister_leaves.difference(browsed_leaves)
        if not sister_leaves:
            current = current.up
            continue

        sister_spcs = set(n.species for n in sister_leaves)
        overlaped_spcs = browsed_spcs & sister_spcs
        all_spcs = browsed_spcs | sister_spcs
        score = float(len(overlaped_spcs)) / len(all_spcs)

        event = EvolEvent()
        event.fam_size = fam_size
        event.seed = node.name
        event.sos = score
        event.outgroup = smaller_outg.name
        event.in_seqs = set(n.name for n in browsed_leaves)
        event.out_seqs = set(n.name for n in sister_leaves)
        event.inparalogs = set(
            n.name for n in browsed_leaves if n.species == ref_spcs)

        if score > sos_thr and d > 0.0:
            # Duplication: sister leaves of the seed species are
            # out-paralogs.
            event.node = current.up
            event.etype = "D"
            event.outparalogs = set(
                n.name for n in sister_leaves if n.species == ref_spcs)
            event.orthologs = set([])
            current.up.add_feature("evoltype", "D")
            all_events.append(event)
        elif score <= sos_thr:
            # Speciation: sister leaves of any other species are
            # orthologs.  Scores *below* the threshold are included;
            # an equality test would silently drop them whenever
            # sos_thr > 0.
            event.node = current.up
            event.etype = "S"
            event.orthologs = set(
                n.name for n in sister_leaves if n.species != ref_spcs)
            event.outparalogs = set([])
            current.up.add_feature("evoltype", "S")
            all_events.append(event)
        # Otherwise the score is above the threshold but the summed
        # distance is not positive: no event is recorded.
        # NOTE(review): the reason for the `d > 0.0` guard is not
        # visible in this file -- confirm before relaxing it.

        # Everything seen at this level counts as browsed further up.
        browsed_spcs.update(sister_spcs)
        browsed_leaves.update(sister_leaves)

        current = current.up
    return all_events
133
def get_evol_events_from_root(node, sos_thr=0.0):
    """Return **all** duplication and speciation events found in a tree.

    The scan always starts at the root of the tree that contains
    ``node`` (as returned by ``node.get_tree_root()``) and visits every
    node breadth-first.  Internal nodes are assumed to be duplications
    when a species overlap is found between their two child lineages:
    the species overlap score (``sos``) is the number of species shared
    by both sides divided by the number of species found on either
    side.  The method is described in more detail in:

    "The Human Phylome." Huerta-Cepas J, Dopazo H, Dopazo J, Gabaldon
    T. Genome Biol. 2007;8(6):R109.

    Side effects: the "evoltype" feature is first deleted from every
    node of the tree; every internal node is then labelled through
    ``add_feature("evoltype", "D")`` (duplication) or
    ``add_feature("evoltype", "S")`` (speciation).

    Arguments:
      node: any node of the tree to scan.
      sos_thr: species overlap score threshold.  A score above it is a
        duplication; any other score is a speciation.

    Returns:
      A list with one EvolEvent per internal node, in breadth-first
      order.  Every event carries ``fam_size`` (number of leaves in the
      tree), ``branch_supports`` (support of the node and of its two
      children), ``sos``, ``outgroup_spcs`` (``get_species()`` of the
      root child holding fewer leaf names, the first child on ties),
      ``in_seqs``, ``out_seqs``, ``inparalogs``, ``node``, ``etype``,
      ``outparalogs`` and ``orthologs``.

    Raises:
      TypeError: if the root does not have exactly two children, or if
        any other node has a number of children other than zero or two.
    """
    # NOTE(review): the `def` line was not visible in the reviewed
    # source; the name comes from __all__ and the parameters from the
    # names the body reads.  Confirm the `sos_thr` default.
    from collections import deque

    root = node.get_tree_root()

    # A rooted tree must split into exactly two lineages at its root.
    outgroups = root.get_children()
    if len(outgroups) != 2:
        raise TypeError("Tree is not rooted")

    # The root child with fewer distinct leaf names is taken as outgroup.
    o1 = set(n.name for n in outgroups[0].get_leaves())
    o2 = set(n.name for n in outgroups[1].get_leaves())
    if len(o2) < len(o1):
        smaller_outg = outgroups[1]
    else:
        smaller_outg = outgroups[0]

    # Family size: total number of leaves in the tree.
    fam_size = len(list(root.get_leaves()))

    # Remove labels left behind by a previous scan.
    for n in root.get_descendants() + [root]:
        n.del_feature("evoltype")

    all_events = []
    # Breadth-first traversal; a deque pops from the left in O(1).
    to_visit = deque([root])
    while to_visit:
        current = to_visit.popleft()
        childs = current.get_children()
        to_visit.extend(childs)

        if not childs:
            # Leaves carry no event.
            continue
        if len(childs) != 2:
            # A single child is rejected too: without this check it
            # would surface as an IndexError on childs[1] below.
            raise TypeError("nodes are expected to have two childs.")

        sideA_leaves = set(childs[0].get_leaves())
        sideB_leaves = set(childs[1].get_leaves())
        sideA_spcs = set(n.species for n in sideA_leaves)
        sideB_spcs = set(n.species for n in sideB_leaves)

        overlaped_spcs = sideA_spcs & sideB_spcs
        all_spcs = sideA_spcs | sideB_spcs
        score = float(len(overlaped_spcs)) / len(all_spcs)

        event = EvolEvent()
        event.fam_size = fam_size
        event.branch_supports = [
            current.support,
            current.children[0].support,
            current.children[1].support,
        ]
        event.sos = score
        event.outgroup_spcs = smaller_outg.get_species()
        event.in_seqs = set(n.name for n in sideA_leaves)
        event.out_seqs = set(n.name for n in sideB_leaves)
        event.inparalogs = set(n.name for n in sideA_leaves)
        event.node = current

        if score > sos_thr:
            # Duplication: the other side holds out-paralogs.
            event.etype = "D"
            event.outparalogs = set(n.name for n in sideB_leaves)
            event.orthologs = set([])
            current.add_feature("evoltype", "D")
        else:
            # Speciation: the other side holds orthologs.
            event.etype = "S"
            event.orthologs = set(n.name for n in sideB_leaves)
            event.outparalogs = set([])
            current.add_feature("evoltype", "S")

        all_events.append(event)
    return all_events
225