Package ete2 :: Package phylo :: Module spoverlap
[hide private]
[frames | no frames]

Source Code for Module ete2.phylo.spoverlap

  1  __VERSION__="ete2-2.0rev96"  
  2  # #START_LICENSE########################################################### 
  3  # 
  4  # Copyright (C) 2009 by Jaime Huerta Cepas. All rights reserved.   
  5  # email: jhcepas@gmail.com 
  6  # 
  7  # This file is part of the Environment for Tree Exploration program (ETE).  
  8  # http://ete.cgenomics.org 
  9  #   
 10  # ETE is free software: you can redistribute it and/or modify it 
 11  # under the terms of the GNU General Public License as published by 
 12  # the Free Software Foundation, either version 3 of the License, or 
 13  # (at your option) any later version. 
 14  #   
 15  # ETE is distributed in the hope that it will be useful, 
 16  # but WITHOUT ANY WARRANTY; without even the implied warranty of 
 17  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
 18  # GNU General Public License for more details. 
 19  #   
 20  # You should have received a copy of the GNU General Public License 
 21  # along with ETE.  If not, see <http://www.gnu.org/licenses/>. 
 22  # 
 23  # #END_LICENSE############################################################# 
 24   
 25  from evolevents import EvolEvent 
 26   
 27  __all__ = ["get_evol_events_from_leaf", "get_evol_events_from_root"] 
 28   
def get_evol_events_from_leaf(node, sos_thr=0.0):
    """Return the duplication and speciation events involving a leaf.

    The tree is scanned from ``node`` up to the root.  At every ancestor
    the species seen so far are compared with the species found in the
    sister lineages, and the species overlap score (shared species divided
    by all species on both sides) decides the event type:

    * ``score > sos_thr``: duplication ("D"), reported only when the summed
      distance from the current node to its sister leaves is positive.
    * ``score <= sos_thr``: speciation ("S").

    Every classified ancestor is also tagged with an ``evoltype`` feature
    ("D" or "S"); any ``evoltype`` feature left by a previous analysis is
    removed from all nodes first.

    The method is described in more detail in:

    "The Human Phylome." Huerta-Cepas J, Dopazo H, Dopazo J, Gabaldon
    T. Genome Biol. 2007;8(6):R109.

    :param node: leaf used as the seed of the scan.  It must expose
        ``species`` and ``name`` attributes.
    :param sos_thr: species overlap score above which a node is called a
        duplication.
    :returns: list of ``EvolEvent`` objects ordered from the leaf towards
        the root.
    :raises TypeError: if the root does not have exactly two children.
    """
    # Get the tree's root
    root = node.get_tree_root()

    # Check that the tree is actually rooted
    outgroups = root.get_children()
    if len(outgroups) != 2:
        raise TypeError("Tree is not rooted")

    # Catch the smaller outgroup (stored as the tree outgroup).  Sizes are
    # compared by the number of distinct leaf names on each side.
    o1 = set(n.name for n in outgroups[0].get_leaves())
    o2 = set(n.name for n in outgroups[1].get_leaves())
    if len(o2) < len(o1):
        smaller_outg = outgroups[1]
    else:
        smaller_outg = outgroups[0]

    # Prepare to browse the tree from leaf to root
    all_events = []
    current = node
    ref_spcs = node.species
    browsed_spcs = set([current.species])
    browsed_leaves = set([current])

    # Family size: number of leaves that share the seed's species
    fam_size = sum(1 for n in root.get_leaves() if n.species == ref_spcs)

    # Clean previous analysis
    for n in root.get_descendants() + [root]:
        n.del_feature("evoltype")

    while current.up:
        parent = current.up

        # Distance control (0.0 distance check): accumulate the distance
        # from the current node to every leaf under its sisters.
        d = 0
        sister_leaves = set()
        for sister in current.get_sisters():
            for leaf in sister.get_leaves():
                d += current.get_distance(leaf)
                sister_leaves.add(leaf)

        # Process the sister lineages only if they bring any new sequence
        # (prevents duplications caused by repeated names).
        sister_leaves -= browsed_leaves
        if not sister_leaves:
            current = parent
            continue

        # Species at both sides of the event
        sister_spcs = set(n.species for n in sister_leaves)
        overlapped_spcs = browsed_spcs & sister_spcs
        all_spcs = browsed_spcs | sister_spcs
        score = float(len(overlapped_spcs)) / len(all_spcs)

        # Create a new EvolEvent
        event = EvolEvent()
        event.fam_size = fam_size
        event.seed = node.name
        event.sos = score
        event.outgroup = smaller_outg.name
        event.in_seqs = set(n.name for n in browsed_leaves)
        event.out_seqs = set(n.name for n in sister_leaves)
        event.inparalogs = set(
            n.name for n in browsed_leaves if n.species == ref_spcs)

        if score > sos_thr:
            # Species overlap: duplication.  No event is recorded when all
            # the distances to the sister leaves add up to zero.
            if d > 0.0:
                event.node = parent
                event.etype = "D"
                event.outparalogs = set(
                    n.name for n in sister_leaves if n.species == ref_spcs)
                event.orthologs = set()
                parent.add_feature("evoltype", "D")
                all_events.append(event)
        else:
            # Overlap at or below the threshold: speciation.  Comparing
            # with ``<=`` (rather than ``==``) keeps speciation events from
            # being silently dropped whenever sos_thr is greater than zero.
            event.node = parent
            event.etype = "S"
            event.orthologs = set(
                n.name for n in sister_leaves if n.species != ref_spcs)
            event.outparalogs = set()
            parent.add_feature("evoltype", "S")
            all_events.append(event)

        # Update browsed species and leaves, then keep ascending
        browsed_spcs |= sister_spcs
        browsed_leaves |= sister_leaves
        current = parent

    return all_events
133
def get_evol_events_from_root(node, sos_thr=0.0):
    """Return **all** duplication and speciation events found in the tree.

    Starting at the root of the tree that contains ``node``, every internal
    node is visited breadth-first.  For each one the species overlap score
    (species shared by both child lineages divided by all species under
    the node) decides the event type:

    * ``score > sos_thr``: duplication ("D").
    * otherwise: speciation ("S").

    Every internal node is also tagged with an ``evoltype`` feature ("D"
    or "S"); any ``evoltype`` feature left by a previous analysis is
    removed from all nodes first.

    The method is described in more detail in:

    "The Human Phylome." Huerta-Cepas J, Dopazo H, Dopazo J, Gabaldon
    T. Genome Biol. 2007;8(6):R109.

    :param node: any node of the tree; the scan always starts at its root.
    :param sos_thr: species overlap score above which a node is called a
        duplication.
    :returns: list of ``EvolEvent`` objects, one per internal node, in
        breadth-first order from the root.
    :raises TypeError: if the root does not have exactly two children, or
        if any internal node does not have exactly two children.
    """
    # Local import: a deque gives O(1) pops from the front of the queue.
    from collections import deque

    # Get the tree's root
    root = node.get_tree_root()

    # Check that the tree is actually rooted
    outgroups = root.get_children()
    if len(outgroups) != 2:
        raise TypeError("Tree is not rooted")

    # Catch the smaller outgroup (stored as the tree outgroup).  Sizes are
    # compared by the number of distinct leaf names on each side.
    o1 = set(n.name for n in outgroups[0].get_leaves())
    o2 = set(n.name for n in outgroups[1].get_leaves())
    if len(o2) < len(o1):
        smaller_outg = outgroups[1]
    else:
        smaller_outg = outgroups[0]

    # Family size: total number of leaves in the tree
    fam_size = len(list(root.get_leaves()))

    # Clean data from previous analyses
    for n in root.get_descendants() + [root]:
        n.del_feature("evoltype")

    # Browse the tree from root to leaves
    all_events = []
    to_visit = deque([root])
    while to_visit:
        current = to_visit.popleft()
        childs = current.get_children()
        if not childs:
            continue  # leaf
        # Single-child nodes are rejected here as well; otherwise they
        # would fail later with an unrelated IndexError.
        if len(childs) != 2:
            raise TypeError("nodes are expected to have two childs.")
        to_visit.extend(childs)
        side_a, side_b = childs

        # Leaves and species at both sides of the event
        side_a_leaves = set(side_a.get_leaves())
        side_b_leaves = set(side_b.get_leaves())
        side_a_spcs = set(n.species for n in side_a_leaves)
        side_b_spcs = set(n.species for n in side_b_leaves)

        # Species overlap score
        overlapped_spcs = side_a_spcs & side_b_spcs
        all_spcs = side_a_spcs | side_b_spcs
        score = float(len(overlapped_spcs)) / len(all_spcs)

        # Create a new EvolEvent
        event = EvolEvent()
        event.fam_size = fam_size
        event.branch_supports = [
            current.support, side_a.support, side_b.support]
        event.sos = score
        event.outgroup_spcs = smaller_outg.get_species()
        event.in_seqs = set(n.name for n in side_a_leaves)
        event.out_seqs = set(n.name for n in side_b_leaves)
        event.inparalogs = set(n.name for n in side_a_leaves)
        event.node = current

        side_b_names = set(n.name for n in side_b_leaves)
        if score > sos_thr:
            # Species overlap: duplication
            event.etype = "D"
            event.outparalogs = side_b_names
            event.orthologs = set()
        else:
            # No (or not enough) species overlap: speciation
            event.etype = "S"
            event.orthologs = side_b_names
            event.outparalogs = set()
        current.add_feature("evoltype", event.etype)

        all_events.append(event)

    return all_events
225