Coverage for /home/deng/Projects/ete4/hackathon/ete4/ete4/phylo/reconciliation.py: 7%

103 statements  

« prev     ^ index     » next       coverage.py v7.2.7, created at 2024-03-21 09:19 +0100

1import copy 

2from .evolevents import EvolEvent 

3 

4 

def get_reconciled_tree(node, sptree, events):
    """Return the reconciled gene tree for a provided species topology.

    The gene tree is processed recursively in post-order: both children
    of ``node`` are reconciled first, then ``node`` itself is classified
    by comparing the species sets of its two reconciled children.

    * Overlapping species sets: duplication ("D"). A detached deep copy
      of ``node`` receives two children, each one built from the expected
      species topology with one child's partition grafted into it.
    * Disjoint species sets: speciation ("S"). Both partitions are
      grafted into a single copy of the expected species topology.

    In both cases the "evoltype" property is also set on the *input*
    ``node``, and an ``EvolEvent`` describing the split is appended to
    ``events``.

    :argument node: gene tree node to reconcile (0 or 2 children).

    :argument sptree: species tree used to derive the expected topology.

    :argument events: list that collects the detected events; it is
      extended in place and also returned.

    :returns: a ``(reconciled_node, events)`` tuple. For a leaf,
      ``reconciled_node`` is a deep copy of ``node``.

    :raises ValueError: if a node has a number of children other than
      0 or 2.
    """
    n_children = len(node.children)
    if n_children == 0:
        return copy.deepcopy(node), events
    if n_children != 2:
        raise ValueError("Algorithm can only work with binary trees.")

    # First visit the children (recursive post-order). The reconciled
    # children are trusted from here on.
    morphed_childs = [get_reconciled_tree(ch, sptree, events)[0]
                      for ch in node.children]

    sp_child_0 = morphed_childs[0].get_species()
    sp_child_1 = morphed_childs[1].get_species()
    all_species = sp_child_1 | sp_child_0

    # Topology expected for the observed species.
    template = _get_expected_topology(sptree, all_species)

    # leaf_names() may hand back a one-shot iterator (NOTE(review): it is
    # believed to be a generator in ete4 -- confirm). Materialize it so
    # every event field can be read more than once; each field gets its
    # own list object.
    side0 = list(node.children[0].leaf_names())
    side1 = list(node.children[1].leaf_names())

    e = EvolEvent()
    e.in_seqs = list(side0)
    e.out_seqs = list(side1)
    e.inparalogs = side0

    if len(sp_child_0 & sp_child_1) > 0:
        # Children share species: duplication. Both sides must be
        # reconciled to the same (full) species set, so each child is
        # grafted into its own copy of the template.
        newnode = copy.deepcopy(node)
        newnode.up = None
        newnode.remove_children()
        newmorphed0 = _replace_on_template(template, morphed_childs[0])[0]
        newmorphed1 = _replace_on_template(template, morphed_childs[1])[0]
        newnode.add_child(newmorphed0)
        newnode.add_child(newmorphed1)
        newnode.add_prop("evoltype", "D")
        node.add_prop("evoltype", "D")
        e.etype = "D"
        e.outparalogs = side1
        events.append(e)
        return newnode, events

    # Otherwise the species at both sides are reconciled into a single
    # partition: child 0 is grafted first, then child 1 is grafted into
    # the result.
    template = _replace_on_template(template, morphed_childs[0])[0]
    template = _replace_on_template(template, morphed_childs[1])[0]
    template.add_prop("evoltype", "S")
    node.add_prop("evoltype", "S")
    e.etype = "S"
    e.orthologs = side1
    events.append(e)
    return template, events

67 

68def _replace_on_template(orig_template, node): 

69 template = copy.deepcopy(orig_template) 

70 # detects partition within topo that matchs child1 species 

71 nodespcs = node.get_species() 

72 spseed = list(nodespcs)[0] # any sp name woulbe ok 

73 # Set an start point 

74 subtopo = list(template.search_nodes(children=[], name=spseed))[0] 

75 # While subtopo does not cover all child species 

76 while len(nodespcs - set(subtopo.leaf_names() ) )>0: 

77 subtopo= subtopo.up 

78 # Puts original partition on the expected topology template 

79 nodecp = copy.deepcopy(node) 

80 if subtopo.up is None: 

81 return nodecp, nodecp 

82 else: 

83 parent = subtopo.up 

84 parent.remove_child(subtopo) 

85 parent.add_child(nodecp) 

86 return template, nodecp 

87 

88def _get_expected_topology(t, species): 

89 missing_sp = set(species) - set(t.leaf_names()) 

90 if missing_sp: 

91 raise KeyError("* The following species are not contained in the species tree: "+ ','.join(missing_sp) ) 

92 

93 node = list(t.search_nodes(children=[], name=list(species)[0]))[0] 

94 

95 sps = set(species) 

96 while sps-set(node.leaf_names()) != set([]): 

97 node = node.up 

98 template = copy.deepcopy(node) 

99 # make get_species() to work 

100 #template._speciesFunction = _get_species_on_TOL 

101 template.set_species_naming_function(_get_species_on_TOL) 

102 template.detach() 

103 for n in [template]+list(template.descendants()): 

104 n.add_prop("evoltype", "L") 

105 n.dist = 1 

106 return template 

107 

108def _get_species_on_TOL(name): 

109 return name 

110 

def get_reconciled_tree_zmasek(gtree, sptree, inplace=False):
    """
    Reconciles the gene tree with the species tree
    using Zmasek and Eddy's algorithm. Details can be
    found in the paper:

    Christian M. Zmasek, Sean R. Eddy: A simple algorithm
    to infer gene duplication and speciation events on a
    gene tree. Bioinformatics 17(9): 821-828 (2001)

    Every internal node of the gene tree gets an "evoltype" property:
    "D" (duplication) when the node maps to the same species-tree node
    as one of its children, "S" (speciation) otherwise. The temporary
    "M" property used for the mapping is removed before returning or
    raising from the mapping phase.

    :argument gtree: gene tree (PhyloTree instance)

    :argument sptree: species tree (PhyloTree instance)

    :argument False inplace: if True, the provided gene tree instance is
      modified. Otherwise a reconciled copy of the gene tree is returned.

    :returns: reconciled gene tree

    :raises KeyError: if the gene tree contains species that are not in
      the species tree.

    :raises ValueError: if an internal node of the gene tree does not
      have exactly two children.
    """
    def cleanup(tree):
        # Drop the temporary mapping from every node.
        for node in tree.traverse():
            node.del_prop("M")

    if not inplace:
        gtree = gtree.copy('deepcopy')

    # Check for missing species (sorted so the message is reproducible).
    missing_sp = gtree.get_species() - sptree.get_species()
    if missing_sp:
        raise KeyError("* The following species are not contained in the species tree: " + ', '.join(sorted(missing_sp)))

    # Species name -> leaf of the species tree.
    sp2node = {node.species: node for node in sptree.leaves()}

    try:
        # Set the mapping function M(g) for the leaf nodes in the gene
        # tree (see paper for details).
        for node in gtree.leaves():
            node.add_prop("M", sp2node[node.species])

        # Visit each internal node in the gene tree and detect its event
        # (duplication or speciation). Post-order guarantees that both
        # children are mapped before their parent.
        for node in gtree.traverse(strategy="postorder"):
            if len(node.children) == 0:
                continue  # nothing to do for leaf nodes

            if len(node.children) != 2:
                raise ValueError("Algorithm can only work with binary trees.")

            left, right = node.children[0], node.children[1]
            lca = left.M.get_common_ancestor(right.M)  # LCA in the species tree
            node.add_prop("M", lca)

            # A node mapped to the same species-tree node as one of its
            # children is a duplication.
            if left.M is lca or right.M is lca:
                node.add_prop("evoltype", "D")
            else:
                node.add_prop("evoltype", "S")
    finally:
        # Always remove the temporary "M" props, also on unexpected
        # errors, so an in-place call never leaves them behind.
        cleanup(gtree)

    return gtree