Coverage for /home/deng/Projects/metatree_drawer/treeprofiler_algo/pastml/pastml/parsimony.py: 16%
167 statements
« prev ^ index » next coverage.py v7.2.7, created at 2024-03-21 09:19 +0100
« prev ^ index » next coverage.py v7.2.7, created at 2024-03-21 09:19 +0100
1import logging
2from collections import Counter
4from pastml import get_personalized_feature_name, METHOD, STATES, CHARACTER, NUM_SCENARIOS, NUM_UNRESOLVED_NODES, \
5 NUM_NODES, NUM_TIPS, NUM_STATES_PER_NODE, PERC_UNRESOLVED
# Key under which the number of parsimonious steps is stored in the result dictionaries.
STEPS = 'steps'

# Names of the individual maximum parsimony prediction methods.
DOWNPASS = 'DOWNPASS'
ACCTRAN = 'ACCTRAN'
DELTRAN = 'DELTRAN'
# Name of the meta-method: parsimonious_acr runs ACCTRAN, DOWNPASS and DELTRAN when it is requested.
MP = 'MP'

# Sets used by is_parsimonious / is_meta_mp to classify a method name.
MP_METHODS = {DOWNPASS, ACCTRAN, DELTRAN}
META_MP_METHODS = {MP}

# Suffixes combined with a character name (via get_personalized_feature_name)
# to build the names of the node properties used during the reconstruction.
BU_PARS_STATES = 'BOTTOM_UP_PARSIMONY'
TD_PARS_STATES = 'TOP_DOWN_PARSIMONY'
PARS_STATES = 'PARSIMONY'
PARS_STATE2NUM = 'PARSIMONY_STEPS'
def is_meta_mp(method):
    """
    Tells whether the given method is a meta maximum parsimony method,
    i.e. one that combines several parsimonious methods (currently only MP).

    :param method: prediction method
    :type method: str
    :return: True if the method belongs to META_MP_METHODS, False otherwise
    :rtype: bool
    """
    is_meta = method in META_MP_METHODS
    return is_meta
def get_default_mp_method():
    """
    Returns the name of the default maximum parsimony method.

    :return: the DOWNPASS method name
    :rtype: str
    """
    return DOWNPASS
def is_parsimonious(method):
    """
    Checks if the method is a maximum parsimony one, i.e. is either one of the individual
    parsimonious methods (DOWNPASS, ACCTRAN or DELTRAN) or the meta-method combining them (MP).

    :param method: the ancestral state prediction method used by PastML.
    :type method: str
    :return: whether the method is parsimonious
    :rtype: bool
    """
    # Two membership tests on the module-level sets avoid building a new union set on every call
    # and keep this check in sync with is_meta_mp.
    return method in MP_METHODS or method in META_MP_METHODS
def initialise_parsimonious_states(tree, feature, states):
    """
    Sets the initial bottom-up and final parsimonious state sets on every node of the tree.

    For each node the value stored in the feature property is read:
    a string value is wrapped into a one-element set,
    an empty or missing value is replaced by the set of all possible states,
    any other value is used as is.

    :param tree: the tree of interest
    :type tree: tree whose nodes provide ``props`` and ``add_prop`` (documented upstream as ete3.Tree)
    :param feature: feature in which the node states are stored (the value could be None for a missing state)
    :type feature: str
    :param states: possible character states
    :type states: np.array(str)
    :return: adds the get_personalized_feature_name(feature, BU_PARS_STATES)
        and get_personalized_feature_name(feature, PARS_STATES) properties to the tree nodes.
    """
    bottom_up_key = get_personalized_feature_name(feature, BU_PARS_STATES)
    final_key = get_personalized_feature_name(feature, PARS_STATES)
    every_state = set(states)

    for node in tree.traverse():
        known = node.props.get(feature, set())
        if isinstance(known, str):
            known = {known}
        # An unknown state means that any of the possible states is allowed.
        node.add_prop(bottom_up_key, known if known else every_state)
        node.add_prop(final_key, node.props.get(bottom_up_key))
def get_most_common_states(state_iterable):
    """
    Gets the set of most common states among the state sets contained in the iterable argument.

    Each state is counted once per state set it appears in; all the states that reach
    the maximal count are returned.

    :param state_iterable: iterable of state sets
    :type state_iterable: iterable
    :return: set of most common states (empty if no state was encountered at all)
    :rtype: set(str)
    """
    state_counter = Counter()
    for states in state_iterable:
        state_counter.update(states)
    # Nothing to count (empty iterable or only empty state sets): there is no most common state.
    if not state_counter:
        return set()
    max_count = max(state_counter.values())
    return {state for state, count in state_counter.items() if count == max_count}
def uppass(tree, feature):
    """
    UPPASS visits the tree in postorder (children before their parent)
    and restricts the bottom-up states of each internal node using the states of its children.

    if N is not a tip:
        C_1, ..., C_k <- children of N (already processed)
        M <- most_common_states(S(C_1), ..., S(C_k))
        if S(N) intersects with M:
            S(N) <- intersection(S(N), M)
        (otherwise S(N) is kept as it was initialised)

    :param tree: the tree of interest
    :type tree: tree whose nodes provide ``props``, ``add_prop``, ``is_leaf`` and ``children``
        (documented upstream as ete3.Tree)
    :param feature: character for which the parsimonious states are reconstructed
    :type feature: str
    :return: updates the get_personalized_feature_name(feature, BU_PARS_STATES) property of the internal nodes
    """
    bottom_up_key = get_personalized_feature_name(feature, BU_PARS_STATES)

    for node in tree.traverse('postorder'):
        if node.is_leaf:
            # Tip states were set at the initialisation and stay untouched.
            continue
        from_children = get_most_common_states(child.props.get(bottom_up_key) for child in node.children)
        own_states = node.props.get(bottom_up_key)
        shared = own_states & from_children
        node.add_prop(bottom_up_key, shared or own_states)
def acctran(tree, character, feature=PARS_STATES):
    """
    ACCTRAN (accelerated transformation) (Farris, 1970) reduces the number of ambiguities
    in the parsimonious result by placing the state changes as close to the root as possible
    (hence prioritising the reverse mutations).

    The tree is visited in preorder. The root gets its bottom-up states, and then for each node N
    and each of its children C:
        if intersection(S(N), BU(C)) is not empty:
            S(C) <- intersection(S(N), BU(C))
        else:
            S(C) <- BU(C)
    where BU(.) are the bottom-up states calculated at UPPASS.

    :param tree: the tree of interest
    :type tree: tree whose nodes provide ``props``, ``add_prop``, ``is_root`` and ``children``
        (documented upstream as ete3.Tree)
    :param character: character for which the parsimonious states are reconstructed
    :type character: str
    :param feature: name of the node property where the reconstructed states are to be saved
    :type feature: str
    :return: adds the property named by the feature argument to the tree nodes
    """
    bottom_up_key = get_personalized_feature_name(character, BU_PARS_STATES)

    for parent in tree.traverse('preorder'):
        if parent.is_root:
            # The root has no parent to agree with: it keeps its bottom-up states.
            parent.add_prop(feature, parent.props.get(bottom_up_key))
        parent_states = parent.props.get(feature)
        for child in parent.children:
            bottom_up_states = child.props.get(bottom_up_key)
            shared = parent_states & bottom_up_states
            child.add_prop(feature, shared or bottom_up_states)
def downpass(tree, feature, states):
    """
    DOWNPASS visits the tree in preorder (from the root towards the tips),
    and for each node combines the state information coming from the rest of the tree (top-down states)
    with the one coming from its subtree (bottom-up states calculated at UPPASS).

    for each node N:
        if N is root:
            TD(N) <- set of all states
        else:
            P <- parent of N
            TD(N) <- most_common_states(TD(P), BU(sibling) for each sibling of N)
        if N is a tip:
            D(N) <- TD(N)
        else:
            D(N) <- most_common_states(TD(N), BU(child) for each child of N)
        if intersection(D(N), S(N)) is not empty:
            S(N) <- intersection(D(N), S(N))

    Once all the nodes are processed, the bottom-up and top-down properties are removed from them.

    :param tree: the tree of interest
    :type tree: tree whose nodes provide ``props``, ``add_prop``, ``del_prop``, ``is_root``, ``is_leaf``,
        ``up`` and ``children`` (documented upstream as ete3.Tree)
    :param feature: character for which the parsimonious states are reconstructed
    :type feature: str
    :param states: possible character states
    :type states: np.array(str)
    :return: updates the get_personalized_feature_name(feature, PARS_STATES) property of the tree nodes
    """
    bottom_up_key = get_personalized_feature_name(feature, BU_PARS_STATES)
    top_down_key = get_personalized_feature_name(feature, TD_PARS_STATES)
    final_key = get_personalized_feature_name(feature, PARS_STATES)

    for node in tree.traverse('preorder'):
        # Step 1: states suggested by the part of the tree outside of the node's subtree.
        if node.is_root:
            top_down_states = set(states)
        else:
            parent = node.up
            candidates = [parent.props.get(top_down_key)]
            candidates += [sibling.props.get(bottom_up_key) for sibling in parent.children
                           if sibling != node]
            top_down_states = get_most_common_states(candidates)
        node.add_prop(top_down_key, top_down_states)

        # Step 2: combine them with the states suggested by the children (if any).
        if node.is_leaf:
            combined = node.props.get(top_down_key)
        else:
            combined = get_most_common_states([node.props.get(top_down_key)]
                                              + [child.props.get(bottom_up_key) for child in node.children])

        # Step 3: narrow down the current states of the node, unless that would leave nothing.
        current = node.props.get(final_key)
        shared = combined & current
        node.add_prop(final_key, shared if shared else current)

    # The intermediate state sets are not needed any more.
    for node in tree.traverse():
        node.del_prop(bottom_up_key)
        node.del_prop(top_down_key)
def deltran(tree, feature):
    """
    DELTRAN (delayed transformation) (Swofford & Maddison, 1987) reduces the number of ambiguities
    in the parsimonious result by placing the state changes as close to the tips as possible
    (hence prioritising parallel mutations). DELTRAN is to be performed after DOWNPASS.

    The tree is visited in preorder, and for each non-root node N with the parent P:
        if intersection(S(N), S(P)) is not empty:
            S(N) <- intersection(S(N), S(P))

    :param tree: the tree of interest
    :type tree: tree whose nodes provide ``props``, ``add_prop``, ``is_root`` and ``up``
        (documented upstream as ete3.Tree)
    :param feature: character for which the parsimonious states are reconstructed
    :type feature: str
    :return: modifies the get_personalized_feature_name(feature, PARS_STATES) property of the tree nodes
    """
    states_key = get_personalized_feature_name(feature, PARS_STATES)

    for node in tree.traverse('preorder'):
        if node.is_root:
            continue
        shared = node.props.get(states_key) & node.up.props.get(states_key)
        if shared:
            node.add_prop(states_key, shared)
def parsimonious_acr(forest, character, prediction_method, states, num_nodes, num_tips):
    """
    Calculates parsimonious states on the trees and stores them in the corresponding feature.

    All the trees are first initialised and processed with UPPASS. Then, depending on the prediction method,
    ACCTRAN and/or DOWNPASS (optionally followed by DELTRAN) are applied,
    and one result dictionary is produced per applied method whose result is requested
    (MP requests ACCTRAN, DOWNPASS and DELTRAN).
    The intermediate node properties are removed from the trees before returning.

    :param forest: trees of interest
    :type forest: list of trees (documented upstream as list(ete3.Tree))
    :param character: character for which the parsimonious states are reconstructed
    :type character: str
    :param prediction_method: ACCTRAN (accelerated transformation), DELTRAN (delayed transformation), DOWNPASS or MP
    :type prediction_method: str
    :param states: possible character states
    :type states: np.array(str)
    :param num_nodes: total number of nodes in the forest
    :type num_nodes: int
    :param num_tips: total number of tips in the forest
    :type num_tips: int
    :return: list of mappings between reconstruction parameters and values (one per processed method)
    :rtype: list(dict)
    """
    for tree in forest:
        initialise_parsimonious_states(tree, character, states)
        uppass(tree, character)

    results = []
    # Template shared by all the per-method results; process_result copies it,
    # so the STEPS value of a result is the one accumulated at the moment of the copy.
    result = {STATES: states, NUM_NODES: num_nodes, NUM_TIPS: num_tips}

    logger = logging.getLogger('pastml')

    def process_result(method, feature):
        # The states are saved in the character property itself if the method is the one requested by the user,
        # and in a method-specific property otherwise (i.e. when the requested method is the meta-method MP).
        out_feature = get_personalized_feature_name(character, method) if prediction_method != method else character
        res = result.copy()
        res[NUM_SCENARIOS], res[NUM_UNRESOLVED_NODES], res[NUM_STATES_PER_NODE] = 1, 0, 0
        for tree in forest:
            ns, nun, nspn = choose_parsimonious_states(tree, feature, out_feature)
            res[NUM_SCENARIOS] *= ns
            res[NUM_UNRESOLVED_NODES] += nun
            res[NUM_STATES_PER_NODE] += nspn
        res[NUM_STATES_PER_NODE] /= num_nodes
        res[PERC_UNRESOLVED] = res[NUM_UNRESOLVED_NODES] * 100 / num_nodes
        logger.debug('{} node{} unresolved ({:.2f}%) for {} by {}, '
                     'i.e. {:.4f} state{} per node in average.'
                     .format(res[NUM_UNRESOLVED_NODES], 's are' if res[NUM_UNRESOLVED_NODES] != 1 else ' is',
                             res[PERC_UNRESOLVED], character, method,
                             res[NUM_STATES_PER_NODE], 's' if res[NUM_STATES_PER_NODE] > 1 else ''))
        res[CHARACTER] = out_feature
        res[METHOD] = method
        results.append(res)

    if prediction_method in {ACCTRAN, MP}:
        feature = get_personalized_feature_name(character, PARS_STATES)
        if prediction_method == MP:
            # With MP, DOWNPASS is run afterwards and uses the PARS_STATES property,
            # so ACCTRAN states are kept in a separate one.
            feature = get_personalized_feature_name(feature, ACCTRAN)
        result[STEPS] = 0
        for tree in forest:
            acctran(tree, character, feature)
            result[STEPS] += get_num_parsimonious_steps(tree, feature)
        process_result(ACCTRAN, feature)

        bu_feature = get_personalized_feature_name(character, BU_PARS_STATES)
        for tree in forest:
            for node in tree.traverse():
                # With MP the bottom-up states are still needed by DOWNPASS (which removes them itself).
                if prediction_method == ACCTRAN:
                    node.del_prop(bu_feature)
                node.del_prop(feature)

    if prediction_method != ACCTRAN:
        feature = get_personalized_feature_name(character, PARS_STATES)
        result[STEPS] = 0
        for tree in forest:
            downpass(tree, character, states)
            if prediction_method in {DOWNPASS, MP}:
                result[STEPS] += get_num_parsimonious_steps(tree, feature)
        if prediction_method in {DOWNPASS, MP}:
            process_result(DOWNPASS, feature)
        if prediction_method in {DELTRAN, MP}:
            # Restart the count only when DELTRAN is actually run:
            # resetting it unconditionally made the final message report 0 state changes for DOWNPASS.
            result[STEPS] = 0
            for tree in forest:
                deltran(tree, character)
                result[STEPS] += get_num_parsimonious_steps(tree, feature)
            process_result(DELTRAN, feature)
        for tree in forest:
            for node in tree.traverse():
                node.del_prop(feature)

    logger.debug("Parsimonious reconstruction for {} requires {} state changes."
                 .format(character, result[STEPS]))
    return results
def choose_parsimonious_states(tree, ps_feature, out_feature):
    """
    Copies the parsimonious states of each node to out_feature and calculates the parsimony statistics.

    :param tree: the tree of interest
    :type tree: tree whose nodes provide ``props`` and ``add_prop`` (documented upstream as ete3.Tree)
    :param ps_feature: feature containing each node parsimonious states
    :type ps_feature: str
    :param out_feature: feature where the node parsimonious states are to be copied
    :type out_feature: str
    :return: number of ancestral scenarios selected (product of the numbers of states over the nodes),
        number of unresolved nodes (those with more than one state), and total number of selected states
    :rtype: tuple(int, int, int)
    """
    scenario_count, ambiguous_nodes, total_states = 1, 0, 0
    for node in tree.traverse():
        node_states = node.props.get(ps_feature)
        node.add_prop(out_feature, node_states)
        size = len(node_states)
        scenario_count *= size
        if size > 1:
            ambiguous_nodes += 1
        total_states += size
    return scenario_count, ambiguous_nodes, total_states
def get_num_parsimonious_steps(tree, feature):
    """
    Calculates the minimal number of state changes needed to explain the states stored in the feature property.

    The tree is visited in postorder. For each node and each of its states, the minimal number of changes
    in the node's subtree is computed: a tip state costs 0, while for an internal node state it is the sum
    over the children of the cheapest child state cost (plus 1 if the child state differs from the node one).
    The temporary property holding these costs is removed from the nodes once it is not needed any more.

    :param tree: the tree of interest
    :type tree: tree whose nodes provide ``props``, ``add_prop``, ``del_prop``, ``is_leaf`` and ``children``
    :param feature: name of the node property containing the state sets
    :type feature: str
    :return: the minimal number of changes over the root states
    :rtype: int
    """
    cost_key = get_personalized_feature_name(feature, PARS_STATE2NUM)

    for node in tree.traverse('postorder'):
        if node.is_leaf:
            node.add_prop(cost_key, dict.fromkeys(node.props.get(feature), 0))
            continue
        state_costs = {}
        for state in node.props.get(feature):
            state_costs[state] = sum(
                min(((0 if state == child_state else 1) + child_cost)
                    for (child_state, child_cost) in child.props.get(cost_key).items())
                for child in node.children)
        node.add_prop(cost_key, state_costs)
        # The children costs were only needed to compute the ones of their parent.
        for child in node.children:
            child.del_prop(cost_key)

    root_costs = tree.props.get(cost_key)
    tree.del_prop(cost_key)
    return min(root_costs.values())