Coverage for /home/deng/Projects/metatree_drawer/treeprofiler_algo/pastml/pastml/parsimony.py: 16%

167 statements  

« prev     ^ index     » next       coverage.py v7.2.7, created at 2024-03-21 09:19 +0100

1import logging 

2from collections import Counter 

3 

4from pastml import get_personalized_feature_name, METHOD, STATES, CHARACTER, NUM_SCENARIOS, NUM_UNRESOLVED_NODES, \ 

5 NUM_NODES, NUM_TIPS, NUM_STATES_PER_NODE, PERC_UNRESOLVED 

6 

# Key under which the number of parsimonious steps is stored in the result dictionaries.
STEPS = 'steps'

# Names of the individual maximum parsimony prediction methods.
DOWNPASS = 'DOWNPASS'
ACCTRAN = 'ACCTRAN'
DELTRAN = 'DELTRAN'
# Name of the meta-method: when it is requested, ACCTRAN, DOWNPASS and DELTRAN results are all produced.
MP = 'MP'

MP_METHODS = {DOWNPASS, ACCTRAN, DELTRAN}
META_MP_METHODS = {MP}

# Suffixes passed to get_personalized_feature_name to build per-character node property names:
# state sets computed bottom-up (from the tips towards the root),
BU_PARS_STATES = 'BOTTOM_UP_PARSIMONY'
# state sets computed top-down (from the root towards the tips),
TD_PARS_STATES = 'TOP_DOWN_PARSIMONY'
# the resulting parsimonious state sets,
PARS_STATES = 'PARSIMONY'
# and the temporary state-to-number-of-steps mappings used while counting parsimonious steps.
PARS_STATE2NUM = 'PARSIMONY_STEPS'

21 

22 

def is_meta_mp(method):
    """
    Tells whether the method is a meta maximum parsimony method,
    i.e. a method that stands for a combination of several parsimonious methods (MP).

    :param method: prediction method
    :type method: str
    :return: True if the method is listed in META_MP_METHODS, False otherwise
    :rtype: bool
    """
    is_meta = method in META_MP_METHODS
    return is_meta

32 

33 

def get_default_mp_method():
    """
    Gives the name of the default maximum parsimony method.

    :return: the default maximum parsimony method (DOWNPASS)
    :rtype: str
    """
    default_method = DOWNPASS
    return default_method

36 

37 

def is_parsimonious(method):
    """
    Checks if the method is a maximum parsimony one, i.e. is either one of the individual
    parsimonious methods (DOWNPASS, ACCTRAN or DELTRAN) or the meta-method combining them (MP).

    :param method: the ancestral state prediction method used by PastML.
    :type method: str
    :return: whether the method is parsimonious
    :rtype: bool
    """
    # Reuse the module-level sets so that this check stays in sync with is_meta_mp.
    return method in MP_METHODS or method in META_MP_METHODS

49 

50 

def initialise_parsimonious_states(tree, feature, states):
    """
    Seeds every node of the tree with its starting parsimonious state sets.

    For each node the value stored under the feature is read. A single state given as a string
    is wrapped into a one-element set, and a missing or empty value (e.g. None)
    is replaced with the set of all possible states.
    The resulting set is stored on the node both as its bottom-up state set
    and as its (not yet refined) parsimonious state set.

    :param tree: the tree of interest
    :type tree: tree whose nodes expose traverse, props and add_prop
    :param feature: feature in which the node states are stored (the value could be None for a missing state)
    :type feature: str
    :param states: possible character states
    :type states: np.array(str)
    :return: nothing; adds the get_personalized_feature_name(feature, BU_PARS_STATES)
        and get_personalized_feature_name(feature, PARS_STATES) features to the tree nodes.
    """
    bottom_up_prop = get_personalized_feature_name(feature, BU_PARS_STATES)
    final_prop = get_personalized_feature_name(feature, PARS_STATES)
    every_state = set(states)

    for node in tree.traverse():
        value = node.props.get(feature, set())
        if isinstance(value, str):
            value = {value}
        # A node without a (non-empty) preset value can be in any of the states.
        node.add_prop(bottom_up_prop, value if value else every_state)
        node.add_prop(final_prop, node.props.get(bottom_up_prop))

76 

def get_most_common_states(state_iterable):
    """
    Gets the set of most common states among the state sets contained in the iterable argument.

    Each state is counted once per state set it appears in,
    and all the states reaching the maximal count are returned.

    :param state_iterable: iterable of state sets
    :type state_iterable: iterable
    :return: set of most common states (an empty set if the iterable contains no states at all)
    :rtype: set(str)
    """
    state_counter = Counter()
    for states in state_iterable:
        state_counter.update(states)
    # Nothing was counted (no state sets, or only empty ones): there is no most common state.
    if not state_counter:
        return set()
    max_count = max(state_counter.values())
    return {state for (state, count) in state_counter.items() if count == max_count}

91 

92 

def uppass(tree, feature):
    """
    UPPASS visits the tree in postorder (children before their parent)
    and refines the bottom-up state set of each internal node based on the states of its child nodes.

    if N is not a tip:
        C <- most common states among the bottom-up state sets of the children of N
        if S(N) intersects with C:
            S(N) <- intersection(S(N), C)
        else:
            S(N) is left as it was

    Tips keep the bottom-up state sets they already have.

    :param tree: the tree of interest
    :type tree: tree whose nodes expose traverse, props, add_prop, children and is_leaf
    :param feature: character for which the parsimonious states are reconstructed
    :type feature: str
    :return: nothing; updates the get_personalized_feature_name(feature, BU_PARS_STATES) feature
        of the internal tree nodes
    """
    bottom_up_prop = get_personalized_feature_name(feature, BU_PARS_STATES)

    for node in tree.traverse('postorder'):
        if node.is_leaf:
            continue
        child_state_sets = (child.props.get(bottom_up_prop) for child in node.children)
        frequent_states = get_most_common_states(child_state_sets)
        current_states = node.props.get(bottom_up_prop)
        shared_states = current_states & frequent_states
        if shared_states:
            node.add_prop(bottom_up_prop, shared_states)
        else:
            # The children's consensus contradicts the node's own states: keep the latter.
            node.add_prop(bottom_up_prop, current_states)

124 

125 

def acctran(tree, character, feature=PARS_STATES):
    """
    ACCTRAN (accelerated transformation) (Farris, 1970) aims at reducing the number of ambiguities
    in the parsimonious result. ACCTRAN forces the state changes to be performed as close to the root as possible,
    and therefore prioritises the reverse mutations.

    The tree is visited in preorder (a parent before its children):

    if N is the root:
        S(N) <- bottom-up states of N
    for each child C of N:
        if intersection(S(N), bottom-up states of C) is not empty:
            S(C) <- intersection(S(N), bottom-up states of C)
        else:
            S(C) <- bottom-up states of C

    :param tree: the tree of interest
    :type tree: tree whose nodes expose traverse, props, add_prop, children and is_root
    :param character: character for which the parsimonious states are reconstructed
        (its bottom-up states are read from get_personalized_feature_name(character, BU_PARS_STATES))
    :type character: str
    :param feature: feature where the reconstructed states are to be saved
    :type feature: str
    :return: nothing; adds the feature to the tree nodes
    """
    bottom_up_prop = get_personalized_feature_name(character, BU_PARS_STATES)

    for parent in tree.traverse('preorder'):
        if parent.is_root:
            parent.add_prop(feature, parent.props.get(bottom_up_prop))
        parent_states = parent.props.get(feature)
        for child in parent.children:
            candidate_states = child.props.get(bottom_up_prop)
            common_states = parent_states & candidate_states
            # Prefer the states shared with the parent, so that no change happens on this branch.
            child.add_prop(feature, common_states or candidate_states)

160 

161 

def downpass(tree, feature, states):
    """
    DOWNPASS visits the tree in preorder (from the root towards the tips),
    and for each node combines the state information coming from the rest of the tree (top-down states)
    with the one coming from its subtree (bottom-up states, calculated at UPPASS).

    if N is root:
        UP_S(N) <- set of all states
    else:
        P <- parent of N
        UP_S(N) <- most_common_states(UP_S(P), bottom-up states of each sibling of N)
    if N is a tip:
        D <- UP_S(N)
    else:
        D <- most_common_states(UP_S(N), bottom-up states of each child of N)
    if intersection(D, S(N)) is not empty:
        S(N) <- intersection(D, S(N))

    Once all the nodes are processed, the bottom-up and top-down features are removed from them.

    :param tree: the tree of interest
    :type tree: tree whose nodes expose traverse, props, add_prop, del_prop, up, children, is_root and is_leaf
    :param feature: character for which the parsimonious states are reconstructed
    :type feature: str
    :param states: possible character states
    :type states: np.array(str)
    :return: nothing; updates the get_personalized_feature_name(feature, PARS_STATES) feature of the tree nodes
    """
    bottom_up_prop = get_personalized_feature_name(feature, BU_PARS_STATES)
    top_down_prop = get_personalized_feature_name(feature, TD_PARS_STATES)
    final_prop = get_personalized_feature_name(feature, PARS_STATES)

    for node in tree.traverse('preorder'):
        # Step 1: states suggested by the part of the tree that is outside of the node's subtree.
        if node.is_root:
            from_above = set(states)
        else:
            parent = node.up
            evidence = [parent.props.get(top_down_prop)]
            evidence.extend(sibling.props.get(bottom_up_prop)
                            for sibling in parent.children if sibling != node)
            from_above = get_most_common_states(evidence)
        node.add_prop(top_down_prop, from_above)

        # Step 2: combine them with the states suggested by the node's children (if any).
        if node.is_leaf:
            combined_states = node.props.get(top_down_prop)
        else:
            votes = [node.props.get(top_down_prop)]
            votes.extend(child.props.get(bottom_up_prop) for child in node.children)
            combined_states = get_most_common_states(votes)

        # Step 3: restrict the node's current states to the combined ones, unless that leaves nothing.
        preset_states = node.props.get(final_prop)
        shared_states = combined_states & preset_states
        node.add_prop(final_prop, shared_states if shared_states else preset_states)

    # The intermediate state sets are not needed any more.
    for node in tree.traverse():
        node.del_prop(bottom_up_prop)
        node.del_prop(top_down_prop)

214 

215 

def deltran(tree, feature):
    """
    DELTRAN (delayed transformation) (Swofford & Maddison, 1987) aims at reducing the number of ambiguities
    in the parsimonious result. DELTRAN makes the changes as close as possible to the leaves,
    hence prioritizing parallel mutations. DELTRAN is performed after DOWNPASS.

    The tree is visited in preorder (a parent before its children):

    if N is not a root:
        P <- parent(N)
        if intersection(S(N), S(P)) is not empty:
            S(N) <- intersection(S(N), S(P))

    :param tree: the tree of interest
    :type tree: tree whose nodes expose traverse, props, add_prop, up and is_root
    :param feature: character for which the parsimonious states are reconstructed
    :type feature: str
    :return: nothing; modifies the get_personalized_feature_name(feature, PARS_STATES) feature of the tree nodes
    """
    final_prop = get_personalized_feature_name(feature, PARS_STATES)

    for node in tree.traverse('preorder'):
        if node.is_root:
            continue
        shared_states = node.props.get(final_prop) & node.up.props.get(final_prop)
        # Only narrow the node's states down when it is compatible with its (already processed) parent.
        if shared_states:
            node.add_prop(final_prop, shared_states)

246 

247 

def parsimonious_acr(forest, character, prediction_method, states, num_nodes, num_tips):
    """
    Calculates parsimonious states on the trees and stores them in the corresponding feature.

    Each tree is first initialised and processed with UPPASS. Then, depending on the prediction method,
    ACCTRAN and/or DOWNPASS (optionally followed by DELTRAN) are applied,
    and one result dictionary is produced per method that was requested (MP requests all three of them).
    The states of the requested method are stored on the nodes under the character name,
    or under get_personalized_feature_name(character, method) if the requested method is MP.
    All the intermediate node features are removed before returning.

    :param forest: trees of interest
    :type forest: list of trees whose nodes expose traverse, props, add_prop and del_prop
    :param character: character for which the parsimonious states are reconstructed
    :type character: str
    :param prediction_method: ACCTRAN (accelerated transformation), DELTRAN (delayed transformation), DOWNPASS or MP
    :type prediction_method: str
    :param states: possible character states
    :type states: np.array(str)
    :param num_nodes: total number of nodes in the forest (must not be zero, as averages are divided by it)
    :type num_nodes: int
    :param num_tips: total number of tips in the forest
    :type num_tips: int
    :return: mappings between reconstruction parameters and values, one mapping per method applied
    :rtype: list(dict)
    """
    for tree in forest:
        initialise_parsimonious_states(tree, character, states)
        uppass(tree, character)

    results = []
    # Values shared by all the methods; STEPS is (re)calculated for each method before it is processed.
    result = {STATES: states, NUM_NODES: num_nodes, NUM_TIPS: num_tips}

    logger = logging.getLogger('pastml')

    def process_result(method, feature):
        # Copies the states found by the method to the output feature and records the method's statistics.
        out_feature = get_personalized_feature_name(character, method) if prediction_method != method else character
        res = result.copy()
        res[NUM_SCENARIOS], res[NUM_UNRESOLVED_NODES], res[NUM_STATES_PER_NODE] = 1, 0, 0
        for tree in forest:
            ns, nun, nspn = choose_parsimonious_states(tree, feature, out_feature)
            res[NUM_SCENARIOS] *= ns
            res[NUM_UNRESOLVED_NODES] += nun
            res[NUM_STATES_PER_NODE] += nspn
        res[NUM_STATES_PER_NODE] /= num_nodes
        res[PERC_UNRESOLVED] = res[NUM_UNRESOLVED_NODES] * 100 / num_nodes
        logger.debug('{} node{} unresolved ({:.2f}%) for {} by {}, '
                     'i.e. {:.4f} state{} per node in average.'
                     .format(res[NUM_UNRESOLVED_NODES], 's are' if res[NUM_UNRESOLVED_NODES] != 1 else ' is',
                             res[PERC_UNRESOLVED], character, method,
                             res[NUM_STATES_PER_NODE], 's' if res[NUM_STATES_PER_NODE] > 1 else ''))
        res[CHARACTER] = out_feature
        res[METHOD] = method
        results.append(res)

    if prediction_method in {ACCTRAN, MP}:
        feature = get_personalized_feature_name(character, PARS_STATES)
        if prediction_method == MP:
            # DOWNPASS needs the PARS_STATES feature untouched, so ACCTRAN works on a feature of its own.
            feature = get_personalized_feature_name(feature, ACCTRAN)
        result[STEPS] = 0
        for tree in forest:
            acctran(tree, character, feature)
            result[STEPS] += get_num_parsimonious_steps(tree, feature)
        process_result(ACCTRAN, feature)

        bu_feature = get_personalized_feature_name(character, BU_PARS_STATES)
        for tree in forest:
            for node in tree.traverse():
                # For MP the bottom-up states are still needed (and later removed) by DOWNPASS.
                if prediction_method == ACCTRAN:
                    node.del_prop(bu_feature)
                node.del_prop(feature)

    if prediction_method != ACCTRAN:
        feature = get_personalized_feature_name(character, PARS_STATES)
        result[STEPS] = 0
        for tree in forest:
            # DOWNPASS is always run here, as DELTRAN refines its result.
            downpass(tree, character, states)
            if prediction_method in {DOWNPASS, MP}:
                result[STEPS] += get_num_parsimonious_steps(tree, feature)
        if prediction_method in {DOWNPASS, MP}:
            process_result(DOWNPASS, feature)
        if prediction_method in {DELTRAN, MP}:
            # Restart the count for DELTRAN. The reset is done only here, so that for DOWNPASS alone
            # the number of steps is still available for the final log message below.
            result[STEPS] = 0
            for tree in forest:
                deltran(tree, character)
                result[STEPS] += get_num_parsimonious_steps(tree, feature)
            process_result(DELTRAN, feature)
        for tree in forest:
            for node in tree.traverse():
                node.del_prop(feature)

    logger.debug("Parsimonious reconstruction for {} requires {} state changes."
                 .format(character, result[STEPS]))
    return results

335 

336 

def choose_parsimonious_states(tree, ps_feature, out_feature):
    """
    Copies the parsimonious states of each node to out_feature and calculates the parsimony statistics.

    :param tree: the tree of interest
    :type tree: tree whose nodes expose traverse, props and add_prop
    :param ps_feature: feature containing each node parsimonious states
    :type ps_feature: str
    :param out_feature: feature where the node parsimonious states are to be copied
    :type out_feature: str
    :return: number of ancestral scenarios selected (product of the node state set sizes),
        number of unresolved nodes (nodes with more than one state), and total number of selected states
    :rtype: tuple(int, int, int)
    """
    scenario_count, ambiguous_count, state_total = 1, 0, 0
    for node in tree.traverse():
        node_states = node.props.get(ps_feature)
        node.add_prop(out_feature, node_states)
        size = len(node_states)
        scenario_count *= size
        state_total += size
        if size > 1:
            ambiguous_count += 1
    return scenario_count, ambiguous_count, state_total

361 

362 

def get_num_parsimonious_steps(tree, feature):
    """
    Calculates the minimal number of state changes in the tree
    that is compatible with the state sets stored in its nodes.

    The tree is visited in postorder, and for each node a temporary mapping is built,
    which for each of the node's states gives the minimal number of changes needed in the node's subtree:
    0 for the tip states, and for an internal node state the sum over the children of the cheapest
    child state option (counting one additional change if the child state differs from the node one).
    The temporary mappings are removed from the nodes once they have been used.

    :param tree: the tree of interest
    :type tree: tree whose nodes expose traverse, props, add_prop, del_prop, children and is_leaf
    :param feature: feature containing each node parsimonious states
    :type feature: str
    :return: the minimal number of steps over the root states
    :rtype: int
    """
    cost_prop = get_personalized_feature_name(feature, PARS_STATE2NUM)

    for node in tree.traverse('postorder'):
        if node.is_leaf:
            node.add_prop(cost_prop, dict.fromkeys(node.props.get(feature), 0))
            continue

        state2cost = {}
        for state in node.props.get(feature):
            total = 0
            for child in node.children:
                child_costs = child.props.get(cost_prop)
                total += min(child_cost if state == child_state else child_cost + 1
                             for child_state, child_cost in child_costs.items())
            state2cost[state] = total
        node.add_prop(cost_prop, state2cost)
        # The children's mappings are fully accounted for in the parent's one.
        for child in node.children:
            child.del_prop(cost_prop)

    root_costs = tree.props.get(cost_prop)
    tree.del_prop(cost_prop)
    return min(root_costs.values())