Coverage for /home/deng/Projects/metatree_drawer/metatreedrawer/treeprofiler/src/ls.py: 16%
44 statements
« prev ^ index » next coverage.py v7.2.7, created at 2024-08-07 10:33 +0200
« prev ^ index » next coverage.py v7.2.7, created at 2024-08-07 10:33 +0200
1#!/usr/bin/env python3
2try:
3 from distutils.util import strtobool
4except ImportError:
5 from treeprofiler.src.utils import strtobool
7from treeprofiler.src.utils import add_suffix
9# Lineage specificity analysis
10# Function to calculate precision, sensitivity, and F1 score
def calculate_metrics(node, total_with_trait, prop):
    """
    Compute precision, sensitivity and F1 score of a trait for one clade.

    Precision is the fraction of leaves under ``node`` that carry the trait,
    sensitivity is the fraction of ``total_with_trait`` found under ``node``,
    and F1 is the harmonic mean of the two. Any ratio whose denominator is
    zero is reported as 0 instead of raising ZeroDivisionError.

    :param node: The clade root; must expose ``is_leaf`` and ``leaves()``.
    :param total_with_trait: Count of trait-carrying leaves used as the
        sensitivity denominator (``run_ls`` passes the whole-tree count).
    :param prop: The property name holding the trait, read via bool_checker.
    :return: Tuple ``(precision, sensitivity, f1)``; ``(0, 0, 0)`` for a leaf.
    """
    if node.is_leaf:
        return 0, 0, 0

    # Count the clade size and the trait carriers in one pass over the leaves
    # instead of traversing the clade twice.
    clade_total = 0
    clade_with_trait = 0
    for leaf in node.leaves():
        clade_total += 1
        if bool_checker(leaf, prop):
            clade_with_trait += 1

    precision = clade_with_trait / clade_total if clade_total else 0
    sensitivity = clade_with_trait / total_with_trait if total_with_trait else 0
    denominator = precision + sensitivity
    f1 = 2 * (precision * sensitivity) / denominator if denominator else 0
    return precision, sensitivity, f1
21# Total number of nodes with the trait
def get_total_trait(tree, prop):
    """
    Count the leaves of a tree that carry a trait.

    :param tree: The tree (or subtree) whose ``leaves()`` are inspected.
    :param prop: The property name holding the trait, read via bool_checker.
    :return: Number of leaves for which bool_checker returns True.
    """
    return sum(1 for leaf in tree.leaves() if bool_checker(leaf, prop))
def bool_checker(node, prop):
    """
    Check if the property of a node can be interpreted as a boolean 'True'.

    The value is converted with ``str()`` and handed to ``strtobool``; which
    spellings count as true is therefore decided by that function.

    :param node: The node whose property is to be checked; must expose a
        ``props`` mapping.
    :param prop: The property name to check.
    :return: True if the property exists and ``strtobool`` reads it as true.
        False if the property is missing, is None, reads as false, or makes
        ``strtobool`` raise ValueError.
    """
    prop_value = node.props.get(prop)
    if prop_value is None:
        return False
    try:
        return bool(strtobool(str(prop_value)))
    except ValueError:
        # A value that cannot be read as a boolean counts as "trait absent".
        return False
41###### start lineage specificity analysis ######
def run_ls(tree, props, precision_cutoff=0.95, sensitivity_cutoff=0.95):
    """
    Run the lineage specificity analysis for every property in ``props``.

    For each property and each internal node of ``tree``, the precision,
    sensitivity and F1 score returned by ``calculate_metrics`` are stored on
    the node under the names ``add_suffix(prop, "prec")``,
    ``add_suffix(prop, "sens")`` and ``add_suffix(prop, "f1")``. A non-root
    internal node whose precision and sensitivity both reach their cutoffs is
    also flagged with ``add_suffix(prop, "ls_clade")`` set to True.

    :param tree: The tree to annotate; nodes are modified in place through
        ``add_prop``.
    :param props: Iterable of property names to analyse.
    :param precision_cutoff: Minimum precision (inclusive) to qualify.
    :param sensitivity_cutoff: Minimum sensitivity (inclusive) to qualify.
    :return: Tuple ``(best_node, qualified_nodes)``. ``best_node`` is the
        qualifying node with the highest F1 over all properties (the first
        one met wins ties), or None when nothing qualifies.
        ``qualified_nodes`` holds every qualifying node in traversal order; a
        node appears once per property it qualifies for.
    """
    best_node = None
    best_f1 = -1
    qualified_nodes = []

    for prop in props:
        total_with_trait = get_total_trait(tree, prop)

        # The annotation names depend only on the property, so build them
        # once per property rather than once per node.
        # NOTE(review): assumes add_suffix is a pure name builder - confirm.
        prec_name = add_suffix(prop, "prec")
        sens_name = add_suffix(prop, "sens")
        f1_name = add_suffix(prop, "f1")
        ls_clade_name = add_suffix(prop, "ls_clade")

        # Calculating metrics for each clade
        for node in tree.traverse("postorder"):
            if node.is_leaf:
                continue

            precision, sensitivity, f1 = calculate_metrics(node, total_with_trait, prop)
            node.add_prop(prec_name, precision)
            node.add_prop(sens_name, sensitivity)
            node.add_prop(f1_name, f1)

            # The root is annotated with its metrics but is never reported as
            # a lineage-specific clade.
            if node.is_root:
                continue

            # Check if the node meets the lineage-specific criteria
            if precision >= precision_cutoff and sensitivity >= sensitivity_cutoff:
                node.add_prop(ls_clade_name, True)
                qualified_nodes.append(node)
                # NOTE(review): the best-F1 update is placed inside the cutoff
                # check, i.e. only qualifying nodes can become best_node. The
                # nesting was reconstructed from flattened text - confirm.
                if f1 > best_f1:
                    best_f1 = f1
                    best_node = node

    return best_node, qualified_nodes
73# #### find lineage-specific clades ####
74# def find_lineage_specific_root(tree):
75# best_node = None
76# best_f1 = -1
77# for node in tree.traverse("postorder"):
78# if not node.is_leaf:
79# precision, sensitivity, f1 = calculate_metrics(node, total_with_trait)
80# node.add_props(precision=precision, sensitivity=sensitivity, f1_score=f1)
82# # Check if the node meets the lineage-specific criteria
83# if precision >= 0.5 and sensitivity >= 0.5 and f1 > best_f1:
84# best_f1 = f1
85# best_node = node
86# return best_node