Coverage for /home/deng/Projects/metatree_drawer/metatreedrawer/treeprofiler/src/ls.py: 16%

44 statements  

« prev     ^ index     » next       coverage.py v7.2.7, created at 2024-08-07 10:33 +0200

1#!/usr/bin/env python3 

2try: 

3 from distutils.util import strtobool 

4except ImportError: 

5 from treeprofiler.src.utils import strtobool 

6 

7from treeprofiler.src.utils import add_suffix 

8 

9# Lineage specificity analysis 

10# Function to calculate precision, sensitivity, and F1 score 

def calculate_metrics(node, total_with_trait, prop):
    """
    Compute precision, sensitivity and F1 score of a trait for the clade under a node.

    Precision is the share of the clade's leaves for which
    ``bool_checker(leaf, prop)`` is true, sensitivity is the number of those
    leaves divided by ``total_with_trait``, and F1 is the harmonic mean of both.

    :param node: The node whose clade is evaluated; it must expose ``is_leaf`` and ``leaves()``.
    :param total_with_trait: Denominator used for sensitivity (``run_ls`` passes the result of ``get_total_trait``).
    :param prop: The property name handed to ``bool_checker`` for every leaf.
    :return: A ``(precision, sensitivity, f1)`` tuple. Leaf nodes yield ``(0, 0, 0)``,
        and any ratio whose denominator is zero is reported as 0.
    """
    if node.is_leaf:
        return 0, 0, 0

    # Gather both tallies in a single pass over the leaves instead of walking
    # the clade twice and materialising a list only to measure its length.
    clade_total = 0
    clade_with_trait = 0
    for leaf in node.leaves():
        clade_total += 1
        if bool_checker(leaf, prop):
            clade_with_trait += 1

    precision = clade_with_trait / clade_total if clade_total else 0
    sensitivity = clade_with_trait / total_with_trait if total_with_trait else 0
    # Guard the harmonic mean against a zero denominator (no leaf carries the trait).
    denominator = precision + sensitivity
    f1 = 2 * (precision * sensitivity) / denominator if denominator else 0
    return precision, sensitivity, f1

20 

21# Total number of nodes with the trait 

def get_total_trait(tree, prop):
    """
    Count the leaves of a tree that carry a trait.

    :param tree: The tree whose ``leaves()`` are inspected.
    :param prop: The property name handed to ``bool_checker`` for every leaf.
    :return: The number of leaves for which ``bool_checker(leaf, prop)`` is true.
    """
    trait_count = 0
    for leaf in tree.leaves():
        if bool_checker(leaf, prop):
            trait_count += 1
    return trait_count

24 

def bool_checker(node, prop):
    """
    Tell whether a node's property reads as a boolean 'True'.

    The stored value is converted to ``str`` and parsed with ``strtobool``.

    :param node: The node whose ``props`` mapping is consulted.
    :param prop: The property name to look up.
    :return: True if the property is present (not None) and parses as true;
        False if it is missing, None, parses as false, or ``strtobool`` rejects it.
    """
    raw_value = node.props.get(prop)
    if raw_value is None:
        return False

    try:
        return bool(strtobool(str(raw_value)))
    except ValueError:
        # strtobool raised ValueError: the text is not a recognised boolean spelling.
        return False

40 

41###### start lineage specificity analysis ###### 

def run_ls(tree, props, precision_cutoff=0.95, sensitivity_cutoff=0.95):
    """
    Run the lineage specificity analysis for one or more traits.

    For every property in ``props`` and every internal node of ``tree``
    (visited in postorder), the precision, sensitivity and F1 score returned by
    ``calculate_metrics`` are stored on the node under the names
    ``add_suffix(prop, "prec")``, ``add_suffix(prop, "sens")`` and
    ``add_suffix(prop, "f1")``. Non-root internal nodes whose precision and
    sensitivity both reach their cutoffs are additionally flagged with
    ``add_suffix(prop, "ls_clade") = True``.

    :param tree: The tree to analyse; its nodes are annotated in place.
    :param props: Iterable of property names to analyse.
    :param precision_cutoff: Minimum precision for a node to qualify.
    :param sensitivity_cutoff: Minimum sensitivity for a node to qualify.
    :return: ``(best_node, qualified_nodes)``. ``qualified_nodes`` lists every
        qualifying node in the order found, accumulated over all properties;
        ``best_node`` is the qualifying node with the highest F1 score over all
        properties (the first one found wins ties), or None if none qualified.
    """
    qualified_nodes = []
    best_node = None
    best_f1 = -1

    for prop in props:
        total_with_trait = get_total_trait(tree, prop)

        for node in tree.traverse("postorder"):
            if node.is_leaf:
                continue

            # Annotate every internal node (the root included) with its metrics.
            precision, sensitivity, f1 = calculate_metrics(node, total_with_trait, prop)
            for suffix, value in (("prec", precision), ("sens", sensitivity), ("f1", f1)):
                node.add_prop(add_suffix(prop, suffix), value)

            # The root is never reported as a lineage-specific clade.
            if node.is_root:
                continue

            meets_cutoffs = precision >= precision_cutoff and sensitivity >= sensitivity_cutoff
            if not meets_cutoffs:
                continue

            node.add_prop(add_suffix(prop, "ls_clade"), True)
            qualified_nodes.append(node)
            if f1 > best_f1:
                best_f1 = f1
                best_node = node

    return best_node, qualified_nodes

72 

73# #### find lineage-specific clades #### 

74# def find_lineage_specific_root(tree): 

75# best_node = None 

76# best_f1 = -1 

77# for node in tree.traverse("postorder"): 

78# if not node.is_leaf: 

79# precision, sensitivity, f1 = calculate_metrics(node, total_with_trait) 

80# node.add_props(precision=precision, sensitivity=sensitivity, f1_score=f1) 

81 

82# # Check if the node meets the lineage-specific criteria 

83# if precision >= 0.5 and sensitivity >= 0.5 and f1 > best_f1: 

84# best_f1 = f1 

85# best_node = node 

86# return best_node