Coverage for /home/deng/Projects/metatree_drawer/treeprofiler_algo/pastml/pastml/models/JTTModel.py: 74%

31 statements  

« prev     ^ index     » next       coverage.py v7.2.7, created at 2024-03-21 09:19 +0100

1import numpy as np 

2 

3from pastml.models import Model 

4from pastml.models.CustomRatesModel import CustomRatesModel 

5 

6""" 

7The JTT matrix below is taken from https://www.ebi.ac.uk/goldman-srv/dayhoff/ 

8 

9See the following paper for more details: 

10Kosiol,C. and Goldman,N. The Different Versions of the Dayhoff Rate Matrix.  

11""" 

12 

13JTT = 'JTT' 

14 

15AA_NAMES = ['Alanine', 'Arginine', 'Asparagine', 'Aspartic Acid', 'Cysteine', 'Glutamine', 'Glutamic Acid', 'Glycine', 

16 'Histidine', 'Isoleucine', 'Leucine', 'Lysine', 'Methionine', 'Phenylalanine', 'Proline', 'Serine', 

17 'Threonine', 'Tryptophan', 'Tyrosine', 'Valine'] 

18 

19AA_3_LETTER_CODES = ['Ala', 'Arg', 'Asn', 'Asp', 'Cys', 'Gln', 'Glu', 'Gly', 

20 'His', 'Ile', 'Leu', 'Lys', 'Met', 'Phe', 'Pro', 'Ser', 

21 'Thr', 'Trp', 'Tyr', 'Val'] 

22 

23AA_1_LETTER_CODES = ['A', 'R', 'N', 'D', 'C', 'Q', 'E', 'G', 

24 'H', 'I', 'L', 'K', 'M', 'F', 'P', 'S', 

25 'T', 'W', 'Y', 'V'] 

26 

27JTT_STATES = np.array(AA_1_LETTER_CODES) 

28 

29NUM_AA = 20 

30 

31JTT_RATE_MATRIX = np.zeros(shape=(NUM_AA, NUM_AA), dtype=np.float64) 

32JTT_RATE_MATRIX[np.tril_indices(NUM_AA, k=-1)] = \ 

33 [0.531678, 

34 0.557967, 0.451095, 

35 0.827445, 0.154899, 5.549530, 

36 0.574478, 1.019843, 0.313311, 0.105625, 

37 0.556725, 3.021995, 0.768834, 0.521646, 0.091304, 

38 1.066681, 0.318483, 0.578115, 7.766557, 0.053907, 3.417706, 

39 1.740159, 1.359652, 0.773313, 1.272434, 0.546389, 0.231294, 1.115632, 

40 0.219970, 3.210671, 4.025778, 1.032342, 0.724998, 5.684080, 0.243768, 0.201696, 

41 0.361684, 0.239195, 0.491003, 0.115968, 0.150559, 0.078270, 0.111773, 0.053769, 0.181788, 

42 0.310007, 0.372261, 0.137289, 0.061486, 0.164593, 0.709004, 0.097485, 0.069492, 0.540571, 2.335139, 

43 0.369437, 6.529255, 2.529517, 0.282466, 0.049009, 2.966732, 1.731684, 0.269840, 0.525096, 0.202562, 0.146481, 

44 0.469395, 0.431045, 0.330720, 0.190001, 0.409202, 0.456901, 0.175084, 0.130379, 0.329660, 4.831666, 3.856906, 0.624581, 

45 0.138293, 0.065314, 0.073481, 0.032522, 0.678335, 0.045683, 0.043829, 0.050212, 0.453428, 0.777090, 2.500294, 0.024521, 0.436181, 

46 1.959599, 0.710489, 0.121804, 0.127164, 0.123653, 1.608126, 0.191994, 0.208081, 1.141961, 0.098580, 1.060504, 0.216345, 0.164215, 0.148483, 

47 3.887095, 1.001551, 5.057964, 0.589268, 2.155331, 0.548807, 0.312449, 1.874296, 0.743458, 0.405119, 0.592511, 0.474478, 0.285564, 0.943971, 2.788406, 

48 4.582565, 0.650282, 2.351311, 0.425159, 0.469823, 0.523825, 0.331584, 0.316862, 0.477355, 2.553806, 0.272514, 0.965641, 2.114728, 0.138904, 1.176961, 4.777647, 

49 0.084329, 1.257961, 0.027700, 0.057466, 1.104181, 0.172206, 0.114381, 0.544180, 0.128193, 0.134510, 0.530324, 0.089134, 0.201334, 0.537922, 0.069965, 0.310927, 0.080556, 

50 0.139492, 0.235601, 0.700693, 0.453952, 2.114852, 0.254745, 0.063452, 0.052500, 5.848400, 0.303445, 0.241094, 0.087904, 0.189870, 5.484236, 0.113850, 0.628608, 0.201094, 0.747889, 

51 2.924161, 0.171995, 0.164525, 0.315261, 0.621323, 0.179771, 0.465271, 0.470140, 0.121827, 9.533943, 1.761439, 0.124066, 3.038533, 0.593478, 0.211561, 0.408532, 1.143980, 0.239697, 0.165473] 

52JTT_RATE_MATRIX = np.maximum(JTT_RATE_MATRIX, JTT_RATE_MATRIX.T) 

53 

54JTT_FREQUENCIES = np.array( 

55 [0.076862, 0.051057, 0.042546, 0.051269, 0.020279, 0.041061, 0.061820, 0.074714, 0.022983, 0.052569, 0.091111, 0.059498, 0.023414, 0.040530, 0.050532, 0.068225, 0.058518, 0.014336, 0.032303, 0.066374]) 

56JTT_FREQUENCIES = JTT_FREQUENCIES / JTT_FREQUENCIES.sum() 

57 

58state_order = np.argsort(JTT_STATES) 

59JTT_STATES = JTT_STATES[state_order] 

60JTT_FREQUENCIES = JTT_FREQUENCIES[state_order] 

61JTT_RATE_MATRIX = JTT_RATE_MATRIX[:, state_order][state_order, :] 

62 

63 

64class JTTModel(CustomRatesModel): 

65 

66 def __init__(self, forest_stats, sf=None, tau=0, optimise_tau=False, parameter_file=None, reoptimise=False, **kwargs): 

67 kwargs['states'] = JTT_STATES 

68 if 'frequency_smoothing' in kwargs: 

69 del kwargs['frequency_smoothing'] 

70 CustomRatesModel.__init__(self, forest_stats=forest_stats, sf=sf, 

71 frequencies=JTT_FREQUENCIES, rate_matrix=JTT_RATE_MATRIX, 

72 parameter_file=parameter_file, reoptimise=reoptimise, frequency_smoothing=False, 

73 tau=tau, optimise_tau=optimise_tau, **kwargs) 

74 self._optimise_frequencies = False 

75 self.name = JTT 

76 

77 @CustomRatesModel.states.setter 

78 def states(self, states): 

79 raise NotImplementedError('The JTT states are preset and cannot be changed.') 

80 

81 def parse_parameters(self, params, reoptimise=False): 

82 """ 

83 Update this model's values from the input parameters. 

84 JTT model can only have the basic parameters (scaling factor and smoothing factor, see pastml.models.Model). 

85 

86 :param params: dict {key->value} 

87 :param reoptimise: whether these model parameters should be treated as starting values (True) 

88 or as fixed values (False) 

89 :return: dict with parameter values (same as input) 

90 """ 

91 

92 # This model sets fixed frequencies 

93 # and hence should only read the basic parameters (scaling and smoothing factors) from the input file 

94 return Model.parse_parameters(self, params, reoptimise) 

95