Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1import math 

2from elfragmentador import annotate, constants 

3from elfragmentador import isoforms 

4from numpy import float64 

5from typing import Any, Dict, List, Tuple, Union 

6 

7 

8def get_site_localizing_ions( 

9 seq: str, mod: List[str], aas: List[str] 

10) -> Union[ 

11 Tuple[Dict[str, Dict[Any, Any]], Dict[str, Dict[str, float64]]], 

12 Tuple[Dict[str, Dict[str, float64]], Dict[str, Dict[str, float64]]], 

13]: 

14 """ 

15 get_site_localizing_ions 

16 

17 [extended_summary] 

18 

19 Parameters 

20 ---------- 

21 seq : str 

22 [description] 

23 mod : List[str] 

24 [description] 

25 aas : List[str] 

26 [description] 

27 

28 Returns 

29 ------- 

30 Tuple[ 

31 Dict[str, Dict[str, float64]], 

32 Dict[str, Dict[str, float64]]] 

33 

34 Returns two dictionaries whose keys are isoform sequences and the values are 

35 dictionaries of ion:mass pairs. 

36 

37 The first one contins only the ions unique to each sequence and the second contains 

38 all the ions for each modified sequence 

39 

40 Example 

41 ------- 

42 >>> seq = "MY[PHOSPHO]PTMIDE" 

43 >>> mods_list = ["PHOSPHO"] 

44 >>> aas_list = ["YST"] 

45 >>> sli = get_site_localizing_ions(seq, mods_list, aas_list) 

46 >>> sorted(sli[0].keys()) 

47 ['MYPT[PHOSPHO]MIDE', 'MY[PHOSPHO]PTMIDE'] 

48 >>> sorted(sli[0]['MYPT[PHOSPHO]MIDE'].keys()) 

49 ['z1b2', 'z1b3', 'z1y5', 'z1y6', 'z2b2', 'z2b3', 'z2y5', 'z2y6', 'z3b2', 'z3b3', 'z3y5', 'z3y6'] 

50 >>> sorted(sli[1].keys()) 

51 ['MYPT[PHOSPHO]MIDE', 'MY[PHOSPHO]PTMIDE'] 

52 >>> sorted(sli[1]['MYPT[PHOSPHO]MIDE'].keys()) 

53 ['z1b1', 'z1b2', 'z1b3', ..., 'z1y6', 'z1y7', 'z2b1', 'z2b2', 'z2b3', 'z2b4', 'z2b5', 'z2b6', 'z2b7', 'z2y1', 'z2y2', 'z2y3', 'z2y4', 'z2y5', 'z2y6', 'z2y7', 'z3b1', 'z3b2', 'z3b3', 'z3b4', 'z3b5', 'z3b6', 'z3b7', 'z3y1', 'z3y2', 'z3y3', 'z3y4', 'z3y5', 'z3y6', 'z3y7'] 

54 >>> sli[1]['MYPT[PHOSPHO]MIDE']['z1y6'] 

55 785.278700167 

56 >>> out = get_site_localizing_ions(seq, mods_list, aas_list) 

57 >>> [{k: len(x[k]) for k in sorted(x)} for x in out] # Show the length of every sub-item 

58 [{'MYPT[PHOSPHO]MIDE': 12, 'MY[PHOSPHO]PTMIDE': 12}, {'MYPT[PHOSPHO]MIDE': 42, 'MY[PHOSPHO]PTMIDE': 42}] 

59 """ 

60 mod_isoforms = isoforms.get_mod_isoforms(seq, mod, aas) 

61 mod_isoform_ions = {k: annotate.get_peptide_ions(k) for k in mod_isoforms} 

62 

63 filtered_out_dict = {k: {} for k in mod_isoforms} 

64 out_dict = {k: {} for k in mod_isoforms} 

65 for ion in mod_isoform_ions[mod_isoforms[0]]: 

66 unique_vals = list( 

67 set([round(v[ion], 10) for k, v in mod_isoform_ions.items()]) 

68 ) 

69 

70 for mi in mod_isoforms: 

71 out_dict[mi].update({ion: mod_isoform_ions[mi][ion]}) 

72 if len(unique_vals) > 1: 

73 filtered_out_dict[mi].update({ion: mod_isoform_ions[mi][ion]}) 

74 

75 return filtered_out_dict, out_dict 

76 

77 

78# TODO consider if this has to be a public API 

79def calc_ascore( 

80 seq: str, 

81 mod: List[str], 

82 aas: List[str], 

83 mzs: Union[List[float64], List[float]], 

84 ints: Union[List[float], List[int]], 

85) -> Dict[str, Union[float, int]]: 

86 WINDOW_SIZE = 100 # daltons 

87 # BINNING = WINDOW_SIZE / (constants.TOLERANCE_FTMS / (1e6) )# aka, how many peaks can fit in a 100da window 

88 # Theoretically this would be correct but leads to really high scores... 

89 BINNING = 100 

90 N_PER_WINDOW = list(range(1, 11, 1)) 

91 

92 sli, all_ions = get_site_localizing_ions(seq, mod, aas) 

93 max_mz = math.ceil(max(mzs) / WINDOW_SIZE) * WINDOW_SIZE 

94 norm_spectra = {str(x): [] for x in N_PER_WINDOW} 

95 

96 for range_start in range(0, max_mz, WINDOW_SIZE): 

97 curr_mzs_pairs = [ 

98 [m, i] 

99 for m, i in zip(mzs, ints) 

100 if m <= range_start + WINDOW_SIZE and m > range_start 

101 ] 

102 if len(curr_mzs_pairs) == 0: 

103 continue 

104 

105 # from : https://stackoverflow.com/questions/13070461/ 

106 sorted_indices = sorted( 

107 range(len(curr_mzs_pairs)), key=lambda x: curr_mzs_pairs[x][1] 

108 ) 

109 for n in N_PER_WINDOW: 

110 keep_indices = sorted_indices[-n:] 

111 norm_spectra[str(n)].extend([curr_mzs_pairs[x][0] for x in keep_indices]) 

112 

113 # First pass finding best depth 

114 scores = {k: [] for k in norm_spectra} 

115 for norm_depth, norm_depth_spectra in norm_spectra.items(): 

116 prior = int(norm_depth) / BINNING 

117 norm_depth_spectra_ints = [1 for _ in range(len(norm_depth_spectra))] 

118 tmp_scores = _calculate_scores_dict( 

119 norm_depth_spectra, norm_depth_spectra_ints, all_ions, prior 

120 ) 

121 scores[norm_depth].extend(list(tmp_scores.values())) 

122 

123 try: 

124 # define best as highest delta score 

125 deltascores = {k: sorted(v)[-1] - sorted(v)[-2] for k, v in scores.items()} 

126 # define best as highest cumulative score 

127 # deltascores = {k: sum(v) for k, v in scores.items()} 

128 except IndexError: 

129 return {seq: max([x[0] for x in scores.values()]), "": 0} 

130 

131 deltascores 

132 max_deltascores = max(deltascores.values()) 

133 best_depth = [k for k, v in deltascores.items() if v == max_deltascores][0] 

134 best_depth 

135 

136 # second pass finding score 

137 prior = int(best_depth) / BINNING 

138 norm_depth_spectra = norm_spectra[best_depth] 

139 norm_depth_spectra_ints = [1 for _ in range(len(norm_depth_spectra))] 

140 scores = {k: None for k in sli} 

141 

142 scores = _calculate_scores_dict( 

143 norm_depth_spectra, norm_depth_spectra_ints, sli, prior 

144 ) 

145 return scores 

146 

147 

148def calc_delta_ascore( 

149 seq: str, 

150 mod: List[str], 

151 aas: List[str], 

152 mzs: Union[List[float64], List[float]], 

153 ints: Union[List[float], List[int]], 

154) -> float: 

155 ascores = calc_ascore(seq, mod, aas, mzs, ints) 

156 seq_score = ascores.pop(seq) 

157 return seq_score - max(ascores.values()) 

158 

159 

160def _calculate_scores_dict( 

161 mzs: Union[List[float64], List[float]], 

162 ints: List[int], 

163 ions_dict: Dict[str, Dict[str, float64]], 

164 prior: float, 

165) -> Dict[str, float]: 

166 EPSILON = 1e-31 

167 

168 scores = {k: None for k in ions_dict} 

169 for isoform, theo_peaks_isoform in ions_dict.items(): 

170 num_tot_peaks = len(theo_peaks_isoform) 

171 matched_peaks = sum( 

172 annotate.annotate_peaks2( 

173 theo_peaks_isoform, 

174 mzs=mzs, 

175 intensities=ints, 

176 ).values() 

177 ) 

178 unmatched_peaks = num_tot_peaks - matched_peaks 

179 prob = (prior ** matched_peaks) * ((1 - prior) ** unmatched_peaks) 

180 score = -10 * math.log10(prob + EPSILON) 

181 scores[isoform] = score 

182 return scores