Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1import warnings 

2from typing import List, Generator, Iterable 

3from elfragmentador.annotate import peptide_parser 

4from elfragmentador.encoding_decoding import clip_explicit_terminus 

5 

6 

7class _unique_element: 

8 """ 

9 Part of the answer from https://stackoverflow.com/questions/6284396 

10 """ 

11 

12 def __init__(self, value, occurrences): 

13 self.value = value 

14 self.occurrences = occurrences 

15 

16 

def perm_unique(elements: Iterable) -> Generator:
    """perm_unique Gets permutations of elements taking into account repeated.

    Part of the answer from https://stackoverflow.com/questions/6284396

    Permutes the elements passed but skips all permutations where elements are
    the same. For instance (0, 1, 0) would give 3 possibilities.

    Parameters
    ----------
    elements : Iterable
        Hashable elements to be permuted. Any iterable is accepted (list,
        str, tuple, generator, ...); it is consumed exactly once.

    Returns
    -------
    Generator
        A generator yielding every distinct permutation as a tuple. An empty
        input yields a single empty tuple.

    Examples
    --------
    >>> out = list(perm_unique("COM"))
    >>> sorted(out)
    [('C', 'M', 'O'), ('C', 'O', 'M'), ('M', 'C', 'O'), ('M', 'O', 'C'), ('O', 'C', 'M'), ('O', 'M', 'C')]
    >>> out = list(perm_unique("CCM"))
    >>> sorted(out)
    [('C', 'C', 'M'), ('C', 'M', 'C'), ('M', 'C', 'C')]
    >>> out = list(perm_unique([0,1,0]))
    >>> sorted(out)
    [(0, 0, 1), (0, 1, 0), (1, 0, 0)]
    >>> out = list(perm_unique(c for c in "CCM"))
    >>> sorted(out)
    [('C', 'C', 'M'), ('C', 'M', 'C'), ('M', 'C', 'C')]
    """
    # Imported locally so this function does not depend on the module's
    # import block being changed.
    from collections import Counter

    # Materialise once: one-shot iterables (generators) and containers that
    # lack ``count``/``len`` would otherwise fail or be exhausted early.
    items = list(elements)

    # A single counting pass replaces one ``count`` call per distinct value.
    # Counter keeps first-seen order, so the order in which permutations are
    # produced no longer depends on set/hash ordering.
    tally = Counter(items)
    listunique = [_unique_element(value, n) for value, n in tally.items()]

    size = len(items)
    # The helper fills a scratch buffer from the last index down to 0.
    return _perm_unique_helper(listunique, [0] * size, size - 1)

51 

52 

53def _perm_unique_helper(listunique, result_list, d): 

54 """ 

55 Part of the answer from https://stackoverflow.com/questions/6284396 

56 """ 

57 if d < 0: 

58 yield tuple(result_list) 

59 else: 

60 for i in listunique: 

61 if i.occurrences > 0: 

62 result_list[d] = i.value 

63 i.occurrences -= 1 

64 for g in _perm_unique_helper(listunique, result_list, d - 1): 

65 yield g 

66 i.occurrences += 1 

67 

68 

69def _get_mod_isoforms(seq: str, mod: str, aas: str) -> List[str]: 

70 # mod = "PHOSPHO" 

71 # seq = "S[PHOSPHO]AS" 

72 # aas = "STY" 

73 if mod not in seq: 

74 return [seq] 

75 

76 parsed_seq = list(peptide_parser(seq)) 

77 parsed_seq = clip_explicit_terminus(parsed_seq) 

78 stripped_seq = [x.replace(f"[{mod}]", "") for x in parsed_seq] 

79 

80 placeholder_seq = [ 

81 x if not any([x[:1] == y for y in aas]) else x[:1] + "{}" for x in stripped_seq 

82 ] 

83 placeholder_seq = "".join(placeholder_seq) 

84 mod_sampler = [x[1:] for x in parsed_seq if any([x[:1] == y for y in aas])] 

85 

86 if len(set(mod_sampler)) == 1: 

87 perm_iter = [mod_sampler] 

88 else: 

89 perm_iter = list(perm_unique(mod_sampler)) 

90 

91 out_seqs = [] 

92 

93 for _, x in enumerate(perm_iter): 

94 out_seqs.append(placeholder_seq.format(*x)) 

95 

96 return list(set(out_seqs)) 

97 

98 

def get_mod_isoforms(seq: str, mods_list: List[str], aas_list: List[str]) -> List[str]:
    """get_mod_isoforms

    Gets modification isoforms for a peptide with modifications

    Parameters
    ----------
    seq : str
        Sequence used
    mods_list : List[str]
        List of modification names that can be permuted
    aas_list : List[str]
        List of the aminoacids that can be the modified by each modification.

    Details
    -------
    This functions expects the modification and the aminoacid list to be the same length;
    they are paired positionally and surplus entries of the longer list are ignored.

    Modifications are applied one after another, each round expanding every
    sequence collected so far. If a round collects more than 10000 distinct
    candidates a warning is issued and that round stops expanding further
    sequences, which bounds the combinatorial growth.

    Returns
    -------
    List[str]
        A list with the combination of the sequence with the modifications.
        The input sequence is always included and entries are unique.

    Example
    -------
    >>> seq = "M[OXIDATION]YPEPT[PHOSPHO]MIDES"
    >>> mods_list = ["PHOSPHO", "OXIDATION"]
    >>> aas_list = ["STY", "M"]
    >>> out = list(get_mod_isoforms(seq, mods_list, aas_list))
    >>> sorted(out)
    ['MYPEPTM[OXIDATION]IDES[PHOSPHO]', ... 'M[OXIDATION]Y[PHOSPHO]PEPTMIDES']
    """
    max_isoforms = 10000

    # Dict keys serve as an insertion-ordered set: duplicates produced in one
    # round are not expanded again in the next one.
    seqs = {seq: None}

    for mod, aas in zip(mods_list, aas_list):
        round_seqs = {}
        for s in seqs:
            for isoform in _get_mod_isoforms(s, mod, aas):
                round_seqs[isoform] = None
            if len(round_seqs) > max_isoforms:
                warnings.warn(
                    "Large number of mod combinations found, "
                    f"clipping at {max_isoforms}"
                )
                # Stop expanding for this modification; previously the loop
                # carried on regardless, so nothing was ever clipped.
                break

        # Merged only after the loop so ``seqs`` is not mutated while iterated.
        seqs.update(round_seqs)

    return list(seqs)