Coverage for /Users/gavin/repos/EnsemblLite/src/ensembl_lite/convert.py: 100%

28 statements  

« prev     ^ index     » next       coverage.py v7.2.3, created at 2023-12-25 11:36 +1100

1import typing 

2 

3import numpy 

4 

5from cogent3 import Sequence 

6from cogent3.core.alignment import Aligned 

7from cogent3.core.location import LostSpan, Map, Span 

8 

9 

10O = typing.Tuple[numpy.ndarray, Sequence] 

11 

12 

def seq_to_gap_coords(seq: Sequence) -> O:
    """Split a gapped sequence into gap coordinates and its ungapped form.

    Parameters
    ----------
    seq
        sequence whose ``parse_out_gaps()`` method supplies the gap map
        and the ungapped sequence

    Returns
    -------
    The coordinates reported by the map's ``get_gap_coordinates()``,
    stored as an int32 numpy array, and the ungapped sequence.
    """
    gap_map, ungapped = seq.parse_out_gaps()
    # int32 storage assumes every coordinate is < 2^31
    coords = numpy.array(gap_map.get_gap_coordinates(), dtype=numpy.int32)
    return coords, ungapped

18 

19 

def gap_coords_to_seq(coords: numpy.ndarray, ungapped: Sequence) -> Aligned:
    """Rebuild an Aligned instance from gap coordinates.

    Parameters
    ----------
    coords
        2D array, each row being (gap insertion point, gap length)
    ungapped
        the ungapped sequence instance the coordinates correspond to

    Returns
    -------
    Aligned built from ``ungapped`` and a Map of the alternating
    sequence / gap spans. The Map's parent_length is the summed length
    of all spans, gaps included.
    """
    pieces = []
    # insertion point of the preceding gap; None until a gap has been seen
    previous = None
    # remains 0 when coords is empty, so the whole sequence becomes one span
    position = 0
    for position, gap_length in coords:
        lost = LostSpan(gap_length)
        if previous is not None:
            # sequence segment lying between the preceding gap and this one
            pieces.append(Span(start=previous, end=position))
        elif position:
            # first gap falls inside the sequence, so the leading
            # segment precedes it
            pieces.append(Span(start=0, end=position))
        # a first gap at position 0 has no sequence in front of it
        pieces.append(lost)
        previous = position

    covered = sum(len(piece) for piece in pieces if isinstance(piece, Span))
    if len(ungapped) != covered:
        # sequence extends beyond the last gap, add the remainder
        pieces.append(Span(start=position, end=len(ungapped)))

    total_length = sum(len(piece) for piece in pieces)
    gap_map = Map(spans=pieces, parent_length=total_length)
    return Aligned(gap_map, ungapped)