Coverage for /Users/gavin/repos/EnsemblLite/src/ensembl_lite/_convert.py: 100%

27 statements  

« prev     ^ index     » next       coverage.py v7.2.3, created at 2024-03-25 13:40 +1100

1import numpy 

2 

3from cogent3 import Sequence 

4from cogent3.core.alignment import Aligned 

5from cogent3.core.location import LostSpan, Map, Span 

6 

7 

# return type of seq_to_gap_coords: (gap coordinates array, sequence)
O = tuple[numpy.ndarray, Sequence]


def seq_to_gap_coords(seq: Sequence) -> O:
    """returns the gap coordinates of a sequence, plus the parsed sequence

    Parameters
    ----------
    seq
        sequence whose ``parse_out_gaps()`` method provides the gap map

    Returns
    -------
    The result of ``get_gap_coordinates()`` on the gap map, as an int32
    numpy array, and the second item returned by ``parse_out_gaps()``
    (presumably the sequence with its gaps removed).
    """
    gap_map, parsed = seq.parse_out_gaps()
    # int32 storage assumes no coordinate reaches 2^31
    gap_coords = numpy.array(gap_map.get_gap_coordinates(), dtype=numpy.int32)
    return gap_coords, parsed

16 

17 

def gap_coords_to_seq(coords: numpy.ndarray, ungapped: Sequence) -> Aligned:
    """builds an Aligned instance from gap coordinates and a sequence

    Parameters
    ----------
    coords
        2D array, one row per gap: column 0 is the position in the
        ungapped sequence where the gap is inserted, column 1 is the
        gap length
    ungapped
        the ungapped sequence instance the coordinates correspond to

    Returns
    -------
    Aligned wrapping ``ungapped`` with a Map whose spans alternate
    between sequence segments (Span) and gaps (LostSpan).
    """
    spans = []
    previous = None  # insertion point of the preceding gap, None until one is seen
    # insert is read after the loop, so it needs a value when coords is empty
    insert = 0
    for insert, length in coords:
        lost = LostSpan(length)
        if previous is not None:
            # sequence segment lying between the preceding gap and this one
            spans.append(Span(start=previous, end=insert))
        elif insert:
            # first gap sits inside the sequence, so the leading segment
            # comes before it; when insert == 0 the gap precedes the
            # sequence and there is no leading segment
            spans.append(Span(start=0, end=insert))
        spans.append(lost)
        previous = insert

    seq_length = len(ungapped)
    covered = sum(len(span) for span in spans if isinstance(span, Span))
    if seq_length != covered:
        # sequence continues beyond the last gap (or there were no gaps),
        # so a final segment span is required
        spans.append(Span(start=insert, end=seq_length))

    # parent length is the combined length of every span, gaps included
    total_length = sum(len(span) for span in spans)
    gap_map = Map(spans=spans, parent_length=total_length)
    return Aligned(gap_map, ungapped)