Coverage for /Users/gavin/repos/EnsemblLite/src/ensembl_lite/convert.py: 100%
28 statements
« prev ^ index » next coverage.py v7.2.3, created at 2023-12-25 11:36 +1100
« prev ^ index » next coverage.py v7.2.3, created at 2023-12-25 11:36 +1100
1import typing
3import numpy
5from cogent3 import Sequence
6from cogent3.core.alignment import Aligned
7from cogent3.core.location import LostSpan, Map, Span
# return type of seq_to_gap_coords: (gap coordinate array, sequence)
O = typing.Tuple[numpy.ndarray, Sequence]


def seq_to_gap_coords(seq: Sequence) -> O:
    """splits a sequence into its gap coordinates and remaining sequence

    Parameters
    ----------
    seq
        sequence whose ``parse_out_gaps()`` method provides a map and a
        sequence (presumably the gap-free version of ``seq``)

    Returns
    -------
    The map's ``get_gap_coordinates()`` result as a numpy int32 array,
    and the sequence returned by ``parse_out_gaps()``.
    """
    gap_map, without_gaps = seq.parse_out_gaps()
    # int32 storage relies on every coordinate being < 2^31
    gap_coords = numpy.array(gap_map.get_gap_coordinates(), dtype=numpy.int32)
    return gap_coords, without_gaps
def gap_coords_to_seq(coords: numpy.ndarray, ungapped: Sequence) -> Aligned:
    """builds an Aligned instance from gap coordinates and a sequence

    Parameters
    ----------
    coords
        2D array, one row per gap: column 0 is the gap insertion point,
        column 1 is the gap length
    ungapped
        the ungapped sequence instance the coordinates correspond to

    Returns
    -------
    Aligned wrapping ``ungapped`` with a Map made of Span (sequence
    segment) and LostSpan (gap) entries in the order they were built.
    """
    pieces = []
    previous = None  # insertion point of the preceding gap, if any
    position = 0  # stays 0 when coords has no rows
    for position, gap_length in coords:
        lost = LostSpan(gap_length)
        if previous is not None:
            # segment lying between the preceding gap and this one
            pieces.append(Span(start=previous, end=position))
        elif position:
            # first gap is inside the sequence, so a leading segment
            # comes before it; a first gap at 0 has nothing in front
            pieces.append(Span(start=0, end=position))
        pieces.append(lost)
        previous = position

    seq_length = len(ungapped)
    covered = sum(len(p) for p in pieces if isinstance(p, Span))
    if seq_length != covered:
        # sequence continues past the last gap (or there were no gaps),
        # so the remainder becomes the final segment
        pieces.append(Span(start=position, end=seq_length))

    # parent_length is the combined length of all segments and gaps
    total_length = sum(map(len, pieces))
    return Aligned(Map(spans=pieces, parent_length=total_length), ungapped)