Coverage for /Users/gavin/repos/EnsemblLite/src/ensembl_lite/_convert.py: 100%
27 statements
« prev ^ index » next coverage.py v7.2.3, created at 2024-03-25 13:40 +1100
« prev ^ index » next coverage.py v7.2.3, created at 2024-03-25 13:40 +1100
1import numpy
3from cogent3 import Sequence
4from cogent3.core.alignment import Aligned
5from cogent3.core.location import LostSpan, Map, Span
# return type of seq_to_gap_coords: (gap coordinates array, sequence)
O = tuple[numpy.ndarray, Sequence]


def seq_to_gap_coords(seq: Sequence) -> O:
    """returns the gap coordinates of a sequence and the parsed-out sequence

    Parameters
    ----------
    seq
        sequence instance providing ``parse_out_gaps()``

    Returns
    -------
    A 2-tuple. The first item is the result of ``get_gap_coordinates()``
    on the map produced by ``seq.parse_out_gaps()``, converted to an int32
    numpy array. The second item is the sequence object returned by that
    same call.
    """
    gap_map, ungapped = seq.parse_out_gaps()
    # int32 storage relies on every coordinate being smaller than 2^31
    gap_coords = numpy.array(gap_map.get_gap_coordinates(), dtype=numpy.int32)
    return gap_coords, ungapped
def gap_coords_to_seq(coords: numpy.ndarray, ungapped: Sequence) -> Aligned:
    """returns Aligned instance built from gap coordinates

    Parameters
    ----------
    coords
        2D array with first column being gap insertion point and
        second column being gap length
    ungapped
        the ungapped sequence instance the coordinates correspond to

    Returns
    -------
    An Aligned instance wrapping ``ungapped`` with a Map whose spans
    alternate between sequence segments (Span) and gaps (LostSpan).
    """
    seq_length = len(ungapped)
    pieces = []
    # insertion point of the preceding gap, None until a gap has been seen
    previous = None
    # remains 0 when coords is empty, so the whole sequence becomes one span
    gap_pos = 0
    for gap_pos, gap_length in coords:
        lost = LostSpan(gap_length)
        if previous is not None:
            # segment between the preceding gap and this one, then the gap
            pieces += [Span(start=previous, end=gap_pos), lost]
        elif gap_pos:
            # first gap lies within the sequence, so the leading segment
            # goes in ahead of it
            pieces += [Span(start=0, end=gap_pos), lost]
        else:
            # first gap precedes the sequence, nothing to put before it
            pieces.append(lost)
        previous = gap_pos

    segments = [piece for piece in pieces if isinstance(piece, Span)]
    covered = sum(map(len, segments))
    if covered != seq_length:
        # the segments so far do not account for the whole sequence, so
        # the remainder after the last gap is added as a final span
        pieces.append(Span(start=gap_pos, end=seq_length))

    # parent_length is the combined length of every span, gaps included
    total_length = sum(map(len, pieces))
    return Aligned(Map(spans=pieces, parent_length=total_length), ungapped)