Coverage for /home/martinb/.local/share/virtualenvs/camcops/lib/python3.6/site-packages/pandas/core/arrays/_ranges.py : 9%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1"""
2Helper functions to generate range-like data for DatetimeArray
3(and possibly TimedeltaArray/PeriodArray)
4"""
6from typing import Tuple
8import numpy as np
10from pandas._libs.tslibs import OutOfBoundsDatetime, Timestamp
12from pandas.tseries.offsets import DateOffset, Tick, generate_range
def generate_regular_range(
    start: Timestamp, end: Timestamp, periods: int, freq: DateOffset
) -> Tuple[np.ndarray, str]:
    """
    Generate a range of dates with the spans between dates described by
    the given `freq` DateOffset.

    Parameters
    ----------
    start : Timestamp or None
        first point of produced date range
    end : Timestamp or None
        last point of produced date range
    periods : int or None
        number of periods in produced date range
    freq : DateOffset
        describes space between dates in produced date range

    Returns
    -------
    values : ndarray[np.int64]
        nanosecond unix timestamps
    tz
        the ``tz`` attribute of `start`, or of `end` when only `end` is
        used; None when neither is given on the non-Tick path.  (The
        return annotation spells this as ``str``, but the value is
        whatever ``.tz`` holds on the Timestamp.)

    Raises
    ------
    ValueError
        If `freq` is a Tick and `periods` is given while both `start`
        and `end` are None.
    """
    if isinstance(freq, Tick):
        stride = freq.nanos
        if periods is None:
            b = Timestamp(start).value
            # cannot just use e = Timestamp(end) + 1 because arange breaks when
            # stride is too large, see GH10887
            e = b + (Timestamp(end).value - b) // stride * stride + stride // 2 + 1
            # end.tz == start.tz by this point due to _generate implementation
            tz = start.tz
        elif start is not None:
            b = Timestamp(start).value
            e = _generate_range_overflow_safe(b, periods, stride, side="start")
            tz = start.tz
        elif end is not None:
            # one stride past `end` so that the half-open arange includes it
            e = Timestamp(end).value + stride
            b = _generate_range_overflow_safe(e, periods, stride, side="end")
            tz = end.tz
        else:
            raise ValueError(
                "at least 'start' or 'end' should be specified "
                "if a 'period' is given."
            )

        with np.errstate(over="raise"):
            # If the range is sufficiently large, np.arange may overflow
            # and incorrectly return an empty array if not caught.
            try:
                values = np.arange(b, e, stride, dtype=np.int64)
            except FloatingPointError:
                # Rebuild the same half-open sequence with arbitrary-precision
                # Python ints.  `range` stops as soon as `e` is reached or
                # passed, so this terminates even when `e` is not a whole
                # number of strides away from `b` (which is the case on the
                # `periods is None` path above).
                values = np.array(list(range(b, e, stride)), dtype=np.int64)

    else:
        tz = None
        # start and end should have the same timezone by this point
        if start is not None:
            tz = start.tz
        elif end is not None:
            tz = end.tz

        xdr = generate_range(start=start, end=end, periods=periods, offset=freq)

        values = np.array([x.value for x in xdr], dtype=np.int64)

    return values, tz
86def _generate_range_overflow_safe(
87 endpoint: int, periods: int, stride: int, side: str = "start"
88) -> int:
89 """
90 Calculate the second endpoint for passing to np.arange, checking
91 to avoid an integer overflow. Catch OverflowError and re-raise
92 as OutOfBoundsDatetime.
94 Parameters
95 ----------
96 endpoint : int
97 nanosecond timestamp of the known endpoint of the desired range
98 periods : int
99 number of periods in the desired range
100 stride : int
101 nanoseconds between periods in the desired range
102 side : {'start', 'end'}
103 which end of the range `endpoint` refers to
105 Returns
106 -------
107 other_end : int
109 Raises
110 ------
111 OutOfBoundsDatetime
112 """
113 # GH#14187 raise instead of incorrectly wrapping around
114 assert side in ["start", "end"]
116 i64max = np.uint64(np.iinfo(np.int64).max)
117 msg = f"Cannot generate range with {side}={endpoint} and periods={periods}"
119 with np.errstate(over="raise"):
120 # if periods * strides cannot be multiplied within the *uint64* bounds,
121 # we cannot salvage the operation by recursing, so raise
122 try:
123 addend = np.uint64(periods) * np.uint64(np.abs(stride))
124 except FloatingPointError:
125 raise OutOfBoundsDatetime(msg)
127 if np.abs(addend) <= i64max:
128 # relatively easy case without casting concerns
129 return _generate_range_overflow_safe_signed(endpoint, periods, stride, side)
131 elif (endpoint > 0 and side == "start" and stride > 0) or (
132 endpoint < 0 and side == "end" and stride > 0
133 ):
134 # no chance of not-overflowing
135 raise OutOfBoundsDatetime(msg)
137 elif side == "end" and endpoint > i64max and endpoint - stride <= i64max:
138 # in _generate_regular_range we added `stride` thereby overflowing
139 # the bounds. Adjust to fix this.
140 return _generate_range_overflow_safe(
141 endpoint - stride, periods - 1, stride, side
142 )
144 # split into smaller pieces
145 mid_periods = periods // 2
146 remaining = periods - mid_periods
147 assert 0 < remaining < periods, (remaining, periods, endpoint, stride)
149 midpoint = _generate_range_overflow_safe(endpoint, mid_periods, stride, side)
150 return _generate_range_overflow_safe(midpoint, remaining, stride, side)
153def _generate_range_overflow_safe_signed(
154 endpoint: int, periods: int, stride: int, side: str
155) -> int:
156 """
157 A special case for _generate_range_overflow_safe where `periods * stride`
158 can be calculated without overflowing int64 bounds.
159 """
160 assert side in ["start", "end"]
161 if side == "end":
162 stride *= -1
164 with np.errstate(over="raise"):
165 addend = np.int64(periods) * np.int64(stride)
166 try:
167 # easy case with no overflows
168 return np.int64(endpoint) + addend
169 except (FloatingPointError, OverflowError):
170 # with endpoint negative and addend positive we risk
171 # FloatingPointError; with reversed signed we risk OverflowError
172 pass
174 # if stride and endpoint had opposite signs, then endpoint + addend
175 # should never overflow. so they must have the same signs
176 assert (stride > 0 and endpoint >= 0) or (stride < 0 and endpoint <= 0)
178 if stride > 0:
179 # watch out for very special case in which we just slightly
180 # exceed implementation bounds, but when passing the result to
181 # np.arange will get a result slightly within the bounds
182 result = np.uint64(endpoint) + np.uint64(addend)
183 i64max = np.uint64(np.iinfo(np.int64).max)
184 assert result > i64max
185 if result <= i64max + np.uint64(stride):
186 return result
188 raise OutOfBoundsDatetime(
189 f"Cannot generate range with {side}={endpoint} and periods={periods}"
190 )