Coverage for /home/martinb/.local/share/virtualenvs/camcops/lib/python3.6/site-packages/pandas/core/indexes/timedeltas.py : 28%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1""" implement the TimedeltaIndex """
2from datetime import datetime
4import numpy as np
6from pandas._libs import NaT, Timedelta, index as libindex
7from pandas.util._decorators import Appender, Substitution
9from pandas.core.dtypes.common import (
10 _TD_DTYPE,
11 is_float,
12 is_integer,
13 is_list_like,
14 is_scalar,
15 is_timedelta64_dtype,
16 is_timedelta64_ns_dtype,
17 pandas_dtype,
18)
19from pandas.core.dtypes.missing import is_valid_nat_for_dtype, isna
21from pandas.core.accessor import delegate_names
22from pandas.core.arrays import datetimelike as dtl
23from pandas.core.arrays.timedeltas import TimedeltaArray, _is_convertible_to_td
24from pandas.core.base import _shared_docs
25import pandas.core.common as com
26from pandas.core.indexes.base import Index, _index_shared_docs, maybe_extract_name
27from pandas.core.indexes.datetimelike import (
28 DatetimeIndexOpsMixin,
29 DatetimelikeDelegateMixin,
30 DatetimeTimedeltaMixin,
31)
32from pandas.core.indexes.extension import inherit_names
34from pandas.tseries.frequencies import to_offset
class TimedeltaDelegateMixin(DatetimelikeDelegateMixin):
    """
    Lists of TimedeltaArray attribute names that are delegated.

    The combined ``_delegated_properties`` / ``_delegated_methods`` lists are
    what the ``delegate_names`` decorators on TimedeltaIndex consume.
    """

    # Most attrs are dispatched via datetimelike_{ops,methods}.
    # Some are "raw" methods: the result is not re-boxed in an Index.
    # There are also a few "extra" attrs, which may or may not be raw,
    # that we don't want to expose in the .dt accessor.
    _raw_properties = {"components", "_box_func"}
    _raw_methods = {"to_pytimedelta", "sum", "std", "median", "_format_native_types"}

    # Array-level names first, followed by the raw extras declared above.
    _delegated_properties = [*TimedeltaArray._datetimelike_ops, *_raw_properties]
    _delegated_methods = [*TimedeltaArray._datetimelike_methods, *_raw_methods]
@inherit_names(
    ["_box_values", "__neg__", "__pos__", "__abs__"], TimedeltaArray, wrap=True
)
@inherit_names(
    [
        "_bool_ops",
        "_object_ops",
        "_field_ops",
        "_datetimelike_ops",
        "_datetimelike_methods",
        "_other_ops",
    ],
    TimedeltaArray,
)
@delegate_names(
    TimedeltaArray, TimedeltaDelegateMixin._delegated_properties, typ="property"
)
@delegate_names(
    TimedeltaArray,
    TimedeltaDelegateMixin._delegated_methods,
    typ="method",
    overwrite=True,
)
class TimedeltaIndex(
    DatetimeTimedeltaMixin, dtl.TimelikeOps, TimedeltaDelegateMixin,
):
    """
    Immutable ndarray of timedelta64 data, represented internally as int64, and
    which can be boxed to timedelta objects.

    Parameters
    ----------
    data : array-like (1-dimensional), optional
        Optional timedelta-like data to construct index with.
    unit : str, optional
        Unit of ``data`` (D, h, m, s, ms, us, ns) when ``data`` holds
        integer/float numbers.
    freq : str or pandas offset object, optional
        One of pandas date offset strings or corresponding objects. The string
        'infer' can be passed in order to set the frequency of the index as the
        inferred frequency upon creation.
    copy : bool
        Make a copy of input ndarray.
    name : object
        Name to be stored in the index.

    Attributes
    ----------
    days
    seconds
    microseconds
    nanoseconds
    components
    inferred_freq

    Methods
    -------
    to_pytimedelta
    to_series
    round
    floor
    ceil
    to_frame
    mean

    See Also
    --------
    Index : The base pandas Index type.
    Timedelta : Represents a duration between two dates or times.
    DatetimeIndex : Index of datetime64 data.
    PeriodIndex : Index of Period data.
    timedelta_range : Create a fixed-frequency TimedeltaIndex.

    Notes
    -----
    To learn more about the frequency strings, please see `this link
    <https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#offset-aliases>`__.
    """

    _typ = "timedeltaindex"

    _engine_type = libindex.TimedeltaEngine

    _comparables = ["name", "freq"]
    _attributes = ["name", "freq"]
    _is_numeric_dtype = True
    _infer_as_myclass = True

    # -------------------------------------------------------------------
    # Constructors

    def __new__(
        cls,
        data=None,
        unit=None,
        freq=None,
        closed=None,
        dtype=_TD_DTYPE,
        copy=False,
        name=None,
    ):
        """
        Build a TimedeltaIndex from ``data``.

        ``closed`` is accepted in the signature but is not referenced anywhere
        in this constructor.

        Raises
        ------
        TypeError
            If ``data`` is a scalar.
        ValueError
            If ``unit`` is one of 'Y', 'y' or 'M'.
        """
        name = maybe_extract_name(name, data, cls)

        if is_scalar(data):
            raise TypeError(
                f"{cls.__name__}() must be called with a "
                f"collection of some kind, {repr(data)} was passed"
            )

        if unit in {"Y", "y", "M"}:
            raise ValueError(
                "Units 'M' and 'Y' are no longer supported, as they do not "
                "represent unambiguous timedelta values durations."
            )

        # Fast path: already a TimedeltaArray, wrap it without re-parsing.
        if isinstance(data, TimedeltaArray):
            if copy:
                data = data.copy()
            return cls._simple_new(data, name=name, freq=freq)

        # Fast path: an existing index with nothing to override.
        if isinstance(data, TimedeltaIndex) and freq is None and name is None:
            if copy:
                return data.copy()
            else:
                return data._shallow_copy()

        # - Cases checked above all return/raise before reaching here - #

        tdarr = TimedeltaArray._from_sequence(
            data, freq=freq, unit=unit, dtype=dtype, copy=copy
        )
        return cls._simple_new(tdarr._data, freq=tdarr.freq, name=name)

    @classmethod
    def _simple_new(cls, values, name=None, freq=None, dtype=_TD_DTYPE):
        """
        Create an instance directly from ``values`` via ``object.__new__``.

        Parameters
        ----------
        values : TimedeltaArray or data accepted by TimedeltaArray._simple_new
        name : object, optional
        freq : optional
            When None and ``values`` is a TimedeltaArray, ``values.freq`` is
            used instead.
        dtype : optional
            Asserted to equal ``_TD_DTYPE``.
        """
        # `dtype` is passed by _shallow_copy in corner cases, should always
        #  be timedelta64[ns] if present
        if not isinstance(values, TimedeltaArray):
            values = TimedeltaArray._simple_new(values, dtype=dtype, freq=freq)
        else:
            if freq is None:
                freq = values.freq
        assert isinstance(values, TimedeltaArray), type(values)
        assert dtype == _TD_DTYPE, dtype
        assert values.dtype == "m8[ns]", values.dtype

        tdarr = TimedeltaArray._simple_new(values._data, freq=freq)
        result = object.__new__(cls)
        result._data = tdarr
        result._name = name
        # For groupby perf. See note in indexes/base about _index_data
        result._index_data = tdarr._data

        result._reset_identity()
        return result

    # -------------------------------------------------------------------
    # Rendering Methods

    @property
    def _formatter_func(self):
        """
        Return the formatter given by ``_get_format_timedelta64(self, box=True)``.
        """
        # Imported here rather than at module level, as in the original code.
        from pandas.io.formats.format import _get_format_timedelta64

        return _get_format_timedelta64(self, box=True)

    # -------------------------------------------------------------------

    # No docstring here on purpose: Appender builds it from the shared docs.
    @Appender(_index_shared_docs["astype"])
    def astype(self, dtype, copy=True):
        dtype = pandas_dtype(dtype)
        if is_timedelta64_dtype(dtype) and not is_timedelta64_ns_dtype(dtype):
            # Have to repeat the check for 'timedelta64' (not ns) dtype
            #  so that we can return a numeric index, since pandas will return
            #  a TimedeltaIndex when dtype='timedelta'
            result = self._data.astype(dtype, copy=copy)
            if self.hasnans:
                # Missing values present: return the converted values as-is.
                return Index(result, name=self.name)
            return Index(result.astype("i8"), name=self.name)
        return DatetimeIndexOpsMixin.astype(self, dtype, copy=copy)

    def _maybe_promote(self, other):
        """
        Convert ``other`` to a TimedeltaIndex when its ``inferred_type`` is
        "timedelta", and return the pair ``(self, other)``.
        """
        if other.inferred_type == "timedelta":
            other = TimedeltaIndex(other)
        return self, other

    def get_value(self, series, key):
        """
        Fast lookup of value from 1-dimensional ndarray. Only use this if you
        know what you're doing

        Raises
        ------
        KeyError
            If ``key`` cannot be resolved by any of the lookup strategies.
        """

        if _is_convertible_to_td(key):
            key = Timedelta(key)
            return self.get_value_maybe_box(series, key)

        try:
            value = Index.get_value(self, series, key)
        except KeyError:
            # Fallback 1: treat the key as a (partial) string slice.
            try:
                loc = self._get_string_slice(key)
                return series[loc]
            except (TypeError, ValueError, KeyError):
                pass

            # Fallback 2: go through the engine directly.
            try:
                return self.get_value_maybe_box(series, key)
            except (TypeError, ValueError, KeyError) as err:
                # Chain explicitly so the underlying failure stays visible.
                raise KeyError(key) from err
        else:
            return com.maybe_box(self, value, series, key)

    def get_value_maybe_box(self, series, key: Timedelta):
        """
        Look ``key`` up through the index engine and pass the result through
        ``com.maybe_box``.
        """
        values = self._engine.get_value(com.values_from_object(series), key)
        return com.maybe_box(self, values, series, key)

    def get_loc(self, key, method=None, tolerance=None):
        """
        Get integer location for requested label

        Parameters
        ----------
        key : label
        method : optional
            Forwarded to ``Index.get_loc``.
        tolerance : optional
            Converted with ``_convert_tolerance`` and forwarded to
            ``Index.get_loc``.

        Returns
        -------
        loc : int

        Raises
        ------
        TypeError
            If ``key`` is list-like, or is a datetime object other than NaT.
        KeyError
            If ``key`` is not found after all fallbacks have been tried.
        """
        if is_list_like(key) or (isinstance(key, datetime) and key is not NaT):
            # GH#20464 datetime check here is to ensure we don't allow
            #   datetime objects to be incorrectly treated as timedelta
            #   objects; NaT is a special case because it plays a double role
            #   as Not-A-Timedelta
            raise TypeError(
                f"Cannot index {type(self).__name__} with {type(key).__name__}"
            )

        if isna(key):
            key = NaT

        if tolerance is not None:
            # try converting tolerance now, so errors don't get swallowed by
            # the try/except clauses below
            tolerance = self._convert_tolerance(tolerance, np.asarray(key))

        if _is_convertible_to_td(key) or key is NaT:
            key = Timedelta(key)
            return Index.get_loc(self, key, method, tolerance)

        try:
            return Index.get_loc(self, key, method, tolerance)
        except (KeyError, ValueError, TypeError):
            # Fallback 1: treat the key as a (partial) string slice.
            try:
                return self._get_string_slice(key)
            except (TypeError, KeyError, ValueError):
                pass

            # Fallback 2: coerce to Timedelta and retry the plain lookup.
            try:
                stamp = Timedelta(key)
                return Index.get_loc(self, stamp, method, tolerance)
            except (KeyError, ValueError) as err:
                # Chain explicitly so the underlying failure stays visible.
                raise KeyError(key) from err

    def _maybe_cast_slice_bound(self, label, side, kind):
        """
        If label is a string, cast it to timedelta according to resolution.

        Parameters
        ----------
        label : object
        side : {'left', 'right'}
        kind : {'ix', 'loc', 'getitem'}

        Returns
        -------
        label : object
        """
        assert kind in ["ix", "loc", "getitem", None]

        if isinstance(label, str):
            parsed = Timedelta(label)
            lbound = parsed.round(parsed.resolution_string)
            if side == "left":
                return lbound
            else:
                # Right bound: one resolution unit past lbound, minus 1ns.
                return lbound + to_offset(parsed.resolution_string) - Timedelta(1, "ns")
        elif is_integer(label) or is_float(label):
            self._invalid_indexer("slice", label)

        return label

    def _get_string_slice(self, key):
        """
        Hand integer, float and NaT keys to ``_invalid_indexer``; otherwise
        return the result of ``_partial_td_slice(key)``.
        """
        if is_integer(key) or is_float(key) or key is NaT:
            self._invalid_indexer("slice", key)
        return self._partial_td_slice(key)

    def _partial_td_slice(self, key):
        """
        Return non-string keys unchanged; string keys are not supported.

        Raises
        ------
        NotImplementedError
            If ``key`` is a string.
        """
        # given a key, try to figure out a location for a partial slice
        if not isinstance(key, str):
            return key

        raise NotImplementedError

    # No docstring here on purpose: Substitution/Appender build it from the
    # shared docs.
    @Substitution(klass="TimedeltaIndex")
    @Appender(_shared_docs["searchsorted"])
    def searchsorted(self, value, side="left", sorter=None):
        if isinstance(value, (np.ndarray, Index)):
            if not type(self._data)._is_recognized_dtype(value):
                raise TypeError(
                    "searchsorted requires compatible dtype or scalar, "
                    f"not {type(value).__name__}"
                )
            value = type(self._data)(value)
            self._data._check_compatible_with(value)

        elif isinstance(value, self._data._recognized_scalars):
            self._data._check_compatible_with(value)
            value = self._data._scalar_type(value)

        elif not isinstance(value, TimedeltaArray):
            raise TypeError(
                "searchsorted requires compatible dtype or scalar, "
                f"not {type(value).__name__}"
            )

        return self._data.searchsorted(value, side=side, sorter=sorter)

    def is_type_compatible(self, typ) -> bool:
        """
        Return True if ``typ`` equals ``self.inferred_type`` or "timedelta".
        """
        return typ == self.inferred_type or typ == "timedelta"

    @property
    def inferred_type(self) -> str:
        """
        Return the string "timedelta64".
        """
        return "timedelta64"

    def insert(self, loc, item):
        """
        Make new Index inserting new item at location

        Parameters
        ----------
        loc : int
        item : object
            Scalars recognized by the underlying array are converted to its
            scalar type, and values that are a valid NaT for this dtype become
            the index's NA value. If the int64-based insertion fails and
            ``item`` is a string, the result falls back to an object-dtype
            Index.

        Returns
        -------
        new_index : Index

        Raises
        ------
        TypeError
            If ``item`` is an NA scalar that is not valid for this dtype, or
            the insertion fails and ``item`` is not a string.
        """
        incompatible_msg = (
            f"cannot insert {type(self).__name__} with incompatible label"
        )

        # try to convert if possible
        if isinstance(item, self._data._recognized_scalars):
            item = self._data._scalar_type(item)
        elif is_valid_nat_for_dtype(item, self.dtype):
            # GH 18295
            item = self._na_value
        elif is_scalar(item) and isna(item):
            # i.e. datetime64("NaT")
            raise TypeError(incompatible_msg)

        freq = None
        if isinstance(item, self._data._scalar_type) or item is NaT:
            self._data._check_compatible_with(item, setitem=True)

            # check freq can be preserved on edge cases
            if self.size and self.freq is not None:
                if item is NaT:
                    pass
                elif (loc == 0 or loc == -len(self)) and item + self.freq == self[0]:
                    freq = self.freq
                elif (loc == len(self)) and item - self.freq == self[-1]:
                    freq = self.freq
            item = item.asm8

        try:
            new_i8s = np.concatenate(
                (self[:loc].asi8, [item.view(np.int64)], self[loc:].asi8)
            )
            return self._shallow_copy(new_i8s, freq=freq)
        except (AttributeError, TypeError) as err:

            # fall back to object index
            if isinstance(item, str):
                return self.astype(object).insert(loc, item)
            # Chain explicitly so the underlying failure stays visible.
            raise TypeError(incompatible_msg) from err
# Class-level setup that runs once when this module is imported.
# NOTE(review): judging by the name, this hook disables the logical
# methods on TimedeltaIndex -- confirm against the base Index implementation.
TimedeltaIndex._add_logical_methods_disabled()
def timedelta_range(
    start=None, end=None, periods=None, freq=None, name=None, closed=None
) -> TimedeltaIndex:
    """
    Build a fixed-frequency TimedeltaIndex; the frequency defaults to one day.

    Parameters
    ----------
    start : str or timedelta-like, default None
        Left bound of the generated timedeltas.
    end : str or timedelta-like, default None
        Right bound of the generated timedeltas.
    periods : int, default None
        How many periods to generate.
    freq : str or DateOffset, default 'D'
        Frequency string or offset; strings may carry multiples, e.g. '5H'.
    name : str, default None
        Name given to the resulting TimedeltaIndex.
    closed : str, default None
        Which side of the interval is closed with respect to the given
        frequency: 'left', 'right', or both sides (None).

    Returns
    -------
    rng : TimedeltaIndex

    Notes
    -----
    Exactly three of the four parameters ``start``, ``end``, ``periods`` and
    ``freq`` must be specified. When ``freq`` is left out, the resulting
    ``TimedeltaIndex`` holds ``periods`` linearly spaced elements between
    ``start`` and ``end`` (closed on both sides).

    To learn more about the frequency strings, please see `this link
    <https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#offset-aliases>`__.

    Examples
    --------

    >>> pd.timedelta_range(start='1 day', periods=4)
    TimedeltaIndex(['1 days', '2 days', '3 days', '4 days'],
                   dtype='timedelta64[ns]', freq='D')

    The ``closed`` parameter specifies which endpoint is included.  The default
    behavior is to include both endpoints.

    >>> pd.timedelta_range(start='1 day', periods=4, closed='right')
    TimedeltaIndex(['2 days', '3 days', '4 days'],
                   dtype='timedelta64[ns]', freq='D')

    The ``freq`` parameter specifies the frequency of the TimedeltaIndex.
    Only fixed frequencies can be passed, non-fixed frequencies such as
    'M' (month end) will raise.

    >>> pd.timedelta_range(start='1 day', end='2 days', freq='6H')
    TimedeltaIndex(['1 days 00:00:00', '1 days 06:00:00', '1 days 12:00:00',
                    '1 days 18:00:00', '2 days 00:00:00'],
                   dtype='timedelta64[ns]', freq='6H')

    Specify ``start``, ``end``, and ``periods``; the frequency is generated
    automatically (linearly spaced).

    >>> pd.timedelta_range(start='1 day', end='5 days', periods=4)
    TimedeltaIndex(['1 days 00:00:00', '2 days 08:00:00', '3 days 16:00:00',
                    '5 days 00:00:00'],
                   dtype='timedelta64[ns]', freq=None)
    """
    # Fall back to a daily frequency unless start, end and periods were all
    # supplied (in which case the spacing is derived from them).
    use_default_freq = freq is None and com.any_none(periods, start, end)
    if use_default_freq:
        freq = "D"

    # Only the first element of the returned pair is used here.
    freq, _ = dtl.maybe_infer_freq(freq)

    generated = TimedeltaArray._generate_range(
        start, end, periods, freq, closed=closed
    )
    return TimedeltaIndex._simple_new(
        generated._data, freq=generated.freq, name=name
    )