Coverage for /home/martinb/.local/share/virtualenvs/camcops/lib/python3.6/site-packages/pandas/core/indexes/interval.py : 27%

1""" define the IntervalIndex """
2from operator import le, lt
3import textwrap
4from typing import Any, Optional, Tuple, Union
6import numpy as np
8from pandas._config import get_option
10from pandas._libs import Timedelta, Timestamp, lib
11from pandas._libs.interval import Interval, IntervalMixin, IntervalTree
12from pandas._typing import AnyArrayLike
13from pandas.util._decorators import Appender, Substitution, cache_readonly
14from pandas.util._exceptions import rewrite_exception
16from pandas.core.dtypes.cast import (
17 find_common_type,
18 infer_dtype_from_scalar,
19 maybe_downcast_to_dtype,
20)
21from pandas.core.dtypes.common import (
22 ensure_platform_int,
23 is_categorical,
24 is_datetime64tz_dtype,
25 is_datetime_or_timedelta_dtype,
26 is_dtype_equal,
27 is_float,
28 is_float_dtype,
29 is_integer,
30 is_integer_dtype,
31 is_interval_dtype,
32 is_list_like,
33 is_number,
34 is_object_dtype,
35 is_scalar,
36)
37from pandas.core.dtypes.generic import ABCSeries
38from pandas.core.dtypes.missing import isna
40from pandas.core.algorithms import take_1d
41from pandas.core.arrays.interval import IntervalArray, _interval_shared_docs
42import pandas.core.common as com
43import pandas.core.indexes.base as ibase
44from pandas.core.indexes.base import (
45 Index,
46 InvalidIndexError,
47 _index_shared_docs,
48 default_pprint,
49 ensure_index,
50 maybe_extract_name,
51)
52from pandas.core.indexes.datetimes import DatetimeIndex, date_range
53from pandas.core.indexes.extension import ExtensionIndex, inherit_names
54from pandas.core.indexes.multi import MultiIndex
55from pandas.core.indexes.timedeltas import TimedeltaIndex, timedelta_range
56from pandas.core.ops import get_op_result_name
58from pandas.tseries.frequencies import to_offset
59from pandas.tseries.offsets import DateOffset
61_VALID_CLOSED = {"left", "right", "both", "neither"}
62_index_doc_kwargs = dict(ibase._index_doc_kwargs)
64_index_doc_kwargs.update(
65 dict(
66 klass="IntervalIndex",
67 qualname="IntervalIndex",
68 target_klass="IntervalIndex or list of Intervals",
69 name=textwrap.dedent(
70 """\
71 name : object, optional
72 Name to be stored in the index.
73 """
74 ),
75 )
76)


def _get_next_label(label):
    dtype = getattr(label, "dtype", type(label))
    if isinstance(label, (Timestamp, Timedelta)):
        dtype = "datetime64"
    if is_datetime_or_timedelta_dtype(dtype) or is_datetime64tz_dtype(dtype):
        return label + np.timedelta64(1, "ns")
    elif is_integer_dtype(dtype):
        return label + 1
    elif is_float_dtype(dtype):
        return np.nextafter(label, np.infty)
    else:
        raise TypeError(f"cannot determine next label for type {repr(type(label))}")


def _get_prev_label(label):
    dtype = getattr(label, "dtype", type(label))
    if isinstance(label, (Timestamp, Timedelta)):
        dtype = "datetime64"
    if is_datetime_or_timedelta_dtype(dtype) or is_datetime64tz_dtype(dtype):
        return label - np.timedelta64(1, "ns")
    elif is_integer_dtype(dtype):
        return label - 1
    elif is_float_dtype(dtype):
        return np.nextafter(label, -np.infty)
    else:
        raise TypeError(
            f"cannot determine previous label for type {repr(type(label))}"
        )
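

# Illustrative sketch (not part of the original source): these helpers nudge a
# label by the smallest step its type can represent, e.g.
#
#     _get_next_label(5)    # -> 6
#     _get_prev_label(5)    # -> 4
#     _get_next_label(1.0)  # -> np.nextafter(1.0, np.inf)
#     _get_next_label(pd.Timestamp("2020-01-01"))  # -> 2020-01-01 00:00:00.000000001
#
# They are used by ``_searchsorted_monotonic`` below to turn open endpoints
# into strict bounds.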


def _new_IntervalIndex(cls, d):
    """
    This is called upon unpickling, rather than the default which doesn't have
    arguments and breaks __new__.
    """
    return cls.from_arrays(**d)


class SetopCheck:
    """
    This is called to decorate the set operations of IntervalIndex
    to perform the type check in advance.
    """

    def __init__(self, op_name):
        self.op_name = op_name

    def __call__(self, setop):
        def func(intvidx_self, other, sort=False):
            intvidx_self._assert_can_do_setop(other)
            other = ensure_index(other)

            if not isinstance(other, IntervalIndex):
                result = getattr(intvidx_self.astype(object), self.op_name)(other)
                if self.op_name in ("difference",):
                    result = result.astype(intvidx_self.dtype)
                return result
            elif intvidx_self.closed != other.closed:
                raise ValueError(
                    "can only do set operations between two IntervalIndex "
                    "objects that are closed on the same side"
                )

            # GH 19016: ensure set op will not return a prohibited dtype
            subtypes = [intvidx_self.dtype.subtype, other.dtype.subtype]
            common_subtype = find_common_type(subtypes)
            if is_object_dtype(common_subtype):
                raise TypeError(
                    f"can only do {self.op_name} between two IntervalIndex "
                    "objects that have compatible dtypes"
                )

            return setop(intvidx_self, other, sort)

        return func
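

# Illustrative sketch of how ``SetopCheck`` is applied (drawn from the
# ``intersection``/``_setop`` definitions further down, not new API):
#
#     @SetopCheck(op_name="intersection")
#     def intersection(self, other, sort=False):
#         ...  # body only runs for a compatible IntervalIndex ``other``
#
# Non-IntervalIndex inputs are routed through object dtype, and mismatched
# ``closed`` or incompatible subtypes raise before the decorated body runs.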


@Appender(
    _interval_shared_docs["class"]
    % dict(
        klass="IntervalIndex",
        summary="Immutable index of intervals that are closed on the same side.",
        name=_index_doc_kwargs["name"],
        versionadded="0.20.0",
        extra_attributes="is_overlapping\nvalues\n",
        extra_methods="",
        examples=textwrap.dedent(
            """\
    Examples
    --------
    A new ``IntervalIndex`` is typically constructed using
    :func:`interval_range`:

    >>> pd.interval_range(start=0, end=5)
    IntervalIndex([(0, 1], (1, 2], (2, 3], (3, 4], (4, 5]],
                  closed='right',
                  dtype='interval[int64]')

    It may also be constructed using one of the constructor
    methods: :meth:`IntervalIndex.from_arrays`,
    :meth:`IntervalIndex.from_breaks`, and :meth:`IntervalIndex.from_tuples`.

    See further examples in the doc strings of ``interval_range`` and the
    mentioned constructor methods.
    """
        ),
    )
)
@inherit_names(["set_closed", "to_tuples"], IntervalArray, wrap=True)
@inherit_names(
    [
        "__len__",
        "__array__",
        "overlaps",
        "contains",
        "size",
        "dtype",
        "left",
        "right",
        "length",
    ],
    IntervalArray,
)
@inherit_names(
    ["is_non_overlapping_monotonic", "mid", "_ndarray_values", "closed"],
    IntervalArray,
    cache=True,
)
class IntervalIndex(IntervalMixin, ExtensionIndex):
    _typ = "intervalindex"
    _comparables = ["name"]
    _attributes = ["name", "closed"]

    # we would like our indexing holder to defer to us
    _defer_to_indexing = True

    # Immutable, so we are able to cache computations like isna in '_mask'
    _mask = None

    # --------------------------------------------------------------------
    # Constructors

    def __new__(
        cls,
        data,
        closed=None,
        dtype=None,
        copy: bool = False,
        name=None,
        verify_integrity: bool = True,
    ):

        name = maybe_extract_name(name, data, cls)

        with rewrite_exception("IntervalArray", cls.__name__):
            array = IntervalArray(
                data,
                closed=closed,
                copy=copy,
                dtype=dtype,
                verify_integrity=verify_integrity,
            )

        return cls._simple_new(array, name)

    @classmethod
    def _simple_new(cls, array, name, closed=None):
        """
        Construct from an IntervalArray

        Parameters
        ----------
        array : IntervalArray
        name : str
            Attached as result.name
        closed : Any
            Ignored.
        """
        result = IntervalMixin.__new__(cls)
        result._data = array
        result.name = name
        result._no_setting_name = False
        result._reset_identity()
        return result

    @classmethod
    @Appender(
        _interval_shared_docs["from_breaks"]
        % dict(
            klass="IntervalIndex",
            examples=textwrap.dedent(
                """\
        Examples
        --------
        >>> pd.IntervalIndex.from_breaks([0, 1, 2, 3])
        IntervalIndex([(0, 1], (1, 2], (2, 3]],
                      closed='right',
                      dtype='interval[int64]')
        """
            ),
        )
    )
    def from_breaks(
        cls, breaks, closed: str = "right", name=None, copy: bool = False, dtype=None
    ):
        with rewrite_exception("IntervalArray", cls.__name__):
            array = IntervalArray.from_breaks(
                breaks, closed=closed, copy=copy, dtype=dtype
            )
        return cls._simple_new(array, name=name)

    @classmethod
    @Appender(
        _interval_shared_docs["from_arrays"]
        % dict(
            klass="IntervalIndex",
            examples=textwrap.dedent(
                """\
        Examples
        --------
        >>> pd.IntervalIndex.from_arrays([0, 1, 2], [1, 2, 3])
        IntervalIndex([(0, 1], (1, 2], (2, 3]],
                      closed='right',
                      dtype='interval[int64]')
        """
            ),
        )
    )
    def from_arrays(
        cls,
        left,
        right,
        closed: str = "right",
        name=None,
        copy: bool = False,
        dtype=None,
    ):
        with rewrite_exception("IntervalArray", cls.__name__):
            array = IntervalArray.from_arrays(
                left, right, closed, copy=copy, dtype=dtype
            )
        return cls._simple_new(array, name=name)

    @classmethod
    @Appender(
        _interval_shared_docs["from_tuples"]
        % dict(
            klass="IntervalIndex",
            examples=textwrap.dedent(
                """\
        Examples
        --------
        >>> pd.IntervalIndex.from_tuples([(0, 1), (1, 2)])
        IntervalIndex([(0, 1], (1, 2]],
                      closed='right',
                      dtype='interval[int64]')
        """
            ),
        )
    )
    def from_tuples(
        cls, data, closed: str = "right", name=None, copy: bool = False, dtype=None
    ):
        with rewrite_exception("IntervalArray", cls.__name__):
            arr = IntervalArray.from_tuples(data, closed=closed, copy=copy, dtype=dtype)
        return cls._simple_new(arr, name=name)

    # --------------------------------------------------------------------

    @Appender(_index_shared_docs["_shallow_copy"])
    def _shallow_copy(self, left=None, right=None, **kwargs):
        result = self._data._shallow_copy(left=left, right=right)
        attributes = self._get_attributes_dict()
        attributes.update(kwargs)
        return self._simple_new(result, **attributes)

    @cache_readonly
    def _isnan(self):
        """
        Return a mask indicating if each value is NA.
        """
        if self._mask is None:
            self._mask = isna(self.left)
        return self._mask

    @cache_readonly
    def _engine(self):
        left = self._maybe_convert_i8(self.left)
        right = self._maybe_convert_i8(self.right)
        return IntervalTree(left, right, closed=self.closed)

    def __contains__(self, key) -> bool:
        """
        Return a boolean indicating if this key is IN the index.
        We *only* accept an Interval.

        Parameters
        ----------
        key : Interval

        Returns
        -------
        bool
        """
        if not isinstance(key, Interval):
            return False

        try:
            self.get_loc(key)
            return True
        except KeyError:
            return False
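
    # Illustrative behaviour (sketch, not part of the original source):
    #
    #     >>> idx = pd.IntervalIndex.from_breaks([0, 1, 2])
    #     >>> pd.Interval(0, 1, closed="right") in idx
    #     True
    #     >>> 0.5 in idx  # non-Interval keys are never "in" the index
    #     False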

    @cache_readonly
    def _multiindex(self):
        return MultiIndex.from_arrays([self.left, self.right], names=["left", "right"])

    @cache_readonly
    def values(self):
        """
        Return the IntervalIndex's data as an IntervalArray.
        """
        return self._data

    @cache_readonly
    def _values(self):
        return self._data

    @property
    def _has_complex_internals(self):
        # used to avoid libreduction code paths, which raise or require conversion
        return True

    def __array_wrap__(self, result, context=None):
        # we don't want the superclass implementation
        return result

    def __reduce__(self):
        d = dict(left=self.left, right=self.right)
        d.update(self._get_attributes_dict())
        return _new_IntervalIndex, (type(self), d), None

    @Appender(_index_shared_docs["astype"])
    def astype(self, dtype, copy=True):
        with rewrite_exception("IntervalArray", type(self).__name__):
            new_values = self.values.astype(dtype, copy=copy)
        if is_interval_dtype(new_values):
            return self._shallow_copy(new_values.left, new_values.right)
        return Index.astype(self, dtype, copy=copy)

    @property
    def inferred_type(self) -> str:
        """Return a string of the type inferred from the values"""
        return "interval"

    @Appender(Index.memory_usage.__doc__)
    def memory_usage(self, deep: bool = False) -> int:
        # we don't use an explicit engine
        # so return the bytes here
        return self.left.memory_usage(deep=deep) + self.right.memory_usage(deep=deep)

    @cache_readonly
    def is_monotonic(self) -> bool:
        """
        Return True if the IntervalIndex is monotonic increasing (only equal or
        increasing values), else False
        """
        return self.is_monotonic_increasing

    @cache_readonly
    def is_monotonic_increasing(self) -> bool:
        """
        Return True if the IntervalIndex is monotonic increasing (only equal or
        increasing values), else False
        """
        return self._engine.is_monotonic_increasing

    @cache_readonly
    def is_monotonic_decreasing(self) -> bool:
        """
        Return True if the IntervalIndex is monotonic decreasing (only equal or
        decreasing values), else False
        """
        return self[::-1].is_monotonic_increasing

    @cache_readonly
    def is_unique(self):
        """
        Return True if the IntervalIndex contains unique elements, else False.
        """
        left = self.left
        right = self.right

        if self.isna().sum() > 1:
            return False

        if left.is_unique or right.is_unique:
            return True

        seen_pairs = set()
        check_idx = np.where(left.duplicated(keep=False))[0]
        for idx in check_idx:
            pair = (left[idx], right[idx])
            if pair in seen_pairs:
                return False
            seen_pairs.add(pair)

        return True

    @property
    def is_overlapping(self):
        """
        Return True if the IntervalIndex has overlapping intervals, else False.

        Two intervals overlap if they share a common point, including closed
        endpoints. Intervals that only have an open endpoint in common do not
        overlap.

        .. versionadded:: 0.24.0

        Returns
        -------
        bool
            Boolean indicating if the IntervalIndex has overlapping intervals.

        See Also
        --------
        Interval.overlaps : Check whether two Interval objects overlap.
        IntervalIndex.overlaps : Check an IntervalIndex elementwise for
            overlaps.

        Examples
        --------
        >>> index = pd.IntervalIndex.from_tuples([(0, 2), (1, 3), (4, 5)])
        >>> index
        IntervalIndex([(0, 2], (1, 3], (4, 5]],
              closed='right',
              dtype='interval[int64]')
        >>> index.is_overlapping
        True

        Intervals that share closed endpoints overlap:

        >>> index = pd.interval_range(0, 3, closed='both')
        >>> index
        IntervalIndex([[0, 1], [1, 2], [2, 3]],
              closed='both',
              dtype='interval[int64]')
        >>> index.is_overlapping
        True

        Intervals that only have an open endpoint in common do not overlap:

        >>> index = pd.interval_range(0, 3, closed='left')
        >>> index
        IntervalIndex([[0, 1), [1, 2), [2, 3)],
              closed='left',
              dtype='interval[int64]')
        >>> index.is_overlapping
        False
        """
        # GH 23309
        return self._engine.is_overlapping

    @Appender(_index_shared_docs["_convert_scalar_indexer"])
    def _convert_scalar_indexer(self, key, kind=None):
        if kind == "iloc":
            return super()._convert_scalar_indexer(key, kind=kind)
        return key

    def _maybe_cast_slice_bound(self, label, side, kind):
        return getattr(self, side)._maybe_cast_slice_bound(label, side, kind)

    @Appender(_index_shared_docs["_convert_list_indexer"])
    def _convert_list_indexer(self, keyarr, kind=None):
        """
        We are passed a list-like indexer. Return the
        indexer for matching intervals.
        """
        locs = self.get_indexer_for(keyarr)

        # we have missing values
        if (locs == -1).any():
            raise KeyError

        return locs

    def _can_reindex(self, indexer: np.ndarray) -> None:
        """
        Check if we are allowing reindexing with this particular indexer.

        Parameters
        ----------
        indexer : an integer indexer

        Raises
        ------
        ValueError if it's a duplicate axis
        """

        # trying to reindex on an axis with duplicates
        if self.is_overlapping and len(indexer):
            raise ValueError("cannot reindex from an overlapping axis")

    def _needs_i8_conversion(self, key):
        """
        Check if a given key needs i8 conversion. Conversion is necessary for
        Timestamp, Timedelta, DatetimeIndex, and TimedeltaIndex keys. An
        Interval-like requires conversion if its endpoints are one of the
        aforementioned types.

        Assumes that any list-like data has already been cast to an Index.

        Parameters
        ----------
        key : scalar or Index-like
            The key that should be checked for i8 conversion

        Returns
        -------
        bool
        """
        if is_interval_dtype(key) or isinstance(key, Interval):
            return self._needs_i8_conversion(key.left)

        i8_types = (Timestamp, Timedelta, DatetimeIndex, TimedeltaIndex)
        return isinstance(key, i8_types)
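
    # Illustrative behaviour (sketch): ``self._needs_i8_conversion(pd.Timestamp("2020-01-01"))``
    # is True, while a plain numeric key such as ``self._needs_i8_conversion(3)`` is
    # False, so only datetime-like keys go through the i8 conversion in
    # ``_maybe_convert_i8`` below.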

    def _maybe_convert_i8(self, key):
        """
        Maybe convert a given key to its equivalent i8 value(s). Used as a
        preprocessing step prior to IntervalTree queries (self._engine), which
        expects numeric data.

        Parameters
        ----------
        key : scalar or list-like
            The key that should maybe be converted to i8.

        Returns
        -------
        scalar or list-like
            The original key if no conversion occurred, int if converted scalar,
            Int64Index if converted list-like.
        """
        original = key
        if is_list_like(key):
            key = ensure_index(key)

        if not self._needs_i8_conversion(key):
            return original

        scalar = is_scalar(key)
        if is_interval_dtype(key) or isinstance(key, Interval):
            # convert left/right and reconstruct
            left = self._maybe_convert_i8(key.left)
            right = self._maybe_convert_i8(key.right)
            constructor = Interval if scalar else IntervalIndex.from_arrays
            return constructor(left, right, closed=self.closed)

        if scalar:
            # Timestamp/Timedelta
            key_dtype, key_i8 = infer_dtype_from_scalar(key, pandas_dtype=True)
        else:
            # DatetimeIndex/TimedeltaIndex
            key_dtype, key_i8 = key.dtype, Index(key.asi8)
            if key.hasnans:
                # convert NaT from its i8 value to np.nan so it's not viewed
                # as a valid value, maybe causing errors (e.g. is_overlapping)
                key_i8 = key_i8.where(~key._isnan)

        # ensure consistency with IntervalIndex subtype
        subtype = self.dtype.subtype

        if not is_dtype_equal(subtype, key_dtype):
            raise ValueError(
                f"Cannot index an IntervalIndex of subtype {subtype} with "
                f"values of dtype {key_dtype}"
            )

        return key_i8
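
    # Illustrative sketch (not part of the original source): for a
    # datetime-subtype index, a scalar key is replaced by its nanosecond integer
    # representation before reaching the IntervalTree engine, e.g.
    #
    #     >>> idx = pd.interval_range(pd.Timestamp("2020-01-01"), periods=2)
    #     >>> idx._maybe_convert_i8(pd.Timestamp("2020-01-01"))
    #     1577836800000000000  # i8 nanoseconds since the epoch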

    def _check_method(self, method):
        if method is None:
            return

        if method in ["bfill", "backfill", "pad", "ffill", "nearest"]:
            raise NotImplementedError(
                f"method {method} not yet implemented for IntervalIndex"
            )

        raise ValueError("Invalid fill method")

    def _searchsorted_monotonic(self, label, side, exclude_label=False):
        if not self.is_non_overlapping_monotonic:
            raise KeyError(
                "can only get slices from an IntervalIndex if bounds are "
                "non-overlapping and all monotonic increasing or decreasing"
            )

        if isinstance(label, IntervalMixin):
            raise NotImplementedError("Interval objects are not currently supported")

        # GH 20921: "not is_monotonic_increasing" for the second condition
        # instead of "is_monotonic_decreasing" to account for single element
        # indexes being both increasing and decreasing
        if (side == "left" and self.left.is_monotonic_increasing) or (
            side == "right" and not self.left.is_monotonic_increasing
        ):
            sub_idx = self.right
            if self.open_right or exclude_label:
                label = _get_next_label(label)
        else:
            sub_idx = self.left
            if self.open_left or exclude_label:
                label = _get_prev_label(label)

        return sub_idx._searchsorted_monotonic(label, side)

    def get_loc(
        self, key: Any, method: Optional[str] = None, tolerance=None
    ) -> Union[int, slice, np.ndarray]:
        """
        Get integer location, slice or boolean mask for requested label.

        Parameters
        ----------
        key : label
        method : {None}, optional
            * default: matches where the label is within an interval only.

        Returns
        -------
        int if unique index, slice if monotonic index, else mask

        Examples
        --------
        >>> i1, i2 = pd.Interval(0, 1), pd.Interval(1, 2)
        >>> index = pd.IntervalIndex([i1, i2])
        >>> index.get_loc(1)
        0

        You can also supply a point inside an interval.

        >>> index.get_loc(1.5)
        1

        If a label is in several intervals, you get the locations of all the
        relevant intervals.

        >>> i3 = pd.Interval(0, 2)
        >>> overlapping_index = pd.IntervalIndex([i1, i2, i3])
        >>> overlapping_index.get_loc(0.5)
        array([ True, False,  True])

        Only exact matches will be returned if an interval is provided.

        >>> index.get_loc(pd.Interval(0, 1))
        0
        """
        self._check_method(method)

        # list-like are invalid labels for II but in some cases may work, e.g.
        # single element array of comparable type, so guard against them early
        if is_list_like(key):
            raise KeyError(key)

        if isinstance(key, Interval):
            if self.closed != key.closed:
                raise KeyError(key)
            mask = (self.left == key.left) & (self.right == key.right)
        else:
            # assume scalar
            op_left = le if self.closed_left else lt
            op_right = le if self.closed_right else lt
            try:
                mask = op_left(self.left, key) & op_right(key, self.right)
            except TypeError:
                # scalar is not comparable to II subtype --> invalid label
                raise KeyError(key)

        matches = mask.sum()
        if matches == 0:
            raise KeyError(key)
        elif matches == 1:
            return mask.argmax()
        return lib.maybe_booleans_to_slice(mask.view("u1"))

    @Substitution(
        **dict(
            _index_doc_kwargs,
            **{
                "raises_section": textwrap.dedent(
                    """
        Raises
        ------
        NotImplementedError
            If any method argument other than the default of
            None is specified as these are not yet implemented.
        """
                )
            },
        )
    )
    @Appender(_index_shared_docs["get_indexer"])
    def get_indexer(
        self,
        target: AnyArrayLike,
        method: Optional[str] = None,
        limit: Optional[int] = None,
        tolerance: Optional[Any] = None,
    ) -> np.ndarray:

        self._check_method(method)

        if self.is_overlapping:
            raise InvalidIndexError(
                "cannot handle overlapping indices; "
                "use IntervalIndex.get_indexer_non_unique"
            )

        target_as_index = ensure_index(target)

        if isinstance(target_as_index, IntervalIndex):
            # equal indexes -> 1:1 positional match
            if self.equals(target_as_index):
                return np.arange(len(self), dtype="intp")

            # different closed or incompatible subtype -> no matches
            common_subtype = find_common_type(
                [self.dtype.subtype, target_as_index.dtype.subtype]
            )
            if self.closed != target_as_index.closed or is_object_dtype(common_subtype):
                return np.repeat(np.intp(-1), len(target_as_index))

            # non-overlapping -> at most one match per interval in target_as_index
            # want exact matches -> need both left/right to match, so defer to
            # left/right get_indexer, compare elementwise, equality -> match
            left_indexer = self.left.get_indexer(target_as_index.left)
            right_indexer = self.right.get_indexer(target_as_index.right)
            indexer = np.where(left_indexer == right_indexer, left_indexer, -1)
        elif is_categorical(target_as_index):
            # get an indexer for unique categories then propagate to codes via take_1d
            categories_indexer = self.get_indexer(target_as_index.categories)
            indexer = take_1d(categories_indexer, target_as_index.codes, fill_value=-1)
        elif not is_object_dtype(target_as_index):
            # homogeneous scalar index: use IntervalTree
            target_as_index = self._maybe_convert_i8(target_as_index)
            indexer = self._engine.get_indexer(target_as_index.values)
        else:
            # heterogeneous scalar index: defer elementwise to get_loc
            # (non-overlapping so get_loc guarantees scalar or KeyError)
            indexer = []
            for key in target_as_index:
                try:
                    loc = self.get_loc(key)
                except KeyError:
                    loc = -1
                indexer.append(loc)

        return ensure_platform_int(indexer)
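
    # Illustrative behaviour (sketch, not part of the original source):
    #
    #     >>> idx = pd.IntervalIndex.from_breaks([0, 1, 2, 3])
    #     >>> idx.get_indexer([0.5, 2.5, 10])
    #     array([ 0,  2, -1])
    #
    # Scalars falling outside every interval map to -1; an overlapping index
    # raises and callers must use ``get_indexer_non_unique`` instead.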

    @Appender(_index_shared_docs["get_indexer_non_unique"] % _index_doc_kwargs)
    def get_indexer_non_unique(
        self, target: AnyArrayLike
    ) -> Tuple[np.ndarray, np.ndarray]:
        target_as_index = ensure_index(target)

        # check that target_as_index IntervalIndex is compatible
        if isinstance(target_as_index, IntervalIndex):
            common_subtype = find_common_type(
                [self.dtype.subtype, target_as_index.dtype.subtype]
            )
            if self.closed != target_as_index.closed or is_object_dtype(common_subtype):
                # different closed or incompatible subtype -> no matches
                return (
                    np.repeat(-1, len(target_as_index)),
                    np.arange(len(target_as_index)),
                )

        if is_object_dtype(target_as_index) or isinstance(
            target_as_index, IntervalIndex
        ):
            # target_as_index might contain intervals: defer elementwise to get_loc
            indexer, missing = [], []
            for i, key in enumerate(target_as_index):
                try:
                    locs = self.get_loc(key)
                    if isinstance(locs, slice):
                        locs = np.arange(locs.start, locs.stop, locs.step, dtype="intp")
                    locs = np.array(locs, ndmin=1)
                except KeyError:
                    missing.append(i)
                    locs = np.array([-1])
                indexer.append(locs)
            indexer = np.concatenate(indexer)
        else:
            target_as_index = self._maybe_convert_i8(target_as_index)
            indexer, missing = self._engine.get_indexer_non_unique(
                target_as_index.values
            )

        return ensure_platform_int(indexer), ensure_platform_int(missing)

    def get_indexer_for(self, target: AnyArrayLike, **kwargs) -> np.ndarray:
        """
        Guaranteed return of an indexer even when overlapping.

        This dispatches to get_indexer or get_indexer_non_unique
        as appropriate.

        Returns
        -------
        numpy.ndarray
            List of indices.
        """
        if self.is_overlapping:
            return self.get_indexer_non_unique(target)[0]
        return self.get_indexer(target, **kwargs)
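
    # Illustrative behaviour (sketch): for an overlapping index the non-unique
    # path is taken, e.g.
    #
    #     >>> idx = pd.IntervalIndex.from_tuples([(0, 2), (1, 3)])
    #     >>> idx.get_indexer_for([1.5])
    #     array([0, 1])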

    @Appender(_index_shared_docs["get_value"] % _index_doc_kwargs)
    def get_value(self, series: ABCSeries, key: Any) -> Any:

        if com.is_bool_indexer(key):
            loc = key
        elif is_list_like(key):
            if self.is_overlapping:
                loc, missing = self.get_indexer_non_unique(key)
                if len(missing):
                    raise KeyError
            else:
                loc = self.get_indexer(key)
        elif isinstance(key, slice):
            if not (key.step is None or key.step == 1):
                raise ValueError("cannot support not-default step in a slice")
            loc = self._convert_slice_indexer(key, kind="getitem")
        else:
            loc = self.get_loc(key)
        return series.iloc[loc]

    @Appender(_index_shared_docs["where"])
    def where(self, cond, other=None):
        if other is None:
            other = self._na_value
        values = np.where(cond, self.values, other)
        return self._shallow_copy(values)

    def delete(self, loc):
        """
        Return a new IntervalIndex with passed location(s) deleted.

        Returns
        -------
        IntervalIndex
        """
        new_left = self.left.delete(loc)
        new_right = self.right.delete(loc)
        return self._shallow_copy(new_left, new_right)

    def insert(self, loc, item):
        """
        Return a new IntervalIndex inserting new item at location. Follows
        Python list.append semantics for negative values. Only Interval
        objects and NA can be inserted into an IntervalIndex.

        Parameters
        ----------
        loc : int
        item : object

        Returns
        -------
        IntervalIndex
        """
        if isinstance(item, Interval):
            if item.closed != self.closed:
                raise ValueError(
                    "inserted item must be closed on the same side as the index"
                )
            left_insert = item.left
            right_insert = item.right
        elif is_scalar(item) and isna(item):
            # GH 18295
            left_insert = right_insert = item
        else:
            raise ValueError(
                "can only insert Interval objects and NA into an IntervalIndex"
            )

        new_left = self.left.insert(loc, left_insert)
        new_right = self.right.insert(loc, right_insert)
        return self._shallow_copy(new_left, new_right)
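
    # Illustrative behaviour (sketch, not part of the original source):
    #
    #     >>> idx = pd.IntervalIndex.from_breaks([0, 1, 2])
    #     >>> idx.insert(1, pd.Interval(5, 6))   # -> [(0, 1], (5, 6], (1, 2]]
    #     >>> idx.insert(0, np.nan)              # NA is also allowed
    #     >>> idx.insert(0, 3)                   # raises ValueError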

    def _concat_same_dtype(self, to_concat, name):
        """
        Assert that all indexes have the same .closed; we allow a 0-len index
        here as well.
        """
        if not len({i.closed for i in to_concat if len(i)}) == 1:
            raise ValueError(
                "can only append two IntervalIndex objects "
                "that are closed on the same side"
            )
        return super()._concat_same_dtype(to_concat, name)

    @Appender(_index_shared_docs["take"] % _index_doc_kwargs)
    def take(self, indices, axis=0, allow_fill=True, fill_value=None, **kwargs):
        result = self._data.take(
            indices, axis=axis, allow_fill=allow_fill, fill_value=fill_value, **kwargs
        )
        return self._shallow_copy(result)

    def __getitem__(self, value):
        result = self._data[value]
        if isinstance(result, IntervalArray):
            return self._shallow_copy(result)
        else:
            # scalar
            return result

    # --------------------------------------------------------------------
    # Rendering Methods
    # __repr__ associated methods are based on MultiIndex

    def _format_with_header(self, header, **kwargs):
        return header + list(self._format_native_types(**kwargs))

    def _format_native_types(self, na_rep="NaN", quoting=None, **kwargs):
        # GH 28210: use base method but with different default na_rep
        return super()._format_native_types(na_rep=na_rep, quoting=quoting, **kwargs)

    def _format_data(self, name=None):

        # TODO: integrate with categorical and make generic
        # name argument is unused here; just for compat with base / categorical
        n = len(self)
        max_seq_items = min((get_option("display.max_seq_items") or n) // 10, 10)

        formatter = str

        if n == 0:
            summary = "[]"
        elif n == 1:
            first = formatter(self[0])
            summary = f"[{first}]"
        elif n == 2:
            first = formatter(self[0])
            last = formatter(self[-1])
            summary = f"[{first}, {last}]"
        else:

            if n > max_seq_items:
                n = min(max_seq_items // 2, 10)
                head = [formatter(x) for x in self[:n]]
                tail = [formatter(x) for x in self[-n:]]
                head_joined = ", ".join(head)
                tail_joined = ", ".join(tail)
                summary = f"[{head_joined} ... {tail_joined}]"
            else:
                tail = [formatter(x) for x in self]
                joined = ", ".join(tail)
                summary = f"[{joined}]"

        return summary + "," + self._format_space()

    def _format_attrs(self):
        attrs = [("closed", repr(self.closed))]
        if self.name is not None:
            attrs.append(("name", default_pprint(self.name)))
        attrs.append(("dtype", f"'{self.dtype}'"))
        return attrs

    def _format_space(self) -> str:
        space = " " * (len(type(self).__name__) + 1)
        return f"\n{space}"

    # --------------------------------------------------------------------

    def argsort(self, *args, **kwargs):
        return np.lexsort((self.right, self.left))

    def equals(self, other) -> bool:
        """
        Determines if two IntervalIndex objects contain the same elements.
        """
        if self.is_(other):
            return True

        # if we can coerce to an IntervalIndex
        # then we can compare
        if not isinstance(other, IntervalIndex):
            if not is_interval_dtype(other):
                return False
            other = Index(getattr(other, "values", other))

        return (
            self.left.equals(other.left)
            and self.right.equals(other.right)
            and self.closed == other.closed
        )

    @Appender(_index_shared_docs["intersection"])
    @SetopCheck(op_name="intersection")
    def intersection(
        self, other: "IntervalIndex", sort: bool = False
    ) -> "IntervalIndex":
        if self.left.is_unique and self.right.is_unique:
            taken = self._intersection_unique(other)
        elif other.left.is_unique and other.right.is_unique and self.isna().sum() <= 1:
            # Swap other/self if other is unique and self does not have
            # multiple NaNs
            taken = other._intersection_unique(self)
        else:
            # duplicates
            taken = self._intersection_non_unique(other)

        if sort is None:
            taken = taken.sort_values()

        return taken

    def _intersection_unique(self, other: "IntervalIndex") -> "IntervalIndex":
        """
        Used when the IntervalIndex has unique left and right endpoints
        (no duplicates on either side).
        Return the intersection with another IntervalIndex.

        Parameters
        ----------
        other : IntervalIndex

        Returns
        -------
        IntervalIndex
        """
        lindexer = self.left.get_indexer(other.left)
        rindexer = self.right.get_indexer(other.right)

        match = (lindexer == rindexer) & (lindexer != -1)
        indexer = lindexer.take(match.nonzero()[0])

        return self.take(indexer)

    def _intersection_non_unique(self, other: "IntervalIndex") -> "IntervalIndex":
        """
        Used when the IntervalIndex has duplicated endpoints on either side
        (left or right).
        Return the intersection with another IntervalIndex.

        Parameters
        ----------
        other : IntervalIndex

        Returns
        -------
        IntervalIndex
        """
        mask = np.zeros(len(self), dtype=bool)

        if self.hasnans and other.hasnans:
            first_nan_loc = np.arange(len(self))[self.isna()][0]
            mask[first_nan_loc] = True

        other_tups = set(zip(other.left, other.right))
        for i, tup in enumerate(zip(self.left, self.right)):
            if tup in other_tups:
                mask[i] = True

        return self[mask]
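
    # Illustrative behaviour (sketch): intersection keeps intervals whose exact
    # (left, right) pair appears in both indexes, e.g.
    #
    #     >>> a = pd.IntervalIndex.from_tuples([(0, 1), (1, 2)])
    #     >>> b = pd.IntervalIndex.from_tuples([(1, 2), (2, 3)])
    #     >>> a.intersection(b)   # -> IntervalIndex containing only (1, 2]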

    def _setop(op_name: str, sort=None):
        @SetopCheck(op_name=op_name)
        def func(self, other, sort=sort):
            result = getattr(self._multiindex, op_name)(other._multiindex, sort=sort)
            result_name = get_op_result_name(self, other)

            # GH 19101: ensure empty results have correct dtype
            if result.empty:
                result = result.values.astype(self.dtype.subtype)
            else:
                result = result.values

            return type(self).from_tuples(result, closed=self.closed, name=result_name)

        return func

    @property
    def is_all_dates(self) -> bool:
        """
        This is False even when left/right contain datetime-like objects,
        as the check is done on the Interval itself
        """
        return False

    union = _setop("union")
    difference = _setop("difference")
    symmetric_difference = _setop("symmetric_difference")

    # TODO: arithmetic operations

    # GH#30817 until IntervalArray implements inequalities, get them from Index
    def __lt__(self, other):
        return Index.__lt__(self, other)

    def __le__(self, other):
        return Index.__le__(self, other)

    def __gt__(self, other):
        return Index.__gt__(self, other)

    def __ge__(self, other):
        return Index.__ge__(self, other)


IntervalIndex._add_logical_methods_disabled()


def _is_valid_endpoint(endpoint) -> bool:
    """
    Helper for interval_range to check if start/end are valid types.
    """
    return any(
        [
            is_number(endpoint),
            isinstance(endpoint, Timestamp),
            isinstance(endpoint, Timedelta),
            endpoint is None,
        ]
    )


def _is_type_compatible(a, b) -> bool:
    """
    Helper for interval_range to check type compat of start/end/freq.
    """
    is_ts_compat = lambda x: isinstance(x, (Timestamp, DateOffset))
    is_td_compat = lambda x: isinstance(x, (Timedelta, DateOffset))
    return (
        (is_number(a) and is_number(b))
        or (is_ts_compat(a) and is_ts_compat(b))
        or (is_td_compat(a) and is_td_compat(b))
        or com.any_none(a, b)
    )


def interval_range(
    start=None, end=None, periods=None, freq=None, name=None, closed="right"
):
    """
    Return a fixed frequency IntervalIndex.

    Parameters
    ----------
    start : numeric or datetime-like, default None
        Left bound for generating intervals.
    end : numeric or datetime-like, default None
        Right bound for generating intervals.
    periods : int, default None
        Number of periods to generate.
    freq : numeric, str, or DateOffset, default None
        The length of each interval. Must be consistent with the type of start
        and end, e.g. 2 for numeric, or '5H' for datetime-like. Default is 1
        for numeric and 'D' for datetime-like.
    name : str, default None
        Name of the resulting IntervalIndex.
    closed : {'left', 'right', 'both', 'neither'}, default 'right'
        Whether the intervals are closed on the left-side, right-side, both
        or neither.

    Returns
    -------
    IntervalIndex

    See Also
    --------
    IntervalIndex : An Index of intervals that are all closed on the same side.

    Notes
    -----
    Of the four parameters ``start``, ``end``, ``periods``, and ``freq``,
    exactly three must be specified. If ``freq`` is omitted, the resulting
    ``IntervalIndex`` will have ``periods`` linearly spaced elements between
    ``start`` and ``end``, inclusively.

    To learn more about datetime-like frequency strings, please see `this link
    <https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#offset-aliases>`__.

    Examples
    --------
    Numeric ``start`` and ``end`` are supported.

    >>> pd.interval_range(start=0, end=5)
    IntervalIndex([(0, 1], (1, 2], (2, 3], (3, 4], (4, 5]],
                  closed='right', dtype='interval[int64]')

    Additionally, datetime-like input is also supported.

    >>> pd.interval_range(start=pd.Timestamp('2017-01-01'),
    ...                   end=pd.Timestamp('2017-01-04'))
    IntervalIndex([(2017-01-01, 2017-01-02], (2017-01-02, 2017-01-03],
                   (2017-01-03, 2017-01-04]],
                  closed='right', dtype='interval[datetime64[ns]]')

    The ``freq`` parameter specifies the frequency between the left and right
    endpoints of the individual intervals within the ``IntervalIndex``. For
    numeric ``start`` and ``end``, the frequency must also be numeric.

    >>> pd.interval_range(start=0, periods=4, freq=1.5)
    IntervalIndex([(0.0, 1.5], (1.5, 3.0], (3.0, 4.5], (4.5, 6.0]],
                  closed='right', dtype='interval[float64]')

    Similarly, for datetime-like ``start`` and ``end``, the frequency must be
    convertible to a DateOffset.

    >>> pd.interval_range(start=pd.Timestamp('2017-01-01'),
    ...                   periods=3, freq='MS')
    IntervalIndex([(2017-01-01, 2017-02-01], (2017-02-01, 2017-03-01],
                   (2017-03-01, 2017-04-01]],
                  closed='right', dtype='interval[datetime64[ns]]')

    Specify ``start``, ``end``, and ``periods``; the frequency is generated
    automatically (linearly spaced).

    >>> pd.interval_range(start=0, end=6, periods=4)
    IntervalIndex([(0.0, 1.5], (1.5, 3.0], (3.0, 4.5], (4.5, 6.0]],
              closed='right',
              dtype='interval[float64]')

    The ``closed`` parameter specifies which endpoints of the individual
    intervals within the ``IntervalIndex`` are closed.

    >>> pd.interval_range(end=5, periods=4, closed='both')
    IntervalIndex([[1, 2], [2, 3], [3, 4], [4, 5]],
                  closed='both', dtype='interval[int64]')
    """
    start = com.maybe_box_datetimelike(start)
    end = com.maybe_box_datetimelike(end)
    endpoint = start if start is not None else end

    if freq is None and com.any_none(periods, start, end):
        freq = 1 if is_number(endpoint) else "D"

    if com.count_not_none(start, end, periods, freq) != 3:
        raise ValueError(
            "Of the four parameters: start, end, periods, and "
            "freq, exactly three must be specified"
        )

    if not _is_valid_endpoint(start):
        raise ValueError(f"start must be numeric or datetime-like, got {start}")
    elif not _is_valid_endpoint(end):
        raise ValueError(f"end must be numeric or datetime-like, got {end}")

    if is_float(periods):
        periods = int(periods)
    elif not is_integer(periods) and periods is not None:
        raise TypeError(f"periods must be a number, got {periods}")

    if freq is not None and not is_number(freq):
        try:
            freq = to_offset(freq)
        except ValueError:
            raise ValueError(
                f"freq must be numeric or convertible to DateOffset, got {freq}"
            )

    # verify type compatibility
    if not all(
        [
            _is_type_compatible(start, end),
            _is_type_compatible(start, freq),
            _is_type_compatible(end, freq),
        ]
    ):
        raise TypeError("start, end, freq need to be type compatible")

    # +1 to convert interval count to breaks count (n breaks = n-1 intervals)
    if periods is not None:
        periods += 1

    if is_number(endpoint):
        # force consistency between start/end/freq (lower end if freq skips it)
        if com.all_not_none(start, end, freq):
            end -= (end - start) % freq

        # compute the period/start/end if unspecified (at most one)
        if periods is None:
            periods = int((end - start) // freq) + 1
        elif start is None:
            start = end - (periods - 1) * freq
        elif end is None:
            end = start + (periods - 1) * freq

        breaks = np.linspace(start, end, periods)
        if all(is_integer(x) for x in com.not_none(start, end, freq)):
            # np.linspace always produces float output
            breaks = maybe_downcast_to_dtype(breaks, "int64")
    else:
        # delegate to the appropriate range function
        if isinstance(endpoint, Timestamp):
            range_func = date_range
        else:
            range_func = timedelta_range

        breaks = range_func(start=start, end=end, periods=periods, freq=freq)

    return IntervalIndex.from_breaks(breaks, name=name, closed=closed)