Coverage for /home/martinb/.local/share/virtualenvs/camcops/lib/python3.6/site-packages/pandas/core/indexes/range.py : 27%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1from datetime import timedelta
2import operator
3from sys import getsizeof
4from typing import Optional, Union
5import warnings
7import numpy as np
9from pandas._libs import index as libindex
10import pandas.compat as compat
11from pandas.compat.numpy import function as nv
12from pandas.util._decorators import Appender, cache_readonly
14from pandas.core.dtypes.common import (
15 ensure_platform_int,
16 ensure_python_int,
17 is_integer,
18 is_integer_dtype,
19 is_list_like,
20 is_scalar,
21 is_timedelta64_dtype,
22)
23from pandas.core.dtypes.generic import ABCTimedeltaIndex
25from pandas.core import ops
26import pandas.core.common as com
27from pandas.core.construction import extract_array
28import pandas.core.indexes.base as ibase
29from pandas.core.indexes.base import Index, _index_shared_docs, maybe_extract_name
30from pandas.core.indexes.numeric import Int64Index
31from pandas.core.ops.common import unpack_zerodim_and_defer
33from pandas.io.formats.printing import pprint_thing
36class RangeIndex(Int64Index):
37 """
38 Immutable Index implementing a monotonic integer range.
40 RangeIndex is a memory-saving special case of Int64Index limited to
41 representing monotonic ranges. Using RangeIndex may in some instances
42 improve computing speed.
44 This is the default index type used
45 by DataFrame and Series when no explicit index is provided by the user.
47 Parameters
48 ----------
49 start : int (default: 0), or other RangeIndex instance
50 If int and "stop" is not given, interpreted as "stop" instead.
51 stop : int (default: 0)
52 step : int (default: 1)
53 name : object, optional
54 Name to be stored in the index.
55 copy : bool, default False
56 Unused, accepted for homogeneity with other index types.
58 Attributes
59 ----------
60 start
61 stop
62 step
64 Methods
65 -------
66 from_range
68 See Also
69 --------
70 Index : The base pandas Index type.
71 Int64Index : Index of int64 data.
72 """
74 _typ = "rangeindex"
75 _engine_type = libindex.Int64Engine
76 _range: range
78 # check whether self._data has been called
79 _cached_data: Optional[np.ndarray] = None
80 # --------------------------------------------------------------------
81 # Constructors
def __new__(
    cls, start=None, stop=None, step=None, dtype=None, copy=False, name=None,
):
    """
    Build a RangeIndex from ``start``/``stop``/``step`` or from another
    RangeIndex.

    Parameters
    ----------
    start : int or RangeIndex, optional
        Lower bound; when ``stop`` is omitted it is used as the stop value
        and the range begins at 0. A RangeIndex has its range reused.
    stop : int, optional
    step : int, optional
        Defaults to 1.
    dtype : optional
        Only validated via ``cls._validate_dtype``.
    copy : bool, default False
        Not read by this constructor.
    name : object, optional

    Raises
    ------
    TypeError
        If ``start``, ``stop`` and ``step`` are all None.
    ValueError
        If ``step`` is zero.
    """
    cls._validate_dtype(dtype)
    name = maybe_extract_name(name, start, cls)

    # An existing RangeIndex simply donates its underlying range object.
    if isinstance(start, RangeIndex):
        return cls._simple_new(start._range, dtype=dtype, name=name)

    if com.all_none(start, stop, step):
        raise TypeError("RangeIndex(...) must be called with integers")

    first = 0 if start is None else ensure_python_int(start)
    if stop is None:
        # Single-argument form: RangeIndex(n) means range(0, n).
        lower, upper = 0, first
    else:
        lower, upper = first, ensure_python_int(stop)

    stride = 1 if step is None else ensure_python_int(step)
    if stride == 0:
        raise ValueError("Step must not be zero")

    return cls._simple_new(range(lower, upper, stride), dtype=dtype, name=name)
@classmethod
def from_range(cls, data, name=None, dtype=None):
    """
    Alternate constructor wrapping an existing ``range`` object.

    Returns
    -------
    RangeIndex

    Raises
    ------
    TypeError
        If ``data`` is not a ``range`` instance.
    """
    if isinstance(data, range):
        cls._validate_dtype(dtype)
        return cls._simple_new(data, dtype=dtype, name=name)

    raise TypeError(
        f"{cls.__name__}(...) must be called with object coercible to a "
        f"range, {repr(data)} was passed"
    )
@classmethod
def _simple_new(cls, values, name=None, dtype=None):
    """
    Create an instance directly around ``values`` without argument checks.

    ``None`` yields an empty range; anything that is neither ``None`` nor a
    ``range`` is handed to the generic ``Index`` constructor instead.
    """
    result = object.__new__(cls)

    if values is not None and not isinstance(values, range):
        # Non-range input cannot be represented here.
        return Index(values, dtype=dtype, name=name)

    result._range = range(0, 0, 1) if values is None else values
    result.name = name

    result._reset_identity()
    return result
148 # --------------------------------------------------------------------
@cache_readonly
def _constructor(self):
    """Class used for construction: always ``Int64Index``."""
    constructor = Int64Index
    return constructor
155 @property
156 def _data(self):
157 """
158 An int array that for performance reasons is created only when needed.
160 The constructed array is saved in ``_cached_data``. This allows us to
161 check if the array has been created without accessing ``_data`` and
162 triggering the construction.
163 """
164 if self._cached_data is None:
165 self._cached_data = np.arange(
166 self.start, self.stop, self.step, dtype=np.int64
167 )
168 return self._cached_data
@cache_readonly
def _int64index(self):
    """Int64Index built from the materialized ``_data``, carrying the same name."""
    values = self._data
    return Int64Index._simple_new(values, name=self.name)
174 def _get_data_as_items(self):
175 """ return a list of tuples of start, stop, step """
176 rng = self._range
177 return [("start", rng.start), ("stop", rng.stop), ("step", rng.step)]
def __reduce__(self):
    """
    Pickle support: rebuild through ``ibase._new_Index`` from the attribute
    dict extended with the range's start/stop/step.
    """
    state = self._get_attributes_dict()
    for key, value in self._get_data_as_items():
        state[key] = value
    return ibase._new_Index, (type(self), state), None
184 # --------------------------------------------------------------------
185 # Rendering Methods
def _format_attrs(self):
    """
    Return ``(attr, formatted_value)`` pairs: start, stop, step and, when
    the index is named, the pretty-printed name.
    """
    pairs = self._get_data_as_items()
    if self.name is None:
        return pairs
    pairs.append(("name", ibase.default_pprint(self.name)))
    return pairs
196 def _format_data(self, name=None):
197 # we are formatting thru the attributes
198 return None
200 def _format_with_header(self, header, na_rep="NaN", **kwargs):
201 return header + list(map(pprint_thing, self._range))
203 # --------------------------------------------------------------------
204 _deprecation_message = (
205 "RangeIndex.{} is deprecated and will be "
206 "removed in a future version. Use RangeIndex.{} "
207 "instead"
208 )
@cache_readonly
def start(self):
    """
    The value of the `start` parameter (``0`` if this was not supplied).
    """
    # GH 25710
    rng = self._range
    return rng.start
218 @property
219 def _start(self):
220 """
221 The value of the `start` parameter (``0`` if this was not supplied).
223 .. deprecated:: 0.25.0
224 Use ``start`` instead.
225 """
226 warnings.warn(
227 self._deprecation_message.format("_start", "start"),
228 FutureWarning,
229 stacklevel=2,
230 )
231 return self.start
@cache_readonly
def stop(self):
    """
    The value of the `stop` parameter.
    """
    rng = self._range
    return rng.stop
240 @property
241 def _stop(self):
242 """
243 The value of the `stop` parameter.
245 .. deprecated:: 0.25.0
246 Use ``stop`` instead.
247 """
248 # GH 25710
249 warnings.warn(
250 self._deprecation_message.format("_stop", "stop"),
251 FutureWarning,
252 stacklevel=2,
253 )
254 return self.stop
@cache_readonly
def step(self):
    """
    The value of the `step` parameter (``1`` if this was not supplied).
    """
    # GH 25710
    rng = self._range
    return rng.step
264 @property
265 def _step(self):
266 """
267 The value of the `step` parameter (``1`` if this was not supplied).
269 .. deprecated:: 0.25.0
270 Use ``step`` instead.
271 """
272 # GH 25710
273 warnings.warn(
274 self._deprecation_message.format("_step", "step"),
275 FutureWarning,
276 stacklevel=2,
277 )
278 return self.step
@cache_readonly
def nbytes(self) -> int:
    """
    Return the number of bytes in the underlying data.

    This is ``sys.getsizeof`` of the range object plus that of its start,
    stop and step integers; no value array is counted.
    """
    rng = self._range
    measured = (rng, rng.start, rng.stop, rng.step)
    return sum(getsizeof(item) for item in measured)
def memory_usage(self, deep: bool = False) -> int:
    """
    Memory usage of my values

    Parameters
    ----------
    deep : bool
        Accepted for signature compatibility; this implementation does not
        read it and always reports ``nbytes``.

    Returns
    -------
    bytes used

    See Also
    --------
    numpy.ndarray.nbytes
    """
    used = self.nbytes
    return used
@property
def dtype(self) -> np.dtype:
    """The dtype of the index values: always int64."""
    return np.dtype("int64")
@property
def is_unique(self) -> bool:
    """Always True."""
    unique = True
    return unique
@cache_readonly
def is_monotonic_increasing(self) -> bool:
    """True when the step is positive or the index has at most one element."""
    if self._range.step > 0:
        return True
    return len(self) <= 1
@cache_readonly
def is_monotonic_decreasing(self) -> bool:
    """True when the step is negative or the index has at most one element."""
    if self._range.step < 0:
        return True
    return len(self) <= 1
@property
def has_duplicates(self) -> bool:
    """Always False."""
    duplicated = False
    return duplicated
337 def __contains__(self, key: Union[int, np.integer]) -> bool:
338 hash(key)
339 try:
340 key = ensure_python_int(key)
341 except TypeError:
342 return False
343 return key in self._range
@Appender(_index_shared_docs["get_loc"])
def get_loc(self, key, method=None, tolerance=None):
    # No docstring here on purpose: Appender supplies the shared one.
    exact_integer_lookup = is_integer(key) and method is None and tolerance is None
    if not exact_integer_lookup:
        return super().get_loc(key, method=method, tolerance=tolerance)

    # Integer keys are located directly on the range object.
    try:
        return self._range.index(int(key))
    except ValueError:
        raise KeyError(key)
@Appender(_index_shared_docs["get_indexer"])
def get_indexer(self, target, method=None, limit=None, tolerance=None):
    # No docstring here on purpose: Appender supplies the shared one.
    #
    # Fast path: for a 1-D integer target and a plain exact lookup the
    # positions are computed arithmetically from start/stop/step. Everything
    # else is delegated to the parent implementation.
    if com.any_not_none(method, tolerance, limit) or not is_list_like(target):
        return super().get_indexer(
            target, method=method, tolerance=tolerance, limit=limit
        )

    if self.step > 0:
        start, stop, step = self.start, self.stop, self.step
    else:
        # GH 28678: work on reversed range for simplicity
        reverse = self._range[::-1]
        start, stop, step = reverse.start, reverse.stop, reverse.step

    target_array = np.asarray(target)
    if not (is_integer_dtype(target_array) and target_array.ndim == 1):
        # checks/conversions/roundings are delegated to general method
        return super().get_indexer(target, method=method, tolerance=tolerance)

    # Offsets from the (possibly reversed) start; a value is present when
    # its offset is a non-negative multiple of step and it lies below stop.
    locs = target_array - start
    valid = (locs % step == 0) & (locs >= 0) & (target_array < stop)
    locs[~valid] = -1
    # Integer floor division keeps the result exact. True division would go
    # through float64 and lose precision for offsets beyond 2**53. The
    # division is exact because valid offsets are multiples of step.
    locs[valid] = locs[valid] // step

    if step != self.step:
        # We reversed this range: transform to original locs
        locs[valid] = len(self) - 1 - locs[valid]
    return ensure_platform_int(locs)
def tolist(self):
    """Return the index values as a plain Python list of ints."""
    return [*self._range]
@Appender(_index_shared_docs["_shallow_copy"])
def _shallow_copy(self, values=None, **kwargs):
    # No docstring here on purpose: Appender supplies the shared one.
    if values is not None:
        # Explicit values cannot be assumed to form a range, so the copy is
        # produced by the materialized Int64Index.
        kwargs.setdefault("name", self.name)
        return self._int64index._shallow_copy(values, **kwargs)

    # Without new values the same range object is reused; only the name
    # may be overridden.
    return self._simple_new(self._range, name=kwargs.get("name", self.name))
@Appender(ibase._index_shared_docs["copy"])
def copy(self, name=None, deep=False, dtype=None, **kwargs):
    # No docstring here on purpose: Appender supplies the shared one.
    # ``deep`` and ``**kwargs`` are accepted but not read by this method.
    self._validate_dtype(dtype)
    new_name = self.name if name is None else name
    return self.from_range(self._range, name=new_name)
403 def _minmax(self, meth):
404 no_steps = len(self) - 1
405 if no_steps == -1:
406 return np.nan
407 elif (meth == "min" and self.step > 0) or (meth == "max" and self.step < 0):
408 return self.start
410 return self.start + self.step * no_steps
def min(self, axis=None, skipna=True, *args, **kwargs):
    """
    The minimum value of the RangeIndex.

    ``axis`` and any extra arguments are only validated for numpy
    compatibility; ``skipna`` is not read.
    """
    nv.validate_minmax_axis(axis)
    nv.validate_min(args, kwargs)
    smallest = self._minmax("min")
    return smallest
def max(self, axis=None, skipna=True, *args, **kwargs):
    """
    The maximum value of the RangeIndex.

    ``axis`` and any extra arguments are only validated for numpy
    compatibility; ``skipna`` is not read.
    """
    nv.validate_minmax_axis(axis)
    nv.validate_max(args, kwargs)
    largest = self._minmax("max")
    return largest
def argsort(self, *args, **kwargs):
    """
    Return the positions that would sort the index in ascending order.

    A range is already sorted when its step is positive, so the result is
    ``0..n-1``; for a negative step it is ``n-1..0``.

    Returns
    -------
    argsorted : numpy array

    See Also
    --------
    numpy.ndarray.argsort
    """
    nv.validate_argsort(args, kwargs)

    size = len(self)
    if self._range.step > 0:
        return np.arange(size)
    return np.arange(size - 1, -1, -1)
def equals(self, other):
    """
    Determines if two Index objects contain the same elements.

    Two RangeIndex objects are compared through their ``range`` objects;
    any other type is handled by the parent implementation.
    """
    if not isinstance(other, RangeIndex):
        return super().equals(other)
    return self._range == other._range
def intersection(self, other, sort=False):
    """
    Form the intersection of two Index objects.

    When ``other`` is also a RangeIndex the result is computed
    arithmetically (no value arrays are compared) and is again a
    RangeIndex; any other type is delegated to the parent implementation.

    Parameters
    ----------
    other : Index or array-like
    sort : False or None, default False
        Sort the resulting index if possible

        .. versionadded:: 0.24.0

        .. versionchanged:: 0.24.1

           Changed the default to ``False`` to match the behaviour
           from before 0.24.0.

    Returns
    -------
    intersection : Index
    """
    self._validate_sort_keyword(sort)

    # Identical contents: only the name needs reconciling.
    if self.equals(other):
        return self._get_reconciled_name_object(other)

    if not isinstance(other, RangeIndex):
        return super().intersection(other, sort=sort)

    # An empty operand gives an empty result (``_simple_new(None)``).
    if not len(self) or not len(other):
        return self._simple_new(None)

    # Normalize both operands to increasing ranges so that the interval
    # arithmetic below only has to handle positive steps.
    first = self._range[::-1] if self.step < 0 else self._range
    second = other._range[::-1] if other.step < 0 else other._range

    # check whether intervals intersect
    # deals with in- and decreasing ranges
    int_low = max(first.start, second.start)
    int_high = min(first.stop, second.stop)
    if int_high <= int_low:
        return self._simple_new(None)

    # Method hint: linear Diophantine equation
    # solve intersection problem
    # performance hint: for identical step sizes, could use
    # cheaper alternative
    gcd, s, t = self._extended_gcd(first.step, second.step)

    # check whether element sets intersect
    # (the starts must be congruent modulo the gcd of the steps)
    if (first.start - second.start) % gcd:
        return self._simple_new(None)

    # calculate parameters for the RangeIndex describing the
    # intersection disregarding the lower bounds
    tmp_start = first.start + (second.start - first.start) * first.step // gcd * s
    # first.step * second.step // gcd is the least common multiple of the steps
    new_step = first.step * second.step // gcd
    new_range = range(tmp_start, int_high, new_step)
    new_index = self._simple_new(new_range)

    # adjust index to limiting interval
    new_start = new_index._min_fitting_element(int_low)
    new_range = range(new_start, new_index.stop, new_index.step)
    new_index = self._simple_new(new_range)

    # The result computed above is increasing; reverse it when both inputs
    # were decreasing so the direction of the operands is kept.
    if (self.step < 0 and other.step < 0) is not (new_index.step < 0):
        new_index = new_index[::-1]
    if sort is None:
        new_index = new_index.sort_values()
    return new_index
522 def _min_fitting_element(self, lower_limit):
523 """Returns the smallest element greater than or equal to the limit"""
524 no_steps = -(-(lower_limit - self.start) // abs(self.step))
525 return self.start + abs(self.step) * no_steps
527 def _max_fitting_element(self, upper_limit):
528 """Returns the largest element smaller than or equal to the limit"""
529 no_steps = (upper_limit - self.start) // abs(self.step)
530 return self.start + abs(self.step) * no_steps
532 def _extended_gcd(self, a, b):
533 """
534 Extended Euclidean algorithms to solve Bezout's identity:
535 a*x + b*y = gcd(x, y)
536 Finds one particular solution for x, y: s, t
537 Returns: gcd, s, t
538 """
539 s, old_s = 0, 1
540 t, old_t = 1, 0
541 r, old_r = b, a
542 while r:
543 quotient = old_r // r
544 old_r, r = r, old_r - quotient * r
545 old_s, s = s, old_s - quotient * s
546 old_t, t = t, old_t - quotient * t
547 return old_r, old_s, old_t
549 def _union(self, other, sort):
550 """
551 Form the union of two Index objects and sorts if possible
553 Parameters
554 ----------
555 other : Index or array-like
557 sort : False or None, default None
558 Whether to sort resulting index. ``sort=None`` returns a
559 monotonically increasing ``RangeIndex`` if possible or a sorted
560 ``Int64Index`` if not. ``sort=False`` always returns an
561 unsorted ``Int64Index``
563 .. versionadded:: 0.25.0
565 Returns
566 -------
567 union : Index
568 """
569 if not len(other) or self.equals(other) or not len(self):
570 return super()._union(other, sort=sort)
572 if isinstance(other, RangeIndex) and sort is None:
573 start_s, step_s = self.start, self.step
574 end_s = self.start + self.step * (len(self) - 1)
575 start_o, step_o = other.start, other.step
576 end_o = other.start + other.step * (len(other) - 1)
577 if self.step < 0:
578 start_s, step_s, end_s = end_s, -step_s, start_s
579 if other.step < 0:
580 start_o, step_o, end_o = end_o, -step_o, start_o
581 if len(self) == 1 and len(other) == 1:
582 step_s = step_o = abs(self.start - other.start)
583 elif len(self) == 1:
584 step_s = step_o
585 elif len(other) == 1:
586 step_o = step_s
587 start_r = min(start_s, start_o)
588 end_r = max(end_s, end_o)
589 if step_o == step_s:
590 if (
591 (start_s - start_o) % step_s == 0
592 and (start_s - end_o) <= step_s
593 and (start_o - end_s) <= step_s
594 ):
595 return type(self)(start_r, end_r + step_s, step_s)
596 if (
597 (step_s % 2 == 0)
598 and (abs(start_s - start_o) <= step_s / 2)
599 and (abs(end_s - end_o) <= step_s / 2)
600 ):
601 return type(self)(start_r, end_r + step_s / 2, step_s / 2)
602 elif step_o % step_s == 0:
603 if (
604 (start_o - start_s) % step_s == 0
605 and (start_o + step_s >= start_s)
606 and (end_o - step_s <= end_s)
607 ):
608 return type(self)(start_r, end_r + step_s, step_s)
609 elif step_s % step_o == 0:
610 if (
611 (start_s - start_o) % step_o == 0
612 and (start_s + step_o >= start_o)
613 and (end_s - step_o <= end_o)
614 ):
615 return type(self)(start_r, end_r + step_o, step_o)
616 return self._int64index._union(other, sort=sort)
@Appender(_index_shared_docs["join"])
def join(self, other, how="left", level=None, return_indexers=False, sort=False):
    # No docstring here on purpose: Appender supplies the shared one.
    if how != "outer" or self is other:
        return super().join(other, how, level, return_indexers, sort)

    # note: could return RangeIndex in more circumstances
    materialized = self._int64index
    return materialized.join(other, how, level, return_indexers, sort)
def _concat_same_dtype(self, indexes, name):
    """
    Concatenates multiple RangeIndex instances. All members of "indexes" must
    be of type RangeIndex; result will be RangeIndex if possible, Int64Index
    otherwise. E.g.:
    indexes = [RangeIndex(3), RangeIndex(3, 6)] -> RangeIndex(6)
    indexes = [RangeIndex(3), RangeIndex(4, 6)] -> Int64Index([0,1,2,4,5])

    Parameters
    ----------
    indexes : list of RangeIndex
    name : object
        Name given to the result via ``rename``.
    """
    # start: first value of the result; step: common step once it is known;
    # next_: the value the next non-empty range has to start at.
    start = step = next_ = None

    # Filter the empty indexes
    non_empty_indexes = [obj for obj in indexes if len(obj)]

    for obj in non_empty_indexes:
        rng: range = obj._range

        if start is None:
            # This is set by the first non-empty index
            start = rng.start
            if step is None and len(rng) > 1:
                step = rng.step
        elif step is None:
            # First non-empty index had only one element
            if rng.start == start:
                # The same value again cannot be expressed as a range.
                result = Int64Index(np.concatenate([x._values for x in indexes]))
                return result.rename(name)

            # Infer the step from the gap between the first two values.
            step = rng.start - start

        # The pieces fit only if this range uses the common step (single
        # element ranges are exempt) and starts where the previous ended.
        non_consecutive = (step != rng.step and len(rng) > 1) or (
            next_ is not None and rng.start != next_
        )
        if non_consecutive:
            result = Int64Index(np.concatenate([x._values for x in indexes]))
            return result.rename(name)

        if step is not None:
            next_ = rng[-1] + step

    if non_empty_indexes:
        # Get the stop value from "next" or alternatively
        # from the last non-empty index
        stop = non_empty_indexes[-1].stop if next_ is None else next_
        return RangeIndex(start, stop, step).rename(name)

    # Here all "indexes" had 0 length, i.e. were empty.
    # In this case return an empty range index.
    return RangeIndex(0, 0).rename(name)
675 def __len__(self) -> int:
676 """
677 return the length of the RangeIndex
678 """
679 return len(self._range)
@property
def size(self) -> int:
    """Number of elements; same as ``len(self)``."""
    count = len(self)
    return count
685 def __getitem__(self, key):
686 """
687 Conserve RangeIndex type for scalar and slice keys.
688 """
689 if isinstance(key, slice):
690 new_range = self._range[key]
691 return self._simple_new(new_range, name=self.name)
692 elif is_integer(key):
693 new_key = int(key)
694 try:
695 return self._range[new_key]
696 except IndexError:
697 raise IndexError(
698 f"index {key} is out of bounds for axis 0 with size {len(self)}"
699 )
700 elif is_scalar(key):
701 raise IndexError(
702 "only integers, slices (`:`), "
703 "ellipsis (`...`), numpy.newaxis (`None`) "
704 "and integer or boolean "
705 "arrays are valid indices"
706 )
707 # fall back to Int64Index
708 return super().__getitem__(key)
@unpack_zerodim_and_defer("__floordiv__")
def __floordiv__(self, other):
    """
    Floor-divide by ``other``, staying a RangeIndex when the result is
    still an evenly spaced range; otherwise use the materialized Int64Index.
    """
    if not (is_integer(other) and other != 0):
        return self._int64index // other

    size = len(self)
    divides_evenly = size == 0 or (
        self.start % other == 0 and self.step % other == 0
    )
    if divides_evenly:
        start = self.start // other
        step = self.step // other
        stop = start + size * step
        # ``step or 1`` guards against a zero step, which range() rejects.
        return self._simple_new(range(start, stop, step or 1), name=self.name)

    if size == 1:
        start = self.start // other
        return self._simple_new(range(start, start + 1, 1), name=self.name)

    return self._int64index // other
def all(self) -> bool:
    """Return True unless the range contains 0."""
    contains_zero = 0 in self._range
    return not contains_zero
def any(self) -> bool:
    """Return True if the range contains at least one non-zero value."""
    for value in self._range:
        if value:
            return True
    return False
@classmethod
def _add_numeric_methods_binary(cls):
    """
    Add in numeric methods, specialized to RangeIndex.

    Installs ``__add__``, ``__radd__``, ``__sub__``, ``__rsub__``,
    ``__mul__``, ``__rmul__``, ``__truediv__`` and ``__rtruediv__`` on
    ``cls``. Each operator first tries to express its result as a new range
    and otherwise applies the op to the materialized Int64Index.
    """

    def _make_evaluate_binop(op, step=False):
        """
        Build the operator method for ``op``.

        Parameters
        ----------
        op : callable that accepts 2 parms
            perform the binary op
        step : callable, optional, default to False
            op to apply to the step parm if not None
            if False, use the existing step
        """

        @unpack_zerodim_and_defer(op.__name__)
        def _evaluate_numeric_binop(self, other):
            if isinstance(other, ABCTimedeltaIndex):
                # Defer to TimedeltaIndex implementation
                return NotImplemented
            elif isinstance(other, (timedelta, np.timedelta64)):
                # GH#19333 is_integer evaluated True on timedelta64,
                # so we need to catch these explicitly
                return op(self._int64index, other)
            elif is_timedelta64_dtype(other):
                # Must be an np.ndarray; GH#22390
                return op(self._int64index, other)

            other = extract_array(other, extract_numpy=True)
            attrs = self._get_attributes_dict()

            left, right = self, other

            # Any ValueError/TypeError/ZeroDivisionError raised while
            # building the range-based result triggers the fallback below.
            try:
                # apply if we have an override
                if step:
                    with np.errstate(all="ignore"):
                        rstep = step(left.step, right)

                    # we don't have a representable op
                    # so return a base index
                    if not is_integer(rstep) or not rstep:
                        raise ValueError

                else:
                    rstep = left.step

                # Apply the op to both endpoints; numpy warnings are
                # silenced for the duration.
                with np.errstate(all="ignore"):
                    rstart = op(left.start, right)
                    rstop = op(left.stop, right)

                result = type(self)(rstart, rstop, rstep, **attrs)

                # for compat with numpy / Int64Index
                # even if we can represent as a RangeIndex, return
                # as a Float64Index if we have float-like descriptors
                if not all(is_integer(x) for x in [rstart, rstop, rstep]):
                    result = result.astype("float64")

                return result

            except (ValueError, TypeError, ZeroDivisionError):
                # Defer to Int64Index implementation
                return op(self._int64index, other)
            # TODO: Do attrs get handled reliably?

        # Give the closure its dunder name before it is attached to cls.
        name = f"__{op.__name__}__"
        return compat.set_function_name(_evaluate_numeric_binop, name, cls)

    # Addition and subtraction keep the step; multiplication and true
    # division also transform the step with the given callable.
    cls.__add__ = _make_evaluate_binop(operator.add)
    cls.__radd__ = _make_evaluate_binop(ops.radd)
    cls.__sub__ = _make_evaluate_binop(operator.sub)
    cls.__rsub__ = _make_evaluate_binop(ops.rsub)
    cls.__mul__ = _make_evaluate_binop(operator.mul, step=operator.mul)
    cls.__rmul__ = _make_evaluate_binop(ops.rmul, step=ops.rmul)
    cls.__truediv__ = _make_evaluate_binop(operator.truediv, step=operator.truediv)
    cls.__rtruediv__ = _make_evaluate_binop(ops.rtruediv, step=ops.rtruediv)
# Register the numeric operator methods once the class body is complete.
# NOTE(review): _add_numeric_methods is not defined in this file; presumably
# it is inherited and ends up calling _add_numeric_methods_binary - confirm.
RangeIndex._add_numeric_methods()