Coverage for /home/martinb/.local/share/virtualenvs/camcops/lib/python3.6/site-packages/pandas/core/indexes/numeric.py : 38%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1import numpy as np
3from pandas._libs import index as libindex, lib
4from pandas._typing import Dtype
5from pandas.util._decorators import Appender, cache_readonly
7from pandas.core.dtypes.cast import astype_nansafe
8from pandas.core.dtypes.common import (
9 is_bool,
10 is_bool_dtype,
11 is_dtype_equal,
12 is_extension_array_dtype,
13 is_float,
14 is_float_dtype,
15 is_integer_dtype,
16 is_scalar,
17 is_signed_integer_dtype,
18 is_unsigned_integer_dtype,
19 needs_i8_conversion,
20 pandas_dtype,
21)
22from pandas.core.dtypes.generic import (
23 ABCFloat64Index,
24 ABCInt64Index,
25 ABCRangeIndex,
26 ABCSeries,
27 ABCUInt64Index,
28)
29from pandas.core.dtypes.missing import isna
31from pandas.core import algorithms
32import pandas.core.common as com
33from pandas.core.indexes.base import (
34 Index,
35 InvalidIndexError,
36 _index_shared_docs,
37 maybe_extract_name,
38)
39from pandas.core.ops import get_op_result_name
41_num_index_shared_docs = dict()
class NumericIndex(Index):
    """
    Base class supplying the behaviour shared by the numeric Index types.

    This is an abstract class: the constructor and the dtype validation read
    ``cls._default_dtype`` and ``cls._typ``, which are not defined here and
    must come from the concrete subclass.
    """

    _is_numeric_dtype = True

    def __new__(cls, data=None, dtype=None, copy=False, name=None):
        cls._validate_dtype(dtype)

        already_array = isinstance(data, (np.ndarray, Index))
        if not already_array:
            # Anything that is not an ndarray/Index is coerced to an ndarray.
            if is_scalar(data):
                raise cls._scalar_data_error(data)

            if not isinstance(data, (ABCSeries, list, tuple)):
                # Some other kind of iterable: materialise it first.
                data = list(data)

            data = np.asarray(data, dtype=dtype)

        if issubclass(data.dtype.type, str):
            cls._string_data_error(data)

        if not copy and is_dtype_equal(data.dtype, cls._default_dtype):
            # Already the right dtype and no copy requested: reuse as-is.
            values = data
        else:
            values = np.array(data, dtype=cls._default_dtype, copy=copy)
            cls._assert_safe_casting(data, values)

        if values.ndim > 1:
            # GH#13601, GH#20285, GH#27125
            raise ValueError("Index data must be 1-dimensional")

        return cls._simple_new(values, name=maybe_extract_name(name, data, cls))

    @classmethod
    def _validate_dtype(cls, dtype: Dtype) -> None:
        """
        Raise ValueError when ``dtype`` is given but is the wrong kind for ``cls``.

        ``None`` is always accepted.
        """
        if dtype is None:
            return

        # Keyed by ``cls._typ``: (predicate the dtype must satisfy,
        # description used in the error message).
        checks = {
            "int64index": (is_signed_integer_dtype, "signed integer"),
            "uint64index": (is_unsigned_integer_dtype, "unsigned integer"),
            "float64index": (is_float_dtype, "float"),
            "rangeindex": (is_signed_integer_dtype, "signed integer"),
        }

        is_valid, expected = checks[cls._typ]
        if is_valid(dtype):
            return
        raise ValueError(
            f"Incorrect `dtype` passed: expected {expected}, received {dtype}"
        )

    @Appender(_index_shared_docs["_maybe_cast_slice_bound"])
    def _maybe_cast_slice_bound(self, label, side, kind):
        allowed_kinds = ["ix", "loc", "getitem", None]
        assert kind in allowed_kinds

        # we will try to coerce to integers
        return self._maybe_cast_indexer(label)

    @Appender(_index_shared_docs["_shallow_copy"])
    def _shallow_copy(self, values=None, **kwargs):
        if values is None or self._can_hold_na:
            return super()._shallow_copy(values=values, **kwargs)
        # New values on an index that cannot hold NA: re-infer the index type
        # so that we do not return e.g. an Int64Index with float data.
        return self._shallow_copy_with_infer(values=values, **kwargs)

    def _convert_for_op(self, value):
        """
        Convert value to be insertable to ndarray.

        Bool scalars and bool-dtype values are rejected with a bare TypeError
        (forcing conversion to object so the bools are not lost).
        """
        looks_boolean = is_bool(value) or is_bool_dtype(value)
        if looks_boolean:
            raise TypeError

        return value

    def _convert_tolerance(self, tolerance, target):
        """
        Return ``tolerance`` as an ndarray after validating it against ``target``.

        Raises ValueError if a multi-element tolerance differs in size from
        ``target``, or if the tolerance is not numeric.
        """
        tolerance = np.asarray(tolerance)
        if target.size != tolerance.size and tolerance.size > 1:
            raise ValueError("list-like tolerance size must match target index size")

        if np.issubdtype(tolerance.dtype, np.number):
            return tolerance

        # Non-numeric: pick the message by whether it was list-like or scalar.
        if tolerance.ndim > 0:
            raise ValueError(
                f"tolerance argument for {type(self).__name__} must contain "
                "numeric elements if it is list type"
            )
        raise ValueError(
            f"tolerance argument for {type(self).__name__} must be numeric "
            f"if it is a scalar: {repr(tolerance)}"
        )

    @classmethod
    def _assert_safe_casting(cls, data, subarr):
        """
        Hook called after casting ``data`` to the internal dtype (``subarr``).

        The base implementation does nothing. Subclasses need to override
        this only if the process of casting data from some accepted dtype to
        the internal dtype(s) bears the risk of truncation (e.g. float to int).
        """

    def _concat_same_dtype(self, indexes, name):
        """
        Concatenate the values of ``indexes`` into a new index named ``name``.

        The result has the type of the first element of ``indexes``.
        """
        klass = type(indexes[0])
        combined = np.concatenate([idx._values for idx in indexes])
        return klass(combined).rename(name)

    @property
    def is_all_dates(self) -> bool:
        """
        Checks that all the labels are datetime objects.

        Always False for numeric indexes.
        """
        return False

    @Appender(Index.insert.__doc__)
    def insert(self, loc, item):
        # treat NA values as nans:
        is_missing = is_scalar(item) and isna(item)
        return super().insert(loc, self._na_value if is_missing else item)

    def _union(self, other, sort):
        # Right now, we treat union(int, float) a bit special.
        # See https://github.com/pandas-dev/pandas/issues/26778 for discussion
        # We may change union(int, float) to go to object.
        #   float | [u]int  -> float  (the special case)
        #   T | T           -> T
        #   T | U           -> object
        int_with_float = is_integer_dtype(self.dtype) and is_float_dtype(other.dtype)
        float_with_int = is_integer_dtype(other.dtype) and is_float_dtype(self.dtype)

        if not (int_with_float or float_with_int):
            return super()._union(other, sort)

        left = self.astype("float")
        right = other.astype("float")
        return left._union(right, sort)
# Class docstring template for the concrete numeric indexes. It is %-formatted
# with the keys klass / ltype / dtype / extra.
_num_index_shared_docs["class_descr"] = """
    Immutable ndarray implementing an ordered, sliceable set. The basic object
    storing axis labels for all pandas objects. %(klass)s is a special case
    of `Index` with purely %(ltype)s labels. %(extra)s.

    Parameters
    ----------
    data : array-like (1-dimensional)
    dtype : NumPy dtype (default: %(dtype)s)
    copy : bool
        Make a copy of input ndarray.
    name : object
        Name to be stored in the index.

    Attributes
    ----------
    None

    Methods
    -------
    None

    See Also
    --------
    Index : The base pandas Index type.

    Notes
    -----
    An Index instance can **only** contain hashable objects.
"""

# Substitutions applied to the template above to build the Int64Index docstring.
_int64_descr_args = {
    "klass": "Int64Index",
    "ltype": "integer",
    "dtype": "int64",
    "extra": "",
}
class IntegerIndex(NumericIndex):
    """
    This is an abstract class for Int64Index, UInt64Index.
    """

    def __contains__(self, key) -> bool:
        """
        Return whether ``key`` is present in the index.

        A float key with a fractional part is never contained. Lookup
        failures (OverflowError, TypeError, ValueError) are reported as
        "not contained" rather than propagated.
        """
        # Called outside the try block on purpose: an unhashable key raises
        # TypeError to the caller instead of being turned into False.
        hash(key)
        try:
            has_fraction = is_float(key) and int(key) != key
            if has_fraction:
                return False
            return key in self._engine
        except (OverflowError, TypeError, ValueError):
            return False
class Int64Index(IntegerIndex):
    __doc__ = _num_index_shared_docs["class_descr"] % _int64_descr_args

    _typ = "int64index"
    _can_hold_na = False
    _engine_type = libindex.Int64Engine
    _default_dtype = np.int64

    @property
    def inferred_type(self) -> str:
        """
        Always 'integer' for ``Int64Index``
        """
        return "integer"

    @property
    def asi8(self) -> np.ndarray:
        # View of the underlying values as int64 ("i8").
        # do not cache or you'll create a memory leak
        return self.values.view("i8")

    @Appender(_index_shared_docs["_convert_scalar_indexer"])
    def _convert_scalar_indexer(self, key, kind=None):
        assert kind in ["ix", "loc", "getitem", "iloc", None]

        # don't coerce ilocs to integers
        if kind != "iloc":
            key = self._maybe_cast_indexer(key)
        return super()._convert_scalar_indexer(key, kind=kind)

    def _wrap_joined_index(self, joined, other):
        """
        Wrap the joined values in an Int64Index named via ``get_op_result_name``.
        """
        name = get_op_result_name(self, other)
        return Int64Index(joined, name=name)

    @classmethod
    def _assert_safe_casting(cls, data, subarr):
        """
        Ensure incoming data can be represented as ints.

        Raises TypeError when ``data`` is not already a signed-integer array
        and casting it to ``subarr`` changed any value.
        """
        is_signed = issubclass(data.dtype.type, np.signedinteger)
        if not is_signed and not np.array_equal(data, subarr):
            raise TypeError("Unsafe NumPy casting, you must explicitly cast")

    def _is_compatible_with_other(self, other):
        """
        Return True if the parent-class check passes, or if both ``self`` and
        ``other`` are Int64, Float64 or Range indexes.
        """
        # Check the objects themselves. The previous ``isinstance(type(obj), ...)``
        # tested the class object, which presumably only matched because
        # ``_typ`` is a class-level attribute (confirm against the ABC
        # definitions in pandas.core.dtypes.generic).
        return super()._is_compatible_with_other(other) or all(
            isinstance(obj, (ABCInt64Index, ABCFloat64Index, ABCRangeIndex))
            for obj in [self, other]
        )
# Class-level setup hooks; they are not defined in this module, so they are
# inherited from the Index base class.
Int64Index._add_numeric_methods()
Int64Index._add_logical_methods()

# Substitutions for the shared class docstring template (UInt64Index).
_uint64_descr_args = {
    "klass": "UInt64Index",
    "ltype": "unsigned integer",
    "dtype": "uint64",
    "extra": "",
}
class UInt64Index(IntegerIndex):
    __doc__ = _num_index_shared_docs["class_descr"] % _uint64_descr_args

    _typ = "uint64index"
    _can_hold_na = False
    _engine_type = libindex.UInt64Engine
    _default_dtype = np.uint64

    @property
    def inferred_type(self) -> str:
        """
        Always 'integer' for ``UInt64Index``
        """
        return "integer"

    @property
    def asi8(self) -> np.ndarray:
        # NOTE: despite the name, this is a uint64 ("u8") view of the values.
        # do not cache or you'll create a memory leak
        return self.values.view("u8")

    @Appender(_index_shared_docs["_convert_scalar_indexer"])
    def _convert_scalar_indexer(self, key, kind=None):
        assert kind in ["ix", "loc", "getitem", "iloc", None]

        # don't coerce ilocs to integers
        if kind != "iloc":
            key = self._maybe_cast_indexer(key)
        return super()._convert_scalar_indexer(key, kind=kind)

    @Appender(_index_shared_docs["_convert_arr_indexer"])
    def _convert_arr_indexer(self, keyarr):
        # Cast the indexer to uint64 if possible so that the values returned
        # from indexing are also uint64.
        dtype = None
        if is_integer_dtype(keyarr) or (
            lib.infer_dtype(keyarr, skipna=False) == "integer"
        ):
            dtype = np.uint64

        return com.asarray_tuplesafe(keyarr, dtype=dtype)

    @Appender(_index_shared_docs["_convert_index_indexer"])
    def _convert_index_indexer(self, keyarr):
        # Cast the indexer to uint64 if possible so
        # that the values returned from indexing are
        # also uint64.
        if keyarr.is_integer():
            return keyarr.astype(np.uint64)
        return keyarr

    def _wrap_joined_index(self, joined, other):
        """
        Wrap the joined values in a UInt64Index named via ``get_op_result_name``.
        """
        name = get_op_result_name(self, other)
        return UInt64Index(joined, name=name)

    @classmethod
    def _assert_safe_casting(cls, data, subarr):
        """
        Ensure incoming data can be represented as uints.

        Raises TypeError when ``data`` is not already an unsigned-integer
        array and casting it to ``subarr`` changed any value.
        """
        is_unsigned = issubclass(data.dtype.type, np.unsignedinteger)
        if not is_unsigned and not np.array_equal(data, subarr):
            raise TypeError("Unsafe NumPy casting, you must explicitly cast")

    def _is_compatible_with_other(self, other):
        """
        Return True if the parent-class check passes, or if both ``self`` and
        ``other`` are UInt64 or Float64 indexes.
        """
        # Check the objects themselves. The previous ``isinstance(type(obj), ...)``
        # tested the class object, which presumably only matched because
        # ``_typ`` is a class-level attribute (confirm against the ABC
        # definitions in pandas.core.dtypes.generic).
        return super()._is_compatible_with_other(other) or all(
            isinstance(obj, (ABCUInt64Index, ABCFloat64Index))
            for obj in [self, other]
        )
# Class-level setup hooks; they are not defined in this module, so they are
# inherited from the Index base class.
UInt64Index._add_numeric_methods()
UInt64Index._add_logical_methods()

# Substitutions for the shared class docstring template (Float64Index).
_float64_descr_args = {
    "klass": "Float64Index",
    "dtype": "float64",
    "ltype": "float",
    "extra": "",
}
class Float64Index(NumericIndex):
    __doc__ = _num_index_shared_docs["class_descr"] % _float64_descr_args

    _typ = "float64index"
    _engine_type = libindex.Float64Engine
    _default_dtype = np.float64

    @property
    def inferred_type(self) -> str:
        """
        Always 'floating' for ``Float64Index``
        """
        return "floating"

    @Appender(_index_shared_docs["astype"])
    def astype(self, dtype, copy=True):
        dtype = pandas_dtype(dtype)
        if needs_i8_conversion(dtype):
            raise TypeError(
                f"Cannot convert Float64Index to dtype {dtype}; integer "
                "values are required for conversion"
            )
        elif is_integer_dtype(dtype) and not is_extension_array_dtype(dtype):
            # TODO(jreback); this can change once we have an EA Index type
            # GH 13149
            arr = astype_nansafe(self.values, dtype=dtype)
            # Pass the name along: building the result from ``arr`` alone
            # would return an index without this index's name.
            return Int64Index(arr, name=self.name)
        return super().astype(dtype, copy=copy)

    @Appender(_index_shared_docs["_convert_scalar_indexer"])
    def _convert_scalar_indexer(self, key, kind=None):
        assert kind in ["ix", "loc", "getitem", "iloc", None]

        # Only positional (iloc) keys are validated; label keys pass through.
        if kind == "iloc":
            return self._validate_indexer("positional", key, kind)

        return key

    @Appender(_index_shared_docs["_convert_slice_indexer"])
    def _convert_slice_indexer(self, key, kind=None):
        # if we are not a slice, then we are done
        if not isinstance(key, slice):
            return key

        if kind == "iloc":
            return super()._convert_slice_indexer(key, kind=kind)

        # translate to locations
        return self.slice_indexer(key.start, key.stop, key.step, kind=kind)

    def _format_native_types(
        self, na_rep="", float_format=None, decimal=".", quoting=None, **kwargs
    ):
        """
        Format the index values with ``FloatArrayFormatter`` and return the
        resulting array. Extra ``**kwargs`` are accepted but not used here.
        """
        # Imported inside the method rather than at module level.
        from pandas.io.formats.format import FloatArrayFormatter

        formatter = FloatArrayFormatter(
            self.values,
            na_rep=na_rep,
            float_format=float_format,
            decimal=decimal,
            quoting=quoting,
            fixed_width=False,
        )
        return formatter.get_result_as_array()

    def get_value(self, series, key):
        """
        Return the entry of ``series`` at the location of the label ``key``.

        We always want to get an index value, never a value: ``key`` is
        resolved through ``get_loc`` rather than used as a position.

        Raises
        ------
        InvalidIndexError
            If ``key`` is not a scalar.
        """
        if not is_scalar(key):
            raise InvalidIndexError

        k = com.values_from_object(key)
        loc = self.get_loc(k)
        new_values = com.values_from_object(series)[loc]

        return new_values

    def equals(self, other) -> bool:
        """
        Determines if two Index objects contain the same elements.

        NaNs at the same positions are treated as equal.
        """
        if self is other:
            return True

        if not isinstance(other, Index):
            return False

        # need to compare nans locations and make sure that they are the same
        # since nans don't compare equal this is a bit tricky
        try:
            if not isinstance(other, Float64Index):
                other = self._constructor(other)
            if not is_dtype_equal(self.dtype, other.dtype) or self.shape != other.shape:
                return False
            left, right = self._ndarray_values, other._ndarray_values
            return ((left == right) | (self._isnan & other._isnan)).all()
        except (TypeError, ValueError):
            # Conversion or comparison failed: report "not equal".
            return False

    def __contains__(self, other) -> bool:
        """
        Return whether ``other`` is in the index, treating NaN as contained
        whenever the index has any NaN.
        """
        if super().__contains__(other):
            return True

        try:
            # if other is a sequence this throws a ValueError
            return np.isnan(other) and self.hasnans
        except ValueError:
            # Sequence-like key: only a sequence of at most one element can
            # match, via its single item.
            try:
                return len(other) <= 1 and other.item() in self
            except AttributeError:
                return len(other) <= 1 and other in self
            except TypeError:
                pass
        except TypeError:
            # np.isnan does not support this key type: fall through to False.
            pass

        return False

    @Appender(_index_shared_docs["get_loc"])
    def get_loc(self, key, method=None, tolerance=None):
        try:
            if np.all(np.isnan(key)) or is_bool(key):
                nan_idxs = self._nan_idxs
                try:
                    # Exactly one NaN position: return it as a scalar.
                    return nan_idxs.item()
                except ValueError:
                    # Zero or several NaN positions.
                    if not len(nan_idxs):
                        raise KeyError(key)
                    return nan_idxs
        except (TypeError, NotImplementedError):
            # Key cannot be NaN-tested: use the regular lookup below.
            pass
        return super().get_loc(key, method=method, tolerance=tolerance)

    @cache_readonly
    def is_unique(self) -> bool:
        # Unique only if the parent check passes and there is at most one NaN.
        return super().is_unique and self._nan_idxs.size < 2

    @Appender(Index.isin.__doc__)
    def isin(self, values, level=None):
        if level is not None:
            self._validate_index_level(level)
        return algorithms.isin(np.array(self), values)

    def _is_compatible_with_other(self, other):
        """
        Return True if the parent-class check passes, or if both ``self`` and
        ``other`` are Int64, Float64, UInt64 or Range indexes.
        """
        # Check the objects themselves. The previous ``isinstance(type(obj), ...)``
        # tested the class object, which presumably only matched because
        # ``_typ`` is a class-level attribute (confirm against the ABC
        # definitions in pandas.core.dtypes.generic).
        return super()._is_compatible_with_other(other) or all(
            isinstance(
                obj,
                (ABCInt64Index, ABCFloat64Index, ABCUInt64Index, ABCRangeIndex),
            )
            for obj in [self, other]
        )
# Class-level setup hooks; they are not defined in this module, so they are
# inherited from the Index base class. Unlike the integer indexes above,
# Float64Index calls the "_disabled" variant of the logical-methods hook.
Float64Index._add_numeric_methods()
Float64Index._add_logical_methods_disabled()