Coverage for /home/martinb/.local/share/virtualenvs/camcops/lib/python3.6/site-packages/pandas/core/arrays/base.py : 49%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1"""An interface for extending pandas with custom arrays.
3.. warning::
5 This is an experimental API and subject to breaking changes
6 without warning.
7"""
8import operator
9from typing import Any, Callable, Dict, Optional, Sequence, Tuple, Union
11import numpy as np
13from pandas._libs import lib
14from pandas._typing import ArrayLike
15from pandas.compat import set_function_name
16from pandas.compat.numpy import function as nv
17from pandas.errors import AbstractMethodError
18from pandas.util._decorators import Appender, Substitution
19from pandas.util._validators import validate_fillna_kwargs
21from pandas.core.dtypes.common import is_array_like, is_list_like
22from pandas.core.dtypes.dtypes import ExtensionDtype
23from pandas.core.dtypes.generic import ABCExtensionArray, ABCIndexClass, ABCSeries
24from pandas.core.dtypes.missing import isna
26from pandas.core import ops
27from pandas.core.algorithms import _factorize_array, unique
28from pandas.core.missing import backfill_1d, pad_1d
29from pandas.core.sorting import nargsort
31_extension_array_shared_docs: Dict[str, str] = dict()
def try_cast_to_ea(cls_or_instance, obj, dtype=None):
    """
    Best-effort conversion of ``obj`` through ``_from_sequence``.

    Parameters
    ----------
    cls_or_instance : ExtensionArray subclass or instance
        Object whose ``_from_sequence`` constructor is invoked.
    obj : arraylike
        Values handed to ``_from_sequence``.
    dtype : ExtensionDtype, optional
        Forwarded to ``_from_sequence`` as the ``dtype`` keyword.

    Returns
    -------
    ExtensionArray or obj
        The constructed array, or ``obj`` itself (unchanged) when the
        constructor raises any ``Exception``.
    """
    try:
        return cls_or_instance._from_sequence(obj, dtype=dtype)
    except Exception:
        # We can't predict what downstream EA constructors may raise, so
        # the catch is deliberately broad: this is a best-effort cast.
        return obj
57class ExtensionArray:
58 """
59 Abstract base class for custom 1-D array types.
61 pandas will recognize instances of this class as proper arrays
62 with a custom type and will not attempt to coerce them to objects. They
63 may be stored directly inside a :class:`DataFrame` or :class:`Series`.
65 .. versionadded:: 0.23.0
67 Attributes
68 ----------
69 dtype
70 nbytes
71 ndim
72 shape
74 Methods
75 -------
76 argsort
77 astype
78 copy
79 dropna
80 factorize
81 fillna
82 isna
83 ravel
84 repeat
85 searchsorted
86 shift
87 take
88 unique
89 view
90 _concat_same_type
91 _formatter
92 _from_factorized
93 _from_sequence
94 _from_sequence_of_strings
95 _ndarray_values
96 _reduce
97 _values_for_argsort
98 _values_for_factorize
100 Notes
101 -----
102 The interface includes the following abstract methods that must be
103 implemented by subclasses:
105 * _from_sequence
106 * _from_factorized
107 * __getitem__
108 * __len__
109 * dtype
110 * nbytes
111 * isna
112 * take
113 * copy
114 * _concat_same_type
116 A default repr displaying the type, (truncated) data, length,
117 and dtype is provided. It can be customized or replaced by
118 overriding:
120 * __repr__ : A default repr for the ExtensionArray.
121 * _formatter : Print scalars inside a Series or DataFrame.
123 Some methods require casting the ExtensionArray to an ndarray of Python
124 objects with ``self.astype(object)``, which may be expensive. When
125 performance is a concern, we highly recommend overriding the following
126 methods:
128 * fillna
129 * dropna
130 * unique
131 * factorize / _values_for_factorize
132 * argsort / _values_for_argsort
133 * searchsorted
135 The remaining methods implemented on this class should be performant,
136 as they only compose abstract methods. Still, a more efficient
137 implementation may be available, and these methods can be overridden.
139 One can implement methods to handle array reductions.
141 * _reduce
143 One can implement methods to handle parsing from strings that will be used
144 in methods such as ``pandas.io.parsers.read_csv``.
146 * _from_sequence_of_strings
148 This class does not inherit from 'abc.ABCMeta' for performance reasons.
149 Methods and properties required by the interface raise
150 ``pandas.errors.AbstractMethodError`` and no ``register`` method is
151 provided for registering virtual subclasses.
153 ExtensionArrays are limited to 1 dimension.
155 They may be backed by none, one, or many NumPy arrays. For example,
156 ``pandas.Categorical`` is an extension array backed by two arrays,
157 one for codes and one for categories. An array of IPv6 address may
158 be backed by a NumPy structured array with two fields, one for the
159 lower 64 bits and one for the upper 64 bits. Or they may be backed
160 by some other storage type, like Python lists. Pandas makes no
161 assumptions on how the data are stored, just that it can be converted
162 to a NumPy array.
163 The ExtensionArray interface does not impose any rules on how this data
164 is stored. However, currently, the backing data cannot be stored in
165 attributes called ``.values`` or ``._values`` to ensure full compatibility
166 with pandas internals. But other names such as ``.data``, ``._data``,
167 ``._items``, ... can be freely used.
169 If implementing NumPy's ``__array_ufunc__`` interface, pandas expects
170 that
172 1. You defer by returning ``NotImplemented`` when any Series are present
173 in `inputs`. Pandas will extract the arrays and call the ufunc again.
174 2. You define a ``_HANDLED_TYPES`` tuple as an attribute on the class.
175 pandas inspects this to determine whether the ufunc is valid for the
176 types present.
178 See :ref:`extending.extension.ufunc` for more.
179 """
181 # '_typ' is for pandas.core.dtypes.generic.ABCExtensionArray.
182 # Don't override this.
183 _typ = "extension"
185 # ------------------------------------------------------------------------
186 # Constructors
187 # ------------------------------------------------------------------------
@classmethod
def _from_sequence(cls, scalars, dtype=None, copy=False):
    """
    Construct a new ExtensionArray from a sequence of scalars.

    Abstract: concrete subclasses must override this constructor.

    Parameters
    ----------
    scalars : Sequence
        Each element will be an instance of the scalar type for this
        array, ``cls.dtype.type``.
    dtype : dtype, optional
        Construct for this particular dtype. This should be a Dtype
        compatible with the ExtensionArray.
    copy : bool, default False
        If True, copy the underlying data.

    Returns
    -------
    ExtensionArray

    Raises
    ------
    AbstractMethodError
        Always, in this base implementation.
    """
    # methodtype="classmethod" makes the error message name ``cls`` itself
    # instead of its metaclass.
    raise AbstractMethodError(cls, methodtype="classmethod")
@classmethod
def _from_sequence_of_strings(cls, strings, dtype=None, copy=False):
    """
    Construct a new ExtensionArray from a sequence of strings.

    Abstract: subclasses that support parsing from strings override this.

    .. versionadded:: 0.24.0

    Parameters
    ----------
    strings : Sequence
        The string values to build the array from.
    dtype : dtype, optional
        Construct for this particular dtype. This should be a Dtype
        compatible with the ExtensionArray.
    copy : bool, default False
        If True, copy the underlying data.

    Returns
    -------
    ExtensionArray

    Raises
    ------
    AbstractMethodError
        Always, in this base implementation.
    """
    # methodtype="classmethod" makes the error message name ``cls`` itself
    # instead of its metaclass.
    raise AbstractMethodError(cls, methodtype="classmethod")
@classmethod
def _from_factorized(cls, values, original):
    """
    Reconstruct an ExtensionArray after factorization.

    Abstract: concrete subclasses must override this constructor.

    Parameters
    ----------
    values : ndarray
        An integer ndarray with the factorized values.
    original : ExtensionArray
        The original ExtensionArray that factorize was called on.

    Raises
    ------
    AbstractMethodError
        Always, in this base implementation.

    See Also
    --------
    factorize
    ExtensionArray.factorize
    """
    # methodtype="classmethod" makes the error message name ``cls`` itself
    # instead of its metaclass.
    raise AbstractMethodError(cls, methodtype="classmethod")
253 # ------------------------------------------------------------------------
254 # Must be a Sequence
255 # ------------------------------------------------------------------------
def __getitem__(self, item: Any) -> Any:
    """
    Select a subset of self.

    Abstract: concrete subclasses must override this method.

    Parameters
    ----------
    item : int, slice, or ndarray
        * int: The position in 'self' to get.

        * slice: A slice object, where 'start', 'stop', and 'step' are
          integers or None

        * ndarray: A 1-d boolean NumPy ndarray the same length as 'self'

    Returns
    -------
    item : scalar or ExtensionArray

    Raises
    ------
    AbstractMethodError
        Always, in this base implementation.

    Notes
    -----
    For scalar ``item``, return a scalar value suitable for the array's
    type. This should be an instance of ``self.dtype.type``.

    For slice ``item``, return an instance of ``ExtensionArray``, even
    if the slice is length 0 or 1.

    For a boolean mask, return an instance of ``ExtensionArray``, filtered
    to the values where ``item`` is True.
    """
    raise AbstractMethodError(self)
def __setitem__(self, key: Union[int, slice, np.ndarray], value: Any) -> None:
    """
    Set one or more values inplace.

    This method is not required to satisfy the pandas extension array
    interface; the base implementation always raises.

    Parameters
    ----------
    key : int, ndarray, or slice
        When called from, e.g. ``Series.__setitem__``, ``key`` will be
        one of

        * scalar int
        * ndarray of integers.
        * boolean ndarray
        * slice object

    value : ExtensionDtype.type, Sequence[ExtensionDtype.type], or object
        Value or values to be set at ``key``.

    Returns
    -------
    None

    Raises
    ------
    NotImplementedError
        Always, in this base implementation.
    """
    # Some notes to the ExtensionArray implementor who may have ended up
    # here. While this method is not required for the interface, if you
    # *do* choose to implement __setitem__, then some semantics should be
    # observed:
    #
    # * Setting multiple values : ExtensionArrays should support setting
    #   multiple values at once, 'key' will be a sequence of integers and
    #   'value' will be a same-length sequence.
    #
    # * Broadcasting : For a sequence 'key' and a scalar 'value',
    #   each position in 'key' should be set to 'value'.
    #
    # * Coercion : Most users will expect basic coercion to work. For
    #   example, a string like '2018-01-01' is coerced to a datetime
    #   when setting on a datetime64ns array. In general, if the
    #   __init__ method coerces that value, then so should __setitem__
    # Note, also, that Series/DataFrame.where internally use __setitem__
    # on a copy of the data.
    raise NotImplementedError(f"{type(self)} does not implement __setitem__.")
def __len__(self) -> int:
    """
    Length of this array.

    Abstract: concrete subclasses must override this method.

    Returns
    -------
    length : int

    Raises
    ------
    AbstractMethodError
        Always, in this base implementation.
    """
    raise AbstractMethodError(self)
def __iter__(self):
    """
    Iterate over elements of the array.

    Yields
    ------
    object
        ``self[i]`` for each position ``i`` in ``range(len(self))``.
    """
    # This needs to be implemented so that pandas recognizes extension
    # arrays as list-like. The default implementation makes successive
    # calls to ``__getitem__``, which may be slower than necessary.
    # Positional indexing is intentional: it only relies on the
    # ``__len__`` / ``__getitem__`` pair.
    for position in range(len(self)):
        yield self[position]
def to_numpy(self, dtype=None, copy=False, na_value=lib.no_default):
    """
    Convert to a NumPy ndarray.

    .. versionadded:: 1.0.0

    This is similar to :meth:`numpy.asarray`, but may provide additional
    control over how the conversion is done.

    Parameters
    ----------
    dtype : str or numpy.dtype, optional
        The dtype to pass to :meth:`numpy.asarray`.
    copy : bool, default False
        Whether to ensure that the returned value is not a view on
        another array. Note that ``copy=False`` does not *ensure* that
        ``to_numpy()`` is no-copy. Rather, ``copy=True`` ensures that
        a copy is made, even if not strictly necessary.
    na_value : Any, optional
        The value to use for missing values. When given, positions where
        ``self.isna()`` is True are overwritten with it in the result.

    Returns
    -------
    numpy.ndarray
    """
    result = np.asarray(self, dtype=dtype)
    fill_missing = na_value is not lib.no_default
    if copy or fill_missing:
        # Filling writes into ``result``; copy first so that data possibly
        # shared with ``self`` is never modified.
        result = result.copy()
    if fill_missing:
        result[self.isna()] = na_value
    return result
387 # ------------------------------------------------------------------------
388 # Required attributes
389 # ------------------------------------------------------------------------
@property
def dtype(self) -> ExtensionDtype:
    """
    An instance of 'ExtensionDtype'.

    Abstract: concrete subclasses must override this property.

    Raises
    ------
    AbstractMethodError
        Always, in this base implementation.
    """
    # methodtype="property" so the message says "This property must be
    # defined ..." rather than "This method ...".
    raise AbstractMethodError(self, methodtype="property")
@property
def shape(self) -> Tuple[int, ...]:
    """
    Return a tuple of the array dimensions.

    Returns
    -------
    tuple of int
        The one-element tuple ``(len(self),)``.
    """
    return (len(self),)
@property
def size(self) -> int:
    """
    The number of elements in the array.

    Returns
    -------
    int
        Product of the entries of ``self.shape``.
    """
    # np.prod returns a NumPy scalar; convert it so the value matches the
    # annotated built-in ``int`` return type.
    return int(np.prod(self.shape))
@property
def ndim(self) -> int:
    """
    Extension Arrays are only allowed to be 1-dimensional.

    Returns
    -------
    int
        Always ``1``.
    """
    return 1
@property
def nbytes(self) -> int:
    """
    The number of bytes needed to store this object in memory.

    Abstract: concrete subclasses must override this property.

    Raises
    ------
    AbstractMethodError
        Always, in this base implementation.
    """
    # If this is expensive to compute, return an approximate lower bound
    # on the number of bytes needed.
    # methodtype="property" so the message says "This property must be
    # defined ..." rather than "This method ...".
    raise AbstractMethodError(self, methodtype="property")
428 # ------------------------------------------------------------------------
429 # Additional Methods
430 # ------------------------------------------------------------------------
def astype(self, dtype, copy=True):
    """
    Cast to a NumPy array with 'dtype'.

    Parameters
    ----------
    dtype : str or dtype
        Typecode or data-type to which the array is cast.
    copy : bool, default True
        Whether to copy the data, even if not necessary. If False,
        a copy is made only if the old dtype does not match the
        new dtype.

    Returns
    -------
    array : ndarray
        NumPy ndarray with 'dtype' for its dtype.
    """
    if copy:
        return np.array(self, dtype=dtype, copy=True)
    # ``np.array(..., copy=False)`` raises under NumPy 2 when a copy cannot
    # be avoided; ``np.asarray`` keeps the documented "copy only if needed"
    # behaviour on every NumPy version.
    return np.asarray(self, dtype=dtype)
def isna(self) -> ArrayLike:
    """
    A 1-D array indicating if each value is missing.

    Abstract: concrete subclasses must override this method.

    Returns
    -------
    na_values : Union[np.ndarray, ExtensionArray]
        In most cases, this should return a NumPy ndarray. For
        exceptional cases like ``SparseArray``, where returning
        an ndarray would be expensive, an ExtensionArray may be
        returned.

    Raises
    ------
    AbstractMethodError
        Always, in this base implementation.

    Notes
    -----
    If returning an ExtensionArray, then

    * ``na_values._is_boolean`` should be True
    * `na_values` should implement :func:`ExtensionArray._reduce`
    * ``na_values.any`` and ``na_values.all`` should be implemented
    """
    raise AbstractMethodError(self)
def _values_for_argsort(self) -> np.ndarray:
    """
    Return values for sorting.

    Returns
    -------
    ndarray
        By default ``np.array(self)``. Overrides should return
        transformed values that maintain the ordering between values
        within the array.

    See Also
    --------
    ExtensionArray.argsort
    """
    # Note: this is used in `ExtensionArray.argsort`.
    return np.array(self)
def argsort(
    self, ascending: bool = True, kind: str = "quicksort", *args, **kwargs
) -> np.ndarray:
    """
    Return the indices that would sort this array.

    Parameters
    ----------
    ascending : bool, default True
        Whether the indices should result in an ascending
        or descending sort.
    kind : {'quicksort', 'mergesort', 'heapsort'}, optional
        Sorting algorithm.
    *args, **kwargs:
        Accepted for compatibility with :func:`numpy.argsort`; they are
        validated together with `ascending`.

    Returns
    -------
    ndarray
        Array of indices that sort ``self``. If NaN values are contained,
        NaN values are placed at the end.

    See Also
    --------
    numpy.argsort : Sorting implementation used internally.
    """
    # Implementor note: You have two places to override the behavior of
    # argsort.
    # 1. _values_for_argsort : construct the values passed to np.argsort
    # 2. argsort : total control over sorting.
    ascending = nv.validate_argsort_with_ascending(ascending, args, kwargs)
    return nargsort(self, kind=kind, ascending=ascending, na_position="last")
def fillna(self, value=None, method=None, limit=None):
    """
    Fill NA/NaN values using the specified method.

    Parameters
    ----------
    value : scalar, array-like
        If a scalar value is passed it is used to fill all missing values.
        Alternatively, an array-like 'value' can be given. It's expected
        that the array-like have the same length as 'self'.
    method : {'backfill', 'bfill', 'pad', 'ffill', None}, default None
        Method to use for filling holes in reindexed Series
        pad / ffill: propagate last valid observation forward to next valid
        backfill / bfill: use NEXT valid observation to fill gap.
    limit : int, default None
        If method is specified, this is the maximum number of consecutive
        NaN values to forward/backward fill. In other words, if there is
        a gap with more than this number of consecutive NaNs, it will only
        be partially filled. This implementation only forwards `limit`
        when `method` is given.

    Returns
    -------
    ExtensionArray
        With NA/NaN filled. Always a new object; ``self`` is not modified.

    Raises
    ------
    ValueError
        If an array-like `value` does not have the same length as ``self``.
    """
    value, method = validate_fillna_kwargs(value, method)

    mask = self.isna()

    if is_array_like(value):
        if len(value) != len(self):
            raise ValueError(
                f"Length of 'value' does not match. Got ({len(value)}) "
                f"expected {len(self)}"
            )
        # Only the entries aligned with missing positions are needed.
        value = value[mask]

    if not mask.any():
        # Nothing to fill, but still hand back a fresh object.
        return self.copy()

    if method is not None:
        fill_func = pad_1d if method == "pad" else backfill_1d
        filled = fill_func(self.astype(object), limit=limit, mask=mask)
        return self._from_sequence(filled, dtype=self.dtype)

    # fill with value
    new_values = self.copy()
    new_values[mask] = value
    return new_values
def dropna(self):
    """
    Return ExtensionArray without NA values.

    Returns
    -------
    valid : ExtensionArray
        ``self`` indexed with the inverse of ``self.isna()``.
    """
    keep = ~self.isna()
    return self[keep]
def shift(self, periods: int = 1, fill_value: object = None) -> ABCExtensionArray:
    """
    Shift values by desired number.

    Newly introduced missing values are filled with
    ``self.dtype.na_value``.

    .. versionadded:: 0.24.0

    Parameters
    ----------
    periods : int, default 1
        The number of periods to shift. Negative values are allowed
        for shifting backwards.

    fill_value : object, optional
        The scalar value to use for newly introduced missing values.
        The default is ``self.dtype.na_value``.

        .. versionadded:: 0.24.0

    Returns
    -------
    ExtensionArray
        Shifted.

    Notes
    -----
    If ``self`` is empty or ``periods`` is 0, a copy of ``self`` is
    returned.

    If ``periods > len(self)``, then an array of size
    len(self) is returned, with all values filled with
    ``self.dtype.na_value``.
    """
    # Note: this implementation assumes that `self.dtype.na_value` can be
    # stored in an instance of your ExtensionArray with `self.dtype`.
    if not len(self) or periods == 0:
        return self.copy()

    if isna(fill_value):
        fill_value = self.dtype.na_value

    # The padding never exceeds len(self), so the result keeps its length.
    n_fill = min(abs(periods), len(self))
    padding = self._from_sequence([fill_value] * n_fill, dtype=self.dtype)

    if periods > 0:
        pieces = [padding, self[:-periods]]
    else:
        pieces = [self[abs(periods) :], padding]
    return self._concat_same_type(pieces)
def unique(self):
    """
    Compute the ExtensionArray of unique values.

    The array is cast to object dtype, passed through the module-level
    ``unique`` helper, and rebuilt with ``_from_sequence``.

    Returns
    -------
    uniques : ExtensionArray
    """
    distinct = unique(self.astype(object))
    return self._from_sequence(distinct, dtype=self.dtype)
def searchsorted(self, value, side="left", sorter=None):
    """
    Find indices where elements should be inserted to maintain order.

    .. versionadded:: 0.24.0

    Find the indices into a sorted array `self` (a) such that, if the
    corresponding elements in `value` were inserted before the indices,
    the order of `self` would be preserved.

    Assuming that `self` is sorted:

    ======  ================================
    `side`  returned index `i` satisfies
    ======  ================================
    left    ``self[i-1] < value <= self[i]``
    right   ``self[i-1] <= value < self[i]``
    ======  ================================

    Parameters
    ----------
    value : array_like
        Values to insert into `self`.
    side : {'left', 'right'}, optional
        If 'left', the index of the first suitable location found is given.
        If 'right', return the last such index. If there is no suitable
        index, return either 0 or N (where N is the length of `self`).
    sorter : 1-D array_like, optional
        Optional array of integer indices that sort array a into ascending
        order. They are typically the result of argsort.

    Returns
    -------
    array of ints
        Array of insertion points with the same shape as `value`.

    See Also
    --------
    numpy.searchsorted : Similar method from NumPy.
    """
    # Note: the base tests provided by pandas only test the basics.
    # We do not test
    # 1. Values outside the range of the `data_for_sorting` fixture
    # 2. Values between the values in the `data_for_sorting` fixture
    # 3. Missing values.
    as_objects = self.astype(object)
    return as_objects.searchsorted(value, side=side, sorter=sorter)
def _values_for_factorize(self) -> Tuple[np.ndarray, Any]:
    """
    Return an array and missing value suitable for factorization.

    Returns
    -------
    values : ndarray

        An array suitable for factorization. This should maintain order
        and be a supported dtype (Float64, Int64, UInt64, String, Object).
        By default, the extension array is cast to object dtype.
    na_value : object
        The value in `values` to consider missing. This will be treated
        as NA in the factorization routines, so it will be coded as
        `na_sentinel` and not included in `uniques`. By default,
        ``np.nan`` is used.

    Notes
    -----
    The values returned by this method are also used in
    :func:`pandas.util.hash_pandas_object`.
    """
    return self.astype(object), np.nan
def factorize(self, na_sentinel: int = -1) -> Tuple[np.ndarray, ABCExtensionArray]:
    """
    Encode the extension array as an enumerated type.

    Parameters
    ----------
    na_sentinel : int, default -1
        Value to use in the `codes` array to indicate missing values.

    Returns
    -------
    codes : ndarray
        An integer NumPy array that's an indexer into the original
        ExtensionArray.
    uniques : ExtensionArray
        An ExtensionArray containing the unique values of `self`.

        .. note::

           uniques will *not* contain an entry for the NA value of
           the ExtensionArray if there are any missing values present
           in `self`.

    See Also
    --------
    factorize : Top-level factorize method that dispatches here.

    Notes
    -----
    :meth:`pandas.factorize` offers a `sort` keyword as well.
    """
    # Implementer note: There are two ways to override the behavior of
    # pandas.factorize
    # 1. _values_for_factorize and _from_factorized.
    #    Specify the values passed to pandas' internal factorization
    #    routines, and how to convert from those values back to the
    #    original ExtensionArray.
    # 2. ExtensionArray.factorize.
    #    Complete control over factorization.
    arr, na_value = self._values_for_factorize()

    codes, raw_uniques = _factorize_array(
        arr, na_sentinel=na_sentinel, na_value=na_value
    )

    # Convert the raw unique values back into this array type.
    uniques = self._from_factorized(raw_uniques, self)
    return codes, uniques
773 _extension_array_shared_docs[
774 "repeat"
775 ] = """
776 Repeat elements of a %(klass)s.
778 Returns a new %(klass)s where each element of the current %(klass)s
779 is repeated consecutively a given number of times.
781 Parameters
782 ----------
783 repeats : int or array of ints
784 The number of repetitions for each element. This should be a
785 non-negative integer. Repeating 0 times will return an empty
786 %(klass)s.
787 axis : None
788 Must be ``None``. Has no effect but is accepted for compatibility
789 with numpy.
791 Returns
792 -------
793 repeated_array : %(klass)s
794 Newly created %(klass)s with repeated elements.
796 See Also
797 --------
798 Series.repeat : Equivalent function for Series.
799 Index.repeat : Equivalent function for Index.
800 numpy.repeat : Similar method for :class:`numpy.ndarray`.
801 ExtensionArray.take : Take arbitrary positions.
803 Examples
804 --------
805 >>> cat = pd.Categorical(['a', 'b', 'c'])
806 >>> cat
807 [a, b, c]
808 Categories (3, object): [a, b, c]
809 >>> cat.repeat(2)
810 [a, a, b, b, c, c]
811 Categories (3, object): [a, b, c]
812 >>> cat.repeat([1, 2, 3])
813 [a, b, b, c, c, c]
814 Categories (3, object): [a, b, c]
815 """
@Substitution(klass="ExtensionArray")
@Appender(_extension_array_shared_docs["repeat"])
def repeat(self, repeats, axis=None):
    # The docstring comes from the shared "repeat" template via the
    # decorators above.
    # ``axis`` is only accepted for NumPy compatibility; validate it here.
    nv.validate_repeat((), {"axis": axis})
    # Repeat positions rather than values so the result is built by take().
    positions = np.arange(len(self)).repeat(repeats)
    return self.take(positions)
824 # ------------------------------------------------------------------------
825 # Indexing methods
826 # ------------------------------------------------------------------------
def take(
    self, indices: Sequence[int], allow_fill: bool = False, fill_value: Any = None
) -> ABCExtensionArray:
    """
    Take elements from an array.

    Abstract: concrete subclasses must override this method.

    Parameters
    ----------
    indices : sequence of int
        Indices to be taken.
    allow_fill : bool, default False
        How to handle negative values in `indices`.

        * False: negative values in `indices` indicate positional indices
          from the right (the default). This is similar to
          :func:`numpy.take`.

        * True: negative values in `indices` indicate
          missing values. These values are set to `fill_value`. Any
          other negative values raise a ``ValueError``.

    fill_value : any, optional
        Fill value to use for NA-indices when `allow_fill` is True.
        This may be ``None``, in which case the default NA value for
        the type, ``self.dtype.na_value``, is used.

        For many ExtensionArrays, there will be two representations of
        `fill_value`: a user-facing "boxed" scalar, and a low-level
        physical NA value. `fill_value` should be the user-facing version,
        and the implementation should handle translating that to the
        physical version for processing the take if necessary.

    Returns
    -------
    ExtensionArray

    Raises
    ------
    IndexError
        When the indices are out of bounds for the array.
    ValueError
        When `indices` contains negative values other than ``-1``
        and `allow_fill` is True.
    AbstractMethodError
        Always, in this base implementation.

    See Also
    --------
    numpy.take
    api.extensions.take

    Notes
    -----
    ExtensionArray.take is called by ``Series.__getitem__``, ``.loc``,
    ``iloc``, when `indices` is a sequence of values. Additionally,
    it's called by :meth:`Series.reindex`, or any other method
    that causes realignment, with a `fill_value`.

    Examples
    --------
    Here's an example implementation, which relies on casting the
    extension array to object dtype. This uses the helper method
    :func:`pandas.api.extensions.take`.

    .. code-block:: python

       def take(self, indices, allow_fill=False, fill_value=None):
           from pandas.core.algorithms import take

           # If the ExtensionArray is backed by an ndarray, then
           # just pass that here instead of coercing to object.
           data = self.astype(object)

           if allow_fill and fill_value is None:
               fill_value = self.dtype.na_value

           # fill value should always be translated from the scalar
           # type for the array, to the physical storage type for
           # the data, before passing to take.

           result = take(data, indices, fill_value=fill_value,
                         allow_fill=allow_fill)
           return self._from_sequence(result, dtype=self.dtype)
    """
    # Implementer note: The `fill_value` parameter should be a user-facing
    # value, an instance of self.dtype.type. When passed `fill_value=None`,
    # the default of `self.dtype.na_value` should be used.
    # This may differ from the physical storage type your ExtensionArray
    # uses. In this case, your implementation is responsible for casting
    # the user-facing type to the storage type, before using
    # pandas.api.extensions.take
    raise AbstractMethodError(self)
def copy(self) -> ABCExtensionArray:
    """
    Return a copy of the array.

    Abstract: concrete subclasses must override this method.

    Returns
    -------
    ExtensionArray

    Raises
    ------
    AbstractMethodError
        Always, in this base implementation.
    """
    raise AbstractMethodError(self)
def view(self, dtype=None) -> Union[ABCExtensionArray, np.ndarray]:
    """
    Return a view on the array.

    Parameters
    ----------
    dtype : str, np.dtype, or ExtensionDtype, optional
        Default None. Only ``None`` is supported by this base
        implementation.

    Returns
    -------
    ExtensionArray
        A view of the :class:`ExtensionArray`, obtained as ``self[:]``.

    Raises
    ------
    NotImplementedError
        If `dtype` is not None.
    """
    # NB:
    # - This must return a *new* object referencing the same data, not self.
    # - The only case that *must* be implemented is with dtype=None,
    #   giving a view with the same dtype as self.
    if dtype is not None:
        raise NotImplementedError(dtype)
    return self[:]
951 # ------------------------------------------------------------------------
952 # Printing
953 # ------------------------------------------------------------------------
def __repr__(self) -> str:
    """
    Default repr: class name, formatted data, then length and dtype.

    Returns
    -------
    str
    """
    from pandas.io.formats.printing import format_object_summary

    # the short repr has no trailing newline, while the truncated
    # repr does. So we include a newline in our template, and strip
    # any trailing newlines from format_object_summary
    summary = format_object_summary(self, self._formatter(), indent_for_name=False)
    data = summary.rstrip(", \n")
    class_name = f"<{type(self).__name__}>\n"
    return f"{class_name}{data}\nLength: {len(self)}, dtype: {self.dtype}"
def _formatter(self, boxed: bool = False) -> Callable[[Any], Optional[str]]:
    """
    Formatting function for scalar values.

    This is used in the default '__repr__'. The returned formatting
    function receives instances of your scalar type.

    Parameters
    ----------
    boxed : bool, default False
        An indicator for whether or not your array is being printed
        within a Series, DataFrame, or Index (True), or just by
        itself (False). This may be useful if you want scalar values
        to appear differently within a Series versus on its own (e.g.
        quoted or not).

    Returns
    -------
    Callable[[Any], str]
        A callable that gets instances of the scalar type and
        returns a string. By default, :func:`repr` is used
        when ``boxed=False`` and :func:`str` is used when
        ``boxed=True``.
    """
    return str if boxed else repr
994 # ------------------------------------------------------------------------
995 # Reshaping
996 # ------------------------------------------------------------------------
def ravel(self, order="C") -> ABCExtensionArray:
    """
    Return a flattened view on this array.

    Parameters
    ----------
    order : {None, 'C', 'F', 'A', 'K'}, default 'C'

    Returns
    -------
    ExtensionArray
        ``self`` itself.

    Notes
    -----
    - Because ExtensionArrays are 1D-only, this is a no-op.
    - The "order" argument is ignored, is for compatibility with NumPy.
    """
    return self
@classmethod
def _concat_same_type(
    cls, to_concat: Sequence[ABCExtensionArray]
) -> ABCExtensionArray:
    """
    Concatenate multiple arrays of this type.

    Abstract: concrete subclasses must override this method.

    Parameters
    ----------
    to_concat : sequence of this type

    Returns
    -------
    ExtensionArray

    Raises
    ------
    AbstractMethodError
        Always, in this base implementation.
    """
    # methodtype="classmethod" makes the error message name ``cls`` itself
    # instead of its metaclass.
    raise AbstractMethodError(cls, methodtype="classmethod")
1034 # The _can_hold_na attribute is set to True so that pandas internals
1035 # will use the ExtensionDtype.na_value as the NA value in operations
1036 # such as take(), reindex(), shift(), etc. In addition, those results
1037 # will then be of the ExtensionArray subclass rather than an array
1038 # of objects
1039 _can_hold_na = True
@property
def _ndarray_values(self) -> np.ndarray:
    """
    Internal pandas method for lossy conversion to a NumPy ndarray.

    This method is not part of the pandas interface.

    The expectation is that this is cheap to compute, and is primarily
    used for interacting with our indexers.

    Returns
    -------
    array : ndarray
        By default ``np.array(self)``.
    """
    return np.array(self)
def _reduce(self, name, skipna=True, **kwargs):
    """
    Return a scalar result of performing the reduction operation.

    The base implementation supports no reductions and always raises;
    subclasses override this to provide them.

    Parameters
    ----------
    name : str
        Name of the function, supported values are:
        { any, all, min, max, sum, mean, median, prod,
        std, var, sem, kurt, skew }.
    skipna : bool, default True
        If True, skip NaN values.
    **kwargs
        Additional keyword arguments passed to the reduction function.
        Currently, `ddof` is the only supported kwarg.

    Returns
    -------
    scalar

    Raises
    ------
    TypeError : subclass does not define reductions
    """
    raise TypeError(f"cannot perform {name} with type {self.dtype}")
class ExtensionOpsMixin:
    """
    Base mixin that attaches operator implementations to dunder names.

    Each ``_add_*_ops`` classmethod asks the class for a method through the
    matching ``_create_*_method`` factory and stores the result under the
    corresponding special-method name. The factories are not defined in
    this class; whoever calls an ``_add_*_ops`` helper must supply them.

    .. note::

       You may want to set ``__array_priority__`` if you want your
       implementation to be called when involved in binary operations
       with NumPy arrays.
    """

    @classmethod
    def _add_arithmetic_ops(cls):
        """Install the forward and reflected arithmetic dunders on ``cls``."""
        arithmetic_table = (
            ("__add__", operator.add),
            ("__radd__", ops.radd),
            ("__sub__", operator.sub),
            ("__rsub__", ops.rsub),
            ("__mul__", operator.mul),
            ("__rmul__", ops.rmul),
            ("__pow__", operator.pow),
            ("__rpow__", ops.rpow),
            ("__mod__", operator.mod),
            ("__rmod__", ops.rmod),
            ("__floordiv__", operator.floordiv),
            ("__rfloordiv__", ops.rfloordiv),
            ("__truediv__", operator.truediv),
            ("__rtruediv__", ops.rtruediv),
            ("__divmod__", divmod),
            ("__rdivmod__", ops.rdivmod),
        )
        for dunder, func in arithmetic_table:
            setattr(cls, dunder, cls._create_arithmetic_method(func))

    @classmethod
    def _add_comparison_ops(cls):
        """Install the six rich-comparison dunders on ``cls``."""
        comparison_table = (
            ("__eq__", operator.eq),
            ("__ne__", operator.ne),
            ("__lt__", operator.lt),
            ("__gt__", operator.gt),
            ("__le__", operator.le),
            ("__ge__", operator.ge),
        )
        for dunder, func in comparison_table:
            setattr(cls, dunder, cls._create_comparison_method(func))

    @classmethod
    def _add_logical_ops(cls):
        """Install the and/or/xor dunders, forward and reflected, on ``cls``."""
        logical_table = (
            ("__and__", operator.and_),
            ("__rand__", ops.rand_),
            ("__or__", operator.or_),
            ("__ror__", ops.ror_),
            ("__xor__", operator.xor),
            ("__rxor__", ops.rxor),
        )
        for dunder, func in logical_table:
            setattr(cls, dunder, cls._create_logical_method(func))
class ExtensionScalarOpsMixin(ExtensionOpsMixin):
    """
    A mixin for defining ops on an ExtensionArray.

    It is assumed that the underlying scalar objects have the operators
    already defined.

    Notes
    -----
    If you have defined a subclass MyExtensionArray(ExtensionArray), then
    use MyExtensionArray(ExtensionArray, ExtensionScalarOpsMixin) to
    get the arithmetic operators.  After the definition of MyExtensionArray,
    insert the lines

    MyExtensionArray._add_arithmetic_ops()
    MyExtensionArray._add_comparison_ops()

    to link the operators to your class.

    .. note::

       You may want to set ``__array_priority__`` if you want your
       implementation to be called when involved in binary operations
       with NumPy arrays.
    """

    @classmethod
    def _create_method(cls, op, coerce_to_dtype=True):
        """
        A class method that returns a method that will correspond to an
        operator for an ExtensionArray subclass, by dispatching to the
        relevant operator defined on the individual elements of the
        ExtensionArray.

        Parameters
        ----------
        op : function
            An operator that takes arguments op(a, b)
        coerce_to_dtype : bool, default True
            boolean indicating whether to attempt to convert
            the result to the underlying ExtensionArray dtype.
            If it's not possible to create a new ExtensionArray with the
            values, an ndarray is returned instead.

        Returns
        -------
        Callable[[Any, Any], Union[ndarray, ExtensionArray]]
            A method that can be bound to a class. When used, the method
            receives the two arguments, one of which is the instance of
            this class, and should return an ExtensionArray or an ndarray.

            Returning an ndarray may be necessary when the result of the
            `op` cannot be stored in the ExtensionArray. The dtype of the
            ndarray uses NumPy's normal inference rules.

            For ``divmod`` / ``rdivmod`` the method returns a 2-tuple
            (quotients, remainders), each converted the same way.

        Examples
        --------
        Given an ExtensionArray subclass called MyExtensionArray, use

        >>> __add__ = cls._create_method(operator.add)

        in the class definition of MyExtensionArray to create the operator
        for addition, that will be based on the operator implementation
        of the underlying elements of the ExtensionArray
        """

        def _binop(self, other):
            def convert_values(param):
                # Array-likes are paired element by element; anything else
                # is treated as a scalar and repeated to the length of self.
                if isinstance(param, ExtensionArray) or is_list_like(param):
                    ovalues = param
                else:  # Assume it's a scalar object
                    ovalues = [param] * len(self)
                return ovalues

            if isinstance(other, (ABCSeries, ABCIndexClass)):
                # rely on pandas to unbox and dispatch to us
                return NotImplemented

            lvalues = self
            rvalues = convert_values(other)

            # If the operator is not defined for the underlying objects,
            # a TypeError should be raised
            res = [op(a, b) for (a, b) in zip(lvalues, rvalues)]

            def _maybe_convert(arr):
                if coerce_to_dtype:
                    # https://github.com/pandas-dev/pandas/issues/22850
                    # We catch all regular exceptions here, and fall back
                    # to an ndarray.
                    converted = try_cast_to_ea(self, arr)
                    if not isinstance(converted, type(self)):
                        # exception raised in _from_sequence; ensure we
                        # have ndarray
                        converted = np.asarray(arr)
                else:
                    converted = np.asarray(arr)
                return converted

            if op.__name__ in {"divmod", "rdivmod"}:
                # Each element of res is a (quotient, remainder) pair, so
                # transpose into two sequences. zip() with no arguments
                # yields nothing, which would make the two-name unpacking
                # fail for an empty array; use two empty tuples instead.
                a, b = zip(*res) if res else ((), ())
                return _maybe_convert(a), _maybe_convert(b)

            return _maybe_convert(res)

        op_name = ops._get_op_name(op, True)
        return set_function_name(_binop, op_name, cls)

    @classmethod
    def _create_arithmetic_method(cls, op):
        """Build an arithmetic method whose result is coerced to the dtype."""
        return cls._create_method(op)

    @classmethod
    def _create_comparison_method(cls, op):
        """Build a comparison method whose result is left as an ndarray."""
        return cls._create_method(op, coerce_to_dtype=False)