Coverage for /home/martinb/.local/share/virtualenvs/camcops/lib/python3.6/site-packages/pandas/core/dtypes/base.py : 42%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
"""Extend pandas with custom array types"""
from typing import Any, List, Optional, Tuple, Type

import numpy as np

from pandas.errors import AbstractMethodError

from pandas.core.dtypes.generic import ABCDataFrame, ABCIndexClass, ABCSeries
class ExtensionDtype:
    """
    A custom data type, to be paired with an ExtensionArray.

    .. versionadded:: 0.23.0

    See Also
    --------
    extensions.register_extension_dtype
    extensions.ExtensionArray

    Notes
    -----
    The interface includes the following abstract methods that must
    be implemented by subclasses:

    * type
    * name
    * construct_from_string

    The following attributes influence the behavior of the dtype in
    pandas operations

    * _is_numeric
    * _is_boolean

    Optionally one can override construct_array_type for construction
    with the name of this dtype via the Registry. See
    :meth:`extensions.register_extension_dtype`.

    * construct_array_type

    The `na_value` class attribute can be used to set the default NA value
    for this type. :attr:`numpy.nan` is used by default.

    ExtensionDtypes are required to be hashable. The base class provides
    a default implementation, which relies on the ``_metadata`` class
    attribute. ``_metadata`` should be a tuple containing the strings
    that define your data type. For example, with ``PeriodDtype`` that's
    the ``freq`` attribute.

    **If you have a parametrized dtype you should set the ``_metadata``
    class property**.

    Ideally, the attributes in ``_metadata`` will match the
    parameters to your ``ExtensionDtype.__init__`` (if any). If any of
    the attributes in ``_metadata`` don't implement the standard
    ``__eq__`` or ``__hash__``, the default implementations here will not
    work.

    .. versionchanged:: 0.24.0

       Added ``_metadata``, ``__hash__``, and changed the default definition
       of ``__eq__``.

    For interaction with Apache Arrow (pyarrow), a ``__from_arrow__`` method
    can be implemented: this method receives a pyarrow Array or ChunkedArray
    as only argument and is expected to return the appropriate pandas
    ExtensionArray for this dtype and the passed values::

        class ExtensionDtype:

            def __from_arrow__(
                self, array: pyarrow.Array/ChunkedArray
            ) -> ExtensionArray:
                ...

    This class does not inherit from 'abc.ABCMeta' for performance reasons.
    Methods and properties required by the interface raise
    ``pandas.errors.AbstractMethodError`` and no ``register`` method is
    provided for registering virtual subclasses.
    """

    # Names of the attributes that parametrize the dtype; they drive the
    # default ``__eq__`` and ``__hash__`` implementations below.
    _metadata: Tuple[str, ...] = ()

    def __str__(self) -> str:
        """Return ``self.name`` as the string form of the dtype."""
        return self.name

    def __eq__(self, other: Any) -> bool:
        """
        Check whether 'other' is equal to self.

        By default, 'other' is considered equal if either

        * it's a string matching 'self.name'.
        * it's an instance of this type and all of the
          attributes in ``self._metadata`` are equal between
          `self` and `other`.

        Parameters
        ----------
        other : Any

        Returns
        -------
        bool
        """
        if isinstance(other, str):
            # A string is first turned into a dtype instance; a string this
            # dtype cannot be built from is simply "not equal".
            try:
                other = self.construct_from_string(other)
            except TypeError:
                return False
        if isinstance(other, type(self)):
            return all(
                getattr(self, attr) == getattr(other, attr) for attr in self._metadata
            )
        return False

    def __hash__(self) -> int:
        """Hash the tuple of attribute values named in ``_metadata``."""
        return hash(tuple(getattr(self, attr) for attr in self._metadata))

    def __ne__(self, other: Any) -> bool:
        """Return the negation of ``__eq__``."""
        return not self.__eq__(other)

    @property
    def na_value(self):
        """
        Default NA value to use for this type.

        This is used in e.g. ExtensionArray.take. This should be the
        user-facing "boxed" version of the NA value, not the physical NA value
        for storage.  e.g. for JSONArray, this is an empty dictionary.
        """
        return np.nan

    @property
    def type(self) -> Type:
        """
        The scalar type for the array, e.g. ``int``

        It's expected ``ExtensionArray[item]`` returns an instance
        of ``ExtensionDtype.type`` for scalar ``item``, assuming
        that value is valid (not NA). NA values do not need to be
        instances of `type`.
        """
        raise AbstractMethodError(self)

    @property
    def kind(self) -> str:
        """
        A character code (one of 'biufcmMOSUV'), default 'O'

        This should match the NumPy dtype used when the array is
        converted to an ndarray, which is probably 'O' for object if
        the extension type cannot be represented as a built-in NumPy
        type.

        See Also
        --------
        numpy.dtype.kind
        """
        return "O"

    @property
    def name(self) -> str:
        """
        A string identifying the data type.

        Will be used for display in, e.g. ``Series.dtype``
        """
        raise AbstractMethodError(self)

    @property
    def names(self) -> Optional[List[str]]:
        """
        Ordered list of field names, or None if there are no fields.

        This is for compatibility with NumPy arrays, and may be removed in the
        future.
        """
        return None

    @classmethod
    def construct_array_type(cls):
        """
        Return the array type associated with this dtype.

        Returns
        -------
        type

        Raises
        ------
        AbstractMethodError
            Always, in this base class; subclasses override the method.
            ``AbstractMethodError`` derives from ``NotImplementedError``, so
            callers catching the latter keep working.
        """
        # Same error type as the other unimplemented members of the interface
        # (``type`` and ``name``).
        raise AbstractMethodError(cls, methodtype="classmethod")

    @classmethod
    def construct_from_string(cls, string: str):
        r"""
        Construct this type from a string.

        This is useful mainly for data types that accept parameters.
        For example, a period dtype accepts a frequency parameter that
        can be set as ``period[H]`` (where H means hourly frequency).

        By default, in the abstract class, just the name of the type is
        expected. But subclasses can overwrite this method to accept
        parameters.

        Parameters
        ----------
        string : str
            The name of the type, for example ``category``.

        Returns
        -------
        ExtensionDtype
            Instance of the dtype.

        Raises
        ------
        TypeError
            If a class cannot be constructed from this 'string'.

        Examples
        --------
        For extension dtypes with arguments the following may be an
        adequate implementation.

        >>> @classmethod
        ... def construct_from_string(cls, string):
        ...     pattern = re.compile(r"^my_type\[(?P<arg_name>.+)\]$")
        ...     match = pattern.match(string)
        ...     if match:
        ...         return cls(**match.groupdict())
        ...     else:
        ...         raise TypeError(
        ...             f"Cannot construct a '{cls.__name__}' from '{string}'"
        ...         )
        """
        if not isinstance(string, str):
            raise TypeError(f"Expects a string, got {type(string).__name__}")

        # ``name`` is declared as a property on this base class, so the
        # comparison below only makes sense when the concrete class has
        # replaced it with a plain string attribute. mypy reports:
        # error: Non-overlapping equality check (left operand type: "str", right
        # operand type: "Callable[[ExtensionDtype], str]")  [comparison-overlap]
        assert isinstance(cls.name, str), (cls, type(cls.name))
        if string != cls.name:
            raise TypeError(f"Cannot construct a '{cls.__name__}' from '{string}'")
        return cls()

    @classmethod
    def is_dtype(cls, dtype) -> bool:
        """
        Check if we match 'dtype'.

        Parameters
        ----------
        dtype : object
            The object to check.

        Returns
        -------
        is_dtype : bool

        Notes
        -----
        The default implementation is True if

        1. ``cls.construct_from_string(dtype)`` is an instance
           of ``cls``.
        2. ``dtype`` is an object and is an instance of ``cls``
        3. ``dtype`` has a ``dtype`` attribute, and any of the above
           conditions is true for ``dtype.dtype``.
        """
        # Unwrap array-likes: look at their ``dtype`` attribute when present.
        dtype = getattr(dtype, "dtype", dtype)

        if isinstance(dtype, (ABCSeries, ABCIndexClass, ABCDataFrame, np.dtype)):
            # https://github.com/pandas-dev/pandas/issues/22960
            # avoid passing data to `construct_from_string`. This could
            # cause a FutureWarning from numpy about failing elementwise
            # comparison from, e.g., comparing DataFrame == 'category'.
            return False
        elif dtype is None:
            return False
        elif isinstance(dtype, cls):
            return True
        if isinstance(dtype, str):
            try:
                return cls.construct_from_string(dtype) is not None
            except TypeError:
                return False
        return False

    @property
    def _is_numeric(self) -> bool:
        """
        Whether columns with this dtype should be considered numeric.

        By default ExtensionDtypes are assumed to be non-numeric.
        They'll be excluded from operations that exclude non-numeric
        columns, like (groupby) reductions, plotting, etc.
        """
        return False

    @property
    def _is_boolean(self) -> bool:
        """
        Whether this dtype should be considered boolean.

        By default, ExtensionDtypes are assumed to be non-boolean.
        Setting this to True will affect the behavior of several places,
        e.g.

        * is_bool
        * boolean indexing

        Returns
        -------
        bool
        """
        return False