Coverage for /home/martinb/.local/share/virtualenvs/camcops/lib/python3.6/site-packages/pandas/core/dtypes/missing.py : 14%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1"""
2missing types & inference
3"""
4import numpy as np
6from pandas._config import get_option
8from pandas._libs import lib
9import pandas._libs.missing as libmissing
10from pandas._libs.tslibs import NaT, iNaT
12from pandas.core.dtypes.common import (
13 _NS_DTYPE,
14 _TD_DTYPE,
15 ensure_object,
16 is_bool_dtype,
17 is_complex_dtype,
18 is_datetime64_dtype,
19 is_datetime64tz_dtype,
20 is_datetimelike_v_numeric,
21 is_dtype_equal,
22 is_extension_array_dtype,
23 is_float_dtype,
24 is_integer_dtype,
25 is_object_dtype,
26 is_period_dtype,
27 is_scalar,
28 is_string_dtype,
29 is_string_like_dtype,
30 is_timedelta64_dtype,
31 needs_i8_conversion,
32 pandas_dtype,
33)
34from pandas.core.dtypes.generic import (
35 ABCDatetimeArray,
36 ABCExtensionArray,
37 ABCGeneric,
38 ABCIndexClass,
39 ABCMultiIndex,
40 ABCSeries,
41 ABCTimedeltaArray,
42)
43from pandas.core.dtypes.inference import is_list_like
# Scalar +inf / -inf checks, re-exported from the ``pandas._libs.missing``
# module under this module's namespace.
isposinf_scalar = libmissing.isposinf_scalar
isneginf_scalar = libmissing.isneginf_scalar
def isna(obj):
    """
    Detect missing values for an array-like object.

    This function takes a scalar or array-like object and indicates
    whether values are missing (``NaN`` in numeric arrays, ``None`` or ``NaN``
    in object arrays, ``NaT`` in datetimelike).

    Parameters
    ----------
    obj : scalar or array-like
        Object to check for null or missing values.

    Returns
    -------
    bool or array-like of bool
        For scalar input, returns a scalar boolean.
        For array input, returns an array of boolean indicating whether each
        corresponding element is missing.

    See Also
    --------
    notna : Boolean inverse of pandas.isna.
    Series.isna : Detect missing values in a Series.
    DataFrame.isna : Detect missing values in a DataFrame.
    Index.isna : Detect missing values in an Index.

    Examples
    --------
    Scalar arguments (including strings) result in a scalar boolean.

    >>> pd.isna('dog')
    False

    >>> pd.isna(pd.NA)
    True

    >>> pd.isna(np.nan)
    True

    ndarrays result in an ndarray of booleans.

    >>> array = np.array([[1, np.nan, 3], [4, 5, np.nan]])
    >>> array
    array([[ 1., nan,  3.],
           [ 4.,  5., nan]])
    >>> pd.isna(array)
    array([[False,  True, False],
           [False, False,  True]])

    For indexes, an ndarray of booleans is returned.

    >>> index = pd.DatetimeIndex(["2017-07-05", "2017-07-06", None,
    ...                           "2017-07-08"])
    >>> index
    DatetimeIndex(['2017-07-05', '2017-07-06', 'NaT', '2017-07-08'],
                  dtype='datetime64[ns]', freq=None)
    >>> pd.isna(index)
    array([False, False,  True, False])

    For Series and DataFrame, the same type is returned, containing booleans.

    >>> df = pd.DataFrame([['ant', 'bee', 'cat'], ['dog', None, 'fly']])
    >>> df
         0     1    2
    0  ant   bee  cat
    1  dog  None  fly
    >>> pd.isna(df)
           0      1      2
    0  False  False  False
    1  False   True  False

    >>> pd.isna(df[1])
    0    False
    1     True
    Name: 1, dtype: bool
    """
    # ``_isna`` is a module-level name looked up at call time, so whichever
    # implementation it is currently bound to is the one that runs.
    return _isna(obj)


# Second public spelling of the same function.
isnull = isna
132def _isna_new(obj):
134 if is_scalar(obj):
135 return libmissing.checknull(obj)
136 # hack (for now) because MI registers as ndarray
137 elif isinstance(obj, ABCMultiIndex):
138 raise NotImplementedError("isna is not defined for MultiIndex")
139 elif isinstance(obj, type):
140 return False
141 elif isinstance(
142 obj,
143 (
144 ABCSeries,
145 np.ndarray,
146 ABCIndexClass,
147 ABCExtensionArray,
148 ABCDatetimeArray,
149 ABCTimedeltaArray,
150 ),
151 ):
152 return _isna_ndarraylike(obj)
153 elif isinstance(obj, ABCGeneric):
154 return obj._constructor(obj._data.isna(func=isna))
155 elif isinstance(obj, list):
156 return _isna_ndarraylike(np.asarray(obj, dtype=object))
157 elif hasattr(obj, "__array__"):
158 return _isna_ndarraylike(np.asarray(obj))
159 else:
160 return obj is None
163def _isna_old(obj):
164 """
165 Detect missing values, treating None, NaN, INF, -INF as null.
167 Parameters
168 ----------
169 arr: ndarray or object value
171 Returns
172 -------
173 boolean ndarray or boolean
174 """
175 if is_scalar(obj):
176 return libmissing.checknull_old(obj)
177 # hack (for now) because MI registers as ndarray
178 elif isinstance(obj, ABCMultiIndex):
179 raise NotImplementedError("isna is not defined for MultiIndex")
180 elif isinstance(obj, type):
181 return False
182 elif isinstance(obj, (ABCSeries, np.ndarray, ABCIndexClass, ABCExtensionArray)):
183 return _isna_ndarraylike_old(obj)
184 elif isinstance(obj, ABCGeneric):
185 return obj._constructor(obj._data.isna(func=_isna_old))
186 elif isinstance(obj, list):
187 return _isna_ndarraylike_old(np.asarray(obj, dtype=object))
188 elif hasattr(obj, "__array__"):
189 return _isna_ndarraylike_old(np.asarray(obj))
190 else:
191 return obj is None
# Implementation that ``isna`` delegates to. ``_use_inf_as_na`` rebinds this
# name to ``_isna_old`` or ``_isna_new``.
_isna = _isna_new
def _use_inf_as_na(key):
    """
    Option change callback for na/inf behaviour.

    Choose which replacement for numpy.isnan / -numpy.isfinite is used.

    Parameters
    ----------
    key : str
        Name of the option, read with ``get_option``. If its value is truthy,
        None, NaN, INF, -INF are treated as null (old way); otherwise None
        and NaN are null, but INF, -INF are not null (new way).

    Notes
    -----
    This approach to setting global module values is discussed and
    approved here:

    * https://stackoverflow.com/questions/4859217/
      programmatically-creating-variables-in-python/4859312#4859312
    """
    flag = get_option(key)
    # Rebind the module-level ``_isna`` so later ``isna`` calls pick it up.
    globals()["_isna"] = _isna_old if flag else _isna_new
225def _isna_ndarraylike(obj):
226 is_extension = is_extension_array_dtype(obj)
228 if not is_extension:
229 # Avoid accessing `.values` on things like
230 # PeriodIndex, which may be expensive.
231 values = getattr(obj, "values", obj)
232 else:
233 values = obj
235 dtype = values.dtype
237 if is_extension:
238 if isinstance(obj, (ABCIndexClass, ABCSeries)):
239 values = obj._values
240 else:
241 values = obj
242 result = values.isna()
243 elif isinstance(obj, ABCDatetimeArray):
244 return obj.isna()
245 elif is_string_dtype(dtype):
246 # Working around NumPy ticket 1542
247 shape = values.shape
249 if is_string_like_dtype(dtype):
250 # object array of strings
251 result = np.zeros(values.shape, dtype=bool)
252 else:
253 # object array of non-strings
254 result = np.empty(shape, dtype=bool)
255 vec = libmissing.isnaobj(values.ravel())
256 result[...] = vec.reshape(shape)
258 elif needs_i8_conversion(dtype):
259 # this is the NaT pattern
260 result = values.view("i8") == iNaT
261 else:
262 result = np.isnan(values)
264 # box
265 if isinstance(obj, ABCSeries):
266 result = obj._constructor(result, index=obj.index, name=obj.name, copy=False)
268 return result
271def _isna_ndarraylike_old(obj):
272 is_extension = is_extension_array_dtype(obj)
274 values = getattr(obj, "values", obj)
275 dtype = values.dtype
277 if is_extension:
278 if isinstance(obj, (ABCIndexClass, ABCSeries)):
279 values = obj._values
280 else:
281 values = obj
282 result = values.isna() | (values == -np.inf) | (values == np.inf)
283 elif is_string_dtype(dtype):
284 # Working around NumPy ticket 1542
285 shape = values.shape
287 if is_string_like_dtype(dtype):
288 result = np.zeros(values.shape, dtype=bool)
289 else:
290 result = np.empty(shape, dtype=bool)
291 vec = libmissing.isnaobj_old(values.ravel())
292 result[:] = vec.reshape(shape)
294 elif is_datetime64_dtype(dtype):
295 # this is the NaT pattern
296 result = values.view("i8") == iNaT
297 else:
298 result = ~np.isfinite(values)
300 # box
301 if isinstance(obj, ABCSeries):
302 result = obj._constructor(result, index=obj.index, name=obj.name, copy=False)
304 return result
def notna(obj):
    """
    Detect non-missing values for an array-like object.

    This function takes a scalar or array-like object and indicates
    whether values are valid (not missing, which is ``NaN`` in numeric
    arrays, ``None`` or ``NaN`` in object arrays, ``NaT`` in datetimelike).

    Parameters
    ----------
    obj : array-like or object value
        Object to check for *not* null or *non*-missing values.

    Returns
    -------
    bool or array-like of bool
        For scalar input, returns a scalar boolean.
        For array input, returns an array of boolean indicating whether each
        corresponding element is valid.

    See Also
    --------
    isna : Boolean inverse of pandas.notna.
    Series.notna : Detect valid values in a Series.
    DataFrame.notna : Detect valid values in a DataFrame.
    Index.notna : Detect valid values in an Index.

    Examples
    --------
    Scalar arguments (including strings) result in a scalar boolean.

    >>> pd.notna('dog')
    True

    >>> pd.notna(pd.NA)
    False

    >>> pd.notna(np.nan)
    False

    ndarrays result in an ndarray of booleans.

    >>> array = np.array([[1, np.nan, 3], [4, 5, np.nan]])
    >>> array
    array([[ 1., nan,  3.],
           [ 4.,  5., nan]])
    >>> pd.notna(array)
    array([[ True, False,  True],
           [ True,  True, False]])

    For indexes, an ndarray of booleans is returned.

    >>> index = pd.DatetimeIndex(["2017-07-05", "2017-07-06", None,
    ...                           "2017-07-08"])
    >>> index
    DatetimeIndex(['2017-07-05', '2017-07-06', 'NaT', '2017-07-08'],
                  dtype='datetime64[ns]', freq=None)
    >>> pd.notna(index)
    array([ True,  True, False,  True])

    For Series and DataFrame, the same type is returned, containing booleans.

    >>> df = pd.DataFrame([['ant', 'bee', 'cat'], ['dog', None, 'fly']])
    >>> df
         0     1    2
    0  ant   bee  cat
    1  dog  None  fly
    >>> pd.notna(df)
          0      1     2
    0  True   True  True
    1  True  False  True

    >>> pd.notna(df[1])
    0     True
    1    False
    Name: 1, dtype: bool
    """
    res = isna(obj)
    # Scalars are negated with ``not``; array-likes are inverted
    # element-wise with ``~``.
    if is_scalar(res):
        return not res
    return ~res


# Second public spelling of the same function.
notnull = notna
393def _isna_compat(arr, fill_value=np.nan) -> bool:
394 """
395 Parameters
396 ----------
397 arr: a numpy array
398 fill_value: fill value, default to np.nan
400 Returns
401 -------
402 True if we can fill using this fill_value
403 """
404 dtype = arr.dtype
405 if isna(fill_value):
406 return not (is_bool_dtype(dtype) or is_integer_dtype(dtype))
407 return True
def array_equivalent(left, right, strict_nan: bool = False) -> bool:
    """
    True if two arrays, left and right, have equal non-NaN elements, and NaNs
    in corresponding locations. False otherwise. It is assumed that left and
    right are NumPy arrays of the same dtype. The behavior of this function
    (particularly with respect to NaNs) is not defined if the dtypes are
    different.

    Parameters
    ----------
    left, right : ndarrays
    strict_nan : bool, default False
        If True, consider NaN and None to be different.

    Returns
    -------
    b : bool
        Returns True if the arrays are equivalent.

    Examples
    --------
    >>> array_equivalent(
    ...     np.array([1, 2, np.nan]),
    ...     np.array([1, 2, np.nan]))
    True
    >>> array_equivalent(
    ...     np.array([1, np.nan, 2]),
    ...     np.array([1, 2, np.nan]))
    False
    """

    left, right = np.asarray(left), np.asarray(right)

    # shape compat
    if left.shape != right.shape:
        return False

    # Object arrays can contain None, NaN and NaT.
    # string dtypes must come to this path for NumPy 1.7.1 compat
    if is_string_dtype(left) or is_string_dtype(right):

        if not strict_nan:
            # isna considers NaN and None to be equivalent.
            return lib.array_equivalent_object(
                ensure_object(left.ravel()), ensure_object(right.ravel())
            )

        for left_value, right_value in zip(left, right):
            # The missing-value sentinels are matched by identity and must
            # never reach the generic ``!=`` comparison below: ``NaT != NaT``
            # is True and ``NA != NA`` is NA, so a matching pair would be
            # reported as different.
            if left_value is NaT:
                if right_value is not NaT:
                    return False

            elif left_value is libmissing.NA:
                if right_value is not libmissing.NA:
                    return False

            elif isinstance(left_value, float) and np.isnan(left_value):
                if not isinstance(right_value, float) or not np.isnan(right_value):
                    return False
            else:
                try:
                    if np.any(np.asarray(left_value != right_value)):
                        return False
                except TypeError as err:
                    if "Cannot compare tz-naive" in str(err):
                        # tzawareness compat failure, see GH#28507
                        return False
                    elif "boolean value of NA is ambiguous" in str(err):
                        return False
                    raise
        return True

    # NaNs can occur in float and complex arrays.
    if is_float_dtype(left) or is_complex_dtype(left):

        # empty
        if not (np.prod(left.shape) and np.prod(right.shape)):
            return True
        return ((left == right) | (isna(left) & isna(right))).all()

    elif is_datetimelike_v_numeric(left, right):
        # GH#29553 avoid numpy deprecation warning
        return False

    elif needs_i8_conversion(left) or needs_i8_conversion(right):
        # datetime64, timedelta64, Period
        if not is_dtype_equal(left.dtype, right.dtype):
            return False

        left = left.view("i8")
        right = right.view("i8")

    # if we have structured dtypes, compare first
    if left.dtype.type is np.void or right.dtype.type is np.void:
        if left.dtype != right.dtype:
            return False

    return np.array_equal(left, right)
508def _infer_fill_value(val):
509 """
510 infer the fill value for the nan/NaT from the provided
511 scalar/ndarray/list-like if we are a NaT, return the correct dtyped
512 element to provide proper block construction
513 """
515 if not is_list_like(val):
516 val = [val]
517 val = np.array(val, copy=False)
518 if needs_i8_conversion(val):
519 return np.array("NaT", dtype=val.dtype)
520 elif is_object_dtype(val.dtype):
521 dtype = lib.infer_dtype(ensure_object(val), skipna=False)
522 if dtype in ["datetime", "datetime64"]:
523 return np.array("NaT", dtype=_NS_DTYPE)
524 elif dtype in ["timedelta", "timedelta64"]:
525 return np.array("NaT", dtype=_TD_DTYPE)
526 return np.nan
def _maybe_fill(arr, fill_value=np.nan):
    """
    If we have a compatible fill_value and arr dtype, then fill.

    Parameters
    ----------
    arr : a numpy array
        Filled in place when ``_isna_compat`` accepts the combination.
    fill_value : fill value, default to np.nan

    Returns
    -------
    The same ``arr`` object, filled or untouched.
    """
    if _isna_compat(arr, fill_value):
        arr.fill(fill_value)
    return arr
def na_value_for_dtype(dtype, compat: bool = True):
    """
    Return a dtype compat na value.

    Parameters
    ----------
    dtype : string / dtype
    compat : bool, default True
        Only consulted for integer dtypes: if True return 0, otherwise
        return NaN.

    Returns
    -------
    scalar
        The extension dtype's own ``na_value``, ``NaT`` for datetime64,
        timezone-aware datetime64, timedelta64 and period dtypes, ``False``
        for bool, ``0`` or NaN for integers (see ``compat``), and NaN for
        everything else.

    Examples
    --------
    >>> na_value_for_dtype(np.dtype('int64'))
    0
    >>> na_value_for_dtype(np.dtype('int64'), compat=False)
    nan
    >>> na_value_for_dtype(np.dtype('float64'))
    nan
    >>> na_value_for_dtype(np.dtype('bool'))
    False
    >>> na_value_for_dtype(np.dtype('datetime64[ns]'))
    NaT
    """
    dtype = pandas_dtype(dtype)

    if is_extension_array_dtype(dtype):
        return dtype.na_value
    if (
        is_datetime64_dtype(dtype)
        or is_datetime64tz_dtype(dtype)
        or is_timedelta64_dtype(dtype)
        or is_period_dtype(dtype)
    ):
        return NaT
    elif is_float_dtype(dtype):
        return np.nan
    elif is_integer_dtype(dtype):
        if compat:
            return 0
        return np.nan
    elif is_bool_dtype(dtype):
        return False
    return np.nan
def remove_na_arraylike(arr):
    """
    Return array-like containing only true/non-NaN values, possibly empty.

    Parameters
    ----------
    arr : array-like
        Indexed with the boolean mask returned by ``notna``.
    """
    if is_extension_array_dtype(arr):
        return arr[notna(arr)]
    else:
        # The mask is computed on ``lib.values_from_object(arr)`` but
        # applied to ``arr`` itself.
        return arr[notna(lib.values_from_object(arr))]
def is_valid_nat_for_dtype(obj, dtype) -> bool:
    """
    isna check that excludes incompatible dtypes.

    Parameters
    ----------
    obj : object
    dtype : np.datetime64, np.timedelta64, DatetimeTZDtype, or PeriodDtype

    Returns
    -------
    bool
        False unless ``obj`` is a scalar that ``isna`` reports as missing.
        For such a scalar, False when it is a ``np.timedelta64`` and the
        dtype kind is "M", a ``np.datetime64`` and the kind is "m", or
        either of the two for any other dtype; True otherwise.
    """
    if not lib.is_scalar(obj) or not isna(obj):
        return False
    if dtype.kind == "M":
        return not isinstance(obj, np.timedelta64)
    if dtype.kind == "m":
        return not isinstance(obj, np.datetime64)

    # must be PeriodDType
    return not isinstance(obj, (np.datetime64, np.timedelta64))