Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1""" 

2missing types & inference 

3""" 

4import numpy as np 

5 

6from pandas._config import get_option 

7 

8from pandas._libs import lib 

9import pandas._libs.missing as libmissing 

10from pandas._libs.tslibs import NaT, iNaT 

11 

12from pandas.core.dtypes.common import ( 

13 _NS_DTYPE, 

14 _TD_DTYPE, 

15 ensure_object, 

16 is_bool_dtype, 

17 is_complex_dtype, 

18 is_datetime64_dtype, 

19 is_datetime64tz_dtype, 

20 is_datetimelike_v_numeric, 

21 is_dtype_equal, 

22 is_extension_array_dtype, 

23 is_float_dtype, 

24 is_integer_dtype, 

25 is_object_dtype, 

26 is_period_dtype, 

27 is_scalar, 

28 is_string_dtype, 

29 is_string_like_dtype, 

30 is_timedelta64_dtype, 

31 needs_i8_conversion, 

32 pandas_dtype, 

33) 

34from pandas.core.dtypes.generic import ( 

35 ABCDatetimeArray, 

36 ABCExtensionArray, 

37 ABCGeneric, 

38 ABCIndexClass, 

39 ABCMultiIndex, 

40 ABCSeries, 

41 ABCTimedeltaArray, 

42) 

43from pandas.core.dtypes.inference import is_list_like 

44 

# Module-level aliases for the scalar +inf / -inf checks provided by
# ``pandas._libs.missing``.
isposinf_scalar, isneginf_scalar = (
    libmissing.isposinf_scalar,
    libmissing.isneginf_scalar,
)

47 

48 

def isna(obj):
    """
    Detect missing values in a scalar or array-like object.

    The result flags which values are missing: ``NaN`` in numeric arrays,
    ``None`` or ``NaN`` in object arrays, and ``NaT`` in datetimelike data.

    Parameters
    ----------
    obj : scalar or array-like
        Object to check for null or missing values.

    Returns
    -------
    bool or array-like of bool
        A scalar boolean for scalar input. For array input, an array of
        booleans marking, element by element, whether the value is missing.

    See Also
    --------
    notna : Boolean inverse of pandas.isna.
    Series.isna : Detect missing values in a Series.
    DataFrame.isna : Detect missing values in a DataFrame.
    Index.isna : Detect missing values in an Index.

    Examples
    --------
    Scalar arguments (including strings) result in a scalar boolean.

    >>> pd.isna('dog')
    False

    >>> pd.isna(pd.NA)
    True

    >>> pd.isna(np.nan)
    True

    ndarrays result in an ndarray of booleans.

    >>> array = np.array([[1, np.nan, 3], [4, 5, np.nan]])
    >>> array
    array([[ 1., nan,  3.],
           [ 4.,  5., nan]])
    >>> pd.isna(array)
    array([[False,  True, False],
           [False, False,  True]])

    For indexes, an ndarray of booleans is returned.

    >>> index = pd.DatetimeIndex(["2017-07-05", "2017-07-06", None,
    ...                           "2017-07-08"])
    >>> index
    DatetimeIndex(['2017-07-05', '2017-07-06', 'NaT', '2017-07-08'],
                  dtype='datetime64[ns]', freq=None)
    >>> pd.isna(index)
    array([False, False,  True, False])

    For Series and DataFrame, the same type is returned, containing booleans.

    >>> df = pd.DataFrame([['ant', 'bee', 'cat'], ['dog', None, 'fly']])
    >>> df
         0     1    2
    0  ant   bee  cat
    1  dog  None  fly
    >>> pd.isna(df)
           0      1      2
    0  False  False  False
    1  False   True  False

    >>> pd.isna(df[1])
    0    False
    1     True
    Name: 1, dtype: bool
    """
    # ``_isna`` is looked up at call time on purpose: the option callback
    # ``_use_inf_as_na`` rebinds it between ``_isna_new`` and ``_isna_old``.
    return _isna(obj)


# Second public name bound to the very same function object.
isnull = isna

130 

131 

132def _isna_new(obj): 

133 

134 if is_scalar(obj): 

135 return libmissing.checknull(obj) 

136 # hack (for now) because MI registers as ndarray 

137 elif isinstance(obj, ABCMultiIndex): 

138 raise NotImplementedError("isna is not defined for MultiIndex") 

139 elif isinstance(obj, type): 

140 return False 

141 elif isinstance( 

142 obj, 

143 ( 

144 ABCSeries, 

145 np.ndarray, 

146 ABCIndexClass, 

147 ABCExtensionArray, 

148 ABCDatetimeArray, 

149 ABCTimedeltaArray, 

150 ), 

151 ): 

152 return _isna_ndarraylike(obj) 

153 elif isinstance(obj, ABCGeneric): 

154 return obj._constructor(obj._data.isna(func=isna)) 

155 elif isinstance(obj, list): 

156 return _isna_ndarraylike(np.asarray(obj, dtype=object)) 

157 elif hasattr(obj, "__array__"): 

158 return _isna_ndarraylike(np.asarray(obj)) 

159 else: 

160 return obj is None 

161 

162 

163def _isna_old(obj): 

164 """ 

165 Detect missing values, treating None, NaN, INF, -INF as null. 

166 

167 Parameters 

168 ---------- 

169 arr: ndarray or object value 

170 

171 Returns 

172 ------- 

173 boolean ndarray or boolean 

174 """ 

175 if is_scalar(obj): 

176 return libmissing.checknull_old(obj) 

177 # hack (for now) because MI registers as ndarray 

178 elif isinstance(obj, ABCMultiIndex): 

179 raise NotImplementedError("isna is not defined for MultiIndex") 

180 elif isinstance(obj, type): 

181 return False 

182 elif isinstance(obj, (ABCSeries, np.ndarray, ABCIndexClass, ABCExtensionArray)): 

183 return _isna_ndarraylike_old(obj) 

184 elif isinstance(obj, ABCGeneric): 

185 return obj._constructor(obj._data.isna(func=_isna_old)) 

186 elif isinstance(obj, list): 

187 return _isna_ndarraylike_old(np.asarray(obj, dtype=object)) 

188 elif hasattr(obj, "__array__"): 

189 return _isna_ndarraylike_old(np.asarray(obj)) 

190 else: 

191 return obj is None 

192 

193 

# Detector that ``isna`` delegates to. Defaults to the "new" rules and is
# rebound by ``_use_inf_as_na`` whenever the option changes.
_isna = _isna_new


def _use_inf_as_na(key):
    """
    Option change callback for na/inf behaviour.

    Choose which replacement for numpy.isnan / -numpy.isfinite is used.

    Parameters
    ----------
    key : str
        Name of the option to read with ``get_option``. A truthy option
        value means treat None, NaN, INF, -INF as null (old way); a falsy
        value means None and NaN are null, but INF, -INF are not null
        (new way).

    Notes
    -----
    This approach to setting global module values is discussed and
    approved here:

    * https://stackoverflow.com/questions/4859217/
      programmatically-creating-variables-in-python/4859312#4859312
    """
    treat_inf_as_na = get_option(key)
    # Rebind the module-level ``_isna`` so later ``isna`` calls pick it up.
    globals()["_isna"] = _isna_old if treat_inf_as_na else _isna_new

223 

224 

225def _isna_ndarraylike(obj): 

226 is_extension = is_extension_array_dtype(obj) 

227 

228 if not is_extension: 

229 # Avoid accessing `.values` on things like 

230 # PeriodIndex, which may be expensive. 

231 values = getattr(obj, "values", obj) 

232 else: 

233 values = obj 

234 

235 dtype = values.dtype 

236 

237 if is_extension: 

238 if isinstance(obj, (ABCIndexClass, ABCSeries)): 

239 values = obj._values 

240 else: 

241 values = obj 

242 result = values.isna() 

243 elif isinstance(obj, ABCDatetimeArray): 

244 return obj.isna() 

245 elif is_string_dtype(dtype): 

246 # Working around NumPy ticket 1542 

247 shape = values.shape 

248 

249 if is_string_like_dtype(dtype): 

250 # object array of strings 

251 result = np.zeros(values.shape, dtype=bool) 

252 else: 

253 # object array of non-strings 

254 result = np.empty(shape, dtype=bool) 

255 vec = libmissing.isnaobj(values.ravel()) 

256 result[...] = vec.reshape(shape) 

257 

258 elif needs_i8_conversion(dtype): 

259 # this is the NaT pattern 

260 result = values.view("i8") == iNaT 

261 else: 

262 result = np.isnan(values) 

263 

264 # box 

265 if isinstance(obj, ABCSeries): 

266 result = obj._constructor(result, index=obj.index, name=obj.name, copy=False) 

267 

268 return result 

269 

270 

271def _isna_ndarraylike_old(obj): 

272 is_extension = is_extension_array_dtype(obj) 

273 

274 values = getattr(obj, "values", obj) 

275 dtype = values.dtype 

276 

277 if is_extension: 

278 if isinstance(obj, (ABCIndexClass, ABCSeries)): 

279 values = obj._values 

280 else: 

281 values = obj 

282 result = values.isna() | (values == -np.inf) | (values == np.inf) 

283 elif is_string_dtype(dtype): 

284 # Working around NumPy ticket 1542 

285 shape = values.shape 

286 

287 if is_string_like_dtype(dtype): 

288 result = np.zeros(values.shape, dtype=bool) 

289 else: 

290 result = np.empty(shape, dtype=bool) 

291 vec = libmissing.isnaobj_old(values.ravel()) 

292 result[:] = vec.reshape(shape) 

293 

294 elif is_datetime64_dtype(dtype): 

295 # this is the NaT pattern 

296 result = values.view("i8") == iNaT 

297 else: 

298 result = ~np.isfinite(values) 

299 

300 # box 

301 if isinstance(obj, ABCSeries): 

302 result = obj._constructor(result, index=obj.index, name=obj.name, copy=False) 

303 

304 return result 

305 

306 

def notna(obj):
    """
    Detect non-missing values in a scalar or array-like object.

    The result flags which values are valid, i.e. not missing. Missing
    means ``NaN`` in numeric arrays, ``None`` or ``NaN`` in object arrays,
    and ``NaT`` in datetimelike data.

    Parameters
    ----------
    obj : array-like or object value
        Object to check for *not* null or *non*-missing values.

    Returns
    -------
    bool or array-like of bool
        A scalar boolean for scalar input. For array input, an array of
        booleans marking, element by element, whether the value is valid.

    See Also
    --------
    isna : Boolean inverse of pandas.notna.
    Series.notna : Detect valid values in a Series.
    DataFrame.notna : Detect valid values in a DataFrame.
    Index.notna : Detect valid values in an Index.

    Examples
    --------
    Scalar arguments (including strings) result in a scalar boolean.

    >>> pd.notna('dog')
    True

    >>> pd.notna(pd.NA)
    False

    >>> pd.notna(np.nan)
    False

    ndarrays result in an ndarray of booleans.

    >>> array = np.array([[1, np.nan, 3], [4, 5, np.nan]])
    >>> array
    array([[ 1., nan,  3.],
           [ 4.,  5., nan]])
    >>> pd.notna(array)
    array([[ True, False,  True],
           [ True,  True, False]])

    For indexes, an ndarray of booleans is returned.

    >>> index = pd.DatetimeIndex(["2017-07-05", "2017-07-06", None,
    ...                           "2017-07-08"])
    >>> index
    DatetimeIndex(['2017-07-05', '2017-07-06', 'NaT', '2017-07-08'],
                  dtype='datetime64[ns]', freq=None)
    >>> pd.notna(index)
    array([ True,  True, False,  True])

    For Series and DataFrame, the same type is returned, containing booleans.

    >>> df = pd.DataFrame([['ant', 'bee', 'cat'], ['dog', None, 'fly']])
    >>> df
         0     1    2
    0  ant   bee  cat
    1  dog  None  fly
    >>> pd.notna(df)
          0      1     2
    0  True   True  True
    1  True  False  True

    >>> pd.notna(df[1])
    0     True
    1    False
    Name: 1, dtype: bool
    """
    missing = isna(obj)
    # Scalars are negated with ``not``; everything else is inverted
    # element-wise with ``~``.
    return (not missing) if is_scalar(missing) else ~missing


# Second public name bound to the very same function object.
notnull = notna

391 

392 

393def _isna_compat(arr, fill_value=np.nan) -> bool: 

394 """ 

395 Parameters 

396 ---------- 

397 arr: a numpy array 

398 fill_value: fill value, default to np.nan 

399 

400 Returns 

401 ------- 

402 True if we can fill using this fill_value 

403 """ 

404 dtype = arr.dtype 

405 if isna(fill_value): 

406 return not (is_bool_dtype(dtype) or is_integer_dtype(dtype)) 

407 return True 

408 

409 

def array_equivalent(left, right, strict_nan: bool = False) -> bool:
    """
    True if two arrays, left and right, have equal non-NaN elements, and NaNs
    in corresponding locations. False otherwise. It is assumed that left and
    right are NumPy arrays of the same dtype. The behavior of this function
    (particularly with respect to NaNs) is not defined if the dtypes are
    different.

    Parameters
    ----------
    left, right : ndarrays
    strict_nan : bool, default False
        If True, consider NaN and None to be different. Missing markers of
        the same kind (NaT with NaT, NA with NA, float NaN with float NaN)
        at the same position still count as equivalent.

    Returns
    -------
    b : bool
        Returns True if the arrays are equivalent.

    Examples
    --------
    >>> array_equivalent(
    ...     np.array([1, 2, np.nan]),
    ...     np.array([1, 2, np.nan]))
    True
    >>> array_equivalent(
    ...     np.array([1, np.nan, 2]),
    ...     np.array([1, 2, np.nan]))
    False
    """

    left, right = np.asarray(left), np.asarray(right)

    # shape compat
    if left.shape != right.shape:
        return False

    # Object arrays can contain None, NaN and NaT.
    # string dtypes must come to this path for NumPy 1.7.1 compat
    if is_string_dtype(left.dtype) or is_string_dtype(right.dtype):

        if not strict_nan:
            # isna considers NaN and None to be equivalent.
            return lib.array_equivalent_object(
                ensure_object(left.ravel()), ensure_object(right.ravel())
            )

        for left_value, right_value in zip(left, right):
            # Each missing marker only matches a marker of the same kind.
            # The checks are nested so that a matching pair moves on to the
            # next element instead of falling through to ``!=`` below, where
            # NaT != NaT is True and NA != NA is ambiguous.
            if left_value is NaT:
                if right_value is not NaT:
                    return False

            elif left_value is libmissing.NA:
                if right_value is not libmissing.NA:
                    return False

            elif isinstance(left_value, float) and np.isnan(left_value):
                if not isinstance(right_value, float) or not np.isnan(right_value):
                    return False
            else:
                try:
                    if np.any(np.asarray(left_value != right_value)):
                        return False
                except TypeError as err:
                    if "Cannot compare tz-naive" in str(err):
                        # tzawareness compat failure, see GH#28507
                        return False
                    elif "boolean value of NA is ambiguous" in str(err):
                        return False
                    raise
        return True

    # NaNs can occur in float and complex arrays.
    if is_float_dtype(left) or is_complex_dtype(left):

        # empty
        if not (np.prod(left.shape) and np.prod(right.shape)):
            return True
        # ``.all()`` yields a NumPy boolean; convert to honour ``-> bool``.
        return bool(((left == right) | (isna(left) & isna(right))).all())

    elif is_datetimelike_v_numeric(left, right):
        # GH#29553 avoid numpy deprecation warning
        return False

    elif needs_i8_conversion(left) or needs_i8_conversion(right):
        # datetime64, timedelta64, Period
        if not is_dtype_equal(left.dtype, right.dtype):
            return False

        left = left.view("i8")
        right = right.view("i8")

    # if we have structured dtypes, compare first
    if left.dtype.type is np.void or right.dtype.type is np.void:
        if left.dtype != right.dtype:
            return False

    return np.array_equal(left, right)

506 

507 

508def _infer_fill_value(val): 

509 """ 

510 infer the fill value for the nan/NaT from the provided 

511 scalar/ndarray/list-like if we are a NaT, return the correct dtyped 

512 element to provide proper block construction 

513 """ 

514 

515 if not is_list_like(val): 

516 val = [val] 

517 val = np.array(val, copy=False) 

518 if needs_i8_conversion(val): 

519 return np.array("NaT", dtype=val.dtype) 

520 elif is_object_dtype(val.dtype): 

521 dtype = lib.infer_dtype(ensure_object(val), skipna=False) 

522 if dtype in ["datetime", "datetime64"]: 

523 return np.array("NaT", dtype=_NS_DTYPE) 

524 elif dtype in ["timedelta", "timedelta64"]: 

525 return np.array("NaT", dtype=_TD_DTYPE) 

526 return np.nan 

527 

528 

def _maybe_fill(arr, fill_value=np.nan):
    """
    Fill ``arr`` in place with ``fill_value`` when the two are compatible.

    Compatibility is decided by ``_isna_compat``. The same ``arr`` object
    is returned whether or not it was filled.
    """
    can_fill = _isna_compat(arr, fill_value)
    if not can_fill:
        return arr
    arr.fill(fill_value)
    return arr

536 

537 

def na_value_for_dtype(dtype, compat: bool = True):
    """
    Return the NA value compatible with a dtype.

    Parameters
    ----------
    dtype : string / dtype
    compat : bool, default True
        For integer dtypes, return ``0`` when True and ``np.nan`` when False.

    Returns
    -------
    The dtype's own ``na_value`` for extension dtypes, ``NaT`` for
    datetime64, tz-aware datetime64, timedelta64 and period dtypes,
    ``False`` for boolean dtypes, ``0`` or ``np.nan`` for integer dtypes
    (see ``compat``) and ``np.nan`` otherwise.

    Examples
    --------
    >>> na_value_for_dtype(np.dtype('int64'))
    0
    >>> na_value_for_dtype(np.dtype('int64'), compat=False)
    nan
    >>> na_value_for_dtype(np.dtype('float64'))
    nan
    >>> na_value_for_dtype(np.dtype('bool'))
    False
    >>> na_value_for_dtype(np.dtype('datetime64[ns]'))
    NaT
    """
    dtype = pandas_dtype(dtype)

    if is_extension_array_dtype(dtype):
        return dtype.na_value

    is_datetimelike = (
        is_datetime64_dtype(dtype)
        or is_datetime64tz_dtype(dtype)
        or is_timedelta64_dtype(dtype)
        or is_period_dtype(dtype)
    )
    if is_datetimelike:
        return NaT
    if is_float_dtype(dtype):
        return np.nan
    if is_integer_dtype(dtype):
        return 0 if compat else np.nan
    if is_bool_dtype(dtype):
        return False
    return np.nan

584 

585 

def remove_na_arraylike(arr):
    """
    Return array-like containing only true/non-NaN values, possibly empty.

    The keep-mask is computed on ``arr`` itself for extension dtypes and on
    ``lib.values_from_object(arr)`` otherwise; ``arr`` is then indexed with
    that mask.
    """
    if is_extension_array_dtype(arr):
        keep = notna(arr)
    else:
        keep = notna(lib.values_from_object(arr))
    return arr[keep]

594 

595 

def is_valid_nat_for_dtype(obj, dtype) -> bool:
    """
    isna check that excludes incompatible dtypes

    Parameters
    ----------
    obj : object
    dtype : np.datetime64, np.timedelta64, DatetimeTZDtype, or PeriodDtype

    Returns
    -------
    bool
        True only if ``obj`` is a scalar for which ``isna`` holds and it is
        not a NumPy datetimelike scalar of a type that conflicts with
        ``dtype``.
    """
    if not (lib.is_scalar(obj) and isna(obj)):
        return False

    kind = dtype.kind
    if kind == "M":
        # A datetime dtype cannot take a timedelta NaT.
        conflicting = np.timedelta64
    elif kind == "m":
        # A timedelta dtype cannot take a datetime NaT.
        conflicting = np.datetime64
    else:
        # must be PeriodDType
        conflicting = (np.datetime64, np.timedelta64)
    return not isinstance(obj, conflicting)