Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1from datetime import datetime, timedelta 

2import functools 

3import inspect 

4import re 

5from typing import Any, List 

6import warnings 

7 

8import numpy as np 

9 

10from pandas._libs import NaT, Timestamp, algos as libalgos, lib, tslib, writers 

11from pandas._libs.index import convert_scalar 

12import pandas._libs.internals as libinternals 

13from pandas._libs.tslibs import Timedelta, conversion 

14from pandas._libs.tslibs.timezones import tz_compare 

15from pandas.util._validators import validate_bool_kwarg 

16 

17from pandas.core.dtypes.cast import ( 

18 astype_nansafe, 

19 find_common_type, 

20 infer_dtype_from, 

21 infer_dtype_from_scalar, 

22 maybe_downcast_numeric, 

23 maybe_downcast_to_dtype, 

24 maybe_infer_dtype_type, 

25 maybe_promote, 

26 maybe_upcast, 

27 soft_convert_objects, 

28) 

29from pandas.core.dtypes.common import ( 

30 _NS_DTYPE, 

31 _TD_DTYPE, 

32 ensure_platform_int, 

33 is_bool_dtype, 

34 is_categorical, 

35 is_categorical_dtype, 

36 is_datetime64_dtype, 

37 is_datetime64tz_dtype, 

38 is_dtype_equal, 

39 is_extension_array_dtype, 

40 is_float_dtype, 

41 is_integer, 

42 is_integer_dtype, 

43 is_interval_dtype, 

44 is_list_like, 

45 is_object_dtype, 

46 is_period_dtype, 

47 is_re, 

48 is_re_compilable, 

49 is_sparse, 

50 is_timedelta64_dtype, 

51 pandas_dtype, 

52) 

53from pandas.core.dtypes.concat import concat_categorical, concat_datetime 

54from pandas.core.dtypes.dtypes import CategoricalDtype, ExtensionDtype 

55from pandas.core.dtypes.generic import ( 

56 ABCDataFrame, 

57 ABCExtensionArray, 

58 ABCPandasArray, 

59 ABCSeries, 

60) 

61from pandas.core.dtypes.missing import ( 

62 _isna_compat, 

63 array_equivalent, 

64 is_valid_nat_for_dtype, 

65 isna, 

66) 

67 

68import pandas.core.algorithms as algos 

69from pandas.core.arrays import ( 

70 Categorical, 

71 DatetimeArray, 

72 ExtensionArray, 

73 PandasArray, 

74 PandasDtype, 

75 TimedeltaArray, 

76) 

77from pandas.core.base import PandasObject 

78import pandas.core.common as com 

79from pandas.core.construction import extract_array 

80from pandas.core.indexers import ( 

81 check_setitem_lengths, 

82 is_empty_indexer, 

83 is_scalar_indexer, 

84) 

85import pandas.core.missing as missing 

86from pandas.core.nanops import nanpercentile 

87 

88from pandas.io.formats.printing import pprint_thing 

89 

90 

91class Block(PandasObject): 

92 """ 

93 Canonical n-dimensional unit of homogeneous dtype contained in a pandas 

94 data structure 

95 

96 Index-ignorant; let the container take care of that 

97 """ 

98 

99 __slots__ = ["_mgr_locs", "values", "ndim"] 

100 is_numeric = False 

101 is_float = False 

102 is_integer = False 

103 is_complex = False 

104 is_datetime = False 

105 is_datetimetz = False 

106 is_timedelta = False 

107 is_bool = False 

108 is_object = False 

109 is_categorical = False 

110 is_extension = False 

111 _can_hold_na = False 

112 _can_consolidate = True 

113 _verify_integrity = True 

114 _validate_ndim = True 

115 _ftype = "dense" 

116 _concatenator = staticmethod(np.concatenate) 

117 

118 def __init__(self, values, placement, ndim=None): 

119 self.ndim = self._check_ndim(values, ndim) 

120 self.mgr_locs = placement 

121 self.values = values 

122 

123 if self._validate_ndim and self.ndim and len(self.mgr_locs) != len(self.values): 

124 raise ValueError( 

125 f"Wrong number of items passed {len(self.values)}, " 

126 f"placement implies {len(self.mgr_locs)}" 

127 ) 

128 

129 def _check_ndim(self, values, ndim): 

130 """ 

131 ndim inference and validation. 

132 

133 Infers ndim from 'values' if not provided to __init__. 

134 Validates that values.ndim and ndim are consistent if and only if 

135 the class variable '_validate_ndim' is True. 

136 

137 Parameters 

138 ---------- 

139 values : array-like 

140 ndim : int or None 

141 

142 Returns 

143 ------- 

144 ndim : int 

145 

146 Raises 

147 ------ 

148 ValueError : the number of dimensions do not match 

149 """ 

150 if ndim is None: 

151 ndim = values.ndim 

152 

153 if self._validate_ndim and values.ndim != ndim: 

154 raise ValueError( 

155 "Wrong number of dimensions. " 

156 f"values.ndim != ndim [{values.ndim} != {ndim}]" 

157 ) 

158 return ndim 

159 

160 @property 

161 def _holder(self): 

162 """The array-like that can hold the underlying values. 

163 

164 None for 'Block', overridden by subclasses that don't 

165 use an ndarray. 

166 """ 

167 return None 

168 

169 @property 

170 def _consolidate_key(self): 

171 return (self._can_consolidate, self.dtype.name) 

172 

173 @property 

174 def _is_single_block(self): 

175 return self.ndim == 1 

176 

177 @property 

178 def is_view(self): 

179 """ return a boolean if I am possibly a view """ 

180 return self.values.base is not None 

181 

182 @property 

183 def is_datelike(self): 

184 """ return True if I am a non-datelike """ 

185 return self.is_datetime or self.is_timedelta 

186 

187 def is_categorical_astype(self, dtype): 

188 """ 

189 validate that we have a astypeable to categorical, 

190 returns a boolean if we are a categorical 

191 """ 

192 if dtype is Categorical or dtype is CategoricalDtype: 

193 # this is a pd.Categorical, but is not 

194 # a valid type for astypeing 

195 raise TypeError(f"invalid type {dtype} for astype") 

196 

197 elif is_categorical_dtype(dtype): 

198 return True 

199 

200 return False 

201 

202 def external_values(self, dtype=None): 

203 """ 

204 The array that Series.values returns (public attribute). 

205 

206 This has some historical constraints, and is overridden in block 

207 subclasses to return the correct array (e.g. period returns 

208 object ndarray and datetimetz a datetime64[ns] ndarray instead of 

209 proper extension array). 

210 """ 

211 return self.values 

212 

213 def internal_values(self, dtype=None): 

214 """ return an internal format, currently just the ndarray 

215 this should be the pure internal API format 

216 """ 

217 return self.values 

218 

219 def array_values(self) -> ExtensionArray: 

220 """ 

221 The array that Series.array returns. Always an ExtensionArray. 

222 """ 

223 return PandasArray(self.values) 

224 

225 def get_values(self, dtype=None): 

226 """ 

227 return an internal format, currently just the ndarray 

228 this is often overridden to handle to_dense like operations 

229 """ 

230 if is_object_dtype(dtype): 

231 return self.values.astype(object) 

232 return self.values 

233 

234 def get_block_values(self, dtype=None): 

235 """ 

236 This is used in the JSON C code 

237 """ 

238 return self.get_values(dtype=dtype) 

239 

240 def to_dense(self): 

241 return self.values.view() 

242 

243 @property 

244 def fill_value(self): 

245 return np.nan 

246 

247 @property 

248 def mgr_locs(self): 

249 return self._mgr_locs 

250 

251 @mgr_locs.setter 

252 def mgr_locs(self, new_mgr_locs): 

253 if not isinstance(new_mgr_locs, libinternals.BlockPlacement): 

254 new_mgr_locs = libinternals.BlockPlacement(new_mgr_locs) 

255 

256 self._mgr_locs = new_mgr_locs 

257 

258 @property 

259 def array_dtype(self): 

260 """ the dtype to return if I want to construct this block as an 

261 array 

262 """ 

263 return self.dtype 

264 

def make_block(self, values, placement=None) -> "Block":
    """
    Build a new block through the module-level ``make_block`` factory.

    ``placement`` defaults to this block's ``mgr_locs`` and ``ndim`` is
    always propagated from ``self``. No ``klass`` is passed, so the
    factory decides the block type.
    """
    locs = self.mgr_locs if placement is None else placement
    return make_block(values, placement=locs, ndim=self.ndim)

274 

def make_block_same_class(self, values, placement=None, ndim=None):
    """
    Wrap ``values`` in a block of the same type as ``self``.

    ``placement`` and ``ndim`` fall back to this block's ``mgr_locs``
    and ``ndim`` when not supplied.
    """
    locs = self.mgr_locs if placement is None else placement
    dims = self.ndim if ndim is None else ndim
    return make_block(values, placement=locs, ndim=dims, klass=type(self))

282 

283 def __repr__(self) -> str: 

284 # don't want to print out all of the items here 

285 name = type(self).__name__ 

286 if self._is_single_block: 

287 

288 result = f"{name}: {len(self)} dtype: {self.dtype}" 

289 

290 else: 

291 

292 shape = " x ".join(pprint_thing(s) for s in self.shape) 

293 result = ( 

294 f"{name}: {pprint_thing(self.mgr_locs.indexer)}, " 

295 f"{shape}, dtype: {self.dtype}" 

296 ) 

297 

298 return result 

299 

300 def __len__(self) -> int: 

301 return len(self.values) 

302 

303 def __getstate__(self): 

304 return self.mgr_locs.indexer, self.values 

305 

306 def __setstate__(self, state): 

307 self.mgr_locs = libinternals.BlockPlacement(state[0]) 

308 self.values = state[1] 

309 self.ndim = self.values.ndim 

310 

311 def _slice(self, slicer): 

312 """ return a slice of my values """ 

313 return self.values[slicer] 

314 

315 def getitem_block(self, slicer, new_mgr_locs=None): 

316 """ 

317 Perform __getitem__-like, return result as block. 

318 

319 As of now, only supports slices that preserve dimensionality. 

320 """ 

321 if new_mgr_locs is None: 

322 if isinstance(slicer, tuple): 

323 axis0_slicer = slicer[0] 

324 else: 

325 axis0_slicer = slicer 

326 new_mgr_locs = self.mgr_locs[axis0_slicer] 

327 

328 new_values = self._slice(slicer) 

329 

330 if self._validate_ndim and new_values.ndim != self.ndim: 

331 raise ValueError("Only same dim slicing is allowed") 

332 

333 return self.make_block_same_class(new_values, new_mgr_locs) 

334 

335 @property 

336 def shape(self): 

337 return self.values.shape 

338 

339 @property 

340 def dtype(self): 

341 return self.values.dtype 

342 

343 @property 

344 def ftype(self): 

345 if getattr(self.values, "_pandas_ftype", False): 

346 dtype = self.dtype.subtype 

347 else: 

348 dtype = self.dtype 

349 return f"{dtype}:{self._ftype}" 

350 

def merge(self, other):
    """Merge this block with ``other`` via the module-level ``_merge_blocks``."""
    pair = [self, other]
    return _merge_blocks(pair)

353 

def concat_same_type(self, to_concat, placement=None):
    """
    Concatenate list of single blocks of the same type.

    Parameters
    ----------
    to_concat : list of Block
        Blocks whose ``values`` are concatenated along axis
        ``self.ndim - 1`` using ``self._concatenator``.
    placement : optional
        Placement for the resulting block. When None, the default
        ``slice(0, len(values), 1)`` is used.

    Returns
    -------
    Block
        New block of the same class as ``self``.
    """
    values = self._concatenator(
        [blk.values for blk in to_concat], axis=self.ndim - 1
    )
    # Test against None explicitly: ``placement or ...`` evaluates the
    # truthiness of ``placement``, which raises for a multi-element
    # ndarray placement.
    if placement is None:
        placement = slice(0, len(values), 1)
    return self.make_block_same_class(values, placement=placement)

364 

365 def iget(self, i): 

366 return self.values[i] 

367 

368 def set(self, locs, values): 

369 """ 

370 Modify Block in-place with new item value 

371 

372 Returns 

373 ------- 

374 None 

375 """ 

376 self.values[locs] = values 

377 

378 def delete(self, loc): 

379 """ 

380 Delete given loc(-s) from block in-place. 

381 """ 

382 self.values = np.delete(self.values, loc, 0) 

383 self.mgr_locs = self.mgr_locs.delete(loc) 

384 

def apply(self, func, **kwargs):
    """
    Apply ``func`` to the block's values.

    ``func`` is called as ``func(self.values, **kwargs)`` with all numpy
    floating-point warnings suppressed.

    Returns
    -------
    Block or list of Block
        - a list with one block per placement entry when the result is an
          extension-array dtype with more than one dimension;
        - the result itself when ``func`` already returned a Block;
        - otherwise a new block wrapping the reshaped result.
    """
    with np.errstate(all="ignore"):
        result = func(self.values, **kwargs)

    if is_extension_array_dtype(result) and result.ndim > 1:
        # A 2D ExtensionArray has to be split into one piece per row.
        return [
            self.make_block(
                values=_block_shape(result[i], ndim=self.ndim), placement=[loc]
            )
            for i, loc in enumerate(self.mgr_locs)
        ]

    if isinstance(result, Block):
        return result

    return self.make_block(values=_block_shape(result, ndim=self.ndim))

406 

407 def fillna(self, value, limit=None, inplace=False, downcast=None): 

408 """ fillna on the block with the value. If we fail, then convert to 

409 ObjectBlock and try again 

410 """ 

411 inplace = validate_bool_kwarg(inplace, "inplace") 

412 

413 mask = isna(self.values) 

414 if limit is not None: 

415 limit = libalgos._validate_limit(None, limit=limit) 

416 mask[mask.cumsum(self.ndim - 1) > limit] = False 

417 

418 if not self._can_hold_na: 

419 if inplace: 

420 return self 

421 else: 

422 return self.copy() 

423 

424 if self._can_hold_element(value): 

425 # equivalent: _try_coerce_args(value) would not raise 

426 blocks = self.putmask(mask, value, inplace=inplace) 

427 return self._maybe_downcast(blocks, downcast) 

428 

429 # we can't process the value, but nothing to do 

430 if not mask.any(): 

431 return self if inplace else self.copy() 

432 

433 # operate column-by-column 

434 def f(mask, val, idx): 

435 block = self.coerce_to_target_dtype(value) 

436 

437 # slice out our block 

438 if idx is not None: 

439 # i.e. self.ndim == 2 

440 block = block.getitem_block(slice(idx, idx + 1)) 

441 return block.fillna(value, limit=limit, inplace=inplace, downcast=None) 

442 

443 return self.split_and_operate(None, f, inplace) 

444 

445 def split_and_operate(self, mask, f, inplace: bool): 

446 """ 

447 split the block per-column, and apply the callable f 

448 per-column, return a new block for each. Handle 

449 masking which will not change a block unless needed. 

450 

451 Parameters 

452 ---------- 

453 mask : 2-d boolean mask 

454 f : callable accepting (1d-mask, 1d values, indexer) 

455 inplace : boolean 

456 

457 Returns 

458 ------- 

459 list of blocks 

460 """ 

461 

462 if mask is None: 

463 mask = np.broadcast_to(True, shape=self.shape) 

464 

465 new_values = self.values 

466 

467 def make_a_block(nv, ref_loc): 

468 if isinstance(nv, list): 

469 assert len(nv) == 1, nv 

470 assert isinstance(nv[0], Block) 

471 block = nv[0] 

472 else: 

473 # Put back the dimension that was taken from it and make 

474 # a block out of the result. 

475 nv = _block_shape(nv, ndim=self.ndim) 

476 block = self.make_block(values=nv, placement=ref_loc) 

477 return block 

478 

479 # ndim == 1 

480 if self.ndim == 1: 

481 if mask.any(): 

482 nv = f(mask, new_values, None) 

483 else: 

484 nv = new_values if inplace else new_values.copy() 

485 block = make_a_block(nv, self.mgr_locs) 

486 return [block] 

487 

488 # ndim > 1 

489 new_blocks = [] 

490 for i, ref_loc in enumerate(self.mgr_locs): 

491 m = mask[i] 

492 v = new_values[i] 

493 

494 # need a new block 

495 if m.any(): 

496 nv = f(m, v, i) 

497 else: 

498 nv = v if inplace else v.copy() 

499 

500 block = make_a_block(nv, [ref_loc]) 

501 new_blocks.append(block) 

502 

503 return new_blocks 

504 

505 def _maybe_downcast(self, blocks: List["Block"], downcast=None) -> List["Block"]: 

506 

507 # no need to downcast our float 

508 # unless indicated 

509 if downcast is None and ( 

510 self.is_float or self.is_timedelta or self.is_datetime 

511 ): 

512 return blocks 

513 

514 return _extend_blocks([b.downcast(downcast) for b in blocks]) 

515 

516 def downcast(self, dtypes=None): 

517 """ try to downcast each item to the dict of dtypes if present """ 

518 

519 # turn it off completely 

520 if dtypes is False: 

521 return self 

522 

523 values = self.values 

524 

525 # single block handling 

526 if self._is_single_block: 

527 

528 # try to cast all non-floats here 

529 if dtypes is None: 

530 dtypes = "infer" 

531 

532 nv = maybe_downcast_to_dtype(values, dtypes) 

533 return self.make_block(nv) 

534 

535 # ndim > 1 

536 if dtypes is None: 

537 return self 

538 

539 if not (dtypes == "infer" or isinstance(dtypes, dict)): 

540 raise ValueError( 

541 "downcast must have a dictionary or 'infer' as its argument" 

542 ) 

543 elif dtypes != "infer": 

544 raise AssertionError("dtypes as dict is not supported yet") 

545 

546 # operate column-by-column 

547 # this is expensive as it splits the blocks items-by-item 

548 def f(mask, val, idx): 

549 val = maybe_downcast_to_dtype(val, dtype="infer") 

550 return val 

551 

552 return self.split_and_operate(None, f, False) 

553 

def astype(self, dtype, copy: bool = False, errors: str = "raise"):
    """
    Coerce to the new dtype.

    Parameters
    ----------
    dtype : str, dtype convertible
    copy : bool, default False
        Only consulted when no conversion happens: when the block already
        has the requested dtype, or when conversion failed and
        ``errors="ignore"``. In those cases a copy is returned if True,
        otherwise ``self``.
    errors : str, {'raise', 'ignore'}, default 'raise'
        - ``raise`` : allow exceptions to be raised
        - ``ignore`` : suppress exceptions. On error return original object

    Returns
    -------
    Block

    Raises
    ------
    ValueError
        If ``errors`` is not one of the legal values.
    TypeError
        If ``dtype`` is an ExtensionDtype class rather than an instance,
        or if a numeric-to-numeric conversion changed the shape.
    """
    errors_legal_values = ("raise", "ignore")

    if errors not in errors_legal_values:
        invalid_arg = (
            "Expected value of kwarg 'errors' to be one of "
            f"{list(errors_legal_values)}. Supplied value is '{errors}'"
        )
        raise ValueError(invalid_arg)

    # Reject ``astype(SomeExtensionDtype)`` where the class was passed
    # instead of an instance.
    if inspect.isclass(dtype) and issubclass(dtype, ExtensionDtype):
        msg = (
            f"Expected an instance of {dtype.__name__}, "
            "but got the class instead. Try instantiating 'dtype'."
        )
        raise TypeError(msg)

    # may need to convert to categorical
    if self.is_categorical_astype(dtype):

        if is_categorical_dtype(self.values):
            # GH 10696/18593: update an existing categorical efficiently
            return self.make_block(self.values.astype(dtype, copy=copy))

        return self.make_block(Categorical(self.values, dtype=dtype))

    dtype = pandas_dtype(dtype)

    # astype processing
    # Same dtype: nothing to convert, only honour ``copy``.
    if is_dtype_equal(self.dtype, dtype):
        if copy:
            return self.copy()
        return self

    # force the copy here
    if self.is_extension:
        # TODO: Should we try/except this astype?
        values = self.values.astype(dtype)
    else:
        if issubclass(dtype.type, str):

            # use native type formatting for datetime/tz/timedelta
            if self.is_datelike:
                values = self.to_native_types()

            # astype formatting
            else:
                values = self.get_values()

        else:
            values = self.get_values(dtype=dtype)

        # _astype_nansafe works fine with 1-d only
        vals1d = values.ravel()
        try:
            values = astype_nansafe(vals1d, dtype, copy=True)
        except (ValueError, TypeError):
            # e.g. astype_nansafe can fail on object-dtype of strings
            # trying to convert to float
            if errors == "raise":
                raise
            # errors == "ignore": hand back the unconverted block
            newb = self.copy() if copy else self
            return newb

    # TODO(extension)
    # should we make this attribute?
    # ndarray results were flattened above, so restore the block's shape
    if isinstance(values, np.ndarray):
        values = values.reshape(self.shape)

    newb = make_block(values, placement=self.mgr_locs, ndim=self.ndim)

    if newb.is_numeric and self.is_numeric:
        if newb.shape != self.shape:
            raise TypeError(
                f"cannot set astype for copy = [{copy}] for dtype "
                f"({self.dtype.name} [{self.shape}]) to different shape "
                f"({newb.dtype.name} [{newb.shape}])"
            )
    return newb

649 

650 def convert( 

651 self, 

652 copy: bool = True, 

653 datetime: bool = True, 

654 numeric: bool = True, 

655 timedelta: bool = True, 

656 coerce: bool = False, 

657 ): 

658 """ attempt to coerce any object types to better types return a copy 

659 of the block (if copy = True) by definition we are not an ObjectBlock 

660 here! 

661 """ 

662 

663 return self.copy() if copy else self 

664 

665 def _can_hold_element(self, element: Any) -> bool: 

666 """ require the same dtype as ourselves """ 

667 dtype = self.values.dtype.type 

668 tipo = maybe_infer_dtype_type(element) 

669 if tipo is not None: 

670 return issubclass(tipo.type, dtype) 

671 return isinstance(element, dtype) 

672 

673 def to_native_types(self, slicer=None, na_rep="nan", quoting=None, **kwargs): 

674 """ convert to our native types format, slicing if desired """ 

675 values = self.get_values() 

676 

677 if slicer is not None: 

678 values = values[:, slicer] 

679 mask = isna(values) 

680 itemsize = writers.word_len(na_rep) 

681 

682 if not self.is_object and not quoting and itemsize: 

683 values = values.astype(str) 

684 if values.dtype.itemsize / np.dtype("U1").itemsize < itemsize: 

685 # enlarge for the na_rep 

686 values = values.astype(f"<U{itemsize}") 

687 else: 

688 values = np.array(values, dtype="object") 

689 

690 values[mask] = na_rep 

691 return values 

692 

693 # block actions # 

def copy(self, deep=True):
    """
    Copy constructor.

    With ``deep=True`` the values are copied first; otherwise the new
    block shares the same values object.
    """
    values = self.values.copy() if deep else self.values
    return self.make_block_same_class(values, ndim=self.ndim)

700 

def replace(
    self, to_replace, value, inplace=False, filter=None, regex=False, convert=True
):
    """
    Replace the to_replace value with value, possibly creating new blocks.

    This is essentially a call to putmask. ``regex`` is not used here; it
    is used in ObjectBlocks and is accepted for API compatibility.

    Parameters
    ----------
    to_replace : scalar or list
        Value(s) to look for.
    value : object
        Replacement value.
    inplace : bool, default False
    filter : optional
        When given, only items whose ``mgr_locs`` are in ``filter`` are
        replaced.
    regex : bool, default False
        Only passed along on recursive calls.
    convert : bool, default True
        When True, ``convert(numeric=False)`` is applied to the result.

    Returns
    -------
    list of Block
    """

    inplace = validate_bool_kwarg(inplace, "inplace")
    # kept so the object-dtype retry below sees the caller's original
    # argument rather than a scalar converted via convert_scalar
    original_to_replace = to_replace

    # If we cannot replace with own dtype, convert to ObjectBlock and
    # retry
    if not self._can_hold_element(to_replace):
        if not isinstance(to_replace, list):
            # a scalar this block cannot hold cannot be present in it
            if inplace:
                return [self]
            return [self.copy()]

        # keep only the candidates this block could actually contain
        to_replace = [x for x in to_replace if self._can_hold_element(x)]
        if not len(to_replace):
            # GH#28084 avoid costly checks since we can infer
            # that there is nothing to replace in this block
            if inplace:
                return [self]
            return [self.copy()]

        if len(to_replace) == 1:
            # _can_hold_element checks have reduced this back to the
            # scalar case and we can avoid a costly object cast
            return self.replace(
                to_replace[0],
                value,
                inplace=inplace,
                filter=filter,
                regex=regex,
                convert=convert,
            )

        # GH 22083, TypeError or ValueError occurred within error handling
        # causes infinite loop. Cast and retry only if not objectblock.
        if is_object_dtype(self):
            raise AssertionError

        # try again with a compatible block
        block = self.astype(object)
        return block.replace(
            to_replace=to_replace,
            value=value,
            inplace=inplace,
            filter=filter,
            regex=regex,
            convert=convert,
        )

    values = self.values
    if lib.is_scalar(to_replace) and isinstance(values, np.ndarray):
        # The only non-DatetimeLike class that also has a non-trivial
        # try_coerce_args is ObjectBlock, but that overrides replace,
        # so does not get here.
        to_replace = convert_scalar(values, to_replace)

    mask = missing.mask_missing(values, to_replace)
    if filter is not None:
        # clear the mask for rows whose placement is not in ``filter``
        filtered_out = ~self.mgr_locs.isin(filter)
        mask[filtered_out.nonzero()[0]] = False

    try:
        blocks = self.putmask(mask, value, inplace=inplace)
        # Note: it is _not_ the case that self._can_hold_element(value)
        # is always true at this point. In particular, that can fail
        # for:
        # "2u" with bool-dtype, float-dtype
        # 0.5 with int64-dtype
        # np.nan with int64-dtype
    except (TypeError, ValueError):
        # GH 22083, TypeError or ValueError occurred within error handling
        # causes infinite loop. Cast and retry only if not objectblock.
        if is_object_dtype(self):
            raise

        if not self.is_extension:
            # TODO: https://github.com/pandas-dev/pandas/issues/32586
            # Need an ExtensionArray._can_hold_element to indicate whether
            # a scalar value can be placed in the array.
            assert not self._can_hold_element(value), value

        # try again with a compatible block
        block = self.astype(object)
        return block.replace(
            to_replace=original_to_replace,
            value=value,
            inplace=inplace,
            filter=filter,
            regex=regex,
            convert=convert,
        )
    if convert:
        blocks = [b.convert(numeric=False, copy=not inplace) for b in blocks]
    return blocks

801 

802 def _replace_single(self, *args, **kwargs): 

803 """ no-op on a non-ObjectBlock """ 

804 return self if kwargs["inplace"] else self.copy() 

805 

def setitem(self, indexer, value):
    """
    Set the value inplace, returning a maybe different typed block.

    Parameters
    ----------
    indexer : tuple, list-like, array-like, slice
        The subset of self.values to set
    value : object
        The value being set

    Returns
    -------
    Block

    Notes
    -----
    `indexer` is a direct slice/positional indexer. `value` must
    be a compatible shape.
    """
    # 2-D blocks are written to through their transpose and transposed
    # back before the result is wrapped in a block
    transpose = self.ndim == 2

    # coerce None values, if appropriate
    if value is None:
        if self.is_numeric:
            value = np.nan

    # coerce if block dtype can store value
    values = self.values
    if self._can_hold_element(value):
        # We only get here for non-Extension Blocks, so _try_coerce_args
        # is only relevant for DatetimeBlock and TimedeltaBlock
        if lib.is_scalar(value):
            value = convert_scalar(values, value)

    else:
        # current dtype cannot store value, coerce to common dtype
        find_dtype = False

        if hasattr(value, "dtype"):
            dtype = value.dtype
            find_dtype = True

        elif lib.is_scalar(value) and not isna(value):
            dtype, _ = infer_dtype_from_scalar(value, pandas_dtype=True)
            find_dtype = True

        if find_dtype:
            dtype = find_common_type([values.dtype, dtype])
            if not is_dtype_equal(self.dtype, dtype):
                # upcast the whole block first, then redo the assignment
                # on the upcast block
                b = self.astype(dtype)
                return b.setitem(indexer, value)

    # value must be storeable at this moment
    if is_extension_array_dtype(getattr(value, "dtype", None)):
        # We need to be careful not to allow through strings that
        # can be parsed to EADtypes
        is_ea_value = True
        arr_value = value
    else:
        is_ea_value = False
        arr_value = np.array(value)

    # cast the values to a type that can hold nan (if necessary)
    if not self._can_hold_element(value):
        dtype, _ = maybe_promote(arr_value.dtype)
        values = values.astype(dtype)

    if transpose:
        values = values.T

    # length checking
    check_setitem_lengths(indexer, value, values)
    # True when ``arr_value`` has at least one dimension and matches
    # ``values`` in first-axis length and total size
    exact_match = (
        len(arr_value.shape)
        and arr_value.shape[0] == values.shape[0]
        and arr_value.size == values.size
    )
    if is_empty_indexer(indexer, arr_value):
        # GH#8669 empty indexers
        pass

    elif is_scalar_indexer(indexer, arr_value):
        # setting a single element for each dim and with a rhs that could
        # be e.g. a list; see GH#6043
        values[indexer] = value

    elif (
        exact_match
        and is_categorical_dtype(arr_value.dtype)
        and not is_categorical_dtype(values)
    ):
        # GH25495 - If the current dtype is not categorical,
        # we need to create a new categorical block
        values[indexer] = value
        return self.make_block(Categorical(self.values, dtype=arr_value.dtype))

    elif exact_match and is_ea_value:
        # GH#32395 if we're going to replace the values entirely, just
        # substitute in the new array
        return self.make_block(arr_value)

    # if we are an exact match (ex-broadcasting),
    # then use the resultant dtype
    elif exact_match:
        values[indexer] = value

        try:
            values = values.astype(arr_value.dtype)
        except ValueError:
            # best effort: keep the current dtype when the cast fails
            pass

    # set
    else:
        values[indexer] = value

    if transpose:
        values = values.T
    block = self.make_block(values)
    return block

926 

def putmask(self, mask, new, align=True, inplace=False, axis=0, transpose=False):
    """
    Putmask the data to the block; it is possible that we may create a
    new dtype of block.

    Return the resulting block(s).

    Parameters
    ----------
    mask : the condition to respect
    new : a ndarray/object
    align : boolean, perform alignment on other/cond, default is True
        Not referenced in this implementation.
    inplace : perform inplace modification, default is False
    axis : int
    transpose : boolean
        Set to True if self is stored with axes reversed

    Returns
    -------
    a list of new blocks, the result of the putmask

    Raises
    ------
    ValueError
        If ``new`` is 1-D list-like and its length matches neither the
        number of masked entries, nor the mask's last axis, nor 1.
    """

    new_values = self.values if inplace else self.values.copy()

    # unwrap objects exposing ``.values`` to the underlying array
    new = getattr(new, "values", new)
    mask = getattr(mask, "values", mask)

    # if we are passed a scalar None, convert it here
    if not is_list_like(new) and isna(new) and not self.is_object:
        # FIXME: make sure we have compatible NA
        new = self.fill_value

    if self._can_hold_element(new):
        # We only get here for non-Extension Blocks, so _try_coerce_args
        # is only relevant for DatetimeBlock and TimedeltaBlock
        if lib.is_scalar(new):
            new = convert_scalar(new_values, new)

        if transpose:
            new_values = new_values.T

        # If the default repeat behavior in np.putmask would go in the
        # wrong direction, then explicitly repeat and reshape new instead
        if getattr(new, "ndim", 0) >= 1:
            if self.ndim - 1 == new.ndim and axis == 1:
                new = np.repeat(new, new_values.shape[-1]).reshape(self.shape)
            new = new.astype(new_values.dtype)

        # we require exact matches between the len of the
        # values we are setting (or is compat). np.putmask
        # doesn't check this and will simply truncate / pad
        # the output, but we want sane error messages
        #
        # TODO: this prob needs some better checking
        # for 2D cases
        if (
            is_list_like(new)
            and np.any(mask[mask])
            and getattr(new, "ndim", 1) == 1
        ):
            if mask[mask].shape[-1] == len(new):
                # GH 30567
                # If length of ``new`` is less than the length of ``new_values``,
                # `np.putmask` would first repeat the ``new`` array and then
                # assign the masked values hence produces incorrect result.
                # `np.place` on the other hand uses the ``new`` values at it is
                # to place in the masked locations of ``new_values``
                np.place(new_values, mask, new)
            elif mask.shape[-1] == len(new) or len(new) == 1:
                np.putmask(new_values, mask, new)
            else:
                raise ValueError("cannot assign mismatch length to masked array")
        else:
            np.putmask(new_values, mask, new)

    # maybe upcast me
    elif mask.any():
        if transpose:
            mask = mask.T
            if isinstance(new, np.ndarray):
                new = new.T
            axis = new_values.ndim - axis - 1

        # Pseudo-broadcast
        if getattr(new, "ndim", 0) >= 1:
            if self.ndim - 1 == new.ndim:
                new_shape = list(new.shape)
                new_shape.insert(axis, 1)
                new = new.reshape(tuple(new_shape))

        # operate column-by-column
        def f(mask, val, idx):

            if idx is None:
                # ndim==1 case.
                n = new
            else:

                if isinstance(new, np.ndarray):
                    n = np.squeeze(new[idx % new.shape[0]])
                else:
                    n = np.array(new)

                # type of the new block
                dtype, _ = maybe_promote(n.dtype)

                # we need to explicitly astype here to make a copy
                n = n.astype(dtype)

            nv = _putmask_smart(val, mask, n)
            return nv

        new_blocks = self.split_and_operate(mask, f, inplace)
        return new_blocks

    # Reached after the direct-assignment branch, or when the block
    # cannot hold ``new`` but the mask selects nothing.
    if inplace:
        return [self]

    if transpose:
        new_values = new_values.T

    return [self.make_block(new_values)]

1048 

def coerce_to_target_dtype(self, other):
    """
    Coerce the current block to a dtype compatible with ``other``.

    A block is returned, possibly of object dtype; conversion failures
    inside the final ``astype`` fall back to object rather than raising.

    We can also safely try to coerce to the same dtype
    and will receive the same block.

    Parameters
    ----------
    other : object
        Value whose inferred dtype determines the target.

    Returns
    -------
    Block

    Raises
    ------
    AssertionError
        If a datetime or timedelta block reaches the end of its branch
        without a dtype change, flagged as a possible recursion.
    """

    # if we cannot then coerce to object
    dtype, _ = infer_dtype_from(other, pandas_dtype=True)

    if is_dtype_equal(self.dtype, dtype):
        return self

    if self.is_bool or is_object_dtype(dtype) or is_bool_dtype(dtype):
        # we don't upcast to bool
        return self.astype(object)

    elif (self.is_float or self.is_complex) and (
        is_integer_dtype(dtype) or is_float_dtype(dtype)
    ):
        # don't coerce float/complex to int
        return self

    elif (
        self.is_datetime
        or is_datetime64_dtype(dtype)
        or is_datetime64tz_dtype(dtype)
    ):

        # not a datetime
        # (only one side is datetime-like, so object is the common ground)
        if not (
            (is_datetime64_dtype(dtype) or is_datetime64tz_dtype(dtype))
            and self.is_datetime
        ):
            return self.astype(object)

        # don't upcast timezone with different timezone or no timezone
        mytz = getattr(self.dtype, "tz", None)
        othertz = getattr(dtype, "tz", None)

        if not tz_compare(mytz, othertz):
            return self.astype(object)

        raise AssertionError(
            f"possible recursion in coerce_to_target_dtype: {self} {other}"
        )

    elif self.is_timedelta or is_timedelta64_dtype(dtype):

        # not a timedelta
        if not (is_timedelta64_dtype(dtype) and self.is_timedelta):
            return self.astype(object)

        raise AssertionError(
            f"possible recursion in coerce_to_target_dtype: {self} {other}"
        )

    # remaining combinations: try the inferred dtype, fall back to object
    try:
        return self.astype(dtype)
    except (ValueError, TypeError, OverflowError):
        return self.astype(object)

1112 

def interpolate(
    self,
    method="pad",
    axis=0,
    index=None,
    values=None,
    inplace=False,
    limit=None,
    limit_direction="forward",
    limit_area=None,
    fill_value=None,
    coerce=False,
    downcast=None,
    **kwargs,
):
    """
    Fill or interpolate the values of this block.

    ``method`` is first tried as a fill method (via
    ``missing.clean_fill_method``); if that raises ``ValueError`` it is
    validated as an interpolation method instead.  Bool and integer
    blocks that are not timedelta blocks are returned untouched (``self``
    when ``inplace`` else a copy).

    Returns
    -------
    Whatever ``_interpolate_with_fill`` / ``_interpolate`` return, or a
    single block for the bool/integer short-circuit.
    """
    inplace = validate_bool_kwarg(inplace, "inplace")

    def passthrough():
        # Only FloatBlocks will contain NaNs.
        # timedelta subclasses IntBlock
        if self.is_timedelta or not (self.is_bool or self.is_integer):
            return None
        return self if inplace else self.copy()

    # is this a fill-na type method?
    try:
        fill_method = missing.clean_fill_method(method)
    except ValueError:
        fill_method = None

    if fill_method is None:
        # validate the interp method before anything else happens
        interp_method = missing.clean_interp_method(method, **kwargs)

        unchanged = passthrough()
        if unchanged is not None:
            return unchanged
        return self._interpolate(
            method=interp_method,
            index=index,
            values=values,
            axis=axis,
            limit=limit,
            limit_direction=limit_direction,
            limit_area=limit_area,
            fill_value=fill_value,
            inplace=inplace,
            downcast=downcast,
            **kwargs,
        )

    unchanged = passthrough()
    if unchanged is not None:
        return unchanged
    return self._interpolate_with_fill(
        method=fill_method,
        axis=axis,
        inplace=inplace,
        limit=limit,
        fill_value=fill_value,
        coerce=coerce,
        downcast=downcast,
    )

1178 

def _interpolate_with_fill(
    self,
    method="pad",
    axis=0,
    inplace=False,
    limit=None,
    fill_value=None,
    coerce=False,
    downcast=None,
):
    """
    fillna, but implemented through ``missing.interpolate_2d``.

    Returns
    -------
    list of Block
        ``[self]`` / ``[self.copy()]`` when ``coerce`` is set and the block
        cannot hold NA; otherwise the result of ``self._maybe_downcast``.
    """
    inplace = validate_bool_kwarg(inplace, "inplace")

    # if we are coercing, then don't force the conversion
    # if the block can't hold the type
    if coerce and not self._can_hold_na:
        return [self] if inplace else [self.copy()]

    target = self.values if inplace else self.values.copy()

    # We only get here for non-ExtensionBlock
    fill_value = convert_scalar(self.values, fill_value)

    filled = missing.interpolate_2d(
        target,
        method=method,
        axis=axis,
        limit=limit,
        fill_value=fill_value,
        dtype=self.dtype,
    )

    new_blocks = [self.make_block_same_class(filled, ndim=self.ndim)]
    return self._maybe_downcast(new_blocks, downcast)

1218 

def _interpolate(
    self,
    method=None,
    index=None,
    values=None,
    fill_value=None,
    axis=0,
    limit=None,
    limit_direction="forward",
    limit_area=None,
    inplace=False,
    downcast=None,
    **kwargs,
):
    """
    Interpolate along ``axis`` using ``missing.interpolate_1d``.

    Non-float blocks are returned as-is unless they are integer blocks,
    in which case the data is cast to float64 first.

    Raises
    ------
    ValueError
        If ``method`` is one of "krogh", "piecewise_polynomial" or "pchip"
        and ``index`` is not monotonic.
    """
    inplace = validate_bool_kwarg(inplace, "inplace")
    data = self.values if inplace else self.values.copy()

    # only deal with floats
    if not self.is_float:
        if not self.is_integer:
            return self
        data = data.astype(np.float64)

    if fill_value is None:
        fill_value = self.fill_value

    needs_monotonic = method in ("krogh", "piecewise_polynomial", "pchip")
    if needs_monotonic and not index.is_monotonic:
        raise ValueError(
            f"{method} interpolation requires that the index be monotonic."
        )

    def interpolate_slice(slice_1d):
        # process a 1-d slice, returning it
        # should the axis argument be handled below in apply_along_axis?
        # i.e. not an arg to missing.interpolate_1d
        return missing.interpolate_1d(
            index,
            slice_1d,
            method=method,
            limit=limit,
            limit_direction=limit_direction,
            limit_area=limit_area,
            fill_value=fill_value,
            bounds_error=False,
            **kwargs,
        )

    # interp each 1-d slice in the axis direction independently
    interpolated = np.apply_along_axis(interpolate_slice, axis, data)

    new_blocks = [self.make_block_same_class(interpolated)]
    return self._maybe_downcast(new_blocks, downcast)

1276 

def take_nd(self, indexer, axis, new_mgr_locs=None, fill_tuple=None):
    """
    Take values according to indexer and return them as a block.

    Parameters
    ----------
    indexer : positions to take
    axis : axis along which to take
    new_mgr_locs : placement for the new block; defaults to ``self.mgr_locs``
    fill_tuple : optional tuple whose first element is the fill value;
        filling is only enabled when this is given
    """
    # algos.take_nd dispatches for DatetimeTZBlock, CategoricalBlock
    # so need to preserve types
    # sparse is treated like an ndarray, but needs .get_values() shaping
    allow_fill = fill_tuple is not None
    fill_value = fill_tuple[0] if allow_fill else self.fill_value

    taken = algos.take_nd(
        self.values, indexer, axis=axis, allow_fill=allow_fill, fill_value=fill_value
    )

    # Called from three places in managers, all of which satisfy
    # this assertion
    assert not (axis == 0 and new_mgr_locs is None)
    if new_mgr_locs is None:
        new_mgr_locs = self.mgr_locs

    # keep the block class only when the dtype survived the take
    if is_dtype_equal(taken.dtype, self.dtype):
        return self.make_block_same_class(taken, new_mgr_locs)
    return self.make_block(taken, new_mgr_locs)

1310 

def diff(self, n: int, axis: int = 1) -> List["Block"]:
    """
    Return a one-element list holding the block of n-th differences.

    The differenced values are passed through ``_block_shape`` so that
    ExtensionBlock subclasses, which may call here via ``super()``, end up
    with values of the right dimensionality.
    """
    differenced = _block_shape(
        algos.diff(self.values, n, axis=axis, stacklevel=7), ndim=self.ndim
    )
    return [self.make_block(values=differenced)]

1318 

def shift(self, periods, axis=0, fill_value=None):
    """
    Shift the block by ``periods`` along ``axis``, possibly upcasting.

    Parameters
    ----------
    periods : int
        Number of positions to shift; negative values shift backwards.
        A shift of zero leaves every value in place.
    axis : int, default 0
    fill_value : object, optional
        Value written into the positions vacated by the shift.

    Returns
    -------
    list of Block
    """
    # convert integer to float if necessary. need to do a lot more than
    # that, handle boolean etc also
    new_values, fill_value = maybe_upcast(self.values, fill_value)

    # make sure array sent to np.roll is c_contiguous
    f_ordered = new_values.flags.f_contiguous
    if f_ordered:
        new_values = new_values.T
        axis = new_values.ndim - axis - 1

    if np.prod(new_values.shape):
        new_values = np.roll(new_values, ensure_platform_int(periods), axis=axis)

    # Only the vacated positions are overwritten.  With periods == 0 there
    # are none: ``slice(0, None)`` would otherwise select (and wipe) the
    # entire array.
    if periods != 0:
        axis_indexer = [slice(None)] * self.ndim
        if periods > 0:
            axis_indexer[axis] = slice(None, periods)
        else:
            axis_indexer[axis] = slice(periods, None)
        new_values[tuple(axis_indexer)] = fill_value

    # restore original order
    if f_ordered:
        new_values = new_values.T

    return [self.make_block(new_values)]

1347 

def where(
    self,
    other,
    cond,
    align=True,
    errors="raise",
    try_cast: bool = False,
    axis: int = 0,
) -> List["Block"]:
    """
    evaluate the block; return result block(s) from the result

    Parameters
    ----------
    other : a ndarray/object
    cond : the condition to respect
    align : boolean, perform alignment on other/cond
        (only forwarded to the recursive call made after coercion)
    errors : str, {'raise', 'ignore'}, default 'raise'
        - ``raise`` : allow exceptions to be raised
        - ``ignore`` : suppress exceptions. On error return original object
    try_cast : bool, default False
        (only forwarded to the recursive call made after coercion)
    axis : int

    Returns
    -------
    a new block(s), the result of the func

    Raises
    ------
    ValueError
        If ``cond`` has no ``shape`` attribute.
    """
    import pandas.core.computation.expressions as expressions

    assert errors in ["raise", "ignore"]
    transpose = self.ndim == 2

    orig_other = other
    values = self.values.T if transpose else self.values

    other = getattr(other, "_values", getattr(other, "values", other))
    cond = getattr(cond, "values", cond)

    # If the default broadcasting would go in the wrong direction, then
    # explicitly reshape other instead
    if getattr(other, "ndim", 0) >= 1:
        if values.ndim - 1 == other.ndim and axis == 1:
            other = other.reshape(tuple(other.shape + (1,)))
        elif transpose and values.ndim == self.ndim - 1:
            cond = cond.T

    if not hasattr(cond, "shape"):
        raise ValueError("where must have a condition that is ndarray like")

    # our where function
    def select(cond, values, other):
        # NaN going into an integer/bool block is let through unchecked:
        # np.where will cast integer array to floats in this case
        nan_into_int_or_bool = (
            (self.is_integer or self.is_bool)
            and lib.is_float(other)
            and np.isnan(other)
        )
        if not nan_into_int_or_bool:
            if not self._can_hold_element(other):
                raise TypeError
            if lib.is_scalar(other) and isinstance(values, np.ndarray):
                other = convert_scalar(values, other)

        return expressions.where(cond, values, other)

    if cond.ravel().all():
        # nothing is replaced
        result = values
    else:
        # see if we can operate on the entire block, or need item-by-item
        # or if we are a single block (ndim == 1)
        try:
            result = select(cond, values, other)
        except TypeError:
            # we cannot coerce, return a compat dtype
            # we are explicitly ignoring errors
            coerced = self.coerce_to_target_dtype(other)
            coerced_blocks = coerced.where(
                orig_other,
                cond,
                align=align,
                errors=errors,
                try_cast=try_cast,
                axis=axis,
            )
            return self._maybe_downcast(coerced_blocks, "infer")

    if self._can_hold_na or self.ndim == 1:
        return [self.make_block(result.T if transpose else result)]

    # might need to separate out blocks
    axis = cond.ndim - 1
    cond = cond.swapaxes(axis, 0)
    mask = np.array([cond[i].all() for i in range(cond.shape[0])], dtype=bool)

    result_blocks = []
    for selector in (mask, ~mask):
        if not selector.any():
            continue
        taken = result.take(selector.nonzero()[0], axis=axis)
        downcasted = maybe_downcast_numeric(taken, self.dtype)
        result_blocks.append(
            self.make_block(downcasted.T, placement=self.mgr_locs[selector])
        )

    return result_blocks

1458 

def equals(self, other) -> bool:
    """
    Return whether ``other`` has the same dtype, shape and values.

    Values are compared with ``array_equivalent``; that comparison is only
    reached when dtype and shape already match.
    """
    mismatch = self.dtype != other.dtype or self.shape != other.shape
    return False if mismatch else array_equivalent(self.values, other.values)

1463 

def _unstack(self, unstacker_func, new_columns, n_rows, fill_value):
    """Return a list of unstacked blocks of self

    Parameters
    ----------
    unstacker_func : callable
        Partially applied unstacker.
    new_columns : Index
        All columns of the unstacked BlockManager.
    n_rows : int
        Only used in ExtensionBlock._unstack
    fill_value : int
        Only used in ExtensionBlock._unstack

    Returns
    -------
    blocks : list of Block
        New blocks of unstacked values.
    mask : array_like of bool
        The mask of columns of `blocks` we should keep.
    """
    unstacker = unstacker_func(self.values.T)
    placement = new_columns.get_indexer(unstacker.get_new_columns())
    unstacked, value_mask = unstacker.get_new_values()

    # keep only the columns flagged anywhere in the mask
    keep = value_mask.any(0)
    kept_values = unstacked.T[keep]
    kept_placement = placement[keep]

    return [make_block(kept_values, placement=kept_placement)], keep

1496 

def quantile(self, qs, interpolation="linear", axis=0):
    """
    Compute the quantiles of the block's values.

    Parameters
    ----------
    qs: a scalar or list of the quantiles to be computed
    interpolation: type of interpolation, default 'linear'
    axis: axis to compute, default 0

    Returns
    -------
    Block
    """
    # We should always have ndim == 2 because Series dispatches to DataFrame
    assert self.ndim == 2

    values = self.get_values()

    is_empty = values.shape[axis] == 0
    orig_scalar = not is_list_like(qs)
    if orig_scalar:
        # make list-like, unpack later
        qs = [qs]

    if is_empty:
        # create the array of na_values
        # 2d len(values) * len(qs)
        result = np.repeat(
            np.array([self.fill_value] * len(qs)), len(values)
        ).reshape(len(values), len(qs))
    else:
        # asarray needed for Sparse, see GH#24600
        mask = np.asarray(isna(values))
        result = nanpercentile(
            values,
            np.array(qs) * 100,
            axis=axis,
            na_value=self.fill_value,
            mask=mask,
            ndim=values.ndim,
            interpolation=interpolation,
        )

        # np.asarray copies only when it has to.  The former
        # ``np.array(result, copy=False)`` raises on NumPy >= 2 whenever a
        # copy cannot be avoided.
        result = np.asarray(result)
        result = result.T

    if orig_scalar and not lib.is_scalar(result):
        # result could be scalar in case with is_empty and self.ndim == 1
        assert result.shape[-1] == 1, result.shape
        result = result[..., 0]
        result = lib.item_from_zerodim(result)

    ndim = np.ndim(result)
    return make_block(result, placement=np.arange(len(result)), ndim=ndim)

1552 

1553 def _replace_coerce( 

1554 self, to_replace, value, inplace=True, regex=False, convert=False, mask=None 

1555 ): 

1556 """ 

1557 Replace value corresponding to the given boolean array with another 

1558 value. 

1559 

1560 Parameters 

1561 ---------- 

1562 to_replace : object or pattern 

1563 Scalar to replace or regular expression to match. 

1564 value : object 

1565 Replacement object. 

1566 inplace : bool, default False 

1567 Perform inplace modification. 

1568 regex : bool, default False 

1569 If true, perform regular expression substitution. 

1570 convert : bool, default True 

1571 If true, try to coerce any object types to better types. 

1572 mask : array-like of bool, optional 

1573 True indicate corresponding element is ignored. 

1574 

1575 Returns 

1576 ------- 

1577 A new block if there is anything to replace or the original block. 

1578 """ 

1579 

1580 if mask.any(): 

1581 if not regex: 

1582 self = self.coerce_to_target_dtype(value) 

1583 return self.putmask(mask, value, inplace=inplace) 

1584 else: 

1585 return self._replace_single( 

1586 to_replace, 

1587 value, 

1588 inplace=inplace, 

1589 regex=regex, 

1590 convert=convert, 

1591 mask=mask, 

1592 ) 

1593 return self 

1594 

1595 

class NonConsolidatableMixIn:
    """ hold methods for the nonconsolidatable blocks """

    _can_consolidate = False
    _verify_integrity = False
    _validate_ndim = False

    def __init__(self, values, placement, ndim=None):
        """Initialize a non-consolidatable block.

        'ndim' may be inferred from 'placement'.

        This will continue to call __init__ for the other base
        classes mixed in with this Mixin.
        """
        # Placement must be converted to BlockPlacement so that we can check
        # its length
        if not isinstance(placement, libinternals.BlockPlacement):
            placement = libinternals.BlockPlacement(placement)

        # Maybe infer ndim from placement
        if ndim is None:
            ndim = 1 if len(placement) != 1 else 2
        super().__init__(values, placement, ndim=ndim)

    @property
    def shape(self):
        """Shape of the block: 1-tuple for ndim == 1, else (n_locs, n_values)."""
        n_values = len(self.values)
        if self.ndim == 1:
            return (n_values,)
        return (len(self.mgr_locs), n_values)

    def iget(self, col):
        """
        Return the values for ``col``.

        For a 2-dim block ``col`` may be a ``(col, loc)`` tuple, in which
        case the values at ``loc`` are returned.  Any ``col`` other than 0
        (or a null slice in the tuple form) raises, as the block holds a
        single item.
        """
        if not (self.ndim == 2 and isinstance(col, tuple)):
            if col != 0:
                raise IndexError(f"{self} only contains one item")
            return self.values

        col, loc = col
        if not com.is_null_slice(col) and col != 0:
            raise IndexError(f"{self} only contains one item")
        if isinstance(col, slice):
            if col != slice(None):
                raise NotImplementedError(col)
            # list indexer keeps the result array-like
            return self.values[[loc]]
        return self.values[loc]

    def should_store(self, value):
        """Return whether ``value`` is an instance of this block's holder."""
        return isinstance(value, self._holder)

    def set(self, locs, values, check=False):
        """Replace the block's values; ``locs`` must be exactly ``[0]``."""
        assert locs.tolist() == [0]
        self.values = values

    def putmask(self, mask, new, align=True, inplace=False, axis=0, transpose=False):
        """
        putmask the data to the block; we must be a single block and not
        generate other blocks

        return the resulting block

        Parameters
        ----------
        mask : the condition to respect
        new : a ndarray/object
        align : boolean, perform alignment on other/cond, default is True
        inplace : perform inplace modification, default is False

        Returns
        -------
        a new block, the result of the putmask
        """
        inplace = validate_bool_kwarg(inplace, "inplace")

        # use block's copy logic.
        # .values may be an Index which does shallow copy by default
        target = self.values if inplace else self.copy().values

        # an ndarray as long as the mask is reduced to the masked positions
        if isinstance(new, np.ndarray) and len(new) == len(mask):
            new = new[mask]

        reshaped_mask = _safe_reshape(mask, target.shape)
        target[reshaped_mask] = new
        return [self.make_block(values=target)]

    def _get_unstack_items(self, unstacker, new_columns):
        """
        Get the placement, values, and mask for a Block unstack.

        This is shared between ObjectBlock and ExtensionBlock. They
        differ in that ObjectBlock passes the values, while ExtensionBlock
        passes the dummy ndarray of positions to be used by a take
        later.

        Parameters
        ----------
        unstacker : pandas.core.reshape.reshape._Unstacker
        new_columns : Index
            All columns of the unstacked BlockManager.

        Returns
        -------
        new_placement : ndarray[int]
            The placement of the new columns in `new_columns`.
        new_values : Union[ndarray, ExtensionArray]
            The first return value from _Unstacker.get_new_values.
        mask : ndarray[bool]
            The second return value from _Unstacker.get_new_values,
            reduced with ``any(0)``.
        """
        # shared with ExtensionBlock
        placement = new_columns.get_indexer(unstacker.get_new_columns())
        unstacked, value_mask = unstacker.get_new_values()
        return placement, unstacked, value_mask.any(0)

1716 

1717 

class ExtensionBlock(NonConsolidatableMixIn, Block):
    """Block for holding extension types.

    Notes
    -----
    This holds all 3rd-party extension array types. It's also the immediate
    parent class for our internal extension types' blocks, CategoricalBlock.

    ExtensionArrays are limited to 1-D.
    """

    is_extension = True

    def __init__(self, values, placement, ndim=None):
        # unbox before handing over to the mixin / Block initialisers
        super().__init__(self._maybe_coerce_values(values), placement, ndim)

    def _maybe_coerce_values(self, values):
        """
        Unbox to an extension array.

        This will unbox an ExtensionArray stored in an Index or Series.
        ExtensionArrays pass through. No dtype coercion is done.

        Parameters
        ----------
        values : Index, Series, ExtensionArray

        Returns
        -------
        ExtensionArray
        """
        return extract_array(values)

    @property
    def _holder(self):
        # For extension blocks, the holder is values-dependent.
        return type(self.values)

    @property
    def fill_value(self):
        # Used in reindex_indexer
        return self.values.dtype.na_value

    @property
    def _can_hold_na(self):
        # The default ExtensionArray._can_hold_na is True
        return self._holder._can_hold_na

    @property
    def is_view(self):
        """Extension arrays are never treated as views."""
        return False

    @property
    def is_numeric(self):
        # delegated to the dtype of the stored array
        return self.values.dtype._is_numeric

    def setitem(self, indexer, value):
        """Set the value inplace, returning a same-typed block.

        This differs from Block.setitem by not allowing setitem to change
        the dtype of the Block.

        Parameters
        ----------
        indexer : tuple, list-like, array-like, slice
            The subset of self.values to set
        value : object
            The value being set

        Returns
        -------
        Block

        Notes
        -----
        `indexer` is a direct slice/positional indexer. `value` must
        be a compatible shape.
        """
        # we are always 1-D, so only the first element of a tuple matters
        key = indexer[0] if isinstance(indexer, tuple) else indexer

        check_setitem_lengths(key, value, self.values)
        self.values[key] = value
        return self

    def get_values(self, dtype=None):
        """Return the values as an ndarray, with a leading axis of length 1
        added when the array has one dimension fewer than the block."""
        # ExtensionArrays must be iterable, so this works.
        arr = np.asarray(self.values)
        if arr.ndim == self.ndim - 1:
            arr = arr.reshape((1,) + arr.shape)
        return arr

    def array_values(self) -> ExtensionArray:
        return self.values

    def to_dense(self):
        return np.asarray(self.values)

    def to_native_types(self, slicer=None, na_rep="nan", quoting=None, **kwargs):
        """override to use ExtensionArray astype for the conversion"""
        selected = self.values if slicer is None else self.values[slicer]
        na_mask = isna(selected)

        as_objects = np.asarray(selected.astype(object))
        as_objects[na_mask] = na_rep

        # we are expected to return a 2-d ndarray
        return as_objects.reshape(1, len(as_objects))

    def take_nd(self, indexer, axis=0, new_mgr_locs=None, fill_tuple=None):
        """
        Take values according to indexer and return them as a block.
        """
        fill_value = None if fill_tuple is None else fill_tuple[0]

        # axis doesn't matter; we are really a single-dim object
        # but are passed the axis depending on the calling routing
        # if its REALLY axis 0, then this will be a reindex and not a take
        taken = self.values.take(indexer, fill_value=fill_value, allow_fill=True)

        # Called from three places in managers, all of which satisfy
        # this assertion
        assert not (self.ndim == 1 and new_mgr_locs is None)
        if new_mgr_locs is None:
            new_mgr_locs = self.mgr_locs

        return self.make_block_same_class(taken, new_mgr_locs)

    def _can_hold_element(self, element: Any) -> bool:
        # XXX: We may need to think about pushing this onto the array.
        # We're doing the same as CategoricalBlock here.
        return True

    def _slice(self, slicer):
        """ return a slice of my values """

        # slice the category
        # return same dims as we currently have
        if isinstance(slicer, tuple) and len(slicer) == 2:
            first, second = slicer
            if not com.is_null_slice(first):
                raise AssertionError("invalid slicing for a 1-ndim categorical")
            slicer = second

        return self.values[slicer]

    def concat_same_type(self, to_concat, placement=None):
        """
        Concatenate list of single blocks of the same type.
        """
        arrays = [blk.values for blk in to_concat]
        values = self._holder._concat_same_type(arrays)
        placement = placement or slice(0, len(values), 1)
        return self.make_block_same_class(values, ndim=self.ndim, placement=placement)

    def fillna(self, value, limit=None, inplace=False, downcast=None):
        """Fill NA values through the array's own ``fillna``; returns a
        one-element list of blocks."""
        target = self.values if inplace else self.values.copy()
        filled = target.fillna(value=value, limit=limit)
        new_block = self.make_block_same_class(
            values=filled, placement=self.mgr_locs, ndim=self.ndim
        )
        return [new_block]

    def interpolate(
        self, method="pad", axis=0, inplace=False, limit=None, fill_value=None, **kwargs
    ):
        """Fill via the array's ``fillna(value=..., method=..., limit=...)``
        and return the resulting block (not wrapped in a list)."""
        target = self.values if inplace else self.values.copy()
        filled = target.fillna(value=fill_value, method=method, limit=limit)
        return self.make_block_same_class(values=filled, placement=self.mgr_locs)

    def diff(self, n: int, axis: int = 1) -> List["Block"]:
        # we are by definition 1D, so axis 1 is mapped onto axis 0
        return super().diff(n, 0 if axis == 1 else axis)

    def shift(
        self, periods: int, axis: int = 0, fill_value: Any = None,
    ) -> List["ExtensionBlock"]:
        """
        Shift the block by `periods`.

        Dispatches to underlying ExtensionArray and re-boxes in an
        ExtensionBlock.
        """
        shifted = self.values.shift(periods=periods, fill_value=fill_value)
        return [
            self.make_block_same_class(
                shifted, placement=self.mgr_locs, ndim=self.ndim,
            )
        ]

    def where(
        self,
        other,
        cond,
        align=True,
        errors="raise",
        try_cast: bool = False,
        axis: int = 0,
    ) -> List["Block"]:
        """
        Return a one-element list with a block in which positions where
        ``cond`` is False are replaced by ``other``.

        Single-column DataFrames passed as ``other`` or ``cond`` are
        reduced to their only column first.
        """
        if isinstance(other, ABCDataFrame):
            # ExtensionArrays are 1-D, so if we get here then
            # `other` should be a DataFrame with a single column.
            assert other.shape[1] == 1
            other = other.iloc[:, 0]

        other = extract_array(other, extract_numpy=True)

        if isinstance(cond, ABCDataFrame):
            assert cond.shape[1] == 1
            cond = cond.iloc[:, 0]

        cond = extract_array(cond, extract_numpy=True)

        if lib.is_scalar(other) and isna(other):
            # The default `other` for Series / Frame is np.nan
            # we want to replace that with the correct NA value
            # for the type
            other = self.dtype.na_value

        # TODO(SparseArray.__setitem__): remove the sparse special case
        # We need to re-infer the type of the data after doing the
        # where, for cases where the subtypes don't match
        dtype = None if is_sparse(self.values) else self.dtype

        result = self.values.copy()
        replace_at = ~cond
        replacement = other if lib.is_scalar(other) else other[replace_at]
        try:
            result[replace_at] = replacement
        except (NotImplementedError, TypeError):
            # NotImplementedError for class not implementing `__setitem__`
            # TypeError for SparseArray, which implements just to raise
            # a TypeError
            result = self._holder._from_sequence(
                np.where(cond, self.values, other), dtype=dtype
            )

        return [self.make_block_same_class(result, placement=self.mgr_locs)]

    @property
    def _ftype(self):
        # prefer the array's own ftype when it defines one
        return getattr(self.values, "_pandas_ftype", Block._ftype)

    def _unstack(self, unstacker_func, new_columns, n_rows, fill_value):
        # ExtensionArray-safe unstack.
        # We override ObjectBlock._unstack, which unstacks directly on the
        # values of the array. For EA-backed blocks, this would require
        # converting to a 2-D ndarray of objects.
        # Instead, we unstack an ndarray of integer positions, followed by
        # a `take` on the actual values.
        positions = np.arange(n_rows)
        unstacker = unstacker_func(positions, fill_value=-1)

        new_placement, new_values, mask = self._get_unstack_items(
            unstacker, new_columns
        )

        blocks = []
        for indices, place in zip(new_values.T, new_placement):
            taken = self.values.take(indices, allow_fill=True, fill_value=fill_value)
            blocks.append(self.make_block_same_class(taken, [place]))
        return blocks, mask

2004 

2005 

class ObjectValuesExtensionBlock(ExtensionBlock):
    """
    Block providing backwards-compatibility for `.values`.

    Used by PeriodArray and IntervalArray to ensure that
    Series[T].values is an ndarray of objects.
    """

    def external_values(self, dtype=None):
        """Return the stored values cast to object dtype; ``dtype`` is
        accepted but not used."""
        as_objects = self.values.astype(object)
        return as_objects

2016 

2017 

class NumericBlock(Block):
    """Base class for the numeric blocks defined in this module."""

    __slots__ = ()
    # class-level flags; IntBlock overrides _can_hold_na
    _can_hold_na = True
    is_numeric = True

2022 

2023 

class FloatOrComplexBlock(NumericBlock):
    """Shared behaviour of FloatBlock and ComplexBlock."""

    __slots__ = ()

    def equals(self, other) -> bool:
        """
        Return whether ``other`` has the same dtype, shape and values,
        treating NaN in the same position on both sides as equal.
        """
        if self.dtype != other.dtype or self.shape != other.shape:
            return False
        mine = self.values
        theirs = other.values
        both_nan = np.isnan(mine) & np.isnan(theirs)
        return ((mine == theirs) | both_nan).all()

2032 

2033 

class FloatBlock(FloatOrComplexBlock):
    """Block holding floating point values."""

    __slots__ = ()
    is_float = True

    def _can_hold_element(self, element: Any) -> bool:
        """
        Return whether ``element`` is float- or integer-like.

        When a dtype can be inferred for ``element`` the decision is made
        on that dtype's scalar type; otherwise on the Python/NumPy scalar
        itself.  Booleans and datetime/timedelta values are rejected.
        """
        tipo = maybe_infer_dtype_type(element)
        if tipo is None:
            numeric_scalar = isinstance(element, (float, int, np.floating, np.int_))
            return numeric_scalar and not isinstance(
                element,
                (bool, np.bool_, datetime, timedelta, np.datetime64, np.timedelta64),
            )
        return issubclass(tipo.type, (np.floating, np.integer)) and not issubclass(
            tipo.type, (np.datetime64, np.timedelta64)
        )

    def to_native_types(
        self,
        slicer=None,
        na_rep="",
        float_format=None,
        decimal=".",
        quoting=None,
        **kwargs,
    ):
        """ convert to our native types format, slicing if desired """

        values = self.values
        if slicer is not None:
            values = values[:, slicer]

        # see gh-13418: no special formatting is desired at the
        # output (important for appropriate 'quoting' behaviour),
        # so do not pass it through the FloatArrayFormatter
        plain_output = float_format is None and decimal == "."
        if plain_output:
            na_mask = isna(values)

            if quoting:
                values = np.array(values, dtype="object")
            else:
                values = values.astype(str)

            values[na_mask] = na_rep
            return values

        from pandas.io.formats.format import FloatArrayFormatter

        return FloatArrayFormatter(
            values,
            na_rep=na_rep,
            float_format=float_format,
            decimal=decimal,
            quoting=quoting,
            fixed_width=False,
        ).get_result_as_array()

    def should_store(self, value):
        # when inserting a column should not coerce integers to floats
        # unnecessarily
        is_floating = issubclass(value.dtype.type, np.floating)
        return is_floating and value.dtype == self.dtype

2096 

2097 

class ComplexBlock(FloatOrComplexBlock):
    """Block holding complex floating point values."""

    __slots__ = ()
    is_complex = True

    def _can_hold_element(self, element: Any) -> bool:
        """
        Return whether ``element`` is float-, integer- or complex-like.

        Datetime-like and boolean values are rejected.
        """
        tipo = maybe_infer_dtype_type(element)
        if tipo is not None:
            # np.timedelta64 subclasses np.signedinteger, so datetime-likes
            # have to be excluded explicitly (as FloatBlock and IntBlock do).
            return issubclass(
                tipo.type, (np.floating, np.integer, np.complexfloating)
            ) and not issubclass(tipo.type, (np.datetime64, np.timedelta64))
        # np.floating rather than the np.float_ alias, which NumPy 2.0 removed
        return isinstance(
            element, (float, int, complex, np.floating, np.int_)
        ) and not isinstance(element, (bool, np.bool_))

    def should_store(self, value):
        """Return whether ``value`` has a complex floating dtype."""
        return issubclass(value.dtype.type, np.complexfloating)

2112 

2113 

class IntBlock(NumericBlock):
    """Block holding integer values."""

    __slots__ = ()
    is_integer = True
    _can_hold_na = False

    def _can_hold_element(self, element: Any) -> bool:
        """
        Return whether ``element`` is integer-like and fits this dtype.

        When a dtype can be inferred it must be a non-datetime-like integer
        dtype whose itemsize does not exceed this block's itemsize.
        """
        inferred = maybe_infer_dtype_type(element)
        if inferred is None:
            return is_integer(element)

        scalar_type = inferred.type
        if not issubclass(scalar_type, np.integer):
            return False
        # np.timedelta64 subclasses np.signedinteger, hence the exclusion
        if issubclass(scalar_type, (np.datetime64, np.timedelta64)):
            return False
        return self.dtype.itemsize >= inferred.itemsize

    def should_store(self, value):
        """Return whether ``value`` is an integer array of exactly this dtype."""
        if not is_integer_dtype(value):
            return False
        return value.dtype == self.dtype

2131 

2132 

class DatetimeLikeBlockMixin:
    """Mixin class for DatetimeBlock, DatetimeTZBlock, and TimedeltaBlock."""

    @property
    def _holder(self):
        # array class used to box the raw values
        return DatetimeArray

    @property
    def fill_value(self):
        return np.datetime64("NaT", "ns")

    def get_values(self, dtype=None):
        """
        Return the block values, boxed when an object dtype is requested.

        For an object ``dtype`` the values are wrapped in ``self._holder``,
        cast to object and reshaped to the original shape; otherwise the
        stored values are returned as they are.
        """
        if not is_object_dtype(dtype):
            return self.values

        flat = self.values.ravel()
        boxed = self._holder(flat).astype(object)
        return boxed.reshape(self.values.shape)

    def iget(self, key):
        """Like the parent ``iget``, but box numpy datetime/timedelta scalars."""
        # GH#31649 we need to wrap scalars in Timestamp/Timedelta
        # TODO(EA2D): this can be removed if we ever have 2D EA
        item = super().iget(key)
        if isinstance(item, np.datetime64):
            return Timestamp(item)
        if isinstance(item, np.timedelta64):
            return Timedelta(item)
        return item

    def shift(self, periods, axis=0, fill_value=None):
        """Shift via the array returned by ``array_values`` and re-wrap it."""
        # TODO(EA2D) this is unnecessary if these blocks are backed by 2D EAs
        shifted = self.array_values().shift(periods, fill_value=fill_value, axis=axis)
        return self.make_block_same_class(shifted)

2169 

2170 

class DatetimeBlock(DatetimeLikeBlockMixin, Block):
    """Block holding timezone-naive datetime64[ns] values."""

    __slots__ = ()
    is_datetime = True

    def __init__(self, values, placement, ndim=None):
        coerced = self._maybe_coerce_values(values)
        super().__init__(coerced, placement=placement, ndim=ndim)

    @property
    def _can_hold_na(self):
        return True

    def _maybe_coerce_values(self, values):
        """
        Validate the values handed to ``__init__``.

        Anything that is not already datetime64[ns] is converted, and a
        DatetimeArray is unwrapped to its underlying ndarray.

        Parameters
        ----------
        values : array-like
            Must be convertible to datetime64

        Returns
        -------
        values : ndarray[datetime64ns]

        Overridden by DatetimeTZBlock.
        """
        if values.dtype != _NS_DTYPE:
            values = conversion.ensure_datetime64ns(values)

        if isinstance(values, DatetimeArray):
            values = values._data

        assert isinstance(values, np.ndarray), type(values)
        return values

    def astype(self, dtype, copy: bool = False, errors: str = "raise"):
        """
        Cast the block to ``dtype``.

        A tz-aware target dtype is handled here: the values are localized to
        UTC when they carry no ``tz`` and then converted to the target
        timezone. Every other dtype is delegated to the parent class.
        """
        dtype = pandas_dtype(dtype)

        if not is_datetime64tz_dtype(dtype):
            # delegate
            return super().astype(dtype=dtype, copy=copy, errors=errors)

        # we were passed a datetime64[ns, tz]
        values = self.values
        if copy:
            # this should be the only copy
            values = values.copy()
        if getattr(values, "tz", None) is None:
            values = DatetimeArray(values).tz_localize("UTC")
        converted = values.tz_convert(dtype.tz)
        return self.make_block(converted)

    def _can_hold_element(self, element: Any) -> bool:
        """Return whether ``element`` can be stored without changing dtype."""
        inferred = maybe_infer_dtype_type(element)
        if inferred is not None:
            if not self.is_datetimetz:
                # GH#27419 if we get a non-nano datetime64 object
                return is_datetime64_dtype(inferred)

            # require exact match, since non-nano does not exist
            if is_dtype_equal(inferred, self.dtype):
                return True
            return is_valid_nat_for_dtype(element, self.dtype)

        if element is NaT:
            return True

        if isinstance(element, datetime):
            if self.is_datetimetz:
                return tz_compare(element.tzinfo, self.dtype.tz)
            return element.tzinfo is None

        return is_valid_nat_for_dtype(element, self.dtype)

    def to_native_types(
        self, slicer=None, na_rep=None, date_format=None, quoting=None, **kwargs
    ):
        """ convert to our native types format, slicing if desired """
        from pandas.io.formats.format import _get_format_datetime64_from_values

        values = self.values
        as_i8 = values.view("i8")

        if slicer is not None:
            values = values[..., slicer]
            as_i8 = as_i8[..., slicer]

        fmt = _get_format_datetime64_from_values(values, date_format)

        formatted = tslib.format_array_from_datetime(
            as_i8.ravel(),
            tz=getattr(self.values, "tz", None),
            format=fmt,
            na_rep=na_rep,
        )
        return np.atleast_2d(formatted.reshape(as_i8.shape))

    def should_store(self, value):
        """Return whether ``value`` is a plain (non-tz, non-EA) datetime64 array."""
        if not issubclass(value.dtype.type, np.datetime64):
            return False
        if is_datetime64tz_dtype(value):
            return False
        return not is_extension_array_dtype(value)

    def set(self, locs, values):
        """
        Modify Block in-place with new item value

        Returns
        -------
        None
        """
        self.values[locs] = conversion.ensure_datetime64ns(values, copy=False)

    def external_values(self):
        as_ns = self.values.astype("datetime64[ns]", copy=False)
        return np.asarray(as_ns)

    def array_values(self) -> ExtensionArray:
        return DatetimeArray._simple_new(self.values)

2297 

2298 

class DatetimeTZBlock(ExtensionBlock, DatetimeBlock):
    """
    Implement a datetime64 block with a tz attribute.

    The values are stored as a DatetimeArray (see ``_maybe_coerce_values``),
    which must carry a timezone.
    """

    __slots__ = ()
    is_datetimetz = True
    is_extension = True

    # Reuse the DatetimeBlock implementations explicitly; ExtensionBlock comes
    # first in the bases, so these names are pinned here.
    _can_hold_element = DatetimeBlock._can_hold_element
    to_native_types = DatetimeBlock.to_native_types
    fill_value = np.datetime64("NaT", "ns")

    @property
    def _holder(self):
        # array class used to wrap/construct the block values
        return DatetimeArray

    def _maybe_coerce_values(self, values):
        """
        Input validation for values passed to __init__. Ensure that
        we have datetime64TZ, coercing if necessary.

        Parameters
        ----------
        values : array-like
            Must be convertible to datetime64

        Returns
        -------
        values : DatetimeArray

        Raises
        ------
        ValueError
            If the resulting array has no timezone.
        """
        if not isinstance(values, self._holder):
            values = self._holder(values)

        if values.tz is None:
            raise ValueError("cannot create a DatetimeTZBlock without a tz")

        return values

    @property
    def is_view(self):
        """ return a boolean if I am possibly a view """
        # check the ndarray values of the DatetimeIndex values
        return self.values._data.base is not None

    def get_values(self, dtype=None):
        """
        Returns an ndarray of values.

        Parameters
        ----------
        dtype : np.dtype
            Only `object`-like dtypes are respected here (not sure
            why).

        Returns
        -------
        values : ndarray
            When ``dtype=object``, then an object-dtype ndarray of
            boxed values is returned. Otherwise, an M8[ns] ndarray
            is returned.

        DatetimeArray is always 1-d. ``get_values`` will reshape
        the return value to be the same dimensionality as the
        block.
        """
        values = self.values
        if is_object_dtype(dtype):
            values = values.astype(object)

        values = np.asarray(values)

        if self.ndim == 2:
            # Ensure that our shape is correct for DataFrame.
            # ExtensionArrays are always 1-D, even in a DataFrame when
            # the analogous NumPy-backed column would be a 2-D ndarray.
            values = values.reshape(1, -1)
        return values

    def to_dense(self):
        """Return the values as an M8[ns] ndarray (tzinfo is discarded)."""
        # we request M8[ns] dtype here, even though it discards tzinfo,
        # as lots of code (e.g. anything using values_from_object)
        # expects that behavior.
        return np.asarray(self.values, dtype=_NS_DTYPE)

    def _slice(self, slicer):
        """
        Return a slice of my values.

        A tuple slicer is unpacked as ``(col, loc)``; ``col`` must be a null
        slice or 0, otherwise IndexError is raised.
        """
        if isinstance(slicer, tuple):
            col, loc = slicer
            if not com.is_null_slice(col) and col != 0:
                raise IndexError(f"{self} only contains one item")
            return self.values[loc]
        return self.values[slicer]

    def diff(self, n: int, axis: int = 0) -> List["Block"]:
        """
        1st discrete difference.

        Parameters
        ----------
        n : int
            Number of periods to diff.
        axis : int, default 0
            Axis to diff upon.

        Returns
        -------
        A list with a new TimeDeltaBlock.

        Raises
        ------
        NotImplementedError
            If ``axis`` is 0.

        Notes
        -----
        The arguments here are mimicking shift so they are called correctly
        by apply.
        """
        if axis == 0:
            # Cannot currently calculate diff across multiple blocks since this
            # function is invoked via apply
            raise NotImplementedError
        new_values = (self.values - self.shift(n, axis=axis)[0].values).asi8

        # Reshape the new_values like how algos.diff does for timedelta data
        new_values = new_values.reshape(1, len(new_values))
        new_values = new_values.astype("timedelta64[ns]")
        return [TimeDeltaBlock(new_values, placement=self.mgr_locs.indexer)]

    def concat_same_type(self, to_concat, placement=None):
        """
        Concatenate blocks, returning an ObjectBlock when their dtypes differ.

        When all blocks in ``to_concat`` share one dtype the parent
        implementation is used.
        """
        # need to handle concat([tz1, tz2]) here, since DatetimeArray
        # only handles cases where all the tzs are the same.
        # Instead of placing the condition here, it could also go into the
        # is_uniform_join_units check, but I'm not sure what is better.
        if len({x.dtype for x in to_concat}) > 1:
            values = concat_datetime([x.values for x in to_concat])
            placement = placement or slice(0, len(values), 1)

            if self.ndim > 1:
                values = np.atleast_2d(values)
            return ObjectBlock(values, ndim=self.ndim, placement=placement)
        return super().concat_same_type(to_concat, placement)

    def fillna(self, value, limit=None, inplace=False, downcast=None):
        """Fill missing values, casting to object if ``value`` cannot be held."""
        # We support filling a DatetimeTZ with a `value` whose timezone
        # is different by coercing to object.
        if self._can_hold_element(value):
            return super().fillna(value, limit, inplace, downcast)

        # different timezones, or a non-tz
        return self.astype(object).fillna(
            value, limit=limit, inplace=inplace, downcast=downcast
        )

    def setitem(self, indexer, value):
        """
        Set ``value`` at ``indexer``, falling back to an ObjectBlock.

        The parent implementation is used when ``value`` can be held or the
        indexer is an empty ndarray; otherwise the values are cast to object
        and the assignment is done on a new ObjectBlock.
        """
        # https://github.com/pandas-dev/pandas/issues/24020
        # Need a dedicated setitem until #24020 (type promotion in setitem
        # for extension arrays) is designed and implemented.
        if self._can_hold_element(value) or (
            isinstance(indexer, np.ndarray) and indexer.size == 0
        ):
            return super().setitem(indexer, value)

        obj_vals = self.values.astype(object)
        newb = make_block(
            obj_vals, placement=self.mgr_locs, klass=ObjectBlock, ndim=self.ndim
        )
        return newb.setitem(indexer, value)

    def equals(self, other) -> bool:
        """Return whether dtype, shape and the i8 views of the values match."""
        # override for significant performance improvement
        if self.dtype != other.dtype or self.shape != other.shape:
            return False
        return (self.values.view("i8") == other.values.view("i8")).all()

    def quantile(self, qs, interpolation="linear", axis=0):
        """
        Compute quantiles on an M8[ns] view and re-wrap with this dtype.
        """
        naive = self.values.view("M8[ns]")

        # kludge for 2D block with 1D values
        naive = naive.reshape(self.shape)

        blk = self.make_block(naive)
        res_blk = blk.quantile(qs, interpolation=interpolation, axis=axis)

        # ravel is kludge for 2D block with 1D values, assumes column-like
        aware = self._holder(res_blk.values.ravel(), dtype=self.dtype)
        return self.make_block_same_class(aware, ndim=res_blk.ndim)

2479 

2480 

class TimeDeltaBlock(DatetimeLikeBlockMixin, IntBlock):
    """Block holding timedelta64[ns] values."""

    __slots__ = ()
    is_timedelta = True
    _can_hold_na = True
    is_numeric = False
    fill_value = np.timedelta64("NaT", "ns")

    def __init__(self, values, placement, ndim=None):
        # coerce to timedelta64[ns], then unwrap a TimedeltaArray to its ndarray
        if values.dtype != _TD_DTYPE:
            values = conversion.ensure_timedelta64ns(values)
        if isinstance(values, TimedeltaArray):
            values = values._data
        assert isinstance(values, np.ndarray), type(values)
        super().__init__(values, placement=placement, ndim=ndim)

    @property
    def _holder(self):
        return TimedeltaArray

    def _can_hold_element(self, element: Any) -> bool:
        """Return whether ``element`` is timedelta-like or a valid NaT."""
        inferred = maybe_infer_dtype_type(element)
        if inferred is not None:
            return issubclass(inferred.type, np.timedelta64)
        if element is NaT or isinstance(element, (timedelta, np.timedelta64)):
            return True
        return is_valid_nat_for_dtype(element, self.dtype)

    def fillna(self, value, **kwargs):
        """
        Fill missing values; integer fill values are rejected.

        Raises
        ------
        TypeError
            If ``value`` is an integer.
        """
        # Integer fill values were deprecated and now raise
        # (GH#24694, GH#19233).
        if is_integer(value):
            raise TypeError(
                "Passing integers to fillna for timedelta64[ns] dtype is no "
                "longer supported. To obtain the old behavior, pass "
                "`pd.Timedelta(seconds=n)` instead."
            )
        return super().fillna(value, **kwargs)

    def should_store(self, value):
        """Return whether ``value`` is a non-extension timedelta64 array."""
        if not issubclass(value.dtype.type, np.timedelta64):
            return False
        return not is_extension_array_dtype(value)

    def to_native_types(self, slicer=None, na_rep=None, quoting=None, **kwargs):
        """ convert to our native types format, slicing if desired """
        values = self.values
        if slicer is not None:
            values = values[:, slicer]
        missing = isna(values)

        if na_rep is None:
            na_rep = "NaT"

        out = np.empty(values.shape, dtype=object)
        out[missing] = na_rep
        present = (~missing).ravel()

        # FIXME:
        # should use the formats.format.Timedelta64Formatter here
        # to figure what format to pass to the Timedelta
        # e.g. to not show the decimals say
        rendered = [
            Timedelta(val)._repr_base(format="all") for val in values.ravel()[present]
        ]
        out.flat[present] = np.array(rendered, dtype=object)
        return out

    def external_values(self, dtype=None):
        as_ns = self.values.astype("timedelta64[ns]", copy=False)
        return np.asarray(as_ns)

    def array_values(self) -> ExtensionArray:
        return TimedeltaArray._simple_new(self.values)

2557 

2558 

class BoolBlock(NumericBlock):
    """Block holding boolean values."""

    __slots__ = ()
    is_bool = True
    _can_hold_na = False

    def _can_hold_element(self, element: Any) -> bool:
        """Return whether ``element`` is boolean."""
        inferred = maybe_infer_dtype_type(element)
        if inferred is None:
            return isinstance(element, (bool, np.bool_))
        return issubclass(inferred.type, np.bool_)

    def should_store(self, value):
        """Return whether ``value`` is a non-extension boolean array."""
        if not issubclass(value.dtype.type, np.bool_):
            return False
        return not is_extension_array_dtype(value)

    def replace(
        self, to_replace, value, inplace=False, filter=None, regex=False, convert=True
    ):
        """
        Replace values, doing nothing when ``to_replace`` is not castable to bool.
        """
        inplace = validate_bool_kwarg(inplace, "inplace")
        candidates = np.atleast_1d(to_replace)
        if np.can_cast(candidates, bool):
            return super().replace(
                to_replace,
                value,
                inplace=inplace,
                filter=filter,
                regex=regex,
                convert=convert,
            )
        # nothing in a bool block can match; hand back the block unchanged
        return self

2590 

2591 

class ObjectBlock(Block):
    """Block holding object-dtype values."""

    __slots__ = ()
    is_object = True
    _can_hold_na = True

    def __init__(self, values, placement=None, ndim=2):
        # numpy string dtypes are stored as object
        if issubclass(values.dtype.type, str):
            values = np.array(values, dtype=object)

        super().__init__(values, ndim=ndim, placement=placement)

    @property
    def is_bool(self):
        """ we can be a bool if we have only bool values but are of type
        object
        """
        return lib.is_bool_array(self.values.ravel())

    def convert(
        self,
        copy: bool = True,
        datetime: bool = True,
        numeric: bool = True,
        timedelta: bool = True,
        coerce: bool = False,
    ):
        """ attempt to coerce any object types to better types return a copy of
        the block (if copy = True) by definition we ARE an ObjectBlock!!!!!

        can return multiple blocks!
        """

        # operate column-by-column
        def f(mask, val, idx):
            shape = val.shape
            values = soft_convert_objects(
                val.ravel(),
                datetime=datetime,
                numeric=numeric,
                timedelta=timedelta,
                coerce=coerce,
                copy=copy,
            )
            if isinstance(values, np.ndarray):
                # TODO: allow EA once reshape is supported
                values = values.reshape(shape)

            values = _block_shape(values, ndim=self.ndim)
            return values

        if self.ndim == 2:
            blocks = self.split_and_operate(None, f, False)
        else:
            values = f(None, self.values.ravel(), None)
            blocks = [make_block(values, ndim=self.ndim, placement=self.mgr_locs)]

        return blocks

    def _maybe_downcast(self, blocks: List["Block"], downcast=None) -> List["Block"]:
        """
        Convert ``blocks`` (datetime inference only) unless ``downcast`` is given.
        """
        if downcast is not None:
            return blocks

        # split and convert the blocks
        return _extend_blocks([b.convert(datetime=True, numeric=False) for b in blocks])

    def _can_hold_element(self, element: Any) -> bool:
        return True

    def should_store(self, value):
        """
        Return whether ``value`` belongs in an object block, i.e. it is neither
        a numeric/datetime/bool ndarray nor an extension array.
        """
        return not (
            issubclass(
                value.dtype.type,
                (np.integer, np.floating, np.complexfloating, np.datetime64, np.bool_),
            )
            or is_extension_array_dtype(value)
        )

    def replace(
        self, to_replace, value, inplace=False, filter=None, regex=False, convert=True
    ):
        """
        Replace values, dispatching on whether the arguments are list-like
        and whether regular-expression matching is requested.

        List-like arguments are applied one replacement at a time through
        ``_replace_single`` and a list of blocks is returned.
        """
        to_rep_is_list = is_list_like(to_replace)
        value_is_list = is_list_like(value)
        both_lists = to_rep_is_list and value_is_list
        either_list = to_rep_is_list or value_is_list

        result_blocks = []
        blocks = [self]

        if not either_list and is_re(to_replace):
            return self._replace_single(
                to_replace,
                value,
                inplace=inplace,
                filter=filter,
                regex=True,
                convert=convert,
            )
        elif not (either_list or regex):
            return super().replace(
                to_replace,
                value,
                inplace=inplace,
                filter=filter,
                regex=regex,
                convert=convert,
            )
        elif both_lists:
            # pairwise replacement; each pass operates on the previous result
            for to_rep, v in zip(to_replace, value):
                result_blocks = []
                for b in blocks:
                    result = b._replace_single(
                        to_rep,
                        v,
                        inplace=inplace,
                        filter=filter,
                        regex=regex,
                        convert=convert,
                    )
                    result_blocks = _extend_blocks(result, result_blocks)
                blocks = result_blocks
            return result_blocks

        elif to_rep_is_list and regex:
            # several patterns, one replacement value
            for to_rep in to_replace:
                result_blocks = []
                for b in blocks:
                    result = b._replace_single(
                        to_rep,
                        value,
                        inplace=inplace,
                        filter=filter,
                        regex=regex,
                        convert=convert,
                    )
                    result_blocks = _extend_blocks(result, result_blocks)
                blocks = result_blocks
            return result_blocks

        return self._replace_single(
            to_replace,
            value,
            inplace=inplace,
            filter=filter,
            convert=convert,
            regex=regex,
        )

    def _replace_single(
        self,
        to_replace,
        value,
        inplace=False,
        filter=None,
        regex=False,
        convert=True,
        mask=None,
    ):
        """
        Replace elements by the given value.

        Parameters
        ----------
        to_replace : object or pattern
            Scalar to replace or regular expression to match.
        value : object
            Replacement object.
        inplace : bool, default False
            Perform inplace modification.
        filter : list, optional
        regex : bool, default False
            If true, perform regular expression substitution.
        convert : bool, default True
            If true, try to coerce any object types to better types.
        mask : array-like of bool, optional
            Boolean selector applied to the (filtered) values; only the
            selected elements are passed through the replacement.

        Returns
        -------
        a new block, the result after replacing
        """
        inplace = validate_bool_kwarg(inplace, "inplace")

        # to_replace is regex compilable
        to_rep_re = regex and is_re_compilable(to_replace)

        # regex is regex compilable
        regex_re = is_re_compilable(regex)

        # only one will survive
        if to_rep_re and regex_re:
            raise AssertionError(
                "only one of to_replace and regex can be regex compilable"
            )

        # if regex was passed as something that can be a regex (rather than a
        # boolean)
        if regex_re:
            to_replace = regex

        regex = regex_re or to_rep_re

        # try to get the pattern attribute (compiled re) or it's a string
        if is_re(to_replace):
            pattern = to_replace.pattern
        else:
            pattern = to_replace

        # if the pattern is not empty and to_replace is either a string or a
        # regex
        if regex and pattern:
            rx = re.compile(to_replace)
        else:
            # if the thing to replace is not a string or compiled regex call
            # the superclass method -> to_replace is some kind of object
            return super().replace(
                to_replace, value, inplace=inplace, filter=filter, regex=regex
            )

        new_values = self.values if inplace else self.values.copy()

        # deal with replacing values with objects (strings) that match but
        # whose replacement is not a string (numeric, nan, object)
        if isna(value) or not isinstance(value, str):

            def re_replacer(s):
                if is_re(rx) and isinstance(s, str):
                    return value if rx.search(s) is not None else s
                else:
                    return s

        else:
            # value is guaranteed to be a string here, s can be either a string
            # or null if it's null it gets returned
            def re_replacer(s):
                if is_re(rx) and isinstance(s, str):
                    return rx.sub(value, s)
                else:
                    return s

        f = np.vectorize(re_replacer, otypes=[self.dtype])

        if filter is None:
            filt = slice(None)
        else:
            filt = self.mgr_locs.isin(filter).nonzero()[0]

        if mask is None:
            new_values[filt] = f(new_values[filt])
        else:
            # ``new_values[filt]`` is a copy whenever ``filt`` is an integer
            # index array, so a chained ``new_values[filt][mask] = ...`` would
            # write into a temporary and be lost. Update the selection, then
            # write it back explicitly.
            selected = new_values[filt]
            selected[mask] = f(selected[mask])
            new_values[filt] = selected

        # convert
        block = self.make_block(new_values)
        if convert:
            block = block.convert(numeric=False)
        return block

    def _replace_coerce(
        self, to_replace, value, inplace=True, regex=False, convert=False, mask=None
    ):
        """
        Replace value corresponding to the given boolean array with another
        value.

        Parameters
        ----------
        to_replace : object or pattern
            Scalar to replace or regular expression to match.
        value : object
            Replacement object.
        inplace : bool, default True
            Perform inplace modification.
        regex : bool, default False
            If true, perform regular expression substitution.
        convert : bool, default False
            If true, try to coerce any object types to better types.
        mask : array-like of bool
            Boolean selector for the elements to replace; ``mask.any()`` is
            called on it, so it must be provided.

        Returns
        -------
        A new block if there is anything to replace or the original block.
        """
        if mask.any():
            block = super()._replace_coerce(
                to_replace=to_replace,
                value=value,
                inplace=inplace,
                regex=regex,
                convert=convert,
                mask=mask,
            )
            if convert:
                block = [b.convert(numeric=False, copy=True) for b in block]
            return block
        if convert:
            return [self.convert(numeric=False, copy=True)]
        return self

2891 

2892 

class CategoricalBlock(ExtensionBlock):
    """Block holding a Categorical."""

    __slots__ = ()
    is_categorical = True
    _verify_integrity = True
    _can_hold_na = True
    _concatenator = staticmethod(concat_categorical)

    def __init__(self, values, placement, ndim=None):
        # coerce to categorical if we can
        unwrapped = extract_array(values)
        assert isinstance(unwrapped, Categorical), type(unwrapped)
        super().__init__(unwrapped, placement=placement, ndim=ndim)

    @property
    def _holder(self):
        return Categorical

    @property
    def array_dtype(self):
        """ the dtype to return if I want to construct this block as an
        array
        """
        return np.object_

    def to_dense(self):
        # Categorical.get_values returns a DatetimeIndex for datetime
        # categories, so we can't simply use `np.asarray(self.values)` like
        # other types.
        return self.values._internal_get_values()

    def to_native_types(self, slicer=None, na_rep="", quoting=None, **kwargs):
        """ convert to our native types format, slicing if desired """
        cat = self.values
        if slicer is not None:
            # Categorical is always one dimension
            cat = cat[slicer]
        missing = isna(cat)
        as_object = np.array(cat, dtype="object")
        as_object[missing] = na_rep

        # we are expected to return a 2-d ndarray
        return as_object.reshape(1, len(as_object))

    def concat_same_type(self, to_concat, placement=None):
        """
        Concatenate list of single blocks of the same type.

        Note that this CategoricalBlock._concat_same_type *may* not
        return a CategoricalBlock. When the categories in `to_concat`
        differ, this will return an object ndarray.

        If / when we decide we don't like that behavior:

        1. Change Categorical._concat_same_type to use union_categoricals
        2. Delete this method.
        """
        pieces = [blk.values for blk in to_concat]
        values = self._concatenator(pieces, axis=self.ndim - 1)
        # not using self.make_block_same_class as values can be object dtype
        return make_block(
            values, placement=placement or slice(0, len(values), 1), ndim=self.ndim
        )

    def replace(
        self,
        to_replace,
        value,
        inplace: bool = False,
        filter=None,
        regex: bool = False,
        convert: bool = True,
    ):
        """
        Replace values in the underlying Categorical.

        Without a ``filter`` the Categorical's own ``replace`` is used; with a
        ``filter`` a non-NA ``value`` is first added as a category and the
        parent implementation does the replacement.
        """
        inplace = validate_bool_kwarg(inplace, "inplace")
        result = self if inplace else self.copy()

        if filter is not None:  # replace was called on a DataFrame
            if not isna(value):
                result.values.add_categories(value, inplace=True)
            return super(CategoricalBlock, result).replace(
                to_replace, value, inplace, filter, regex, convert
            )

        # replace was called on a series
        result.values.replace(to_replace, value, inplace=True)
        if not convert:
            return result
        return result.convert(numeric=False, copy=not inplace)

2981 

2982 

2983# ----------------------------------------------------------------- 

2984# Constructor Helpers 

2985 

2986 

def get_block_type(values, dtype=None):
    """
    Find the appropriate Block subclass to use for the given values and dtype.

    The checks run in a fixed order and the first match wins; anything that
    matches none of them is stored in an ObjectBlock.

    Parameters
    ----------
    values : ndarray-like
    dtype : numpy or pandas dtype

    Returns
    -------
    cls : class, subclass of Block
    """
    dtype = dtype or values.dtype
    scalar_type = dtype.type

    if is_sparse(dtype):
        # Need this first(ish) so that Sparse[datetime] is sparse
        return ExtensionBlock
    if is_categorical(values):
        return CategoricalBlock
    if issubclass(scalar_type, np.datetime64):
        assert not is_datetime64tz_dtype(values)
        return DatetimeBlock
    if is_datetime64tz_dtype(values):
        return DatetimeTZBlock
    if is_interval_dtype(dtype) or is_period_dtype(dtype):
        return ObjectValuesExtensionBlock
    if is_extension_array_dtype(values):
        return ExtensionBlock
    if issubclass(scalar_type, np.floating):
        return FloatBlock
    if issubclass(scalar_type, np.timedelta64):
        # timedelta64 must be tested before the generic integer check below
        assert issubclass(scalar_type, np.integer)
        return TimeDeltaBlock
    if issubclass(scalar_type, np.complexfloating):
        return ComplexBlock
    if issubclass(scalar_type, np.integer):
        return IntBlock
    if dtype == np.bool_:
        return BoolBlock
    return ObjectBlock

3031 

3032 

def make_block(values, placement, klass=None, ndim=None, dtype=None):
    """
    Build a Block for ``values``, choosing the class via ``get_block_type``
    when ``klass`` is not given.
    """
    # Ensure that we don't allow PandasArray / PandasDtype in internals.
    # For now, blocks should be backed by ndarrays when possible.
    if isinstance(values, ABCPandasArray):
        values = values.to_numpy()
        if ndim and ndim > 1:
            values = np.atleast_2d(values)

    if isinstance(dtype, PandasDtype):
        dtype = dtype.numpy_dtype

    if klass is None:
        klass = get_block_type(values, dtype or values.dtype)
    elif klass is DatetimeTZBlock and not is_datetime64tz_dtype(values):
        # TODO: This is no longer hit internally; does it need to be retained
        # for e.g. pyarrow?
        values = DatetimeArray._simple_new(values, dtype=dtype)

    return klass(values, ndim=ndim, placement=placement)

3054 

3055 

3056# ----------------------------------------------------------------- 

3057 

3058 

3059def _extend_blocks(result, blocks=None): 

3060 """ return a new extended blocks, given the result """ 

3061 from pandas.core.internals import BlockManager 

3062 

3063 if blocks is None: 

3064 blocks = [] 

3065 if isinstance(result, list): 

3066 for r in result: 

3067 if isinstance(r, list): 

3068 blocks.extend(r) 

3069 else: 

3070 blocks.append(r) 

3071 elif isinstance(result, BlockManager): 

3072 blocks.extend(result.blocks) 

3073 else: 

3074 blocks.append(result) 

3075 return blocks 

3076 

3077 

3078def _block_shape(values, ndim=1, shape=None): 

3079 """ guarantee the shape of the values to be at least 1 d """ 

3080 if values.ndim < ndim: 

3081 if shape is None: 

3082 shape = values.shape 

3083 if not is_extension_array_dtype(values): 

3084 # TODO: https://github.com/pandas-dev/pandas/issues/23023 

3085 # block.shape is incorrect for "2D" ExtensionArrays 

3086 # We can't, and don't need to, reshape. 

3087 values = values.reshape(tuple((1,) + shape)) 

3088 return values 

3089 

3090 

3091def _merge_blocks(blocks, dtype=None, _can_consolidate=True): 

3092 

3093 if len(blocks) == 1: 

3094 return blocks[0] 

3095 

3096 if _can_consolidate: 

3097 

3098 if dtype is None: 

3099 if len({b.dtype for b in blocks}) != 1: 

3100 raise AssertionError("_merge_blocks are invalid!") 

3101 

3102 # FIXME: optimization potential in case all mgrs contain slices and 

3103 # combination of those slices is a slice, too. 

3104 new_mgr_locs = np.concatenate([b.mgr_locs.as_array for b in blocks]) 

3105 new_values = np.vstack([b.values for b in blocks]) 

3106 

3107 argsort = np.argsort(new_mgr_locs) 

3108 new_values = new_values[argsort] 

3109 new_mgr_locs = new_mgr_locs[argsort] 

3110 

3111 return make_block(new_values, placement=new_mgr_locs) 

3112 

3113 # no merge 

3114 return blocks 

3115 

3116 

3117def _safe_reshape(arr, new_shape): 

3118 """ 

3119 If possible, reshape `arr` to have shape `new_shape`, 

3120 with a couple of exceptions (see gh-13012): 

3121 

3122 1) If `arr` is a ExtensionArray or Index, `arr` will be 

3123 returned as is. 

3124 2) If `arr` is a Series, the `_values` attribute will 

3125 be reshaped and returned. 

3126 

3127 Parameters 

3128 ---------- 

3129 arr : array-like, object to be reshaped 

3130 new_shape : int or tuple of ints, the new shape 

3131 """ 

3132 if isinstance(arr, ABCSeries): 

3133 arr = arr._values 

3134 if not isinstance(arr, ABCExtensionArray): 

3135 arr = arr.reshape(new_shape) 

3136 return arr 

3137 

3138 

3139def _putmask_smart(v, mask, n): 

3140 """ 

3141 Return a new ndarray, try to preserve dtype if possible. 

3142 

3143 Parameters 

3144 ---------- 

3145 v : `values`, updated in-place (array like) 

3146 mask : np.ndarray 

3147 Applies to both sides (array like). 

3148 n : `new values` either scalar or an array like aligned with `values` 

3149 

3150 Returns 

3151 ------- 

3152 values : ndarray with updated values 

3153 this *may* be a copy of the original 

3154 

3155 See Also 

3156 -------- 

3157 ndarray.putmask 

3158 """ 

3159 

3160 # we cannot use np.asarray() here as we cannot have conversions 

3161 # that numpy does when numeric are mixed with strings 

3162 

3163 # n should be the length of the mask or a scalar here 

3164 if not is_list_like(n): 

3165 n = np.repeat(n, len(mask)) 

3166 

3167 # see if we are only masking values that if putted 

3168 # will work in the current dtype 

3169 try: 

3170 nn = n[mask] 

3171 except TypeError: 

3172 # TypeError: only integer scalar arrays can be converted to a scalar index 

3173 pass 

3174 else: 

3175 # make sure that we have a nullable type 

3176 # if we have nulls 

3177 if not _isna_compat(v, nn[0]): 

3178 pass 

3179 elif not (is_float_dtype(nn.dtype) or is_integer_dtype(nn.dtype)): 

3180 # only compare integers/floats 

3181 pass 

3182 elif not (is_float_dtype(v.dtype) or is_integer_dtype(v.dtype)): 

3183 # only compare integers/floats 

3184 pass 

3185 else: 

3186 

3187 # we ignore ComplexWarning here 

3188 with warnings.catch_warnings(record=True): 

3189 warnings.simplefilter("ignore", np.ComplexWarning) 

3190 nn_at = nn.astype(v.dtype) 

3191 

3192 comp = nn == nn_at 

3193 if is_list_like(comp) and comp.all(): 

3194 nv = v.copy() 

3195 nv[mask] = nn_at 

3196 return nv 

3197 

3198 n = np.asarray(n) 

3199 

3200 def _putmask_preserve(nv, n): 

3201 try: 

3202 nv[mask] = n[mask] 

3203 except (IndexError, ValueError): 

3204 nv[mask] = n 

3205 return nv 

3206 

3207 # preserves dtype if possible 

3208 if v.dtype.kind == n.dtype.kind: 

3209 return _putmask_preserve(v, n) 

3210 

3211 # change the dtype if needed 

3212 dtype, _ = maybe_promote(n.dtype) 

3213 

3214 if is_extension_array_dtype(v.dtype) and is_object_dtype(dtype): 

3215 v = v._internal_get_values(dtype) 

3216 else: 

3217 v = v.astype(dtype) 

3218 

3219 return _putmask_preserve(v, n)