Coverage for /home/martinb/.local/share/virtualenvs/camcops/lib/python3.6/site-packages/pandas/core/internals/blocks.py : 20%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1from datetime import datetime, timedelta
2import functools
3import inspect
4import re
5from typing import Any, List
6import warnings
8import numpy as np
10from pandas._libs import NaT, Timestamp, algos as libalgos, lib, tslib, writers
11from pandas._libs.index import convert_scalar
12import pandas._libs.internals as libinternals
13from pandas._libs.tslibs import Timedelta, conversion
14from pandas._libs.tslibs.timezones import tz_compare
15from pandas.util._validators import validate_bool_kwarg
17from pandas.core.dtypes.cast import (
18 astype_nansafe,
19 find_common_type,
20 infer_dtype_from,
21 infer_dtype_from_scalar,
22 maybe_downcast_numeric,
23 maybe_downcast_to_dtype,
24 maybe_infer_dtype_type,
25 maybe_promote,
26 maybe_upcast,
27 soft_convert_objects,
28)
29from pandas.core.dtypes.common import (
30 _NS_DTYPE,
31 _TD_DTYPE,
32 ensure_platform_int,
33 is_bool_dtype,
34 is_categorical,
35 is_categorical_dtype,
36 is_datetime64_dtype,
37 is_datetime64tz_dtype,
38 is_dtype_equal,
39 is_extension_array_dtype,
40 is_float_dtype,
41 is_integer,
42 is_integer_dtype,
43 is_interval_dtype,
44 is_list_like,
45 is_object_dtype,
46 is_period_dtype,
47 is_re,
48 is_re_compilable,
49 is_sparse,
50 is_timedelta64_dtype,
51 pandas_dtype,
52)
53from pandas.core.dtypes.concat import concat_categorical, concat_datetime
54from pandas.core.dtypes.dtypes import CategoricalDtype, ExtensionDtype
55from pandas.core.dtypes.generic import (
56 ABCDataFrame,
57 ABCExtensionArray,
58 ABCPandasArray,
59 ABCSeries,
60)
61from pandas.core.dtypes.missing import (
62 _isna_compat,
63 array_equivalent,
64 is_valid_nat_for_dtype,
65 isna,
66)
68import pandas.core.algorithms as algos
69from pandas.core.arrays import (
70 Categorical,
71 DatetimeArray,
72 ExtensionArray,
73 PandasArray,
74 PandasDtype,
75 TimedeltaArray,
76)
77from pandas.core.base import PandasObject
78import pandas.core.common as com
79from pandas.core.construction import extract_array
80from pandas.core.indexers import (
81 check_setitem_lengths,
82 is_empty_indexer,
83 is_scalar_indexer,
84)
85import pandas.core.missing as missing
86from pandas.core.nanops import nanpercentile
88from pandas.io.formats.printing import pprint_thing
class Block(PandasObject):
    """
    Canonical n-dimensional unit of homogeneous dtype contained in a pandas
    data structure

    Index-ignorant; let the container take care of that
    """

    # Instances store only these three attributes.
    __slots__ = ["_mgr_locs", "values", "ndim"]
    # Kind flags; every one is False on this base class.
    is_numeric = False
    is_float = False
    is_integer = False
    is_complex = False
    is_datetime = False
    is_datetimetz = False
    is_timedelta = False
    is_bool = False
    is_object = False
    is_categorical = False
    is_extension = False
    # Checked by fillna / _interpolate_with_fill before attempting to fill.
    _can_hold_na = False
    # First element of the tuple returned by _consolidate_key.
    _can_consolidate = True
    _verify_integrity = True
    # When True, __init__, _check_ndim and getitem_block validate dimensions.
    _validate_ndim = True
    # Suffix used by the ftype property ("<dtype>:<_ftype>").
    _ftype = "dense"
    # Function used by concat_same_type to join block values.
    _concatenator = staticmethod(np.concatenate)
118 def __init__(self, values, placement, ndim=None):
119 self.ndim = self._check_ndim(values, ndim)
120 self.mgr_locs = placement
121 self.values = values
123 if self._validate_ndim and self.ndim and len(self.mgr_locs) != len(self.values):
124 raise ValueError(
125 f"Wrong number of items passed {len(self.values)}, "
126 f"placement implies {len(self.mgr_locs)}"
127 )
129 def _check_ndim(self, values, ndim):
130 """
131 ndim inference and validation.
133 Infers ndim from 'values' if not provided to __init__.
134 Validates that values.ndim and ndim are consistent if and only if
135 the class variable '_validate_ndim' is True.
137 Parameters
138 ----------
139 values : array-like
140 ndim : int or None
142 Returns
143 -------
144 ndim : int
146 Raises
147 ------
148 ValueError : the number of dimensions do not match
149 """
150 if ndim is None:
151 ndim = values.ndim
153 if self._validate_ndim and values.ndim != ndim:
154 raise ValueError(
155 "Wrong number of dimensions. "
156 f"values.ndim != ndim [{values.ndim} != {ndim}]"
157 )
158 return ndim
160 @property
161 def _holder(self):
162 """The array-like that can hold the underlying values.
164 None for 'Block', overridden by subclasses that don't
165 use an ndarray.
166 """
167 return None
169 @property
170 def _consolidate_key(self):
171 return (self._can_consolidate, self.dtype.name)
173 @property
174 def _is_single_block(self):
175 return self.ndim == 1
@property
def is_view(self):
    """ return a boolean: True when ``values.base`` is set (possibly a view) """
    base = self.values.base
    return base is not None
@property
def is_datelike(self):
    """ return True if I am datelike (datetime or timedelta) """
    datetime_flag = self.is_datetime
    return datetime_flag or self.is_timedelta
def is_categorical_astype(self, dtype):
    """
    Check whether ``dtype`` is a categorical target for astype.

    Returns
    -------
    bool
        True if ``dtype`` is a categorical dtype, False otherwise.

    Raises
    ------
    TypeError
        If the ``Categorical`` or ``CategoricalDtype`` *class* itself is
        passed; neither is a valid type for astype-ing.
    """
    rejected_classes = (Categorical, CategoricalDtype)
    if any(dtype is klass for klass in rejected_classes):
        raise TypeError(f"invalid type {dtype} for astype")

    return True if is_categorical_dtype(dtype) else False
def external_values(self, dtype=None):
    """
    Array returned by the public ``Series.values`` attribute.

    For historical reasons block subclasses override this to return the
    expected array (e.g. period returns an object ndarray and datetimetz a
    datetime64[ns] ndarray rather than the proper extension array). The
    base implementation returns ``self.values`` and ignores ``dtype``.
    """
    stored = self.values
    return stored
def internal_values(self, dtype=None):
    """
    Return the internal-format values (currently just ``self.values``).

    This is meant to be the pure internal API format; ``dtype`` is
    ignored here.
    """
    stored = self.values
    return stored
def array_values(self) -> ExtensionArray:
    """
    Array returned by ``Series.array``; always an ExtensionArray.

    The base implementation wraps ``self.values`` in a ``PandasArray``.
    """
    wrapped = PandasArray(self.values)
    return wrapped
def get_values(self, dtype=None):
    """
    Return the values in internal format (currently just the ndarray).

    Subclasses often override this to handle to_dense-like operations.
    When ``dtype`` is an object dtype the values are cast to ``object``;
    otherwise ``self.values`` is returned as-is.
    """
    wants_object = is_object_dtype(dtype)
    return self.values.astype(object) if wants_object else self.values
def get_block_values(self, dtype=None):
    """
    Thin wrapper around ``get_values``.

    This is used in the JSON C code.
    """
    values = self.get_values(dtype=dtype)
    return values
def to_dense(self):
    """Return a new view of ``self.values`` (via ``values.view()``)."""
    values = self.values
    return values.view()
@property
def fill_value(self):
    """Fill value for this block; ``np.nan`` on the base class."""
    missing_marker = np.nan
    return missing_marker
@property
def mgr_locs(self):
    """Placement of this block, stored in ``_mgr_locs``."""
    return self._mgr_locs

@mgr_locs.setter
def mgr_locs(self, new_mgr_locs):
    """Store the placement, wrapping it in a ``BlockPlacement`` if needed."""
    placement_cls = libinternals.BlockPlacement
    if isinstance(new_mgr_locs, placement_cls):
        self._mgr_locs = new_mgr_locs
    else:
        self._mgr_locs = placement_cls(new_mgr_locs)
@property
def array_dtype(self):
    """
    The dtype to return when constructing this block as an array.

    The base implementation is simply ``self.dtype``.
    """
    block_dtype = self.dtype
    return block_dtype
def make_block(self, values, placement=None) -> "Block":
    """
    Create a new block with type inference, reusing this block's
    placement (when none is given) and its ``ndim``.
    """
    locs = self.mgr_locs if placement is None else placement
    return make_block(values, placement=locs, ndim=self.ndim)
def make_block_same_class(self, values, placement=None, ndim=None):
    """
    Wrap ``values`` in a block of the same type as ``self``.

    ``placement`` and ``ndim`` default to this block's own when None.
    """
    locs = self.mgr_locs if placement is None else placement
    dims = self.ndim if ndim is None else ndim
    return make_block(values, placement=locs, ndim=dims, klass=type(self))
283 def __repr__(self) -> str:
284 # don't want to print out all of the items here
285 name = type(self).__name__
286 if self._is_single_block:
288 result = f"{name}: {len(self)} dtype: {self.dtype}"
290 else:
292 shape = " x ".join(pprint_thing(s) for s in self.shape)
293 result = (
294 f"{name}: {pprint_thing(self.mgr_locs.indexer)}, "
295 f"{shape}, dtype: {self.dtype}"
296 )
298 return result
300 def __len__(self) -> int:
301 return len(self.values)
303 def __getstate__(self):
304 return self.mgr_locs.indexer, self.values
306 def __setstate__(self, state):
307 self.mgr_locs = libinternals.BlockPlacement(state[0])
308 self.values = state[1]
309 self.ndim = self.values.ndim
311 def _slice(self, slicer):
312 """ return a slice of my values """
313 return self.values[slicer]
def getitem_block(self, slicer, new_mgr_locs=None):
    """
    Perform __getitem__-like, return result as block.

    As of now, only supports slices that preserve dimensionality.

    When ``new_mgr_locs`` is None the placement is derived by indexing
    ``self.mgr_locs`` with the axis-0 part of ``slicer``.

    Raises
    ------
    ValueError
        If ndim validation is enabled and slicing changed the
        dimensionality.
    """
    if new_mgr_locs is None:
        # For a tuple slicer only its first element applies to axis 0.
        axis0_slicer = slicer[0] if isinstance(slicer, tuple) else slicer
        new_mgr_locs = self.mgr_locs[axis0_slicer]

    sliced = self._slice(slicer)

    if self._validate_ndim and sliced.ndim != self.ndim:
        raise ValueError("Only same dim slicing is allowed")

    return self.make_block_same_class(sliced, new_mgr_locs)
@property
def shape(self):
    """Shape of the underlying ``values``."""
    values = self.values
    return values.shape
@property
def dtype(self):
    """dtype of the underlying ``values``."""
    values = self.values
    return values.dtype
@property
def ftype(self):
    """
    String of the form ``"<dtype>:<_ftype>"``.

    When the values carry a truthy ``_pandas_ftype`` attribute, the
    dtype's ``subtype`` is used in place of the dtype itself.
    """
    uses_subtype = getattr(self.values, "_pandas_ftype", False)
    dtype = self.dtype.subtype if uses_subtype else self.dtype
    return f"{dtype}:{self._ftype}"
def merge(self, other):
    """Merge this block with ``other`` via ``_merge_blocks``."""
    pair = [self, other]
    return _merge_blocks(pair)
def concat_same_type(self, to_concat, placement=None):
    """
    Concatenate list of single blocks of the same type.

    Parameters
    ----------
    to_concat : list of Block
        Their ``values`` are joined with ``self._concatenator`` along the
        last axis (``self.ndim - 1``).
    placement : optional
        Placement of the resulting block. When None (or empty) it
        defaults to ``slice(0, len(values), 1)``.

    Returns
    -------
    Block
        Result of ``make_block_same_class`` on the concatenated values.
    """
    values = self._concatenator(
        [blk.values for blk in to_concat], axis=self.ndim - 1
    )

    # Do not truth-test ``placement``: ``bool()`` of a multi-element ndarray
    # raises, and a one-element array such as ``array([0])`` is falsy and
    # would be silently replaced by the default. Only a missing or empty
    # placement falls back to the default slice.
    use_default = placement is None or (
        hasattr(placement, "__len__") and len(placement) == 0
    )
    if use_default:
        placement = slice(0, len(values), 1)

    return self.make_block_same_class(values, placement=placement)
def iget(self, i):
    """Return ``self.values[i]``."""
    values = self.values
    return values[i]
def set(self, locs, values):
    """
    Modify Block in-place with new item value

    Assigns ``values`` into ``self.values[locs]``.

    Returns
    -------
    None
    """
    target = self.values
    target[locs] = values
def delete(self, loc):
    """
    Delete given loc(-s) from block in-place.

    Entries are removed along axis 0 of ``values`` and the same loc(-s)
    are removed from ``mgr_locs``.
    """
    remaining = np.delete(self.values, loc, 0)
    self.values = remaining
    self.mgr_locs = self.mgr_locs.delete(loc)
def apply(self, func, **kwargs):
    """
    Apply ``func`` to my values and return the result as block(s).

    ``func`` is called as ``func(self.values, **kwargs)`` with numpy
    floating-point errors ignored. A result that is already a Block is
    returned as-is; anything else is wrapped in a new block. A 2D
    extension-array result is split into a list of per-row blocks.
    """
    with np.errstate(all="ignore"):
        result = func(self.values, **kwargs)

    if is_extension_array_dtype(result) and result.ndim > 1:
        # if we get a 2D ExtensionArray, we need to split it into 1D pieces
        return [
            self.make_block(
                values=_block_shape(result[i], ndim=self.ndim), placement=[loc]
            )
            for i, loc in enumerate(self.mgr_locs)
        ]

    if isinstance(result, Block):
        return result

    return self.make_block(values=_block_shape(result, ndim=self.ndim))
def fillna(self, value, limit=None, inplace=False, downcast=None):
    """ fillna on the block with the value. If we fail, then convert to
    ObjectBlock and try again

    Parameters
    ----------
    value : object
        Value used to fill missing entries.
    limit : int or None
        If given, only the first ``limit`` missing entries (counted
        cumulatively along the last axis) are filled.
    inplace : bool, default False
    downcast : optional
        Passed to ``_maybe_downcast`` after a successful fill.

    Returns
    -------
    Block or list of Block
        ``self`` / a copy when nothing can be filled, otherwise the
        block list produced by ``putmask`` or ``split_and_operate``.
    """
    inplace = validate_bool_kwarg(inplace, "inplace")

    mask = isna(self.values)
    if limit is not None:
        limit = libalgos._validate_limit(None, limit=limit)
        # un-mask every missing entry beyond the first `limit` ones
        mask[mask.cumsum(self.ndim - 1) > limit] = False

    # a block that cannot hold NA has nothing to fill
    if not self._can_hold_na:
        if inplace:
            return self
        else:
            return self.copy()

    if self._can_hold_element(value):
        # equivalent: _try_coerce_args(value) would not raise
        blocks = self.putmask(mask, value, inplace=inplace)
        return self._maybe_downcast(blocks, downcast)

    # we can't process the value, but nothing to do
    if not mask.any():
        return self if inplace else self.copy()

    # operate column-by-column
    # (the `mask` and `val` arguments are unused; the closure re-coerces
    # the whole block and recurses into fillna on the relevant slice)
    def f(mask, val, idx):
        block = self.coerce_to_target_dtype(value)

        # slice out our block
        if idx is not None:
            # i.e. self.ndim == 2
            block = block.getitem_block(slice(idx, idx + 1))
        return block.fillna(value, limit=limit, inplace=inplace, downcast=None)

    return self.split_and_operate(None, f, inplace)
def split_and_operate(self, mask, f, inplace: bool):
    """
    split the block per-column, and apply the callable f
    per-column, return a new block for each. Handle
    masking which will not change a block unless needed.

    Parameters
    ----------
    mask : 2-d boolean mask, or None
        None is treated as an all-True mask of ``self.shape``.
    f : callable accepting (1d-mask, 1d values, indexer)
        The indexer is None for a 1-dimensional block, otherwise the
        position of the row being processed. May return values or a
        one-element list containing a Block.
    inplace : boolean
        When False, rows whose mask is all-False are copied.

    Returns
    -------
    list of blocks
    """

    if mask is None:
        mask = np.broadcast_to(True, shape=self.shape)

    new_values = self.values

    def make_a_block(nv, ref_loc):
        # `f` may hand back either a one-element list holding a Block or
        # raw values that still need wrapping.
        if isinstance(nv, list):
            assert len(nv) == 1, nv
            assert isinstance(nv[0], Block)
            block = nv[0]
        else:
            # Put back the dimension that was taken from it and make
            # a block out of the result.
            nv = _block_shape(nv, ndim=self.ndim)
            block = self.make_block(values=nv, placement=ref_loc)
        return block

    # ndim == 1
    if self.ndim == 1:
        if mask.any():
            nv = f(mask, new_values, None)
        else:
            nv = new_values if inplace else new_values.copy()
        block = make_a_block(nv, self.mgr_locs)
        return [block]

    # ndim > 1
    new_blocks = []
    for i, ref_loc in enumerate(self.mgr_locs):
        m = mask[i]
        v = new_values[i]

        # need a new block
        if m.any():
            nv = f(m, v, i)
        else:
            nv = v if inplace else v.copy()

        block = make_a_block(nv, [ref_loc])
        new_blocks.append(block)

    return new_blocks
505 def _maybe_downcast(self, blocks: List["Block"], downcast=None) -> List["Block"]:
507 # no need to downcast our float
508 # unless indicated
509 if downcast is None and (
510 self.is_float or self.is_timedelta or self.is_datetime
511 ):
512 return blocks
514 return _extend_blocks([b.downcast(downcast) for b in blocks])
def downcast(self, dtypes=None):
    """
    Try to downcast the block's values.

    Parameters
    ----------
    dtypes : False, None, "infer" or dict
        False turns downcasting off. For a single (1-d) block None means
        "infer". For higher dimensions None is a no-op, "infer" downcasts
        column-by-column, and a dict is not supported yet.

    Returns
    -------
    Block or list of Block

    Raises
    ------
    ValueError
        If ``dtypes`` is neither "infer" nor a dict (ndim > 1).
    AssertionError
        If ``dtypes`` is a dict (ndim > 1).
    """
    # turn it off completely
    if dtypes is False:
        return self

    values = self.values

    # single block handling
    if self._is_single_block:
        # try to cast all non-floats here
        target = "infer" if dtypes is None else dtypes
        return self.make_block(maybe_downcast_to_dtype(values, target))

    # ndim > 1
    if dtypes is None:
        return self

    if not (dtypes == "infer" or isinstance(dtypes, dict)):
        raise ValueError(
            "downcast must have a dictionary or 'infer' as its argument"
        )
    elif dtypes != "infer":
        raise AssertionError("dtypes as dict is not supported yet")

    # operate column-by-column
    # this is expensive as it splits the blocks items-by-item
    def infer_column(mask, val, idx):
        return maybe_downcast_to_dtype(val, dtype="infer")

    return self.split_and_operate(None, infer_column, False)
def astype(self, dtype, copy: bool = False, errors: str = "raise"):
    """
    Coerce to the new dtype.

    Parameters
    ----------
    dtype : str, dtype convertible
    copy : bool, default False
        copy if indicated
    errors : str, {'raise', 'ignore'}, default 'raise'
        - ``raise`` : allow exceptions to be raised
        - ``ignore`` : suppress exceptions. On error return original object

    Returns
    -------
    Block

    Raises
    ------
    ValueError
        If ``errors`` is not one of the legal values.
    TypeError
        If an ExtensionDtype class (rather than an instance) is passed, or
        if a numeric-to-numeric cast changes the block's shape.
    """
    errors_legal_values = ("raise", "ignore")

    if errors not in errors_legal_values:
        invalid_arg = (
            "Expected value of kwarg 'errors' to be one of "
            f"{list(errors_legal_values)}. Supplied value is '{errors}'"
        )
        raise ValueError(invalid_arg)

    if inspect.isclass(dtype) and issubclass(dtype, ExtensionDtype):
        msg = (
            f"Expected an instance of {dtype.__name__}, "
            "but got the class instead. Try instantiating 'dtype'."
        )
        raise TypeError(msg)

    # may need to convert to categorical
    if self.is_categorical_astype(dtype):

        if is_categorical_dtype(self.values):
            # GH 10696/18593: update an existing categorical efficiently
            return self.make_block(self.values.astype(dtype, copy=copy))

        return self.make_block(Categorical(self.values, dtype=dtype))

    dtype = pandas_dtype(dtype)

    # astype processing
    # same dtype: nothing to convert, only honour `copy`
    if is_dtype_equal(self.dtype, dtype):
        if copy:
            return self.copy()
        return self

    # force the copy here
    if self.is_extension:
        # TODO: Should we try/except this astype?
        values = self.values.astype(dtype)
    else:
        if issubclass(dtype.type, str):

            # use native type formatting for datetime/tz/timedelta
            if self.is_datelike:
                values = self.to_native_types()

            # astype formatting
            else:
                values = self.get_values()

        else:
            values = self.get_values(dtype=dtype)

        # _astype_nansafe works fine with 1-d only
        vals1d = values.ravel()
        try:
            values = astype_nansafe(vals1d, dtype, copy=True)
        except (ValueError, TypeError):
            # e.g. astype_nansafe can fail on object-dtype of strings
            # trying to convert to float
            if errors == "raise":
                raise
            # errors == "ignore": hand back the original block (or a copy)
            newb = self.copy() if copy else self
            return newb

    # TODO(extension)
    # should we make this attribute?
    # restore the original shape after the 1-d conversion above
    if isinstance(values, np.ndarray):
        values = values.reshape(self.shape)

    newb = make_block(values, placement=self.mgr_locs, ndim=self.ndim)

    if newb.is_numeric and self.is_numeric:
        if newb.shape != self.shape:
            raise TypeError(
                f"cannot set astype for copy = [{copy}] for dtype "
                f"({self.dtype.name} [{self.shape}]) to different shape "
                f"({newb.dtype.name} [{newb.shape}])"
            )
    return newb
def convert(
    self,
    copy: bool = True,
    datetime: bool = True,
    numeric: bool = True,
    timedelta: bool = True,
    coerce: bool = False,
):
    """
    Attempt to coerce any object types to better types.

    By definition this block is not an ObjectBlock, so nothing is
    converted: a copy of the block is returned when ``copy`` is True,
    otherwise the block itself. The remaining arguments are ignored here.
    """
    if copy:
        return self.copy()
    return self
def _can_hold_element(self, element: Any) -> bool:
    """
    Require the same dtype as ourselves.

    If a dtype can be inferred from ``element`` its scalar type must be a
    subclass of this block's scalar type; otherwise ``element`` itself
    must be an instance of it.
    """
    own_type = self.values.dtype.type
    inferred = maybe_infer_dtype_type(element)
    if inferred is None:
        return isinstance(element, own_type)
    return issubclass(inferred.type, own_type)
def to_native_types(self, slicer=None, na_rep="nan", quoting=None, **kwargs):
    """
    Convert to our native types format, slicing if desired.

    Missing entries are replaced by ``na_rep``. Non-object blocks are
    converted to strings (widened if needed so ``na_rep`` fits) unless
    ``quoting`` is set or ``na_rep`` has zero length, in which case an
    object array is produced instead.
    """
    values = self.get_values()

    if slicer is not None:
        values = values[:, slicer]
    mask = isna(values)
    itemsize = writers.word_len(na_rep)

    if self.is_object or quoting or not itemsize:
        values = np.array(values, dtype="object")
    else:
        values = values.astype(str)
        chars_per_item = values.dtype.itemsize / np.dtype("U1").itemsize
        if chars_per_item < itemsize:
            # enlarge for the na_rep
            values = values.astype(f"<U{itemsize}")

    values[mask] = na_rep
    return values
693 # block actions #
def copy(self, deep=True):
    """
    Copy constructor.

    With ``deep=True`` the values are copied; otherwise the new block
    shares this block's values.
    """
    values = self.values.copy() if deep else self.values
    return self.make_block_same_class(values, ndim=self.ndim)
def replace(
    self, to_replace, value, inplace=False, filter=None, regex=False, convert=True
):
    """replace the to_replace value with value, possible to create new
    blocks here this is just a call to putmask. regex is not used here.
    It is used in ObjectBlocks. It is here for API compatibility.

    Parameters
    ----------
    to_replace : scalar or list
        Value(s) to look for.
    value : object
        Replacement value.
    inplace : bool, default False
    filter : optional
        When given, only locations in ``filter`` (checked against
        ``mgr_locs``) are eligible for replacement.
    regex : bool, default False
        Only forwarded to the retry calls.
    convert : bool, default True
        Whether to call ``convert`` on the resulting blocks.

    Returns
    -------
    list of Block
    """

    inplace = validate_bool_kwarg(inplace, "inplace")
    # kept so the object-dtype retry below sees the caller's original argument
    original_to_replace = to_replace

    # If we cannot replace with own dtype, convert to ObjectBlock and
    # retry
    if not self._can_hold_element(to_replace):
        if not isinstance(to_replace, list):
            # a scalar we cannot hold cannot be present: nothing to do
            if inplace:
                return [self]
            return [self.copy()]

        to_replace = [x for x in to_replace if self._can_hold_element(x)]
        if not len(to_replace):
            # GH#28084 avoid costly checks since we can infer
            # that there is nothing to replace in this block
            if inplace:
                return [self]
            return [self.copy()]

        if len(to_replace) == 1:
            # _can_hold_element checks have reduced this back to the
            # scalar case and we can avoid a costly object cast
            return self.replace(
                to_replace[0],
                value,
                inplace=inplace,
                filter=filter,
                regex=regex,
                convert=convert,
            )

        # GH 22083, TypeError or ValueError occurred within error handling
        # causes infinite loop. Cast and retry only if not objectblock.
        if is_object_dtype(self):
            raise AssertionError

        # try again with a compatible block
        block = self.astype(object)
        return block.replace(
            to_replace=to_replace,
            value=value,
            inplace=inplace,
            filter=filter,
            regex=regex,
            convert=convert,
        )

    values = self.values
    if lib.is_scalar(to_replace) and isinstance(values, np.ndarray):
        # The only non-DatetimeLike class that also has a non-trivial
        # try_coerce_args is ObjectBlock, but that overrides replace,
        # so does not get here.
        to_replace = convert_scalar(values, to_replace)

    mask = missing.mask_missing(values, to_replace)
    if filter is not None:
        # never replace in rows whose placement is not in `filter`
        filtered_out = ~self.mgr_locs.isin(filter)
        mask[filtered_out.nonzero()[0]] = False

    try:
        blocks = self.putmask(mask, value, inplace=inplace)
        # Note: it is _not_ the case that self._can_hold_element(value)
        # is always true at this point. In particular, that can fail
        # for:
        # "2u" with bool-dtype, float-dtype
        # 0.5 with int64-dtype
        # np.nan with int64-dtype
    except (TypeError, ValueError):
        # GH 22083, TypeError or ValueError occurred within error handling
        # causes infinite loop. Cast and retry only if not objectblock.
        if is_object_dtype(self):
            raise

        if not self.is_extension:
            # TODO: https://github.com/pandas-dev/pandas/issues/32586
            # Need an ExtensionArray._can_hold_element to indicate whether
            # a scalar value can be placed in the array.
            assert not self._can_hold_element(value), value

        # try again with a compatible block
        block = self.astype(object)
        return block.replace(
            to_replace=original_to_replace,
            value=value,
            inplace=inplace,
            filter=filter,
            regex=regex,
            convert=convert,
        )
    if convert:
        blocks = [b.convert(numeric=False, copy=not inplace) for b in blocks]
    return blocks
802 def _replace_single(self, *args, **kwargs):
803 """ no-op on a non-ObjectBlock """
804 return self if kwargs["inplace"] else self.copy()
def setitem(self, indexer, value):
    """
    Set the value inplace, returning a maybe different typed block.

    Parameters
    ----------
    indexer : tuple, list-like, array-like, slice
        The subset of self.values to set
    value : object
        The value being set

    Returns
    -------
    Block

    Notes
    -----
    `indexer` is a direct slice/positional indexer. `value` must
    be a compatible shape.
    """
    # 2-d blocks are set on the transposed values (transposed back below)
    transpose = self.ndim == 2

    # coerce None values, if appropriate
    if value is None:
        if self.is_numeric:
            value = np.nan

    # coerce if block dtype can store value
    values = self.values
    if self._can_hold_element(value):
        # We only get here for non-Extension Blocks, so _try_coerce_args
        # is only relevant for DatetimeBlock and TimedeltaBlock
        if lib.is_scalar(value):
            value = convert_scalar(values, value)

    else:
        # current dtype cannot store value, coerce to common dtype
        find_dtype = False

        if hasattr(value, "dtype"):
            dtype = value.dtype
            find_dtype = True

        elif lib.is_scalar(value) and not isna(value):
            dtype, _ = infer_dtype_from_scalar(value, pandas_dtype=True)
            find_dtype = True

        if find_dtype:
            dtype = find_common_type([values.dtype, dtype])
            if not is_dtype_equal(self.dtype, dtype):
                # recast the whole block, then set on the recast block
                b = self.astype(dtype)
                return b.setitem(indexer, value)

    # value must be storeable at this moment
    if is_extension_array_dtype(getattr(value, "dtype", None)):
        # We need to be careful not to allow through strings that
        # can be parsed to EADtypes
        is_ea_value = True
        arr_value = value
    else:
        is_ea_value = False
        arr_value = np.array(value)

    # cast the values to a type that can hold nan (if necessary)
    if not self._can_hold_element(value):
        dtype, _ = maybe_promote(arr_value.dtype)
        values = values.astype(dtype)

    if transpose:
        values = values.T

    # length checking
    check_setitem_lengths(indexer, value, values)
    # True when `value` has the same leading length and total size as the
    # block, i.e. the assignment covers the values entirely
    exact_match = (
        len(arr_value.shape)
        and arr_value.shape[0] == values.shape[0]
        and arr_value.size == values.size
    )
    if is_empty_indexer(indexer, arr_value):
        # GH#8669 empty indexers
        pass

    elif is_scalar_indexer(indexer, arr_value):
        # setting a single element for each dim and with a rhs that could
        # be e.g. a list; see GH#6043
        values[indexer] = value

    elif (
        exact_match
        and is_categorical_dtype(arr_value.dtype)
        and not is_categorical_dtype(values)
    ):
        # GH25495 - If the current dtype is not categorical,
        # we need to create a new categorical block
        values[indexer] = value
        return self.make_block(Categorical(self.values, dtype=arr_value.dtype))

    elif exact_match and is_ea_value:
        # GH#32395 if we're going to replace the values entirely, just
        # substitute in the new array
        return self.make_block(arr_value)

    # if we are an exact match (ex-broadcasting),
    # then use the resultant dtype
    elif exact_match:
        values[indexer] = value

        try:
            values = values.astype(arr_value.dtype)
        except ValueError:
            # keep the current dtype when the cast is not possible
            pass

    # set
    else:
        values[indexer] = value

    if transpose:
        values = values.T
    block = self.make_block(values)
    return block
def putmask(self, mask, new, align=True, inplace=False, axis=0, transpose=False):
    """ putmask the data to the block; it is possible that we may create a
    new dtype of block

    return the resulting block(s)

    Parameters
    ----------
    mask : the condition to respect
    new : a ndarray/object
    align : boolean, perform alignment on other/cond, default is True
        (not referenced in this implementation)
    inplace : perform inplace modification, default is False
    axis : int
    transpose : boolean
        Set to True if self is stored with axes reversed

    Returns
    -------
    a list of new blocks, the result of the putmask

    Raises
    ------
    ValueError
        If a 1-d list-like ``new`` has a length that matches neither the
        number of masked entries, the mask's last dimension, nor 1.
    """

    new_values = self.values if inplace else self.values.copy()

    # unwrap objects exposing `.values` to their underlying arrays
    new = getattr(new, "values", new)
    mask = getattr(mask, "values", mask)

    # if we are passed a scalar None, convert it here
    if not is_list_like(new) and isna(new) and not self.is_object:
        # FIXME: make sure we have compatible NA
        new = self.fill_value

    if self._can_hold_element(new):
        # We only get here for non-Extension Blocks, so _try_coerce_args
        # is only relevant for DatetimeBlock and TimedeltaBlock
        if lib.is_scalar(new):
            new = convert_scalar(new_values, new)

        if transpose:
            new_values = new_values.T

        # If the default repeat behavior in np.putmask would go in the
        # wrong direction, then explicitly repeat and reshape new instead
        if getattr(new, "ndim", 0) >= 1:
            if self.ndim - 1 == new.ndim and axis == 1:
                new = np.repeat(new, new_values.shape[-1]).reshape(self.shape)
            new = new.astype(new_values.dtype)

        # we require exact matches between the len of the
        # values we are setting (or is compat). np.putmask
        # doesn't check this and will simply truncate / pad
        # the output, but we want sane error messages
        #
        # TODO: this prob needs some better checking
        # for 2D cases
        if (
            is_list_like(new)
            and np.any(mask[mask])
            and getattr(new, "ndim", 1) == 1
        ):
            if mask[mask].shape[-1] == len(new):
                # GH 30567
                # If length of ``new`` is less than the length of ``new_values``,
                # `np.putmask` would first repeat the ``new`` array and then
                # assign the masked values hence produces incorrect result.
                # `np.place` on the other hand uses the ``new`` values at it is
                # to place in the masked locations of ``new_values``
                np.place(new_values, mask, new)
            elif mask.shape[-1] == len(new) or len(new) == 1:
                np.putmask(new_values, mask, new)
            else:
                raise ValueError("cannot assign mismatch length to masked array")
        else:
            np.putmask(new_values, mask, new)

    # maybe upcast me
    elif mask.any():
        if transpose:
            mask = mask.T
            if isinstance(new, np.ndarray):
                new = new.T
            axis = new_values.ndim - axis - 1

        # Pseudo-broadcast
        if getattr(new, "ndim", 0) >= 1:
            if self.ndim - 1 == new.ndim:
                new_shape = list(new.shape)
                new_shape.insert(axis, 1)
                new = new.reshape(tuple(new_shape))

        # operate column-by-column
        def f(mask, val, idx):

            if idx is None:
                # ndim==1 case.
                n = new
            else:

                if isinstance(new, np.ndarray):
                    n = np.squeeze(new[idx % new.shape[0]])
                else:
                    n = np.array(new)

                # type of the new block
                dtype, _ = maybe_promote(n.dtype)

                # we need to explicitly astype here to make a copy
                n = n.astype(dtype)

            nv = _putmask_smart(val, mask, n)
            return nv

        new_blocks = self.split_and_operate(mask, f, inplace)
        return new_blocks

    # reached when `new` was put directly into new_values, or when the
    # block cannot hold `new` but the mask selects nothing
    if inplace:
        return [self]

    if transpose:
        new_values = new_values.T

    return [self.make_block(new_values)]
def coerce_to_target_dtype(self, other):
    """
    coerce the current block to a dtype compat for other
    we will return a block, possibly object, and not raise

    we can also safely try to coerce to the same dtype
    and will receive the same block

    Parameters
    ----------
    other : object
        Value whose inferred dtype determines the target dtype.

    Returns
    -------
    Block

    Raises
    ------
    AssertionError
        If both sides are datetimes with matching timezones, or both are
        timedeltas, yet their dtypes compared unequal (guards against
        possible recursion).
    """

    # if we cannot then coerce to object
    dtype, _ = infer_dtype_from(other, pandas_dtype=True)

    if is_dtype_equal(self.dtype, dtype):
        return self

    if self.is_bool or is_object_dtype(dtype) or is_bool_dtype(dtype):
        # we don't upcast to bool
        return self.astype(object)

    elif (self.is_float or self.is_complex) and (
        is_integer_dtype(dtype) or is_float_dtype(dtype)
    ):
        # don't coerce float/complex to int
        return self

    elif (
        self.is_datetime
        or is_datetime64_dtype(dtype)
        or is_datetime64tz_dtype(dtype)
    ):

        # not a datetime
        # (only one of the two sides is datetime-like)
        if not (
            (is_datetime64_dtype(dtype) or is_datetime64tz_dtype(dtype))
            and self.is_datetime
        ):
            return self.astype(object)

        # don't upcast timezone with different timezone or no timezone
        mytz = getattr(self.dtype, "tz", None)
        othertz = getattr(dtype, "tz", None)

        if not tz_compare(mytz, othertz):
            return self.astype(object)

        raise AssertionError(
            f"possible recursion in coerce_to_target_dtype: {self} {other}"
        )

    elif self.is_timedelta or is_timedelta64_dtype(dtype):

        # not a timedelta
        # (only one of the two sides is timedelta-like)
        if not (is_timedelta64_dtype(dtype) and self.is_timedelta):
            return self.astype(object)

        raise AssertionError(
            f"possible recursion in coerce_to_target_dtype: {self} {other}"
        )

    # generic case: try the inferred dtype, fall back to object
    try:
        return self.astype(dtype)
    except (ValueError, TypeError, OverflowError):
        return self.astype(object)
def interpolate(
    self,
    method="pad",
    axis=0,
    index=None,
    values=None,
    inplace=False,
    limit=None,
    limit_direction="forward",
    limit_area=None,
    fill_value=None,
    coerce=False,
    downcast=None,
    **kwargs,
):
    """
    Fill or interpolate missing values, dispatching on ``method``.

    If ``method`` is accepted by ``missing.clean_fill_method`` the call is
    forwarded to ``_interpolate_with_fill``; otherwise ``method`` is
    validated with ``missing.clean_interp_method`` and the call is
    forwarded to ``_interpolate``. Bool and integer (non-timedelta)
    blocks are returned unchanged (``self`` when ``inplace``, else a
    copy).
    """

    inplace = validate_bool_kwarg(inplace, "inplace")

    # returns the block (or a copy) when there is nothing to interpolate,
    # and implicitly None otherwise
    def check_int_bool(self, inplace):
        # Only FloatBlocks will contain NaNs.
        # timedelta subclasses IntBlock
        if (self.is_bool or self.is_integer) and not self.is_timedelta:
            if inplace:
                return self
            else:
                return self.copy()

    # a fill na type method
    try:
        m = missing.clean_fill_method(method)
    except ValueError:
        # not a fill method; handled as an interpolation method below
        m = None

    if m is not None:
        r = check_int_bool(self, inplace)
        if r is not None:
            return r
        return self._interpolate_with_fill(
            method=m,
            axis=axis,
            inplace=inplace,
            limit=limit,
            fill_value=fill_value,
            coerce=coerce,
            downcast=downcast,
        )
    # validate the interp method
    m = missing.clean_interp_method(method, **kwargs)

    r = check_int_bool(self, inplace)
    if r is not None:
        return r
    return self._interpolate(
        method=m,
        index=index,
        values=values,
        axis=axis,
        limit=limit,
        limit_direction=limit_direction,
        limit_area=limit_area,
        fill_value=fill_value,
        inplace=inplace,
        downcast=downcast,
        **kwargs,
    )
def _interpolate_with_fill(
    self,
    method="pad",
    axis=0,
    inplace=False,
    limit=None,
    fill_value=None,
    coerce=False,
    downcast=None,
):
    """ fillna but using the interpolate machinery """
    inplace = validate_bool_kwarg(inplace, "inplace")

    # if we are coercing, then don't force the conversion
    # if the block can't hold the type
    if coerce and not self._can_hold_na:
        return [self] if inplace else [self.copy()]

    target = self.values if inplace else self.values.copy()

    # We only get here for non-ExtensionBlock
    fill_value = convert_scalar(self.values, fill_value)

    filled = missing.interpolate_2d(
        target,
        method=method,
        axis=axis,
        limit=limit,
        fill_value=fill_value,
        dtype=self.dtype,
    )

    new_block = self.make_block_same_class(filled, ndim=self.ndim)
    return self._maybe_downcast([new_block], downcast)
def _interpolate(
    self,
    method=None,
    index=None,
    values=None,
    fill_value=None,
    axis=0,
    limit=None,
    limit_direction="forward",
    limit_area=None,
    inplace=False,
    downcast=None,
    **kwargs,
):
    """ interpolate using scipy wrappers """
    inplace = validate_bool_kwarg(inplace, "inplace")
    data = self.values if inplace else self.values.copy()

    # only deal with floats: integer data is cast to float64, anything
    # else that is not float is returned as-is
    if not self.is_float:
        if self.is_integer:
            data = data.astype(np.float64)
        else:
            return self

    if fill_value is None:
        fill_value = self.fill_value

    needs_monotonic_index = method in ("krogh", "piecewise_polynomial", "pchip")
    if needs_monotonic_index and not index.is_monotonic:
        raise ValueError(
            f"{method} interpolation requires that the index be monotonic."
        )

    # process 1-d slices in the axis direction
    def interpolate_slice(slice_1d):
        # process a 1-d slice, returning it
        # should the axis argument be handled below in apply_along_axis?
        # i.e. not an arg to missing.interpolate_1d
        return missing.interpolate_1d(
            index,
            slice_1d,
            method=method,
            limit=limit,
            limit_direction=limit_direction,
            limit_area=limit_area,
            fill_value=fill_value,
            bounds_error=False,
            **kwargs,
        )

    # interp each column independently
    interpolated = np.apply_along_axis(interpolate_slice, axis, data)

    return self._maybe_downcast(
        [self.make_block_same_class(interpolated)], downcast
    )
def take_nd(self, indexer, axis, new_mgr_locs=None, fill_tuple=None):
    """
    Take values according to indexer and return them as a block.

    When ``fill_tuple`` is given, its first element is used as the fill
    value and filling is enabled; otherwise the block's own ``fill_value``
    is passed with filling disabled.
    """
    # algos.take_nd dispatches for DatetimeTZBlock, CategoricalBlock
    # so need to preserve types
    # sparse is treated like an ndarray, but needs .get_values() shaping
    values = self.values

    allow_fill = fill_tuple is not None
    fill_value = fill_tuple[0] if allow_fill else self.fill_value

    taken = algos.take_nd(
        values, indexer, axis=axis, allow_fill=allow_fill, fill_value=fill_value
    )

    # Called from three places in managers, all of which satisfy
    # this assertion
    assert not (axis == 0 and new_mgr_locs is None)
    if new_mgr_locs is None:
        new_mgr_locs = self.mgr_locs

    # keep the block class only while the dtype is unchanged
    if is_dtype_equal(taken.dtype, self.dtype):
        return self.make_block_same_class(taken, new_mgr_locs)
    return self.make_block(taken, new_mgr_locs)
def diff(self, n: int, axis: int = 1) -> List["Block"]:
    """ return block for the diff of the values """
    differenced = algos.diff(self.values, n, axis=axis, stacklevel=7)
    # We use block_shape for ExtensionBlock subclasses, which may call here
    # via a super.
    reshaped = _block_shape(differenced, ndim=self.ndim)
    return [self.make_block(values=reshaped)]
def shift(self, periods, axis=0, fill_value=None):
    """
    Shift the block by ``periods``, possibly upcasting.

    Parameters
    ----------
    periods : int
        Number of positions to roll along ``axis``; the sign selects the
        direction.  A value of 0 leaves every element in place.
    axis : int, default 0
    fill_value : object, optional
        Value written into the positions vacated by the shift.

    Returns
    -------
    list of Block
        A single-element list holding the shifted block.
    """
    # convert integer to float if necessary. need to do a lot more than
    # that, handle boolean etc also
    new_values, fill_value = maybe_upcast(self.values, fill_value)

    # make sure array sent to np.roll is c_contiguous
    f_ordered = new_values.flags.f_contiguous
    if f_ordered:
        new_values = new_values.T
        axis = new_values.ndim - axis - 1

    if np.prod(new_values.shape):
        new_values = np.roll(new_values, ensure_platform_int(periods), axis=axis)

    # Blank out the vacated positions.  A zero shift vacates nothing, so it
    # must be skipped: slice(0, None) would otherwise select the whole axis
    # and overwrite every value with fill_value.
    if periods != 0:
        axis_indexer = [slice(None)] * self.ndim
        if periods > 0:
            axis_indexer[axis] = slice(None, periods)
        else:
            axis_indexer[axis] = slice(periods, None)
        new_values[tuple(axis_indexer)] = fill_value

    # restore original order
    if f_ordered:
        new_values = new_values.T

    return [self.make_block(new_values)]
def where(
    self,
    other,
    cond,
    align=True,
    errors="raise",
    try_cast: bool = False,
    axis: int = 0,
) -> List["Block"]:
    """
    evaluate the block; return result block(s) from the result

    Parameters
    ----------
    other : a ndarray/object
    cond : the condition to respect
    align : boolean, perform alignment on other/cond
    errors : str, {'raise', 'ignore'}, default 'raise'
        - ``raise`` : allow exceptions to be raised
        - ``ignore`` : suppress exceptions. On error return original object
    try_cast : bool, default False
        Forwarded unchanged when the call is retried on a coerced block.
    axis : int

    Returns
    -------
    a new block(s), the result of the func
    """
    import pandas.core.computation.expressions as expressions

    assert errors in ["raise", "ignore"]
    transpose = self.ndim == 2

    values = self.values
    orig_other = other
    if transpose:
        values = values.T

    other = getattr(other, "_values", getattr(other, "values", other))
    cond = getattr(cond, "values", cond)

    # If the default broadcasting would go in the wrong direction, then
    # explicitly reshape other instead
    if getattr(other, "ndim", 0) >= 1:
        if values.ndim - 1 == other.ndim and axis == 1:
            other = other.reshape(tuple(other.shape + (1,)))
        elif transpose and values.ndim == self.ndim - 1:
            cond = cond.T

    if not hasattr(cond, "shape"):
        raise ValueError("where must have a condition that is ndarray like")

    # our where function
    def apply_where(mask, vals, repl):
        nan_into_int_or_bool = (
            (self.is_integer or self.is_bool)
            and lib.is_float(repl)
            and np.isnan(repl)
        )
        if not nan_into_int_or_bool:
            # np.where will cast integer array to floats in this case
            if not self._can_hold_element(repl):
                raise TypeError
            if lib.is_scalar(repl) and isinstance(vals, np.ndarray):
                repl = convert_scalar(vals, repl)

        return expressions.where(mask, vals, repl)

    if cond.ravel().all():
        result = values
    else:
        # see if we can operate on the entire block, or need item-by-item
        # or if we are a single block (ndim == 1)
        try:
            result = apply_where(cond, values, other)
        except TypeError:
            # we cannot coerce, return a compat dtype
            # we are explicitly ignoring errors
            coerced = self.coerce_to_target_dtype(other)
            retried = coerced.where(
                orig_other,
                cond,
                align=align,
                errors=errors,
                try_cast=try_cast,
                axis=axis,
            )
            return self._maybe_downcast(retried, "infer")

    if self._can_hold_na or self.ndim == 1:
        if transpose:
            result = result.T
        return [self.make_block(result)]

    # might need to separate out blocks
    axis = cond.ndim - 1
    cond = cond.swapaxes(axis, 0)
    mask = np.array([cond[i].all() for i in range(cond.shape[0])], dtype=bool)

    result_blocks = []
    for selector in (mask, ~mask):
        if not selector.any():
            continue
        taken = result.take(selector.nonzero()[0], axis=axis)
        downcast = maybe_downcast_numeric(taken, self.dtype)
        result_blocks.append(
            self.make_block(downcast.T, placement=self.mgr_locs[selector])
        )

    return result_blocks
def equals(self, other) -> bool:
    """
    Return whether ``other`` has the same dtype, the same shape and
    equivalent values (as judged by ``array_equivalent``).
    """
    # cheap metadata checks first; values are only compared when both match
    if self.dtype != other.dtype:
        return False
    if self.shape != other.shape:
        return False
    return array_equivalent(self.values, other.values)
def _unstack(self, unstacker_func, new_columns, n_rows, fill_value):
    """Return a list of unstacked blocks of self

    Parameters
    ----------
    unstacker_func : callable
        Partially applied unstacker.
    new_columns : Index
        All columns of the unstacked BlockManager.
    n_rows : int
        Only used in ExtensionBlock._unstack
    fill_value : int
        Only used in ExtensionBlock._unstack

    Returns
    -------
    blocks : list of Block
        New blocks of unstacked values.
    mask : array_like of bool
        The mask of columns of `blocks` we should keep.
    """
    unstacker = unstacker_func(self.values.T)
    placement = new_columns.get_indexer(unstacker.get_new_columns())
    unstacked, mask = unstacker.get_new_values()

    # collapse the mask along axis 0 and use it to select both the
    # transposed values and their placement
    mask = mask.any(0)
    kept_values = unstacked.T[mask]
    kept_placement = placement[mask]

    return [make_block(kept_values, placement=kept_placement)], mask
def quantile(self, qs, interpolation="linear", axis=0):
    """
    Compute the quantiles of the block's values.

    Parameters
    ----------
    qs : a scalar or list of the quantiles to be computed
    interpolation : type of interpolation, default 'linear'
    axis : axis to compute, default 0

    Returns
    -------
    Block
    """
    # We should always have ndim == 2 because Series dispatches to DataFrame
    assert self.ndim == 2

    values = self.get_values()

    is_empty = values.shape[axis] == 0
    orig_scalar = not is_list_like(qs)
    if orig_scalar:
        # make list-like, unpack later
        qs = [qs]

    if is_empty:
        # create the array of na_values
        # 2d len(values) * len(qs)
        result = np.repeat(
            np.array([self.fill_value] * len(qs)), len(values)
        ).reshape(len(values), len(qs))
    else:
        # asarray needed for Sparse, see GH#24600
        mask = np.asarray(isna(values))
        result = nanpercentile(
            values,
            np.array(qs) * 100,
            axis=axis,
            na_value=self.fill_value,
            mask=mask,
            ndim=values.ndim,
            interpolation=interpolation,
        )

        # np.asarray avoids a copy where possible; np.array(..., copy=False)
        # raises on NumPy >= 2.0 whenever a copy turns out to be required.
        result = np.asarray(result)
        result = result.T

    if orig_scalar and not lib.is_scalar(result):
        # result could be scalar in case with is_empty and self.ndim == 1
        assert result.shape[-1] == 1, result.shape
        result = result[..., 0]
        result = lib.item_from_zerodim(result)

    ndim = np.ndim(result)
    return make_block(result, placement=np.arange(len(result)), ndim=ndim)
1553 def _replace_coerce(
1554 self, to_replace, value, inplace=True, regex=False, convert=False, mask=None
1555 ):
1556 """
1557 Replace value corresponding to the given boolean array with another
1558 value.
1560 Parameters
1561 ----------
1562 to_replace : object or pattern
1563 Scalar to replace or regular expression to match.
1564 value : object
1565 Replacement object.
1566 inplace : bool, default False
1567 Perform inplace modification.
1568 regex : bool, default False
1569 If true, perform regular expression substitution.
1570 convert : bool, default True
1571 If true, try to coerce any object types to better types.
1572 mask : array-like of bool, optional
1573 True indicate corresponding element is ignored.
1575 Returns
1576 -------
1577 A new block if there is anything to replace or the original block.
1578 """
1580 if mask.any():
1581 if not regex:
1582 self = self.coerce_to_target_dtype(value)
1583 return self.putmask(mask, value, inplace=inplace)
1584 else:
1585 return self._replace_single(
1586 to_replace,
1587 value,
1588 inplace=inplace,
1589 regex=regex,
1590 convert=convert,
1591 mask=mask,
1592 )
1593 return self
class NonConsolidatableMixIn:
    """ hold methods for the nonconsolidatable blocks """

    _can_consolidate = False
    _verify_integrity = False
    _validate_ndim = False

    def __init__(self, values, placement, ndim=None):
        """Initialize a non-consolidatable block.

        'ndim' may be inferred from 'placement'.

        This will continue to call __init__ for the other base
        classes mixed in with this Mixin.
        """
        # Placement must be converted to BlockPlacement so that we can check
        # its length
        if not isinstance(placement, libinternals.BlockPlacement):
            placement = libinternals.BlockPlacement(placement)

        # Maybe infer ndim from placement: a single location means 2,
        # anything else means 1
        if ndim is None:
            ndim = 2 if len(placement) == 1 else 1
        super().__init__(values, placement, ndim=ndim)

    @property
    def shape(self):
        if self.ndim != 1:
            return (len(self.mgr_locs), len(self.values))
        return (len(self.values),)

    def iget(self, col):
        if not (self.ndim == 2 and isinstance(col, tuple)):
            if col != 0:
                raise IndexError(f"{self} only contains one item")
            return self.values

        col, loc = col
        if not com.is_null_slice(col) and col != 0:
            raise IndexError(f"{self} only contains one item")
        if isinstance(col, slice):
            if col != slice(None):
                raise NotImplementedError(col)
            return self.values[[loc]]
        return self.values[loc]

    def should_store(self, value):
        return isinstance(value, self._holder)

    def set(self, locs, values, check=False):
        assert locs.tolist() == [0]
        self.values = values

    def putmask(self, mask, new, align=True, inplace=False, axis=0, transpose=False):
        """
        putmask the data to the block; we must be a single block and not
        generate other blocks

        return the resulting block

        Parameters
        ----------
        mask : the condition to respect
        new : a ndarray/object
        align : boolean, perform alignment on other/cond, default is True
        inplace : perform inplace modification, default is False

        Returns
        -------
        a new block, the result of the putmask
        """
        inplace = validate_bool_kwarg(inplace, "inplace")

        # use block's copy logic.
        # .values may be an Index which does shallow copy by default
        target = self.values if inplace else self.copy().values

        # an ndarray as long as the mask is narrowed to the masked positions
        if isinstance(new, np.ndarray) and len(new) == len(mask):
            new = new[mask]

        mask = _safe_reshape(mask, target.shape)

        target[mask] = new
        return [self.make_block(values=target)]

    def _get_unstack_items(self, unstacker, new_columns):
        """
        Get the placement, values, and mask for a Block unstack.

        This is shared between ObjectBlock and ExtensionBlock. They
        differ in that ObjectBlock passes the values, while ExtensionBlock
        passes the dummy ndarray of positions to be used by a take
        later.

        Parameters
        ----------
        unstacker : pandas.core.reshape.reshape._Unstacker
        new_columns : Index
            All columns of the unstacked BlockManager.

        Returns
        -------
        new_placement : ndarray[int]
            The placement of the new columns in `new_columns`.
        new_values : Union[ndarray, ExtensionArray]
            The first return value from _Unstacker.get_new_values.
        mask : ndarray[bool]
            The second return value from _Unstacker.get_new_values.
        """
        # shared with ExtensionBlock
        new_placement = new_columns.get_indexer(unstacker.get_new_columns())
        new_values, raw_mask = unstacker.get_new_values()

        return new_placement, new_values, raw_mask.any(0)
class ExtensionBlock(NonConsolidatableMixIn, Block):
    """Block for holding extension types.

    Notes
    -----
    This holds all 3rd-party extension array types. It's also the immediate
    parent class for our internal extension types' blocks, CategoricalBlock.

    ExtensionArrays are limited to 1-D.
    """

    is_extension = True

    def __init__(self, values, placement, ndim=None):
        values = self._maybe_coerce_values(values)
        super().__init__(values, placement, ndim)

    def _maybe_coerce_values(self, values):
        """
        Unbox to an extension array.

        This will unbox an ExtensionArray stored in an Index or Series.
        ExtensionArrays pass through. No dtype coercion is done.

        Parameters
        ----------
        values : Index, Series, ExtensionArray

        Returns
        -------
        ExtensionArray
        """
        return extract_array(values)

    @property
    def _holder(self):
        # For extension blocks, the holder is values-dependent.
        return type(self.values)

    @property
    def fill_value(self):
        # Used in reindex_indexer
        return self.values.dtype.na_value

    @property
    def _can_hold_na(self):
        # The default ExtensionArray._can_hold_na is True
        return self._holder._can_hold_na

    @property
    def is_view(self):
        """Extension arrays are never treated as views."""
        return False

    @property
    def is_numeric(self):
        return self.values.dtype._is_numeric

    def setitem(self, indexer, value):
        """Set the value inplace, returning a same-typed block.

        This differs from Block.setitem by not allowing setitem to change
        the dtype of the Block.

        Parameters
        ----------
        indexer : tuple, list-like, array-like, slice
            The subset of self.values to set
        value : object
            The value being set

        Returns
        -------
        Block

        Notes
        -----
        `indexer` is a direct slice/positional indexer. `value` must
        be a compatible shape.
        """
        if isinstance(indexer, tuple):
            # we are always 1-D
            indexer = indexer[0]

        check_setitem_lengths(indexer, value, self.values)
        self.values[indexer] = value
        return self

    def get_values(self, dtype=None):
        """Return the values as an ndarray, with a leading length-1 axis
        added when the array has one dimension fewer than the block."""
        # ExtensionArrays must be iterable, so this works.
        values = np.asarray(self.values)
        if values.ndim == self.ndim - 1:
            values = values.reshape((1,) + values.shape)
        return values

    def array_values(self) -> ExtensionArray:
        return self.values

    def to_dense(self):
        return np.asarray(self.values)

    def to_native_types(self, slicer=None, na_rep="nan", quoting=None, **kwargs):
        """override to use ExtensionArray astype for the conversion"""
        values = self.values
        if slicer is not None:
            values = values[slicer]
        mask = isna(values)

        values = np.asarray(values.astype(object))
        values[mask] = na_rep

        # we are expected to return a 2-d ndarray
        return values.reshape(1, len(values))

    def take_nd(self, indexer, axis=0, new_mgr_locs=None, fill_tuple=None):
        """
        Take values according to indexer and return them as a block.
        """
        if fill_tuple is None:
            fill_value = None
        else:
            fill_value = fill_tuple[0]

        # axis doesn't matter; we are really a single-dim object
        # but are passed the axis depending on the calling routing
        # if its REALLY axis 0, then this will be a reindex and not a take
        new_values = self.values.take(indexer, fill_value=fill_value, allow_fill=True)

        # Called from three places in managers, all of which satisfy
        # this assertion
        assert not (self.ndim == 1 and new_mgr_locs is None)
        if new_mgr_locs is None:
            new_mgr_locs = self.mgr_locs

        return self.make_block_same_class(new_values, new_mgr_locs)

    def _can_hold_element(self, element: Any) -> bool:
        # XXX: We may need to think about pushing this onto the array.
        # We're doing the same as CategoricalBlock here.
        return True

    def _slice(self, slicer):
        """ return a slice of my values """
        # slice the category
        # return same dims as we currently have

        if isinstance(slicer, tuple) and len(slicer) == 2:
            if not com.is_null_slice(slicer[0]):
                raise AssertionError("invalid slicing for a 1-ndim categorical")
            slicer = slicer[1]

        return self.values[slicer]

    def concat_same_type(self, to_concat, placement=None):
        """
        Concatenate list of single blocks of the same type.

        When ``placement`` is not supplied, the result is placed at
        ``slice(0, len(values), 1)``.
        """
        values = self._holder._concat_same_type([blk.values for blk in to_concat])
        # Compare against None explicitly: truth-testing the placement is
        # ambiguous for multi-element ndarrays (raises ValueError).
        if placement is None:
            placement = slice(0, len(values), 1)
        return self.make_block_same_class(values, ndim=self.ndim, placement=placement)

    def fillna(self, value, limit=None, inplace=False, downcast=None):
        """Fill missing values by delegating to the array's own ``fillna``;
        returns a single-element list holding the new block."""
        values = self.values if inplace else self.values.copy()
        values = values.fillna(value=value, limit=limit)
        return [
            self.make_block_same_class(
                values=values, placement=self.mgr_locs, ndim=self.ndim
            )
        ]

    def interpolate(
        self, method="pad", axis=0, inplace=False, limit=None, fill_value=None, **kwargs
    ):
        """Fill via the array's ``fillna`` with the given ``method``; the
        new block is returned directly (not wrapped in a list)."""
        values = self.values if inplace else self.values.copy()
        return self.make_block_same_class(
            values=values.fillna(value=fill_value, method=method, limit=limit),
            placement=self.mgr_locs,
        )

    def diff(self, n: int, axis: int = 1) -> List["Block"]:
        if axis == 1:
            # we are by definition 1D.
            axis = 0
        return super().diff(n, axis)

    def shift(
        self, periods: int, axis: int = 0, fill_value: Any = None,
    ) -> List["ExtensionBlock"]:
        """
        Shift the block by `periods`.

        Dispatches to underlying ExtensionArray and re-boxes in an
        ExtensionBlock.
        """
        return [
            self.make_block_same_class(
                self.values.shift(periods=periods, fill_value=fill_value),
                placement=self.mgr_locs,
                ndim=self.ndim,
            )
        ]

    def where(
        self,
        other,
        cond,
        align=True,
        errors="raise",
        try_cast: bool = False,
        axis: int = 0,
    ) -> List["Block"]:
        """Keep values where ``cond`` holds and set the rest from ``other``;
        returns a single-element list holding the new block."""
        if isinstance(other, ABCDataFrame):
            # ExtensionArrays are 1-D, so if we get here then
            # `other` should be a DataFrame with a single column.
            assert other.shape[1] == 1
            other = other.iloc[:, 0]

        other = extract_array(other, extract_numpy=True)

        if isinstance(cond, ABCDataFrame):
            assert cond.shape[1] == 1
            cond = cond.iloc[:, 0]

        cond = extract_array(cond, extract_numpy=True)

        if lib.is_scalar(other) and isna(other):
            # The default `other` for Series / Frame is np.nan
            # we want to replace that with the correct NA value
            # for the type
            other = self.dtype.na_value

        if is_sparse(self.values):
            # TODO(SparseArray.__setitem__): remove this if condition
            # We need to re-infer the type of the data after doing the
            # where, for cases where the subtypes don't match
            dtype = None
        else:
            dtype = self.dtype

        result = self.values.copy()
        icond = ~cond
        if lib.is_scalar(other):
            set_other = other
        else:
            set_other = other[icond]
        try:
            result[icond] = set_other
        except (NotImplementedError, TypeError):
            # NotImplementedError for class not implementing `__setitem__`
            # TypeError for SparseArray, which implements just to raise
            # a TypeError
            result = self._holder._from_sequence(
                np.where(cond, self.values, other), dtype=dtype
            )

        return [self.make_block_same_class(result, placement=self.mgr_locs)]

    @property
    def _ftype(self):
        return getattr(self.values, "_pandas_ftype", Block._ftype)

    def _unstack(self, unstacker_func, new_columns, n_rows, fill_value):
        # ExtensionArray-safe unstack.
        # We override ObjectBlock._unstack, which unstacks directly on the
        # values of the array. For EA-backed blocks, this would require
        # converting to a 2-D ndarray of objects.
        # Instead, we unstack an ndarray of integer positions, followed by
        # a `take` on the actual values.
        dummy_arr = np.arange(n_rows)
        dummy_unstacker = functools.partial(unstacker_func, fill_value=-1)
        unstacker = dummy_unstacker(dummy_arr)

        new_placement, new_values, mask = self._get_unstack_items(
            unstacker, new_columns
        )

        blocks = [
            self.make_block_same_class(
                self.values.take(indices, allow_fill=True, fill_value=fill_value),
                [place],
            )
            for indices, place in zip(new_values.T, new_placement)
        ]
        return blocks, mask
class ObjectValuesExtensionBlock(ExtensionBlock):
    """
    Block providing backwards-compatibility for `.values`.

    Used by PeriodArray and IntervalArray to ensure that
    Series[T].values is an ndarray of objects.
    """

    def external_values(self, dtype=None):
        # `dtype` is accepted but not consulted; the cast is always to object
        as_objects = self.values.astype(object)
        return as_objects
class NumericBlock(Block):
    """Block flagged as numeric and as able to hold NA values."""

    __slots__ = ()
    is_numeric = True
    _can_hold_na = True
class FloatOrComplexBlock(NumericBlock):
    __slots__ = ()

    def equals(self, other) -> bool:
        """
        Element-wise equality in which positions that are NaN on both
        sides count as equal; dtype and shape must match first.
        """
        if self.dtype != other.dtype:
            return False
        if self.shape != other.shape:
            return False

        left, right = self.values, other.values
        same_value = left == right
        both_nan = np.isnan(left) & np.isnan(right)
        return (same_value | both_nan).all()
class FloatBlock(FloatOrComplexBlock):
    __slots__ = ()
    is_float = True

    def _can_hold_element(self, element: Any) -> bool:
        """
        Accept float/integer dtypes and scalars; reject datetime-like,
        timedelta-like and boolean ones.
        """
        tipo = maybe_infer_dtype_type(element)
        if tipo is not None:
            numeric_kind = issubclass(tipo.type, (np.floating, np.integer))
            return numeric_kind and not issubclass(
                tipo.type, (np.datetime64, np.timedelta64)
            )

        accepted = (float, int, np.floating, np.int_)
        rejected = (
            bool,
            np.bool_,
            datetime,
            timedelta,
            np.datetime64,
            np.timedelta64,
        )
        return isinstance(element, accepted) and not isinstance(element, rejected)

    def to_native_types(
        self,
        slicer=None,
        na_rep="",
        float_format=None,
        decimal=".",
        quoting=None,
        **kwargs,
    ):
        """ convert to our native types format, slicing if desired """
        values = self.values
        if slicer is not None:
            values = values[:, slicer]

        # see gh-13418: no special formatting is desired at the
        # output (important for appropriate 'quoting' behaviour),
        # so do not pass it through the FloatArrayFormatter
        plain_output = float_format is None and decimal == "."
        if plain_output:
            mask = isna(values)

            if quoting:
                values = np.array(values, dtype="object")
            else:
                values = values.astype(str)

            values[mask] = na_rep
            return values

        from pandas.io.formats.format import FloatArrayFormatter

        formatter = FloatArrayFormatter(
            values,
            na_rep=na_rep,
            float_format=float_format,
            decimal=decimal,
            quoting=quoting,
            fixed_width=False,
        )
        return formatter.get_result_as_array()

    def should_store(self, value):
        # when inserting a column should not coerce integers to floats
        # unnecessarily
        if not issubclass(value.dtype.type, np.floating):
            return False
        return value.dtype == self.dtype
class ComplexBlock(FloatOrComplexBlock):
    __slots__ = ()
    is_complex = True

    def _can_hold_element(self, element: Any) -> bool:
        """
        Accept float, integer and complex dtypes and scalars; boolean
        scalars are rejected.
        """
        tipo = maybe_infer_dtype_type(element)
        if tipo is not None:
            return issubclass(tipo.type, (np.floating, np.integer, np.complexfloating))
        # np.float64 is the type the removed ``np.float_`` alias pointed to;
        # spelling it out keeps this working on NumPy >= 2.0.
        return isinstance(
            element, (float, int, complex, np.float64, np.int_)
        ) and not isinstance(element, (bool, np.bool_))

    def should_store(self, value):
        return issubclass(value.dtype.type, np.complexfloating)
class IntBlock(NumericBlock):
    __slots__ = ()
    is_integer = True
    _can_hold_na = False

    def _can_hold_element(self, element: Any) -> bool:
        """
        Accept integer dtypes no wider than this block's dtype (datetime
        and timedelta excluded); scalars are judged by ``is_integer``.
        """
        tipo = maybe_infer_dtype_type(element)
        if tipo is None:
            return is_integer(element)
        return (
            issubclass(tipo.type, np.integer)
            and not issubclass(tipo.type, (np.datetime64, np.timedelta64))
            and self.dtype.itemsize >= tipo.itemsize
        )

    def should_store(self, value):
        return is_integer_dtype(value) and value.dtype == self.dtype
class DatetimeLikeBlockMixin:
    """Mixin class for DatetimeBlock, DatetimeTZBlock, and TimedeltaBlock."""

    @property
    def _holder(self):
        return DatetimeArray

    @property
    def fill_value(self):
        return np.datetime64("NaT", "ns")

    def get_values(self, dtype=None):
        """
        return object dtype as boxed values, such as Timestamps/Timedelta
        """
        if not is_object_dtype(dtype):
            return self.values

        # box through the holder on the flattened data, then restore shape
        flat = self.values.ravel()
        boxed = self._holder(flat).astype(object)
        return boxed.reshape(self.values.shape)

    def iget(self, key):
        # GH#31649 we need to wrap scalars in Timestamp/Timedelta
        # TODO(EA2D): this can be removed if we ever have 2D EA
        result = super().iget(key)
        if isinstance(result, np.datetime64):
            return Timestamp(result)
        if isinstance(result, np.timedelta64):
            return Timedelta(result)
        return result

    def shift(self, periods, axis=0, fill_value=None):
        # TODO(EA2D) this is unnecessary if these blocks are backed by 2D EAs
        shifted = self.array_values().shift(periods, fill_value=fill_value, axis=axis)
        return self.make_block_same_class(shifted)
2171class DatetimeBlock(DatetimeLikeBlockMixin, Block):
2172 __slots__ = ()
2173 is_datetime = True
2175 def __init__(self, values, placement, ndim=None):
2176 values = self._maybe_coerce_values(values)
2177 super().__init__(values, placement=placement, ndim=ndim)
2179 @property
2180 def _can_hold_na(self):
2181 return True
2183 def _maybe_coerce_values(self, values):
2184 """
2185 Input validation for values passed to __init__. Ensure that
2186 we have datetime64ns, coercing if necessary.
2188 Parameters
2189 ----------
2190 values : array-like
2191 Must be convertible to datetime64
2193 Returns
2194 -------
2195 values : ndarray[datetime64ns]
2197 Overridden by DatetimeTZBlock.
2198 """
2199 if values.dtype != _NS_DTYPE:
2200 values = conversion.ensure_datetime64ns(values)
2202 if isinstance(values, DatetimeArray):
2203 values = values._data
2205 assert isinstance(values, np.ndarray), type(values)
2206 return values
2208 def astype(self, dtype, copy: bool = False, errors: str = "raise"):
2209 """
2210 these automatically copy, so copy=True has no effect
2211 raise on an except if raise == True
2212 """
2213 dtype = pandas_dtype(dtype)
2215 # if we are passed a datetime64[ns, tz]
2216 if is_datetime64tz_dtype(dtype):
2217 values = self.values
2218 if copy:
2219 # this should be the only copy
2220 values = values.copy()
2221 if getattr(values, "tz", None) is None:
2222 values = DatetimeArray(values).tz_localize("UTC")
2223 values = values.tz_convert(dtype.tz)
2224 return self.make_block(values)
2226 # delegate
2227 return super().astype(dtype=dtype, copy=copy, errors=errors)
2229 def _can_hold_element(self, element: Any) -> bool:
2230 tipo = maybe_infer_dtype_type(element)
2231 if tipo is not None:
2232 if self.is_datetimetz:
2233 # require exact match, since non-nano does not exist
2234 return is_dtype_equal(tipo, self.dtype) or is_valid_nat_for_dtype(
2235 element, self.dtype
2236 )
2238 # GH#27419 if we get a non-nano datetime64 object
2239 return is_datetime64_dtype(tipo)
2240 elif element is NaT:
2241 return True
2242 elif isinstance(element, datetime):
2243 if self.is_datetimetz:
2244 return tz_compare(element.tzinfo, self.dtype.tz)
2245 return element.tzinfo is None
2247 return is_valid_nat_for_dtype(element, self.dtype)
2249 def to_native_types(
2250 self, slicer=None, na_rep=None, date_format=None, quoting=None, **kwargs
2251 ):
2252 """ convert to our native types format, slicing if desired """
2254 values = self.values
2255 i8values = self.values.view("i8")
2257 if slicer is not None:
2258 values = values[..., slicer]
2259 i8values = i8values[..., slicer]
2261 from pandas.io.formats.format import _get_format_datetime64_from_values
2263 fmt = _get_format_datetime64_from_values(values, date_format)
2265 result = tslib.format_array_from_datetime(
2266 i8values.ravel(),
2267 tz=getattr(self.values, "tz", None),
2268 format=fmt,
2269 na_rep=na_rep,
2270 ).reshape(i8values.shape)
2271 return np.atleast_2d(result)
def should_store(self, value):
    """
    Return True when ``value`` has a numpy datetime64 scalar type and is
    neither tz-aware nor an extension array.
    """
    if not issubclass(value.dtype.type, np.datetime64):
        return False
    if is_datetime64tz_dtype(value):
        return False
    return not is_extension_array_dtype(value)
def set(self, locs, values):
    """
    Write ``values`` into this block's array at ``locs``, in place.

    The incoming values are first passed through
    ``conversion.ensure_datetime64ns`` (without requesting a copy).

    Returns
    -------
    None
    """
    self.values[locs] = conversion.ensure_datetime64ns(values, copy=False)
def external_values(self):
    """ return the block's values as a datetime64[ns] ndarray """
    as_ns = self.values.astype("datetime64[ns]", copy=False)
    return np.asarray(as_ns)
def array_values(self) -> ExtensionArray:
    """ wrap the block's values in a DatetimeArray via ``_simple_new`` """
    backing = self.values
    return DatetimeArray._simple_new(backing)
class DatetimeTZBlock(ExtensionBlock, DatetimeBlock):
    """ implement a datetime64 block with a tz attribute """

    __slots__ = ()
    is_datetimetz = True
    is_extension = True

    # ExtensionBlock comes first in the bases; bind the DatetimeBlock
    # implementations of these two explicitly.
    _can_hold_element = DatetimeBlock._can_hold_element
    to_native_types = DatetimeBlock.to_native_types
    fill_value = np.datetime64("NaT", "ns")

    @property
    def _holder(self):
        """ the array class that backs this block """
        return DatetimeArray

    def _maybe_coerce_values(self, values):
        """
        Validate the values handed to ``__init__``, wrapping them in the
        holder class when they are not already an instance of it.

        Parameters
        ----------
        values : array-like
            Must be convertible to datetime64

        Returns
        -------
        values : DatetimeArray

        Raises
        ------
        ValueError
            If the resulting array has no timezone.
        """
        holder = self._holder
        coerced = values if isinstance(values, holder) else holder(values)

        if coerced.tz is None:
            raise ValueError("cannot create a DatetimeTZBlock without a tz")

        return coerced

    @property
    def is_view(self):
        """ return a boolean if I am possibly a view """
        # look at the ndarray sitting underneath the stored values
        underlying = self.values._data
        return underlying.base is not None

    def get_values(self, dtype=None):
        """
        Return the block's values as an ndarray.

        Parameters
        ----------
        dtype : np.dtype
            Only `object`-like dtypes change the result; any other value
            is ignored.

        Returns
        -------
        values : ndarray
            For an object dtype the stored values are first cast with
            ``astype(object)``; otherwise they are passed directly to
            ``np.asarray``.

            When the block is 2-dimensional the result is reshaped to a
            single row, ``(1, -1)``.
        """
        if is_object_dtype(dtype):
            source = self.values.astype(object)
        else:
            source = self.values

        result = np.asarray(source)

        if self.ndim != 2:
            return result
        # Ensure that our shape is correct for DataFrame.
        # ExtensionArrays are always 1-D, even in a DataFrame when
        # the analogous NumPy-backed column would be a 2-D ndarray.
        return result.reshape(1, -1)

    def to_dense(self):
        """ return the values as an M8[ns] ndarray """
        # we request M8[ns] dtype here, even though it discards tzinfo,
        # as lots of code (e.g. anything using values_from_object)
        # expects that behavior.
        return np.asarray(self.values, dtype=_NS_DTYPE)

    def _slice(self, slicer):
        """ return a slice of my values """
        if not isinstance(slicer, tuple):
            return self.values[slicer]

        col, loc = slicer
        if not com.is_null_slice(col) and col != 0:
            raise IndexError(f"{self} only contains one item")
        return self.values[loc]

    def diff(self, n: int, axis: int = 0) -> List["Block"]:
        """
        1st discrete difference.

        Parameters
        ----------
        n : int
            Number of periods to diff.
        axis : int, default 0
            Axis to diff upon.

        Returns
        -------
        A list with a new TimeDeltaBlock.

        Raises
        ------
        NotImplementedError
            When ``axis == 0``.

        Notes
        -----
        The arguments here are mimicking shift so they are called correctly
        by apply.
        """
        if axis == 0:
            # Cannot currently calculate diff across multiple blocks since this
            # function is invoked via apply
            raise NotImplementedError

        current = self.values
        shifted = self.shift(n, axis=axis)[0].values
        delta_i8 = (current - shifted).asi8

        # Reshape the new_values like how algos.diff does for timedelta data
        as_row = delta_i8.reshape(1, len(delta_i8))
        as_timedelta = as_row.astype("timedelta64[ns]")
        return [TimeDeltaBlock(as_timedelta, placement=self.mgr_locs.indexer)]

    def concat_same_type(self, to_concat, placement=None):
        """
        Concatenate blocks; when their dtypes differ the result is an
        ObjectBlock built from ``concat_datetime``.
        """
        # need to handle concat([tz1, tz2]) here, since DatetimeArray
        # only handles cases where all the tzs are the same.
        # Instead of placing the condition here, it could also go into the
        # is_uniform_join_units check, but I'm not sure what is better.
        distinct_dtypes = {blk.dtype for blk in to_concat}
        if len(distinct_dtypes) <= 1:
            return super().concat_same_type(to_concat, placement)

        combined = concat_datetime([blk.values for blk in to_concat])
        placement = placement or slice(0, len(combined), 1)

        if self.ndim > 1:
            combined = np.atleast_2d(combined)
        return ObjectBlock(combined, ndim=self.ndim, placement=placement)

    def fillna(self, value, limit=None, inplace=False, downcast=None):
        """
        Fill missing values, falling back to an object-dtype block when
        ``value`` cannot be held by this block.
        """
        # We support filling a DatetimeTZ with a `value` whose timezone
        # is different by coercing to object.
        if not self._can_hold_element(value):
            # different timezones, or a non-tz
            as_object = self.astype(object)
            return as_object.fillna(
                value, limit=limit, inplace=inplace, downcast=downcast
            )

        return super().fillna(value, limit, inplace, downcast)

    def setitem(self, indexer, value):
        """
        Set ``value`` at ``indexer``, going through an ObjectBlock when the
        value cannot be held and the indexer is not an empty ndarray.
        """
        # https://github.com/pandas-dev/pandas/issues/24020
        # Need a dedicated setitem until #24020 (type promotion in setitem
        # for extension arrays) is designed and implemented.
        if self._can_hold_element(value):
            return super().setitem(indexer, value)
        if isinstance(indexer, np.ndarray) and indexer.size == 0:
            return super().setitem(indexer, value)

        object_values = self.values.astype(object)
        object_block = make_block(
            object_values, placement=self.mgr_locs, klass=ObjectBlock, ndim=self.ndim
        )
        return object_block.setitem(indexer, value)

    def equals(self, other) -> bool:
        """ compare dtype, shape and the i8 view of the values """
        # override for significant performance improvement
        if self.dtype != other.dtype:
            return False
        if self.shape != other.shape:
            return False

        mine = self.values.view("i8")
        theirs = other.values.view("i8")
        return (mine == theirs).all()

    def quantile(self, qs, interpolation="linear", axis=0):
        """
        Compute quantiles on an M8[ns] view of the values and re-wrap the
        result in the holder class with this block's dtype.
        """
        # kludge for 2D block with 1D values
        naive = self.values.view("M8[ns]").reshape(self.shape)

        naive_block = self.make_block(naive)
        result_block = naive_block.quantile(
            qs, interpolation=interpolation, axis=axis
        )

        # ravel is kludge for 2D block with 1D values, assumes column-like
        flat_result = result_block.values.ravel()
        aware = self._holder(flat_result, dtype=self.dtype)
        return self.make_block_same_class(aware, ndim=result_block.ndim)
class TimeDeltaBlock(DatetimeLikeBlockMixin, IntBlock):
    """ block of timedelta64[ns] values stored as an ndarray """

    __slots__ = ()
    is_timedelta = True
    _can_hold_na = True
    is_numeric = False
    fill_value = np.timedelta64("NaT", "ns")

    def __init__(self, values, placement, ndim=None):
        """
        Coerce ``values`` to the timedelta64[ns] dtype when needed, unwrap a
        TimedeltaArray to its ``_data``, and hand the ndarray to the parent
        constructor.
        """
        needs_conversion = values.dtype != _TD_DTYPE
        if needs_conversion:
            values = conversion.ensure_timedelta64ns(values)
        if isinstance(values, TimedeltaArray):
            values = values._data
        assert isinstance(values, np.ndarray), type(values)
        super().__init__(values, placement=placement, ndim=ndim)

    @property
    def _holder(self):
        """ the array class associated with this block """
        return TimedeltaArray

    def _can_hold_element(self, element: Any) -> bool:
        """
        Return True for timedelta64-typed array-likes, NaT, timedelta /
        np.timedelta64 scalars, or NaT-like values valid for this dtype.
        """
        inferred = maybe_infer_dtype_type(element)
        if inferred is not None:
            return issubclass(inferred.type, np.timedelta64)

        if element is NaT or isinstance(element, (timedelta, np.timedelta64)):
            return True
        return is_valid_nat_for_dtype(element, self.dtype)

    def fillna(self, value, **kwargs):
        """
        Fill missing values; integer fill values are rejected.

        Raises
        ------
        TypeError
            If ``value`` is an integer.
        """
        if not is_integer(value):
            return super().fillna(value, **kwargs)

        # Deprecation GH#24694, GH#19233
        raise TypeError(
            "Passing integers to fillna for timedelta64[ns] dtype is no "
            "longer supported. To obtain the old behavior, pass "
            "`pd.Timedelta(seconds=n)` instead."
        )

    def should_store(self, value):
        """ True for non-extension values with a timedelta64 scalar type """
        if not issubclass(value.dtype.type, np.timedelta64):
            return False
        return not is_extension_array_dtype(value)

    def to_native_types(self, slicer=None, na_rep=None, quoting=None, **kwargs):
        """ convert to our native types format, slicing if desired """
        data = self.values if slicer is None else self.values[:, slicer]
        missing = isna(data)

        out = np.empty(data.shape, dtype=object)
        out[missing] = "NaT" if na_rep is None else na_rep
        present = (~missing).ravel()

        # FIXME:
        # should use the formats.format.Timedelta64Formatter here
        # to figure what format to pass to the Timedelta
        # e.g. to not show the decimals say
        rendered = [
            Timedelta(item)._repr_base(format="all") for item in data.ravel()[present]
        ]
        out.flat[present] = np.array(rendered, dtype=object)
        return out

    def external_values(self, dtype=None):
        """ return the values as a timedelta64[ns] ndarray """
        as_ns = self.values.astype("timedelta64[ns]", copy=False)
        return np.asarray(as_ns)

    def array_values(self) -> ExtensionArray:
        """ wrap the values in a TimedeltaArray via ``_simple_new`` """
        backing = self.values
        return TimedeltaArray._simple_new(backing)
class BoolBlock(NumericBlock):
    """ block of boolean values; cannot hold missing values """

    __slots__ = ()
    is_bool = True
    _can_hold_na = False

    def _can_hold_element(self, element: Any) -> bool:
        """
        Array-likes must have a bool scalar type; scalars must be ``bool``
        or ``np.bool_`` instances.
        """
        inferred = maybe_infer_dtype_type(element)
        if inferred is None:
            return isinstance(element, (bool, np.bool_))
        return issubclass(inferred.type, np.bool_)

    def should_store(self, value):
        """ True for non-extension values with a bool scalar type """
        if not issubclass(value.dtype.type, np.bool_):
            return False
        return not is_extension_array_dtype(value)

    def replace(
        self, to_replace, value, inplace=False, filter=None, regex=False, convert=True
    ):
        """
        Replace values, returning ``self`` untouched when ``to_replace``
        cannot be cast to bool.
        """
        inplace = validate_bool_kwarg(inplace, "inplace")
        candidates = np.atleast_1d(to_replace)
        if np.can_cast(candidates, bool):
            return super().replace(
                to_replace,
                value,
                inplace=inplace,
                filter=filter,
                regex=regex,
                convert=convert,
            )
        return self
class ObjectBlock(Block):
    """ block of object-dtype values; accepts any element """

    __slots__ = ()
    is_object = True
    _can_hold_na = True

    def __init__(self, values, placement=None, ndim=2):
        # values whose dtype scalar type is a str subclass are re-created
        # as an object-dtype ndarray before being stored
        if issubclass(values.dtype.type, str):
            values = np.array(values, dtype=object)

        super().__init__(values, ndim=ndim, placement=placement)

    @property
    def is_bool(self):
        """ we can be a bool if we have only bool values but are of type
        object
        """
        return lib.is_bool_array(self.values.ravel())

    def convert(
        self,
        copy: bool = True,
        datetime: bool = True,
        numeric: bool = True,
        timedelta: bool = True,
        coerce: bool = False,
    ):
        """ attempt to coerce any object types to better types return a copy of
        the block (if copy = True) by definition we ARE an ObjectBlock!!!!!

        can return multiple blocks!

        The keyword flags are forwarded unchanged to ``soft_convert_objects``.
        For a 2-dimensional block the conversion goes through
        ``split_and_operate``; otherwise a single new block is built from
        the converted values. A list of blocks is returned in both cases.
        """

        # operate column-by-column
        def f(mask, val, idx):
            # `mask` and `idx` are part of the callback signature but are
            # not used here
            shape = val.shape
            values = soft_convert_objects(
                val.ravel(),
                datetime=datetime,
                numeric=numeric,
                timedelta=timedelta,
                coerce=coerce,
                copy=copy,
            )
            if isinstance(values, np.ndarray):
                # TODO: allow EA once reshape is supported
                values = values.reshape(shape)

            values = _block_shape(values, ndim=self.ndim)
            return values

        if self.ndim == 2:
            blocks = self.split_and_operate(None, f, False)
        else:
            values = f(None, self.values.ravel(), None)
            blocks = [make_block(values, ndim=self.ndim, placement=self.mgr_locs)]

        return blocks

    def _maybe_downcast(self, blocks: List["Block"], downcast=None) -> List["Block"]:
        """
        Return ``blocks`` unchanged when ``downcast`` is given; otherwise
        convert each block (datetime conversion on, numeric off) and
        flatten the results into one list.
        """

        if downcast is not None:
            return blocks

        # split and convert the blocks
        return _extend_blocks([b.convert(datetime=True, numeric=False) for b in blocks])

    def _can_hold_element(self, element: Any) -> bool:
        """ an object block accepts every element """
        return True

    def should_store(self, value):
        """
        Return True unless ``value`` has an integer, floating, complex,
        datetime64 or bool scalar type, or is an extension array.
        """
        return not (
            issubclass(
                value.dtype.type,
                (np.integer, np.floating, np.complexfloating, np.datetime64, np.bool_),
            )
            or is_extension_array_dtype(value)
        )

    def replace(
        self, to_replace, value, inplace=False, filter=None, regex=False, convert=True
    ):
        """
        Replace ``to_replace`` with ``value``, dispatching on whether either
        argument is list-like and whether a regex is involved.

        * scalar compiled-regex ``to_replace``: single regex replacement
        * neither list-like and ``regex`` falsy: the parent ``replace``
        * both list-like: pairwise replacement, each pair applied to the
          blocks produced by the previous pair
        * list-like ``to_replace`` with ``regex``: each pattern in turn,
          all replaced by the same ``value``
        * anything else: a single call to ``_replace_single``
        """
        to_rep_is_list = is_list_like(to_replace)
        value_is_list = is_list_like(value)
        both_lists = to_rep_is_list and value_is_list
        either_list = to_rep_is_list or value_is_list

        result_blocks = []
        blocks = [self]

        if not either_list and is_re(to_replace):
            return self._replace_single(
                to_replace,
                value,
                inplace=inplace,
                filter=filter,
                regex=True,
                convert=convert,
            )
        elif not (either_list or regex):
            return super().replace(
                to_replace,
                value,
                inplace=inplace,
                filter=filter,
                regex=regex,
                convert=convert,
            )
        elif both_lists:
            for to_rep, v in zip(to_replace, value):
                # start a fresh accumulator for this pair; its output becomes
                # the input of the next pair
                result_blocks = []
                for b in blocks:
                    result = b._replace_single(
                        to_rep,
                        v,
                        inplace=inplace,
                        filter=filter,
                        regex=regex,
                        convert=convert,
                    )
                    result_blocks = _extend_blocks(result, result_blocks)
                blocks = result_blocks
            return result_blocks

        elif to_rep_is_list and regex:
            for to_rep in to_replace:
                # same chaining as above, with one shared replacement value
                result_blocks = []
                for b in blocks:
                    result = b._replace_single(
                        to_rep,
                        value,
                        inplace=inplace,
                        filter=filter,
                        regex=regex,
                        convert=convert,
                    )
                    result_blocks = _extend_blocks(result, result_blocks)
                blocks = result_blocks
            return result_blocks

        return self._replace_single(
            to_replace,
            value,
            inplace=inplace,
            filter=filter,
            convert=convert,
            regex=regex,
        )

    def _replace_single(
        self,
        to_replace,
        value,
        inplace=False,
        filter=None,
        regex=False,
        convert=True,
        mask=None,
    ):
        """
        Replace elements by the given value.

        Parameters
        ----------
        to_replace : object or pattern
            Scalar to replace or regular expression to match.
        value : object
            Replacement object.
        inplace : bool, default False
            Perform inplace modification.
        filter : list, optional
            When given, only the positions of this block whose ``mgr_locs``
            are in ``filter`` are processed.
        regex : bool, default False
            If true, perform regular expression substitution. A value that
            is itself regex-compilable is used as the pattern instead of
            ``to_replace``.
        convert : bool, default True
            If true, try to coerce any object types to better types.
        mask : array-like of bool, optional
            When given, the regex replacement is applied only to the
            positions it selects (after ``filter`` has been applied).

        Returns
        -------
        a new block, the result after replacing

        Raises
        ------
        AssertionError
            If both ``to_replace`` and ``regex`` are regex compilable.
        """
        inplace = validate_bool_kwarg(inplace, "inplace")

        # to_replace is regex compilable
        to_rep_re = regex and is_re_compilable(to_replace)

        # regex is regex compilable
        regex_re = is_re_compilable(regex)

        # only one will survive
        if to_rep_re and regex_re:
            raise AssertionError(
                "only one of to_replace and regex can be regex compilable"
            )

        # if regex was passed as something that can be a regex (rather than a
        # boolean)
        if regex_re:
            to_replace = regex

        regex = regex_re or to_rep_re

        # try to get the pattern attribute (compiled re) or it's a string
        if is_re(to_replace):
            pattern = to_replace.pattern
        else:
            pattern = to_replace

        # if the pattern is not empty and to_replace is either a string or a
        # regex
        if regex and pattern:
            rx = re.compile(to_replace)
        else:
            # if the thing to replace is not a string or compiled regex call
            # the superclass method -> to_replace is some kind of object
            return super().replace(
                to_replace, value, inplace=inplace, filter=filter, regex=regex
            )

        new_values = self.values if inplace else self.values.copy()

        # deal with replacing values with objects (strings) that match but
        # whose replacement is not a string (numeric, nan, object)
        if isna(value) or not isinstance(value, str):

            def re_replacer(s):
                # a match anywhere in the string swaps in the whole value;
                # non-string elements pass through untouched
                if is_re(rx) and isinstance(s, str):
                    return value if rx.search(s) is not None else s
                else:
                    return s

        else:
            # value is guaranteed to be a string here, s can be either a string
            # or null if it's null it gets returned
            def re_replacer(s):
                if is_re(rx) and isinstance(s, str):
                    return rx.sub(value, s)
                else:
                    return s

        f = np.vectorize(re_replacer, otypes=[self.dtype])

        if filter is None:
            filt = slice(None)
        else:
            filt = self.mgr_locs.isin(filter).nonzero()[0]

        if mask is None:
            new_values[filt] = f(new_values[filt])
        else:
            # NOTE(review): when `filt` is an integer array (a `filter` was
            # given), `new_values[filt]` is presumably a copy, so this
            # chained assignment may not write back -- confirm.
            new_values[filt][mask] = f(new_values[filt][mask])

        # convert
        block = self.make_block(new_values)
        if convert:
            block = block.convert(numeric=False)
        return block

    def _replace_coerce(
        self, to_replace, value, inplace=True, regex=False, convert=False, mask=None
    ):
        """
        Replace value corresponding to the given boolean array with another
        value.

        Parameters
        ----------
        to_replace : object or pattern
            Scalar to replace or regular expression to match.
        value : object
            Replacement object.
        inplace : bool, default True
            Perform inplace modification.
        regex : bool, default False
            If true, perform regular expression substitution.
        convert : bool, default False
            If true, try to coerce any object types to better types.
        mask : array-like of bool
            Positions to replace. Despite the ``None`` default it is used
            unconditionally (``mask.any()``), so it must be supplied.

        Returns
        -------
        The result of the parent ``_replace_coerce`` when ``mask`` selects
        anything (each block converted when ``convert`` is true). Otherwise
        a one-element list holding the converted block when ``convert`` is
        true, or ``self`` itself when it is not.
        """
        if mask.any():
            block = super()._replace_coerce(
                to_replace=to_replace,
                value=value,
                inplace=inplace,
                regex=regex,
                convert=convert,
                mask=mask,
            )
            if convert:
                block = [b.convert(numeric=False, copy=True) for b in block]
            return block
        if convert:
            return [self.convert(numeric=False, copy=True)]
        return self
class CategoricalBlock(ExtensionBlock):
    """ extension block whose values are a Categorical """

    __slots__ = ()
    is_categorical = True
    _verify_integrity = True
    _can_hold_na = True
    _concatenator = staticmethod(concat_categorical)

    def __init__(self, values, placement, ndim=None):
        """ unwrap ``values`` with ``extract_array``; it must be a Categorical """
        # coerce to categorical if we can
        extracted = extract_array(values)
        assert isinstance(extracted, Categorical), type(extracted)
        super().__init__(extracted, placement=placement, ndim=ndim)

    @property
    def _holder(self):
        """ the array class that backs this block """
        return Categorical

    @property
    def array_dtype(self):
        """ the dtype to return if I want to construct this block as an
        array
        """
        return np.object_

    def to_dense(self):
        """ return the values via ``_internal_get_values`` """
        # Categorical.get_values returns a DatetimeIndex for datetime
        # categories, so we can't simply use `np.asarray(self.values)` like
        # other types.
        categorical = self.values
        return categorical._internal_get_values()

    def to_native_types(self, slicer=None, na_rep="", quoting=None, **kwargs):
        """ convert to our native types format, slicing if desired """
        # Categorical is always one dimension
        selected = self.values if slicer is None else self.values[slicer]

        missing = isna(selected)
        as_object = np.array(selected, dtype="object")
        as_object[missing] = na_rep

        # we are expected to return a 2-d ndarray
        return as_object.reshape(1, len(as_object))

    def concat_same_type(self, to_concat, placement=None):
        """
        Concatenate list of single blocks of the same type.

        Note that this CategoricalBlock._concat_same_type *may* not
        return a CategoricalBlock. When the categories in `to_concat`
        differ, this will return an object ndarray.

        If / when we decide we don't like that behavior:

        1. Change Categorical._concat_same_type to use union_categoricals
        2. Delete this method.
        """
        pieces = [blk.values for blk in to_concat]
        combined = self._concatenator(pieces, axis=self.ndim - 1)

        # not using self.make_block_same_class as values can be object dtype
        return make_block(
            combined, placement=placement or slice(0, len(combined), 1), ndim=self.ndim
        )

    def replace(
        self,
        to_replace,
        value,
        inplace: bool = False,
        filter=None,
        regex: bool = False,
        convert: bool = True,
    ):
        """
        Replace values in the categorical.

        Without a ``filter`` the replacement is delegated to the values'
        own ``replace``; with one, ``value`` is first added as a category
        (unless it is NA) and the parent ``replace`` does the work.
        """
        inplace = validate_bool_kwarg(inplace, "inplace")
        target = self if inplace else self.copy()

        if filter is not None:
            # replace was called on a DataFrame
            if not isna(value):
                target.values.add_categories(value, inplace=True)
            return super(CategoricalBlock, target).replace(
                to_replace, value, inplace, filter, regex, convert
            )

        # replace was called on a series
        target.values.replace(to_replace, value, inplace=True)
        if not convert:
            return target
        return target.convert(numeric=False, copy=not inplace)
2983# -----------------------------------------------------------------
2984# Constructor Helpers
def get_block_type(values, dtype=None):
    """
    Pick the Block subclass that should hold ``values``.

    The checks run in a fixed order and the first match wins; anything
    that matches none of them is stored in an ObjectBlock.

    Parameters
    ----------
    values : ndarray-like
    dtype : numpy or pandas dtype
        Falls back to ``values.dtype`` when falsy.

    Returns
    -------
    cls : class, subclass of Block
    """
    dtype = dtype or values.dtype
    scalar_type = dtype.type

    if is_sparse(dtype):
        # Need this first(ish) so that Sparse[datetime] is sparse
        return ExtensionBlock
    if is_categorical(values):
        return CategoricalBlock
    if issubclass(scalar_type, np.datetime64):
        assert not is_datetime64tz_dtype(values)
        return DatetimeBlock
    if is_datetime64tz_dtype(values):
        return DatetimeTZBlock
    if is_interval_dtype(dtype) or is_period_dtype(dtype):
        return ObjectValuesExtensionBlock
    if is_extension_array_dtype(values):
        return ExtensionBlock
    if issubclass(scalar_type, np.floating):
        return FloatBlock
    if issubclass(scalar_type, np.timedelta64):
        # must be tested before the plain integer branch below
        assert issubclass(scalar_type, np.integer)
        return TimeDeltaBlock
    if issubclass(scalar_type, np.complexfloating):
        return ComplexBlock
    if issubclass(scalar_type, np.integer):
        return IntBlock
    if dtype == np.bool_:
        return BoolBlock
    return ObjectBlock
def make_block(values, placement, klass=None, ndim=None, dtype=None):
    """
    Build a Block for ``values``.

    PandasArray / PandasDtype inputs are unwrapped to their numpy
    equivalents first. When ``klass`` is not given it is chosen by
    ``get_block_type``.
    """
    # Ensure that we don't allow PandasArray / PandasDtype in internals.
    # For now, blocks should be backed by ndarrays when possible.
    if isinstance(values, ABCPandasArray):
        values = values.to_numpy()
        if ndim and ndim > 1:
            values = np.atleast_2d(values)

    if isinstance(dtype, PandasDtype):
        dtype = dtype.numpy_dtype

    if klass is None:
        klass = get_block_type(values, dtype or values.dtype)
    elif klass is DatetimeTZBlock and not is_datetime64tz_dtype(values):
        # TODO: This is no longer hit internally; does it need to be retained
        # for e.g. pyarrow?
        values = DatetimeArray._simple_new(values, dtype=dtype)

    block = klass(values, ndim=ndim, placement=placement)
    return block
3056# -----------------------------------------------------------------
3059def _extend_blocks(result, blocks=None):
3060 """ return a new extended blocks, given the result """
3061 from pandas.core.internals import BlockManager
3063 if blocks is None:
3064 blocks = []
3065 if isinstance(result, list):
3066 for r in result:
3067 if isinstance(r, list):
3068 blocks.extend(r)
3069 else:
3070 blocks.append(r)
3071 elif isinstance(result, BlockManager):
3072 blocks.extend(result.blocks)
3073 else:
3074 blocks.append(result)
3075 return blocks
3078def _block_shape(values, ndim=1, shape=None):
3079 """ guarantee the shape of the values to be at least 1 d """
3080 if values.ndim < ndim:
3081 if shape is None:
3082 shape = values.shape
3083 if not is_extension_array_dtype(values):
3084 # TODO: https://github.com/pandas-dev/pandas/issues/23023
3085 # block.shape is incorrect for "2D" ExtensionArrays
3086 # We can't, and don't need to, reshape.
3087 values = values.reshape(tuple((1,) + shape))
3088 return values
3091def _merge_blocks(blocks, dtype=None, _can_consolidate=True):
3093 if len(blocks) == 1:
3094 return blocks[0]
3096 if _can_consolidate:
3098 if dtype is None:
3099 if len({b.dtype for b in blocks}) != 1:
3100 raise AssertionError("_merge_blocks are invalid!")
3102 # FIXME: optimization potential in case all mgrs contain slices and
3103 # combination of those slices is a slice, too.
3104 new_mgr_locs = np.concatenate([b.mgr_locs.as_array for b in blocks])
3105 new_values = np.vstack([b.values for b in blocks])
3107 argsort = np.argsort(new_mgr_locs)
3108 new_values = new_values[argsort]
3109 new_mgr_locs = new_mgr_locs[argsort]
3111 return make_block(new_values, placement=new_mgr_locs)
3113 # no merge
3114 return blocks
3117def _safe_reshape(arr, new_shape):
3118 """
3119 If possible, reshape `arr` to have shape `new_shape`,
3120 with a couple of exceptions (see gh-13012):
3122 1) If `arr` is a ExtensionArray or Index, `arr` will be
3123 returned as is.
3124 2) If `arr` is a Series, the `_values` attribute will
3125 be reshaped and returned.
3127 Parameters
3128 ----------
3129 arr : array-like, object to be reshaped
3130 new_shape : int or tuple of ints, the new shape
3131 """
3132 if isinstance(arr, ABCSeries):
3133 arr = arr._values
3134 if not isinstance(arr, ABCExtensionArray):
3135 arr = arr.reshape(new_shape)
3136 return arr
def _putmask_smart(v, mask, n):
    """
    Put the values of `n` into `v` where `mask` is set, trying to
    preserve the dtype of `v` if possible.

    Parameters
    ----------
    v : `values` (array like)
        Written to directly when its dtype kind matches that of `n`;
        otherwise a copy or a re-typed array is written to instead.
    mask : np.ndarray
        Applies to both sides (array like).
    n : `new values` either scalar or an array like aligned with `values`

    Returns
    -------
    values : ndarray with updated values
        this *may* be a copy of the original

    See Also
    --------
    ndarray.putmask
    """

    # we cannot use np.asarray() here as we cannot have conversions
    # that numpy does when numeric are mixed with strings

    # n should be the length of the mask or a scalar here
    if not is_list_like(n):
        n = np.repeat(n, len(mask))

    # see if we are only masking values that, if put,
    # will work in the current dtype
    try:
        nn = n[mask]
    except TypeError:
        # TypeError: only integer scalar arrays can be converted to a scalar index
        pass
    else:
        # make sure that we have a nullable type
        # if we have nulls
        # NOTE(review): nn[0] presumably relies on the mask selecting at
        # least one element -- confirm callers guarantee this.
        if not _isna_compat(v, nn[0]):
            pass
        elif not (is_float_dtype(nn.dtype) or is_integer_dtype(nn.dtype)):
            # only compare integers/floats
            pass
        elif not (is_float_dtype(v.dtype) or is_integer_dtype(v.dtype)):
            # only compare integers/floats
            pass
        else:

            # we ignore ComplexWarning here
            with warnings.catch_warnings(record=True):
                warnings.simplefilter("ignore", np.ComplexWarning)
                nn_at = nn.astype(v.dtype)

            # the cast is accepted only if every selected value survives
            # it unchanged; the result is then written into a copy of v
            comp = nn == nn_at
            if is_list_like(comp) and comp.all():
                nv = v.copy()
                nv[mask] = nn_at
                return nv

    n = np.asarray(n)

    def _putmask_preserve(nv, n):
        # try an aligned assignment first; fall back to assigning n as a
        # whole when indexing n by the mask fails
        try:
            nv[mask] = n[mask]
        except (IndexError, ValueError):
            nv[mask] = n
        return nv

    # preserves dtype if possible
    if v.dtype.kind == n.dtype.kind:
        return _putmask_preserve(v, n)

    # change the dtype if needed
    dtype, _ = maybe_promote(n.dtype)

    if is_extension_array_dtype(v.dtype) and is_object_dtype(dtype):
        v = v._internal_get_values(dtype)
    else:
        v = v.astype(dtype)

    return _putmask_preserve(v, n)