Coverage for /home/martinb/.local/share/virtualenvs/camcops/lib/python3.6/site-packages/pandas/core/internals/managers.py : 15%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1from collections import defaultdict
2from functools import partial
3import itertools
4import operator
5import re
6from typing import List, Optional, Sequence, Tuple, Union
8import numpy as np
10from pandas._libs import Timedelta, Timestamp, internals as libinternals, lib
11from pandas.util._validators import validate_bool_kwarg
13from pandas.core.dtypes.cast import (
14 find_common_type,
15 infer_dtype_from_scalar,
16 maybe_convert_objects,
17 maybe_promote,
18)
19from pandas.core.dtypes.common import (
20 _NS_DTYPE,
21 is_datetimelike_v_numeric,
22 is_extension_array_dtype,
23 is_list_like,
24 is_numeric_v_string_like,
25 is_scalar,
26 is_sparse,
27)
28from pandas.core.dtypes.concat import concat_compat
29from pandas.core.dtypes.dtypes import ExtensionDtype
30from pandas.core.dtypes.generic import ABCExtensionArray, ABCSeries
31from pandas.core.dtypes.missing import isna
33import pandas.core.algorithms as algos
34from pandas.core.base import PandasObject
35from pandas.core.indexers import maybe_convert_indices
36from pandas.core.indexes.api import Index, MultiIndex, ensure_index
37from pandas.core.internals.blocks import (
38 Block,
39 CategoricalBlock,
40 DatetimeTZBlock,
41 ExtensionBlock,
42 ObjectValuesExtensionBlock,
43 _extend_blocks,
44 _merge_blocks,
45 _safe_reshape,
46 get_block_type,
47 make_block,
48)
49from pandas.core.internals.concat import ( # all for concatenate_block_managers
50 combine_concat_plans,
51 concatenate_join_units,
52 get_mgr_concatenation_plan,
53 is_uniform_join_units,
54)
56from pandas.io.formats.printing import pprint_thing
58# TODO: flexible with index=None and/or items=None
61class BlockManager(PandasObject):
62 """
63 Core internal data structure to implement DataFrame, Series, etc.
65 Manage a bunch of labeled 2D mixed-type ndarrays. Essentially it's a
66 lightweight blocked set of labeled data to be manipulated by the DataFrame
67 public API class
69 Attributes
70 ----------
71 shape
72 ndim
73 axes
74 values
75 items
77 Methods
78 -------
79 set_axis(axis, new_labels)
80 copy(deep=True)
82 get_dtype_counts
83 get_dtypes
85 apply(func, axes, block_filter_fn)
87 get_bool_data
88 get_numeric_data
90 get_slice(slice_like, axis)
91 get(label)
92 iget(loc)
94 take(indexer, axis)
95 reindex_axis(new_labels, axis)
96 reindex_indexer(new_labels, indexer, axis)
98 delete(label)
99 insert(loc, label, value)
100 set(label, value)
102 Parameters
103 ----------
106 Notes
107 -----
108 This is *not* a public API class
109 """
111 __slots__ = [
112 "axes",
113 "blocks",
114 "_ndim",
115 "_shape",
116 "_known_consolidated",
117 "_is_consolidated",
118 "_blknos",
119 "_blklocs",
120 ]
    def __init__(
        self,
        blocks: Sequence[Block],
        axes: Sequence[Index],
        do_integrity_check: bool = True,
    ):
        """
        Parameters
        ----------
        blocks : sequence of Block
        axes : sequence of Index, one per dimension
        do_integrity_check : bool, default True
            If True, validate block shapes/placements against the axes.
        """
        self.axes = [ensure_index(ax) for ax in axes]
        self.blocks: Tuple[Block, ...] = tuple(blocks)

        # every block must share the manager's dimensionality
        for block in blocks:
            if self.ndim != block.ndim:
                raise AssertionError(
                    f"Number of Block dimensions ({block.ndim}) must equal "
                    f"number of axes ({self.ndim})"
                )

        if do_integrity_check:
            self._verify_integrity()

        # cache whether blocks are already consolidated
        self._consolidate_check()

        # build the item -> (block number, location-within-block) maps
        self._rebuild_blknos_and_blklocs()
    def make_empty(self, axes=None):
        """ return an empty BlockManager with the items axis of len 0 """
        if axes is None:
            # keep all non-item axes, replace the items axis with an empty index
            axes = [ensure_index([])] + [ensure_index(a) for a in self.axes[1:]]

        # preserve dtype if possible
        if self.ndim == 1:
            # NOTE(review): self.array_dtype is not defined in this class here —
            # presumably provided by SingleBlockManager; confirm in subclass.
            blocks = np.array([], dtype=self.array_dtype)
        else:
            blocks = []
        return type(self)(blocks, axes)
    def __nonzero__(self):
        # a BlockManager is always truthy, even when it holds no items
        return True

    # Python3 compat
    __bool__ = __nonzero__
163 @property
164 def shape(self):
165 return tuple(len(ax) for ax in self.axes)
    @property
    def ndim(self) -> int:
        """Number of axes (dimensions) managed."""
        return len(self.axes)
    def set_axis(self, axis, new_labels):
        """
        Replace the labels of one axis in-place.

        Raises
        ------
        ValueError
            If the new labels differ in length from the existing axis.
        """
        new_labels = ensure_index(new_labels)
        old_len = len(self.axes[axis])
        new_len = len(new_labels)

        if new_len != old_len:
            raise ValueError(
                f"Length mismatch: Expected axis has {old_len} elements, new "
                f"values have {new_len} elements"
            )

        self.axes[axis] = new_labels
    def rename_axis(self, mapper, axis, copy=True, level=None):
        """
        Rename one of axes.

        Parameters
        ----------
        mapper : unary callable
        axis : int
        copy : boolean, default True
            Whether to copy the underlying data as well (passed to self.copy).
        level : int, default None

        Returns
        -------
        BlockManager
            A copy with the mapped labels on `axis`.
        """
        obj = self.copy(deep=copy)
        obj.set_axis(axis, _transform_index(self.axes[axis], mapper, level))
        return obj
    @property
    def _is_single_block(self):
        """True if the manager is one block covering all items contiguously in order."""
        if self.ndim == 1:
            return True

        if len(self.blocks) != 1:
            return False

        blk = self.blocks[0]
        # the single block must cover positions 0..len(self) as a unit-stride slice
        return blk.mgr_locs.is_slice_like and blk.mgr_locs.as_slice == slice(
            0, len(self), 1
        )
    def _rebuild_blknos_and_blklocs(self):
        """
        Update mgr._blknos / mgr._blklocs.

        After this runs, _blknos[i] is the index (into self.blocks) of the
        block holding item i, and _blklocs[i] is item i's position within
        that block.
        """
        new_blknos = np.empty(self.shape[0], dtype=np.int64)
        new_blklocs = np.empty(self.shape[0], dtype=np.int64)
        # -1 marks item positions not claimed by any block
        new_blknos.fill(-1)
        new_blklocs.fill(-1)

        for blkno, blk in enumerate(self.blocks):
            rl = blk.mgr_locs
            new_blknos[rl.indexer] = blkno
            new_blklocs[rl.indexer] = np.arange(len(rl))

        # every item must be owned by exactly one block
        if (new_blknos == -1).any():
            raise AssertionError("Gaps in blk ref_locs")

        self._blknos = new_blknos
        self._blklocs = new_blklocs
    @property
    def items(self):
        """The labels of axis 0 (the items axis)."""
        return self.axes[0]
236 def _get_counts(self, f):
237 """ return a dict of the counts of the function in BlockManager """
238 self._consolidate_inplace()
239 counts = dict()
240 for b in self.blocks:
241 v = f(b)
242 counts[v] = counts.get(v, 0) + b.shape[0]
243 return counts
    def get_dtype_counts(self):
        """Return a dict of dtype name -> number of items with that dtype."""
        return self._get_counts(lambda b: b.dtype.name)
    def get_dtypes(self):
        """Return an array of per-item dtypes, in items order (via _blknos)."""
        dtypes = np.array([blk.dtype for blk in self.blocks])
        return algos.take_1d(dtypes, self._blknos, allow_fill=False)
    def __getstate__(self):
        """Pickle support: emit the 0.14.1 state format plus legacy fields."""
        block_values = [b.values for b in self.blocks]
        block_items = [self.items[b.mgr_locs.indexer] for b in self.blocks]
        axes_array = list(self.axes)

        extra_state = {
            "0.14.1": {
                "axes": axes_array,
                "blocks": [
                    dict(values=b.values, mgr_locs=b.mgr_locs.indexer)
                    for b in self.blocks
                ],
            }
        }

        # First three elements of the state are to maintain forward
        # compatibility with 0.13.1.
        return axes_array, block_values, block_items, extra_state
    def __setstate__(self, state):
        """Pickle support: rebuild from 0.14.1-format or pre-0.14.1 legacy state."""

        def unpickle_block(values, mgr_locs):
            return make_block(values, placement=mgr_locs)

        if isinstance(state, tuple) and len(state) >= 4 and "0.14.1" in state[3]:
            # modern (0.14.1+) format: axes and blocks live in state[3]
            state = state[3]["0.14.1"]
            self.axes = [ensure_index(ax) for ax in state["axes"]]
            self.blocks = tuple(
                unpickle_block(b["values"], b["mgr_locs"]) for b in state["blocks"]
            )
        else:
            # discard anything after 3rd, support beta pickling format for a
            # little while longer
            ax_arrays, bvalues, bitems = state[:3]

            self.axes = [ensure_index(ax) for ax in ax_arrays]

            if len(bitems) == 1 and self.axes[0].equals(bitems[0]):
                # This is a workaround for pre-0.14.1 pickles that didn't
                # support unpickling multi-block frames/panels with non-unique
                # columns/items, because given a manager with items ["a", "b",
                # "a"] there's no way of knowing which block's "a" is where.
                #
                # Single-block case can be supported under the assumption that
                # block items corresponded to manager items 1-to-1.
                all_mgr_locs = [slice(0, len(bitems[0]))]
            else:
                all_mgr_locs = [
                    self.axes[0].get_indexer(blk_items) for blk_items in bitems
                ]

            self.blocks = tuple(
                unpickle_block(values, mgr_locs)
                for values, mgr_locs in zip(bvalues, all_mgr_locs)
            )

        self._post_setstate()
    def _post_setstate(self):
        # consolidation state is unknown after unpickling; recompute lazily
        self._is_consolidated = False
        self._known_consolidated = False
        self._rebuild_blknos_and_blklocs()
314 def __len__(self) -> int:
315 return len(self.items)
    def __repr__(self) -> str:
        """One line for the class name, one per axis, one per block."""
        output = type(self).__name__
        for i, ax in enumerate(self.axes):
            if i == 0:
                output += f"\nItems: {ax}"
            else:
                output += f"\nAxis {i}: {ax}"

        for block in self.blocks:
            output += f"\n{pprint_thing(block)}"
        return output
    def _verify_integrity(self):
        """Validate block shapes/placements against the axes; raise on mismatch."""
        mgr_shape = self.shape
        tot_items = sum(len(x.mgr_locs) for x in self.blocks)
        for block in self.blocks:
            # non-item dims of each block must match the manager's shape
            if block._verify_integrity and block.shape[1:] != mgr_shape[1:]:
                construction_error(tot_items, block.shape[1:], self.axes)
        # block placements together must account for every item exactly once
        if len(self.items) != tot_items:
            raise AssertionError(
                "Number of manager items must equal union of "
                f"block items\n# manager items: {len(self.items)}, # "
                f"tot_items: {tot_items}"
            )
    def reduce(self, func, *args, **kwargs):
        """
        Apply a reduction `func` to each block's values.

        For ndim == 1 returns the scalar result directly; otherwise returns
        a dict mapping item position -> reduced value, assembled block by block.
        """
        # If 2D, we assume that we're operating column-wise
        if self.ndim == 1:
            # we'll be returning a scalar
            blk = self.blocks[0]
            return func(blk.values, *args, **kwargs)

        res = {}
        for blk in self.blocks:
            bres = func(blk.values, *args, **kwargs)

            if np.ndim(bres) == 0:
                # EA
                assert blk.shape[0] == 1
                new_res = zip(blk.mgr_locs.as_array, [bres])
            else:
                assert bres.ndim == 1, bres.shape
                assert blk.shape[0] == len(bres), (blk.shape, bres.shape, args, kwargs)
                new_res = zip(blk.mgr_locs.as_array, bres)

            nr = dict(new_res)
            # each item position must be produced by exactly one block
            assert not any(key in res for key in nr)
            res.update(nr)

        return res
    def apply(self, f, filter=None, **kwargs):
        """
        Iterate over the blocks, collect and create a new BlockManager.

        Parameters
        ----------
        f : str or callable
            Name of the Block method to apply (or a callable passed to
            Block.apply).
        filter : list, if supplied, only call the block if the filter is in
            the block

        Returns
        -------
        BlockManager
        """
        result_blocks = []

        # filter kwarg is used in replace-* family of methods
        if filter is not None:
            filter_locs = set(self.items.get_indexer_for(filter))
            if len(filter_locs) == len(self.items):
                # All items are included, as if there were no filtering
                filter = None
            else:
                kwargs["filter"] = filter_locs

        self._consolidate_inplace()

        # certain ops need their array-like kwargs re-aligned to each block's items
        if f == "where":
            align_copy = True
            if kwargs.get("align", True):
                align_keys = ["other", "cond"]
            else:
                align_keys = ["cond"]
        elif f == "putmask":
            align_copy = False
            if kwargs.get("align", True):
                align_keys = ["new", "mask"]
            else:
                align_keys = ["mask"]
        elif f == "fillna":
            # fillna internally does putmask, maybe it's better to do this
            # at mgr, not block level?
            align_copy = False
            align_keys = ["value"]
        else:
            align_keys = []

        # TODO(EA): may interfere with ExtensionBlock.setitem for blocks
        # with a .values attribute.
        aligned_args = {
            k: kwargs[k]
            for k in align_keys
            if not isinstance(kwargs[k], ABCExtensionArray)
            and hasattr(kwargs[k], "values")
        }

        for b in self.blocks:
            if filter is not None:
                # keep (unchanged) blocks containing none of the filtered items
                if not b.mgr_locs.isin(filter_locs).any():
                    result_blocks.append(b)
                    continue

            if aligned_args:
                b_items = self.items[b.mgr_locs.indexer]

                for k, obj in aligned_args.items():
                    axis = obj._info_axis_number
                    kwargs[k] = obj.reindex(b_items, axis=axis, copy=align_copy)

            if callable(f):
                applied = b.apply(f, **kwargs)
            else:
                applied = getattr(b, f)(**kwargs)
            # a block op may return one block or a list of blocks
            result_blocks = _extend_blocks(applied, result_blocks)

        if len(result_blocks) == 0:
            return self.make_empty(self.axes)
        bm = type(self)(result_blocks, self.axes, do_integrity_check=False)
        return bm
    def quantile(
        self,
        axis=0,
        consolidate=True,
        transposed=False,
        interpolation="linear",
        qs=None,
        numeric_only=None,
    ):
        """
        Iterate over blocks applying quantile reduction.
        This routine is intended for reduction type operations and
        will do inference on the generated blocks.

        Parameters
        ----------
        axis: reduction axis, default 0
        consolidate: boolean, default True. Join together blocks having same
            dtype
        transposed: boolean, default False
            we are holding transposed data
        interpolation : type of interpolation, default 'linear'
        qs : a scalar or list of the quantiles to be computed
        numeric_only : ignored

        Returns
        -------
        Block Manager (new object)
        """
        # Series dispatches to DataFrame for quantile, which allows us to
        # simplify some of the code here and in the blocks
        assert self.ndim >= 2

        if consolidate:
            self._consolidate_inplace()

        def get_axe(block, qs, axes):
            # Because Series dispatches to DataFrame, we will always have
            # block.ndim == 2
            from pandas import Float64Index

            if is_list_like(qs):
                ax = Float64Index(qs)
            else:
                ax = axes[0]
            return ax

        axes, blocks = [], []
        for b in self.blocks:
            block = b.quantile(axis=axis, qs=qs, interpolation=interpolation)

            axe = get_axe(b, qs, axes=self.axes)

            axes.append(axe)
            blocks.append(block)

        # note that some DatetimeTZ, Categorical are always ndim==1
        ndim = {b.ndim for b in blocks}
        assert 0 not in ndim, ndim

        if 2 in ndim:

            new_axes = list(self.axes)

            # multiple blocks that are reduced
            if len(blocks) > 1:
                new_axes[1] = axes[0]

                # reset the placement to the original
                for b, sb in zip(blocks, self.blocks):
                    b.mgr_locs = sb.mgr_locs

            else:
                new_axes[axis] = Index(np.concatenate([ax.values for ax in axes]))

            if transposed:
                new_axes = new_axes[::-1]
                blocks = [
                    b.make_block(b.values.T, placement=np.arange(b.shape[1]))
                    for b in blocks
                ]

            return type(self)(blocks, new_axes)

        # single block, i.e. ndim == {1}
        values = concat_compat([b.values for b in blocks])

        # compute the orderings of our original data
        if len(self.blocks) > 1:

            indexer = np.empty(len(self.axes[0]), dtype=np.intp)
            i = 0
            for b in self.blocks:
                for j in b.mgr_locs:
                    indexer[j] = i
                    i = i + 1

            values = values.take(indexer)

        return SingleBlockManager(
            [make_block(values, ndim=1, placement=np.arange(len(values)))], axes[0]
        )
    def isna(self, func):
        """Apply `func` to each block's values via Block.apply."""
        return self.apply("apply", func=func)
    def where(self, **kwargs):
        """Apply Block.where block-wise; returns a new BlockManager."""
        return self.apply("where", **kwargs)
    def setitem(self, **kwargs):
        """Apply Block.setitem block-wise; returns a new BlockManager."""
        return self.apply("setitem", **kwargs)
    def putmask(self, **kwargs):
        """Apply Block.putmask block-wise; returns a new BlockManager."""
        return self.apply("putmask", **kwargs)
    def diff(self, **kwargs):
        """Apply Block.diff block-wise; returns a new BlockManager."""
        return self.apply("diff", **kwargs)
    def interpolate(self, **kwargs):
        """Apply Block.interpolate block-wise; returns a new BlockManager."""
        return self.apply("interpolate", **kwargs)
    def shift(self, **kwargs):
        """Apply Block.shift block-wise; returns a new BlockManager."""
        return self.apply("shift", **kwargs)
    def fillna(self, **kwargs):
        """Apply Block.fillna block-wise; returns a new BlockManager."""
        return self.apply("fillna", **kwargs)
    def downcast(self, **kwargs):
        """Apply Block.downcast block-wise; returns a new BlockManager."""
        return self.apply("downcast", **kwargs)
    def astype(self, dtype, copy: bool = False, errors: str = "raise"):
        """Cast all blocks to `dtype` via Block.astype; returns a new BlockManager."""
        return self.apply("astype", dtype=dtype, copy=copy, errors=errors)
    def convert(self, **kwargs):
        """Apply Block.convert block-wise; returns a new BlockManager."""
        return self.apply("convert", **kwargs)
    def replace(self, value, **kwargs):
        """Replace (to_replace in kwargs) with scalar `value` via Block.replace."""
        # only scalar replacement values are supported on this path
        assert np.ndim(value) == 0, value
        return self.apply("replace", value=value, **kwargs)
    def replace_list(self, src_list, dest_list, inplace=False, regex=False):
        """ do a list replace """
        inplace = validate_bool_kwarg(inplace, "inplace")

        # figure out our mask a-priori to avoid repeated replacements
        values = self.as_array()

        def comp(s, regex=False):
            """
            Generate a bool array by perform an equality check, or perform
            an element-wise regular expression matching
            """
            if isna(s):
                return isna(values)
            if isinstance(s, (Timedelta, Timestamp)) and getattr(s, "tz", None) is None:

                return _compare_or_regex_search(
                    maybe_convert_objects(values), s.asm8, regex
                )
            return _compare_or_regex_search(values, s, regex)

        masks = [comp(s, regex) for i, s in enumerate(src_list)]

        result_blocks = []
        src_len = len(src_list) - 1
        for blk in self.blocks:

            # its possible to get multiple result blocks here
            # replace ALWAYS will return a list
            rb = [blk if inplace else blk.copy()]
            for i, (s, d) in enumerate(zip(src_list, dest_list)):
                # TODO: assert/validate that `d` is always a scalar?
                new_rb = []
                for b in rb:
                    m = masks[i][b.mgr_locs.indexer]
                    # only the last replacement triggers dtype conversion
                    convert = i == src_len
                    result = b._replace_coerce(
                        mask=m,
                        to_replace=s,
                        value=d,
                        inplace=inplace,
                        convert=convert,
                        regex=regex,
                    )
                    if m.any() or convert:
                        new_rb = _extend_blocks(result, new_rb)
                    else:
                        new_rb.append(b)
                rb = new_rb
            result_blocks.extend(rb)

        bm = type(self)(result_blocks, self.axes)
        bm._consolidate_inplace()
        return bm
    def is_consolidated(self):
        """
        Return True if each ftype appears in at most one block
        (i.e. the blocks are fully consolidated).
        """
        # lazily recompute when the cached flag is stale
        if not self._known_consolidated:
            self._consolidate_check()
        return self._is_consolidated
655 def _consolidate_check(self):
656 ftypes = [blk.ftype for blk in self.blocks]
657 self._is_consolidated = len(ftypes) == len(set(ftypes))
658 self._known_consolidated = True
    @property
    def is_mixed_type(self):
        """True if, after consolidation, more than one block remains."""
        # Warning, consolidation needs to get checked upstairs
        self._consolidate_inplace()
        return len(self.blocks) > 1
    @property
    def is_numeric_mixed_type(self):
        """True if, after consolidation, every block is numeric."""
        # Warning, consolidation needs to get checked upstairs
        self._consolidate_inplace()
        return all(block.is_numeric for block in self.blocks)
    @property
    def is_datelike_mixed_type(self):
        """True if, after consolidation, any block is date-like."""
        # Warning, consolidation needs to get checked upstairs
        self._consolidate_inplace()
        return any(block.is_datelike for block in self.blocks)
678 @property
679 def any_extension_types(self):
680 """Whether any of the blocks in this manager are extension blocks"""
681 return any(block.is_extension for block in self.blocks)
    @property
    def is_view(self):
        """ return a boolean if we are a single block and are a view """
        if len(self.blocks) == 1:
            return self.blocks[0].is_view

        # It is technically possible to figure out which blocks are views
        # e.g. [ b.values.base is not None for b in self.blocks ]
        # but then we have the case of possibly some blocks being a view
        # and some blocks not. setting in theory is possible on the non-view
        # blocks w/o causing a SettingWithCopy raise/warn. But this is a bit
        # complicated

        return False
    def get_bool_data(self, copy=False):
        """
        Return a new manager holding only the boolean blocks.

        Parameters
        ----------
        copy : boolean, default False
            Whether to copy the blocks
        """
        self._consolidate_inplace()
        return self.combine([b for b in self.blocks if b.is_bool], copy)
    def get_numeric_data(self, copy=False):
        """
        Return a new manager holding only the numeric blocks.

        Parameters
        ----------
        copy : boolean, default False
            Whether to copy the blocks
        """
        self._consolidate_inplace()
        return self.combine([b for b in self.blocks if b.is_numeric], copy)
    def combine(self, blocks, copy=True):
        """ return a new manager with the blocks """
        if len(blocks) == 0:
            return self.make_empty()

        # FIXME: optimization potential
        indexer = np.sort(np.concatenate([b.mgr_locs.as_array for b in blocks]))
        inv_indexer = lib.get_reverse_indexer(indexer, self.shape[0])

        new_blocks = []
        for b in blocks:
            b = b.copy(deep=copy)
            # remap block placements into the reduced (kept-items) space
            b.mgr_locs = algos.take_1d(
                inv_indexer, b.mgr_locs.as_array, axis=0, allow_fill=False
            )
            new_blocks.append(b)

        axes = list(self.axes)
        axes[0] = self.items.take(indexer)

        return type(self)(new_blocks, axes, do_integrity_check=False)
    def get_slice(self, slobj: slice, axis: int = 0):
        """Return a new manager sliced along `axis` with slice `slobj`."""
        if axis >= self.ndim:
            raise IndexError("Requested axis not found in manager")

        if axis == 0:
            # slicing the items axis re-partitions blocks
            new_blocks = self._slice_take_blocks_ax0(slobj)
        else:
            _slicer = [slice(None)] * (axis + 1)
            _slicer[axis] = slobj
            slicer = tuple(_slicer)
            new_blocks = [blk.getitem_block(slicer) for blk in self.blocks]

        new_axes = list(self.axes)
        new_axes[axis] = new_axes[axis][slobj]

        bm = type(self)(new_blocks, new_axes, do_integrity_check=False)
        bm._consolidate_inplace()
        return bm
759 def __contains__(self, item) -> bool:
760 return item in self.items
    @property
    def nblocks(self) -> int:
        """Number of blocks held by this manager."""
        return len(self.blocks)
    def copy(self, deep=True):
        """
        Make deep or shallow copy of BlockManager

        Parameters
        ----------
        deep : bool or string, default True
            If False, return shallow copy (do not copy data)
            If 'all', copy data and a deep copy of the index

        Returns
        -------
        BlockManager
        """
        # this preserves the notion of view copying of axes
        if deep:
            # hit in e.g. tests.io.json.test_pandas

            def copy_func(ax):
                # deep='all' copies index data too; plain deep only views it
                if deep == "all":
                    return ax.copy(deep=True)
                else:
                    return ax.view()

            new_axes = [copy_func(ax) for ax in self.axes]
        else:
            new_axes = list(self.axes)
        res = self.apply("copy", deep=deep)
        res.axes = new_axes
        return res
    def as_array(self, transpose=False, items=None):
        """Convert the blockmanager data into an numpy array.

        Parameters
        ----------
        transpose : boolean, default False
            If True, transpose the return array
        items : list of strings or None
            Names of block items that will be included in the returned
            array. ``None`` means that all block items will be used

        Returns
        -------
        arr : ndarray
        """
        if len(self.blocks) == 0:
            # no data: empty float array of the manager's shape
            arr = np.empty(self.shape, dtype=float)
            return arr.transpose() if transpose else arr

        if items is not None:
            mgr = self.reindex_axis(items, axis=0)
        else:
            mgr = self

        if self._is_single_block and mgr.blocks[0].is_datetimetz:
            # TODO(Block.get_values): Make DatetimeTZBlock.get_values
            # always be object dtype. Some callers seem to want the
            # DatetimeArray (previously DTI)
            arr = mgr.blocks[0].get_values(dtype=object)
        elif self._is_single_block or not self.is_mixed_type:
            arr = np.asarray(mgr.blocks[0].get_values())
        else:
            # multiple dtypes: interleave into a single common-dtype array
            arr = mgr._interleave()

        return arr.transpose() if transpose else arr
    def _interleave(self):
        """
        Return ndarray from blocks with specified item order
        Items must be contained in the blocks
        """
        dtype = _interleaved_dtype(self.blocks)

        # TODO: https://github.com/pandas-dev/pandas/issues/22791
        # Give EAs some input on what happens here. Sparse needs this.
        if is_sparse(dtype):
            dtype = dtype.subtype
        elif is_extension_array_dtype(dtype):
            dtype = "object"

        result = np.empty(self.shape, dtype=dtype)

        # track which item rows have been filled by some block
        itemmask = np.zeros(self.shape[0])

        for blk in self.blocks:
            rl = blk.mgr_locs
            result[rl.indexer] = blk.get_values(dtype)
            itemmask[rl.indexer] = 1

        if not itemmask.all():
            raise AssertionError("Some items were not contained in blocks")

        return result
    def to_dict(self, copy=True):
        """
        Return a dict of str(dtype) -> BlockManager

        Parameters
        ----------
        copy : boolean, default True

        Returns
        -------
        values : a dict of dtype -> BlockManager

        Notes
        -----
        This consolidates based on str(dtype)
        """
        self._consolidate_inplace()

        # group blocks by their dtype's string form
        bd = {}
        for b in self.blocks:
            bd.setdefault(str(b.dtype), []).append(b)

        return {dtype: self.combine(blocks, copy=copy) for dtype, blocks in bd.items()}
    def fast_xs(self, loc):
        """
        get a cross sectional for a given location in the
        items ; handle dups

        return the result, is *could* be a view in the case of a
        single block
        """
        if len(self.blocks) == 1:
            return self.blocks[0].iget((slice(None), loc))

        items = self.items

        # non-unique (GH4726)
        if not items.is_unique:
            result = self._interleave()
            if self.ndim == 2:
                result = result.T
            return result[loc]

        # unique
        dtype = _interleaved_dtype(self.blocks)

        n = len(items)
        if is_extension_array_dtype(dtype):
            # we'll eventually construct an ExtensionArray.
            result = np.empty(n, dtype=object)
        else:
            result = np.empty(n, dtype=dtype)

        for blk in self.blocks:
            # Such assignment may incorrectly coerce NaT to None
            # result[blk.mgr_locs] = blk._slice((slice(None), loc))
            for i, rl in enumerate(blk.mgr_locs):
                result[rl] = blk.iget((i, loc))

        if is_extension_array_dtype(dtype):
            result = dtype.construct_array_type()._from_sequence(result, dtype=dtype)

        return result
    def consolidate(self):
        """
        Join together blocks having same dtype

        Returns
        -------
        y : BlockManager
        """
        if self.is_consolidated():
            return self

        # build a new manager and consolidate it in place
        bm = type(self)(self.blocks, self.axes)
        bm._is_consolidated = False
        bm._consolidate_inplace()
        return bm
    def _consolidate_inplace(self):
        """Merge same-ftype blocks in place; no-op when already consolidated."""
        if not self.is_consolidated():
            self.blocks = tuple(_consolidate(self.blocks))
            self._is_consolidated = True
            self._known_consolidated = True
            self._rebuild_blknos_and_blklocs()
    def get(self, item):
        """
        Return values for selected item (ndarray or BlockManager).
        """
        if self.items.is_unique:

            if not isna(item):
                loc = self.items.get_loc(item)
            else:
                # null label: find the positions whose label is NA
                indexer = np.arange(len(self.items))[isna(self.items)]

                # allow a single nan location indexer
                if not is_scalar(indexer):
                    if len(indexer) == 1:
                        loc = indexer.item()
                    else:
                        raise ValueError("cannot label index with a null key")

            return self.iget(loc)
        else:

            if isna(item):
                raise TypeError("cannot label index with a null key")

            # duplicate labels: return a manager with all matching positions
            indexer = self.items.get_indexer_for([item])
            return self.reindex_indexer(
                new_axis=self.items[indexer], indexer=indexer, axis=0, allow_dups=True
            )
    def iget(self, i):
        """
        Return the data as a SingleBlockManager if possible

        Otherwise return as a ndarray
        """
        block = self.blocks[self._blknos[i]]
        values = block.iget(self._blklocs[i])

        # shortcut for select a single-dim from a 2-dim BM
        return SingleBlockManager(
            [
                block.make_block_same_class(
                    values, placement=slice(0, len(values)), ndim=1
                )
            ],
            self.axes[1],
        )
    def delete(self, item):
        """
        Delete selected item (items if non-unique) in-place.
        """
        indexer = self.items.get_loc(item)

        is_deleted = np.zeros(self.shape[0], dtype=np.bool_)
        is_deleted[indexer] = True
        # cumulative shift each surviving position moves left by
        ref_loc_offset = -is_deleted.cumsum()

        is_blk_deleted = [False] * len(self.blocks)

        if isinstance(indexer, int):
            affected_start = indexer
        else:
            affected_start = is_deleted.nonzero()[0][0]

        # only blocks holding items at/after the first deletion need adjusting
        for blkno, _ in _fast_count_smallints(self._blknos[affected_start:]):
            blk = self.blocks[blkno]
            bml = blk.mgr_locs
            blk_del = is_deleted[bml.indexer].nonzero()[0]

            if len(blk_del) == len(bml):
                # the entire block is being removed
                is_blk_deleted[blkno] = True
                continue
            elif len(blk_del) != 0:
                blk.delete(blk_del)
                bml = blk.mgr_locs

            blk.mgr_locs = bml.add(ref_loc_offset[bml.indexer])

        # FIXME: use Index.delete as soon as it uses fastpath=True
        self.axes[0] = self.items[~is_deleted]
        self.blocks = tuple(
            b for blkno, b in enumerate(self.blocks) if not is_blk_deleted[blkno]
        )
        self._shape = None
        self._rebuild_blknos_and_blklocs()
    def set(self, item, value):
        """
        Set new item in-place. Does not consolidate. Adds new Block if not
        contained in the current set of items
        """
        # FIXME: refactor, clearly separate broadcasting & zip-like assignment
        # can prob also fix the various if tests for sparse/categorical

        value_is_extension_type = is_extension_array_dtype(value)

        # categorical/sparse/datetimetz
        if value_is_extension_type:

            def value_getitem(placement):
                return value

        else:
            if value.ndim == self.ndim - 1:
                # broadcast a 1-dim value up to manager dimensionality
                value = _safe_reshape(value, (1,) + value.shape)

                def value_getitem(placement):
                    return value

            else:

                def value_getitem(placement):
                    return value[placement.indexer]

            if value.shape[1:] != self.shape[1:]:
                raise AssertionError(
                    "Shape of new values must be compatible with manager shape"
                )

        try:
            loc = self.items.get_loc(item)
        except KeyError:
            # This item wasn't present, just insert at end
            self.insert(len(self.items), item, value)
            return

        if isinstance(loc, int):
            loc = [loc]

        blknos = self._blknos[loc]
        blklocs = self._blklocs[loc].copy()

        unfit_mgr_locs = []
        unfit_val_locs = []
        removed_blknos = []
        for blkno, val_locs in libinternals.get_blkno_placements(blknos, group=True):
            blk = self.blocks[blkno]
            blk_locs = blklocs[val_locs.indexer]
            if blk.should_store(value):
                # the existing block can hold the new values: set in place
                blk.set(blk_locs, value_getitem(val_locs))
            else:
                unfit_mgr_locs.append(blk.mgr_locs.as_array[blk_locs])
                unfit_val_locs.append(val_locs)

                # If all block items are unfit, schedule the block for removal.
                if len(val_locs) == len(blk.mgr_locs):
                    removed_blknos.append(blkno)
                else:
                    self._blklocs[blk.mgr_locs.indexer] = -1
                    blk.delete(blk_locs)
                    self._blklocs[blk.mgr_locs.indexer] = np.arange(len(blk))

        if len(removed_blknos):
            # Remove blocks & update blknos accordingly
            is_deleted = np.zeros(self.nblocks, dtype=np.bool_)
            is_deleted[removed_blknos] = True

            new_blknos = np.empty(self.nblocks, dtype=np.int64)
            new_blknos.fill(-1)
            new_blknos[~is_deleted] = np.arange(self.nblocks - len(removed_blknos))
            self._blknos = algos.take_1d(
                new_blknos, self._blknos, axis=0, allow_fill=False
            )
            self.blocks = tuple(
                blk for i, blk in enumerate(self.blocks) if i not in set(removed_blknos)
            )

        if unfit_val_locs:
            unfit_mgr_locs = np.concatenate(unfit_mgr_locs)
            unfit_count = len(unfit_mgr_locs)

            new_blocks = []
            if value_is_extension_type:
                # This code (ab-)uses the fact that sparse blocks contain only
                # one item.
                new_blocks.extend(
                    make_block(
                        values=value.copy(),
                        ndim=self.ndim,
                        placement=slice(mgr_loc, mgr_loc + 1),
                    )
                    for mgr_loc in unfit_mgr_locs
                )

                self._blknos[unfit_mgr_locs] = np.arange(unfit_count) + len(self.blocks)
                self._blklocs[unfit_mgr_locs] = 0

            else:
                # unfit_val_locs contains BlockPlacement objects
                unfit_val_items = unfit_val_locs[0].append(unfit_val_locs[1:])

                new_blocks.append(
                    make_block(
                        values=value_getitem(unfit_val_items),
                        ndim=self.ndim,
                        placement=unfit_mgr_locs,
                    )
                )

                self._blknos[unfit_mgr_locs] = len(self.blocks)
                self._blklocs[unfit_mgr_locs] = np.arange(unfit_count)

            self.blocks += tuple(new_blocks)

            # Newly created block's dtype may already be present.
            self._known_consolidated = False
    def insert(self, loc: int, item, value, allow_duplicates: bool = False):
        """
        Insert item at selected position.

        Parameters
        ----------
        loc : int
        item : hashable
        value : array_like
        allow_duplicates: bool
            If False, trying to insert non-unique item will raise

        """
        if not allow_duplicates and item in self.items:
            # Should this be a different kind of error??
            raise ValueError(f"cannot insert {item}, already exists")

        if not isinstance(loc, int):
            raise TypeError("loc must be int")

        # insert to the axis; this could possibly raise a TypeError
        new_axis = self.items.insert(loc, item)

        block = make_block(values=value, ndim=self.ndim, placement=slice(loc, loc + 1))

        # shift the placements of blocks holding items at/after `loc`
        for blkno, count in _fast_count_smallints(self._blknos[loc:]):
            blk = self.blocks[blkno]
            if count == len(blk.mgr_locs):
                # every item in this block shifts: cheap slice arithmetic
                blk.mgr_locs = blk.mgr_locs.add(1)
            else:
                new_mgr_locs = blk.mgr_locs.as_array.copy()
                new_mgr_locs[new_mgr_locs >= loc] += 1
                blk.mgr_locs = new_mgr_locs

        if loc == self._blklocs.shape[0]:
            # np.append is a lot faster, let's use it if we can.
            self._blklocs = np.append(self._blklocs, 0)
            self._blknos = np.append(self._blknos, len(self.blocks))
        else:
            self._blklocs = np.insert(self._blklocs, loc, 0)
            self._blknos = np.insert(self._blknos, loc, len(self.blocks))

        self.axes[0] = new_axis
        self.blocks += (block,)
        self._shape = None

        self._known_consolidated = False

        # keep block count bounded to avoid degenerate fragmentation
        if len(self.blocks) > 100:
            self._consolidate_inplace()
1209 def reindex_axis(
1210 self, new_index, axis, method=None, limit=None, fill_value=None, copy=True
1211 ):
1212 """
1213 Conform block manager to new index.
1214 """
1215 new_index = ensure_index(new_index)
1216 new_index, indexer = self.axes[axis].reindex(
1217 new_index, method=method, limit=limit
1218 )
1220 return self.reindex_indexer(
1221 new_index, indexer, axis=axis, fill_value=fill_value, copy=copy
1222 )
    def reindex_indexer(
        self, new_axis, indexer, axis, fill_value=None, allow_dups=False, copy=True
    ):
        """
        Reindex one axis with a precomputed positional indexer.

        Parameters
        ----------
        new_axis : Index
        indexer : ndarray of int64 or None
        axis : int
        fill_value : object
        allow_dups : bool

        pandas-indexer with -1's only.
        """
        if indexer is None:
            # Nothing to reindex: just swap the axis in (copying if asked).
            if new_axis is self.axes[axis] and not copy:
                return self

            result = self.copy(deep=copy)
            result.axes = list(self.axes)
            result.axes[axis] = new_axis
            return result

        self._consolidate_inplace()

        # some axes don't allow reindexing with dups
        if not allow_dups:
            self.axes[axis]._can_reindex(indexer)

        if axis >= self.ndim:
            raise IndexError("Requested axis not found in manager")

        if axis == 0:
            # Items axis: blocks themselves are re-sliced/recombined.
            new_blocks = self._slice_take_blocks_ax0(indexer, fill_tuple=(fill_value,))
        else:
            # Non-items axis: every block takes along that axis independently.
            new_blocks = [
                blk.take_nd(
                    indexer,
                    axis=axis,
                    fill_tuple=(
                        fill_value if fill_value is not None else blk.fill_value,
                    ),
                )
                for blk in self.blocks
            ]

        new_axes = list(self.axes)
        new_axes[axis] = new_axis
        return type(self)(new_blocks, new_axes)
    def _slice_take_blocks_ax0(self, slice_or_indexer, fill_tuple=None):
        """
        Slice/take blocks along axis=0.

        Overloaded for SingleBlock

        Returns
        -------
        new_blocks : list of Block
        """
        # A fill_tuple signals that -1 entries in the indexer mean "missing".
        allow_fill = fill_tuple is not None

        sl_type, slobj, sllen = _preprocess_slice_or_indexer(
            slice_or_indexer, self.shape[0], allow_fill=allow_fill
        )

        if self._is_single_block:
            blk = self.blocks[0]

            if sl_type in ("slice", "mask"):
                # Slicing a single block never needs filling.
                return [blk.getitem_block(slobj, new_mgr_locs=slice(0, sllen))]
            elif not allow_fill or self.ndim == 1:
                if allow_fill and fill_tuple[0] is None:
                    # Promote the dtype so it can represent the NA fill value.
                    _, fill_value = maybe_promote(blk.dtype)
                    fill_tuple = (fill_value,)

                return [
                    blk.take_nd(
                        slobj,
                        axis=0,
                        new_mgr_locs=slice(0, sllen),
                        fill_tuple=fill_tuple,
                    )
                ]

        # General case: translate the axis-0 indexer into per-block
        # (block number, location-within-block) pairs.
        if sl_type in ("slice", "mask"):
            blknos = self._blknos[slobj]
            blklocs = self._blklocs[slobj]
        else:
            blknos = algos.take_1d(
                self._blknos, slobj, fill_value=-1, allow_fill=allow_fill
            )
            blklocs = algos.take_1d(
                self._blklocs, slobj, fill_value=-1, allow_fill=allow_fill
            )

        # When filling blknos, make sure blknos is updated before appending to
        # blocks list, that way new blkno is exactly len(blocks).
        #
        # FIXME: mgr_groupby_blknos must return mgr_locs in ascending order,
        # pytables serialization will break otherwise.
        blocks = []
        for blkno, mgr_locs in libinternals.get_blkno_placements(blknos, group=True):
            if blkno == -1:
                # If we've got here, fill_tuple was not None.
                fill_value = fill_tuple[0]

                blocks.append(
                    self._make_na_block(placement=mgr_locs, fill_value=fill_value)
                )
            else:
                blk = self.blocks[blkno]

                # Otherwise, slicing along items axis is necessary.
                if not blk._can_consolidate:
                    # A non-consolidatable block, it's easy, because there's
                    # only one item and each mgr loc is a copy of that single
                    # item.
                    for mgr_loc in mgr_locs:
                        newblk = blk.copy(deep=True)
                        newblk.mgr_locs = slice(mgr_loc, mgr_loc + 1)
                        blocks.append(newblk)

                else:
                    blocks.append(
                        blk.take_nd(
                            blklocs[mgr_locs.indexer],
                            axis=0,
                            new_mgr_locs=mgr_locs,
                            fill_tuple=None,
                        )
                    )

        return blocks
1360 def _make_na_block(self, placement, fill_value=None):
1361 # TODO: infer dtypes other than float64 from fill_value
1363 if fill_value is None:
1364 fill_value = np.nan
1365 block_shape = list(self.shape)
1366 block_shape[0] = len(placement)
1368 dtype, fill_value = infer_dtype_from_scalar(fill_value)
1369 block_values = np.empty(block_shape, dtype=dtype)
1370 block_values.fill(fill_value)
1371 return make_block(block_values, placement=placement)
1373 def take(self, indexer, axis=1, verify=True, convert=True):
1374 """
1375 Take items along any axis.
1376 """
1377 self._consolidate_inplace()
1378 indexer = (
1379 np.arange(indexer.start, indexer.stop, indexer.step, dtype="int64")
1380 if isinstance(indexer, slice)
1381 else np.asanyarray(indexer, dtype="int64")
1382 )
1384 n = self.shape[axis]
1385 if convert:
1386 indexer = maybe_convert_indices(indexer, n)
1388 if verify:
1389 if ((indexer == -1) | (indexer >= n)).any():
1390 raise Exception("Indices must be nonzero and less than the axis length")
1392 new_labels = self.axes[axis].take(indexer)
1393 return self.reindex_indexer(
1394 new_axis=new_labels, indexer=indexer, axis=axis, allow_dups=True
1395 )
1397 def equals(self, other):
1398 self_axes, other_axes = self.axes, other.axes
1399 if len(self_axes) != len(other_axes):
1400 return False
1401 if not all(ax1.equals(ax2) for ax1, ax2 in zip(self_axes, other_axes)):
1402 return False
1403 self._consolidate_inplace()
1404 other._consolidate_inplace()
1405 if len(self.blocks) != len(other.blocks):
1406 return False
1408 # canonicalize block order, using a tuple combining the mgr_locs
1409 # then type name because there might be unconsolidated
1410 # blocks (say, Categorical) which can only be distinguished by
1411 # the iteration order
1412 def canonicalize(block):
1413 return (block.mgr_locs.as_array.tolist(), block.dtype.name)
1415 self_blocks = sorted(self.blocks, key=canonicalize)
1416 other_blocks = sorted(other.blocks, key=canonicalize)
1417 return all(
1418 block.equals(oblock) for block, oblock in zip(self_blocks, other_blocks)
1419 )
    def unstack(self, unstacker_func, fill_value):
        """Return a blockmanager with all blocks unstacked.

        Parameters
        ----------
        unstacker_func : callable
            A (partially-applied) ``pd.core.reshape._Unstacker`` class.
        fill_value : Any
            fill_value for newly introduced missing values.

        Returns
        -------
        unstacked : BlockManager
        """
        n_rows = self.shape[-1]
        # Dummy unstacker over an empty array: used only to compute the
        # resulting column/index layout, no data is moved here.
        dummy = unstacker_func(np.empty((0, 0)), value_columns=self.items)
        new_columns = dummy.get_new_columns()
        new_index = dummy.get_new_index()
        new_blocks = []
        columns_mask = []

        for blk in self.blocks:
            # Each block unstacks just the columns it owns.
            blocks, mask = blk._unstack(
                partial(unstacker_func, value_columns=self.items[blk.mgr_locs.indexer]),
                new_columns,
                n_rows,
                fill_value,
            )

            new_blocks.extend(blocks)
            columns_mask.extend(mask)

        # Keep only the columns some block actually produced.
        new_columns = new_columns[columns_mask]

        bm = BlockManager(new_blocks, [new_columns, new_index])
        return bm
class SingleBlockManager(BlockManager):
    """ manage a single block with """

    # Always backs a 1-dimensional container.
    ndim = 1
    # A single block can never need consolidation.
    _is_consolidated = True
    _known_consolidated = True
    __slots__ = ()

    def __init__(
        self,
        block: Block,
        axis: Union[Index, List[Index]],
        do_integrity_check: bool = False,
        fastpath: bool = False,
    ):
        # Accept a one-element list of axes for BlockManager compatibility.
        if isinstance(axis, list):
            if len(axis) != 1:
                raise ValueError(
                    "cannot create SingleBlockManager with more than 1 axis"
                )
            axis = axis[0]

        # passed from constructor, single block, single axis
        if fastpath:
            # Fast path: trust the caller; ``axis`` is already an Index.
            self.axes = [axis]
            if isinstance(block, list):

                # empty block
                if len(block) == 0:
                    block = [np.array([])]
                elif len(block) != 1:
                    raise ValueError(
                        "Cannot create SingleBlockManager with more than 1 block"
                    )
                block = block[0]
        else:
            self.axes = [ensure_index(axis)]

            # create the block here
            if isinstance(block, list):

                # provide consolidation to the interleaved_dtype
                if len(block) > 1:
                    dtype = _interleaved_dtype(block)
                    block = [b.astype(dtype) for b in block]
                    block = _consolidate(block)

                    if len(block) != 1:
                        raise ValueError(
                            "Cannot create SingleBlockManager with more than 1 block"
                        )
                block = block[0]

        # Wrap raw array-likes in a Block covering the whole axis.
        if not isinstance(block, Block):
            block = make_block(block, placement=slice(0, len(axis)), ndim=1)

        self.blocks = tuple([block])

    def _post_setstate(self):
        # Nothing to rebuild after unpickling: no blkno/blkloc caches here.
        pass

    @property
    def _block(self):
        # The one and only block.
        return self.blocks[0]

    @property
    def _values(self):
        return self._block.values

    @property
    def _blknos(self):
        """ compat with BlockManager """
        return None

    @property
    def _blklocs(self):
        """ compat with BlockManager """
        return None

    def get_slice(self, slobj, axis=0):
        # Only axis 0 exists for a 1-D manager.
        if axis >= self.ndim:
            raise IndexError("Requested axis not found in manager")

        return type(self)(self._block._slice(slobj), self.index[slobj], fastpath=True,)

    @property
    def index(self):
        return self.axes[0]

    @property
    def dtype(self):
        return self._block.dtype

    @property
    def array_dtype(self):
        return self._block.array_dtype

    def get_dtype_counts(self):
        # Exactly one block, hence exactly one dtype.
        return {self.dtype.name: 1}

    def get_dtypes(self):
        return np.array([self._block.dtype])

    def external_values(self):
        return self._block.external_values()

    def internal_values(self):
        return self._block.internal_values()

    def get_values(self):
        """ return a dense type view """
        return np.array(self._block.to_dense(), copy=False)

    @property
    def _can_hold_na(self):
        return self._block._can_hold_na

    def is_consolidated(self):
        # Trivially consolidated: one block.
        return True

    def _consolidate_check(self):
        pass

    def _consolidate_inplace(self):
        pass

    def delete(self, item):
        """
        Delete single item from SingleBlockManager.

        Ensures that self.blocks doesn't become empty.
        """
        loc = self.items.get_loc(item)
        self._block.delete(loc)
        self.axes[0] = self.axes[0].delete(loc)

    def fast_xs(self, loc):
        """
        fast path for getting a cross-section
        return a view of the data
        """
        return self._block.values[loc]

    def concat(self, to_concat, new_axis):
        """
        Concatenate a list of SingleBlockManagers into a single
        SingleBlockManager.

        Used for pd.concat of Series objects with axis=0.

        Parameters
        ----------
        to_concat : list of SingleBlockManagers
        new_axis : Index of the result

        Returns
        -------
        SingleBlockManager
        """
        non_empties = [x for x in to_concat if len(x) > 0]

        # check if all series are of the same block type:
        if len(non_empties) > 0:
            blocks = [obj.blocks[0] for obj in non_empties]
            if len({b.dtype for b in blocks}) == 1:
                # Homogeneous dtypes: fast same-type concatenation.
                new_block = blocks[0].concat_same_type(blocks)
            else:
                values = [x.values for x in blocks]
                values = concat_compat(values)
                new_block = make_block(values, placement=slice(0, len(values), 1))
        else:
            # All inputs empty: concatenate the raw (empty) values anyway so
            # dtype resolution still happens.
            values = [x._block.values for x in to_concat]
            values = concat_compat(values)
            new_block = make_block(values, placement=slice(0, len(values), 1))

        mgr = SingleBlockManager(new_block, new_axis)
        return mgr
1639# --------------------------------------------------------------------
1640# Constructor Helpers
def create_block_manager_from_blocks(blocks, axes):
    """Build a consolidated BlockManager from Blocks (or one raw 2-D array)."""
    try:
        if len(blocks) == 1 and not isinstance(blocks[0], Block):
            # if blocks[0] is of length 0, return empty blocks
            if len(blocks[0]) == 0:
                blocks = []
            else:
                # It's OK if a single block is passed as values, its placement
                # is basically "all items", but if there're many, don't bother
                # converting, it's an error anyway.
                blocks = [
                    make_block(values=blocks[0], placement=slice(0, len(axes[0])))
                ]

        mgr = BlockManager(blocks, axes)
        mgr._consolidate_inplace()
        return mgr

    except ValueError as e:
        # Translate the failure into a user-facing shape-mismatch message.
        raw = [getattr(b, "values", b) for b in blocks]
        total = sum(arr.shape[0] for arr in raw)
        construction_error(total, raw[0].shape[1:], axes, e)
def create_block_manager_from_arrays(arrays, names, axes):
    """Build a consolidated BlockManager from per-column arrays."""
    try:
        mgr = BlockManager(form_blocks(arrays, names, axes), axes)
        mgr._consolidate_inplace()
        return mgr
    except ValueError as e:
        # Translate the failure into a user-facing shape-mismatch message.
        construction_error(len(arrays), arrays[0].shape, axes, e)
def construction_error(tot_items, block_shape, axes, e=None):
    """Raise a helpful ValueError describing a failed construction.

    Parameters
    ----------
    tot_items : int
        Number of items (columns) that were passed.
    block_shape : tuple
        Shape of the passed block values, minus the items dimension.
    axes : list of sequences
        The target axes; their lengths give the implied shape.
    e : Exception, optional
        Original error; re-raised when the shapes actually agree.
    """
    passed = tuple(int(x) for x in [tot_items] + list(block_shape))
    # Correcting the user facing error message during dataframe construction
    if len(passed) <= 2:
        passed = tuple(reversed(passed))

    implied = tuple(len(ax) for ax in axes)
    # Correcting the user facing error message during dataframe construction
    if len(implied) <= 2:
        implied = tuple(reversed(implied))

    # Shapes agree: the original error is more informative, so re-raise it.
    if passed == implied and e is not None:
        raise e
    if block_shape[0] == 0:
        raise ValueError("Empty data passed with indices specified.")
    raise ValueError(f"Shape of passed values is {passed}, indices imply {implied}")
1697# -----------------------------------------------------------------------
def form_blocks(arrays, names, axes):
    """
    Group per-column ``arrays`` by inferred block type and build Blocks.

    Parameters
    ----------
    arrays : sequence of array-likes, parallel to ``names``
    names : sequence of column labels
    axes : list of Index; ``axes[0]`` is the items (column) axis

    Returns
    -------
    list of Block
    """
    # put "leftover" items in float bucket, where else?
    # generalize?
    items_dict = defaultdict(list)
    extra_locs = []

    names_idx = ensure_index(names)
    if names_idx.equals(axes[0]):
        names_indexer = np.arange(len(names_idx))
    else:
        # Map each position of axes[0] back to its array; -1 means no match.
        assert names_idx.intersection(axes[0]).is_unique
        names_indexer = names_idx.get_indexer_for(axes[0])

    for i, name_idx in enumerate(names_indexer):
        if name_idx == -1:
            # Item present in axes[0] but with no array: NaN-fill it below.
            extra_locs.append(i)
            continue

        k = names[name_idx]
        v = arrays[name_idx]

        # Bucket each column by the block class that would hold it.
        block_type = get_block_type(v)
        items_dict[block_type.__name__].append((i, k, v))

    blocks = []
    if len(items_dict["FloatBlock"]):
        float_blocks = _multi_blockify(items_dict["FloatBlock"])
        blocks.extend(float_blocks)

    if len(items_dict["ComplexBlock"]):
        complex_blocks = _multi_blockify(items_dict["ComplexBlock"])
        blocks.extend(complex_blocks)

    if len(items_dict["TimeDeltaBlock"]):
        timedelta_blocks = _multi_blockify(items_dict["TimeDeltaBlock"])
        blocks.extend(timedelta_blocks)

    if len(items_dict["IntBlock"]):
        int_blocks = _multi_blockify(items_dict["IntBlock"])
        blocks.extend(int_blocks)

    if len(items_dict["DatetimeBlock"]):
        datetime_blocks = _simple_blockify(items_dict["DatetimeBlock"], _NS_DTYPE)
        blocks.extend(datetime_blocks)

    if len(items_dict["DatetimeTZBlock"]):
        # tz-aware datetimes cannot be stacked: one block per column.
        dttz_blocks = [
            make_block(array, klass=DatetimeTZBlock, placement=[i])
            for i, _, array in items_dict["DatetimeTZBlock"]
        ]
        blocks.extend(dttz_blocks)

    if len(items_dict["BoolBlock"]):
        bool_blocks = _simple_blockify(items_dict["BoolBlock"], np.bool_)
        blocks.extend(bool_blocks)

    if len(items_dict["ObjectBlock"]) > 0:
        object_blocks = _simple_blockify(items_dict["ObjectBlock"], np.object_)
        blocks.extend(object_blocks)

    if len(items_dict["CategoricalBlock"]) > 0:
        # Categoricals are non-consolidatable: one block per column.
        cat_blocks = [
            make_block(array, klass=CategoricalBlock, placement=[i])
            for i, _, array in items_dict["CategoricalBlock"]
        ]
        blocks.extend(cat_blocks)

    if len(items_dict["ExtensionBlock"]):

        external_blocks = [
            make_block(array, klass=ExtensionBlock, placement=[i])
            for i, _, array in items_dict["ExtensionBlock"]
        ]

        blocks.extend(external_blocks)

    if len(items_dict["ObjectValuesExtensionBlock"]):
        external_blocks = [
            make_block(array, klass=ObjectValuesExtensionBlock, placement=[i])
            for i, _, array in items_dict["ObjectValuesExtensionBlock"]
        ]

        blocks.extend(external_blocks)

    if len(extra_locs):
        shape = (len(extra_locs),) + tuple(len(x) for x in axes[1:])

        # empty items -> dtype object
        block_values = np.empty(shape, dtype=object)
        block_values.fill(np.nan)

        na_block = make_block(block_values, placement=extra_locs)
        blocks.append(na_block)

    return blocks
def _simple_blockify(tuples, dtype):
    """ return a single array of a block that has a single dtype; if dtype is
    not None, coerce to this dtype
    """
    values, placement = _stack_arrays(tuples, dtype)

    # TODO: CHECK DTYPE?
    # Coerce in case stacking produced a different dtype than requested.
    if dtype is not None and values.dtype != dtype:  # pragma: no cover
        values = values.astype(dtype)

    return [make_block(values, placement=placement)]
def _multi_blockify(tuples, dtype=None):
    """ return an array of blocks that potentially have different dtypes """

    new_blocks = []
    # Group consecutive arrays sharing a dtype; each group becomes one block.
    for grp_dtype, tup_block in itertools.groupby(tuples, lambda x: x[2].dtype):
        values, placement = _stack_arrays(list(tup_block), grp_dtype)
        new_blocks.append(make_block(values, placement=placement))

    return new_blocks
1828def _stack_arrays(tuples, dtype):
1830 # fml
1831 def _asarray_compat(x):
1832 if isinstance(x, ABCSeries):
1833 return x._values
1834 else:
1835 return np.asarray(x)
1837 def _shape_compat(x):
1838 if isinstance(x, ABCSeries):
1839 return (len(x),)
1840 else:
1841 return x.shape
1843 placement, names, arrays = zip(*tuples)
1845 first = arrays[0]
1846 shape = (len(arrays),) + _shape_compat(first)
1848 stacked = np.empty(shape, dtype=dtype)
1849 for i, arr in enumerate(arrays):
1850 stacked[i] = _asarray_compat(arr)
1852 return stacked, placement
1855def _interleaved_dtype(
1856 blocks: List[Block],
1857) -> Optional[Union[np.dtype, ExtensionDtype]]:
1858 """Find the common dtype for `blocks`.
1860 Parameters
1861 ----------
1862 blocks : List[Block]
1864 Returns
1865 -------
1866 dtype : Optional[Union[np.dtype, ExtensionDtype]]
1867 None is returned when `blocks` is empty.
1868 """
1869 if not len(blocks):
1870 return None
1872 return find_common_type([b.dtype for b in blocks])
def _consolidate(blocks):
    """
    Merge blocks having same dtype, exclude non-consolidating blocks
    """
    # Group by (_can_consolidate, dtype) so only compatible blocks merge.
    key = lambda blk: blk._consolidate_key
    grouped = itertools.groupby(sorted(blocks, key=key), key)

    merged_all = []
    for (can_consolidate, dtype), group in grouped:
        merged = _merge_blocks(
            list(group), dtype=dtype, _can_consolidate=can_consolidate
        )
        merged_all = _extend_blocks(merged, merged_all)
    return merged_all
def _compare_or_regex_search(a, b, regex=False):
    """
    Compare two array_like inputs of the same shape or two scalar values

    Calls operator.eq or re.search, depending on regex argument. If regex is
    True, perform an element-wise regex matching.

    Parameters
    ----------
    a : array_like or scalar
    b : array_like or scalar
    regex : bool, default False

    Returns
    -------
    mask : array_like of bool
    """
    if regex:
        # Element-wise regex search; non-string elements never match.
        op = np.vectorize(
            lambda x: bool(re.search(b, x)) if isinstance(x, str) else False
        )
    else:
        op = lambda x: operator.eq(x, b)

    a_is_array = isinstance(a, np.ndarray)
    b_is_array = isinstance(b, np.ndarray)

    if is_datetimelike_v_numeric(a, b) or is_numeric_v_string_like(a, b):
        # GH#29553 avoid deprecation warnings from numpy
        result = False
    else:
        result = op(a)

    # An array operand must yield an array-like mask; a scalar result here
    # means the comparison was not elementwise, which callers cannot use.
    if is_scalar(result) and (a_is_array or b_is_array):
        type_names = [type(a).__name__, type(b).__name__]

        if a_is_array:
            type_names[0] = f"ndarray(dtype={a.dtype})"

        if b_is_array:
            type_names[1] = f"ndarray(dtype={b.dtype})"

        raise TypeError(
            f"Cannot compare types {repr(type_names[0])} and {repr(type_names[1])}"
        )
    return result
1941def _transform_index(index, func, level=None):
1942 """
1943 Apply function to all values found in index.
1945 This includes transforming multiindex entries separately.
1946 Only apply function to one level of the MultiIndex if level is specified.
1948 """
1949 if isinstance(index, MultiIndex):
1950 if level is not None:
1951 items = [
1952 tuple(func(y) if i == level else y for i, y in enumerate(x))
1953 for x in index
1954 ]
1955 else:
1956 items = [tuple(func(y) for y in x) for x in index]
1957 return MultiIndex.from_tuples(items, names=index.names)
1958 else:
1959 items = [func(x) for x in index]
1960 return Index(items, name=index.name, tupleize_cols=False)
1963def _fast_count_smallints(arr):
1964 """Faster version of set(arr) for sequences of small numbers."""
1965 counts = np.bincount(arr.astype(np.int_))
1966 nz = counts.nonzero()[0]
1967 return np.c_[nz, counts[nz]]
1970def _preprocess_slice_or_indexer(slice_or_indexer, length, allow_fill):
1971 if isinstance(slice_or_indexer, slice):
1972 return (
1973 "slice",
1974 slice_or_indexer,
1975 libinternals.slice_len(slice_or_indexer, length),
1976 )
1977 elif (
1978 isinstance(slice_or_indexer, np.ndarray) and slice_or_indexer.dtype == np.bool_
1979 ):
1980 return "mask", slice_or_indexer, slice_or_indexer.sum()
1981 else:
1982 indexer = np.asanyarray(slice_or_indexer, dtype=np.int64)
1983 if not allow_fill:
1984 indexer = maybe_convert_indices(indexer, length)
1985 return "fancy", indexer, len(indexer)
def concatenate_block_managers(mgrs_indexers, axes, concat_axis, copy):
    """
    Concatenate block managers into one.

    Parameters
    ----------
    mgrs_indexers : list of (BlockManager, {axis: indexer,...}) tuples
    axes : list of Index
    concat_axis : int
    copy : bool

    Returns
    -------
    BlockManager
    """
    concat_plans = [
        get_mgr_concatenation_plan(mgr, indexers) for mgr, indexers in mgrs_indexers
    ]
    concat_plan = combine_concat_plans(concat_plans, concat_axis)
    blocks = []

    for placement, join_units in concat_plan:

        if len(join_units) == 1 and not join_units[0].indexers:
            # Single unit with no reindexing: reuse its values directly,
            # copying or viewing depending on ``copy``.
            b = join_units[0].block
            values = b.values
            if copy:
                values = values.copy()
            else:
                values = values.view()
            b = b.make_block_same_class(values, placement=placement)
        elif is_uniform_join_units(join_units):
            # All units share a block type/dtype: fast same-type concat.
            b = join_units[0].block.concat_same_type(
                [ju.block for ju in join_units], placement=placement
            )
        else:
            # Heterogeneous units: generic concatenation (may upcast).
            b = make_block(
                concatenate_join_units(join_units, concat_axis, copy=copy),
                placement=placement,
            )
        blocks.append(b)

    return BlockManager(blocks, axes)