Coverage for /home/martinb/.local/share/virtualenvs/camcops/lib/python3.6/site-packages/pandas/core/ops/__init__.py : 33%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1"""
2Arithmetic operations for PandasObjects
4This is not a public API.
5"""
6import datetime
7import operator
8from typing import TYPE_CHECKING, Set, Tuple, Union
10import numpy as np
12from pandas._libs import Timedelta, Timestamp, lib
13from pandas._libs.ops_dispatch import maybe_dispatch_ufunc_to_dunder_op # noqa:F401
14from pandas.util._decorators import Appender
16from pandas.core.dtypes.common import is_list_like, is_timedelta64_dtype
17from pandas.core.dtypes.generic import (
18 ABCDataFrame,
19 ABCExtensionArray,
20 ABCIndexClass,
21 ABCSeries,
22)
23from pandas.core.dtypes.missing import isna
25from pandas.core.construction import extract_array
26from pandas.core.ops.array_ops import (
27 arithmetic_op,
28 comparison_op,
29 define_na_arithmetic_op,
30 get_array_op,
31 logical_op,
32)
33from pandas.core.ops.array_ops import comp_method_OBJECT_ARRAY # noqa:F401
34from pandas.core.ops.common import unpack_zerodim_and_defer
35from pandas.core.ops.dispatch import should_series_dispatch
36from pandas.core.ops.docstrings import (
37 _arith_doc_FRAME,
38 _flex_comp_doc_FRAME,
39 _make_flex_doc,
40 _op_descriptions,
41)
42from pandas.core.ops.invalid import invalid_comparison # noqa:F401
43from pandas.core.ops.mask_ops import kleene_and, kleene_or, kleene_xor # noqa: F401
44from pandas.core.ops.methods import ( # noqa:F401
45 add_flex_arithmetic_methods,
46 add_special_arithmetic_methods,
47)
48from pandas.core.ops.roperator import ( # noqa:F401
49 radd,
50 rand_,
51 rdiv,
52 rdivmod,
53 rfloordiv,
54 rmod,
55 rmul,
56 ror_,
57 rpow,
58 rsub,
59 rtruediv,
60 rxor,
61)
63if TYPE_CHECKING:
64 from pandas import DataFrame # noqa:F401
66# -----------------------------------------------------------------------------
67# constants
# Names of every binary arithmetic operation, each in its forward form and
# in its reflected ("r"-prefixed) form.
ARITHMETIC_BINOPS: Set[str] = {
    prefix + base_name
    for base_name in (
        "add",
        "sub",
        "mul",
        "pow",
        "mod",
        "floordiv",
        "truediv",
        "divmod",
    )
    for prefix in ("", "r")
}
# Names of the six comparison operations.
COMPARISON_BINOPS: Set[str] = set("eq ne lt gt le ge".split())
97# -----------------------------------------------------------------------------
98# Ops Wrapping Utilities
def get_op_result_name(left, right):
    """
    Determine the name to pin to the result of a binary operation.

    The result should always be either an Index or a Series.

    Parameters
    ----------
    left : {Series, Index}
    right : object

    Returns
    -------
    name : object
        Usually a string.  When `right` is a Series or Index the name is
        reconciled between both operands via ``_maybe_match_name``;
        otherwise ``left.name`` is used as-is.
    """
    # `left` is always a Series when called from within ops
    if not isinstance(right, (ABCSeries, ABCIndexClass)):
        return left.name
    return _maybe_match_name(left, right)
def _maybe_match_name(a, b):
    """
    Try to find a name to attach to the result of an operation between
    a and b.  If only one of these has a `name` attribute, return that
    name.  Otherwise return a consensus name if they match or None if
    they have different names.

    Parameters
    ----------
    a : object
    b : object

    Returns
    -------
    name : str or None
        None is also returned when the two names cannot be compared to a
        single boolean (the comparison raises TypeError or ValueError).

    See Also
    --------
    pandas.core.common.consensus_name_attr
    """
    a_has = hasattr(a, "name")
    b_has = hasattr(b, "name")

    if a_has and b_has:
        try:
            names_match = bool(a.name == b.name)
        except (TypeError, ValueError):
            # e.g. np.int64(1) vs (np.int64(1), np.int64(2)): the comparison
            # broadcasts to an array whose truth value is ambiguous.  Names
            # that cannot be compared are treated as different.
            return None
        # TODO: what if they both have np.nan for their names?
        return a.name if names_match else None

    if a_has:
        return a.name
    if b_has:
        return b.name
    return None
def maybe_upcast_for_op(obj, shape: Tuple[int, ...]):
    """
    Cast non-pandas objects to pandas types to unify behavior of arithmetic
    and comparison operations.

    Parameters
    ----------
    obj : object
    shape : tuple[int]
        Shape to broadcast to when a NaT scalar has to be wrapped in an array.

    Returns
    -------
    out : object
        A Timedelta, Timestamp, DatetimeArray or TimedeltaArray when `obj`
        matches one of the special cases below; otherwise `obj` unchanged.

    Notes
    -----
    Be careful to call this *after* determining the `name` attribute to be
    attached to the result of the arithmetic operation.
    """
    from pandas.core.arrays import DatetimeArray, TimedeltaArray

    # Exact type check (not isinstance) for the stdlib timedelta.
    if type(obj) is datetime.timedelta:
        # GH#22390  cast up to Timedelta to rely on Timedelta
        # implementation; otherwise operation against numeric-dtype
        # raises TypeError
        return Timedelta(obj)

    if isinstance(obj, np.datetime64):
        # GH#28080 numpy casts integer-dtype to datetime64 when doing
        #  array[int] + datetime64, which we do not allow
        if not isna(obj):
            return Timestamp(obj)

        # Avoid possible ambiguities with pd.NaT
        nat_ns = obj.astype("datetime64[ns]")
        return DatetimeArray(np.broadcast_to(nat_ns, shape))

    if isinstance(obj, np.timedelta64):
        if not isna(obj):
            # In particular non-nanosecond timedelta64 needs to be cast to
            #  nanoseconds, or else we get undesired behavior like
            #  np.timedelta64(3, 'D') / 2 == np.timedelta64(1, 'D')
            return Timedelta(obj)

        # wrapping timedelta64("NaT") in Timedelta returns NaT,
        #  which would incorrectly be treated as a datetime-NaT, so
        #  we broadcast and wrap in a TimedeltaArray
        nat_ns = obj.astype("timedelta64[ns]")
        return TimedeltaArray(np.broadcast_to(nat_ns, shape))

    if isinstance(obj, np.ndarray) and is_timedelta64_dtype(obj.dtype):
        # GH#22390 Unfortunately we need to special-case right-hand
        # timedelta64 dtypes because numpy casts integer dtypes to
        # timedelta64 when operating with timedelta64
        return TimedeltaArray._from_sequence(obj)

    return obj
218# -----------------------------------------------------------------------------
def _get_frame_op_default_axis(name):
    """
    Return the default `axis` for the DataFrame method called `name`.

    Only DataFrame cares about default_axis, specifically:
    special methods have default_axis=None and flex methods
    have default_axis='columns'.  The boolean special methods
    (``__and__``, ``__or__``, ``__xor__`` and their reflected forms)
    also default to 'columns'.

    Parameters
    ----------
    name : str

    Returns
    -------
    default_axis: str or None
    """
    # Fold reflected dunders onto their forward form, e.g. __rand__ -> __and__
    forward_name = name.replace("__r", "__")
    is_bool_method = forward_name in ("__and__", "__or__", "__xor__")

    if name.startswith("__") and not is_bool_method:
        # __add__, __mul__, ...
        return None
    # bool methods, and flex names such as add, mul, ...
    return "columns"
def _get_opstr(op):
    """
    Look up the operation string, if any, to pass to numexpr for `op`.

    Parameters
    ----------
    op : binary operator

    Returns
    -------
    op_str : string or None

    Raises
    ------
    KeyError
        If `op` is not one of the operators listed below.
    """
    arithmetic = {
        operator.add: "+",
        radd: "+",
        operator.mul: "*",
        rmul: "*",
        operator.sub: "-",
        rsub: "-",
        operator.truediv: "/",
        rtruediv: "/",
        operator.floordiv: "//",
        rfloordiv: "//",
        operator.mod: None,  # TODO: Why None for mod but '%' for rmod?
        rmod: "%",
        operator.pow: "**",
        rpow: "**",
    }
    comparison = {
        operator.eq: "==",
        operator.ne: "!=",
        operator.le: "<=",
        operator.lt: "<",
        operator.ge: ">=",
        operator.gt: ">",
    }
    logical = {
        operator.and_: "&",
        rand_: "&",
        operator.or_: "|",
        ror_: "|",
        operator.xor: "^",
        rxor: "^",
    }
    # No operation string is registered for divmod in either direction.
    no_opstr = {divmod: None, rdivmod: None}

    return {**arithmetic, **comparison, **logical, **no_opstr}[op]
def _get_op_name(op, special):
    """
    Derive the method name for `op` following the conventions for
    special and non-special methods.

    Parameters
    ----------
    op : binary operator
    special : bool
        If True the name is wrapped in double underscores.

    Returns
    -------
    op_name : str
    """
    # Drop any leading/trailing underscores, e.g. "and_" -> "and"
    bare_name = op.__name__.strip("_")
    return f"__{bare_name}__" if special else bare_name
312# -----------------------------------------------------------------------------
313# Masking NA values and fallbacks for operations numpy does not support
def fill_binop(left, right, fill_value):
    """
    If a non-None fill_value is given, replace null entries in left and right
    with this value, but only in positions where _one_ of left/right is null,
    not both.

    Parameters
    ----------
    left : array-like
    right : array-like
    fill_value : object

    Returns
    -------
    left : array-like
    right : array-like

    Notes
    -----
    Makes copies if fill_value is not None; with fill_value=None the inputs
    are returned untouched.
    """
    # TODO: can we make a no-copy implementation?
    if fill_value is None:
        return left, right

    left_na = isna(left)
    right_na = isna(right)
    # null on one side but not on both
    exactly_one_na = left_na ^ right_na

    left = left.copy()
    right = right.copy()
    left[left_na & exactly_one_na] = fill_value
    right[right_na & exactly_one_na] = fill_value
    return left, right
351# -----------------------------------------------------------------------------
352# Dispatch logic
def dispatch_to_series(left, right, func, str_rep=None, axis=None):
    """
    Evaluate the frame operation func(left, right) by evaluating
    column-by-column, dispatching to the Series implementation.

    Parameters
    ----------
    left : DataFrame
    right : scalar or DataFrame
    func : arithmetic or comparison operator
    str_rep : str or None, default None
    axis : {None, 0, 1, "index", "columns"}

    Returns
    -------
    DataFrame

    Raises
    ------
    NotImplementedError
        If `right` is neither a scalar / zero-dimensional object, a DataFrame
        nor a Series.
    """
    # Note: we use iloc to access columns for compat with cases
    #       with non-unique columns.
    import pandas.core.computation.expressions as expressions

    right = lib.item_from_zerodim(right)

    if lib.is_scalar(right) or np.ndim(right) == 0:
        # Get the appropriate array-op to apply to each block's values.
        block_op = get_array_op(func, str_rep=str_rep)
        new_mgr = left._data.apply(block_op, right=right)
        return type(left)(new_mgr)

    # In every remaining branch the helper is deliberately still called
    # `column_op`, so that its __name__ is unchanged for the evaluate call.
    if isinstance(right, ABCDataFrame):
        assert right._indexed_same(left)

        def column_op(a, b):
            return {
                pos: func(a.iloc[:, pos], b.iloc[:, pos])
                for pos in range(len(a.columns))
            }

    elif isinstance(right, ABCSeries) and axis == "columns":
        # We only get here if called via _combine_series_frame,
        # in which case we specifically want to operate row-by-row
        assert right.index.equals(left.columns)

        if right.dtype == "timedelta64[ns]":
            # ensure we treat NaT values as the correct dtype
            # Note: we do not do this unconditionally as it may be lossy or
            #  expensive for EA dtypes.
            right = np.asarray(right)

            def column_op(a, b):
                return {
                    pos: func(a.iloc[:, pos], b[pos])
                    for pos in range(len(a.columns))
                }

        else:

            def column_op(a, b):
                return {
                    pos: func(a.iloc[:, pos], b.iloc[pos])
                    for pos in range(len(a.columns))
                }

    elif isinstance(right, ABCSeries):
        assert right.index.equals(left.index)  # Handle other cases later

        def column_op(a, b):
            return {pos: func(a.iloc[:, pos], b) for pos in range(len(a.columns))}

    else:
        # Remaining cases have less-obvious dispatch rules
        raise NotImplementedError(right)

    return expressions.evaluate(column_op, str_rep, left, right)
423# -----------------------------------------------------------------------------
424# Series
def _align_method_SERIES(left, right, align_asobject=False):
    """
    Align the lhs and rhs of a Series operation.

    Alignment only happens when `right` is a Series whose index differs
    from that of `left`; any other `right` is passed through untouched.

    Parameters
    ----------
    left : Series
    right : object
    align_asobject : bool, default False
        If True, both operands are cast to object dtype before aligning.

    Returns
    -------
    left, right
    """
    # ToDo: Different from _align_method_FRAME, list, tuple and ndarray
    # are not coerced here
    # because Series has inconsistencies described in #13637

    # avoid repeated alignment
    needs_alignment = isinstance(right, ABCSeries) and not left.index.equals(
        right.index
    )
    if not needs_alignment:
        return left, right

    if align_asobject:
        # to keep original value's dtype for bool ops
        left = left.astype(object)
        right = right.astype(object)

    left, right = left.align(right, copy=False)
    return left, right
def _construct_result(
    left: ABCSeries,
    result: Union[np.ndarray, ABCExtensionArray],
    index: ABCIndexClass,
    name,
):
    """
    Wrap the raw result of an op in an appropriately-labelled Series.

    Parameters
    ----------
    left : Series
    result : ndarray or ExtensionArray
    index : Index
    name : object

    Returns
    -------
    Series
        In the case of __divmod__ or __rdivmod__, a 2-tuple of Series.
    """
    if isinstance(result, tuple):
        # produced by divmod or rdivmod: wrap each half separately
        first = _construct_result(left, result[0], index=index, name=name)
        second = _construct_result(left, result[1], index=index, name=name)
        return (first, second)

    # We do not pass dtype to ensure that the Series constructor
    #  does inference in the case where `result` has object-dtype.
    wrapped = left._constructor(result, index=index).__finalize__(left)

    # Set the result's name after __finalize__ is called because __finalize__
    #  would set it back to self.name
    wrapped.name = name
    return wrapped
def _arith_method_SERIES(cls, op, special):
    """
    Build the Series method implementing the arithmetic operation `op`.

    A factory is used to avoid duplicating the wrapper for every operator.
    `cls` is accepted but not used in the body.
    """
    str_rep = _get_opstr(op)
    op_name = _get_op_name(op, special)

    @unpack_zerodim_and_defer(op_name)
    def wrapper(left, right):
        left, right = _align_method_SERIES(left, right)
        # Determine the name only after alignment of the operands.
        res_name = get_op_result_name(left, right)

        result = arithmetic_op(
            extract_array(left, extract_numpy=True),
            extract_array(right, extract_numpy=True),
            op,
            str_rep,
        )
        return _construct_result(left, result, index=left.index, name=res_name)

    wrapper.__name__ = op_name
    return wrapper
def _comp_method_SERIES(cls, op, special):
    """
    Build the Series method implementing the comparison operation `op`.

    A factory is used to avoid duplicating the wrapper for every operator.
    `cls` is accepted but not used in the body.  The generated method raises
    ValueError when compared against a Series that is not identically
    labelled.
    """
    op_name = _get_op_name(op, special)

    @unpack_zerodim_and_defer(op_name)
    def wrapper(self, other):
        res_name = get_op_result_name(self, other)

        # Comparisons do not align: the labels have to match already.
        mismatched = isinstance(other, ABCSeries) and not self._indexed_same(other)
        if mismatched:
            raise ValueError("Can only compare identically-labeled Series objects")

        res_values = comparison_op(
            extract_array(self, extract_numpy=True),
            extract_array(other, extract_numpy=True),
            op,
        )
        return _construct_result(self, res_values, index=self.index, name=res_name)

    wrapper.__name__ = op_name
    return wrapper
def _bool_method_SERIES(cls, op, special):
    """
    Build the Series method implementing the logical operation `op`.

    A factory is used to avoid duplicating the wrapper for every operator.
    `cls` is accepted but not used in the body.
    """
    op_name = _get_op_name(op, special)

    @unpack_zerodim_and_defer(op_name)
    def wrapper(self, other):
        # Operands are aligned with align_asobject=True for logical ops.
        self, other = _align_method_SERIES(self, other, align_asobject=True)
        res_name = get_op_result_name(self, other)

        res_values = logical_op(
            extract_array(self, extract_numpy=True),
            extract_array(other, extract_numpy=True),
            op,
        )
        return _construct_result(self, res_values, index=self.index, name=res_name)

    wrapper.__name__ = op_name
    return wrapper
def _flex_method_SERIES(cls, op, special):
    """
    Build the flexible Series method (with `level`, `fill_value` and `axis`
    keywords) for the binary operation `op`.

    `cls` is accepted but not used in the body.
    """
    name = _get_op_name(op, special)
    doc = _make_flex_doc(name, "series")

    @Appender(doc)
    def flex_wrapper(self, other, level=None, fill_value=None, axis=0):
        # validate axis; the resulting number itself is not needed
        if axis is not None:
            self._get_axis_number(axis)

        if isinstance(other, ABCSeries):
            return self._binop(other, op, level=level, fill_value=fill_value)

        if isinstance(other, (np.ndarray, list, tuple)):
            # Raw sequences are matched positionally, so sizes must agree.
            if len(other) != len(self):
                raise ValueError("Lengths must be equal")
            as_series = self._constructor(other, self.index)
            return self._binop(as_series, op, level=level, fill_value=fill_value)

        # Anything else is handed straight to `op`, after filling nulls in
        # self if a fill_value was supplied.
        if fill_value is not None:
            self = self.fillna(fill_value)
        return op(self, other)

    flex_wrapper.__name__ = name
    return flex_wrapper
586# -----------------------------------------------------------------------------
587# DataFrame
def _combine_series_frame(self, other, func, fill_value=None, axis=None, level=None):
    """
    Apply binary operator `func` to self, other using alignment and fill
    conventions determined by the fill_value, axis, and level kwargs.

    Parameters
    ----------
    self : DataFrame
    other : Series
    func : binary operator
    fill_value : object, default None
    axis : {0, 1, 'columns', 'index', None}, default None
    level : int or None, default None

    Returns
    -------
    result : DataFrame

    Raises
    ------
    NotImplementedError
        If `fill_value` is not None.
    """
    if fill_value is not None:
        raise NotImplementedError(f"fill_value {fill_value} not supported.")

    # default axis is columns
    axis = self._get_axis_number(1 if axis is None else axis)

    left, right = self.align(other, join="outer", axis=axis, level=level, copy=False)
    if axis == 0:
        new_data = left._combine_match_index(right, func)
    else:
        new_data = dispatch_to_series(left, right, func, axis="columns")

    return left._construct_result(new_data)
def _align_method_FRAME(left, right, axis):
    """
    Convert rhs to meet lhs dims if input is list, tuple or np.ndarray.

    Parameters
    ----------
    left : DataFrame
    right : object
    axis : axis specifier or None
        Decides whether a 1-dimensional `right` is labelled with the index
        or with the columns of `left`.

    Returns
    -------
    object
        A Series or DataFrame built from `right` when it could be coerced,
        otherwise `right` unchanged.

    Raises
    ------
    ValueError
        If the length, shape or number of dimensions of `right` does not fit
        `left`.
    """

    def to_series(values):
        msg = "Unable to coerce to Series, length must be {req_len}: given {given_len}"
        if axis is not None and left._get_axis_name(axis) == "index":
            labels = left.index
        else:
            labels = left.columns

        if len(labels) != len(values):
            raise ValueError(msg.format(req_len=len(labels), given_len=len(values)))
        return left._constructor_sliced(values, index=labels)

    if isinstance(right, np.ndarray):
        if right.ndim > 2:
            raise ValueError(
                "Unable to coerce to Series/DataFrame, dim "
                f"must be <= 2: {right.shape}"
            )

        if right.ndim == 1:
            return to_series(right)

        if right.ndim == 2:
            if right.shape == left.shape:
                return left._constructor(
                    right, index=left.index, columns=left.columns
                )

            n_rows, n_cols = right.shape
            if n_rows == left.shape[0] and n_cols == 1:
                # Broadcast across columns
                widened = np.broadcast_to(right, left.shape)
                return left._constructor(
                    widened, index=left.index, columns=left.columns
                )

            if n_cols == left.shape[1] and n_rows == 1:
                # Broadcast along rows
                return to_series(right[0, :])

            raise ValueError(
                "Unable to coerce to DataFrame, shape "
                f"must be {left.shape}: given {right.shape}"
            )

        # remaining ndarrays (neither 1- nor 2-dimensional) pass through
        return right

    if is_list_like(right) and not isinstance(right, (ABCSeries, ABCDataFrame)):
        # GH17901
        return to_series(right)

    return right
def _should_reindex_frame_op(
    left: "DataFrame",
    right,
    op,
    axis,
    default_axis: Union[str, None],
    fill_value,
    level,
) -> bool:
    """
    Check if this is an operation between DataFrames that will need to reindex.

    Parameters
    ----------
    left : DataFrame
    right : object
    op : binary operator
    axis : axis specifier or None
        The axis the caller was invoked with.
    default_axis : str or None
        The default axis of the calling method.
    fill_value : object
    level : object

    Returns
    -------
    bool
        True only if `right` is a DataFrame, `op` is not pow/rpow, no
        fill_value or level was given, `axis` is the default axis, and the
        columns of `left` and `right` are not the same.
    """
    assert isinstance(left, ABCDataFrame)

    if op is operator.pow or op is rpow:
        # GH#32685 pow has special semantics for operating with null values
        return False

    if not isinstance(right, ABCDataFrame):
        return False

    # Compare string axes by value: an identity check alone would reject an
    # equal "columns" string that happens to be a different object.
    axis_is_default = axis is default_axis or (
        isinstance(axis, str) and axis == default_axis
    )

    if fill_value is None and level is None and axis_is_default:
        # TODO: any other cases we should handle here?
        cols = left.columns.intersection(right.columns)
        if not (cols.equals(left.columns) and cols.equals(right.columns)):
            return True

    return False
def _frame_arith_method_with_reindex(
    left: "DataFrame", right: "DataFrame", op
) -> "DataFrame":
    """
    For DataFrame-with-DataFrame operations that require reindexing,
    operate only on shared columns, then reindex.

    Parameters
    ----------
    left : DataFrame
    right : DataFrame
    op : binary operator

    Returns
    -------
    DataFrame
        The result of `op` on the shared columns, reindexed to the outer
        join of both frames' columns.
    """
    # GH#31623, only operate on shared columns
    shared = left.columns.intersection(right.columns)
    partial = op(left[shared], right[shared])

    # Do the join on the columns instead of using _align_method_FRAME
    #  to avoid constructing two potentially large/sparse DataFrames
    all_columns, _left_indexer, _right_indexer = left.columns.join(
        right.columns, how="outer", level=None, return_indexers=True
    )
    return partial.reindex(all_columns, axis=1)
def _arith_method_FRAME(cls, op, special):
    """
    Build the DataFrame method implementing the binary operation `op`.

    `cls` is accepted but not used in the body.  The generated method
    handles a DataFrame, a Series, or any other right operand in three
    separate branches.
    """
    str_rep = _get_opstr(op)
    op_name = _get_op_name(op, special)
    default_axis = _get_frame_op_default_axis(op_name)

    na_op = define_na_arithmetic_op(op, str_rep)
    is_logical = str_rep in ("&", "|", "^")

    # i.e. include "add" but not "__add__"
    doc = (
        _make_flex_doc(op_name, "dataframe")
        if op_name in _op_descriptions
        else _arith_doc_FRAME % op_name
    )

    @Appender(doc)
    def f(self, other, axis=default_axis, level=None, fill_value=None):

        if _should_reindex_frame_op(
            self, other, op, axis, default_axis, fill_value, level
        ):
            return _frame_arith_method_with_reindex(self, other, op)

        other = _align_method_FRAME(self, other, axis)

        if isinstance(other, ABCDataFrame):
            # Another DataFrame; logical ops always use the plain `op`.
            use_plain_op = should_series_dispatch(self, other, op) or is_logical
            pass_op = op if use_plain_op else na_op

            left, right = self.align(other, join="outer", level=level, copy=False)
            return left._construct_result(
                left._combine_frame(right, pass_op, fill_value)
            )

        if isinstance(other, ABCSeries):
            # For these values of `axis`, we end up dispatching to Series op,
            # so do not want the masked op.
            use_plain_op = axis in [0, "columns", None] or is_logical
            pass_op = op if use_plain_op else na_op
            return _combine_series_frame(
                self, other, pass_op, fill_value=fill_value, axis=axis, level=level
            )

        # in this case we always have `np.ndim(other) == 0`
        if fill_value is not None:
            self = self.fillna(fill_value)
        return self._construct_result(dispatch_to_series(self, other, op, str_rep))

    f.__name__ = op_name

    return f
def _flex_comp_method_FRAME(cls, op, special):
    """
    Build the flexible DataFrame comparison method (with `axis` and `level`
    keywords) for the operation `op`.

    `cls` is accepted but not used in the body.  Unlike the method built by
    ``_comp_method_FRAME``, a differently-labelled DataFrame operand is
    aligned with an outer join rather than rejected.
    """
    str_rep = _get_opstr(op)
    op_name = _get_op_name(op, special)
    default_axis = _get_frame_op_default_axis(op_name)

    doc = _flex_comp_doc_FRAME.format(
        op_name=op_name, desc=_op_descriptions[op_name]["desc"]
    )

    @Appender(doc)
    def f(self, other, axis=default_axis, level=None):

        other = _align_method_FRAME(self, other, axis)

        if isinstance(other, ABCSeries):
            return _combine_series_frame(
                self, other, op, fill_value=None, axis=axis, level=level
            )

        if isinstance(other, ABCDataFrame):
            # Another DataFrame
            if not self._indexed_same(other):
                self, other = self.align(other, "outer", level=level, copy=False)
            new_data = dispatch_to_series(self, other, op, str_rep)
        else:
            # in this case we always have `np.ndim(other) == 0`
            new_data = dispatch_to_series(self, other, op)

        return self._construct_result(new_data)

    f.__name__ = op_name

    return f
def _comp_method_FRAME(cls, op, special):
    """
    Build the DataFrame method implementing the comparison operation `op`.

    `cls` is accepted but not used in the body.  The generated method raises
    ValueError when compared against a DataFrame that is not identically
    labelled.
    """
    str_rep = _get_opstr(op)
    op_name = _get_op_name(op, special)

    @Appender(f"Wrapper for comparison method {op_name}")
    def f(self, other):

        other = _align_method_FRAME(self, other, axis=None)

        if isinstance(other, ABCSeries):
            return _combine_series_frame(
                self, other, op, fill_value=None, axis=None, level=None
            )

        if isinstance(other, ABCDataFrame):
            # Another DataFrame
            if not self._indexed_same(other):
                raise ValueError(
                    "Can only compare identically-labeled DataFrame objects"
                )
            new_data = dispatch_to_series(self, other, op, str_rep)
        else:
            # straight boolean comparisons we want to allow all columns
            # (regardless of dtype to pass thru) See #4537 for discussion.
            new_data = dispatch_to_series(self, other, op)

        return self._construct_result(new_data)

    f.__name__ = op_name

    return f