Coverage for /home/martinb/.local/share/virtualenvs/camcops/lib/python3.6/site-packages/pandas/core/ops/array_ops.py : 16%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1"""
2Functions for arithmetic and comparison operations on NumPy arrays and
3ExtensionArrays.
4"""
5from functools import partial
6import operator
7from typing import Any, Optional, Union
9import numpy as np
11from pandas._libs import Timedelta, Timestamp, lib, ops as libops
13from pandas.core.dtypes.cast import (
14 construct_1d_object_array_from_listlike,
15 find_common_type,
16 maybe_upcast_putmask,
17)
18from pandas.core.dtypes.common import (
19 ensure_object,
20 is_bool_dtype,
21 is_integer_dtype,
22 is_list_like,
23 is_object_dtype,
24 is_scalar,
25)
26from pandas.core.dtypes.generic import (
27 ABCDatetimeArray,
28 ABCExtensionArray,
29 ABCIndex,
30 ABCIndexClass,
31 ABCSeries,
32 ABCTimedeltaArray,
33)
34from pandas.core.dtypes.missing import isna, notna
36from pandas.core.ops import missing
37from pandas.core.ops.dispatch import dispatch_to_extension_op, should_extension_dispatch
38from pandas.core.ops.invalid import invalid_comparison
39from pandas.core.ops.roperator import rpow
def comp_method_OBJECT_ARRAY(op, x, y):
    """
    Compare the array `x` elementwise against `y` using `op`.

    Parameters
    ----------
    op : comparison operator
        Passed through to ``libops.vec_compare`` / ``libops.scalar_compare``.
    x : np.ndarray
        Raveled before comparing; the result is reshaped to ``x.shape``.
    y : list, np.ndarray, Series, Index or other object
        Lists are converted to a 1D object array. Anything that is not an
        ndarray, Series or Index afterwards is compared as a scalar.

    Returns
    -------
    np.ndarray
        Comparison result with the same shape as `x`.
    """
    if isinstance(y, list):
        y = construct_1d_object_array_from_listlike(y)

    # TODO: Should the checks below be ABCIndexClass?
    array_like = isinstance(y, (np.ndarray, ABCSeries, ABCIndex))
    if not array_like:
        # Scalar path: one value compared against every element of `x`.
        return libops.scalar_compare(x.ravel(), y, op).reshape(x.shape)

    # TODO: should this be ABCIndexClass??
    if not is_object_dtype(y.dtype):
        y = y.astype(np.object_)

    # Unwrap Series/Index to the underlying values before the comparison.
    if isinstance(y, (ABCSeries, ABCIndex)):
        y = y.values

    compared = libops.vec_compare(x.ravel(), y, op)
    return compared.reshape(x.shape)
def masked_arith_op(x, y, op):
    """
    If the given arithmetic operation fails, attempt it again on
    only the non-null elements of the input array(s).

    Parameters
    ----------
    x : np.ndarray
    y : np.ndarray or scalar
    op : binary operator

    Returns
    -------
    np.ndarray
        Has the shape of `x`. Positions excluded by the mask are filled
        with NaN through ``maybe_upcast_putmask``.

    Raises
    ------
    ValueError
        If `y` is an ndarray whose raveled shape does not match the mask.
    TypeError
        If `y` is neither an ndarray nor a scalar.
    """
    # Check the type first so a bad `x` fails on the assert rather than with
    # an AttributeError from ravel().
    assert isinstance(x, np.ndarray), type(x)

    # For Series `x` is 1D so ravel() is a no-op; calling it anyway makes
    # the logic valid for both Series and DataFrame ops.
    xrav = x.ravel()

    if isinstance(y, np.ndarray):
        dtype = find_common_type([x.dtype, y.dtype])
        result = np.empty(x.size, dtype=dtype)

        # NB: ravel() is only safe since y is ndarray; for e.g. PeriodIndex
        #  we would get int64 dtype, see GH#19956
        yrav = y.ravel()
        mask = notna(xrav) & notna(yrav)

        if yrav.shape != mask.shape:
            # FIXME: GH#5284, GH#5035, GH#19448
            # Without specifically raising here we get mismatched
            # errors in Py3 (TypeError) vs Py2 (ValueError)
            # Note: Only = an issue in DataFrame case
            raise ValueError("Cannot broadcast operands together.")

        if mask.any():
            with np.errstate(all="ignore"):
                result[mask] = op(xrav[mask], yrav[mask])

    else:
        if not is_scalar(y):
            raise TypeError(type(y))

        # mask is only meaningful for x
        result = np.empty(x.size, dtype=x.dtype)
        mask = notna(xrav)

        # 1 ** np.nan is 1. So we have to unmask those.
        # Compare against the raveled array: `mask` and `result` are flat,
        # so using the un-raveled `x` would fail to line up for 2D input.
        if op is pow:
            mask = np.where(xrav == 1, False, mask)
        elif op is rpow:
            mask = np.where(y == 1, False, mask)

        if mask.any():
            with np.errstate(all="ignore"):
                result[mask] = op(xrav[mask], y)

    result, _ = maybe_upcast_putmask(result, ~mask, np.nan)
    result = result.reshape(x.shape)  # 2D compat
    return result
def define_na_arithmetic_op(op, str_rep: str):
    """
    Build a two-argument function that applies `op` via ``na_arithmetic_op``.

    Parameters
    ----------
    op : binary operator
    str_rep : str
        Forwarded unchanged to ``na_arithmetic_op``.

    Returns
    -------
    function
        ``na_op(x, y)`` with `op` and `str_rep` bound from this call.
    """

    def na_op(x, y):
        # `op` and `str_rep` are captured from the enclosing call.
        return na_arithmetic_op(x, y, op=op, str_rep=str_rep)

    return na_op
def na_arithmetic_op(left, right, op, str_rep: str):
    """
    Evaluate `op` on `left` and `right`, retrying on non-null values only.

    The operation is first attempted through ``expressions.evaluate``. If
    that raises TypeError, it is retried with ``masked_arith_op``. The result
    is then passed through ``missing.dispatch_fill_zeros``.

    Parameters
    ----------
    left : np.ndarray
    right : np.ndarray or scalar
    op : binary operator
    str_rep : str or None

    Returns
    -------
    array-like

    Raises
    ------
    TypeError : invalid operation
    """
    # Imported at call time rather than at module level, as in the original.
    from pandas.core.computation import expressions

    try:
        evaluated = expressions.evaluate(op, str_rep, left, right)
    except TypeError:
        # Fall back to operating on the non-null entries only.
        evaluated = masked_arith_op(left, right, op)

    return missing.dispatch_fill_zeros(op, left, right, evaluated)
def arithmetic_op(
    left: Union[np.ndarray, ABCExtensionArray], right: Any, op, str_rep: str
):
    """
    Evaluate an arithmetic operation `+`, `-`, `*`, `/`, `//`, `%`, `**`, ...

    Parameters
    ----------
    left : np.ndarray or ExtensionArray
    right : object
        Cannot be a DataFrame or Index.  Series is *not* excluded.
    op : {operator.add, operator.sub, ...}
        Or one of the reversed variants from roperator.
    str_rep : str

    Returns
    -------
    ndarray or ExtensionArray
        Or a 2-tuple of these in the case of divmod or rdivmod.
    """
    from pandas.core.ops import maybe_upcast_for_op

    # NB: We assume that extract_array has already been called
    #  on `left` and `right`.
    rvalues = maybe_upcast_for_op(right, left.shape)

    # TimedeltaArray, DatetimeArray, and Timestamp are included here
    #  because they have `freq` attribute which is handled correctly
    #  by dispatch_to_extension_op.
    # Timedelta is included because numexpr will fail on it, see GH#31457
    use_dispatch = should_extension_dispatch(left, rvalues) or isinstance(
        rvalues, (ABCTimedeltaArray, ABCDatetimeArray, Timestamp, Timedelta)
    )
    if use_dispatch:
        return dispatch_to_extension_op(op, left, rvalues)

    # Plain ndarray path: numpy floating-point warnings are suppressed.
    with np.errstate(all="ignore"):
        return na_arithmetic_op(left, rvalues, op, str_rep)
def comparison_op(
    left: Union[np.ndarray, ABCExtensionArray], right: Any, op
) -> Union[np.ndarray, ABCExtensionArray]:
    """
    Evaluate a comparison operation `==`, `!=`, `>=`, `>`, `<=`, or `<`.

    Parameters
    ----------
    left : np.ndarray or ExtensionArray
    right : object
        Cannot be a DataFrame, Series, or Index.
    op : {operator.eq, operator.ne, operator.gt, operator.ge, operator.lt, operator.le}

    Returns
    -------
    ndarray or ExtensionArray

    Raises
    ------
    ValueError
        If `right` is array-like and its length differs from `left`.
    TypeError
        If the comparison produces a scalar instead of an array.
    """
    # NB: We assume extract_array has already been called on left and right
    rvalues = lib.item_from_zerodim(right)
    if isinstance(rvalues, list):
        # TODO: same for tuples?
        rvalues = np.asarray(rvalues)

    # TODO: make this treatment consistent across ops and classes.
    #  We are not catching all listlikes here (e.g. frozenset, tuple)
    #  The ambiguous case is object-dtype.  See GH#27803
    is_arraylike = isinstance(rvalues, (np.ndarray, ABCExtensionArray, ABCIndexClass))
    if is_arraylike and len(left) != len(rvalues):
        raise ValueError("Lengths must match to compare")

    if should_extension_dispatch(left, rvalues):
        return dispatch_to_extension_op(op, left, rvalues)

    if is_scalar(rvalues) and isna(rvalues):
        # numpy does not like comparisons vs None
        # Only `!=` is all-True against a null scalar; the rest are all-False.
        make_filled = np.ones if op is operator.ne else np.zeros
        return make_filled(left.shape, dtype=bool)

    if is_object_dtype(left.dtype):
        return comp_method_OBJECT_ARRAY(op, left, rvalues)

    # Call the dunder method on `left` directly so that a NotImplemented
    # return value can be detected below.
    op_name = f"__{op.__name__}__"
    bound_method = getattr(left, op_name)
    with np.errstate(all="ignore"):
        res_values = bound_method(rvalues)

    if res_values is NotImplemented:
        res_values = invalid_comparison(left, rvalues, op)
    if is_scalar(res_values):
        typ = type(rvalues)
        raise TypeError(f"Could not compare {typ} type with Series")

    return res_values
def na_logical_op(x: np.ndarray, y, op):
    """
    Apply the logical operator `op` to `x` and `y`, with an object fallback.

    ``op(x, y)`` is tried directly first. If that raises TypeError, the
    operation is retried through ``libops.vec_binop`` (when `y` is an
    ndarray) or ``libops.scalar_binop`` (when `y` is a scalar).

    Parameters
    ----------
    x : np.ndarray
    y : np.ndarray or scalar
    op : binary operator

    Returns
    -------
    np.ndarray

    Raises
    ------
    TypeError
        If the scalar fallback also fails; the underlying exception is
        attached as ``__cause__``.
    """
    try:
        # For exposition, write:
        #  yarr = isinstance(y, np.ndarray)
        #  yint = is_integer(y) or (yarr and y.dtype.kind == "i")
        #  ybool = is_bool(y) or (yarr and y.dtype.kind == "b")
        #  xint = x.dtype.kind == "i"
        #  xbool = x.dtype.kind == "b"
        # Then Cases where this goes through without raising include:
        #  (xint or xbool) and (yint or ybool)
        result = op(x, y)
    except TypeError:
        if isinstance(y, np.ndarray):
            # bool-bool dtype operations should be OK, should not get here
            assert not (is_bool_dtype(x.dtype) and is_bool_dtype(y.dtype))
            x = ensure_object(x)
            y = ensure_object(y)
            result = libops.vec_binop(x, y, op)
        else:
            # let null fall thru
            assert lib.is_scalar(y)
            if not isna(y):
                y = bool(y)
            try:
                result = libops.scalar_binop(x, y, op)
            except (
                TypeError,
                ValueError,
                AttributeError,
                OverflowError,
                NotImplementedError,
            ) as err:
                typ = type(y).__name__
                # Chain the low-level error so the root cause stays visible
                # in the traceback.
                raise TypeError(
                    f"Cannot perform '{op.__name__}' with a dtyped [{x.dtype}] array "
                    f"and scalar of type [{typ}]"
                ) from err

    return result
def logical_op(
    left: Union[np.ndarray, ABCExtensionArray], right: Any, op
) -> Union[np.ndarray, ABCExtensionArray]:
    """
    Evaluate a logical operation `|`, `&`, or `^`.

    Parameters
    ----------
    left : np.ndarray or ExtensionArray
    right : object
        Cannot be a DataFrame, Series, or Index.
    op : {operator.and_, operator.or_, operator.xor}
        Or one of the reversed variants from roperator.

    Returns
    -------
    ndarray or ExtensionArray
    """

    def _fill_na_as_bool(arr, other=None):
        # Replace NA entries with False, then cast to bool unless `other`
        # is given and is specifically not-boolean.
        if arr.dtype.kind in ("c", "f", "O"):
            # dtypes that can hold NA
            na_mask = isna(arr)
            if na_mask.any():
                arr = arr.astype(object)
                arr[na_mask] = False

        if other is None or is_bool_dtype(other.dtype):
            arr = arr.astype(bool)
        return arr

    left_is_int = is_integer_dtype(left.dtype)

    # NB: We assume extract_array has already been called on left and right
    rvalues = lib.item_from_zerodim(right)
    if is_list_like(rvalues) and not hasattr(rvalues, "dtype"):
        # e.g. list, tuple
        rvalues = construct_1d_object_array_from_listlike(rvalues)

    if should_extension_dispatch(left, rvalues):
        return dispatch_to_extension_op(op, left, rvalues)

    if isinstance(rvalues, np.ndarray):
        right_is_int = is_integer_dtype(rvalues.dtype)
        if not right_is_int:
            rvalues = _fill_na_as_bool(rvalues, left)
    else:
        # i.e. scalar
        right_is_int = lib.is_integer(rvalues)

    res_values = na_logical_op(left, rvalues, op)

    # For int vs int `^`, `|`, `&` are bitwise operators and return
    #  integer dtypes.  Otherwise these are boolean ops
    if left_is_int and right_is_int:
        return res_values
    return _fill_na_as_bool(res_values)
def get_array_op(op, str_rep: Optional[str] = None):
    """
    Return a binary array operation corresponding to the given operator op.

    The choice is made from ``op.__name__`` with surrounding underscores
    stripped: comparison names map to ``comparison_op``, logical names to
    ``logical_op``, and everything else to ``arithmetic_op``.

    Parameters
    ----------
    op : function
        Binary operator from operator or roperator module.
    str_rep : str or None, default None
        str_rep to pass to arithmetic_op

    Returns
    -------
    function
        A ``functools.partial`` with `op` (and, for arithmetic ops,
        `str_rep`) already bound.
    """
    bare_name = op.__name__.strip("_")

    if bare_name in {"eq", "ne", "lt", "le", "gt", "ge"}:
        return partial(comparison_op, op=op)

    if bare_name in {"and", "or", "xor", "rand", "ror", "rxor"}:
        return partial(logical_op, op=op)

    # Only the arithmetic path takes `str_rep`.
    return partial(arithmetic_op, op=op, str_rep=str_rep)