Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1"""Core eval alignment algorithms 

2""" 

3 

4from functools import partial, wraps 

5from typing import Dict, Optional, Sequence, Tuple, Type, Union 

6import warnings 

7 

8import numpy as np 

9 

10from pandas._typing import FrameOrSeries 

11from pandas.errors import PerformanceWarning 

12 

13from pandas.core.dtypes.generic import ABCDataFrame, ABCSeries 

14 

15from pandas.core.base import PandasObject 

16import pandas.core.common as com 

17from pandas.core.computation.common import result_type_many 

18 

19 

def _align_core_single_unary_op(
    term,
) -> Tuple[Union[partial, Type[FrameOrSeries]], Optional[Dict[str, int]]]:
    """
    Work out the result constructor and axes for a lone operand.

    Parameters
    ----------
    term : object
        Operand exposing the wrapped object through ``.value``.

    Returns
    -------
    typ : partial or type
        ``np.asanyarray`` bound to the operand's dtype when the value is an
        ndarray, otherwise ``type(term.value)``.
    axes : dict or None
        Axis-name mapping built by ``_zip_axes_from_type`` when the value has
        an ``axes`` attribute, otherwise None.
    """
    operand = term.value

    # ndarrays are rebuilt through asanyarray so the dtype is carried along
    if isinstance(operand, np.ndarray):
        return partial(np.asanyarray, dtype=operand.dtype), None

    klass = type(operand)
    if not hasattr(operand, "axes"):
        return klass, None
    return klass, _zip_axes_from_type(klass, operand.axes)

35 

36 

def _zip_axes_from_type(
    typ: Type[FrameOrSeries], new_axes: Sequence[int]
) -> Dict[str, int]:
    """
    Label a sequence of axes with the axis names declared on ``typ``.

    Parameters
    ----------
    typ : type
        Class exposing an ``_AXIS_NAMES`` mapping of axis position to name.
    new_axes : sequence
        Axis objects, looked up by the positions found in ``_AXIS_NAMES``.

    Returns
    -------
    dict
        Maps each axis name to the matching entry of ``new_axes``.
    """
    named_axes = {}
    for position, axis_name in typ._AXIS_NAMES.items():
        named_axes[axis_name] = new_axes[position]
    return named_axes

42 

43 

def _any_pandas_objects(terms) -> bool:
    """
    Report whether at least one term wraps a PandasObject.

    Parameters
    ----------
    terms : iterable
        Objects exposing the wrapped operand through ``.value``.

    Returns
    -------
    bool
        True as soon as one ``term.value`` is a PandasObject instance,
        False otherwise (including for an empty iterable).
    """
    for term in terms:
        if isinstance(term.value, PandasObject):
            return True
    return False

49 

50 

def _filter_special_cases(f):
    """
    Decorate an alignment routine so trivial inputs bypass it.

    The returned wrapper takes ``terms`` and:

    * for exactly one term, returns the result of
      ``_align_core_single_unary_op`` on that term;
    * when no term wraps a pandas object, returns the common result type of
      the term values together with ``None`` for the axes;
    * otherwise delegates to ``f(terms)``.
    """

    @wraps(f)
    def wrapper(terms):
        # a lone operand has nothing to be aligned against
        if len(terms) == 1:
            return _align_core_single_unary_op(terms[0])

        # pandas objects present: hand over to the wrapped routine
        if _any_pandas_objects(terms):
            return f(terms)

        # only scalars/arrays: the answer is just the common result type
        return result_type_many(*(term.value for term in terms)), None

    return wrapper

67 

68 

@_filter_special_cases
def _align_core(terms):
    """
    Align every axis-bearing term onto one shared set of axes, in place.

    The term with the most dimensions seeds the target axes. Each target
    axis is then widened to the outer join of the matching axis of every
    term that has an ``axes`` attribute. Finally each such term is reindexed
    onto the target axes and handed its raw ``.values`` through
    ``term.update``.

    Parameters
    ----------
    terms : sequence
        Terms exposing ``value``, ``name`` and ``update``.

    Returns
    -------
    typ : callable
        ``_constructor`` of the term with the most dimensions.
    axes : dict
        The joined axes, keyed by axis name via ``_zip_axes_from_type``.

    Warns
    -----
    PerformanceWarning
        When a reindexer has at least 10000 entries and its length differs
        from the term's axis length by an order of magnitude or more.
    """
    # function-level import as in the original (presumably avoids a circular
    # import at module load -- confirm before hoisting)
    from pandas import Series

    axed_positions = [
        pos for pos, term in enumerate(terms) if hasattr(term.value, "axes")
    ]
    ndims = Series({pos: terms[pos].value.ndim for pos in axed_positions})

    # initial axes are the axes of the term with the most dimensions
    biggest = terms[ndims.idxmax()].value
    typ = biggest._constructor
    axes = biggest.axes
    naxes = len(axes)
    last_axis = naxes - 1
    multi_axis = naxes > 1

    # pass 1: widen each target axis to the outer join of all labels
    for pos in axed_positions:
        value = terms[pos].value
        # a Series next to a multi-axis object is matched on the last axis
        series_on_last_axis = isinstance(value, ABCSeries) and multi_axis

        for axis, items in enumerate(value.axes):
            if series_on_last_axis:
                target, labels = last_axis, value.index
            else:
                target, labels = axis, items

            current = axes[target]
            if not current.is_(labels):
                axes[target] = current.join(labels, how="outer")

    # pass 2: reindex every term onto the joined axes, then strip to values
    for pos, ndim in ndims.items():
        for axis, items in enumerate(axes[:ndim]):
            ti = terms[pos].value

            if not hasattr(ti, "reindex"):
                continue

            if isinstance(ti, ABCSeries) and multi_axis:
                reindexer = axes[last_axis]
            else:
                reindexer = items

            term_axis_size = len(ti.axes[axis])
            reindexer_size = len(reindexer)

            # order of magnitude of the length mismatch (0 when they agree)
            ordm = np.log10(max(1, abs(reindexer_size - term_axis_size)))
            if ordm >= 1 and reindexer_size >= 10000:
                w = (
                    f"Alignment difference on axis {axis} is larger "
                    f"than an order of magnitude on term {repr(terms[pos].name)}, "
                    f"by more than {ordm:.4g}; performance may suffer"
                )
                # warn from this frame: stacklevel is counted from here
                warnings.warn(w, category=PerformanceWarning, stacklevel=6)

            terms[pos].update(ti.reindex(reindexer, axis=axis, copy=False))

        terms[pos].update(terms[pos].value.values)

    return typ, _zip_axes_from_type(typ, axes)

125 

126 

def align_terms(terms):
    """
    Align a set of terms.

    Parameters
    ----------
    terms : object
        Either a (possibly nested) iterable of terms, or a single
        non-iterable term exposing ``value`` and ``type``.

    Returns
    -------
    typ : object
        The type, dtype or constructor describing the result.
    axes : dict or None
        Axis-name mapping for pandas results, None otherwise.
    """
    try:
        # the parse tree is a nested list; collapse it into a flat list
        flat_terms = list(com.flatten(terms))
    except TypeError:
        # not iterable, so it is a lone constant or variable
        lone_value = terms.value
        if not isinstance(lone_value, (ABCSeries, ABCDataFrame)):
            return np.result_type(terms.type), None
        klass = type(lone_value)
        return klass, _zip_axes_from_type(klass, lone_value.axes)

    # all scalars: nothing to align, just report the common scalar type
    if all(term.is_scalar for term in flat_terms):
        scalar_values = (term.value for term in flat_terms)
        return result_type_many(*scalar_values).type, None

    # general case: run the main alignment
    result_typ, result_axes = _align_core(flat_terms)
    return result_typ, result_axes

148 

149 

def reconstruct_object(typ, obj, axes, dtype):
    """
    Reconstruct an object given its type, raw value, and possibly empty
    (None) axes.

    Parameters
    ----------
    typ : object
        A type, a ``functools.partial``, or an object whose ``type``
        attribute holds the type to build.
    obj : object
        The value to use in the type constructor; its ``dtype`` and
        ``shape`` are read.
    axes : dict
        The axes to use to construct the resulting pandas object
    dtype : object
        Combined with ``obj.dtype`` through ``np.result_type`` to choose the
        dtype of the result.

    Returns
    -------
    ret : typ
        An object of type ``typ`` with the value `obj` and possible axes
        `axes`.
    """
    # dtype-like inputs carry the real type in ``.type``; others pass through
    typ = getattr(typ, "type", typ)

    res_t = np.result_type(obj.dtype, dtype)

    # pandas classes are rebuilt directly from the value, dtype and axes
    if not isinstance(typ, partial) and issubclass(typ, PandasObject):
        return typ(obj, dtype=res_t, **axes)

    # special case for pathological things like ~True/~False
    if hasattr(res_t, "type") and typ == np.bool_ and res_t != np.bool_:
        return res_t.type(obj)

    ret_value = typ(obj).astype(res_t)
    # A one-element 1-d input must come back as a one-element array rather
    # than a scalar, e.g. np.array([0]) as opposed to np.array(0)
    needs_rewrap = (
        len(obj.shape) == 1
        and len(obj) == 1
        and not isinstance(ret_value, np.ndarray)
    )
    if needs_rewrap:
        ret_value = np.array([ret_value]).astype(res_t)

    return ret_value