Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1""" 

2Module that contains many useful utilities 

3for validating data or function arguments 

4""" 

5from typing import Iterable, Union 

6import warnings 

7 

8import numpy as np 

9 

10from pandas.core.dtypes.common import is_bool 

11 

12 

13def _check_arg_length(fname, args, max_fname_arg_count, compat_args): 

14 """ 

15 Checks whether 'args' has length of at most 'compat_args'. Raises 

16 a TypeError if that is not the case, similar to in Python when a 

17 function is called with too many arguments. 

18 """ 

19 if max_fname_arg_count < 0: 

20 raise ValueError("'max_fname_arg_count' must be non-negative") 

21 

22 if len(args) > len(compat_args): 

23 max_arg_count = len(compat_args) + max_fname_arg_count 

24 actual_arg_count = len(args) + max_fname_arg_count 

25 argument = "argument" if max_arg_count == 1 else "arguments" 

26 

27 raise TypeError( 

28 f"{fname}() takes at most {max_arg_count} {argument} " 

29 f"({actual_arg_count} given)" 

30 ) 

31 

32 

33def _check_for_default_values(fname, arg_val_dict, compat_args): 

34 """ 

35 Check that the keys in `arg_val_dict` are mapped to their 

36 default values as specified in `compat_args`. 

37 

38 Note that this function is to be called only when it has been 

39 checked that arg_val_dict.keys() is a subset of compat_args 

40 """ 

41 for key in arg_val_dict: 

42 # try checking equality directly with '=' operator, 

43 # as comparison may have been overridden for the left 

44 # hand object 

45 try: 

46 v1 = arg_val_dict[key] 

47 v2 = compat_args[key] 

48 

49 # check for None-ness otherwise we could end up 

50 # comparing a numpy array vs None 

51 if (v1 is not None and v2 is None) or (v1 is None and v2 is not None): 

52 match = False 

53 else: 

54 match = v1 == v2 

55 

56 if not is_bool(match): 

57 raise ValueError("'match' is not a boolean") 

58 

59 # could not compare them directly, so try comparison 

60 # using the 'is' operator 

61 except ValueError: 

62 match = arg_val_dict[key] is compat_args[key] 

63 

64 if not match: 

65 raise ValueError( 

66 f"the '{key}' parameter is not supported in " 

67 f"the pandas implementation of {fname}()" 

68 ) 

69 

70 

def validate_args(fname, args, max_fname_arg_count, compat_args):
    """
    Validate the `*args` received by a function against `compat_args`.

    Two checks are performed: `args` may not contain more entries than
    `compat_args`, and every entry must equal the default value of the
    `compat_args` key found at the same position.

    Parameters
    ----------
    fname : str
        The name of the function being passed the `*args` parameter
    args : tuple
        The `*args` parameter passed into a function
    max_fname_arg_count : int
        The maximum number of arguments that the function `fname`
        can accept, excluding those in `args`. Used for displaying
        appropriate error messages. Must be non-negative.
    compat_args : dict
        A dictionary of keys and their associated default values.
        The order of the keys matters: positional values are paired with
        the keys in iteration order. This accommodates buggy behaviour in
        some versions of `numpy`, where a signature displayed keyword
        arguments but then passed those arguments **positionally**
        internally when calling downstream implementations.

    Raises
    ------
    TypeError
        If `args` contains more values than there are `compat_args`
    ValueError
        If `args` contains values that do not correspond to those
        of the default values specified in `compat_args`
    """
    _check_arg_length(fname, args, max_fname_arg_count, compat_args)

    # Pair each positional value with the name it occupies so that the
    # error raised for a non-default value can mention that parameter.
    _check_for_default_values(fname, dict(zip(compat_args, args)), compat_args)

109 

110 

111def _check_for_invalid_keys(fname, kwargs, compat_args): 

112 """ 

113 Checks whether 'kwargs' contains any keys that are not 

114 in 'compat_args' and raises a TypeError if there is one. 

115 """ 

116 # set(dict) --> set of the dictionary's keys 

117 diff = set(kwargs) - set(compat_args) 

118 

119 if diff: 

120 bad_arg = list(diff)[0] 

121 raise TypeError(f"{fname}() got an unexpected keyword argument '{bad_arg}'") 

122 

123 

def validate_kwargs(fname, kwargs, compat_args):
    """
    Validate the `**kwargs` received by a function against `compat_args`.

    Every key of `kwargs` must be a key of `compat_args`, and every value
    must equal the default recorded for that key.

    Parameters
    ----------
    fname : str
        The name of the function being passed the `**kwargs` parameter
    kwargs : dict
        The `**kwargs` parameter passed into `fname`
    compat_args : dict
        A dictionary of keys that `kwargs` is allowed to have and their
        associated default values

    Raises
    ------
    TypeError
        If `kwargs` contains keys not in `compat_args`
    ValueError
        If `kwargs` contains keys in `compat_args` that do not
        map to the default values specified in `compat_args`
    """
    # The default-value comparison runs on a shallow copy taken up front.
    snapshot = kwargs.copy()
    _check_for_invalid_keys(fname, kwargs, compat_args)
    _check_for_default_values(fname, snapshot, compat_args)

149 

150 

def validate_args_and_kwargs(fname, args, kwargs, max_fname_arg_count, compat_args):
    """
    Checks whether parameters passed to the *args and **kwargs argument in a
    function `fname` are valid parameters as specified in `*compat_args`
    and whether or not they are set to their default values.

    The caller's `kwargs` dictionary is not modified.

    Parameters
    ----------
    fname : str
        The name of the function being passed the `*args` and `**kwargs`
        parameters
    args : tuple
        The `*args` parameter passed into a function
    kwargs : dict
        The `**kwargs` parameter passed into `fname`
    max_fname_arg_count : int
        The maximum number of arguments that the function `fname`
        can accept, excluding those in `args`. Used for displaying
        appropriate error messages. Must be non-negative.
    compat_args : dict
        A dictionary of keys that `kwargs` is allowed to
        have and their associated default values. Positional values in
        `args` are paired with these keys in iteration order.

    Raises
    ------
    TypeError
        If `args` and `kwargs` together contain more values than there are
        `compat_args`, if a parameter is supplied both positionally and by
        keyword, or if `kwargs` contains keys not in `compat_args`
    ValueError
        If `args` contains values that differ from the corresponding
        defaults, or `kwargs` contains keys in `compat_args` that do not
        map to the default value as specified in `compat_args`

    See Also
    --------
    validate_args : Purely args validation.
    validate_kwargs : Purely kwargs validation.
    """
    # Check that the total number of arguments passed in (i.e.
    # args and kwargs) does not exceed the length of compat_args
    _check_arg_length(
        fname, args + tuple(kwargs.values()), max_fname_arg_count, compat_args
    )

    # Check there is no overlap with the positional and keyword
    # arguments, similar to what is done in actual Python functions
    args_dict = dict(zip(compat_args, args))

    for key in args_dict:
        if key in kwargs:
            raise TypeError(
                f"{fname}() got multiple values for keyword argument '{key}'"
            )

    # Validate a merged copy so the caller's dict is left untouched.
    combined = {**kwargs, **args_dict}
    validate_kwargs(fname, combined, compat_args)

205 

206 

def validate_bool_kwarg(value, arg_name):
    """
    Return `value` unchanged if it is a boolean or None.

    Any other value raises a ValueError that names `arg_name` and the type
    that was actually received.
    """
    if value is None or is_bool(value):
        return value

    raise ValueError(
        f'For argument "{arg_name}" expected type bool, received '
        f"type {type(value).__name__}."
    )

215 

216 

def validate_axis_style_args(data, args, kwargs, arg_name, method_name):
    """
    Normalise mixed ``(index, columns)`` / ``(arg, axis=...)`` call styles.

    Methods that accept both ``.method(index, columns)`` and
    ``.method(arg, axis=.)`` funnel their arguments through here; the
    result is always expressed in axis-name style, e.g.
    ``{'index': foo, 'columns': bar}``.

    Values are collected in three passes, later passes overwriting earlier
    ones: the ``arg_name`` keyword (placed on the axis given by ``axis``,
    default 0), then keywords that are themselves axis names, and finally
    positional arguments. Two positional arguments are interpreted as the
    values for axes 0 and 1 and emit a FutureWarning.

    Parameters
    ----------
    data : DataFrame
        Object providing ``_AXIS_NUMBERS``, ``_AXIS_NAMES`` and
        ``_get_axis_name``.
    args : tuple
        All positional arguments from the user
    kwargs : dict
        All keyword arguments from the user
    arg_name, method_name : str
        Used for better error messages

    Returns
    -------
    kwargs : dict
        A new dictionary keyed by axis name. ``kwargs`` itself is only
        read, so callers should update it with the returned value.

    Raises
    ------
    TypeError
        If ``axis`` is combined with an axis-name keyword or with two
        positional arguments, if ``arg_name`` is given both positionally
        and by keyword, or if more than two positional arguments are given.
    """
    # TODO: Change to keyword-only args and remove all this

    # Consistency first: 'axis' and axis-name keywords are exclusive.
    if "axis" in kwargs and any(name in kwargs for name in data._AXIS_NUMBERS):
        msg = "Cannot specify both 'axis' and any of 'index' or 'columns'."
        raise TypeError(msg)

    resolved = {}

    # Pass 1: the generic argument supplied by keyword.
    if arg_name in kwargs:
        if args:
            msg = f"{method_name} got multiple values for argument '{arg_name}'"
            raise TypeError(msg)

        target = data._get_axis_name(kwargs.get("axis", 0))
        resolved[target] = kwargs[arg_name]

    # Pass 2: keywords that name an axis; anything else is skipped.
    for keyword, supplied in kwargs.items():
        try:
            axis_name = data._get_axis_name(keyword)
        except ValueError:
            continue
        resolved[axis_name] = supplied

    # Pass 3: positional arguments. Zero positionals is left for the
    # calling method to judge.
    n_positional = len(args)
    if n_positional > 2:
        msg = f"Cannot specify all of '{arg_name}', 'index', 'columns'."
        raise TypeError(msg)

    if n_positional == 1:
        target = data._get_axis_name(kwargs.get("axis", 0))
        resolved[target] = args[0]
    elif n_positional == 2:
        if "axis" in kwargs:
            # Unambiguously wrong
            msg = "Cannot specify both 'axis' and any of 'index' or 'columns'"
            raise TypeError(msg)

        template = (
            "Interpreting call\n\t'.{method_name}(a, b)' as "
            "\n\t'.{method_name}(index=a, columns=b)'.\nUse named "
            "arguments to remove any ambiguity. In the future, using "
            "positional arguments for 'index' or 'columns' will raise "
            " a 'TypeError'."
        )
        warnings.warn(
            template.format(method_name=method_name), FutureWarning, stacklevel=4
        )
        for position, supplied in enumerate(args):
            resolved[data._AXIS_NAMES[position]] = supplied

    return resolved

309 

310 

def validate_fillna_kwargs(value, method, validate_scalar_dict_value=True):
    """
    Validate the 'value' / 'method' keyword pair given to 'fillna'.

    Exactly one of the two must be supplied. A supplied 'method' is passed
    through ``clean_fill_method`` and the result is returned in its place;
    a supplied 'value' is optionally checked not to be a list or tuple.

    Parameters
    ----------
    value, method : object
        The 'value' and 'method' keyword arguments for 'fillna'.
    validate_scalar_dict_value : bool, default True
        Whether to validate that 'value' is a scalar or dict. Specifically,
        validate that it is not a list or tuple.

    Returns
    -------
    value, method : object

    Raises
    ------
    ValueError
        If neither or both of 'value' and 'method' are specified.
    TypeError
        If 'value' is a list or tuple while `validate_scalar_dict_value`
        is true.
    """
    from pandas.core.missing import clean_fill_method

    if value is None:
        if method is None:
            raise ValueError("Must specify a fill 'value' or 'method'.")
        method = clean_fill_method(method)
    elif method is not None:
        raise ValueError("Cannot specify both 'value' and 'method'.")
    elif validate_scalar_dict_value and isinstance(value, (list, tuple)):
        raise TypeError(
            '"value" parameter must be a scalar or dict, but '
            f'you passed a "{type(value).__name__}"'
        )

    return value, method

347 

348 

def validate_percentile(q: Union[float, Iterable[float]]) -> np.ndarray:
    """
    Validate percentiles (used by describe and quantile).

    This function checks if the given float or iterable of floats is a valid
    percentile, otherwise raises a ValueError. The check is vectorised, so
    scalars and arrays of any dimensionality are handled the same way.

    Parameters
    ----------
    q : float or iterable of floats
        A single percentile or an iterable of percentiles.

    Returns
    -------
    ndarray
        An ndarray of the percentiles if valid.

    Raises
    ------
    ValueError
        If any percentile lies outside the interval [0, 1]. NaN fails both
        bound comparisons and is therefore rejected as well.
    """
    msg = "percentiles should all be in the interval [0, 1]. Try {0} instead."
    q_arr = np.asarray(q)

    # Element-wise bounds check; a Python-level loop would hit an ambiguous
    # truth-value error for inputs with more than one dimension.
    if not np.all((q_arr >= 0) & (q_arr <= 1)):
        # Suggest the input rescaled from a 0-100 range to 0-1.
        raise ValueError(msg.format(q_arr / 100.0))
    return q_arr