Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1""" 

2Misc tools for implementing data structures 

3 

4Note: pandas.core.common is *not* part of the public API. 

5""" 

6 

7import collections 

8from collections import abc 

9from datetime import datetime, timedelta 

10from functools import partial 

11import inspect 

12from typing import Any, Collection, Iterable, Union 

13 

14import numpy as np 

15 

16from pandas._libs import lib, tslibs 

17from pandas._typing import T 

18 

19from pandas.core.dtypes.cast import construct_1d_object_array_from_listlike 

20from pandas.core.dtypes.common import ( 

21 is_array_like, 

22 is_bool_dtype, 

23 is_extension_array_dtype, 

24 is_integer, 

25) 

26from pandas.core.dtypes.generic import ABCIndex, ABCIndexClass, ABCSeries 

27from pandas.core.dtypes.inference import _iterable_not_string 

28from pandas.core.dtypes.missing import isna, isnull, notnull # noqa 

29 

30 

class SettingWithCopyError(ValueError):
    """
    ValueError subclass used to signal a setting-with-copy problem.

    Defines no attributes or behavior beyond those of ValueError.
    """

33 

34 

class SettingWithCopyWarning(Warning):
    """
    Warning category used to signal a setting-with-copy problem.

    Defines no attributes or behavior beyond those of Warning.
    """

37 

38 

def flatten(l):
    """
    Flatten an arbitrarily nested sequence.

    Parameters
    ----------
    l : sequence
        The non-string sequence to flatten.

    Returns
    -------
    flattened : generator
        Yields the leaf elements of ``l`` depth-first, left to right.

    Notes
    -----
    Strings are not considered sequences here: whether an element is
    descended into is decided by ``_iterable_not_string``.
    """
    for el in l:
        if _iterable_not_string(el):
            # Delegate to the nested generator rather than re-yielding by hand.
            yield from flatten(el)
        else:
            yield el

62 

63 

def consensus_name_attr(objs):
    """
    Return the ``name`` shared by all of ``objs``, or None if they disagree.

    Parameters
    ----------
    objs : sequence
        Objects exposing a ``name`` attribute; must support ``objs[0]``
        and ``objs[1:]``.

    Returns
    -------
    object or None
        ``objs[0].name`` when every later object compares equal to the
        running name; None once any comparison differs or raises
        ValueError.
    """
    consensus = objs[0].name
    for other in objs[1:]:
        try:
            # bool() stays inside the try: an ambiguous truth value
            # raises ValueError and counts as a disagreement.
            differs = bool(other.name != consensus)
        except ValueError:
            differs = True
        if differs:
            consensus = None
    return consensus

73 

74 

def maybe_box(indexer, values, obj, key):
    """
    Return ``values`` unless it is an ndarray, in which case look it up again.

    Parameters
    ----------
    indexer : object
        Must provide ``get_loc(key)``; only used when ``values`` is an
        ndarray.
    values : object
        The value that was retrieved.
    obj : object
        Container indexed with the location returned by ``indexer``.
    key : object
        Key passed to ``indexer.get_loc``.

    Returns
    -------
    object
        ``obj[indexer.get_loc(key)]`` if ``values`` is an ndarray
        (multiple values came back), otherwise ``values`` itself.
    """
    if not isinstance(values, np.ndarray):
        # a single value: hand it back untouched
        return values

    # multiples came back, so box them by indexing obj at the key's location
    return obj[indexer.get_loc(key)]

83 

84 

def maybe_box_datetimelike(value):
    """
    Turn a datetime-like scalar into a Timestamp / Timedelta as needed.

    Parameters
    ----------
    value : object

    Returns
    -------
    object
        ``tslibs.Timestamp(value)`` for ``np.datetime64`` / ``datetime``
        input, ``tslibs.Timedelta(value)`` for ``np.timedelta64`` /
        ``timedelta`` input, and ``value`` unchanged otherwise.
    """
    if isinstance(value, (np.datetime64, datetime)):
        return tslibs.Timestamp(value)
    if isinstance(value, (np.timedelta64, timedelta)):
        return tslibs.Timedelta(value)
    return value

94 

95 

# Module-level alias for ``lib.values_from_object`` so it can be referenced
# by its bare name from this module.
values_from_object = lib.values_from_object

97 

98 

def is_bool_indexer(key: Any) -> bool:
    """
    Check whether `key` is a valid boolean indexer.

    Parameters
    ----------
    key : Any
        Only list-likes may be considered boolean indexers; every other
        type yields False.  Series, ndarray, Index and extension-dtype
        array-likes qualify when they are object-dtype boolean arrays or
        when their dtype satisfies ``is_bool_dtype``.  Plain lists qualify
        when ``np.asarray`` turns them into a bool array of equal length.

    Returns
    -------
    bool
        Whether `key` is a valid boolean indexer.

    Raises
    ------
    ValueError
        When the array is object-dtype, is not a boolean array, and
        contains missing values.

    See Also
    --------
    check_array_indexer : Check that `key` is a valid array to index,
        and convert to an ndarray.
    """
    array_like = isinstance(key, (ABCSeries, np.ndarray, ABCIndex)) or (
        is_array_like(key) and is_extension_array_dtype(key.dtype)
    )

    if array_like:
        if key.dtype == np.object_:
            # object dtype: inspect the actual elements
            key = np.asarray(values_from_object(key))
            if lib.is_bool_array(key):
                return True

            na_msg = "Cannot mask with non-boolean array containing NA / NaN values"
            if isna(key).any():
                raise ValueError(na_msg)
            return False

        if is_bool_dtype(key.dtype):
            return True
        return False

    if isinstance(key, list):
        try:
            arr = np.asarray(key)
            return arr.dtype == np.bool_ and len(arr) == len(key)
        except TypeError:  # pragma: no cover
            return False

    return False

149 

150 

def cast_scalar_indexer(val):
    """
    To avoid numpy DeprecationWarnings, cast float to integer where valid.

    Parameters
    ----------
    val : scalar

    Returns
    -------
    outval : scalar
        ``int(val)`` when ``val`` is a float holding an integral value;
        otherwise ``val`` unchanged (including NaN and +/-inf).
    """
    # assumes lib.is_scalar(val)
    # float.is_integer() is False for NaN and +/-inf, so those pass through
    # unchanged.  The earlier ``val == int(val)`` check raised ValueError for
    # NaN and OverflowError for infinities before the comparison could run.
    if lib.is_float(val) and float(val).is_integer():
        return int(val)
    return val

167 

168 

def not_none(*args):
    """
    Returns a generator consisting of the arguments that are not None.
    """

    def _without_none():
        for candidate in args:
            if candidate is not None:
                yield candidate

    return _without_none()

174 

175 

def any_none(*args):
    """
    Returns a boolean indicating if any argument is None.
    """
    for candidate in args:
        if candidate is None:
            return True
    return False

181 

182 

def all_none(*args):
    """
    Returns a boolean indicating if all arguments are None.
    """
    for candidate in args:
        if candidate is not None:
            return False
    return True

188 

189 

def any_not_none(*args):
    """
    Returns a boolean indicating if any argument is not None.
    """
    # "some argument is not None" is the negation of "every argument is None"
    return not all(candidate is None for candidate in args)

195 

196 

def all_not_none(*args):
    """
    Returns a boolean indicating if all arguments are not None.
    """
    # "every argument is not None" is the negation of "some argument is None"
    return not any(candidate is None for candidate in args)

202 

203 

def count_not_none(*args):
    """
    Returns the count of arguments that are not None.
    """
    total = 0
    for candidate in args:
        if candidate is not None:
            total += 1
    return total

209 

210 

def try_sort(iterable):
    """
    Return the items of ``iterable`` as a sorted list when they are orderable.

    Parameters
    ----------
    iterable : iterable

    Returns
    -------
    list
        A sorted list, or the items in their original order if sorting
        raises TypeError.
    """
    items = list(iterable)
    try:
        ordered = sorted(items)
    except TypeError:
        # items cannot be compared with each other: keep the given order
        return items
    return ordered

217 

218 

def asarray_tuplesafe(values, dtype=None):
    """
    Convert ``values`` to an ndarray, keeping nested rows together as tuples.

    Parameters
    ----------
    values : list, tuple, array-like or iterable
        Anything that is not a list/tuple and has no ``__array__`` is
        first consumed into a list.
    dtype : dtype, optional
        Passed to ``np.asarray``.

    Returns
    -------
    np.ndarray
        For ABCIndexClass input, its ``.values``.  Otherwise the converted
        array; string-dtype results are re-read as object dtype, and a
        2-D result is replaced by a 1-D object array of tuples.
    """
    array_ready = isinstance(values, (list, tuple)) or hasattr(values, "__array__")
    if not array_ready:
        # plain iterables are materialized before conversion
        values = list(values)
    elif isinstance(values, ABCIndexClass):
        return values.values

    if isinstance(values, list):
        if dtype in [np.object_, object]:
            return construct_1d_object_array_from_listlike(values)

    arr = np.asarray(values, dtype=dtype)

    if issubclass(arr.dtype.type, str):
        arr = np.asarray(values, dtype=object)

    if arr.ndim != 2:
        return arr

    # Avoid building an array of arrays:
    rows = [tuple(row) for row in values]
    return construct_1d_object_array_from_listlike(rows)

240 

241 

def index_labels_to_array(labels, dtype=None):
    """
    Transform label or iterable of labels to array, for use in Index.

    Parameters
    ----------
    labels : label or iterable of labels
        A str or tuple is treated as one label.  Anything else that is not
        already a list or ndarray is consumed into a list, or wrapped in a
        one-element list when it is not iterable.
    dtype : dtype
        If specified, use as dtype of the resulting array, otherwise infer.

    Returns
    -------
    array
    """
    listlike = [labels] if isinstance(labels, (str, tuple)) else labels

    if not isinstance(listlike, (list, np.ndarray)):
        try:
            listlike = list(listlike)
        except TypeError:  # non-iterable
            listlike = [listlike]

    return asarray_tuplesafe(listlike, dtype=dtype)

267 

268 

def maybe_make_list(obj):
    """
    Wrap ``obj`` in a one-element list unless it is None, a tuple or a list.

    Returns
    -------
    object
        ``obj`` unchanged when it is None, a tuple or a list; otherwise
        ``[obj]``.
    """
    if obj is None or isinstance(obj, (tuple, list)):
        return obj
    return [obj]

273 

274 

def maybe_iterable_to_list(obj: Union[Iterable[T], T]) -> Union[Collection[T], T]:
    """
    If obj is Iterable but not list-like, consume into list.

    "Not list-like" here means not an instance of ``abc.Sized``; sized or
    non-iterable objects are returned unchanged.
    """
    if not isinstance(obj, abc.Iterable) or isinstance(obj, abc.Sized):
        return obj
    return list(obj)

282 

283 

def is_null_slice(obj):
    """
    We have a null slice.

    That is, ``obj`` is a slice whose start, stop and step are all None.
    """
    if not isinstance(obj, slice):
        return False
    return all(bound is None for bound in (obj.start, obj.stop, obj.step))

294 

295 

def is_true_slices(l):
    """
    Find non-trivial slices in "l": return a list of booleans with same length.

    An entry is True when the element is a slice with at least one of
    start, stop or step set.
    """
    flags = []
    for k in l:
        nontrivial = isinstance(k, slice) and any(
            bound is not None for bound in (k.start, k.stop, k.step)
        )
        flags.append(nontrivial)
    return flags

301 

302 

303# TODO: used only once in indexing; belongs elsewhere? 

def is_full_slice(obj, l):
    """
    We have a full length slice.

    That is, ``obj`` is a slice starting at 0, stopping at ``l`` and
    having no step.
    """
    if not isinstance(obj, slice):
        return False
    return obj.start == 0 and obj.stop == l and obj.step is None

311 

312 

def get_callable_name(obj):
    """
    Return a name for ``obj``, or None when none can be determined.

    Returns
    -------
    str or None
        ``obj.__name__`` when present; for a ``functools.partial`` the name
        of the wrapped function; for any other object with ``__call__`` its
        class name; otherwise None.
    """
    missing = object()

    # typical case has name
    name = getattr(obj, "__name__", missing)
    if name is not missing:
        return name

    # some objects don't; recurse into what a partial wraps
    if isinstance(obj, partial):
        return get_callable_name(obj.func)

    # fall back to class name
    if hasattr(obj, "__call__"):
        return type(obj).__name__

    # everything failed (probably because the argument wasn't actually
    # callable); None rather than '' lets callers distinguish "no name"
    # from an empty name
    return None

328 

329 

def apply_if_callable(maybe_callable, obj, **kwargs):
    """
    Evaluate possibly callable input using obj and kwargs if it is callable,
    otherwise return as it is.

    Parameters
    ----------
    maybe_callable : possibly a callable
    obj : NDFrame
    **kwargs

    Returns
    -------
    object
        ``maybe_callable(obj, **kwargs)`` when callable, else
        ``maybe_callable`` itself.
    """
    if not callable(maybe_callable):
        return maybe_callable
    return maybe_callable(obj, **kwargs)

346 

347 

def dict_compat(d):
    """
    Helper function to convert datetimelike-keyed dicts
    to Timestamp-keyed dict.

    Parameters
    ----------
    d: dict like object
        Must provide ``items()``.

    Returns
    -------
    dict
        A new dict with each key passed through ``maybe_box_datetimelike``
        and the values unchanged.

    """
    boxed = {}
    for key, value in d.items():
        boxed[maybe_box_datetimelike(key)] = value
    return boxed

363 

364 

def standardize_mapping(into):
    """
    Helper function to standardize a supplied mapping.

    .. versionadded:: 0.21.0

    Parameters
    ----------
    into : instance or subclass of collections.abc.Mapping
        Must be a class, an initialized collections.defaultdict,
        or an instance of a collections.abc.Mapping subclass.

    Returns
    -------
    mapping : a collections.abc.Mapping subclass or other constructor
        a callable object that can accept an iterator to create
        the desired Mapping.

    Raises
    ------
    TypeError
        If the resolved class is not a Mapping subclass, or if the
        ``collections.defaultdict`` class itself (not an instance) is given.

    See Also
    --------
    DataFrame.to_dict
    Series.to_dict
    """
    if inspect.isclass(into):
        mapping_cls = into
    elif isinstance(into, collections.defaultdict):
        # keep the instance's default_factory in the returned constructor
        return partial(collections.defaultdict, into.default_factory)
    else:
        mapping_cls = type(into)

    if not issubclass(mapping_cls, abc.Mapping):
        raise TypeError(f"unsupported type: {mapping_cls}")
    if mapping_cls == collections.defaultdict:
        raise TypeError("to_dict() only accepts initialized defaultdicts")
    return mapping_cls

397 

398 

def random_state(state=None):
    """
    Helper function for processing random_state arguments.

    Parameters
    ----------
    state : int, np.random.RandomState, None.
        If receives an int, passes to np.random.RandomState() as seed.
        If receives an np.random.RandomState object, just returns object.
        If receives `None`, returns np.random.
        If receives anything else, raises an informative ValueError.
        Default None.

    Returns
    -------
    np.random.RandomState
        Or the ``np.random`` module itself when ``state`` is None.

    Raises
    ------
    ValueError
        If ``state`` is not an integer, a RandomState or None.
    """
    if is_integer(state):
        return np.random.RandomState(state)
    if isinstance(state, np.random.RandomState):
        return state
    if state is None:
        return np.random
    raise ValueError("random_state must be an integer, a numpy RandomState, or None")

427 

428 

def pipe(obj, func, *args, **kwargs):
    """
    Apply a function ``func`` to object ``obj`` either by passing obj as the
    first argument to the function or, in the case that the func is a tuple,
    interpret the first element of the tuple as a function and pass the obj to
    that function as a keyword argument whose key is the value of the second
    element of the tuple.

    Parameters
    ----------
    obj : object
        The object handed to ``func``.
    func : callable or tuple of (callable, str)
        Function to apply to this object or, alternatively, a
        ``(callable, data_keyword)`` tuple where ``data_keyword`` is a
        string indicating the keyword of ``callable`` that expects the
        object.
    *args : iterable, optional
        Positional arguments passed into ``func``.
    **kwargs : dict, optional
        A dictionary of keyword arguments passed into ``func``.

    Returns
    -------
    object : the return type of ``func``.

    Raises
    ------
    ValueError
        If ``func`` is a tuple and its keyword is already in ``kwargs``.
    """
    if not isinstance(func, tuple):
        return func(obj, *args, **kwargs)

    callee, target = func
    if target in kwargs:
        msg = f"{target} is both the pipe target and a keyword argument"
        raise ValueError(msg)
    kwargs[target] = obj
    return callee(*args, **kwargs)

462 

463 

464def get_rename_function(mapper): 

465 """ 

466 Returns a function that will map names/labels, dependent if mapper 

467 is a dict, Series or just a function. 

468 """ 

469 if isinstance(mapper, (abc.Mapping, ABCSeries)): 

470 

471 def f(x): 

472 if x in mapper: 

473 return mapper[x] 

474 else: 

475 return x 

476 

477 else: 

478 f = mapper 

479 

480 return f