Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1import numbers 

2from typing import Union 

3 

4import numpy as np 

5from numpy.lib.mixins import NDArrayOperatorsMixin 

6 

7from pandas._libs import lib 

8from pandas.compat.numpy import function as nv 

9from pandas.util._decorators import Appender 

10from pandas.util._validators import validate_fillna_kwargs 

11 

12from pandas.core.dtypes.dtypes import ExtensionDtype 

13from pandas.core.dtypes.generic import ABCIndexClass, ABCSeries 

14from pandas.core.dtypes.inference import is_array_like 

15from pandas.core.dtypes.missing import isna 

16 

17from pandas import compat 

18from pandas.core import nanops 

19from pandas.core.algorithms import searchsorted, take, unique 

20from pandas.core.arrays.base import ExtensionArray, ExtensionOpsMixin 

21from pandas.core.construction import extract_array 

22from pandas.core.indexers import check_array_indexer 

23from pandas.core.missing import backfill_1d, pad_1d 

24 

25 

class PandasDtype(ExtensionDtype):
    """
    A Pandas ExtensionDtype for NumPy dtypes.

    .. versionadded:: 0.24.0

    This is mostly for internal compatibility, and is not especially
    useful on its own.

    Parameters
    ----------
    dtype : numpy.dtype, PandasDtype, or anything accepted by ``numpy.dtype``
        The NumPy dtype to wrap. An existing ``PandasDtype`` is accepted
        and unwrapped, so the constructor is idempotent.
    """

    # Attributes that define the identity of an instance.
    _metadata = ("_dtype",)

    def __init__(self, dtype):
        if isinstance(dtype, PandasDtype):
            # Re-wrapping an existing PandasDtype: reuse the NumPy dtype it
            # already holds rather than handing the wrapper to ``np.dtype``,
            # which cannot interpret it and raises TypeError.
            dtype = dtype.numpy_dtype
        dtype = np.dtype(dtype)
        self._dtype = dtype
        self._type = dtype.type

    def __repr__(self) -> str:
        return f"PandasDtype({repr(self.name)})"

    @property
    def numpy_dtype(self):
        """The NumPy dtype this PandasDtype wraps."""
        return self._dtype

    @property
    def name(self):
        """The name of the wrapped NumPy dtype."""
        return self._dtype.name

    @property
    def type(self):
        """The scalar type (``numpy.dtype.type``) of the wrapped dtype."""
        return self._type

    @property
    def _is_numeric(self):
        """Whether the wrapped dtype's kind is one of ``b``, ``i``, ``u``, ``f``, ``c``."""
        # exclude object, str, unicode, void.
        return self.kind in set("biufc")

    @property
    def _is_boolean(self):
        """Whether the wrapped dtype's kind is ``b``."""
        return self.kind == "b"

    @classmethod
    def construct_from_string(cls, string):
        """
        Construct a PandasDtype from a string understood by ``numpy.dtype``.

        Parameters
        ----------
        string : str
            Passed to ``numpy.dtype``.

        Returns
        -------
        PandasDtype

        Raises
        ------
        TypeError
            If ``numpy.dtype`` rejects `string` with a TypeError. The
            original error is chained as the cause.
        """
        try:
            return cls(np.dtype(string))
        except TypeError as err:
            if not isinstance(string, str):
                # Tell the caller the real problem instead of echoing the
                # repr of an arbitrary object as if it were a dtype string.
                msg = f"'construct_from_string' expects a string, got {type(string)}"
            else:
                msg = f"Cannot construct a 'PandasDtype' from '{string}'"
            raise TypeError(msg) from err

    @classmethod
    def construct_array_type(cls):
        """
        Return the array type associated with this dtype.

        Returns
        -------
        type
        """
        return PandasArray

    @property
    def kind(self):
        """The kind code (``numpy.dtype.kind``) of the wrapped dtype."""
        return self._dtype.kind

    @property
    def itemsize(self):
        """The element size of this data-type object."""
        return self._dtype.itemsize

100 

101 

class PandasArray(ExtensionArray, ExtensionOpsMixin, NDArrayOperatorsMixin):
    """
    A pandas ExtensionArray for NumPy data.

    .. versionadded:: 0.24.0

    This is mostly for internal compatibility, and is not especially
    useful on its own.

    Parameters
    ----------
    values : ndarray
        The NumPy ndarray to wrap. Must be 1-dimensional.
    copy : bool, default False
        Whether to copy `values`.

    Attributes
    ----------
    None

    Methods
    -------
    None
    """

    # If you're wondering why pd.Series(cls) doesn't put the array in an
    # ExtensionBlock, search for `ABCPandasArray`. We check for
    # that _typ to ensure that users don't unnecessarily use EAs inside
    # pandas internals, which turns off things like block consolidation.
    _typ = "npy_extension"
    __array_priority__ = 1000
    _ndarray: np.ndarray

    # ------------------------------------------------------------------------
    # Constructors

    def __init__(self, values: Union[np.ndarray, "PandasArray"], copy: bool = False):
        # Unwrap an instance of this class so we always store a raw ndarray.
        if isinstance(values, type(self)):
            values = values._ndarray
        if not isinstance(values, np.ndarray):
            raise ValueError(
                f"'values' must be a NumPy array, not {type(values).__name__}"
            )

        if values.ndim != 1:
            raise ValueError("PandasArray must be 1-dimensional.")

        if copy:
            values = values.copy()

        self._ndarray = values
        self._dtype = PandasDtype(values.dtype)

    @classmethod
    def _from_sequence(cls, scalars, dtype=None, copy=False):
        """
        Build a new array from a sequence of scalars.

        Parameters
        ----------
        scalars : sequence
            Converted with ``np.asarray``.
        dtype : PandasDtype, numpy dtype or None
            A PandasDtype is unwrapped to its NumPy dtype before conversion.
        copy : bool, default False
            Only has an effect when ``np.asarray`` returned `scalars` itself;
            in that case the data is copied.
        """
        if isinstance(dtype, PandasDtype):
            dtype = dtype._dtype

        result = np.asarray(scalars, dtype=dtype)
        if copy and result is scalars:
            # np.asarray handed back the caller's own array; honour `copy`.
            result = result.copy()
        return cls(result)

    @classmethod
    def _from_factorized(cls, values, original):
        """Wrap `values` in a new array; `original` is not used."""
        return cls(values)

    @classmethod
    def _concat_same_type(cls, to_concat):
        """Concatenate `to_concat` with ``np.concatenate`` and wrap the result."""
        return cls(np.concatenate(to_concat))

    # ------------------------------------------------------------------------
    # Data

    @property
    def dtype(self):
        """The PandasDtype created from the wrapped ndarray's dtype."""
        return self._dtype

    # ------------------------------------------------------------------------
    # NumPy Array Interface

    def __array__(self, dtype=None) -> np.ndarray:
        return np.asarray(self._ndarray, dtype=dtype)

    # Operand types (besides PandasArray itself) accepted by __array_ufunc__.
    _HANDLED_TYPES = (np.ndarray, numbers.Number)

    def __array_ufunc__(self, ufunc, method, *inputs, **kwargs):
        # Lightly modified version of
        # https://docs.scipy.org/doc/numpy-1.15.1/reference/generated/\
        # numpy.lib.mixins.NDArrayOperatorsMixin.html
        # The primary modification is not boxing scalar return values
        # in PandasArray, since pandas' ExtensionArrays are 1-d.
        out = kwargs.get("out", ())
        for x in inputs + out:
            # Only support operations with instances of _HANDLED_TYPES.
            # Use PandasArray instead of type(self) for isinstance to
            # allow subclasses that don't override __array_ufunc__ to
            # handle PandasArray objects.
            if not isinstance(x, self._HANDLED_TYPES + (PandasArray,)):
                return NotImplemented

        # Defer to the implementation of the ufunc on unwrapped values.
        inputs = tuple(x._ndarray if isinstance(x, PandasArray) else x for x in inputs)
        if out:
            kwargs["out"] = tuple(
                x._ndarray if isinstance(x, PandasArray) else x for x in out
            )
        result = getattr(ufunc, method)(*inputs, **kwargs)

        if type(result) is tuple and len(result):
            # multiple return values
            if not lib.is_scalar(result[0]):
                # re-box array-like results
                return tuple(type(self)(x) for x in result)
            else:
                # but not scalar reductions
                return result
        elif method == "at":
            # no return value
            return None
        else:
            # one return value
            if not lib.is_scalar(result):
                # re-box array-like results, but not scalar reductions
                result = type(self)(result)
            return result

    # ------------------------------------------------------------------------
    # Pandas ExtensionArray Interface

    def __getitem__(self, item):
        # An indexer of our own type is unwrapped to its ndarray first.
        if isinstance(item, type(self)):
            item = item._ndarray

        item = check_array_indexer(self, item)

        result = self._ndarray[item]
        if not lib.is_scalar(item):
            # Non-scalar indexers select a sub-array; keep it boxed.
            result = type(self)(result)
        return result

    def __setitem__(self, key, value):
        value = extract_array(value, extract_numpy=True)

        key = check_array_indexer(self, key)
        scalar_value = lib.is_scalar(value)

        if not scalar_value:
            # Coerce array-like values to the dtype of the backing ndarray.
            value = np.asarray(value, dtype=self._ndarray.dtype)

        self._ndarray[key] = value

    def __len__(self) -> int:
        return len(self._ndarray)

    @property
    def nbytes(self) -> int:
        """Number of bytes reported by the wrapped ndarray."""
        return self._ndarray.nbytes

    def isna(self):
        """Return the result of the module-level ``isna`` on the wrapped ndarray."""
        return isna(self._ndarray)

    def fillna(self, value=None, method=None, limit=None):
        """
        Return a new array with missing entries filled.

        Parameters
        ----------
        value : scalar or array-like, optional
            Fill value. An array-like must have the same length as the
            array; only its entries at missing positions are used.
        method : str, optional
            When given, fill by propagation: ``"pad"`` uses ``pad_1d``, any
            other validated method uses ``backfill_1d``.
        limit : int, optional
            Forwarded to the propagation function.

        Returns
        -------
        PandasArray
            Always a new object, even when nothing is missing.

        Raises
        ------
        ValueError
            If an array-like `value` has a different length than the array.
        """
        # TODO(_values_for_fillna): remove this
        value, method = validate_fillna_kwargs(value, method)

        mask = self.isna()

        if is_array_like(value):
            if len(value) != len(self):
                raise ValueError(
                    f"Length of 'value' does not match. Got ({len(value)}) "
                    f" expected {len(self)}"
                )
            value = value[mask]

        if mask.any():
            if method is not None:
                func = pad_1d if method == "pad" else backfill_1d
                new_values = func(self._ndarray, limit=limit, mask=mask)
                new_values = self._from_sequence(new_values, dtype=self.dtype)
            else:
                # fill with value
                new_values = self.copy()
                new_values[mask] = value
        else:
            new_values = self.copy()
        return new_values

    def take(self, indices, allow_fill=False, fill_value=None):
        """
        Take elements at `indices` from the wrapped ndarray.

        `allow_fill` and `fill_value` are forwarded to the module-level
        ``take``; a `fill_value` of None is replaced by ``self.dtype.na_value``.
        """
        if fill_value is None:
            # Primarily for subclasses
            fill_value = self.dtype.na_value
        result = take(
            self._ndarray, indices, allow_fill=allow_fill, fill_value=fill_value
        )
        return type(self)(result)

    def copy(self):
        """Return a new array wrapping a copy of the underlying ndarray."""
        return type(self)(self._ndarray.copy())

    def _values_for_argsort(self):
        """Return the wrapped ndarray."""
        return self._ndarray

    def _values_for_factorize(self):
        """Return the wrapped ndarray together with ``-1`` as the accompanying NA value."""
        return self._ndarray, -1

    def unique(self):
        """Return a new array built from the module-level ``unique`` of the data."""
        return type(self)(unique(self._ndarray))

    # ------------------------------------------------------------------------
    # Reductions
    #
    # The NumPy-signature keywords (`out`, `keepdims`, `dtype`, `initial`,
    # `overwrite_input`) are only handed to the matching ``nv.validate_*``
    # helper; they are not forwarded to the nanops implementation.

    def _reduce(self, name, skipna=True, **kwargs):
        """
        Dispatch the reduction `name` to the method of the same name.

        Raises
        ------
        TypeError
            If no (truthy) attribute called `name` exists on the array.
        """
        meth = getattr(self, name, None)
        if meth:
            return meth(skipna=skipna, **kwargs)
        else:
            msg = f"'{type(self).__name__}' does not implement reduction '{name}'"
            raise TypeError(msg)

    def any(self, axis=None, out=None, keepdims=False, skipna=True):
        nv.validate_any((), dict(out=out, keepdims=keepdims))
        return nanops.nanany(self._ndarray, axis=axis, skipna=skipna)

    def all(self, axis=None, out=None, keepdims=False, skipna=True):
        nv.validate_all((), dict(out=out, keepdims=keepdims))
        return nanops.nanall(self._ndarray, axis=axis, skipna=skipna)

    def min(self, axis=None, out=None, keepdims=False, skipna=True):
        nv.validate_min((), dict(out=out, keepdims=keepdims))
        return nanops.nanmin(self._ndarray, axis=axis, skipna=skipna)

    def max(self, axis=None, out=None, keepdims=False, skipna=True):
        nv.validate_max((), dict(out=out, keepdims=keepdims))
        return nanops.nanmax(self._ndarray, axis=axis, skipna=skipna)

    def sum(
        self,
        axis=None,
        dtype=None,
        out=None,
        keepdims=False,
        initial=None,
        skipna=True,
        min_count=0,
    ):
        nv.validate_sum(
            (), dict(dtype=dtype, out=out, keepdims=keepdims, initial=initial)
        )
        return nanops.nansum(
            self._ndarray, axis=axis, skipna=skipna, min_count=min_count
        )

    def prod(
        self,
        axis=None,
        dtype=None,
        out=None,
        keepdims=False,
        initial=None,
        skipna=True,
        min_count=0,
    ):
        nv.validate_prod(
            (), dict(dtype=dtype, out=out, keepdims=keepdims, initial=initial)
        )
        return nanops.nanprod(
            self._ndarray, axis=axis, skipna=skipna, min_count=min_count
        )

    def mean(self, axis=None, dtype=None, out=None, keepdims=False, skipna=True):
        nv.validate_mean((), dict(dtype=dtype, out=out, keepdims=keepdims))
        return nanops.nanmean(self._ndarray, axis=axis, skipna=skipna)

    def median(
        self, axis=None, out=None, overwrite_input=False, keepdims=False, skipna=True
    ):
        nv.validate_median(
            (), dict(out=out, overwrite_input=overwrite_input, keepdims=keepdims)
        )
        return nanops.nanmedian(self._ndarray, axis=axis, skipna=skipna)

    def std(self, axis=None, dtype=None, out=None, ddof=1, keepdims=False, skipna=True):
        nv.validate_stat_ddof_func(
            (), dict(dtype=dtype, out=out, keepdims=keepdims), fname="std"
        )
        return nanops.nanstd(self._ndarray, axis=axis, skipna=skipna, ddof=ddof)

    def var(self, axis=None, dtype=None, out=None, ddof=1, keepdims=False, skipna=True):
        nv.validate_stat_ddof_func(
            (), dict(dtype=dtype, out=out, keepdims=keepdims), fname="var"
        )
        return nanops.nanvar(self._ndarray, axis=axis, skipna=skipna, ddof=ddof)

    def sem(self, axis=None, dtype=None, out=None, ddof=1, keepdims=False, skipna=True):
        nv.validate_stat_ddof_func(
            (), dict(dtype=dtype, out=out, keepdims=keepdims), fname="sem"
        )
        return nanops.nansem(self._ndarray, axis=axis, skipna=skipna, ddof=ddof)

    def kurt(self, axis=None, dtype=None, out=None, keepdims=False, skipna=True):
        nv.validate_stat_ddof_func(
            (), dict(dtype=dtype, out=out, keepdims=keepdims), fname="kurt"
        )
        return nanops.nankurt(self._ndarray, axis=axis, skipna=skipna)

    def skew(self, axis=None, dtype=None, out=None, keepdims=False, skipna=True):
        nv.validate_stat_ddof_func(
            (), dict(dtype=dtype, out=out, keepdims=keepdims), fname="skew"
        )
        return nanops.nanskew(self._ndarray, axis=axis, skipna=skipna)

    # ------------------------------------------------------------------------
    # Additional Methods
    def to_numpy(self, dtype=None, copy=False, na_value=lib.no_default):
        """
        Convert to a NumPy ndarray.

        Parameters
        ----------
        dtype : numpy dtype, optional
            Passed to ``np.asarray``.
        copy : bool, default False
            Copy the data when ``np.asarray`` returned the wrapped ndarray
            itself.
        na_value : object, optional
            When given, missing positions in the result are set to this
            value. The result is then never the wrapped ndarray itself, so
            the array's own data is not modified.

        Returns
        -------
        numpy.ndarray
        """
        result = np.asarray(self._ndarray, dtype=dtype)

        # Writing `na_value` below must not touch our own buffer, so a
        # requested fill forces a copy just like `copy=True` does.
        if (copy or na_value is not lib.no_default) and result is self._ndarray:
            result = result.copy()

        if na_value is not lib.no_default:
            result[self.isna()] = na_value

        return result

    # No docstring here on purpose: Appender supplies the documentation from
    # ExtensionArray.searchsorted.
    @Appender(ExtensionArray.searchsorted.__doc__)
    def searchsorted(self, value, side="left", sorter=None):
        return searchsorted(self.to_numpy(), value, side=side, sorter=sorter)

    # ------------------------------------------------------------------------
    # Ops

    def __invert__(self):
        return type(self)(~self._ndarray)

    @classmethod
    def _create_arithmetic_method(cls, op):
        """
        Build the dunder method implementing `op` for this class.

        The generated method returns NotImplemented for Index/Series
        operands, unwraps operands of this class to their ndarray, applies
        `op` with NumPy floating-point warnings suppressed, and boxes the
        result. Operators that return a tuple (``divmod`` and its reflected
        form) have each element boxed separately.
        """

        def arithmetic_method(self, other):
            if isinstance(other, (ABCIndexClass, ABCSeries)):
                return NotImplemented

            elif isinstance(other, cls):
                other = other._ndarray

            with np.errstate(all="ignore"):
                result = op(self._ndarray, other)

            if isinstance(result, tuple):
                # divmod-style operators (including the reflected variant,
                # which is not the builtin `divmod` object) return a
                # (quotient, remainder) pair; passing the tuple itself to
                # the constructor would raise ValueError.
                a, b = result
                return cls(a), cls(b)

            return cls(result)

        return compat.set_function_name(arithmetic_method, f"__{op.__name__}__", cls)

    # Comparisons need exactly the same unwrap / apply / re-box handling.
    _create_comparison_method = _create_arithmetic_method


# Install the arithmetic and comparison dunder methods generated above.
PandasArray._add_arithmetic_ops()
PandasArray._add_comparison_ops()