Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1""" 

2Provide a generic structure to support window functions, 

3similar to how we have a Groupby object. 

4""" 

5from datetime import timedelta 

6from functools import partial 

7import inspect 

8from textwrap import dedent 

9from typing import Callable, Dict, List, Optional, Set, Tuple, Union 

10 

11import numpy as np 

12 

13import pandas._libs.window.aggregations as window_aggregations 

14from pandas._typing import Axis, FrameOrSeries, Scalar 

15from pandas.compat._optional import import_optional_dependency 

16from pandas.compat.numpy import function as nv 

17from pandas.util._decorators import Appender, Substitution, cache_readonly 

18 

19from pandas.core.dtypes.common import ( 

20 ensure_float64, 

21 is_bool, 

22 is_float_dtype, 

23 is_integer, 

24 is_integer_dtype, 

25 is_list_like, 

26 is_scalar, 

27 needs_i8_conversion, 

28) 

29from pandas.core.dtypes.generic import ( 

30 ABCDataFrame, 

31 ABCDateOffset, 

32 ABCDatetimeIndex, 

33 ABCPeriodIndex, 

34 ABCSeries, 

35 ABCTimedeltaIndex, 

36) 

37 

38from pandas.core.base import DataError, PandasObject, SelectionMixin, ShallowMixin 

39import pandas.core.common as com 

40from pandas.core.indexes.api import Index, ensure_index 

41from pandas.core.window.common import ( 

42 WindowGroupByMixin, 

43 _doc_template, 

44 _flex_binary_moment, 

45 _shared_docs, 

46 calculate_center_offset, 

47 calculate_min_periods, 

48 get_weighted_roll_func, 

49 zsqrt, 

50) 

51from pandas.core.window.indexers import ( 

52 BaseIndexer, 

53 FixedWindowIndexer, 

54 VariableWindowIndexer, 

55) 

56from pandas.core.window.numba_ import generate_numba_apply_func 

57 

58 

59class _Window(PandasObject, ShallowMixin, SelectionMixin): 

60 _attributes: List[str] = [ 

61 "window", 

62 "min_periods", 

63 "center", 

64 "win_type", 

65 "axis", 

66 "on", 

67 "closed", 

68 ] 

69 exclusions: Set[str] = set() 

70 

71 def __init__( 

72 self, 

73 obj, 

74 window=None, 

75 min_periods: Optional[int] = None, 

76 center: Optional[bool] = False, 

77 win_type: Optional[str] = None, 

78 axis: Axis = 0, 

79 on: Optional[Union[str, Index]] = None, 

80 closed: Optional[str] = None, 

81 **kwargs, 

82 ): 

83 

84 self.__dict__.update(kwargs) 

85 self.obj = obj 

86 self.on = on 

87 self.closed = closed 

88 self.window = window 

89 self.min_periods = min_periods 

90 self.center = center 

91 self.win_type = win_type 

92 self.win_freq = None 

93 self.axis = obj._get_axis_number(axis) if axis is not None else None 

94 self.validate() 

95 self._numba_func_cache: Dict[Optional[str], Callable] = dict() 

96 

@property
def _constructor(self):
    """Class reported as this object's constructor: ``Window``."""
    return Window

100 

@property
def is_datetimelike(self) -> Optional[bool]:
    """Always ``None`` at this level of the hierarchy."""
    return None

104 

105 @property 

106 def _on(self): 

107 return None 

108 

@property
def is_freq_type(self) -> bool:
    """Whether ``win_type`` equals the string ``"freq"``."""
    kind = self.win_type
    return kind == "freq"

112 

def validate(self) -> None:
    """
    Check the stored window arguments, raising on the first bad one.

    Raises
    ------
    ValueError
        If ``center`` is not a boolean, ``min_periods`` is not an integer,
        or ``closed`` is not one of 'right', 'both', 'left', 'neither'
        (``None`` is accepted for all three). Also raised by the signature
        check when ``window`` is a BaseIndexer.
    TypeError
        If ``obj`` is not a Series or DataFrame.
    """
    center = self.center
    if not (center is None or is_bool(center)):
        raise ValueError("center must be a boolean")

    min_periods = self.min_periods
    if not (min_periods is None or is_integer(min_periods)):
        raise ValueError("min_periods must be an integer")

    closed = self.closed
    if closed is not None and closed not in ("right", "both", "left", "neither"):
        raise ValueError("closed must be 'right', 'left', 'both' or 'neither'")

    if not isinstance(self.obj, (ABCSeries, ABCDataFrame)):
        # the message reports the type of the window object itself
        raise TypeError(f"invalid type: {type(self)}")

    indexer = self.window
    if isinstance(indexer, BaseIndexer):
        self._validate_get_window_bounds_signature(indexer)

129 

130 @staticmethod 

131 def _validate_get_window_bounds_signature(window: BaseIndexer) -> None: 

132 """ 

133 Validate that the passed BaseIndexer subclass has 

134 a get_window_bounds with the correct signature. 

135 """ 

136 get_window_bounds_signature = inspect.signature( 

137 window.get_window_bounds 

138 ).parameters.keys() 

139 expected_signature = inspect.signature( 

140 BaseIndexer().get_window_bounds 

141 ).parameters.keys() 

142 if get_window_bounds_signature != expected_signature: 

143 raise ValueError( 

144 f"{type(window).__name__} does not implement the correct signature for " 

145 f"get_window_bounds" 

146 ) 

147 

148 def _create_blocks(self): 

149 """ 

150 Split data into blocks & return conformed data. 

151 """ 

152 

153 obj = self._selected_obj 

154 

155 # filter out the on from the object 

156 if self.on is not None and not isinstance(self.on, Index): 

157 if obj.ndim == 2: 

158 obj = obj.reindex(columns=obj.columns.difference([self.on]), copy=False) 

159 blocks = obj._to_dict_of_blocks(copy=False).values() 

160 

161 return blocks, obj 

162 

163 def _gotitem(self, key, ndim, subset=None): 

164 """ 

165 Sub-classes to define. Return a sliced object. 

166 

167 Parameters 

168 ---------- 

169 key : str / list of selections 

170 ndim : 1,2 

171 requested ndim of result 

172 subset : object, default None 

173 subset to act on 

174 """ 

175 

176 # create a new object to prevent aliasing 

177 if subset is None: 

178 subset = self.obj 

179 self = self._shallow_copy(subset) 

180 self._reset_cache() 

181 if subset.ndim == 2: 

182 if is_scalar(key) and key in subset or is_list_like(key): 

183 self._selection = key 

184 return self 

185 

186 def __getattr__(self, attr: str): 

187 if attr in self._internal_names_set: 

188 return object.__getattribute__(self, attr) 

189 if attr in self.obj: 

190 return self[attr] 

191 

192 raise AttributeError( 

193 f"'{type(self).__name__}' object has no attribute '{attr}'" 

194 ) 

195 

196 def _dir_additions(self): 

197 return self.obj._dir_additions() 

198 

199 def _get_win_type(self, kwargs: Dict): 

200 """ 

201 Exists for compatibility, overriden by subclass Window. 

202 

203 Parameters 

204 ---------- 

205 kwargs : dict 

206 ignored, exists for compatibility 

207 

208 Returns 

209 ------- 

210 None 

211 """ 

212 return None 

213 

214 def _get_window(self, other=None, win_type: Optional[str] = None) -> int: 

215 """ 

216 Return window length. 

217 

218 Parameters 

219 ---------- 

220 other : 

221 ignored, exists for compatibility 

222 win_type : 

223 ignored, exists for compatibility 

224 

225 Returns 

226 ------- 

227 window : int 

228 """ 

229 if isinstance(self.window, BaseIndexer): 

230 return self.min_periods or 0 

231 return self.window 

232 

233 @property 

234 def _window_type(self) -> str: 

235 return type(self).__name__ 

236 

237 def __repr__(self) -> str: 

238 """ 

239 Provide a nice str repr of our rolling object. 

240 """ 

241 

242 attrs_list = ( 

243 f"{attr_name}={getattr(self, attr_name)}" 

244 for attr_name in self._attributes 

245 if getattr(self, attr_name, None) is not None 

246 ) 

247 attrs = ",".join(attrs_list) 

248 return f"{self._window_type} [{attrs}]" 

249 

250 def __iter__(self): 

251 url = "https://github.com/pandas-dev/pandas/issues/11704" 

252 raise NotImplementedError(f"See issue #11704 {url}") 

253 

254 def _prep_values(self, values: Optional[np.ndarray] = None) -> np.ndarray: 

255 """Convert input to numpy arrays for Cython routines""" 

256 if values is None: 

257 values = getattr(self._selected_obj, "values", self._selected_obj) 

258 

259 # GH #12373 : rolling functions error on float32 data 

260 # make sure the data is coerced to float64 

261 if is_float_dtype(values.dtype): 

262 values = ensure_float64(values) 

263 elif is_integer_dtype(values.dtype): 

264 values = ensure_float64(values) 

265 elif needs_i8_conversion(values.dtype): 

266 raise NotImplementedError( 

267 f"ops for {self._window_type} for this " 

268 f"dtype {values.dtype} are not implemented" 

269 ) 

270 else: 

271 try: 

272 values = ensure_float64(values) 

273 except (ValueError, TypeError): 

274 raise TypeError(f"cannot handle this type -> {values.dtype}") 

275 

276 # Convert inf to nan for C funcs 

277 inf = np.isinf(values) 

278 if inf.any(): 

279 values = np.where(inf, np.nan, values) 

280 

281 return values 

282 

283 def _wrap_result(self, result, block=None, obj=None): 

284 """ 

285 Wrap a single result. 

286 """ 

287 

288 if obj is None: 

289 obj = self._selected_obj 

290 index = obj.index 

291 

292 if isinstance(result, np.ndarray): 

293 

294 if result.ndim == 1: 

295 from pandas import Series 

296 

297 return Series(result, index, name=obj.name) 

298 

299 return type(obj)(result, index=index, columns=block.columns) 

300 return result 

301 

def _wrap_results(self, results, blocks, obj, exclude=None) -> FrameOrSeries:
    """
    Wrap the per-block results and combine them into one object.

    Parameters
    ----------
    results : list of ndarrays
    blocks : list of blocks
        paired positionally with ``results``
    obj : conformed data (may be resampled)
    exclude : list of columns to exclude, default to None

    Returns
    -------
    Series or DataFrame
        The first wrapped result directly if it is 1-D; ``obj`` cast to
        float64 if nothing was collected; otherwise the column-wise
        concatenation of the wrapped results, reindexed to the computed
        column order.

    Raises
    ------
    DataError
        If excluding the nuisance columns leaves no columns.
    """

    from pandas import Series, concat

    final = []
    for result, block in zip(results, blocks):

        result = self._wrap_result(result, block=block, obj=obj)
        # a 1-D result is returned as-is; nothing further is combined
        if result.ndim == 1:
            return result
        final.append(result)

    # if we have an 'on' column
    # we want to put it back into the results
    # in the same location
    columns = self._selected_obj.columns
    if self.on is not None and not self._on.equals(obj.index):

        name = self._on.name
        final.append(Series(self._on, index=obj.index, name=name))

        if self._selection is not None:

            selection = ensure_index(self._selection)

            # need to reorder to include original location of
            # the on column (if it's not already there)
            if name not in selection:
                columns = self.obj.columns
                # positions of the selected labels plus the on column within
                # the full column index; sorting restores the original order
                indexer = columns.get_indexer(selection.tolist() + [name])
                columns = columns.take(sorted(indexer))

    # exclude nuisance columns so that they are not reindexed
    if exclude is not None and exclude:
        # from here on ``columns`` is a plain list, not an Index
        columns = [c for c in columns if c not in exclude]

        if not columns:
            raise DataError("No numeric types to aggregate")

    if not len(final):
        return obj.astype("float64")
    return concat(final, axis=1).reindex(columns=columns, copy=False)

354 

355 def _center_window(self, result, window) -> np.ndarray: 

356 """ 

357 Center the result in the window. 

358 """ 

359 if self.axis > result.ndim - 1: 

360 raise ValueError("Requested axis is larger then no. of argument dimensions") 

361 

362 offset = calculate_center_offset(window) 

363 if offset > 0: 

364 lead_indexer = [slice(None)] * result.ndim 

365 lead_indexer[self.axis] = slice(offset, None) 

366 result = np.copy(result[tuple(lead_indexer)]) 

367 return result 

368 

369 def _get_roll_func(self, func_name: str) -> Callable: 

370 """ 

371 Wrap rolling function to check values passed. 

372 

373 Parameters 

374 ---------- 

375 func_name : str 

376 Cython function used to calculate rolling statistics 

377 

378 Returns 

379 ------- 

380 func : callable 

381 """ 

382 window_func = getattr(window_aggregations, func_name, None) 

383 if window_func is None: 

384 raise ValueError( 

385 f"we do not support this function in window_aggregations.{func_name}" 

386 ) 

387 return window_func 

388 

389 def _get_cython_func_type(self, func: str) -> Callable: 

390 """ 

391 Return a variable or fixed cython function type. 

392 

393 Variable algorithms do not use window while fixed do. 

394 """ 

395 if self.is_freq_type or isinstance(self.window, BaseIndexer): 

396 return self._get_roll_func(f"{func}_variable") 

397 return partial(self._get_roll_func(f"{func}_fixed"), win=self._get_window()) 

398 

399 def _get_window_indexer(self, window: int) -> BaseIndexer: 

400 """ 

401 Return an indexer class that will compute the window start and end bounds 

402 """ 

403 if isinstance(self.window, BaseIndexer): 

404 return self.window 

405 if self.is_freq_type: 

406 return VariableWindowIndexer(index_array=self._on.asi8, window_size=window) 

407 return FixedWindowIndexer(window_size=window) 

408 

409 def _apply( 

410 self, 

411 func: Callable, 

412 center: bool, 

413 require_min_periods: int = 0, 

414 floor: int = 1, 

415 is_weighted: bool = False, 

416 name: Optional[str] = None, 

417 use_numba_cache: bool = False, 

418 **kwargs, 

419 ): 

420 """ 

421 Rolling statistical measure using supplied function. 

422 

423 Designed to be used with passed-in Cython array-based functions. 

424 

425 Parameters 

426 ---------- 

427 func : callable function to apply 

428 center : bool 

429 require_min_periods : int 

430 floor : int 

431 is_weighted : bool 

432 name : str, 

433 compatibility with groupby.rolling 

434 use_numba_cache : bool 

435 whether to cache a numba compiled function. Only available for numba 

436 enabled methods (so far only apply) 

437 **kwargs 

438 additional arguments for rolling function and window function 

439 

440 Returns 

441 ------- 

442 y : type of input 

443 """ 

444 win_type = self._get_win_type(kwargs) 

445 window = self._get_window(win_type=win_type) 

446 

447 blocks, obj = self._create_blocks() 

448 block_list = list(blocks) 

449 window_indexer = self._get_window_indexer(window) 

450 

451 results = [] 

452 exclude: List[Scalar] = [] 

453 for i, b in enumerate(blocks): 

454 try: 

455 values = self._prep_values(b.values) 

456 

457 except (TypeError, NotImplementedError): 

458 if isinstance(obj, ABCDataFrame): 

459 exclude.extend(b.columns) 

460 del block_list[i] 

461 continue 

462 else: 

463 raise DataError("No numeric types to aggregate") 

464 

465 if values.size == 0: 

466 results.append(values.copy()) 

467 continue 

468 

469 # calculation function 

470 offset = calculate_center_offset(window) if center else 0 

471 additional_nans = np.array([np.nan] * offset) 

472 

473 if not is_weighted: 

474 

475 def calc(x): 

476 x = np.concatenate((x, additional_nans)) 

477 if not isinstance(window, BaseIndexer): 

478 min_periods = calculate_min_periods( 

479 window, self.min_periods, len(x), require_min_periods, floor 

480 ) 

481 else: 

482 min_periods = calculate_min_periods( 

483 self.min_periods or 1, 

484 self.min_periods, 

485 len(x), 

486 require_min_periods, 

487 floor, 

488 ) 

489 start, end = window_indexer.get_window_bounds( 

490 num_values=len(x), 

491 min_periods=self.min_periods, 

492 center=self.center, 

493 closed=self.closed, 

494 ) 

495 return func(x, start, end, min_periods) 

496 

497 else: 

498 

499 def calc(x): 

500 x = np.concatenate((x, additional_nans)) 

501 return func(x, window, self.min_periods) 

502 

503 with np.errstate(all="ignore"): 

504 if values.ndim > 1: 

505 result = np.apply_along_axis(calc, self.axis, values) 

506 else: 

507 result = calc(values) 

508 result = np.asarray(result) 

509 

510 if use_numba_cache: 

511 self._numba_func_cache[name] = func 

512 

513 if center: 

514 result = self._center_window(result, window) 

515 

516 results.append(result) 

517 

518 return self._wrap_results(results, block_list, obj, exclude) 

519 

def aggregate(self, func, *args, **kwargs):
    """
    Aggregate through ``_aggregate``, falling back to ``apply`` with
    ``raw=False`` when ``_aggregate`` yields no result.
    """
    result, _how = self._aggregate(func, *args, **kwargs)
    if result is not None:
        return result
    return self.apply(func, raw=False, args=args, kwargs=kwargs)

agg = aggregate

527 

528 _shared_docs["sum"] = dedent( 

529 """ 

530 Calculate %(name)s sum of given DataFrame or Series. 

531 

532 Parameters 

533 ---------- 

534 *args, **kwargs 

535 For compatibility with other %(name)s methods. Has no effect 

536 on the computed value. 

537 

538 Returns 

539 ------- 

540 Series or DataFrame 

541 Same type as the input, with the same index, containing the 

542 %(name)s sum. 

543 

544 See Also 

545 -------- 

546 Series.sum : Reducing sum for Series. 

547 DataFrame.sum : Reducing sum for DataFrame. 

548 

549 Examples 

550 -------- 

551 >>> s = pd.Series([1, 2, 3, 4, 5]) 

552 >>> s 

553 0 1 

554 1 2 

555 2 3 

556 3 4 

557 4 5 

558 dtype: int64 

559 

560 >>> s.rolling(3).sum() 

561 0 NaN 

562 1 NaN 

563 2 6.0 

564 3 9.0 

565 4 12.0 

566 dtype: float64 

567 

568 >>> s.expanding(3).sum() 

569 0 NaN 

570 1 NaN 

571 2 6.0 

572 3 10.0 

573 4 15.0 

574 dtype: float64 

575 

576 >>> s.rolling(3, center=True).sum() 

577 0 NaN 

578 1 6.0 

579 2 9.0 

580 3 12.0 

581 4 NaN 

582 dtype: float64 

583 

584 For DataFrame, each %(name)s sum is computed column-wise. 

585 

586 >>> df = pd.DataFrame({"A": s, "B": s ** 2}) 

587 >>> df 

588 A B 

589 0 1 1 

590 1 2 4 

591 2 3 9 

592 3 4 16 

593 4 5 25 

594 

595 >>> df.rolling(3).sum() 

596 A B 

597 0 NaN NaN 

598 1 NaN NaN 

599 2 6.0 14.0 

600 3 9.0 29.0 

601 4 12.0 50.0 

602 """ 

603 ) 

604 

605 _shared_docs["mean"] = dedent( 

606 """ 

607 Calculate the %(name)s mean of the values. 

608 

609 Parameters 

610 ---------- 

611 *args 

612 Under Review. 

613 **kwargs 

614 Under Review. 

615 

616 Returns 

617 ------- 

618 Series or DataFrame 

619 Returned object type is determined by the caller of the %(name)s 

620 calculation. 

621 

622 See Also 

623 -------- 

624 Series.%(name)s : Calling object with Series data. 

625 DataFrame.%(name)s : Calling object with DataFrames. 

626 Series.mean : Equivalent method for Series. 

627 DataFrame.mean : Equivalent method for DataFrame. 

628 

629 Examples 

630 -------- 

631 The below examples will show rolling mean calculations with window sizes of 

632 two and three, respectively. 

633 

634 >>> s = pd.Series([1, 2, 3, 4]) 

635 >>> s.rolling(2).mean() 

636 0 NaN 

637 1 1.5 

638 2 2.5 

639 3 3.5 

640 dtype: float64 

641 

642 >>> s.rolling(3).mean() 

643 0 NaN 

644 1 NaN 

645 2 2.0 

646 3 3.0 

647 dtype: float64 

648 """ 

649 ) 

650 

651 _shared_docs["var"] = dedent( 

652 """ 

653 Calculate unbiased %(name)s variance. 

654 %(versionadded)s 

655 Normalized by N-1 by default. This can be changed using the `ddof` 

656 argument. 

657 

658 Parameters 

659 ---------- 

660 ddof : int, default 1 

661 Delta Degrees of Freedom. The divisor used in calculations 

662 is ``N - ddof``, where ``N`` represents the number of elements. 

663 *args, **kwargs 

664 For NumPy compatibility. No additional arguments are used. 

665 

666 Returns 

667 ------- 

668 Series or DataFrame 

669 Returns the same object type as the caller of the %(name)s calculation. 

670 

671 See Also 

672 -------- 

673 Series.%(name)s : Calling object with Series data. 

674 DataFrame.%(name)s : Calling object with DataFrames. 

675 Series.var : Equivalent method for Series. 

676 DataFrame.var : Equivalent method for DataFrame. 

677 numpy.var : Equivalent method for Numpy array. 

678 

679 Notes 

680 ----- 

681 The default `ddof` of 1 used in :meth:`Series.var` is different than the 

682 default `ddof` of 0 in :func:`numpy.var`. 

683 

684 A minimum of 1 period is required for the rolling calculation. 

685 

686 Examples 

687 -------- 

688 >>> s = pd.Series([5, 5, 6, 7, 5, 5, 5]) 

689 >>> s.rolling(3).var() 

690 0 NaN 

691 1 NaN 

692 2 0.333333 

693 3 1.000000 

694 4 1.000000 

695 5 1.333333 

696 6 0.000000 

697 dtype: float64 

698 

699 >>> s.expanding(3).var() 

700 0 NaN 

701 1 NaN 

702 2 0.333333 

703 3 0.916667 

704 4 0.800000 

705 5 0.700000 

706 6 0.619048 

707 dtype: float64 

708 """ 

709 ) 

710 

711 _shared_docs["std"] = dedent( 

712 """ 

713 Calculate %(name)s standard deviation. 

714 %(versionadded)s 

715 Normalized by N-1 by default. This can be changed using the `ddof` 

716 argument. 

717 

718 Parameters 

719 ---------- 

720 ddof : int, default 1 

721 Delta Degrees of Freedom. The divisor used in calculations 

722 is ``N - ddof``, where ``N`` represents the number of elements. 

723 *args, **kwargs 

724 For NumPy compatibility. No additional arguments are used. 

725 

726 Returns 

727 ------- 

728 Series or DataFrame 

729 Returns the same object type as the caller of the %(name)s calculation. 

730 

731 See Also 

732 -------- 

733 Series.%(name)s : Calling object with Series data. 

734 DataFrame.%(name)s : Calling object with DataFrames. 

735 Series.std : Equivalent method for Series. 

736 DataFrame.std : Equivalent method for DataFrame. 

737 numpy.std : Equivalent method for Numpy array. 

738 

739 Notes 

740 ----- 

741 The default `ddof` of 1 used in Series.std is different than the default 

742 `ddof` of 0 in numpy.std. 

743 

744 A minimum of one period is required for the rolling calculation. 

745 

746 Examples 

747 -------- 

748 >>> s = pd.Series([5, 5, 6, 7, 5, 5, 5]) 

749 >>> s.rolling(3).std() 

750 0 NaN 

751 1 NaN 

752 2 0.577350 

753 3 1.000000 

754 4 1.000000 

755 5 1.154701 

756 6 0.000000 

757 dtype: float64 

758 

759 >>> s.expanding(3).std() 

760 0 NaN 

761 1 NaN 

762 2 0.577350 

763 3 0.957427 

764 4 0.894427 

765 5 0.836660 

766 6 0.786796 

767 dtype: float64 

768 """ 

769 ) 

770 

771 

772class Window(_Window): 

773 """ 

774 Provide rolling window calculations. 

775 

776 Parameters 

777 ---------- 

778 window : int, offset, or BaseIndexer subclass 

779 Size of the moving window. This is the number of observations used for 

780 calculating the statistic. Each window will be a fixed size. 

781 

782 If it's an offset then this will be the time period of each window. Each 

783 window will be variable-sized, based on the observations included in 

784 the time-period. This is only valid for datetimelike indexes. 

785 

786 If a BaseIndexer subclass is passed, calculates the window boundaries 

787 based on the defined ``get_window_bounds`` method. Additional rolling 

788 keyword arguments, namely `min_periods`, `center`, and 

789 `closed` will be passed to `get_window_bounds`. 

790 min_periods : int, default None 

791 Minimum number of observations in window required to have a value 

792 (otherwise result is NA). For a window that is specified by an offset, 

793 `min_periods` will default to 1. Otherwise, `min_periods` will default 

794 to the size of the window. 

795 center : bool, default False 

796 Set the labels at the center of the window. 

797 win_type : str, default None 

798 Provide a window type. If ``None``, all points are evenly weighted. 

799 See the notes below for further information. 

800 on : str, optional 

801 For a DataFrame, a datetime-like column or MultiIndex level on which 

802 to calculate the rolling window, rather than the DataFrame's index. 

803 Provided integer column is ignored and excluded from result since 

804 an integer index is not used to calculate the rolling window. 

805 axis : int or str, default 0 

806 closed : str, default None 

807 Make the interval closed on the 'right', 'left', 'both' or 

808 'neither' endpoints. 

809 For offset-based windows, it defaults to 'right'. 

810 For fixed windows, defaults to 'both'. Remaining cases not implemented 

811 for fixed windows. 

812 

813 Returns 

814 ------- 

815 a Window or Rolling sub-classed for the particular operation 

816 

817 See Also 

818 -------- 

819 expanding : Provides expanding transformations. 

820 ewm : Provides exponential weighted functions. 

821 

822 Notes 

823 ----- 

824 By default, the result is set to the right edge of the window. This can be 

825 changed to the center of the window by setting ``center=True``. 

826 

827 To learn more about the offsets & frequency strings, please see `this link 

828 <https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#offset-aliases>`__. 

829 

830 The recognized win_types are: 

831 

832 * ``boxcar`` 

833 * ``triang`` 

834 * ``blackman`` 

835 * ``hamming`` 

836 * ``bartlett`` 

837 * ``parzen`` 

838 * ``bohman`` 

839 * ``blackmanharris`` 

840 * ``nuttall`` 

841 * ``barthann`` 

842 * ``kaiser`` (needs beta) 

843 * ``gaussian`` (needs std) 

844 * ``general_gaussian`` (needs power, width) 

845 * ``slepian`` (needs width) 

846 * ``exponential`` (needs tau), center is set to None. 

847 

848 If ``win_type=None`` all points are evenly weighted. To learn more about 

849 different window types see `scipy.signal window functions 

850 <https://docs.scipy.org/doc/scipy/reference/signal.html#window-functions>`__. 

851 

852 Examples 

853 -------- 

854 

855 >>> df = pd.DataFrame({'B': [0, 1, 2, np.nan, 4]}) 

856 >>> df 

857 B 

858 0 0.0 

859 1 1.0 

860 2 2.0 

861 3 NaN 

862 4 4.0 

863 

864 Rolling sum with a window length of 2, using the 'triang' 

865 window type. 

866 

867 >>> df.rolling(2, win_type='triang').sum() 

868 B 

869 0 NaN 

870 1 0.5 

871 2 1.5 

872 3 NaN 

873 4 NaN 

874 

875 Rolling sum with a window length of 2, using the 'gaussian' 

876 window type (note how we need to specify std). 

877 

878 >>> df.rolling(2, win_type='gaussian').sum(std=3) 

879 B 

880 0 NaN 

881 1 0.986207 

882 2 2.958621 

883 3 NaN 

884 4 NaN 

885 

886 Rolling sum with a window length of 2, min_periods defaults 

887 to the window length. 

888 

889 >>> df.rolling(2).sum() 

890 B 

891 0 NaN 

892 1 1.0 

893 2 3.0 

894 3 NaN 

895 4 NaN 

896 

897 Same as above, but explicitly set the min_periods 

898 

899 >>> df.rolling(2, min_periods=1).sum() 

900 B 

901 0 0.0 

902 1 1.0 

903 2 3.0 

904 3 2.0 

905 4 4.0 

906 

907 A ragged (meaning not-a-regular frequency), time-indexed DataFrame 

908 

909 >>> df = pd.DataFrame({'B': [0, 1, 2, np.nan, 4]}, 

910 ... index = [pd.Timestamp('20130101 09:00:00'), 

911 ... pd.Timestamp('20130101 09:00:02'), 

912 ... pd.Timestamp('20130101 09:00:03'), 

913 ... pd.Timestamp('20130101 09:00:05'), 

914 ... pd.Timestamp('20130101 09:00:06')]) 

915 

916 >>> df 

917 B 

918 2013-01-01 09:00:00 0.0 

919 2013-01-01 09:00:02 1.0 

920 2013-01-01 09:00:03 2.0 

921 2013-01-01 09:00:05 NaN 

922 2013-01-01 09:00:06 4.0 

923 

924 Contrasting to an integer rolling window, this will roll a variable 

925 length window corresponding to the time period. 

926 The default for min_periods is 1. 

927 

928 >>> df.rolling('2s').sum() 

929 B 

930 2013-01-01 09:00:00 0.0 

931 2013-01-01 09:00:02 1.0 

932 2013-01-01 09:00:03 3.0 

933 2013-01-01 09:00:05 NaN 

934 2013-01-01 09:00:06 4.0 

935 """ 

936 

def validate(self):
    """
    Run the base validation, then check ``window`` and ``win_type``.

    Raises
    ------
    NotImplementedError
        If ``window`` is a BaseIndexer.
    ValueError
        If ``window`` is an integer <= 0, is neither an integer nor a
        list/tuple/ndarray, or if ``win_type`` is not a string naming an
        attribute of ``scipy.signal``.
    """
    super().validate()

    window = self.window
    if isinstance(window, BaseIndexer):
        raise NotImplementedError(
            "BaseIndexer subclasses not implemented with win_types."
        )

    if isinstance(window, (list, tuple, np.ndarray)):
        # an explicit sequence needs no further checks here
        return

    if not is_integer(window):
        raise ValueError(f"Invalid window {window}")

    if window <= 0:
        raise ValueError("window must be > 0 ")

    import_optional_dependency(
        "scipy", extra="Scipy is required to generate window weight."
    )
    import scipy.signal as sig

    win_type = self.win_type
    if not isinstance(win_type, str) or getattr(sig, win_type, None) is None:
        raise ValueError(f"Invalid win_type {self.win_type}")

961 

962 def _get_win_type(self, kwargs: Dict) -> Union[str, Tuple]: 

963 """ 

964 Extract arguments for the window type, provide validation for it 

965 and return the validated window type. 

966 

967 Parameters 

968 ---------- 

969 kwargs : dict 

970 

971 Returns 

972 ------- 

973 win_type : str, or tuple 

974 """ 

975 # the below may pop from kwargs 

976 def _validate_win_type(win_type, kwargs): 

977 arg_map = { 

978 "kaiser": ["beta"], 

979 "gaussian": ["std"], 

980 "general_gaussian": ["power", "width"], 

981 "slepian": ["width"], 

982 "exponential": ["tau"], 

983 } 

984 

985 if win_type in arg_map: 

986 win_args = _pop_args(win_type, arg_map[win_type], kwargs) 

987 if win_type == "exponential": 

988 # exponential window requires the first arg (center) 

989 # to be set to None (necessary for symmetric window) 

990 win_args.insert(0, None) 

991 

992 return tuple([win_type] + win_args) 

993 

994 return win_type 

995 

996 def _pop_args(win_type, arg_names, kwargs): 

997 all_args = [] 

998 for n in arg_names: 

999 if n not in kwargs: 

1000 raise ValueError(f"{win_type} window requires {n}") 

1001 all_args.append(kwargs.pop(n)) 

1002 return all_args 

1003 

1004 return _validate_win_type(self.win_type, kwargs) 

1005 

1006 def _get_window( 

1007 self, other=None, win_type: Optional[Union[str, Tuple]] = None 

1008 ) -> np.ndarray: 

1009 """ 

1010 Get the window, weights. 

1011 

1012 Parameters 

1013 ---------- 

1014 other : 

1015 ignored, exists for compatibility 

1016 win_type : str, or tuple 

1017 type of window to create 

1018 

1019 Returns 

1020 ------- 

1021 window : ndarray 

1022 the window, weights 

1023 """ 

1024 

1025 window = self.window 

1026 if isinstance(window, (list, tuple, np.ndarray)): 

1027 return com.asarray_tuplesafe(window).astype(float) 

1028 elif is_integer(window): 

1029 import scipy.signal as sig 

1030 

1031 # GH #15662. `False` makes symmetric window, rather than periodic. 

1032 return sig.get_window(win_type, window, False).astype(float) 

1033 

1034 _agg_see_also_doc = dedent( 

1035 """ 

1036 See Also 

1037 -------- 

1038 pandas.DataFrame.rolling.aggregate 

1039 pandas.DataFrame.aggregate 

1040 """ 

1041 ) 

1042 

1043 _agg_examples_doc = dedent( 

1044 """ 

1045 Examples 

1046 -------- 

1047 

1048 >>> df = pd.DataFrame(np.random.randn(10, 3), columns=['A', 'B', 'C']) 

1049 >>> df 

1050 A B C 

1051 0 -2.385977 -0.102758 0.438822 

1052 1 -1.004295 0.905829 -0.954544 

1053 2 0.735167 -0.165272 -1.619346 

1054 3 -0.702657 -1.340923 -0.706334 

1055 4 -0.246845 0.211596 -0.901819 

1056 5 2.463718 3.157577 -1.380906 

1057 6 -1.142255 2.340594 -0.039875 

1058 7 1.396598 -1.647453 1.677227 

1059 8 -0.543425 1.761277 -0.220481 

1060 9 -0.640505 0.289374 -1.550670 

1061 

1062 >>> df.rolling(3, win_type='boxcar').agg('mean') 

1063 A B C 

1064 0 NaN NaN NaN 

1065 1 NaN NaN NaN 

1066 2 -0.885035 0.212600 -0.711689 

1067 3 -0.323928 -0.200122 -1.093408 

1068 4 -0.071445 -0.431533 -1.075833 

1069 5 0.504739 0.676083 -0.996353 

1070 6 0.358206 1.903256 -0.774200 

1071 7 0.906020 1.283573 0.085482 

1072 8 -0.096361 0.818139 0.472290 

1073 9 0.070889 0.134399 -0.031308 

1074 """ 

1075 ) 

1076 

@Substitution(
    see_also=_agg_see_also_doc,
    examples=_agg_examples_doc,
    versionadded="",
    klass="Series/DataFrame",
    axis="",
)
@Appender(_shared_docs["aggregate"])
def aggregate(self, func, *args, **kwargs):
    # No docstring on purpose: the decorators above build __doc__.
    result, _how = self._aggregate(func, *args, **kwargs)
    if result is not None:
        return result

    # these must apply directly
    return func(self)

agg = aggregate

1095 

@Substitution(name="window")
@Appender(_shared_docs["sum"])
def sum(self, *args, **kwargs):
    # No docstring on purpose: the decorators above build __doc__.
    nv.validate_window_func("sum", args, kwargs)
    weighted_sum = get_weighted_roll_func(self._get_roll_func("roll_weighted_sum"))
    return self._apply(
        weighted_sum, center=self.center, is_weighted=True, name="sum", **kwargs
    )

1105 

@Substitution(name="window")
@Appender(_shared_docs["mean"])
def mean(self, *args, **kwargs):
    # No docstring on purpose: the decorators above build __doc__.
    nv.validate_window_func("mean", args, kwargs)
    weighted_mean = get_weighted_roll_func(self._get_roll_func("roll_weighted_mean"))
    return self._apply(
        weighted_mean, center=self.center, is_weighted=True, name="mean", **kwargs
    )

1115 

@Substitution(name="window", versionadded="\n.. versionadded:: 1.0.0\n")
@Appender(_shared_docs["var"])
def var(self, ddof=1, *args, **kwargs):
    # Docstring is supplied by the decorators above.
    nv.validate_window_func("var", args, kwargs)
    # Bind ``ddof`` into the kernel before wrapping it for weighted use.
    kernel = self._get_roll_func("roll_weighted_var")
    weighted_var = get_weighted_roll_func(partial(kernel, ddof=ddof))
    # ``std`` calls this method with name="std"; discard any incoming
    # ``name`` so it cannot collide with the explicit ``name="var"`` below.
    kwargs.pop("name", None)
    return self._apply(
        weighted_var, center=self.center, is_weighted=True, name="var", **kwargs
    )

1126 

@Substitution(name="window", versionadded="\n.. versionadded:: 1.0.0\n")
@Appender(_shared_docs["std"])
def std(self, ddof=1, *args, **kwargs):
    # Docstring is supplied by the decorators above.
    nv.validate_window_func("std", args, kwargs)
    # The standard deviation is ``zsqrt`` applied to the weighted variance.
    variance = self.var(ddof=ddof, name="std", **kwargs)
    return zsqrt(variance)

1132 

1133 

class _Rolling(_Window):
    # Thin subclass of ``_Window`` whose only addition is ``_constructor``.

    @property
    def _constructor(self):
        """Return the ``Rolling`` class."""
        return Rolling

1138 

1139 

1140class _Rolling_and_Expanding(_Rolling): 

1141 

1142 _shared_docs["count"] = dedent( 

1143 r""" 

1144 The %(name)s count of any non-NaN observations inside the window. 

1145 

1146 Returns 

1147 ------- 

1148 Series or DataFrame 

1149 Returned object type is determined by the caller of the %(name)s 

1150 calculation. 

1151 

1152 See Also 

1153 -------- 

1154 Series.%(name)s : Calling object with Series data. 

1155 DataFrame.%(name)s : Calling object with DataFrames. 

1156 DataFrame.count : Count of the full DataFrame. 

1157 

1158 Examples 

1159 -------- 

1160 >>> s = pd.Series([2, 3, np.nan, 10]) 

1161 >>> s.rolling(2).count() 

1162 0 1.0 

1163 1 2.0 

1164 2 1.0 

1165 3 1.0 

1166 dtype: float64 

1167 >>> s.rolling(3).count() 

1168 0 1.0 

1169 1 2.0 

1170 2 2.0 

1171 3 2.0 

1172 dtype: float64 

1173 >>> s.rolling(4).count() 

1174 0 1.0 

1175 1 2.0 

1176 2 2.0 

1177 3 3.0 

1178 dtype: float64 

1179 """ 

1180 ) 

1181 

def count(self):
    """
    Count the non-NA observations in each window.

    Every block from ``_create_blocks`` is turned into a 0/1 indicator of
    non-missing values, a new window object is built over that indicator
    with ``self._constructor``, and the indicator is summed per window.
    The per-block results are combined by ``_wrap_results``.
    """
    blocks, obj = self._create_blocks()

    def _count_block(block):
        # 1 where a value is present, 0 where it is missing.
        present = block.notna().astype(int)
        indicator_window = self._constructor(
            present,
            window=self._get_window(),
            min_periods=self.min_periods or 0,
            center=self.center,
            axis=self.axis,
            closed=self.closed,
        )
        return indicator_window.sum()

    results = [_count_block(block) for block in blocks]
    return self._wrap_results(results, blocks, obj)

1199 

1200 _shared_docs["apply"] = dedent( 

1201 r""" 

1202 The %(name)s function's apply function. 

1203 

1204 Parameters 

1205 ---------- 

1206 func : function 

1207 Must produce a single value from an ndarray input if ``raw=True`` 

1208 or a single value from a Series if ``raw=False``. Can also accept a 

1209 Numba JIT function with ``engine='numba'`` specified. 

1210 

1211 .. versionchanged:: 1.0.0 

1212 

1213 raw : bool, default False 

1214 * ``False`` : passes each row or column as a Series to the 

1215 function. 

1216 * ``True`` : the passed function will receive ndarray 

1217 objects instead. 

1218 If you are just applying a NumPy reduction function this will 

1219 achieve much better performance. 

1220 engine : str, default 'cython' 

1221 * ``'cython'`` : Runs rolling apply through C-extensions from cython. 

1222 * ``'numba'`` : Runs rolling apply through JIT compiled code from numba. 

1223 Only available when ``raw`` is set to ``True``. 

1224 

1225 .. versionadded:: 1.0.0 

1226 

1227 engine_kwargs : dict, default None 

1228 * For ``'cython'`` engine, there are no accepted ``engine_kwargs`` 

1229 * For ``'numba'`` engine, the engine can accept ``nopython``, ``nogil`` 

1230 and ``parallel`` dictionary keys. The values must either be ``True`` or 

1231 ``False``. The default ``engine_kwargs`` for the ``'numba'`` engine is 

1232 ``{'nopython': True, 'nogil': False, 'parallel': False}`` and will be 

1233 applied to both the ``func`` and the ``apply`` rolling aggregation. 

1234 

1235 .. versionadded:: 1.0.0 

1236 

1237 args : tuple, default None 

1238 Positional arguments to be passed into func. 

1239 kwargs : dict, default None 

1240 Keyword arguments to be passed into func. 

1241 

1242 Returns 

1243 ------- 

1244 Series or DataFrame 

1245 Return type is determined by the caller. 

1246 

1247 See Also 

1248 -------- 

1249 Series.%(name)s : Series %(name)s. 

1250 DataFrame.%(name)s : DataFrame %(name)s. 

1251 

1252 Notes 

1253 ----- 

1254 See :ref:`stats.rolling_apply` for extended documentation and performance 

1255 considerations for the Numba engine. 

1256 """ 

1257 ) 

1258 

def apply(
    self,
    func,
    raw: bool = False,
    engine: str = "cython",
    engine_kwargs: Optional[Dict] = None,
    args: Optional[Tuple] = None,
    kwargs: Optional[Dict] = None,
):
    """
    Apply ``func`` over each window using the cython or numba engine.

    Parameters
    ----------
    func : function
        Function evaluated on each window.
    raw : bool, default False
        Must be a bool. The numba engine rejects ``raw=False``.
    engine : str, default 'cython'
        Either ``'cython'`` or ``'numba'``.
    engine_kwargs : dict, default None
        Only accepted by the numba engine.
    args : tuple, default None
        Positional arguments for ``func``; ``None`` means ``()``.
    kwargs : dict, default None
        Keyword arguments for ``func``; ``None`` means ``{}``. The dict
        passed by the caller is never modified.

    Returns
    -------
    The result of ``self._apply`` for the generated apply function.

    Raises
    ------
    ValueError
        If ``raw`` is not a bool, if ``engine_kwargs`` is given with the
        cython engine, if ``raw`` is ``False`` with the numba engine, or if
        ``engine`` is neither ``'cython'`` nor ``'numba'``.
    """
    if args is None:
        args = ()
    # Work on a copy: the internal-only keys are dropped below and that
    # must not leak back into a dict owned by the caller.
    kwargs = {} if kwargs is None else dict(kwargs)
    kwargs.pop("_level", None)
    kwargs.pop("floor", None)
    window = self._get_window()
    offset = calculate_center_offset(window) if self.center else 0
    if not is_bool(raw):
        raise ValueError("raw parameter must be `True` or `False`")

    if engine == "cython":
        if engine_kwargs is not None:
            raise ValueError("cython engine does not accept engine_kwargs")
        apply_func = self._generate_cython_apply_func(
            args, kwargs, raw, offset, func
        )
    elif engine == "numba":
        if raw is False:
            raise ValueError("raw must be `True` when using the numba engine")
        if func in self._numba_func_cache:
            # Return an already compiled version of roll_apply if available
            apply_func = self._numba_func_cache[func]
        else:
            apply_func = generate_numba_apply_func(
                args, kwargs, func, engine_kwargs
            )
    else:
        raise ValueError("engine must be either 'numba' or 'cython'")

    # TODO: Why do we always pass center=False?
    # name=func & raw=raw for WindowGroupByMixin._apply
    return self._apply(
        apply_func,
        center=False,
        floor=0,
        name=func,
        use_numba_cache=engine == "numba",
        raw=raw,
        args=args,
        kwargs=kwargs,
    )

1310 

def _generate_cython_apply_func(self, args, kwargs, raw, offset, func):
    """
    Build the callable used by ``apply`` for the cython engine.

    The ``roll_generic`` routine is bound to ``args``, ``kwargs``, ``raw``,
    ``offset`` and ``func``. The returned closure wraps the incoming values
    in a Series indexed by ``self.obj.index`` when ``raw`` is falsy, then
    calls the bound routine.
    """
    from pandas import Series

    roll_generic = self._get_cython_func_type("roll_generic")
    bound_roll = partial(
        roll_generic, args=args, kwargs=kwargs, raw=raw, offset=offset, func=func
    )

    def apply_func(values, begin, end, min_periods, raw=raw):
        data = values if raw else Series(values, index=self.obj.index)
        return bound_roll(data, begin, end, min_periods)

    return apply_func

1329 

def sum(self, *args, **kwargs):
    """Apply the ``roll_sum`` routine over each window with ``floor=0``."""
    nv.validate_window_func("sum", args, kwargs)
    roll_sum = self._get_cython_func_type("roll_sum")
    # ``floor`` is always 0 here; any incoming value is overridden.
    kwargs["floor"] = 0
    return self._apply(roll_sum, center=self.center, name="sum", **kwargs)

1337 

1338 _shared_docs["max"] = dedent( 

1339 """ 

1340 Calculate the %(name)s maximum. 

1341 

1342 Parameters 

1343 ---------- 

1344 *args, **kwargs 

1345 Arguments and keyword arguments to be passed into func. 

1346 """ 

1347 ) 

1348 

def max(self, *args, **kwargs):
    """Apply the ``roll_max`` routine over each window."""
    nv.validate_window_func("max", args, kwargs)
    return self._apply(
        self._get_cython_func_type("roll_max"),
        center=self.center,
        name="max",
        **kwargs,
    )

1353 

1354 _shared_docs["min"] = dedent( 

1355 """ 

1356 Calculate the %(name)s minimum. 

1357 

1358 Parameters 

1359 ---------- 

1360 **kwargs 

1361 Under Review. 

1362 

1363 Returns 

1364 ------- 

1365 Series or DataFrame 

1366 Returned object type is determined by the caller of the %(name)s 

1367 calculation. 

1368 

1369 See Also 

1370 -------- 

1371 Series.%(name)s : Calling object with a Series. 

1372 DataFrame.%(name)s : Calling object with a DataFrame. 

1373 Series.min : Similar method for Series. 

1374 DataFrame.min : Similar method for DataFrame. 

1375 

1376 Examples 

1377 -------- 

1378 Performing a rolling minimum with a window size of 3. 

1379 

1380 >>> s = pd.Series([4, 3, 5, 2, 6]) 

1381 >>> s.rolling(3).min() 

1382 0 NaN 

1383 1 NaN 

1384 2 3.0 

1385 3 2.0 

1386 4 2.0 

1387 dtype: float64 

1388 """ 

1389 ) 

1390 

def min(self, *args, **kwargs):
    """Apply the ``roll_min`` routine over each window."""
    nv.validate_window_func("min", args, kwargs)
    return self._apply(
        self._get_cython_func_type("roll_min"),
        center=self.center,
        name="min",
        **kwargs,
    )

1395 

def mean(self, *args, **kwargs):
    """Apply the ``roll_mean`` routine over each window."""
    nv.validate_window_func("mean", args, kwargs)
    return self._apply(
        self._get_cython_func_type("roll_mean"),
        center=self.center,
        name="mean",
        **kwargs,
    )

1400 

1401 _shared_docs["median"] = dedent( 

1402 """ 

1403 Calculate the %(name)s median. 

1404 

1405 Parameters 

1406 ---------- 

1407 **kwargs 

1408 For compatibility with other %(name)s methods. Has no effect 

1409 on the computed median. 

1410 

1411 Returns 

1412 ------- 

1413 Series or DataFrame 

1414 Returned type is the same as the original object. 

1415 

1416 See Also 

1417 -------- 

1418 Series.%(name)s : Calling object with Series data. 

1419 DataFrame.%(name)s : Calling object with DataFrames. 

1420 Series.median : Equivalent method for Series. 

1421 DataFrame.median : Equivalent method for DataFrame. 

1422 

1423 Examples 

1424 -------- 

1425 Compute the rolling median of a series with a window size of 3. 

1426 

1427 >>> s = pd.Series([0, 1, 2, 3, 4]) 

1428 >>> s.rolling(3).median() 

1429 0 NaN 

1430 1 NaN 

1431 2 1.0 

1432 3 2.0 

1433 4 3.0 

1434 dtype: float64 

1435 """ 

1436 ) 

1437 

def median(self, **kwargs):
    """
    Apply the ``roll_median_c`` routine over each window.

    The routine is bound to the current window (``win``) before being
    handed to ``_apply`` together with any extra keyword arguments.
    """
    roll_median = partial(
        self._get_roll_func("roll_median_c"), win=self._get_window()
    )
    return self._apply(roll_median, center=self.center, name="median", **kwargs)

1442 

def std(self, ddof=1, *args, **kwargs):
    """
    Apply ``zsqrt`` of the ``roll_var`` routine over each window.

    ``require_min_periods`` is always passed to ``_apply`` as 1, regardless
    of any value supplied by the caller.
    """
    nv.validate_window_func("std", args, kwargs)
    # The explicit value below wins over anything the caller passed.
    kwargs["require_min_periods"] = 1
    roll_var = self._get_cython_func_type("roll_var")

    def zsqrt_func(values, begin, end, min_periods):
        variance = roll_var(values, begin, end, min_periods, ddof=ddof)
        return zsqrt(variance)

    # ddof passed again for compat with groupby.rolling
    return self._apply(
        zsqrt_func, center=self.center, name="std", ddof=ddof, **kwargs
    )

1460 

def var(self, ddof=1, *args, **kwargs):
    """
    Apply the ``roll_var`` routine (bound to ``ddof``) over each window.

    ``require_min_periods`` is always passed to ``_apply`` as 1, regardless
    of any value supplied by the caller.
    """
    nv.validate_window_func("var", args, kwargs)
    # The explicit value below wins over anything the caller passed.
    kwargs["require_min_periods"] = 1
    roll_var = partial(self._get_cython_func_type("roll_var"), ddof=ddof)
    # ddof passed again for compat with groupby.rolling
    return self._apply(
        roll_var, center=self.center, name="var", ddof=ddof, **kwargs
    )

1474 

1475 _shared_docs[ 

1476 "skew" 

1477 ] = """ 

1478 Unbiased %(name)s skewness. 

1479 

1480 Parameters 

1481 ---------- 

1482 **kwargs 

1483 Keyword arguments to be passed into func. 

1484 """ 

1485 

def skew(self, **kwargs):
    """
    Apply the ``roll_skew`` routine over each window.

    ``require_min_periods`` is always passed to ``_apply`` as 3, regardless
    of any value supplied by the caller.
    """
    roll_skew = self._get_cython_func_type("roll_skew")
    # The explicit value below wins over anything the caller passed.
    kwargs["require_min_periods"] = 3
    return self._apply(roll_skew, center=self.center, name="skew", **kwargs)

1496 

1497 _shared_docs["kurt"] = dedent( 

1498 """ 

1499 Calculate unbiased %(name)s kurtosis. 

1500 

1501 This function uses Fisher's definition of kurtosis without bias. 

1502 

1503 Parameters 

1504 ---------- 

1505 **kwargs 

1506 Under Review. 

1507 

1508 Returns 

1509 ------- 

1510 Series or DataFrame 

1511 Returned object type is determined by the caller of the %(name)s 

1512 calculation. 

1513 

1514 See Also 

1515 -------- 

1516 Series.%(name)s : Calling object with Series data. 

1517 DataFrame.%(name)s : Calling object with DataFrames. 

1518 Series.kurt : Equivalent method for Series. 

1519 DataFrame.kurt : Equivalent method for DataFrame. 

1520 scipy.stats.skew : Third moment of a probability density. 

1521 scipy.stats.kurtosis : Reference SciPy method. 

1522 

1523 Notes 

1524 ----- 

1525 A minimum of 4 periods is required for the %(name)s calculation. 

1526 """ 

1527 ) 

1528 

def kurt(self, **kwargs):
    """
    Apply the ``roll_kurt`` routine over each window.

    ``require_min_periods`` is always passed to ``_apply`` as 4, regardless
    of any value supplied by the caller.
    """
    roll_kurt = self._get_cython_func_type("roll_kurt")
    # The explicit value below wins over anything the caller passed.
    kwargs["require_min_periods"] = 4
    return self._apply(roll_kurt, center=self.center, name="kurt", **kwargs)

1539 

1540 _shared_docs["quantile"] = dedent( 

1541 """ 

1542 Calculate the %(name)s quantile. 

1543 

1544 Parameters 

1545 ---------- 

1546 quantile : float 

1547 Quantile to compute. 0 <= quantile <= 1. 

1548 interpolation : {'linear', 'lower', 'higher', 'midpoint', 'nearest'} 

1549 .. versionadded:: 0.23.0 

1550 

1551 This optional parameter specifies the interpolation method to use, 

1552 when the desired quantile lies between two data points `i` and `j`: 

1553 

1554 * linear: `i + (j - i) * fraction`, where `fraction` is the 

1555 fractional part of the index surrounded by `i` and `j`. 

1556 * lower: `i`. 

1557 * higher: `j`. 

1558 * nearest: `i` or `j` whichever is nearest. 

1559 * midpoint: (`i` + `j`) / 2. 

1560 **kwargs 

1561 For compatibility with other %(name)s methods. Has no effect on 

1562 the result. 

1563 

1564 Returns 

1565 ------- 

1566 Series or DataFrame 

1567 Returned object type is determined by the caller of the %(name)s 

1568 calculation. 

1569 

1570 See Also 

1571 -------- 

1572 Series.quantile : Computes value at the given quantile over all data 

1573 in Series. 

1574 DataFrame.quantile : Computes values at the given quantile over 

1575 requested axis in DataFrame. 

1576 

1577 Examples 

1578 -------- 

1579 >>> s = pd.Series([1, 2, 3, 4]) 

1580 >>> s.rolling(2).quantile(.4, interpolation='lower') 

1581 0 NaN 

1582 1 1.0 

1583 2 2.0 

1584 3 3.0 

1585 dtype: float64 

1586 

1587 >>> s.rolling(2).quantile(.4, interpolation='midpoint') 

1588 0 NaN 

1589 1 1.5 

1590 2 2.5 

1591 3 3.5 

1592 dtype: float64 

1593 """ 

1594 ) 

1595 

def quantile(self, quantile, interpolation="linear", **kwargs):
    """
    Apply a quantile routine over each window.

    ``quantile == 1.0`` and ``quantile == 0.0`` use the ``roll_max`` and
    ``roll_min`` routines respectively. Every other value uses
    ``roll_quantile`` bound to the window, quantile and interpolation.
    """
    if quantile == 1.0:
        kernel = self._get_cython_func_type("roll_max")
    elif quantile == 0.0:
        kernel = self._get_cython_func_type("roll_min")
    else:
        roll_quantile = self._get_roll_func("roll_quantile")
        kernel = partial(
            roll_quantile,
            win=self._get_window(),
            quantile=quantile,
            interpolation=interpolation,
        )

    # Pass through for groupby.rolling
    kwargs.update(quantile=quantile, interpolation=interpolation)
    return self._apply(kernel, center=self.center, name="quantile", **kwargs)

1613 

1614 _shared_docs[ 

1615 "cov" 

1616 ] = """ 

1617 Calculate the %(name)s sample covariance. 

1618 

1619 Parameters 

1620 ---------- 

1621 other : Series, DataFrame, or ndarray, optional 

1622 If not supplied then will default to self and produce pairwise 

1623 output. 

1624 pairwise : bool, default None 

1625 If False then only matching columns between self and other will be 

1626 used and the output will be a DataFrame. 

1627 If True then all pairwise combinations will be calculated and the 

1628 output will be a MultiIndexed DataFrame in the case of DataFrame 

1629 inputs. In the case of missing elements, only complete pairwise 

1630 observations will be used. 

1631 ddof : int, default 1 

1632 Delta Degrees of Freedom. The divisor used in calculations 

1633 is ``N - ddof``, where ``N`` represents the number of elements. 

1634 **kwargs 

1635 Keyword arguments to be passed into func. 

1636 """ 

1637 

def cov(self, other=None, pairwise=None, ddof=1, **kwargs):
    """
    Compute the windowed covariance between this object and ``other``.

    When ``other`` is omitted the selected object is paired with itself and
    ``pairwise`` defaults to True if it was left unset. The pairing itself
    is delegated to ``_flex_binary_moment``.
    """
    if other is None:
        other = self._selected_obj
        # only default unset
        if pairwise is None:
            pairwise = True
    other = self._shallow_copy(other)

    # GH 16058: offset window
    window = self.win_freq if self.is_freq_type else self._get_window(other)

    def _rolling_mean(values):
        roller = values.rolling(window, self.min_periods, center=self.center)
        return roller.mean(**kwargs)

    def _get_cov(X, Y):
        # GH #12373 : rolling functions error on float32 data
        # to avoid potential overflow, cast the data to float64
        X = X.astype("float64")
        Y = Y.astype("float64")
        pair_roller = (X + Y).rolling(
            window=window, min_periods=0, center=self.center
        )
        count = pair_roller.count(**kwargs)
        bias_adj = count / (count - ddof)
        raw_cov = _rolling_mean(X * Y) - _rolling_mean(X) * _rolling_mean(Y)
        return raw_cov * bias_adj

    return _flex_binary_moment(
        self._selected_obj, other._selected_obj, _get_cov, pairwise=bool(pairwise)
    )

1670 

1671 _shared_docs["corr"] = dedent( 

1672 """ 

1673 Calculate %(name)s correlation. 

1674 

1675 Parameters 

1676 ---------- 

1677 other : Series, DataFrame, or ndarray, optional 

1678 If not supplied then will default to self. 

1679 pairwise : bool, default None 

1680 Calculate pairwise combinations of columns within a 

1681 DataFrame. If `other` is not specified, defaults to `True`, 

1682 otherwise defaults to `False`. 

1683 Not relevant for :class:`~pandas.Series`. 

1684 **kwargs 

1685 Unused. 

1686 

1687 Returns 

1688 ------- 

1689 Series or DataFrame 

1690 Returned object type is determined by the caller of the 

1691 %(name)s calculation. 

1692 

1693 See Also 

1694 -------- 

1695 Series.%(name)s : Calling object with Series data. 

1696 DataFrame.%(name)s : Calling object with DataFrames. 

1697 Series.corr : Equivalent method for Series. 

1698 DataFrame.corr : Equivalent method for DataFrame. 

1699 %(name)s.cov : Similar method to calculate covariance. 

1700 numpy.corrcoef : NumPy Pearson's correlation calculation. 

1701 

1702 Notes 

1703 ----- 

1704 This function uses Pearson's definition of correlation 

1705 (https://en.wikipedia.org/wiki/Pearson_correlation_coefficient). 

1706 

1707 When `other` is not specified, the output will be self correlation (e.g. 

1708 all 1's), except for :class:`~pandas.DataFrame` inputs with `pairwise` 

1709 set to `True`. 

1710 

1711 Function will return ``NaN`` for correlations of equal valued sequences; 

1712 this is the result of a 0/0 division error. 

1713 

1714 When `pairwise` is set to `False`, only matching columns between `self` and 

1715 `other` will be used. 

1716 

1717 When `pairwise` is set to `True`, the output will be a MultiIndex DataFrame 

1718 with the original index on the first level, and the `other` DataFrame 

1719 columns on the second level. 

1720 

1721 In the case of missing elements, only complete pairwise observations 

1722 will be used. 

1723 

1724 Examples 

1725 -------- 

1726 The below example shows a rolling calculation with a window size of 

1727 four matching the equivalent function call using :meth:`numpy.corrcoef`. 

1728 

1729 >>> v1 = [3, 3, 3, 5, 8] 

1730 >>> v2 = [3, 4, 4, 4, 8] 

1731 >>> # numpy returns a 2X2 array, the correlation coefficient 

1732 >>> # is the number at entry [0][1] 

1733 >>> print(f"{np.corrcoef(v1[:-1], v2[:-1])[0][1]:.6f}") 

1734 0.333333 

1735 >>> print(f"{np.corrcoef(v1[1:], v2[1:])[0][1]:.6f}") 

1736 0.916949 

1737 >>> s1 = pd.Series(v1) 

1738 >>> s2 = pd.Series(v2) 

1739 >>> s1.rolling(4).corr(s2) 

1740 0 NaN 

1741 1 NaN 

1742 2 NaN 

1743 3 0.333333 

1744 4 0.916949 

1745 dtype: float64 

1746 

1747 The below example shows a similar rolling calculation on a 

1748 DataFrame using the pairwise option. 

1749 

1750 >>> matrix = np.array([[51., 35.], [49., 30.], [47., 32.],\ 

1751 [46., 31.], [50., 36.]]) 

1752 >>> print(np.corrcoef(matrix[:-1,0], matrix[:-1,1]).round(7)) 

1753 [[1. 0.6263001] 

1754 [0.6263001 1. ]] 

1755 >>> print(np.corrcoef(matrix[1:,0], matrix[1:,1]).round(7)) 

1756 [[1. 0.5553681] 

1757 [0.5553681 1. ]] 

1758 >>> df = pd.DataFrame(matrix, columns=['X','Y']) 

1759 >>> df 

1760 X Y 

1761 0 51.0 35.0 

1762 1 49.0 30.0 

1763 2 47.0 32.0 

1764 3 46.0 31.0 

1765 4 50.0 36.0 

1766 >>> df.rolling(4).corr(pairwise=True) 

1767 X Y 

1768 0 X NaN NaN 

1769 Y NaN NaN 

1770 1 X NaN NaN 

1771 Y NaN NaN 

1772 2 X NaN NaN 

1773 Y NaN NaN 

1774 3 X 1.000000 0.626300 

1775 Y 0.626300 1.000000 

1776 4 X 1.000000 0.555368 

1777 Y 0.555368 1.000000 

1778 """ 

1779 ) 

1780 

def corr(self, other=None, pairwise=None, **kwargs):
    """
    Compute the windowed correlation between this object and ``other``.

    When ``other`` is omitted the selected object is paired with itself and
    ``pairwise`` defaults to True if it was left unset. Each pair is
    evaluated as ``cov / (std_a * std_b)`` on rolling views of the inputs,
    and the pairing itself is delegated to ``_flex_binary_moment``.
    """
    if other is None:
        other = self._selected_obj
        # only default unset
        if pairwise is None:
            pairwise = True
    other = self._shallow_copy(other)
    if self.is_freq_type:
        window = self.win_freq
    else:
        window = self._get_window(other)

    def _get_corr(a, b):
        roll_a = a.rolling(
            window=window, min_periods=self.min_periods, center=self.center
        )
        roll_b = b.rolling(
            window=window, min_periods=self.min_periods, center=self.center
        )
        covariance = roll_a.cov(roll_b, **kwargs)
        return covariance / (roll_a.std(**kwargs) * roll_b.std(**kwargs))

    return _flex_binary_moment(
        self._selected_obj, other._selected_obj, _get_corr, pairwise=bool(pairwise)
    )

1802 

1803 

1804class Rolling(_Rolling_and_Expanding): 

@cache_readonly
def is_datetimelike(self) -> bool:
    # True when ``self._on`` is a datetime, timedelta or period index.
    datetimelike_types = (ABCDatetimeIndex, ABCTimedeltaIndex, ABCPeriodIndex)
    return isinstance(self._on, datetimelike_types)

1810 

@cache_readonly
def _on(self) -> Index:
    # Resolve ``self.on`` to an Index: the object's own axis labels when
    # ``on`` is None, ``on`` itself when it already is an Index, or the
    # named DataFrame column wrapped in an Index.
    if self.on is None:
        # axis 0 -> row labels; otherwise (i.e. self.axis == 1) -> columns
        return self.obj.index if self.axis == 0 else self.obj.columns
    if isinstance(self.on, Index):
        return self.on
    if isinstance(self.obj, ABCDataFrame) and self.on in self.obj.columns:
        return Index(self.obj[self.on])
    raise ValueError(
        f"invalid on specified as {self.on}, "
        "must be a column (of DataFrame), an Index "
        "or None"
    )

1829 

def validate(self):
    """
    Validate the window specification after running the parent checks.

    For a str / DateOffset / timedelta window on an empty or datetimelike
    object, the window is converted to a frequency: ``win_freq`` keeps the
    original value, ``window`` becomes ``freq.nanos``, ``win_type`` becomes
    ``"freq"`` and a missing ``min_periods`` becomes 1. A ``BaseIndexer``
    window returns early. Any other window must be a non-negative integer.

    Raises
    ------
    NotImplementedError
        If ``center`` is set together with an offset-based window.
    ValueError
        If the window is not an integer, is negative, or if ``closed`` is
        given for a non-datetimelike object.
    """
    super().validate()

    # we allow rolling on a datetimelike index
    if (self.obj.empty or self.is_datetimelike) and isinstance(
        self.window, (str, ABCDateOffset, timedelta)
    ):

        self._validate_monotonic()
        freq = self._validate_freq()

        # we don't allow center
        if self.center:
            raise NotImplementedError(
                "center is not implemented "
                "for datetimelike and offset "
                "based windows"
            )

        # this will raise ValueError on non-fixed freqs
        # (the original window is stored in ``win_freq`` before ``freq.nanos``
        # is read)
        self.win_freq = self.window
        self.window = freq.nanos
        self.win_type = "freq"

        # min_periods must be an integer
        if self.min_periods is None:
            self.min_periods = 1

    elif isinstance(self.window, BaseIndexer):
        # Passed BaseIndexer subclass should handle all other rolling kwargs
        return
    elif not is_integer(self.window):
        raise ValueError("window must be an integer")
    elif self.window < 0:
        raise ValueError("window must be non-negative")

    # Reached for both the offset-based and the integer window paths.
    if not self.is_datetimelike and self.closed is not None:
        raise ValueError(
            "closed only implemented for datetimelike and offset based windows"
        )

1870 

def _validate_monotonic(self):
    """
    Validate monotonic (increasing or decreasing).

    Raises
    ------
    ValueError
        If ``self._on`` is neither monotonically increasing nor decreasing.
        The message names ``self.on``, or "index" when ``on`` is None.
    """
    on_index = self._on
    if on_index.is_monotonic_increasing or on_index.is_monotonic_decreasing:
        return
    formatted = "index" if self.on is None else self.on
    raise ValueError(f"{formatted} must be monotonic")

1880 

def _validate_freq(self):
    """
    Validate & return window frequency.

    Returns
    -------
    The offset produced by ``to_offset(self.window)``.

    Raises
    ------
    ValueError
        If ``to_offset`` rejects the window with a TypeError or ValueError.
        The original exception is attached as ``__cause__``.
    """
    from pandas.tseries.frequencies import to_offset

    try:
        return to_offset(self.window)
    except (TypeError, ValueError) as err:
        # Chain explicitly so the traceback shows the conversion failure as
        # the cause rather than as an error raised while handling another.
        raise ValueError(
            f"passed window {self.window} is not "
            "compatible with a datetimelike "
            "index"
        ) from err

1895 

1896 _agg_see_also_doc = dedent( 

1897 """ 

1898 See Also 

1899 -------- 

1900 Series.rolling 

1901 DataFrame.rolling 

1902 """ 

1903 ) 

1904 

1905 _agg_examples_doc = dedent( 

1906 """ 

1907 Examples 

1908 -------- 

1909 

1910 >>> df = pd.DataFrame(np.random.randn(10, 3), columns=['A', 'B', 'C']) 

1911 >>> df 

1912 A B C 

1913 0 -2.385977 -0.102758 0.438822 

1914 1 -1.004295 0.905829 -0.954544 

1915 2 0.735167 -0.165272 -1.619346 

1916 3 -0.702657 -1.340923 -0.706334 

1917 4 -0.246845 0.211596 -0.901819 

1918 5 2.463718 3.157577 -1.380906 

1919 6 -1.142255 2.340594 -0.039875 

1920 7 1.396598 -1.647453 1.677227 

1921 8 -0.543425 1.761277 -0.220481 

1922 9 -0.640505 0.289374 -1.550670 

1923 

1924 >>> df.rolling(3).sum() 

1925 A B C 

1926 0 NaN NaN NaN 

1927 1 NaN NaN NaN 

1928 2 -2.655105 0.637799 -2.135068 

1929 3 -0.971785 -0.600366 -3.280224 

1930 4 -0.214334 -1.294599 -3.227500 

1931 5 1.514216 2.028250 -2.989060 

1932 6 1.074618 5.709767 -2.322600 

1933 7 2.718061 3.850718 0.256446 

1934 8 -0.289082 2.454418 1.416871 

1935 9 0.212668 0.403198 -0.093924 

1936 

1937 >>> df.rolling(3).agg({'A':'sum', 'B':'min'}) 

1938 A B 

1939 0 NaN NaN 

1940 1 NaN NaN 

1941 2 -2.655105 -0.165272 

1942 3 -0.971785 -1.340923 

1943 4 -0.214334 -1.340923 

1944 5 1.514216 -1.340923 

1945 6 1.074618 0.211596 

1946 7 2.718061 -1.647453 

1947 8 -0.289082 -1.647453 

1948 9 0.212668 -1.647453 

1949 """ 

1950 ) 

1951 

@Substitution(
    see_also=_agg_see_also_doc,
    examples=_agg_examples_doc,
    versionadded="",
    # Spelled "DataFrame" (not "Dataframe") to match the class name and the
    # value used by the sibling ``aggregate`` of the weighted window class.
    klass="Series/DataFrame",
    axis="",
)
@Appender(_shared_docs["aggregate"])
def aggregate(self, func, *args, **kwargs):
    # No docstring literal here: the decorators above assemble it.
    # The computation is delegated unchanged to the parent class.
    return super().aggregate(func, *args, **kwargs)


agg = aggregate

1964 

@Substitution(name="rolling")
@Appender(_shared_docs["count"])
def count(self):
    # Docstring is supplied by the decorators above.
    if not self.is_freq_type:
        return super().count()

    # different impl for freq counting
    roll_count = self._get_roll_func("roll_count")
    return self._apply(roll_count, center=self.center, name="count")

1975 

@Substitution(name="rolling")
@Appender(_shared_docs["apply"])
def apply(
    self,
    func,
    raw=False,
    engine="cython",
    engine_kwargs=None,
    args=None,
    kwargs=None,
):
    # Docstring is supplied by the decorators above; every option is
    # forwarded unchanged to the parent implementation.
    forwarded = {
        "raw": raw,
        "engine": engine,
        "engine_kwargs": engine_kwargs,
        "args": args,
        "kwargs": kwargs,
    }
    return super().apply(func, **forwarded)

1995 

@Substitution(name="rolling")
@Appender(_shared_docs["sum"])
def sum(self, *args, **kwargs):
    # Docstring is supplied by the decorators above. Arguments go through
    # ``nv.validate_rolling_func`` before the parent implementation runs.
    nv.validate_rolling_func("sum", args, kwargs)
    rolled = super().sum(*args, **kwargs)
    return rolled

2001 

@Substitution(name="rolling")
@Appender(_doc_template)
@Appender(_shared_docs["max"])
def max(self, *args, **kwargs):
    # Docstring is supplied by the decorators above. Arguments go through
    # ``nv.validate_rolling_func`` before the parent implementation runs.
    nv.validate_rolling_func("max", args, kwargs)
    rolled = super().max(*args, **kwargs)
    return rolled

2008 

@Substitution(name="rolling")
@Appender(_shared_docs["min"])
def min(self, *args, **kwargs):
    # Docstring is supplied by the decorators above. Arguments go through
    # ``nv.validate_rolling_func`` before the parent implementation runs.
    nv.validate_rolling_func("min", args, kwargs)
    rolled = super().min(*args, **kwargs)
    return rolled

2014 

@Substitution(name="rolling")
@Appender(_shared_docs["mean"])
def mean(self, *args, **kwargs):
    # Docstring is supplied by the decorators above. Arguments go through
    # ``nv.validate_rolling_func`` before the parent implementation runs.
    nv.validate_rolling_func("mean", args, kwargs)
    rolled = super().mean(*args, **kwargs)
    return rolled

2020 

@Substitution(name="rolling")
@Appender(_shared_docs["median"])
def median(self, **kwargs):
    # Docstring is supplied by the decorators above; the computation is
    # delegated unchanged to the parent implementation.
    rolled = super().median(**kwargs)
    return rolled

2025 

@Substitution(name="rolling", versionadded="")
@Appender(_shared_docs["std"])
def std(self, ddof=1, *args, **kwargs):
    # Docstring is supplied by the decorators above. Positional ``args``
    # are validated but not forwarded to the parent implementation.
    nv.validate_rolling_func("std", args, kwargs)
    rolled = super().std(ddof=ddof, **kwargs)
    return rolled

2031 

@Substitution(name="rolling", versionadded="")
@Appender(_shared_docs["var"])
def var(self, ddof=1, *args, **kwargs):
    # Docstring is supplied by the decorators above. Positional ``args``
    # are validated but not forwarded to the parent implementation.
    nv.validate_rolling_func("var", args, kwargs)
    rolled = super().var(ddof=ddof, **kwargs)
    return rolled

2037 

@Substitution(name="rolling")
@Appender(_doc_template)
@Appender(_shared_docs["skew"])
def skew(self, **kwargs):
    # Docstring is supplied by the decorators above; the computation is
    # delegated unchanged to the parent implementation.
    rolled = super().skew(**kwargs)
    return rolled

2043 

2044 _agg_doc = dedent( 

2045 """ 

2046 Examples 

2047 -------- 

2048 

2049 The example below will show a rolling calculation with a window size of 

2050 four matching the equivalent function call using `scipy.stats`. 

2051 

2052 >>> arr = [1, 2, 3, 4, 999] 

2053 >>> import scipy.stats 

2054 >>> print(f"{scipy.stats.kurtosis(arr[:-1], bias=False):.6f}") 

2055 -1.200000 

2056 >>> print(f"{scipy.stats.kurtosis(arr[1:], bias=False):.6f}") 

2057 3.999946 

2058 >>> s = pd.Series(arr) 

2059 >>> s.rolling(4).kurt() 

2060 0 NaN 

2061 1 NaN 

2062 2 NaN 

2063 3 -1.200000 

2064 4 3.999946 

2065 dtype: float64 

2066 """ 

2067 ) 

2068 

@Appender(_agg_doc)
@Substitution(name="rolling")
@Appender(_shared_docs["kurt"])
def kurt(self, **kwargs):
    # Docstring is supplied by the decorators above; the computation is
    # delegated unchanged to the parent implementation.
    rolled = super().kurt(**kwargs)
    return rolled

2074 

@Substitution(name="rolling")
@Appender(_shared_docs["quantile"])
def quantile(self, quantile, interpolation="linear", **kwargs):
    # Docstring is supplied by the decorators above; both named options
    # are forwarded by keyword to the parent implementation.
    kwargs.update(quantile=quantile, interpolation=interpolation)
    return super().quantile(**kwargs)

2081 

@Substitution(name="rolling")
@Appender(_doc_template)
@Appender(_shared_docs["cov"])
def cov(self, other=None, pairwise=None, ddof=1, **kwargs):
    # Docstring is supplied by the decorators above; every option is
    # forwarded by keyword to the parent implementation.
    covariance = super().cov(other=other, pairwise=pairwise, ddof=ddof, **kwargs)
    return covariance

2087 

@Substitution(name="rolling")
@Appender(_shared_docs["corr"])
def corr(self, other=None, pairwise=None, **kwargs):
    # Docstring is supplied by the decorators above; every option is
    # forwarded by keyword to the parent implementation.
    correlation = super().corr(other=other, pairwise=pairwise, **kwargs)
    return correlation

2092 

2093 

# Rolling reuses the class-level docstring of Window verbatim.
Rolling.__doc__ = Window.__doc__

2095 

2096 

class RollingGroupby(WindowGroupByMixin, Rolling):
    """
    Provide a rolling groupby implementation.
    """

    @property
    def _constructor(self):
        # Return the plain ``Rolling`` class, not this groupby variant.
        return Rolling

    def _gotitem(self, key, ndim, subset=None):
        # When ``on`` is set, move it onto the index of the grouped object
        # so the index is carried through to the selected object when the
        # groupby does its splitting; ``on`` is then cleared.
        if self.on is not None:
            reindexed = self._groupby.obj.set_index(self._on)
            self._groupby.obj = reindexed
            self.on = None
        return super()._gotitem(key, ndim, subset=subset)

    def _validate_monotonic(self):
        """
        Validate that on is monotonic;
        we don't care for groupby.rolling
        because we have already validated at a higher
        level.
        """