Coverage for /home/martinb/.local/share/virtualenvs/camcops/lib/python3.6/site-packages/pandas/core/window/rolling.py : 28%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1"""
2Provide a generic structure to support window functions,
3similar to how we have a Groupby object.
4"""
5from datetime import timedelta
6from functools import partial
7import inspect
8from textwrap import dedent
9from typing import Callable, Dict, List, Optional, Set, Tuple, Union
11import numpy as np
13import pandas._libs.window.aggregations as window_aggregations
14from pandas._typing import Axis, FrameOrSeries, Scalar
15from pandas.compat._optional import import_optional_dependency
16from pandas.compat.numpy import function as nv
17from pandas.util._decorators import Appender, Substitution, cache_readonly
19from pandas.core.dtypes.common import (
20 ensure_float64,
21 is_bool,
22 is_float_dtype,
23 is_integer,
24 is_integer_dtype,
25 is_list_like,
26 is_scalar,
27 needs_i8_conversion,
28)
29from pandas.core.dtypes.generic import (
30 ABCDataFrame,
31 ABCDateOffset,
32 ABCDatetimeIndex,
33 ABCPeriodIndex,
34 ABCSeries,
35 ABCTimedeltaIndex,
36)
38from pandas.core.base import DataError, PandasObject, SelectionMixin, ShallowMixin
39import pandas.core.common as com
40from pandas.core.indexes.api import Index, ensure_index
41from pandas.core.window.common import (
42 WindowGroupByMixin,
43 _doc_template,
44 _flex_binary_moment,
45 _shared_docs,
46 calculate_center_offset,
47 calculate_min_periods,
48 get_weighted_roll_func,
49 zsqrt,
50)
51from pandas.core.window.indexers import (
52 BaseIndexer,
53 FixedWindowIndexer,
54 VariableWindowIndexer,
55)
56from pandas.core.window.numba_ import generate_numba_apply_func
59class _Window(PandasObject, ShallowMixin, SelectionMixin):
60 _attributes: List[str] = [
61 "window",
62 "min_periods",
63 "center",
64 "win_type",
65 "axis",
66 "on",
67 "closed",
68 ]
69 exclusions: Set[str] = set()
71 def __init__(
72 self,
73 obj,
74 window=None,
75 min_periods: Optional[int] = None,
76 center: Optional[bool] = False,
77 win_type: Optional[str] = None,
78 axis: Axis = 0,
79 on: Optional[Union[str, Index]] = None,
80 closed: Optional[str] = None,
81 **kwargs,
82 ):
84 self.__dict__.update(kwargs)
85 self.obj = obj
86 self.on = on
87 self.closed = closed
88 self.window = window
89 self.min_periods = min_periods
90 self.center = center
91 self.win_type = win_type
92 self.win_freq = None
93 self.axis = obj._get_axis_number(axis) if axis is not None else None
94 self.validate()
95 self._numba_func_cache: Dict[Optional[str], Callable] = dict()
@property
def _constructor(self):
    """Return the class used to build derived window objects (``Window``)."""
    return Window
@property
def is_datetimelike(self) -> Optional[bool]:
    """Always None on this base class."""
    return None
@property
def _on(self):
    """Always None on this base class."""
    return None
@property
def is_freq_type(self) -> bool:
    """Return True when ``win_type`` equals the string ``"freq"``."""
    return self.win_type == "freq"
def validate(self) -> None:
    """
    Check the stored settings and raise on the first invalid one.

    Raises
    ------
    ValueError
        If ``center`` is set and not a boolean, ``min_periods`` is set and
        not an integer, or ``closed`` is set and not one of 'right',
        'both', 'left', 'neither'. The signature check run for a
        ``BaseIndexer`` window may also raise it.
    TypeError
        If the wrapped object is neither a Series nor a DataFrame.
    """
    if self.center is not None and not is_bool(self.center):
        raise ValueError("center must be a boolean")
    if self.min_periods is not None and not is_integer(self.min_periods):
        raise ValueError("min_periods must be an integer")
    if self.closed is not None and self.closed not in [
        "right",
        "both",
        "left",
        "neither",
    ]:
        raise ValueError("closed must be 'right', 'left', 'both' or 'neither'")
    if not isinstance(self.obj, (ABCSeries, ABCDataFrame)):
        # Name the rejected object's type; the window class itself is
        # never the thing that is invalid here.
        raise TypeError(f"invalid type: {type(self.obj)}")
    if isinstance(self.window, BaseIndexer):
        self._validate_get_window_bounds_signature(self.window)
130 @staticmethod
131 def _validate_get_window_bounds_signature(window: BaseIndexer) -> None:
132 """
133 Validate that the passed BaseIndexer subclass has
134 a get_window_bounds with the correct signature.
135 """
136 get_window_bounds_signature = inspect.signature(
137 window.get_window_bounds
138 ).parameters.keys()
139 expected_signature = inspect.signature(
140 BaseIndexer().get_window_bounds
141 ).parameters.keys()
142 if get_window_bounds_signature != expected_signature:
143 raise ValueError(
144 f"{type(window).__name__} does not implement the correct signature for "
145 f"get_window_bounds"
146 )
def _create_blocks(self):
    """
    Return ``(blocks, obj)`` for the current selection.

    When ``on`` is set to something other than an ``Index`` and the
    selection is two-dimensional, the ``on`` column is dropped before the
    selection is split into blocks.
    """
    data = self._selected_obj

    on_is_label = self.on is not None and not isinstance(self.on, Index)
    if on_is_label and data.ndim == 2:
        # filter out the on column from the object
        remaining = data.columns.difference([self.on])
        data = data.reindex(columns=remaining, copy=False)

    return data._to_dict_of_blocks(copy=False).values(), data
163 def _gotitem(self, key, ndim, subset=None):
164 """
165 Sub-classes to define. Return a sliced object.
167 Parameters
168 ----------
169 key : str / list of selections
170 ndim : 1,2
171 requested ndim of result
172 subset : object, default None
173 subset to act on
174 """
176 # create a new object to prevent aliasing
177 if subset is None:
178 subset = self.obj
179 self = self._shallow_copy(subset)
180 self._reset_cache()
181 if subset.ndim == 2:
182 if is_scalar(key) and key in subset or is_list_like(key):
183 self._selection = key
184 return self
186 def __getattr__(self, attr: str):
187 if attr in self._internal_names_set:
188 return object.__getattribute__(self, attr)
189 if attr in self.obj:
190 return self[attr]
192 raise AttributeError(
193 f"'{type(self).__name__}' object has no attribute '{attr}'"
194 )
def _dir_additions(self):
    """Delegate to the wrapped object's ``_dir_additions``."""
    return self.obj._dir_additions()
def _get_win_type(self, kwargs: Dict):
    """
    Exists for compatibility, overridden by subclass Window.

    Parameters
    ----------
    kwargs : dict
        ignored, exists for compatibility

    Returns
    -------
    None
    """
    return None
214 def _get_window(self, other=None, win_type: Optional[str] = None) -> int:
215 """
216 Return window length.
218 Parameters
219 ----------
220 other :
221 ignored, exists for compatibility
222 win_type :
223 ignored, exists for compatibility
225 Returns
226 -------
227 window : int
228 """
229 if isinstance(self.window, BaseIndexer):
230 return self.min_periods or 0
231 return self.window
@property
def _window_type(self) -> str:
    """Return the name of the concrete class of this object."""
    return type(self).__name__
237 def __repr__(self) -> str:
238 """
239 Provide a nice str repr of our rolling object.
240 """
242 attrs_list = (
243 f"{attr_name}={getattr(self, attr_name)}"
244 for attr_name in self._attributes
245 if getattr(self, attr_name, None) is not None
246 )
247 attrs = ",".join(attrs_list)
248 return f"{self._window_type} [{attrs}]"
250 def __iter__(self):
251 url = "https://github.com/pandas-dev/pandas/issues/11704"
252 raise NotImplementedError(f"See issue #11704 {url}")
254 def _prep_values(self, values: Optional[np.ndarray] = None) -> np.ndarray:
255 """Convert input to numpy arrays for Cython routines"""
256 if values is None:
257 values = getattr(self._selected_obj, "values", self._selected_obj)
259 # GH #12373 : rolling functions error on float32 data
260 # make sure the data is coerced to float64
261 if is_float_dtype(values.dtype):
262 values = ensure_float64(values)
263 elif is_integer_dtype(values.dtype):
264 values = ensure_float64(values)
265 elif needs_i8_conversion(values.dtype):
266 raise NotImplementedError(
267 f"ops for {self._window_type} for this "
268 f"dtype {values.dtype} are not implemented"
269 )
270 else:
271 try:
272 values = ensure_float64(values)
273 except (ValueError, TypeError):
274 raise TypeError(f"cannot handle this type -> {values.dtype}")
276 # Convert inf to nan for C funcs
277 inf = np.isinf(values)
278 if inf.any():
279 values = np.where(inf, np.nan, values)
281 return values
283 def _wrap_result(self, result, block=None, obj=None):
284 """
285 Wrap a single result.
286 """
288 if obj is None:
289 obj = self._selected_obj
290 index = obj.index
292 if isinstance(result, np.ndarray):
294 if result.ndim == 1:
295 from pandas import Series
297 return Series(result, index, name=obj.name)
299 return type(obj)(result, index=index, columns=block.columns)
300 return result
def _wrap_results(self, results, blocks, obj, exclude=None) -> FrameOrSeries:
    """
    Wrap the results.

    Each result is wrapped together with its block. The first wrapped
    result that is one-dimensional is returned immediately; otherwise the
    wrapped pieces are concatenated column-wise and reindexed.

    Parameters
    ----------
    results : list of ndarrays
    blocks : list of blocks
    obj : conformed data (may be resampled)
    exclude: list of columns to exclude, default to None

    Raises
    ------
    DataError
        If removing the excluded columns leaves no columns at all.
    """

    from pandas import Series, concat

    final = []
    for result, block in zip(results, blocks):

        result = self._wrap_result(result, block=block, obj=obj)
        if result.ndim == 1:
            # a 1-D result is already complete; nothing to concatenate
            return result
        final.append(result)

    # if we have an 'on' column
    # we want to put it back into the results
    # in the same location
    columns = self._selected_obj.columns
    if self.on is not None and not self._on.equals(obj.index):

        name = self._on.name
        final.append(Series(self._on, index=obj.index, name=name))

        if self._selection is not None:

            selection = ensure_index(self._selection)

            # need to reorder to include original location of
            # the on column (if it's not already there)
            if name not in selection:
                columns = self.obj.columns
                indexer = columns.get_indexer(selection.tolist() + [name])
                columns = columns.take(sorted(indexer))

    # exclude nuisance columns so that they are not reindexed
    if exclude is not None and exclude:
        columns = [c for c in columns if c not in exclude]

        if not columns:
            raise DataError("No numeric types to aggregate")

    if not len(final):
        # no wrapped pieces at all: hand back the input cast to float64
        return obj.astype("float64")
    return concat(final, axis=1).reindex(columns=columns, copy=False)
def _center_window(self, result, window) -> np.ndarray:
    """
    Center the result in the window.

    Drops the leading ``calculate_center_offset(window)`` entries along
    ``self.axis`` and returns a copy; a non-positive offset returns
    ``result`` unchanged.
    """
    if result.ndim - 1 < self.axis:
        raise ValueError("Requested axis is larger then no. of argument dimensions")

    shift = calculate_center_offset(window)
    if shift <= 0:
        return result

    selector = [slice(None)] * result.ndim
    selector[self.axis] = slice(shift, None)
    return np.copy(result[tuple(selector)])
369 def _get_roll_func(self, func_name: str) -> Callable:
370 """
371 Wrap rolling function to check values passed.
373 Parameters
374 ----------
375 func_name : str
376 Cython function used to calculate rolling statistics
378 Returns
379 -------
380 func : callable
381 """
382 window_func = getattr(window_aggregations, func_name, None)
383 if window_func is None:
384 raise ValueError(
385 f"we do not support this function in window_aggregations.{func_name}"
386 )
387 return window_func
389 def _get_cython_func_type(self, func: str) -> Callable:
390 """
391 Return a variable or fixed cython function type.
393 Variable algorithms do not use window while fixed do.
394 """
395 if self.is_freq_type or isinstance(self.window, BaseIndexer):
396 return self._get_roll_func(f"{func}_variable")
397 return partial(self._get_roll_func(f"{func}_fixed"), win=self._get_window())
def _get_window_indexer(self, window: int) -> BaseIndexer:
    """
    Return the indexer that will compute the window start and end bounds.

    A user-supplied ``BaseIndexer`` is returned as is; frequency windows
    get a ``VariableWindowIndexer`` over ``self._on.asi8``; everything
    else gets a ``FixedWindowIndexer`` of size ``window``.
    """
    if isinstance(self.window, BaseIndexer):
        indexer = self.window
    elif self.is_freq_type:
        indexer = VariableWindowIndexer(
            index_array=self._on.asi8, window_size=window
        )
    else:
        indexer = FixedWindowIndexer(window_size=window)
    return indexer
def _apply(
    self,
    func: Callable,
    center: bool,
    require_min_periods: int = 0,
    floor: int = 1,
    is_weighted: bool = False,
    name: Optional[str] = None,
    use_numba_cache: bool = False,
    **kwargs,
):
    """
    Rolling statistical measure using supplied function.

    Designed to be used with passed-in Cython array-based functions.

    Parameters
    ----------
    func : callable function to apply
    center : bool
    require_min_periods : int
    floor : int
    is_weighted : bool
    name : str,
        compatibility with groupby.rolling
    use_numba_cache : bool
        whether to cache a numba compiled function. Only available for numba
        enabled methods (so far only apply)
    **kwargs
        additional arguments for rolling function and window function

    Returns
    -------
    y : type of input

    Raises
    ------
    DataError
        If a block cannot be converted for computation and the data is
        not a DataFrame.
    """
    win_type = self._get_win_type(kwargs)
    window = self._get_window(win_type=win_type)

    blocks, obj = self._create_blocks()
    window_indexer = self._get_window_indexer(window)

    results = []
    # Blocks that actually produced a result, in order. Collecting the
    # survivors (instead of deleting from a copy by the original position)
    # keeps results and blocks aligned when several blocks are dropped.
    kept_blocks = []
    exclude: List[Scalar] = []
    for b in blocks:
        try:
            values = self._prep_values(b.values)

        except (TypeError, NotImplementedError) as err:
            if isinstance(obj, ABCDataFrame):
                # nuisance block: remember its columns and skip it
                exclude.extend(b.columns)
                continue
            raise DataError("No numeric types to aggregate") from err

        kept_blocks.append(b)

        if values.size == 0:
            results.append(values.copy())
            continue

        # calculation function
        offset = calculate_center_offset(window) if center else 0
        # padding appended so a centered window can be shifted back later
        additional_nans = np.array([np.nan] * offset)

        if not is_weighted:

            def calc(x):
                x = np.concatenate((x, additional_nans))
                # NOTE(review): ``window`` is the value returned by
                # ``_get_window``; confirm whether this check was meant
                # to look at ``self.window`` instead.
                if not isinstance(window, BaseIndexer):
                    min_periods = calculate_min_periods(
                        window, self.min_periods, len(x), require_min_periods, floor
                    )
                else:
                    min_periods = calculate_min_periods(
                        self.min_periods or 1,
                        self.min_periods,
                        len(x),
                        require_min_periods,
                        floor,
                    )
                start, end = window_indexer.get_window_bounds(
                    num_values=len(x),
                    min_periods=self.min_periods,
                    center=self.center,
                    closed=self.closed,
                )
                return func(x, start, end, min_periods)

        else:

            def calc(x):
                x = np.concatenate((x, additional_nans))
                return func(x, window, self.min_periods)

        with np.errstate(all="ignore"):
            if values.ndim > 1:
                result = np.apply_along_axis(calc, self.axis, values)
            else:
                result = calc(values)
                result = np.asarray(result)

        if use_numba_cache:
            self._numba_func_cache[name] = func

        if center:
            result = self._center_window(result, window)

        results.append(result)

    return self._wrap_results(results, kept_blocks, obj, exclude)
def aggregate(self, func, *args, **kwargs):
    """
    Aggregate with ``func``.

    ``_aggregate`` is tried first; when it yields no result the call
    falls back to ``apply`` with ``raw=False``.
    """
    result, how = self._aggregate(func, *args, **kwargs)
    if result is not None:
        return result
    return self.apply(func, raw=False, args=args, kwargs=kwargs)


agg = aggregate
528 _shared_docs["sum"] = dedent(
529 """
530 Calculate %(name)s sum of given DataFrame or Series.
532 Parameters
533 ----------
534 *args, **kwargs
535 For compatibility with other %(name)s methods. Has no effect
536 on the computed value.
538 Returns
539 -------
540 Series or DataFrame
541 Same type as the input, with the same index, containing the
542 %(name)s sum.
544 See Also
545 --------
546 Series.sum : Reducing sum for Series.
547 DataFrame.sum : Reducing sum for DataFrame.
549 Examples
550 --------
551 >>> s = pd.Series([1, 2, 3, 4, 5])
552 >>> s
553 0 1
554 1 2
555 2 3
556 3 4
557 4 5
558 dtype: int64
560 >>> s.rolling(3).sum()
561 0 NaN
562 1 NaN
563 2 6.0
564 3 9.0
565 4 12.0
566 dtype: float64
568 >>> s.expanding(3).sum()
569 0 NaN
570 1 NaN
571 2 6.0
572 3 10.0
573 4 15.0
574 dtype: float64
576 >>> s.rolling(3, center=True).sum()
577 0 NaN
578 1 6.0
579 2 9.0
580 3 12.0
581 4 NaN
582 dtype: float64
584 For DataFrame, each %(name)s sum is computed column-wise.
586 >>> df = pd.DataFrame({"A": s, "B": s ** 2})
587 >>> df
588 A B
589 0 1 1
590 1 2 4
591 2 3 9
592 3 4 16
593 4 5 25
595 >>> df.rolling(3).sum()
596 A B
597 0 NaN NaN
598 1 NaN NaN
599 2 6.0 14.0
600 3 9.0 29.0
601 4 12.0 50.0
602 """
603 )
605 _shared_docs["mean"] = dedent(
606 """
607 Calculate the %(name)s mean of the values.
609 Parameters
610 ----------
611 *args
612 Under Review.
613 **kwargs
614 Under Review.
616 Returns
617 -------
618 Series or DataFrame
619 Returned object type is determined by the caller of the %(name)s
620 calculation.
622 See Also
623 --------
624 Series.%(name)s : Calling object with Series data.
625 DataFrame.%(name)s : Calling object with DataFrames.
626 Series.mean : Equivalent method for Series.
627 DataFrame.mean : Equivalent method for DataFrame.
629 Examples
630 --------
631 The below examples will show rolling mean calculations with window sizes of
632 two and three, respectively.
634 >>> s = pd.Series([1, 2, 3, 4])
635 >>> s.rolling(2).mean()
636 0 NaN
637 1 1.5
638 2 2.5
639 3 3.5
640 dtype: float64
642 >>> s.rolling(3).mean()
643 0 NaN
644 1 NaN
645 2 2.0
646 3 3.0
647 dtype: float64
648 """
649 )
651 _shared_docs["var"] = dedent(
652 """
653 Calculate unbiased %(name)s variance.
654 %(versionadded)s
655 Normalized by N-1 by default. This can be changed using the `ddof`
656 argument.
658 Parameters
659 ----------
660 ddof : int, default 1
661 Delta Degrees of Freedom. The divisor used in calculations
662 is ``N - ddof``, where ``N`` represents the number of elements.
663 *args, **kwargs
664 For NumPy compatibility. No additional arguments are used.
666 Returns
667 -------
668 Series or DataFrame
669 Returns the same object type as the caller of the %(name)s calculation.
671 See Also
672 --------
673 Series.%(name)s : Calling object with Series data.
674 DataFrame.%(name)s : Calling object with DataFrames.
675 Series.var : Equivalent method for Series.
676 DataFrame.var : Equivalent method for DataFrame.
677 numpy.var : Equivalent method for Numpy array.
679 Notes
680 -----
681 The default `ddof` of 1 used in :meth:`Series.var` is different than the
682 default `ddof` of 0 in :func:`numpy.var`.
684 A minimum of 1 period is required for the rolling calculation.
686 Examples
687 --------
688 >>> s = pd.Series([5, 5, 6, 7, 5, 5, 5])
689 >>> s.rolling(3).var()
690 0 NaN
691 1 NaN
692 2 0.333333
693 3 1.000000
694 4 1.000000
695 5 1.333333
696 6 0.000000
697 dtype: float64
699 >>> s.expanding(3).var()
700 0 NaN
701 1 NaN
702 2 0.333333
703 3 0.916667
704 4 0.800000
705 5 0.700000
706 6 0.619048
707 dtype: float64
708 """
709 )
711 _shared_docs["std"] = dedent(
712 """
713 Calculate %(name)s standard deviation.
714 %(versionadded)s
715 Normalized by N-1 by default. This can be changed using the `ddof`
716 argument.
718 Parameters
719 ----------
720 ddof : int, default 1
721 Delta Degrees of Freedom. The divisor used in calculations
722 is ``N - ddof``, where ``N`` represents the number of elements.
723 *args, **kwargs
724 For NumPy compatibility. No additional arguments are used.
726 Returns
727 -------
728 Series or DataFrame
729 Returns the same object type as the caller of the %(name)s calculation.
731 See Also
732 --------
733 Series.%(name)s : Calling object with Series data.
734 DataFrame.%(name)s : Calling object with DataFrames.
735 Series.std : Equivalent method for Series.
736 DataFrame.std : Equivalent method for DataFrame.
737 numpy.std : Equivalent method for Numpy array.
739 Notes
740 -----
741 The default `ddof` of 1 used in Series.std is different than the default
742 `ddof` of 0 in numpy.std.
744 A minimum of one period is required for the rolling calculation.
746 Examples
747 --------
748 >>> s = pd.Series([5, 5, 6, 7, 5, 5, 5])
749 >>> s.rolling(3).std()
750 0 NaN
751 1 NaN
752 2 0.577350
753 3 1.000000
754 4 1.000000
755 5 1.154701
756 6 0.000000
757 dtype: float64
759 >>> s.expanding(3).std()
760 0 NaN
761 1 NaN
762 2 0.577350
763 3 0.957427
764 4 0.894427
765 5 0.836660
766 6 0.786796
767 dtype: float64
768 """
769 )
772class Window(_Window):
773 """
774 Provide rolling window calculations.
776 Parameters
777 ----------
778 window : int, offset, or BaseIndexer subclass
779 Size of the moving window. This is the number of observations used for
780 calculating the statistic. Each window will be a fixed size.
782 If it's an offset then this will be the time period of each window. Each
783 window will be variable-sized based on the observations included in
784 the time-period. This is only valid for datetimelike indexes.
786 If a BaseIndexer subclass is passed, calculates the window boundaries
787 based on the defined ``get_window_bounds`` method. Additional rolling
788 keyword arguments, namely `min_periods`, `center`, and
789 `closed` will be passed to `get_window_bounds`.
790 min_periods : int, default None
791 Minimum number of observations in window required to have a value
792 (otherwise result is NA). For a window that is specified by an offset,
793 `min_periods` will default to 1. Otherwise, `min_periods` will default
794 to the size of the window.
795 center : bool, default False
796 Set the labels at the center of the window.
797 win_type : str, default None
798 Provide a window type. If ``None``, all points are evenly weighted.
799 See the notes below for further information.
800 on : str, optional
801 For a DataFrame, a datetime-like column or MultiIndex level on which
802 to calculate the rolling window, rather than the DataFrame's index.
803 Provided integer column is ignored and excluded from result since
804 an integer index is not used to calculate the rolling window.
805 axis : int or str, default 0
806 closed : str, default None
807 Make the interval closed on the 'right', 'left', 'both' or
808 'neither' endpoints.
809 For offset-based windows, it defaults to 'right'.
810 For fixed windows, defaults to 'both'. Remaining cases not implemented
811 for fixed windows.
813 Returns
814 -------
815 a Window or Rolling sub-classed for the particular operation
817 See Also
818 --------
819 expanding : Provides expanding transformations.
820 ewm : Provides exponential weighted functions.
822 Notes
823 -----
824 By default, the result is set to the right edge of the window. This can be
825 changed to the center of the window by setting ``center=True``.
827 To learn more about the offsets & frequency strings, please see `this link
828 <https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#offset-aliases>`__.
830 The recognized win_types are:
832 * ``boxcar``
833 * ``triang``
834 * ``blackman``
835 * ``hamming``
836 * ``bartlett``
837 * ``parzen``
838 * ``bohman``
839 * ``blackmanharris``
840 * ``nuttall``
841 * ``barthann``
842 * ``kaiser`` (needs beta)
843 * ``gaussian`` (needs std)
844 * ``general_gaussian`` (needs power, width)
845 * ``slepian`` (needs width)
846 * ``exponential`` (needs tau), center is set to None.
848 If ``win_type=None`` all points are evenly weighted. To learn more about
849 different window types see `scipy.signal window functions
850 <https://docs.scipy.org/doc/scipy/reference/signal.html#window-functions>`__.
852 Examples
853 --------
855 >>> df = pd.DataFrame({'B': [0, 1, 2, np.nan, 4]})
856 >>> df
857 B
858 0 0.0
859 1 1.0
860 2 2.0
861 3 NaN
862 4 4.0
864 Rolling sum with a window length of 2, using the 'triang'
865 window type.
867 >>> df.rolling(2, win_type='triang').sum()
868 B
869 0 NaN
870 1 0.5
871 2 1.5
872 3 NaN
873 4 NaN
875 Rolling sum with a window length of 2, using the 'gaussian'
876 window type (note how we need to specify std).
878 >>> df.rolling(2, win_type='gaussian').sum(std=3)
879 B
880 0 NaN
881 1 0.986207
882 2 2.958621
883 3 NaN
884 4 NaN
886 Rolling sum with a window length of 2, min_periods defaults
887 to the window length.
889 >>> df.rolling(2).sum()
890 B
891 0 NaN
892 1 1.0
893 2 3.0
894 3 NaN
895 4 NaN
897 Same as above, but explicitly set the min_periods
899 >>> df.rolling(2, min_periods=1).sum()
900 B
901 0 0.0
902 1 1.0
903 2 3.0
904 3 2.0
905 4 4.0
907 A ragged (meaning not-a-regular frequency), time-indexed DataFrame
909 >>> df = pd.DataFrame({'B': [0, 1, 2, np.nan, 4]},
910 ... index = [pd.Timestamp('20130101 09:00:00'),
911 ... pd.Timestamp('20130101 09:00:02'),
912 ... pd.Timestamp('20130101 09:00:03'),
913 ... pd.Timestamp('20130101 09:00:05'),
914 ... pd.Timestamp('20130101 09:00:06')])
916 >>> df
917 B
918 2013-01-01 09:00:00 0.0
919 2013-01-01 09:00:02 1.0
920 2013-01-01 09:00:03 2.0
921 2013-01-01 09:00:05 NaN
922 2013-01-01 09:00:06 4.0
924 Contrasting to an integer rolling window, this will roll a variable
925 length window corresponding to the time period.
926 The default for min_periods is 1.
928 >>> df.rolling('2s').sum()
929 B
930 2013-01-01 09:00:00 0.0
931 2013-01-01 09:00:02 1.0
932 2013-01-01 09:00:03 3.0
933 2013-01-01 09:00:05 NaN
934 2013-01-01 09:00:06 4.0
935 """
def validate(self):
    """
    Run the base validation, then check ``window`` and ``win_type``.

    Sequence windows (list, tuple, ndarray) are accepted as they are.
    Integer windows must be positive, require scipy, and need a string
    ``win_type`` that names an attribute of ``scipy.signal``.
    ``BaseIndexer`` windows raise ``NotImplementedError``; anything else
    raises ``ValueError``.
    """
    super().validate()

    window = self.window
    if isinstance(window, BaseIndexer):
        raise NotImplementedError(
            "BaseIndexer subclasses not implemented with win_types."
        )
    if isinstance(window, (list, tuple, np.ndarray)):
        return
    if not is_integer(window):
        raise ValueError(f"Invalid window {window}")

    if window <= 0:
        raise ValueError("window must be > 0 ")
    import_optional_dependency(
        "scipy", extra="Scipy is required to generate window weight."
    )
    import scipy.signal as sig

    win_type = self.win_type
    if not isinstance(win_type, str) or getattr(sig, win_type, None) is None:
        raise ValueError(f"Invalid win_type {self.win_type}")
962 def _get_win_type(self, kwargs: Dict) -> Union[str, Tuple]:
963 """
964 Extract arguments for the window type, provide validation for it
965 and return the validated window type.
967 Parameters
968 ----------
969 kwargs : dict
971 Returns
972 -------
973 win_type : str, or tuple
974 """
975 # the below may pop from kwargs
976 def _validate_win_type(win_type, kwargs):
977 arg_map = {
978 "kaiser": ["beta"],
979 "gaussian": ["std"],
980 "general_gaussian": ["power", "width"],
981 "slepian": ["width"],
982 "exponential": ["tau"],
983 }
985 if win_type in arg_map:
986 win_args = _pop_args(win_type, arg_map[win_type], kwargs)
987 if win_type == "exponential":
988 # exponential window requires the first arg (center)
989 # to be set to None (necessary for symmetric window)
990 win_args.insert(0, None)
992 return tuple([win_type] + win_args)
994 return win_type
996 def _pop_args(win_type, arg_names, kwargs):
997 all_args = []
998 for n in arg_names:
999 if n not in kwargs:
1000 raise ValueError(f"{win_type} window requires {n}")
1001 all_args.append(kwargs.pop(n))
1002 return all_args
1004 return _validate_win_type(self.win_type, kwargs)
1006 def _get_window(
1007 self, other=None, win_type: Optional[Union[str, Tuple]] = None
1008 ) -> np.ndarray:
1009 """
1010 Get the window, weights.
1012 Parameters
1013 ----------
1014 other :
1015 ignored, exists for compatibility
1016 win_type : str, or tuple
1017 type of window to create
1019 Returns
1020 -------
1021 window : ndarray
1022 the window, weights
1023 """
1025 window = self.window
1026 if isinstance(window, (list, tuple, np.ndarray)):
1027 return com.asarray_tuplesafe(window).astype(float)
1028 elif is_integer(window):
1029 import scipy.signal as sig
1031 # GH #15662. `False` makes symmetric window, rather than periodic.
1032 return sig.get_window(win_type, window, False).astype(float)
1034 _agg_see_also_doc = dedent(
1035 """
1036 See Also
1037 --------
1038 pandas.DataFrame.rolling.aggregate
1039 pandas.DataFrame.aggregate
1040 """
1041 )
1043 _agg_examples_doc = dedent(
1044 """
1045 Examples
1046 --------
1048 >>> df = pd.DataFrame(np.random.randn(10, 3), columns=['A', 'B', 'C'])
1049 >>> df
1050 A B C
1051 0 -2.385977 -0.102758 0.438822
1052 1 -1.004295 0.905829 -0.954544
1053 2 0.735167 -0.165272 -1.619346
1054 3 -0.702657 -1.340923 -0.706334
1055 4 -0.246845 0.211596 -0.901819
1056 5 2.463718 3.157577 -1.380906
1057 6 -1.142255 2.340594 -0.039875
1058 7 1.396598 -1.647453 1.677227
1059 8 -0.543425 1.761277 -0.220481
1060 9 -0.640505 0.289374 -1.550670
1062 >>> df.rolling(3, win_type='boxcar').agg('mean')
1063 A B C
1064 0 NaN NaN NaN
1065 1 NaN NaN NaN
1066 2 -0.885035 0.212600 -0.711689
1067 3 -0.323928 -0.200122 -1.093408
1068 4 -0.071445 -0.431533 -1.075833
1069 5 0.504739 0.676083 -0.996353
1070 6 0.358206 1.903256 -0.774200
1071 7 0.906020 1.283573 0.085482
1072 8 -0.096361 0.818139 0.472290
1073 9 0.070889 0.134399 -0.031308
1074 """
1075 )
@Substitution(
    see_also=_agg_see_also_doc,
    examples=_agg_examples_doc,
    versionadded="",
    klass="Series/DataFrame",
    axis="",
)
@Appender(_shared_docs["aggregate"])
def aggregate(self, func, *args, **kwargs):
    result, how = self._aggregate(func, *args, **kwargs)
    # when ``_aggregate`` yields nothing, ``func`` must be applied
    # directly to this window object
    return func(self) if result is None else result


agg = aggregate
@Substitution(name="window")
@Appender(_shared_docs["sum"])
def sum(self, *args, **kwargs):
    # ``args``/``kwargs`` are only checked by the numpy-compat validator;
    # ``kwargs`` is then forwarded to ``_apply``.
    nv.validate_window_func("sum", args, kwargs)
    weighted_sum = get_weighted_roll_func(self._get_roll_func("roll_weighted_sum"))
    return self._apply(
        weighted_sum, center=self.center, is_weighted=True, name="sum", **kwargs
    )
@Substitution(name="window")
@Appender(_shared_docs["mean"])
def mean(self, *args, **kwargs):
    # ``args``/``kwargs`` are only checked by the numpy-compat validator;
    # ``kwargs`` is then forwarded to ``_apply``.
    nv.validate_window_func("mean", args, kwargs)
    weighted_mean = get_weighted_roll_func(
        self._get_roll_func("roll_weighted_mean")
    )
    return self._apply(
        weighted_mean, center=self.center, is_weighted=True, name="mean", **kwargs
    )
@Substitution(name="window", versionadded="\n.. versionadded:: 1.0.0\n")
@Appender(_shared_docs["var"])
def var(self, ddof=1, *args, **kwargs):
    nv.validate_window_func("var", args, kwargs)
    roll_var = partial(self._get_roll_func("roll_weighted_var"), ddof=ddof)
    weighted_var = get_weighted_roll_func(roll_var)
    # ``std`` passes ``name="std"``; drop it so it cannot clash with the
    # explicit ``name="var"`` given to ``_apply`` below
    kwargs.pop("name", None)
    return self._apply(
        weighted_var, center=self.center, is_weighted=True, name="var", **kwargs
    )
@Substitution(name="window", versionadded="\n.. versionadded:: 1.0.0\n")
@Appender(_shared_docs["std"])
def std(self, ddof=1, *args, **kwargs):
    nv.validate_window_func("std", args, kwargs)
    # standard deviation is the ``zsqrt`` of the weighted variance
    variance = self.var(ddof=ddof, name="std", **kwargs)
    return zsqrt(variance)
class _Rolling(_Window):
    # Differs from ``_Window`` only in the class used to build derived
    # window objects.
    @property
    def _constructor(self):
        """Return the class used to build derived window objects (``Rolling``)."""
        return Rolling
1140class _Rolling_and_Expanding(_Rolling):
1142 _shared_docs["count"] = dedent(
1143 r"""
1144 The %(name)s count of any non-NaN observations inside the window.
1146 Returns
1147 -------
1148 Series or DataFrame
1149 Returned object type is determined by the caller of the %(name)s
1150 calculation.
1152 See Also
1153 --------
1154 Series.%(name)s : Calling object with Series data.
1155 DataFrame.%(name)s : Calling object with DataFrames.
1156 DataFrame.count : Count of the full DataFrame.
1158 Examples
1159 --------
1160 >>> s = pd.Series([2, 3, np.nan, 10])
1161 >>> s.rolling(2).count()
1162 0 1.0
1163 1 2.0
1164 2 1.0
1165 3 1.0
1166 dtype: float64
1167 >>> s.rolling(3).count()
1168 0 1.0
1169 1 2.0
1170 2 2.0
1171 3 2.0
1172 dtype: float64
1173 >>> s.rolling(4).count()
1174 0 1.0
1175 1 2.0
1176 2 2.0
1177 3 3.0
1178 dtype: float64
1179 """
1180 )
def count(self):
    # Each block is turned into a 0/1 "not NA" indicator and summed over
    # a window built from the current settings (``min_periods`` falls
    # back to 0 when unset).
    blocks, obj = self._create_blocks()

    def rolling_not_na_sum(block):
        indicator = block.notna().astype(int)
        roller = self._constructor(
            indicator,
            window=self._get_window(),
            min_periods=self.min_periods or 0,
            center=self.center,
            axis=self.axis,
            closed=self.closed,
        )
        return roller.sum()

    results = [rolling_not_na_sum(b) for b in blocks]

    return self._wrap_results(results, blocks, obj)
1200 _shared_docs["apply"] = dedent(
1201 r"""
1202 The %(name)s function's apply function.
1204 Parameters
1205 ----------
1206 func : function
1207 Must produce a single value from an ndarray input if ``raw=True``
1208 or a single value from a Series if ``raw=False``. Can also accept a
1209 Numba JIT function with ``engine='numba'`` specified.
1211 .. versionchanged:: 1.0.0
1213 raw : bool, default None
1214 * ``False`` : passes each row or column as a Series to the
1215 function.
1216 * ``True`` : the passed function will receive ndarray
1217 objects instead.
1218 If you are just applying a NumPy reduction function this will
1219 achieve much better performance.
1220 engine : str, default 'cython'
1221 * ``'cython'`` : Runs rolling apply through C-extensions from cython.
1222 * ``'numba'`` : Runs rolling apply through JIT compiled code from numba.
1223 Only available when ``raw`` is set to ``True``.
1225 .. versionadded:: 1.0.0
1227 engine_kwargs : dict, default None
1228 * For ``'cython'`` engine, there are no accepted ``engine_kwargs``
1229 * For ``'numba'`` engine, the engine can accept ``nopython``, ``nogil``
1230 and ``parallel`` dictionary keys. The values must either be ``True`` or
1231 ``False``. The default ``engine_kwargs`` for the ``'numba'`` engine is
1232 ``{'nopython': True, 'nogil': False, 'parallel': False}`` and will be
1233 applied to both the ``func`` and the ``apply`` rolling aggregation.
1235 .. versionadded:: 1.0.0
1237 args : tuple, default None
1238 Positional arguments to be passed into func.
1239 kwargs : dict, default None
1240 Keyword arguments to be passed into func.
1242 Returns
1243 -------
1244 Series or DataFrame
1245 Return type is determined by the caller.
1247 See Also
1248 --------
1249 Series.%(name)s : Series %(name)s.
1250 DataFrame.%(name)s : DataFrame %(name)s.
1252 Notes
1253 -----
1254 See :ref:`stats.rolling_apply` for extended documentation and performance
1255 considerations for the Numba engine.
1256 """
1257 )
def apply(
    self,
    func,
    raw: bool = False,
    engine: str = "cython",
    engine_kwargs: Optional[Dict] = None,
    args: Optional[Tuple] = None,
    kwargs: Optional[Dict] = None,
):
    """
    Apply ``func`` to every window.

    Parameters
    ----------
    func : callable
        Function evaluated on each window.
    raw : bool, default False
        Must be a boolean. Has to be true for the ``'numba'`` engine.
    engine : {'cython', 'numba'}, default 'cython'
        Which implementation evaluates the windows.
    engine_kwargs : dict, optional
        Only accepted by the ``'numba'`` engine.
    args : tuple, optional
        Positional arguments forwarded to ``func``.
    kwargs : dict, optional
        Keyword arguments forwarded to ``func``. The mapping passed in is
        left untouched; ``_level`` and ``floor`` are dropped from an
        internal copy only.

    Returns
    -------
    object
        The result of ``self._apply``.

    Raises
    ------
    ValueError
        If ``raw`` is not a boolean, if ``engine`` is unknown, if
        ``engine_kwargs`` is given for the cython engine, or if ``raw`` is
        false for the numba engine.
    """
    if args is None:
        args = ()
    # Work on a copy: the keys stripped below must not disappear from the
    # dict the caller handed in.
    kwargs = {} if kwargs is None else dict(kwargs)
    kwargs.pop("_level", None)
    kwargs.pop("floor", None)
    window = self._get_window()
    offset = calculate_center_offset(window) if self.center else 0
    if not is_bool(raw):
        raise ValueError("raw parameter must be `True` or `False`")

    if engine == "cython":
        if engine_kwargs is not None:
            raise ValueError("cython engine does not accept engine_kwargs")
        apply_func = self._generate_cython_apply_func(
            args, kwargs, raw, offset, func
        )
    elif engine == "numba":
        # Truthiness rather than ``raw is False``: a numpy boolean that
        # passed the is_bool() check above is not the ``False`` singleton.
        if not raw:
            raise ValueError("raw must be `True` when using the numba engine")
        if func in self._numba_func_cache:
            # Return an already compiled version of roll_apply if available
            apply_func = self._numba_func_cache[func]
        else:
            apply_func = generate_numba_apply_func(
                args, kwargs, func, engine_kwargs
            )
    else:
        raise ValueError("engine must be either 'numba' or 'cython'")

    # TODO: Why do we always pass center=False?
    # name=func & raw=raw for WindowGroupByMixin._apply
    return self._apply(
        apply_func,
        center=False,
        floor=0,
        name=func,
        use_numba_cache=engine == "numba",
        raw=raw,
        args=args,
        kwargs=kwargs,
    )
1311 def _generate_cython_apply_func(self, args, kwargs, raw, offset, func):
1312 from pandas import Series
1314 window_func = partial(
1315 self._get_cython_func_type("roll_generic"),
1316 args=args,
1317 kwargs=kwargs,
1318 raw=raw,
1319 offset=offset,
1320 func=func,
1321 )
1323 def apply_func(values, begin, end, min_periods, raw=raw):
1324 if not raw:
1325 values = Series(values, index=self.obj.index)
1326 return window_func(values, begin, end, min_periods)
1328 return apply_func
def sum(self, *args, **kwargs):
    """
    Compute the windowed sum using the ``roll_sum`` routine.

    ``*args``/``**kwargs`` are first checked by the numpy-compat validator.
    """
    nv.validate_window_func("sum", args, kwargs)
    roll_sum = self._get_cython_func_type("roll_sum")
    # ``floor`` is pinned to 0 in the call below, so a caller-supplied value
    # is discarded rather than passed twice.
    kwargs.pop("floor", None)
    return self._apply(
        roll_sum,
        center=self.center,
        floor=0,
        name="sum",
        **kwargs,
    )
1338 _shared_docs["max"] = dedent(
1339 """
1340 Calculate the %(name)s maximum.
1342 Parameters
1343 ----------
1344 *args, **kwargs
1345 Arguments and keyword arguments to be passed into func.
1346 """
1347 )
def max(self, *args, **kwargs):
    """
    Compute the windowed maximum using the ``roll_max`` routine.

    ``*args``/``**kwargs`` are first checked by the numpy-compat validator;
    the keyword arguments are then forwarded to ``_apply``.
    """
    nv.validate_window_func("max", args, kwargs)
    roll_max = self._get_cython_func_type("roll_max")
    return self._apply(
        roll_max,
        center=self.center,
        name="max",
        **kwargs,
    )
1354 _shared_docs["min"] = dedent(
1355 """
1356 Calculate the %(name)s minimum.
1358 Parameters
1359 ----------
1360 **kwargs
1361 Under Review.
1363 Returns
1364 -------
1365 Series or DataFrame
1366 Returned object type is determined by the caller of the %(name)s
1367 calculation.
1369 See Also
1370 --------
1371 Series.%(name)s : Calling object with a Series.
1372 DataFrame.%(name)s : Calling object with a DataFrame.
1373 Series.min : Similar method for Series.
1374 DataFrame.min : Similar method for DataFrame.
1376 Examples
1377 --------
1378 Performing a rolling minimum with a window size of 3.
1380 >>> s = pd.Series([4, 3, 5, 2, 6])
1381 >>> s.rolling(3).min()
1382 0 NaN
1383 1 NaN
1384 2 3.0
1385 3 2.0
1386 4 2.0
1387 dtype: float64
1388 """
1389 )
def min(self, *args, **kwargs):
    """
    Compute the windowed minimum using the ``roll_min`` routine.

    ``*args``/``**kwargs`` are first checked by the numpy-compat validator;
    the keyword arguments are then forwarded to ``_apply``.
    """
    nv.validate_window_func("min", args, kwargs)
    roll_min = self._get_cython_func_type("roll_min")
    return self._apply(
        roll_min,
        center=self.center,
        name="min",
        **kwargs,
    )
def mean(self, *args, **kwargs):
    """
    Compute the windowed mean using the ``roll_mean`` routine.

    ``*args``/``**kwargs`` are first checked by the numpy-compat validator;
    the keyword arguments are then forwarded to ``_apply``.
    """
    nv.validate_window_func("mean", args, kwargs)
    roll_mean = self._get_cython_func_type("roll_mean")
    return self._apply(
        roll_mean,
        center=self.center,
        name="mean",
        **kwargs,
    )
1401 _shared_docs["median"] = dedent(
1402 """
1403 Calculate the %(name)s median.
1405 Parameters
1406 ----------
1407 **kwargs
1408 For compatibility with other %(name)s methods. Has no effect
1409 on the computed median.
1411 Returns
1412 -------
1413 Series or DataFrame
1414 Returned type is the same as the original object.
1416 See Also
1417 --------
1418 Series.%(name)s : Calling object with Series data.
1419 DataFrame.%(name)s : Calling object with DataFrames.
1420 Series.median : Equivalent method for Series.
1421 DataFrame.median : Equivalent method for DataFrame.
1423 Examples
1424 --------
1425 Compute the rolling median of a series with a window size of 3.
1427 >>> s = pd.Series([0, 1, 2, 3, 4])
1428 >>> s.rolling(3).median()
1429 0 NaN
1430 1 NaN
1431 2 1.0
1432 3 2.0
1433 4 3.0
1434 dtype: float64
1435 """
1436 )
def median(self, **kwargs):
    """
    Compute the windowed median.

    The ``roll_median_c`` routine is pre-bound with the window returned by
    ``_get_window`` and handed to ``_apply`` together with ``**kwargs``.
    """
    roll_median = partial(
        self._get_roll_func("roll_median_c"),
        win=self._get_window(),
    )
    return self._apply(
        roll_median,
        center=self.center,
        name="median",
        **kwargs,
    )
def std(self, ddof=1, *args, **kwargs):
    """
    Compute the windowed standard deviation.

    The variance routine ``roll_var`` is evaluated with the requested
    ``ddof`` and its output is passed through ``zsqrt``.
    """
    nv.validate_window_func("std", args, kwargs)
    # require_min_periods is fixed to 1 below; drop any incoming value so it
    # is not supplied twice.
    kwargs.pop("require_min_periods", None)
    roll_var = self._get_cython_func_type("roll_var")

    def _std_from_var(values, begin, end, min_periods):
        variance = roll_var(values, begin, end, min_periods, ddof=ddof)
        return zsqrt(variance)

    # ddof passed again for compat with groupby.rolling
    return self._apply(
        _std_from_var,
        center=self.center,
        require_min_periods=1,
        name="std",
        ddof=ddof,
        **kwargs,
    )
def var(self, ddof=1, *args, **kwargs):
    """
    Compute the windowed variance using ``roll_var`` with the given ``ddof``.
    """
    nv.validate_window_func("var", args, kwargs)
    # require_min_periods is fixed to 1 below; drop any incoming value so it
    # is not supplied twice.
    kwargs.pop("require_min_periods", None)
    roll_var = self._get_cython_func_type("roll_var")
    variance_func = partial(roll_var, ddof=ddof)
    # ddof passed again for compat with groupby.rolling
    return self._apply(
        variance_func,
        center=self.center,
        require_min_periods=1,
        name="var",
        ddof=ddof,
        **kwargs,
    )
1475 _shared_docs[
1476 "skew"
1477 ] = """
1478 Unbiased %(name)s skewness.
1480 Parameters
1481 ----------
1482 **kwargs
1483 Keyword arguments to be passed into func.
1484 """
def skew(self, **kwargs):
    """
    Compute the windowed skewness using the ``roll_skew`` routine.

    ``require_min_periods`` is always 3; a caller-supplied value is ignored.
    """
    roll_skew = self._get_cython_func_type("roll_skew")
    # discard any incoming value so the fixed one below is the only one sent
    kwargs.pop("require_min_periods", None)
    fixed_options = {
        "center": self.center,
        "require_min_periods": 3,
        "name": "skew",
    }
    return self._apply(roll_skew, **fixed_options, **kwargs)
1497 _shared_docs["kurt"] = dedent(
1498 """
1499 Calculate unbiased %(name)s kurtosis.
1501 This function uses Fisher's definition of kurtosis without bias.
1503 Parameters
1504 ----------
1505 **kwargs
1506 Under Review.
1508 Returns
1509 -------
1510 Series or DataFrame
1511 Returned object type is determined by the caller of the %(name)s
1512 calculation.
1514 See Also
1515 --------
1516 Series.%(name)s : Calling object with Series data.
1517 DataFrame.%(name)s : Calling object with DataFrames.
1518 Series.kurt : Equivalent method for Series.
1519 DataFrame.kurt : Equivalent method for DataFrame.
1520 scipy.stats.skew : Third moment of a probability density.
1521 scipy.stats.kurtosis : Reference SciPy method.
1523 Notes
1524 -----
1525 A minimum of 4 periods is required for the %(name)s calculation.
1526 """
1527 )
def kurt(self, **kwargs):
    """
    Compute the windowed kurtosis using the ``roll_kurt`` routine.

    ``require_min_periods`` is always 4; a caller-supplied value is ignored.
    """
    roll_kurt = self._get_cython_func_type("roll_kurt")
    # discard any incoming value so the fixed one below is the only one sent
    kwargs.pop("require_min_periods", None)
    fixed_options = {
        "center": self.center,
        "require_min_periods": 4,
        "name": "kurt",
    }
    return self._apply(roll_kurt, **fixed_options, **kwargs)
1540 _shared_docs["quantile"] = dedent(
1541 """
1542 Calculate the %(name)s quantile.
1544 Parameters
1545 ----------
1546 quantile : float
1547 Quantile to compute. 0 <= quantile <= 1.
1548 interpolation : {'linear', 'lower', 'higher', 'midpoint', 'nearest'}
1549 .. versionadded:: 0.23.0
1551 This optional parameter specifies the interpolation method to use,
1552 when the desired quantile lies between two data points `i` and `j`:
1554 * linear: `i + (j - i) * fraction`, where `fraction` is the
1555 fractional part of the index surrounded by `i` and `j`.
1556 * lower: `i`.
1557 * higher: `j`.
1558 * nearest: `i` or `j` whichever is nearest.
1559 * midpoint: (`i` + `j`) / 2.
1560 **kwargs
1561 For compatibility with other %(name)s methods. Has no effect on
1562 the result.
1564 Returns
1565 -------
1566 Series or DataFrame
1567 Returned object type is determined by the caller of the %(name)s
1568 calculation.
1570 See Also
1571 --------
1572 Series.quantile : Computes value at the given quantile over all data
1573 in Series.
1574 DataFrame.quantile : Computes values at the given quantile over
1575 requested axis in DataFrame.
1577 Examples
1578 --------
1579 >>> s = pd.Series([1, 2, 3, 4])
1580 >>> s.rolling(2).quantile(.4, interpolation='lower')
1581 0 NaN
1582 1 1.0
1583 2 2.0
1584 3 3.0
1585 dtype: float64
1587 >>> s.rolling(2).quantile(.4, interpolation='midpoint')
1588 0 NaN
1589 1 1.5
1590 2 2.5
1591 3 3.5
1592 dtype: float64
1593 """
1594 )
def quantile(self, quantile, interpolation="linear", **kwargs):
    """
    Compute the windowed quantile.

    The extreme quantiles are served by the max/min routines; every other
    value goes through ``roll_quantile`` bound to the current window, the
    quantile and the interpolation method.
    """

    def _select_kernel():
        # 1.0 and 0.0 are exactly the window maximum and minimum.
        if quantile == 1.0:
            return self._get_cython_func_type("roll_max")
        if quantile == 0.0:
            return self._get_cython_func_type("roll_min")
        return partial(
            self._get_roll_func("roll_quantile"),
            win=self._get_window(),
            quantile=quantile,
            interpolation=interpolation,
        )

    kernel = _select_kernel()

    # Pass through for groupby.rolling
    kwargs.update(quantile=quantile, interpolation=interpolation)
    return self._apply(kernel, center=self.center, name="quantile", **kwargs)
1614 _shared_docs[
1615 "cov"
1616 ] = """
1617 Calculate the %(name)s sample covariance.
1619 Parameters
1620 ----------
1621 other : Series, DataFrame, or ndarray, optional
1622 If not supplied then will default to self and produce pairwise
1623 output.
1624 pairwise : bool, default None
1625 If False then only matching columns between self and other will be
1626 used and the output will be a DataFrame.
1627 If True then all pairwise combinations will be calculated and the
1628 output will be a MultiIndexed DataFrame in the case of DataFrame
1629 inputs. In the case of missing elements, only complete pairwise
1630 observations will be used.
1631 ddof : int, default 1
1632 Delta Degrees of Freedom. The divisor used in calculations
1633 is ``N - ddof``, where ``N`` represents the number of elements.
1634 **kwargs
1635 Keyword arguments to be passed into func.
1636 """
def cov(self, other=None, pairwise=None, ddof=1, **kwargs):
    """
    Compute the windowed sample covariance with ``other``.

    When ``other`` is omitted the selected object is paired with itself and
    ``pairwise`` defaults to True. The per-pair value is
    ``(mean(X * Y) - mean(X) * mean(Y)) * count / (count - ddof)``.
    """
    if other is None:
        other = self._selected_obj
        # only default unset
        if pairwise is None:
            pairwise = True
    other = self._shallow_copy(other)

    # GH 16058: offset window
    window = self.win_freq if self.is_freq_type else self._get_window(other)

    def _get_cov(X, Y):
        # GH #12373 : rolling functions error on float32 data
        # to avoid potential overflow, cast the data to float64
        X = X.astype("float64")
        Y = Y.astype("float64")

        def mean(x):
            roller = x.rolling(window, self.min_periods, center=self.center)
            return roller.mean(**kwargs)

        pair_roller = (X + Y).rolling(
            window=window, min_periods=0, center=self.center
        )
        count = pair_roller.count(**kwargs)
        bias_adj = count / (count - ddof)
        return (mean(X * Y) - mean(X) * mean(Y)) * bias_adj

    return _flex_binary_moment(
        self._selected_obj,
        other._selected_obj,
        _get_cov,
        pairwise=bool(pairwise),
    )
1671 _shared_docs["corr"] = dedent(
1672 """
1673 Calculate %(name)s correlation.
1675 Parameters
1676 ----------
1677 other : Series, DataFrame, or ndarray, optional
1678 If not supplied then will default to self.
1679 pairwise : bool, default None
1680 Calculate pairwise combinations of columns within a
1681 DataFrame. If `other` is not specified, defaults to `True`,
1682 otherwise defaults to `False`.
1683 Not relevant for :class:`~pandas.Series`.
1684 **kwargs
1685 Unused.
1687 Returns
1688 -------
1689 Series or DataFrame
1690 Returned object type is determined by the caller of the
1691 %(name)s calculation.
1693 See Also
1694 --------
1695 Series.%(name)s : Calling object with Series data.
1696 DataFrame.%(name)s : Calling object with DataFrames.
1697 Series.corr : Equivalent method for Series.
1698 DataFrame.corr : Equivalent method for DataFrame.
1699 %(name)s.cov : Similar method to calculate covariance.
1700 numpy.corrcoef : NumPy Pearson's correlation calculation.
1702 Notes
1703 -----
1704 This function uses Pearson's definition of correlation
1705 (https://en.wikipedia.org/wiki/Pearson_correlation_coefficient).
1707 When `other` is not specified, the output will be self correlation (e.g.
1708 all 1's), except for :class:`~pandas.DataFrame` inputs with `pairwise`
1709 set to `True`.
1711 Function will return ``NaN`` for correlations of equal valued sequences;
1712 this is the result of a 0/0 division error.
1714 When `pairwise` is set to `False`, only matching columns between `self` and
1715 `other` will be used.
1717 When `pairwise` is set to `True`, the output will be a MultiIndex DataFrame
1718 with the original index on the first level, and the `other` DataFrame
1719 columns on the second level.
1721 In the case of missing elements, only complete pairwise observations
1722 will be used.
1724 Examples
1725 --------
1726 The below example shows a rolling calculation with a window size of
1727 four matching the equivalent function call using :meth:`numpy.corrcoef`.
1729 >>> v1 = [3, 3, 3, 5, 8]
1730 >>> v2 = [3, 4, 4, 4, 8]
1731 >>> # numpy returns a 2X2 array, the correlation coefficient
1732 >>> # is the number at entry [0][1]
1733 >>> print(f"{np.corrcoef(v1[:-1], v2[:-1])[0][1]:.6f}")
1734 0.333333
1735 >>> print(f"{np.corrcoef(v1[1:], v2[1:])[0][1]:.6f}")
1736 0.916949
1737 >>> s1 = pd.Series(v1)
1738 >>> s2 = pd.Series(v2)
1739 >>> s1.rolling(4).corr(s2)
1740 0 NaN
1741 1 NaN
1742 2 NaN
1743 3 0.333333
1744 4 0.916949
1745 dtype: float64
1747 The below example shows a similar rolling calculation on a
1748 DataFrame using the pairwise option.
1750 >>> matrix = np.array([[51., 35.], [49., 30.], [47., 32.],\
1751 [46., 31.], [50., 36.]])
1752 >>> print(np.corrcoef(matrix[:-1,0], matrix[:-1,1]).round(7))
1753 [[1. 0.6263001]
1754 [0.6263001 1. ]]
1755 >>> print(np.corrcoef(matrix[1:,0], matrix[1:,1]).round(7))
1756 [[1. 0.5553681]
1757 [0.5553681 1. ]]
1758 >>> df = pd.DataFrame(matrix, columns=['X','Y'])
1759 >>> df
1760 X Y
1761 0 51.0 35.0
1762 1 49.0 30.0
1763 2 47.0 32.0
1764 3 46.0 31.0
1765 4 50.0 36.0
1766 >>> df.rolling(4).corr(pairwise=True)
1767 X Y
1768 0 X NaN NaN
1769 Y NaN NaN
1770 1 X NaN NaN
1771 Y NaN NaN
1772 2 X NaN NaN
1773 Y NaN NaN
1774 3 X 1.000000 0.626300
1775 Y 0.626300 1.000000
1776 4 X 1.000000 0.555368
1777 Y 0.555368 1.000000
1778 """
1779 )
def corr(self, other=None, pairwise=None, **kwargs):
    """
    Compute the windowed correlation with ``other``.

    When ``other`` is omitted the selected object is paired with itself and
    ``pairwise`` defaults to True. The per-pair value is the rolling
    covariance divided by the product of the two rolling standard
    deviations.
    """
    if other is None:
        other = self._selected_obj
        # only default unset
        if pairwise is None:
            pairwise = True
    other = self._shallow_copy(other)
    window = self.win_freq if self.is_freq_type else self._get_window(other)

    def _get_corr(a, b):
        roll_options = dict(
            window=window, min_periods=self.min_periods, center=self.center
        )
        a = a.rolling(**roll_options)
        b = b.rolling(**roll_options)

        covariance = a.cov(b, **kwargs)
        return covariance / (a.std(**kwargs) * b.std(**kwargs))

    return _flex_binary_moment(
        self._selected_obj,
        other._selected_obj,
        _get_corr,
        pairwise=bool(pairwise),
    )
1804class Rolling(_Rolling_and_Expanding):
@cache_readonly
def is_datetimelike(self) -> bool:
    # True when the index being rolled over (``_on``) is a datetime,
    # timedelta or period index.
    datetimelike_indexes = (ABCDatetimeIndex, ABCTimedeltaIndex, ABCPeriodIndex)
    return isinstance(self._on, datetimelike_indexes)
@cache_readonly
def _on(self) -> Index:
    # Resolve ``on`` to the Index the window is computed over.
    if self.on is None:
        # no explicit ``on``: fall back to the labels along the rolling axis
        # (anything other than axis 0 means axis 1, i.e. the columns)
        return self.obj.index if self.axis == 0 else self.obj.columns

    if isinstance(self.on, Index):
        return self.on

    if isinstance(self.obj, ABCDataFrame) and self.on in self.obj.columns:
        # ``on`` names a column of the frame
        return Index(self.obj[self.on])

    raise ValueError(
        f"invalid on specified as {self.on}, "
        "must be a column (of DataFrame), an Index "
        "or None"
    )
def validate(self):
    """
    Validate the window specification, normalising offset-based windows.

    For an offset-like window on an empty or datetimelike object the
    original window is kept in ``win_freq``, ``window`` becomes the offset's
    nanosecond count, ``win_type`` becomes ``"freq"`` and an unset
    ``min_periods`` becomes 1.

    Raises
    ------
    NotImplementedError
        If ``center`` is set together with an offset-based window.
    ValueError
        If the window is not an integer or is negative, or if ``closed`` is
        given for a non-datetimelike object.
    """
    super().validate()

    # we allow rolling on a datetimelike index
    offset_based = (self.obj.empty or self.is_datetimelike) and isinstance(
        self.window, (str, ABCDateOffset, timedelta)
    )

    if offset_based:
        self._validate_monotonic()
        freq = self._validate_freq()

        # we don't allow center
        if self.center:
            raise NotImplementedError(
                "center is not implemented for datetimelike and offset based windows"
            )

        # this will raise ValueError on non-fixed freqs
        self.win_freq = self.window
        self.window = freq.nanos
        self.win_type = "freq"

        # min_periods must be an integer
        if self.min_periods is None:
            self.min_periods = 1

    elif isinstance(self.window, BaseIndexer):
        # Passed BaseIndexer subclass should handle all other rolling kwargs
        return
    else:
        if not is_integer(self.window):
            raise ValueError("window must be an integer")
        if self.window < 0:
            raise ValueError("window must be non-negative")

    if self.closed is not None and not self.is_datetimelike:
        raise ValueError(
            "closed only implemented for datetimelike and offset based windows"
        )
1871 def _validate_monotonic(self):
1872 """
1873 Validate monotonic (increasing or decreasing).
1874 """
1875 if not (self._on.is_monotonic_increasing or self._on.is_monotonic_decreasing):
1876 formatted = self.on
1877 if self.on is None:
1878 formatted = "index"
1879 raise ValueError(f"{formatted} must be monotonic")
1881 def _validate_freq(self):
1882 """
1883 Validate & return window frequency.
1884 """
1885 from pandas.tseries.frequencies import to_offset
1887 try:
1888 return to_offset(self.window)
1889 except (TypeError, ValueError):
1890 raise ValueError(
1891 f"passed window {self.window} is not "
1892 "compatible with a datetimelike "
1893 "index"
1894 )
1896 _agg_see_also_doc = dedent(
1897 """
1898 See Also
1899 --------
1900 Series.rolling
1901 DataFrame.rolling
1902 """
1903 )
1905 _agg_examples_doc = dedent(
1906 """
1907 Examples
1908 --------
1910 >>> df = pd.DataFrame(np.random.randn(10, 3), columns=['A', 'B', 'C'])
1911 >>> df
1912 A B C
1913 0 -2.385977 -0.102758 0.438822
1914 1 -1.004295 0.905829 -0.954544
1915 2 0.735167 -0.165272 -1.619346
1916 3 -0.702657 -1.340923 -0.706334
1917 4 -0.246845 0.211596 -0.901819
1918 5 2.463718 3.157577 -1.380906
1919 6 -1.142255 2.340594 -0.039875
1920 7 1.396598 -1.647453 1.677227
1921 8 -0.543425 1.761277 -0.220481
1922 9 -0.640505 0.289374 -1.550670
1924 >>> df.rolling(3).sum()
1925 A B C
1926 0 NaN NaN NaN
1927 1 NaN NaN NaN
1928 2 -2.655105 0.637799 -2.135068
1929 3 -0.971785 -0.600366 -3.280224
1930 4 -0.214334 -1.294599 -3.227500
1931 5 1.514216 2.028250 -2.989060
1932 6 1.074618 5.709767 -2.322600
1933 7 2.718061 3.850718 0.256446
1934 8 -0.289082 2.454418 1.416871
1935 9 0.212668 0.403198 -0.093924
1937 >>> df.rolling(3).agg({'A':'sum', 'B':'min'})
1938 A B
1939 0 NaN NaN
1940 1 NaN NaN
1941 2 -2.655105 -0.165272
1942 3 -0.971785 -1.340923
1943 4 -0.214334 -1.340923
1944 5 1.514216 -1.340923
1945 6 1.074618 0.211596
1946 7 2.718061 -1.647453
1947 8 -0.289082 -1.647453
1948 9 0.212668 -1.647453
1949 """
1950 )
# The user-facing documentation is assembled by the decorators from the
# shared "aggregate" template plus the See Also / Examples text defined just
# above, so the function deliberately carries no docstring of its own.
@Substitution(
    see_also=_agg_see_also_doc,
    examples=_agg_examples_doc,
    versionadded="",
    klass="Series/Dataframe",
    axis="",
)
@Appender(_shared_docs["aggregate"])
def aggregate(self, func, *args, **kwargs):
    # Pure pass-through to the parent implementation.
    return super().aggregate(func, *args, **kwargs)

# ``agg`` is an alias bound to the very same function object.
agg = aggregate
@Substitution(name="rolling")
@Appender(_shared_docs["count"])
def count(self):
    # Documentation is supplied by the decorators above.
    if not self.is_freq_type:
        # ordinary windows use the shared parent implementation
        return super().count()

    # different impl for freq counting
    roll_count = self._get_roll_func("roll_count")
    return self._apply(roll_count, center=self.center, name="count")
# Documentation is supplied by the decorators from the shared "apply"
# template with ``name`` substituted as "rolling".
@Substitution(name="rolling")
@Appender(_shared_docs["apply"])
def apply(
    self,
    func,
    raw=False,
    engine="cython",
    engine_kwargs=None,
    args=None,
    kwargs=None,
):
    # Every argument is forwarded unchanged, by keyword, to the parent
    # implementation; no Rolling-specific logic is added here.
    return super().apply(
        func,
        raw=raw,
        engine=engine,
        engine_kwargs=engine_kwargs,
        args=args,
        kwargs=kwargs,
    )
# Documentation is supplied by the decorators (shared "sum" template).
@Substitution(name="rolling")
@Appender(_shared_docs["sum"])
def sum(self, *args, **kwargs):
    # Check the extra arguments with the numpy-compat validator for rolling
    # functions, then delegate to the parent implementation.
    nv.validate_rolling_func("sum", args, kwargs)
    return super().sum(*args, **kwargs)
# Documentation is supplied by the decorators: the shared "max" text is
# attached first, then ``_doc_template``, then ``name`` is substituted.
@Substitution(name="rolling")
@Appender(_doc_template)
@Appender(_shared_docs["max"])
def max(self, *args, **kwargs):
    # Check the extra arguments with the numpy-compat validator for rolling
    # functions, then delegate to the parent implementation.
    nv.validate_rolling_func("max", args, kwargs)
    return super().max(*args, **kwargs)
# Documentation is supplied by the decorators (shared "min" template).
@Substitution(name="rolling")
@Appender(_shared_docs["min"])
def min(self, *args, **kwargs):
    # Check the extra arguments with the numpy-compat validator for rolling
    # functions, then delegate to the parent implementation.
    nv.validate_rolling_func("min", args, kwargs)
    return super().min(*args, **kwargs)
# Documentation is supplied by the decorators (shared "mean" template).
@Substitution(name="rolling")
@Appender(_shared_docs["mean"])
def mean(self, *args, **kwargs):
    # Check the extra arguments with the numpy-compat validator for rolling
    # functions, then delegate to the parent implementation.
    nv.validate_rolling_func("mean", args, kwargs)
    return super().mean(*args, **kwargs)
# Documentation is supplied by the decorators (shared "median" template).
@Substitution(name="rolling")
@Appender(_shared_docs["median"])
def median(self, **kwargs):
    # Pure pass-through to the parent implementation.
    return super().median(**kwargs)
# Documentation is supplied by the decorators (shared "std" template, with
# an empty ``versionadded`` substitution).
@Substitution(name="rolling", versionadded="")
@Appender(_shared_docs["std"])
def std(self, ddof=1, *args, **kwargs):
    # The positional extras are only validated here; they are not forwarded
    # to the parent call, which receives ``ddof`` and the keyword arguments.
    nv.validate_rolling_func("std", args, kwargs)
    return super().std(ddof=ddof, **kwargs)
# Documentation is supplied by the decorators (shared "var" template, with
# an empty ``versionadded`` substitution).
@Substitution(name="rolling", versionadded="")
@Appender(_shared_docs["var"])
def var(self, ddof=1, *args, **kwargs):
    # The positional extras are only validated here; they are not forwarded
    # to the parent call, which receives ``ddof`` and the keyword arguments.
    nv.validate_rolling_func("var", args, kwargs)
    return super().var(ddof=ddof, **kwargs)
# Documentation is supplied by the decorators: the shared "skew" text is
# attached first, then ``_doc_template``, then ``name`` is substituted.
@Substitution(name="rolling")
@Appender(_doc_template)
@Appender(_shared_docs["skew"])
def skew(self, **kwargs):
    # Pure pass-through to the parent implementation.
    return super().skew(**kwargs)
2044 _agg_doc = dedent(
2045 """
2046 Examples
2047 --------
2049 The example below will show a rolling calculation with a window size of
2050 four matching the equivalent function call using `scipy.stats`.
2052 >>> arr = [1, 2, 3, 4, 999]
2053 >>> import scipy.stats
2054 >>> print(f"{scipy.stats.kurtosis(arr[:-1], bias=False):.6f}")
2055 -1.200000
2056 >>> print(f"{scipy.stats.kurtosis(arr[1:], bias=False):.6f}")
2057 3.999946
2058 >>> s = pd.Series(arr)
2059 >>> s.rolling(4).kurt()
2060 0 NaN
2061 1 NaN
2062 2 NaN
2063 3 -1.200000
2064 4 3.999946
2065 dtype: float64
2066 """
2067 )
# Decorators run bottom-up: the shared "kurt" text is attached, ``name`` is
# substituted, and only then is the examples text in ``_agg_doc`` attached,
# so that examples block is not subject to the substitution.
@Appender(_agg_doc)
@Substitution(name="rolling")
@Appender(_shared_docs["kurt"])
def kurt(self, **kwargs):
    # Pure pass-through to the parent implementation.
    return super().kurt(**kwargs)
# Documentation is supplied by the decorators (shared "quantile" template).
@Substitution(name="rolling")
@Appender(_shared_docs["quantile"])
def quantile(self, quantile, interpolation="linear", **kwargs):
    # Arguments are forwarded by keyword, unchanged, to the parent
    # implementation.
    return super().quantile(
        quantile=quantile, interpolation=interpolation, **kwargs
    )
# Documentation is supplied by the decorators: the shared "cov" text is
# attached first, then ``_doc_template``, then ``name`` is substituted.
@Substitution(name="rolling")
@Appender(_doc_template)
@Appender(_shared_docs["cov"])
def cov(self, other=None, pairwise=None, ddof=1, **kwargs):
    # Arguments are forwarded by keyword, unchanged, to the parent
    # implementation.
    return super().cov(other=other, pairwise=pairwise, ddof=ddof, **kwargs)
# Documentation is supplied by the decorators (shared "corr" template).
@Substitution(name="rolling")
@Appender(_shared_docs["corr"])
def corr(self, other=None, pairwise=None, **kwargs):
    # Arguments are forwarded by keyword, unchanged, to the parent
    # implementation.
    return super().corr(other=other, pairwise=pairwise, **kwargs)
# Rolling reuses the class docstring written on Window.
Rolling.__doc__ = Window.__doc__
class RollingGroupby(WindowGroupByMixin, Rolling):
    """
    Rolling window operations applied within groupby groups.
    """

    @property
    def _constructor(self):
        """Window class used when a new window object has to be built."""
        return Rolling

    def _gotitem(self, key, ndim, subset=None):
        """
        Select ``key``, first moving an ``on`` column into the index.

        When ``on`` is set, the grouped object is re-indexed by ``_on`` and
        ``on`` is reset to None before delegating to the parent ``_gotitem``.
        """
        # we are setting the index on the actual object
        # here so our index is carried thru to the selected obj
        # when we do the splitting for the groupby
        if self.on is not None:
            reindexed = self._groupby.obj.set_index(self._on)
            self._groupby.obj = reindexed
            self.on = None
        return super()._gotitem(key, ndim, subset=subset)

    def _validate_monotonic(self):
        """
        Skip the monotonicity check.

        For groupby.rolling this is intentionally a no-op because the
        validation has already happened at a higher level.
        """