Coverage for /home/martinb/.local/share/virtualenvs/camcops/lib/python3.6/site-packages/pandas/core/window/ewm.py : 35%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1from textwrap import dedent
3import numpy as np
5import pandas._libs.window.aggregations as window_aggregations
6from pandas.compat.numpy import function as nv
7from pandas.util._decorators import Appender, Substitution
9from pandas.core.dtypes.generic import ABCDataFrame
11from pandas.core.base import DataError
12from pandas.core.window.common import (
13 _doc_template,
14 _get_center_of_mass,
15 _shared_docs,
16 zsqrt,
17)
18from pandas.core.window.rolling import _flex_binary_moment, _Rolling
# Docstring fragment describing the ``bias`` parameter.  It is appended (via
# ``Appender``) to the docstrings of ``EWM.std`` and ``EWM.var``, so its body
# is indented to line up with a method-level docstring.
_bias_template = """
        Parameters
        ----------
        bias : bool, default False
            Use a standard estimation bias correction.
        *args, **kwargs
            Arguments and keyword arguments to be passed into func.
"""
class EWM(_Rolling):
    r"""
    Provide exponential weighted functions.

    Parameters
    ----------
    com : float, optional
        Specify decay in terms of center of mass,
        :math:`\alpha = 1 / (1 + com),\text{ for } com \geq 0`.
    span : float, optional
        Specify decay in terms of span,
        :math:`\alpha = 2 / (span + 1),\text{ for } span \geq 1`.
    halflife : float, optional
        Specify decay in terms of half-life,
        :math:`\alpha = 1 - exp(log(0.5) / halflife),\text{ for } halflife > 0`.
    alpha : float, optional
        Specify smoothing factor :math:`\alpha` directly,
        :math:`0 < \alpha \leq 1`.
    min_periods : int, default 0
        Minimum number of observations in window required to have a value
        (otherwise result is NA).
    adjust : bool, default True
        Divide by decaying adjustment factor in beginning periods to account
        for imbalance in relative weightings
        (viewing EWMA as a moving average).
    ignore_na : bool, default False
        Ignore missing values when calculating weights;
        specify True to reproduce pre-0.15.0 behavior.
    axis : {0 or 'index', 1 or 'columns'}, default 0
        The axis to use. The value 0 identifies the rows, and 1
        identifies the columns.

    Returns
    -------
    DataFrame
        A Window sub-classed for the particular operation.

    See Also
    --------
    rolling : Provides rolling window calculations.
    expanding : Provides expanding transformations.

    Notes
    -----
    Exactly one of center of mass, span, half-life, and alpha must be provided.
    Allowed values and relationship between the parameters are specified in the
    parameter descriptions above; see the link at the end of this section for
    a detailed explanation.

    When adjust is True (default), weighted averages are calculated using
    weights (1-alpha)**(n-1), (1-alpha)**(n-2), ..., 1-alpha, 1.

    When adjust is False, weighted averages are calculated recursively as:
       weighted_average[0] = arg[0];
       weighted_average[i] = (1-alpha)*weighted_average[i-1] + alpha*arg[i].

    When ignore_na is False (default), weights are based on absolute positions.
    For example, the weights of x and y used in calculating the final weighted
    average of [x, None, y] are (1-alpha)**2 and 1 (if adjust is True), and
    (1-alpha)**2 and alpha (if adjust is False).

    When ignore_na is True (reproducing pre-0.15.0 behavior), weights are based
    on relative positions. For example, the weights of x and y used in
    calculating the final weighted average of [x, None, y] are 1-alpha and 1
    (if adjust is True), and 1-alpha and alpha (if adjust is False).

    More details can be found at
    https://pandas.pydata.org/pandas-docs/stable/user_guide/computation.html#exponentially-weighted-windows

    Examples
    --------

    >>> df = pd.DataFrame({'B': [0, 1, 2, np.nan, 4]})
    >>> df
         B
    0  0.0
    1  1.0
    2  2.0
    3  NaN
    4  4.0

    >>> df.ewm(com=0.5).mean()
              B
    0  0.000000
    1  0.750000
    2  1.615385
    3  1.615385
    4  3.670213
    """
    _attributes = ["com", "min_periods", "adjust", "ignore_na", "axis"]

    def __init__(
        self,
        obj,
        com=None,
        span=None,
        halflife=None,
        alpha=None,
        min_periods=0,
        adjust=True,
        ignore_na=False,
        axis=0,
    ):
        self.obj = obj
        # Whichever decay specification was supplied is normalised to a
        # single center-of-mass value; only ``com`` is stored.
        self.com = _get_center_of_mass(com, span, halflife, alpha)
        self.min_periods = min_periods
        self.adjust = adjust
        self.ignore_na = ignore_na
        self.axis = axis
        self.on = None

    @property
    def _constructor(self):
        return EWM

    _agg_see_also_doc = dedent(
        """
    See Also
    --------
    pandas.DataFrame.rolling.aggregate
    """
    )

    _agg_examples_doc = dedent(
        """
    Examples
    --------

    >>> df = pd.DataFrame(np.random.randn(10, 3), columns=['A', 'B', 'C'])
    >>> df
              A         B         C
    0 -2.385977 -0.102758  0.438822
    1 -1.004295  0.905829 -0.954544
    2  0.735167 -0.165272 -1.619346
    3 -0.702657 -1.340923 -0.706334
    4 -0.246845  0.211596 -0.901819
    5  2.463718  3.157577 -1.380906
    6 -1.142255  2.340594 -0.039875
    7  1.396598 -1.647453  1.677227
    8 -0.543425  1.761277 -0.220481
    9 -0.640505  0.289374 -1.550670

    >>> df.ewm(alpha=0.5).mean()
              A         B         C
    0 -2.385977 -0.102758  0.438822
    1 -1.464856  0.569633 -0.490089
    2 -0.207700  0.149687 -1.135379
    3 -0.471677 -0.645305 -0.906555
    4 -0.355635 -0.203033 -0.904111
    5  1.076417  1.503943 -1.146293
    6 -0.041654  1.925562 -0.588728
    7  0.680292  0.132049  0.548693
    8  0.067236  0.948257  0.163353
    9 -0.286980  0.618493 -0.694496
    """
    )

    @Substitution(
        see_also=_agg_see_also_doc,
        examples=_agg_examples_doc,
        versionadded="",
        klass="Series/Dataframe",
        axis="",
    )
    @Appender(_shared_docs["aggregate"])
    def aggregate(self, func, *args, **kwargs):
        return super().aggregate(func, *args, **kwargs)

    agg = aggregate

    def _apply(self, func, **kwargs):
        """
        Rolling statistical measure using supplied function. Designed to be
        used with passed-in Cython array-based functions.

        Parameters
        ----------
        func : str/callable to apply
            A string is looked up by name in ``window_aggregations`` and
            called with this object's ``com``, ``adjust``, ``ignore_na`` and
            ``min_periods``; a callable is applied to the values as-is.
        **kwargs
            Accepted for signature compatibility; not used by this method.

        Returns
        -------
        y : same type as input argument

        Raises
        ------
        ValueError
            If ``func`` is a string that names no function in
            ``window_aggregations``.
        DataError
            If the values cannot be prepared and the object is not a
            DataFrame.
        """
        blocks, obj = self._create_blocks()

        # Resolve a string function name to its kernel once, up front,
        # instead of re-checking (and rebinding ``func``) inside the loop.
        if isinstance(func, str):
            cfunc = getattr(window_aggregations, func, None)
            if cfunc is None:
                raise ValueError(
                    f"we do not support this function in window_aggregations.{func}"
                )

            def kernel(arg):
                return cfunc(
                    arg,
                    self.com,
                    int(self.adjust),
                    int(self.ignore_na),
                    int(self.min_periods),
                )

        else:
            kernel = func

        results = []
        exclude = []
        # Blocks that actually produced a result, kept in step with
        # ``results``.  Building this list (rather than deleting from a copy
        # by the enumeration index) stays correct when more than one block
        # is excluded: deleting by index shifts the remaining positions.
        kept_blocks = []
        for block in blocks:
            try:
                values = self._prep_values(block.values)
            except (TypeError, NotImplementedError):
                if isinstance(obj, ABCDataFrame):
                    # Drop the offending columns from a DataFrame result
                    # instead of failing the whole operation.
                    exclude.extend(block.columns)
                    continue
                raise DataError("No numeric types to aggregate")

            kept_blocks.append(block)

            if values.size == 0:
                results.append(values.copy())
                continue

            results.append(np.apply_along_axis(kernel, self.axis, values))

        return self._wrap_results(results, kept_blocks, obj, exclude)

    @Substitution(name="ewm")
    @Appender(_doc_template)
    def mean(self, *args, **kwargs):
        """
        Exponential weighted moving average.

        Parameters
        ----------
        *args, **kwargs
            Arguments and keyword arguments to be passed into func.
        """
        nv.validate_window_func("mean", args, kwargs)
        return self._apply("ewma", **kwargs)

    @Substitution(name="ewm")
    @Appender(_doc_template)
    @Appender(_bias_template)
    def std(self, bias=False, *args, **kwargs):
        """
        Exponential weighted moving stddev.
        """
        nv.validate_window_func("std", args, kwargs)
        # Standard deviation is the square root of the variance computed
        # with the same ``bias`` setting.
        return zsqrt(self.var(bias=bias, **kwargs))

    vol = std

    @Substitution(name="ewm")
    @Appender(_doc_template)
    @Appender(_bias_template)
    def var(self, bias=False, *args, **kwargs):
        """
        Exponential weighted moving variance.
        """
        nv.validate_window_func("var", args, kwargs)

        def f(arg):
            # Variance is computed as the covariance of the values with
            # themselves.
            return window_aggregations.ewmcov(
                arg,
                arg,
                self.com,
                int(self.adjust),
                int(self.ignore_na),
                int(self.min_periods),
                int(bias),
            )

        return self._apply(f, **kwargs)

    @Substitution(name="ewm")
    @Appender(_doc_template)
    def cov(self, other=None, pairwise=None, bias=False, **kwargs):
        """
        Exponential weighted sample covariance.

        Parameters
        ----------
        other : Series, DataFrame, or ndarray, optional
            If not supplied then will default to self and produce pairwise
            output.
        pairwise : bool, default None
            If False then only matching columns between self and other will be
            used and the output will be a DataFrame.
            If True then all pairwise combinations will be calculated and the
            output will be a MultiIndex DataFrame in the case of DataFrame
            inputs.  In the case of missing elements, only complete pairwise
            observations will be used.
        bias : bool, default False
            Use a standard estimation bias correction.
        **kwargs
           Keyword arguments to be passed into func.
        """
        if other is None:
            other = self._selected_obj
            # only default unset
            pairwise = True if pairwise is None else pairwise
        other = self._shallow_copy(other)

        def _get_cov(X, Y):
            X = self._shallow_copy(X)
            Y = self._shallow_copy(Y)
            cov = window_aggregations.ewmcov(
                X._prep_values(),
                Y._prep_values(),
                self.com,
                int(self.adjust),
                int(self.ignore_na),
                int(self.min_periods),
                int(bias),
            )
            return X._wrap_result(cov)

        return _flex_binary_moment(
            self._selected_obj, other._selected_obj, _get_cov, pairwise=bool(pairwise)
        )

    @Substitution(name="ewm")
    @Appender(_doc_template)
    def corr(self, other=None, pairwise=None, **kwargs):
        """
        Exponential weighted sample correlation.

        Parameters
        ----------
        other : Series, DataFrame, or ndarray, optional
            If not supplied then will default to self and produce pairwise
            output.
        pairwise : bool, default None
            If False then only matching columns between self and other will be
            used and the output will be a DataFrame.
            If True then all pairwise combinations will be calculated and the
            output will be a MultiIndex DataFrame in the case of DataFrame
            inputs.  In the case of missing elements, only complete pairwise
            observations will be used.
        **kwargs
           Keyword arguments to be passed into func.
        """
        if other is None:
            other = self._selected_obj
            # only default unset
            pairwise = True if pairwise is None else pairwise
        other = self._shallow_copy(other)

        def _get_corr(X, Y):
            X = self._shallow_copy(X)
            Y = self._shallow_copy(Y)

            def _cov(x, y):
                # The last argument (the bias flag) is fixed to 1 for all
                # three terms of the correlation ratio.
                return window_aggregations.ewmcov(
                    x,
                    y,
                    self.com,
                    int(self.adjust),
                    int(self.ignore_na),
                    int(self.min_periods),
                    1,
                )

            x_values = X._prep_values()
            y_values = Y._prep_values()
            # Suppress floating-point warnings (e.g. division by zero) while
            # forming the ratio.
            with np.errstate(all="ignore"):
                cov = _cov(x_values, y_values)
                x_var = _cov(x_values, x_values)
                y_var = _cov(y_values, y_values)
                corr = cov / zsqrt(x_var * y_var)
            return X._wrap_result(corr)

        return _flex_binary_moment(
            self._selected_obj, other._selected_obj, _get_corr, pairwise=bool(pairwise)
        )