Coverage for /home/martinb/.local/share/virtualenvs/camcops/lib/python3.6/site-packages/pandas/tseries/frequencies.py : 22%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1from datetime import timedelta
2import re
3from typing import Dict, Optional
4import warnings
6import numpy as np
7from pytz import AmbiguousTimeError
9from pandas._libs.algos import unique_deltas
10from pandas._libs.tslibs import Timedelta, Timestamp
11from pandas._libs.tslibs.ccalendar import MONTH_ALIASES, int_to_weekday
12from pandas._libs.tslibs.fields import build_field_sarray
13import pandas._libs.tslibs.frequencies as libfreqs
14from pandas._libs.tslibs.offsets import _offset_to_period_map
15import pandas._libs.tslibs.resolution as libresolution
16from pandas._libs.tslibs.resolution import Resolution
17from pandas._libs.tslibs.timezones import UTC
18from pandas._libs.tslibs.tzconversion import tz_convert
19from pandas.util._decorators import cache_readonly
21from pandas.core.dtypes.common import (
22 is_datetime64_dtype,
23 is_period_arraylike,
24 is_timedelta64_dtype,
25)
26from pandas.core.dtypes.generic import ABCSeries
28from pandas.core.algorithms import unique
30from pandas.tseries.offsets import (
31 DateOffset,
32 Day,
33 Hour,
34 Micro,
35 Milli,
36 Minute,
37 Nano,
38 Second,
39 prefix_mapping,
40)
# Unit sizes expressed in nanoseconds (the resolution of DatetimeIndex.asi8);
# used below to classify inferred deltas into H/T/S/L/U/N aliases.
_ONE_MICRO = 1000
_ONE_MILLI = _ONE_MICRO * 1000
_ONE_SECOND = _ONE_MILLI * 1000
_ONE_MINUTE = 60 * _ONE_SECOND
_ONE_HOUR = 60 * _ONE_MINUTE
_ONE_DAY = 24 * _ONE_HOUR

# ---------------------------------------------------------------------
# Offset names ("time rules") and related functions

#: cache of previously seen offsets
_offset_map: Dict[str, DateOffset] = {}
def get_period_alias(offset_str: str) -> Optional[str]:
    """
    Alias to closest period strings BQ->Q etc.

    Returns None when the offset string has no period equivalent.
    """
    # Plain dict lookup with an explicit None default.
    if offset_str in _offset_to_period_map:
        return _offset_to_period_map[offset_str]
    return None
# Maps Timedelta.components field names to unit DateOffset instances;
# used by to_offset() to translate a datetime.timedelta into a
# composite DateOffset.
_name_to_offset_map = {
    "days": Day(1),
    "hours": Hour(1),
    "minutes": Minute(1),
    "seconds": Second(1),
    "milliseconds": Milli(1),
    "microseconds": Micro(1),
    "nanoseconds": Nano(1),
}
def to_offset(freq) -> Optional[DateOffset]:
    """
    Return DateOffset object from string or tuple representation
    or datetime.timedelta object.

    Parameters
    ----------
    freq : str, tuple, datetime.timedelta, DateOffset or None

    Returns
    -------
    DateOffset
        None if freq is None.

    Raises
    ------
    ValueError
        If freq is an invalid frequency

    See Also
    --------
    DateOffset

    Examples
    --------
    >>> to_offset('5min')
    <5 * Minutes>

    >>> to_offset('1D1H')
    <25 * Hours>

    >>> to_offset(('W', 2))
    <2 * Weeks: weekday=6>

    >>> to_offset((2, 'B'))
    <2 * BusinessDays>

    >>> to_offset(datetime.timedelta(days=1))
    <Day>

    >>> to_offset(Hour())
    <Hour>
    """
    if freq is None:
        return None

    # Already an offset: pass through unchanged.
    if isinstance(freq, DateOffset):
        return freq

    if isinstance(freq, tuple):
        # Accept either (name, stride) or (stride, name) ordering.
        name = freq[0]
        stride = freq[1]
        if isinstance(stride, str):
            name, stride = stride, name
        name, _ = libfreqs._base_and_stride(name)
        delta = _get_offset(name) * stride

    elif isinstance(freq, timedelta):
        # Convert each nonzero Timedelta component into its unit offset
        # and sum them (e.g. 1 day + 1 hour).
        delta = None
        freq = Timedelta(freq)
        try:
            for name in freq.components._fields:
                offset = _name_to_offset_map[name]
                stride = getattr(freq.components, name)
                if stride != 0:
                    offset = stride * offset
                    if delta is None:
                        delta = offset
                    else:
                        delta = delta + offset
        except ValueError:
            raise ValueError(libfreqs.INVALID_FREQ_ERR_MSG.format(freq))

    else:
        # String form such as "5min" or "1D1H": split into
        # (separator, stride, name) triples and accumulate.
        delta = None
        stride_sign = None
        try:
            splitted = re.split(libfreqs.opattern, freq)
            if splitted[-1] != "" and not splitted[-1].isspace():
                # the last element must be blank
                raise ValueError("last element must be blank")
            # re.split with a 3-group pattern yields elements in strides of 4.
            for sep, stride, name in zip(
                splitted[0::4], splitted[1::4], splitted[2::4]
            ):
                if sep != "" and not sep.isspace():
                    raise ValueError("separator must be spaces")
                prefix = libfreqs._lite_rule_alias.get(name) or name
                # The sign of the first stride applies to all components.
                if stride_sign is None:
                    stride_sign = -1 if stride.startswith("-") else 1
                if not stride:
                    stride = 1
                if prefix in Resolution._reso_str_bump_map.keys():
                    # e.g. "2.5min" -> (150, "S"): push decimals down a unit.
                    stride, name = Resolution.get_stride_from_decimal(
                        float(stride), prefix
                    )
                stride = int(stride)
                offset = _get_offset(name)
                offset = offset * int(np.fabs(stride) * stride_sign)
                if delta is None:
                    delta = offset
                else:
                    delta = delta + offset
        except (ValueError, TypeError):
            raise ValueError(libfreqs.INVALID_FREQ_ERR_MSG.format(freq))

    if delta is None:
        raise ValueError(libfreqs.INVALID_FREQ_ERR_MSG.format(freq))

    return delta
def get_offset(name: str) -> DateOffset:
    """
    Return DateOffset object associated with rule name.

    .. deprecated:: 1.0.0

    Examples
    --------
    get_offset('EOM') --> BMonthEnd(1)
    """
    # Deprecated public wrapper: emit the warning, then delegate.
    msg = (
        "get_offset is deprecated and will be removed in a future version, "
        "use to_offset instead"
    )
    warnings.warn(msg, FutureWarning, stacklevel=2)
    return _get_offset(name)
def _get_offset(name: str) -> DateOffset:
    """
    Return DateOffset object associated with rule name.

    Examples
    --------
    _get_offset('EOM') --> BMonthEnd(1)
    """
    # Normalize the rule name unless it belongs to the case-sensitive set.
    if name in libfreqs._dont_uppercase:
        name = libfreqs._lite_rule_alias.get(name, name)
    else:
        name = name.upper()
        name = libfreqs._lite_rule_alias.get(name, name)
        name = libfreqs._lite_rule_alias.get(name.lower(), name)

    # Fast path: previously constructed offsets are cached by name.
    try:
        return _offset_map[name]
    except KeyError:
        pass

    try:
        prefix, *suffix = name.split("-")
        klass = prefix_mapping[prefix]
        # handles case where there's no suffix (and will TypeError if too
        # many '-')
        offset = klass._from_name(*suffix)
    except (ValueError, TypeError, KeyError):
        # bad prefix or suffix
        raise ValueError(libfreqs.INVALID_FREQ_ERR_MSG.format(name))

    # cache for subsequent lookups
    _offset_map[name] = offset
    return offset
235# ---------------------------------------------------------------------
236# Period codes
def infer_freq(index, warn: bool = True) -> Optional[str]:
    """
    Infer the most likely frequency given the input index. If the frequency is
    uncertain, a warning will be printed.

    Parameters
    ----------
    index : DatetimeIndex or TimedeltaIndex
        If passed a Series will use the values of the series (NOT THE INDEX).
    warn : bool, default True
        Passed through to the inferer; presumably controls warning output —
        not consulted directly in this function.

    Returns
    -------
    str or None
        None if no discernible frequency

    Raises
    ------
    TypeError if the index is not datetime-like
    ValueError if there are less than three values.
    """
    # Local import to avoid a circular dependency at module load time.
    import pandas as pd

    if isinstance(index, ABCSeries):
        # Use the Series' values, not its index.
        values = index._values
        if not (
            is_datetime64_dtype(values)
            or is_timedelta64_dtype(values)
            or values.dtype == object
        ):
            raise TypeError(
                "cannot infer freq from a non-convertible dtype "
                f"on a Series of {index.dtype}"
            )
        index = values

    inferer: _FrequencyInferer
    if is_period_arraylike(index):
        raise TypeError(
            "PeriodIndex given. Check the `freq` attribute "
            "instead of using infer_freq."
        )
    elif is_timedelta64_dtype(index):
        # Allow TimedeltaIndex and TimedeltaArray
        inferer = _TimedeltaFrequencyInferer(index, warn=warn)
        return inferer.get_freq()

    if isinstance(index, pd.Index) and not isinstance(index, pd.DatetimeIndex):
        if isinstance(index, (pd.Int64Index, pd.Float64Index)):
            raise TypeError(
                f"cannot infer freq from a non-convertible index type {type(index)}"
            )
        index = index.values

    if not isinstance(index, pd.DatetimeIndex):
        try:
            index = pd.DatetimeIndex(index)
        except AmbiguousTimeError:
            # Ambiguous local times (DST): fall back to the raw i8 values.
            index = pd.DatetimeIndex(index.asi8)

    inferer = _FrequencyInferer(index, warn=warn)
    return inferer.get_freq()
class _FrequencyInferer:
    """
    Not sure if I can avoid the state machine here

    Infers a frequency alias (e.g. "D", "2H", "W-SUN") from the unique
    deltas between an index's i8 (nanosecond) values.
    """

    def __init__(self, index, warn: bool = True):
        # index is expected to expose .asi8 (nanosecond i8 view) — e.g. a
        # DatetimeIndex or TimedeltaIndex.
        self.index = index
        self.values = index.asi8

        # This moves the values, which are implicitly in UTC, to the
        # the timezone so they are in local time
        if hasattr(index, "tz"):
            if index.tz is not None:
                self.values = tz_convert(self.values, UTC, index.tz)

        # NOTE(review): `warn` is stored but not read anywhere in this class
        # as visible here — presumably consumed elsewhere; confirm.
        self.warn = warn

        if len(index) < 3:
            raise ValueError("Need at least 3 dates to infer frequency")

        self.is_monotonic = (
            self.index._is_monotonic_increasing or self.index._is_monotonic_decreasing
        )

    @cache_readonly
    def deltas(self):
        # Unique diffs of the (possibly tz-localized) values.
        return unique_deltas(self.values)

    @cache_readonly
    def deltas_asi8(self):
        # Unique diffs of the raw UTC i8 values (robust around DST).
        return unique_deltas(self.index.asi8)

    @cache_readonly
    def is_unique(self) -> bool:
        # True when all consecutive localized deltas are equal.
        return len(self.deltas) == 1

    @cache_readonly
    def is_unique_asi8(self):
        # True when all consecutive raw-UTC deltas are equal.
        return len(self.deltas_asi8) == 1

    def get_freq(self) -> Optional[str]:
        """
        Find the appropriate frequency string to describe the inferred
        frequency of self.values

        Returns
        -------
        str or None
        """
        if not self.is_monotonic or not self.index._is_unique:
            return None

        delta = self.deltas[0]
        if _is_multiple(delta, _ONE_DAY):
            return self._infer_daily_rule()

        # Business hourly, maybe. 17: one day / 65: one weekend
        if self.hour_deltas in ([1, 17], [1, 65], [1, 17, 65]):
            return "BH"
        # Possibly intraday frequency. Here we use the
        # original .asi8 values as the modified values
        # will not work around DST transitions. See #8772
        elif not self.is_unique_asi8:
            return None

        # Classify the single raw delta from coarsest to finest unit.
        delta = self.deltas_asi8[0]
        if _is_multiple(delta, _ONE_HOUR):
            # Hours
            return _maybe_add_count("H", delta / _ONE_HOUR)
        elif _is_multiple(delta, _ONE_MINUTE):
            # Minutes
            return _maybe_add_count("T", delta / _ONE_MINUTE)
        elif _is_multiple(delta, _ONE_SECOND):
            # Seconds
            return _maybe_add_count("S", delta / _ONE_SECOND)
        elif _is_multiple(delta, _ONE_MILLI):
            # Milliseconds
            return _maybe_add_count("L", delta / _ONE_MILLI)
        elif _is_multiple(delta, _ONE_MICRO):
            # Microseconds
            return _maybe_add_count("U", delta / _ONE_MICRO)
        else:
            # Nanoseconds
            return _maybe_add_count("N", delta)

    @cache_readonly
    def day_deltas(self):
        # Unique deltas expressed in days.
        return [x / _ONE_DAY for x in self.deltas]

    @cache_readonly
    def hour_deltas(self):
        # Unique deltas expressed in hours.
        return [x / _ONE_HOUR for x in self.deltas]

    @cache_readonly
    def fields(self):
        # Structured array of date fields (includes "Y" and "M", used below).
        return build_field_sarray(self.values)

    @cache_readonly
    def rep_stamp(self):
        # Representative timestamp: the first value of the index.
        return Timestamp(self.values[0])

    def month_position_check(self):
        # Returns a position code consumed below: "cs"/"bs"/"ce"/"be"
        # (calendar/business start/end), per the .get() maps in the
        # _get_*_rule methods.
        return libresolution.month_position_check(self.fields, self.index.dayofweek)

    @cache_readonly
    def mdiffs(self):
        # Unique month-count deltas between consecutive stamps.
        nmonths = self.fields["Y"] * 12 + self.fields["M"]
        return unique_deltas(nmonths.astype("i8"))

    @cache_readonly
    def ydiffs(self):
        # Unique year deltas between consecutive stamps.
        return unique_deltas(self.fields["Y"].astype("i8"))

    def _infer_daily_rule(self) -> Optional[str]:
        # Try coarser rules first: annual -> quarterly -> monthly ->
        # weekly/daily -> business daily -> week-of-month.
        annual_rule = self._get_annual_rule()
        if annual_rule:
            nyears = self.ydiffs[0]
            month = MONTH_ALIASES[self.rep_stamp.month]
            alias = f"{annual_rule}-{month}"
            return _maybe_add_count(alias, nyears)

        quarterly_rule = self._get_quarterly_rule()
        if quarterly_rule:
            nquarters = self.mdiffs[0] / 3
            # Maps (month % 3) to the quarter-anchor month alias.
            mod_dict = {0: 12, 2: 11, 1: 10}
            month = MONTH_ALIASES[mod_dict[self.rep_stamp.month % 3]]
            alias = f"{quarterly_rule}-{month}"
            return _maybe_add_count(alias, nquarters)

        monthly_rule = self._get_monthly_rule()
        if monthly_rule:
            return _maybe_add_count(monthly_rule, self.mdiffs[0])

        if self.is_unique:
            days = self.deltas[0] / _ONE_DAY
            if days % 7 == 0:
                # Weekly
                day = int_to_weekday[self.rep_stamp.weekday()]
                return _maybe_add_count(f"W-{day}", days / 7)
            else:
                return _maybe_add_count("D", days)

        if self._is_business_daily():
            return "B"

        wom_rule = self._get_wom_rule()
        if wom_rule:
            return wom_rule

        return None

    def _get_annual_rule(self) -> Optional[str]:
        # Exactly one year-delta and all stamps in the same month.
        if len(self.ydiffs) > 1:
            return None

        if len(unique(self.fields["M"])) > 1:
            return None

        pos_check = self.month_position_check()
        return {"cs": "AS", "bs": "BAS", "ce": "A", "be": "BA"}.get(pos_check)

    def _get_quarterly_rule(self) -> Optional[str]:
        # Exactly one month-delta, and it must be a multiple of 3.
        if len(self.mdiffs) > 1:
            return None

        if not self.mdiffs[0] % 3 == 0:
            return None

        pos_check = self.month_position_check()
        return {"cs": "QS", "bs": "BQS", "ce": "Q", "be": "BQ"}.get(pos_check)

    def _get_monthly_rule(self) -> Optional[str]:
        # Exactly one month-delta.
        if len(self.mdiffs) > 1:
            return None
        pos_check = self.month_position_check()
        return {"cs": "MS", "bs": "BMS", "ce": "M", "be": "BM"}.get(pos_check)

    def _is_business_daily(self) -> bool:
        # quick check: cannot be business daily
        if self.day_deltas != [1, 3]:
            return False

        # probably business daily, but need to confirm
        first_weekday = self.index[0].weekday()
        shifts = np.diff(self.index.asi8)
        shifts = np.floor_divide(shifts, _ONE_DAY)
        weekdays = np.mod(first_weekday + np.cumsum(shifts), 7)
        # Every step is either Fri->Mon (3 days from weekday 0 == Monday
        # of the *next* step) or a 1-day step landing on Tue..Fri.
        return np.all(
            ((weekdays == 0) & (shifts == 3))
            | ((weekdays > 0) & (weekdays <= 4) & (shifts == 1))
        )

    def _get_wom_rule(self) -> Optional[str]:
        # wdiffs = unique(np.diff(self.index.week))
        # We also need -47, -49, -48 to catch index spanning year boundary
        # if not lib.ismember(wdiffs, set([4, 5, -47, -49, -48])).all():
        #     return None

        weekdays = unique(self.index.weekday)
        if len(weekdays) > 1:
            return None

        week_of_months = unique((self.index.day - 1) // 7)
        # Only attempt to infer up to WOM-4. See #9425
        week_of_months = week_of_months[week_of_months < 4]
        if len(week_of_months) == 0 or len(week_of_months) > 1:
            return None

        # get which week
        week = week_of_months[0] + 1
        wd = int_to_weekday[weekdays[0]]

        return f"WOM-{week}{wd}"
class _TimedeltaFrequencyInferer(_FrequencyInferer):
    """Frequency inferer specialized for timedelta-valued indexes."""

    def _infer_daily_rule(self):
        # Only a single repeating delta can map to a daily/weekly alias;
        # otherwise fall through (returning None, as the base contract allows).
        if not self.is_unique:
            return None
        n_days = self.deltas[0] / _ONE_DAY
        if n_days % 7:
            # Not a whole number of weeks: plain daily rule.
            return _maybe_add_count("D", n_days)
        # Whole weeks: anchor on the representative stamp's weekday.
        anchor = int_to_weekday[self.rep_stamp.weekday()]
        return _maybe_add_count(f"W-{anchor}", n_days / 7)
528def _is_multiple(us, mult: int) -> bool:
529 return us % mult == 0
532def _maybe_add_count(base: str, count: float) -> str:
533 if count != 1:
534 assert count == int(count)
535 count = int(count)
536 return f"{count}{base}"
537 else:
538 return base