Coverage for /home/martinb/.local/share/virtualenvs/camcops/lib/python3.6/site-packages/pandas/core/arrays/datetimes.py : 21%

1from datetime import datetime, time, timedelta
2from typing import Union
3import warnings
5import numpy as np
6from pytz import utc
8from pandas._libs import lib, tslib
9from pandas._libs.tslibs import (
10 NaT,
11 Timestamp,
12 ccalendar,
13 conversion,
14 fields,
15 iNaT,
16 normalize_date,
17 resolution as libresolution,
18 timezones,
19 tzconversion,
20)
21from pandas.errors import PerformanceWarning
23from pandas.core.dtypes.common import (
24 _INT64_DTYPE,
25 _NS_DTYPE,
26 is_categorical_dtype,
27 is_datetime64_any_dtype,
28 is_datetime64_dtype,
29 is_datetime64_ns_dtype,
30 is_datetime64tz_dtype,
31 is_dtype_equal,
32 is_extension_array_dtype,
33 is_float_dtype,
34 is_object_dtype,
35 is_period_dtype,
36 is_string_dtype,
37 is_timedelta64_dtype,
38 pandas_dtype,
39)
40from pandas.core.dtypes.dtypes import DatetimeTZDtype
41from pandas.core.dtypes.generic import ABCIndexClass, ABCPandasArray, ABCSeries
42from pandas.core.dtypes.missing import isna
44from pandas.core.algorithms import checked_add_with_arr
45from pandas.core.arrays import datetimelike as dtl
46from pandas.core.arrays._ranges import generate_regular_range
47import pandas.core.common as com
49from pandas.tseries.frequencies import get_period_alias, to_offset
50from pandas.tseries.offsets import Day, Tick
52_midnight = time(0, 0)
55def tz_to_dtype(tz):
56 """
57 Return a datetime64[ns] dtype appropriate for the given timezone.
59 Parameters
60 ----------
61 tz : tzinfo or None
63 Returns
64 -------
65 np.dtype or DatetimeTZDtype
66 """
67 if tz is None:
68 return _NS_DTYPE
69 else:
70 return DatetimeTZDtype(tz=tz)
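# Illustrative sketch of the mapping, assuming pytz's `utc` (imported above):
#     tz_to_dtype(None)  ->  dtype('<M8[ns]')
#     tz_to_dtype(utc)   ->  DatetimeTZDtype(unit='ns', tz=UTC)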
73def _field_accessor(name, field, docstring=None):
74 def f(self):
75 values = self.asi8
76 if self.tz is not None and not timezones.is_utc(self.tz):
77 values = self._local_timestamps()
79 if field in self._bool_ops:
80 if field.endswith(("start", "end")):
81 freq = self.freq
82 month_kw = 12
83 if freq:
84 kwds = freq.kwds
85 month_kw = kwds.get("startingMonth", kwds.get("month", 12))
87 result = fields.get_start_end_field(
88 values, field, self.freqstr, month_kw
89 )
90 else:
91 result = fields.get_date_field(values, field)
93 # these return a boolean by-definition
94 return result
96 if field in self._object_ops:
97 result = fields.get_date_name_field(values, field)
98 result = self._maybe_mask_results(result, fill_value=None)
100 else:
101 result = fields.get_date_field(values, field)
102 result = self._maybe_mask_results(
103 result, fill_value=None, convert="float64"
104 )
106 return result
108 f.__name__ = name
109 f.__doc__ = docstring
110 return property(f)
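# Minimal sketch of how the generated properties behave: each one reads the
# underlying i8 values (shifted to local time when tz-aware) and dispatches to
# the cython `fields` helpers, e.g.
#     arr = DatetimeArray._from_sequence(["2000-01-01", "2001-02-03"])
#     arr.year   # -> array([2000, 2001], dtype=int32)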
113class DatetimeArray(dtl.DatetimeLikeArrayMixin, dtl.TimelikeOps, dtl.DatelikeOps):
114 """
115 Pandas ExtensionArray for tz-naive or tz-aware datetime data.
117 .. versionadded:: 0.24.0
119 .. warning::
121 DatetimeArray is currently experimental, and its API may change
122 without warning. In particular, :attr:`DatetimeArray.dtype` is
123 expected to change to always be an instance of an ``ExtensionDtype``
124 subclass.
126 Parameters
127 ----------
128 values : Series, Index, DatetimeArray, ndarray
129 The datetime data.
131 For DatetimeArray `values` (or a Series or Index boxing one),
132 `dtype` and `freq` will be extracted from `values`.
134 dtype : numpy.dtype or DatetimeTZDtype
135 Note that the only NumPy dtype allowed is 'datetime64[ns]'.
136 freq : str or Offset, optional
137 The frequency.
138 copy : bool, default False
139 Whether to copy the underlying array of values.
141 Attributes
142 ----------
143 None
145 Methods
146 -------
147 None
148 """
150 _typ = "datetimearray"
151 _scalar_type = Timestamp
152 _recognized_scalars = (datetime, np.datetime64)
153 _is_recognized_dtype = is_datetime64_any_dtype
155 # define my properties & methods for delegation
156 _bool_ops = [
157 "is_month_start",
158 "is_month_end",
159 "is_quarter_start",
160 "is_quarter_end",
161 "is_year_start",
162 "is_year_end",
163 "is_leap_year",
164 ]
165 _object_ops = ["freq", "tz"]
166 _field_ops = [
167 "year",
168 "month",
169 "day",
170 "hour",
171 "minute",
172 "second",
173 "weekofyear",
174 "week",
175 "weekday",
176 "dayofweek",
177 "dayofyear",
178 "quarter",
179 "days_in_month",
180 "daysinmonth",
181 "microsecond",
182 "nanosecond",
183 ]
184 _other_ops = ["date", "time", "timetz"]
185 _datetimelike_ops = _field_ops + _object_ops + _bool_ops + _other_ops
186 _datetimelike_methods = [
187 "to_period",
188 "tz_localize",
189 "tz_convert",
190 "normalize",
191 "strftime",
192 "round",
193 "floor",
194 "ceil",
195 "month_name",
196 "day_name",
197 ]
199 # ndim is inherited from ExtensionArray, must exist to ensure
200 # Timestamp.__richcmp__(DatetimeArray) operates pointwise
202 # ensure that operations with numpy arrays defer to our implementation
203 __array_priority__ = 1000
205 # -----------------------------------------------------------------
206 # Constructors
208 _dtype: Union[np.dtype, DatetimeTZDtype]
209 _freq = None
211 def __init__(self, values, dtype=_NS_DTYPE, freq=None, copy=False):
212 if isinstance(values, (ABCSeries, ABCIndexClass)):
213 values = values._values
215 inferred_freq = getattr(values, "_freq", None)
217 if isinstance(values, type(self)):
218 # validation
219 dtz = getattr(dtype, "tz", None)
220 if dtz and values.tz is None:
221 dtype = DatetimeTZDtype(tz=dtype.tz)
222 elif dtz and values.tz:
223 if not timezones.tz_compare(dtz, values.tz):
224 msg = (
225 "Timezone of the array and 'dtype' do not match. "
226 f"'{dtz}' != '{values.tz}'"
227 )
228 raise TypeError(msg)
229 elif values.tz:
230 dtype = values.dtype
232 if freq is None:
233 freq = values.freq
234 values = values._data
236 if not isinstance(values, np.ndarray):
237 msg = (
238 f"Unexpected type '{type(values).__name__}'. 'values' must be "
239 "a DatetimeArray ndarray, or Series or Index containing one of those."
240 )
241 raise ValueError(msg)
242 if values.ndim not in [1, 2]:
243 raise ValueError("Only 1- and 2-dimensional input arrays are supported.")
245 if values.dtype == "i8":
246 # for compat with datetime/timedelta/period shared methods,
247 # we can sometimes get here with int64 values. These represent
248 # nanosecond UTC (or tz-naive) unix timestamps
249 values = values.view(_NS_DTYPE)
251 if values.dtype != _NS_DTYPE:
252 msg = (
253 "The dtype of 'values' is incorrect. Must be 'datetime64[ns]'."
254 f" Got {values.dtype} instead."
255 )
256 raise ValueError(msg)
258 dtype = _validate_dt64_dtype(dtype)
260 if freq == "infer":
261 msg = (
262 "Frequency inference not allowed in DatetimeArray.__init__. "
263 "Use 'pd.array()' instead."
264 )
265 raise ValueError(msg)
267 if copy:
268 values = values.copy()
269 if freq:
270 freq = to_offset(freq)
271 if getattr(dtype, "tz", None):
272 # https://github.com/pandas-dev/pandas/issues/18595
273 # Ensure that we have a standard timezone for pytz objects.
274 # Without this, things like adding an array of timedeltas and
275 # a tz-aware Timestamp (with a tz specific to its datetime) will
276 # be incorrect(ish?) for the array as a whole
277 dtype = DatetimeTZDtype(tz=timezones.tz_standardize(dtype.tz))
279 self._data = values
280 self._dtype = dtype
281 self._freq = freq
283 if inferred_freq is None and freq is not None:
284 type(self)._validate_frequency(self, freq)
286 @classmethod
287 def _simple_new(cls, values, freq=None, dtype=_NS_DTYPE):
288 assert isinstance(values, np.ndarray)
289 if values.dtype == "i8":
290 values = values.view(_NS_DTYPE)
292 result = object.__new__(cls)
293 result._data = values
294 result._freq = freq
295 result._dtype = dtype
296 return result
298 @classmethod
299 def _from_sequence(
300 cls,
301 data,
302 dtype=None,
303 copy=False,
304 tz=None,
305 freq=None,
306 dayfirst=False,
307 yearfirst=False,
308 ambiguous="raise",
309 ):
311 freq, freq_infer = dtl.maybe_infer_freq(freq)
313 subarr, tz, inferred_freq = sequence_to_dt64ns(
314 data,
315 dtype=dtype,
316 copy=copy,
317 tz=tz,
318 dayfirst=dayfirst,
319 yearfirst=yearfirst,
320 ambiguous=ambiguous,
321 )
323 freq, freq_infer = dtl.validate_inferred_freq(freq, inferred_freq, freq_infer)
325 dtype = tz_to_dtype(tz)
326 result = cls._simple_new(subarr, freq=freq, dtype=dtype)
328 if inferred_freq is None and freq is not None:
329 # this condition precludes `freq_infer`
330 cls._validate_frequency(result, freq, ambiguous=ambiguous)
332 elif freq_infer:
333 # Set _freq directly to bypass duplicative _validate_frequency
334 # check.
335 result._freq = to_offset(result.inferred_freq)
337 return result
339 @classmethod
340 def _generate_range(
341 cls,
342 start,
343 end,
344 periods,
345 freq,
346 tz=None,
347 normalize=False,
348 ambiguous="raise",
349 nonexistent="raise",
350 closed=None,
351 ):
353 periods = dtl.validate_periods(periods)
354 if freq is None and any(x is None for x in [periods, start, end]):
355 raise ValueError("Must provide freq argument if no data is supplied")
357 if com.count_not_none(start, end, periods, freq) != 3:
358 raise ValueError(
359 "Of the four parameters: start, end, periods, "
360 "and freq, exactly three must be specified"
361 )
362 freq = to_offset(freq)
364 if start is not None:
365 start = Timestamp(start)
367 if end is not None:
368 end = Timestamp(end)
370 if start is None and end is None:
371 if closed is not None:
372 raise ValueError(
373 "Closed has to be None if not both of start and end are defined"
374 )
375 if start is NaT or end is NaT:
376 raise ValueError("Neither `start` nor `end` can be NaT")
378 left_closed, right_closed = dtl.validate_endpoints(closed)
380 start, end, _normalized = _maybe_normalize_endpoints(start, end, normalize)
382 tz = _infer_tz_from_endpoints(start, end, tz)
384 if tz is not None:
385 # Localize the start and end arguments
386 start = _maybe_localize_point(
387 start,
388 getattr(start, "tz", None),
389 start,
390 freq,
391 tz,
392 ambiguous,
393 nonexistent,
394 )
395 end = _maybe_localize_point(
396 end, getattr(end, "tz", None), end, freq, tz, ambiguous, nonexistent
397 )
398 if freq is not None:
399 # We break Day arithmetic (fixed 24 hour) here and opt for
400 # Day to mean calendar day (23/24/25 hour). Therefore, strip
401 # tz info from start and end to avoid DST arithmetic
402 if isinstance(freq, Day):
403 if start is not None:
404 start = start.tz_localize(None)
405 if end is not None:
406 end = end.tz_localize(None)
407 # TODO: consider re-implementing _cached_range; GH#17914
408 values, _tz = generate_regular_range(start, end, periods, freq)
409 index = cls._simple_new(values, freq=freq, dtype=tz_to_dtype(_tz))
411 if tz is not None and index.tz is None:
412 arr = conversion.tz_localize_to_utc(
413 index.asi8, tz, ambiguous=ambiguous, nonexistent=nonexistent
414 )
416 index = cls(arr)
418 # index is localized datetime64 array -> have to convert
419 # start/end as well to compare
420 if start is not None:
421 start = start.tz_localize(tz).asm8
422 if end is not None:
423 end = end.tz_localize(tz).asm8
424 else:
425 # Create a linearly spaced date_range in local time
426 # Nanosecond-granularity timestamps aren't always correctly
427 # representable with doubles, so we limit the range that we
428 # pass to np.linspace as much as possible
429 arr = (
430 np.linspace(0, end.value - start.value, periods, dtype="int64")
431 + start.value
432 )
433 dtype = tz_to_dtype(tz)
434 index = cls._simple_new(
435 arr.astype("M8[ns]", copy=False), freq=None, dtype=dtype
436 )
438 if not left_closed and len(index) and index[0] == start:
439 index = index[1:]
440 if not right_closed and len(index) and index[-1] == end:
441 index = index[:-1]
443 dtype = tz_to_dtype(tz)
444 return cls._simple_new(index.asi8, freq=freq, dtype=dtype)
446 # -----------------------------------------------------------------
447 # DatetimeLike Interface
449 def _unbox_scalar(self, value):
450 if not isinstance(value, self._scalar_type) and value is not NaT:
451 raise ValueError("'value' should be a Timestamp.")
452 if not isna(value):
453 self._check_compatible_with(value)
454 return value.value
456 def _scalar_from_string(self, value):
457 return Timestamp(value, tz=self.tz)
459 def _check_compatible_with(self, other, setitem: bool = False):
460 if other is NaT:
461 return
462 self._assert_tzawareness_compat(other)
463 if setitem:
464 # Stricter check for setitem vs comparison methods
465 if not timezones.tz_compare(self.tz, other.tz):
466 raise ValueError(f"Timezones don't match. '{self.tz} != {other.tz}'")
468 def _maybe_clear_freq(self):
469 self._freq = None
471 # -----------------------------------------------------------------
472 # Descriptive Properties
474 @property
475 def _box_func(self):
476 return lambda x: Timestamp(x, freq=self.freq, tz=self.tz)
478 @property
479 def dtype(self) -> Union[np.dtype, DatetimeTZDtype]:
480 """
481 The dtype for the DatetimeArray.
483 .. warning::
485 A future version of pandas will change dtype to never be a
486 ``numpy.dtype``. Instead, :attr:`DatetimeArray.dtype` will
487 always be an instance of an ``ExtensionDtype`` subclass.
489 Returns
490 -------
491 numpy.dtype or DatetimeTZDtype
492 If the values are tz-naive, then ``np.dtype('datetime64[ns]')``
493 is returned.
495 If the values are tz-aware, then the ``DatetimeTZDtype``
496 is returned.
497 """
498 return self._dtype
500 @property
501 def tz(self):
502 """
503 Return timezone, if any.
505 Returns
506 -------
507 datetime.tzinfo, pytz.tzinfo.BaseTZInfo, dateutil.tz.tz.tzfile, or None
508 Returns None when the array is tz-naive.
509 """
510 # GH 18595
511 return getattr(self.dtype, "tz", None)
513 @tz.setter
514 def tz(self, value):
515 # GH 3746: Prevent localizing or converting the index by setting tz
516 raise AttributeError(
517 "Cannot directly set timezone. Use tz_localize() "
518 "or tz_convert() as appropriate"
519 )
521 @property
522 def tzinfo(self):
523 """
524 Alias for tz attribute
525 """
526 return self.tz
528 @property # NB: override with cache_readonly in immutable subclasses
529 def _timezone(self):
530 """
531 Comparable timezone both for pytz / dateutil
532 """
533 return timezones.get_timezone(self.tzinfo)
535 @property # NB: override with cache_readonly in immutable subclasses
536 def is_normalized(self):
537 """
538 Returns True if all of the dates are at midnight ("no time")
539 """
540 return conversion.is_date_array_normalized(self.asi8, self.tz)
542 @property # NB: override with cache_readonly in immutable subclasses
543 def _resolution(self):
544 return libresolution.resolution(self.asi8, self.tz)
546 # ----------------------------------------------------------------
547 # Array-Like / EA-Interface Methods
549 def __array__(self, dtype=None) -> np.ndarray:
550 if dtype is None and self.tz:
551 # The default for tz-aware is object, to preserve tz info
552 dtype = object
554 return super().__array__(dtype=dtype)
556 def __iter__(self):
557 """
558 Return an iterator over the boxed values
560 Yields
561 ------
562 tstamp : Timestamp
563 """
565 # convert in chunks of 10k for efficiency
566 data = self.asi8
567 length = len(self)
568 chunksize = 10000
569 chunks = int(length / chunksize) + 1
570 for i in range(chunks):
571 start_i = i * chunksize
572 end_i = min((i + 1) * chunksize, length)
573 converted = tslib.ints_to_pydatetime(
574 data[start_i:end_i], tz=self.tz, freq=self.freq, box="timestamp"
575 )
576 for v in converted:
577 yield v
579 def astype(self, dtype, copy=True):
580 # We handle
581 # --> datetime
582 # --> period
583 # DatetimeLikeArrayMixin Super handles the rest.
584 dtype = pandas_dtype(dtype)
586 if is_datetime64_ns_dtype(dtype) and not is_dtype_equal(dtype, self.dtype):
587 # GH#18951: datetime64_ns dtype but not equal means different tz
588 new_tz = getattr(dtype, "tz", None)
589 if getattr(self.dtype, "tz", None) is None:
590 return self.tz_localize(new_tz)
591 result = self.tz_convert(new_tz)
592 if copy:
593 result = result.copy()
594 if new_tz is None:
595 # Do we want .astype('datetime64[ns]') to be an ndarray.
596 # The astype in Block._astype expects this to return an
597 # ndarray, but we could maybe work around it there.
598 result = result._data
599 return result
600 elif is_datetime64tz_dtype(self.dtype) and is_dtype_equal(self.dtype, dtype):
601 if copy:
602 return self.copy()
603 return self
604 elif is_period_dtype(dtype):
605 return self.to_period(freq=dtype.freq)
606 return dtl.DatetimeLikeArrayMixin.astype(self, dtype, copy)
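# Rough sketch of the datetime-specific astype paths handled above, for a
# tz-naive array `arr` and a tz-aware array `aware` (illustrative only):
#     arr.astype("datetime64[ns, UTC]")    # ~ arr.tz_localize("UTC")
#     aware.astype("datetime64[ns]")       # ~ aware.tz_convert(None) as ndarray
#     aware.astype(pd.PeriodDtype("D"))    # ~ aware.to_period(freq="D")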
608 # -----------------------------------------------------------------
609 # Rendering Methods
611 def _format_native_types(self, na_rep="NaT", date_format=None, **kwargs):
612 from pandas.io.formats.format import _get_format_datetime64_from_values
614 fmt = _get_format_datetime64_from_values(self, date_format)
616 return tslib.format_array_from_datetime(
617 self.asi8, tz=self.tz, format=fmt, na_rep=na_rep
618 )
620 # -----------------------------------------------------------------
621 # Comparison Methods
623 def _has_same_tz(self, other):
624 zzone = self._timezone
626 # vzone shouldn't be None if value is non-datetime like
627 if isinstance(other, np.datetime64):
628 # convert to Timestamp as np.datetime64 doesn't have tz attr
629 other = Timestamp(other)
630 vzone = timezones.get_timezone(getattr(other, "tzinfo", "__no_tz__"))
631 return zzone == vzone
633 def _assert_tzawareness_compat(self, other):
634 # adapted from _Timestamp._assert_tzawareness_compat
635 other_tz = getattr(other, "tzinfo", None)
636 if is_datetime64tz_dtype(other):
637 # Get tzinfo from Series dtype
638 other_tz = other.dtype.tz
639 if other is NaT:
640 # pd.NaT quacks both aware and naive
641 pass
642 elif self.tz is None:
643 if other_tz is not None:
644 raise TypeError(
645 "Cannot compare tz-naive and tz-aware datetime-like objects."
646 )
647 elif other_tz is None:
648 raise TypeError(
649 "Cannot compare tz-naive and tz-aware datetime-like objects"
650 )
652 # -----------------------------------------------------------------
653 # Arithmetic Methods
655 def _sub_datetime_arraylike(self, other):
656 """subtract DatetimeArray/Index or ndarray[datetime64]"""
657 if len(self) != len(other):
658 raise ValueError("cannot add indices of unequal length")
660 if isinstance(other, np.ndarray):
661 assert is_datetime64_dtype(other)
662 other = type(self)(other)
664 if not self._has_same_tz(other):
665 # require tz compat
666 raise TypeError(
667 f"{type(self).__name__} subtraction must have the same "
668 "timezones or no timezones"
669 )
671 self_i8 = self.asi8
672 other_i8 = other.asi8
673 arr_mask = self._isnan | other._isnan
674 new_values = checked_add_with_arr(self_i8, -other_i8, arr_mask=arr_mask)
675 if self._hasnans or other._hasnans:
676 new_values[arr_mask] = iNaT
677 return new_values.view("timedelta64[ns]")
679 def _add_offset(self, offset):
680 if self.ndim == 2:
681 return self.ravel()._add_offset(offset).reshape(self.shape)
683 assert not isinstance(offset, Tick)
684 try:
685 if self.tz is not None:
686 values = self.tz_localize(None)
687 else:
688 values = self
689 result = offset.apply_index(values).tz_localize(self.tz)
691 except NotImplementedError:
692 warnings.warn(
693 "Non-vectorized DateOffset being applied to Series or DatetimeIndex",
694 PerformanceWarning,
695 )
696 result = self.astype("O") + offset
697 if not len(self):
698 # GH#30336 _from_sequence won't be able to infer self.tz
699 return type(self)._from_sequence(result).tz_localize(self.tz)
701 return type(self)._from_sequence(result, freq="infer")
703 def _sub_datetimelike_scalar(self, other):
704 # subtract a datetime from myself, yielding a ndarray[timedelta64[ns]]
705 assert isinstance(other, (datetime, np.datetime64))
706 assert other is not NaT
707 other = Timestamp(other)
708 if other is NaT:
709 return self - NaT
711 if not self._has_same_tz(other):
712 # require tz compat
713 raise TypeError(
714 "Timestamp subtraction must have the same timezones or no timezones"
715 )
717 i8 = self.asi8
718 result = checked_add_with_arr(i8, -other.value, arr_mask=self._isnan)
719 result = self._maybe_mask_results(result)
720 return result.view("timedelta64[ns]")
722 def _add_delta(self, delta):
723 """
724 Add a timedelta-like, Tick, or TimedeltaIndex-like object
725 to self, yielding a new DatetimeArray
727 Parameters
728 ----------
729 other : {timedelta, np.timedelta64, Tick,
730 TimedeltaIndex, ndarray[timedelta64]}
732 Returns
733 -------
734 result : DatetimeArray
735 """
736 new_values = super()._add_delta(delta)
737 return type(self)._from_sequence(new_values, tz=self.tz, freq="infer")
739 # -----------------------------------------------------------------
740 # Timezone Conversion and Localization Methods
742 def _local_timestamps(self):
743 """
744 Convert to an i8 (unix-like nanosecond timestamp) representation
745 while keeping the local timezone and not using UTC.
746 This is used to calculate time-of-day information as if the timestamps
747 were timezone-naive.
748 """
749 return tzconversion.tz_convert(self.asi8, utc, self.tz)
751 def tz_convert(self, tz):
752 """
753 Convert tz-aware Datetime Array/Index from one time zone to another.
755 Parameters
756 ----------
757 tz : str, pytz.timezone, dateutil.tz.tzfile or None
758 Time zone for time. Corresponding timestamps would be converted
759 to this time zone in the Datetime Array/Index. A `tz` of None will
760 convert to UTC and remove the timezone information.
762 Returns
763 -------
764 Array or Index
766 Raises
767 ------
768 TypeError
769 If Datetime Array/Index is tz-naive.
771 See Also
772 --------
773 DatetimeIndex.tz : A timezone that has a variable offset from UTC.
774 DatetimeIndex.tz_localize : Localize tz-naive DatetimeIndex to a
775 given time zone, or remove timezone from a tz-aware DatetimeIndex.
777 Examples
778 --------
779 With the `tz` parameter, we can change the DatetimeIndex
780 to other time zones:
782 >>> dti = pd.date_range(start='2014-08-01 09:00',
783 ... freq='H', periods=3, tz='Europe/Berlin')
785 >>> dti
786 DatetimeIndex(['2014-08-01 09:00:00+02:00',
787 '2014-08-01 10:00:00+02:00',
788 '2014-08-01 11:00:00+02:00'],
789 dtype='datetime64[ns, Europe/Berlin]', freq='H')
791 >>> dti.tz_convert('US/Central')
792 DatetimeIndex(['2014-08-01 02:00:00-05:00',
793 '2014-08-01 03:00:00-05:00',
794 '2014-08-01 04:00:00-05:00'],
795 dtype='datetime64[ns, US/Central]', freq='H')
797 With ``tz=None``, we can remove the timezone (after converting
798 to UTC if necessary):
800 >>> dti = pd.date_range(start='2014-08-01 09:00', freq='H',
801 ... periods=3, tz='Europe/Berlin')
803 >>> dti
804 DatetimeIndex(['2014-08-01 09:00:00+02:00',
805 '2014-08-01 10:00:00+02:00',
806 '2014-08-01 11:00:00+02:00'],
807 dtype='datetime64[ns, Europe/Berlin]', freq='H')
809 >>> dti.tz_convert(None)
810 DatetimeIndex(['2014-08-01 07:00:00',
811 '2014-08-01 08:00:00',
812 '2014-08-01 09:00:00'],
813 dtype='datetime64[ns]', freq='H')
814 """
815 tz = timezones.maybe_get_tz(tz)
817 if self.tz is None:
818 # tz naive, use tz_localize
819 raise TypeError(
820 "Cannot convert tz-naive timestamps, use tz_localize to localize"
821 )
823 # No conversion since timestamps are all UTC to begin with
824 dtype = tz_to_dtype(tz)
825 return self._simple_new(self.asi8, dtype=dtype, freq=self.freq)
827 def tz_localize(self, tz, ambiguous="raise", nonexistent="raise"):
828 """
829 Localize tz-naive Datetime Array/Index to tz-aware
830 Datetime Array/Index.
832 This method takes a time zone (tz) naive Datetime Array/Index object
833 and makes this time zone aware. It does not move the time to another
834 time zone.
835 Passing ``tz=None`` does the inverse, switching from time zone aware to
836 time zone unaware objects while keeping the local wall time.
838 Parameters
839 ----------
840 tz : str, pytz.timezone, dateutil.tz.tzfile or None
841 Time zone to convert timestamps to. Passing ``None`` will
842 remove the time zone information preserving local time.
843 ambiguous : 'infer', 'NaT', bool array, default 'raise'
844 When clocks moved backward due to DST, ambiguous times may arise.
845 For example in Central European Time (UTC+01), when going from
846 03:00 DST to 02:00 non-DST, 02:30:00 local time occurs both at
847 00:30:00 UTC and at 01:30:00 UTC. In such a situation, the
848 `ambiguous` parameter dictates how ambiguous times should be
849 handled.
851 - 'infer' will attempt to infer fall dst-transition hours based on
852 order
853 - bool-ndarray where True signifies a DST time, False signifies a
854 non-DST time (note that this flag is only applicable for
855 ambiguous times)
856 - 'NaT' will return NaT where there are ambiguous times
857 - 'raise' will raise an AmbiguousTimeError if there are ambiguous
858 times.
860 nonexistent : 'shift_forward', 'shift_backward', 'NaT', timedelta, \
861default 'raise'
862 A nonexistent time does not exist in a particular timezone
863 where clocks moved forward due to DST.
865 - 'shift_forward' will shift the nonexistent time forward to the
866 closest existing time
867 - 'shift_backward' will shift the nonexistent time backward to the
868 closest existing time
869 - 'NaT' will return NaT where there are nonexistent times
870 - timedelta objects will shift nonexistent times by the timedelta
871 - 'raise' will raise an NonExistentTimeError if there are
872 nonexistent times.
874 .. versionadded:: 0.24.0
876 Returns
877 -------
878 Same type as self
879 Array/Index converted to the specified time zone.
881 Raises
882 ------
883 TypeError
884 If the Datetime Array/Index is tz-aware and tz is not None.
886 See Also
887 --------
888 DatetimeIndex.tz_convert : Convert tz-aware DatetimeIndex from
889 one time zone to another.
891 Examples
892 --------
893 >>> tz_naive = pd.date_range('2018-03-01 09:00', periods=3)
894 >>> tz_naive
895 DatetimeIndex(['2018-03-01 09:00:00', '2018-03-02 09:00:00',
896 '2018-03-03 09:00:00'],
897 dtype='datetime64[ns]', freq='D')
899 Localize DatetimeIndex in US/Eastern time zone:
901 >>> tz_aware = tz_naive.tz_localize(tz='US/Eastern')
902 >>> tz_aware
903 DatetimeIndex(['2018-03-01 09:00:00-05:00',
904 '2018-03-02 09:00:00-05:00',
905 '2018-03-03 09:00:00-05:00'],
906 dtype='datetime64[ns, US/Eastern]', freq='D')
908 With ``tz=None``, we can remove the time zone information
909 while keeping the local time (not converted to UTC):
911 >>> tz_aware.tz_localize(None)
912 DatetimeIndex(['2018-03-01 09:00:00', '2018-03-02 09:00:00',
913 '2018-03-03 09:00:00'],
914 dtype='datetime64[ns]', freq='D')
916 Be careful with DST changes. When there is sequential data, pandas can
917 infer the DST time:
919 >>> s = pd.to_datetime(pd.Series(['2018-10-28 01:30:00',
920 ... '2018-10-28 02:00:00',
921 ... '2018-10-28 02:30:00',
922 ... '2018-10-28 02:00:00',
923 ... '2018-10-28 02:30:00',
924 ... '2018-10-28 03:00:00',
925 ... '2018-10-28 03:30:00']))
926 >>> s.dt.tz_localize('CET', ambiguous='infer')
927 0 2018-10-28 01:30:00+02:00
928 1 2018-10-28 02:00:00+02:00
929 2 2018-10-28 02:30:00+02:00
930 3 2018-10-28 02:00:00+01:00
931 4 2018-10-28 02:30:00+01:00
932 5 2018-10-28 03:00:00+01:00
933 6 2018-10-28 03:30:00+01:00
934 dtype: datetime64[ns, CET]
936 In some cases, inferring the DST is impossible. In such cases, you can
937 pass an ndarray to the `ambiguous` parameter to set the DST explicitly.
939 >>> s = pd.to_datetime(pd.Series(['2018-10-28 01:20:00',
940 ... '2018-10-28 02:36:00',
941 ... '2018-10-28 03:46:00']))
942 >>> s.dt.tz_localize('CET', ambiguous=np.array([True, True, False]))
943 0 2018-10-28 01:20:00+02:00
944 1 2018-10-28 02:36:00+02:00
945 2 2018-10-28 03:46:00+01:00
946 dtype: datetime64[ns, CET]
947 If the DST transition causes nonexistent times, you can shift these
948 dates forward or backward with a timedelta object or `'shift_forward'`
949 or `'shift_backward'`.
951 >>> s = pd.to_datetime(pd.Series(['2015-03-29 02:30:00',
952 ... '2015-03-29 03:30:00']))
953 >>> s.dt.tz_localize('Europe/Warsaw', nonexistent='shift_forward')
954 0 2015-03-29 03:00:00+02:00
955 1 2015-03-29 03:30:00+02:00
956 dtype: datetime64[ns, Europe/Warsaw]
957 >>> s.dt.tz_localize('Europe/Warsaw', nonexistent='shift_backward')
958 0 2015-03-29 01:59:59.999999999+01:00
959 1 2015-03-29 03:30:00+02:00
960 dtype: datetime64[ns, Europe/Warsaw]
961 >>> s.dt.tz_localize('Europe/Warsaw', nonexistent=pd.Timedelta('1H'))
962 0 2015-03-29 03:30:00+02:00
963 1 2015-03-29 03:30:00+02:00
964 dtype: datetime64[ns, Europe/Warsaw]
965 """
966 nonexistent_options = ("raise", "NaT", "shift_forward", "shift_backward")
967 if nonexistent not in nonexistent_options and not isinstance(
968 nonexistent, timedelta
969 ):
970 raise ValueError(
971 "The nonexistent argument must be one of 'raise', "
972 "'NaT', 'shift_forward', 'shift_backward' or "
973 "a timedelta object"
974 )
976 if self.tz is not None:
977 if tz is None:
978 new_dates = tzconversion.tz_convert(self.asi8, timezones.UTC, self.tz)
979 else:
980 raise TypeError("Already tz-aware, use tz_convert to convert.")
981 else:
982 tz = timezones.maybe_get_tz(tz)
983 # Convert to UTC
985 new_dates = conversion.tz_localize_to_utc(
986 self.asi8, tz, ambiguous=ambiguous, nonexistent=nonexistent
987 )
988 new_dates = new_dates.view(_NS_DTYPE)
989 dtype = tz_to_dtype(tz)
990 return self._simple_new(new_dates, dtype=dtype, freq=self.freq)
992 # ----------------------------------------------------------------
993 # Conversion Methods - Vectorized analogues of Timestamp methods
995 def to_pydatetime(self):
996 """
997 Return Datetime Array/Index as object ndarray of datetime.datetime
998 objects.
1000 Returns
1001 -------
1002 datetimes : ndarray
1003 """
1004 return tslib.ints_to_pydatetime(self.asi8, tz=self.tz)
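# Quick sketch of the boxing, assuming a tz-aware input (illustrative values):
#     pd.array(pd.date_range("2020-01-01", periods=1, tz="UTC")).to_pydatetime()
#     -> array([datetime.datetime(2020, 1, 1, 0, 0, tzinfo=<UTC>)], dtype=object)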
1006 def normalize(self):
1007 """
1008 Convert times to midnight.
1010 The time component of the date-time is converted to midnight i.e.
1011 00:00:00. This is useful when the time does not matter.
1012 Length is unaltered. The timezones are unaffected.
1014 This method is available on Series with datetime values under
1015 the ``.dt`` accessor, and directly on Datetime Array/Index.
1017 Returns
1018 -------
1019 DatetimeArray, DatetimeIndex or Series
1020 The same type as the original data. Series will have the same
1021 name and index. DatetimeIndex will have the same name.
1023 See Also
1024 --------
1025 floor : Floor the datetimes to the specified freq.
1026 ceil : Ceil the datetimes to the specified freq.
1027 round : Round the datetimes to the specified freq.
1029 Examples
1030 --------
1031 >>> idx = pd.date_range(start='2014-08-01 10:00', freq='H',
1032 ... periods=3, tz='Asia/Calcutta')
1033 >>> idx
1034 DatetimeIndex(['2014-08-01 10:00:00+05:30',
1035 '2014-08-01 11:00:00+05:30',
1036 '2014-08-01 12:00:00+05:30'],
1037 dtype='datetime64[ns, Asia/Calcutta]', freq='H')
1038 >>> idx.normalize()
1039 DatetimeIndex(['2014-08-01 00:00:00+05:30',
1040 '2014-08-01 00:00:00+05:30',
1041 '2014-08-01 00:00:00+05:30'],
1042 dtype='datetime64[ns, Asia/Calcutta]', freq=None)
1043 """
1044 if self.tz is None or timezones.is_utc(self.tz):
1045 not_null = ~self.isna()
1046 DAY_NS = ccalendar.DAY_SECONDS * 1_000_000_000
1047 new_values = self.asi8.copy()
1048 adjustment = new_values[not_null] % DAY_NS
1049 new_values[not_null] = new_values[not_null] - adjustment
1050 else:
1051 new_values = conversion.normalize_i8_timestamps(self.asi8, self.tz)
1052 return type(self)._from_sequence(new_values, freq="infer").tz_localize(self.tz)
1054 def to_period(self, freq=None):
1055 """
1056 Cast to PeriodArray/Index at a particular frequency.
1058 Converts DatetimeArray/Index to PeriodArray/Index.
1060 Parameters
1061 ----------
1062 freq : str or Offset, optional
1063 One of pandas' :ref:`offset strings <timeseries.offset_aliases>`
1064 or an Offset object. Will be inferred by default.
1066 Returns
1067 -------
1068 PeriodArray/Index
1070 Raises
1071 ------
1072 ValueError
1073 When converting a DatetimeArray/Index with non-regular values,
1074 so that a frequency cannot be inferred.
1076 See Also
1077 --------
1078 PeriodIndex: Immutable ndarray holding ordinal values.
1079 DatetimeIndex.to_pydatetime: Return DatetimeIndex as object.
1081 Examples
1082 --------
1083 >>> df = pd.DataFrame({"y": [1, 2, 3]},
1084 ... index=pd.to_datetime(["2000-03-31 00:00:00",
1085 ... "2000-05-31 00:00:00",
1086 ... "2000-08-31 00:00:00"]))
1087 >>> df.index.to_period("M")
1088 PeriodIndex(['2000-03', '2000-05', '2000-08'],
1089 dtype='period[M]', freq='M')
1091 Infer the daily frequency
1093 >>> idx = pd.date_range("2017-01-01", periods=2)
1094 >>> idx.to_period()
1095 PeriodIndex(['2017-01-01', '2017-01-02'],
1096 dtype='period[D]', freq='D')
1097 """
1098 from pandas.core.arrays import PeriodArray
1100 if self.tz is not None:
1101 warnings.warn(
1102 "Converting to PeriodArray/Index representation "
1103 "will drop timezone information.",
1104 UserWarning,
1105 )
1107 if freq is None:
1108 freq = self.freqstr or self.inferred_freq
1110 if freq is None:
1111 raise ValueError(
1112 "You must pass a freq argument as current index has none."
1113 )
1115 freq = get_period_alias(freq)
1117 return PeriodArray._from_datetime64(self._data, freq, tz=self.tz)
1119 def to_perioddelta(self, freq):
1120 """
1121 Calculate TimedeltaArray of difference between index
1122 values and index converted to PeriodArray at specified
1123 freq. Used for vectorized offsets.
1125 Parameters
1126 ----------
1127 freq : Period frequency
1129 Returns
1130 -------
1131 TimedeltaArray/Index
1132 """
1133 # TODO: consider privatizing (discussion in GH#23113)
1134 from pandas.core.arrays.timedeltas import TimedeltaArray
1136 i8delta = self.asi8 - self.to_period(freq).to_timestamp().asi8
1137 m8delta = i8delta.view("m8[ns]")
1138 return TimedeltaArray(m8delta)
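# Sketch of the intent for a daily frequency: the result is each timestamp's
# offset from the start of its period (values below are illustrative):
#     dti = pd.DatetimeIndex(["2020-01-01 06:00", "2020-01-02 18:30"])
#     dti.to_perioddelta("D")
#     -> TimedeltaIndex(['0 days 06:00:00', '0 days 18:30:00'], dtype='timedelta64[ns]', freq=None)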
1140 # -----------------------------------------------------------------
1141 # Properties - Vectorized Timestamp Properties/Methods
1143 def month_name(self, locale=None):
1144 """
1145 Return the month names of the DatetimeIndex with the specified locale.
1147 .. versionadded:: 0.23.0
1149 Parameters
1150 ----------
1151 locale : str, optional
1152 Locale determining the language in which to return the month name.
1153 Default is English locale.
1155 Returns
1156 -------
1157 Index
1158 Index of month names.
1160 Examples
1161 --------
1162 >>> idx = pd.date_range(start='2018-01', freq='M', periods=3)
1163 >>> idx
1164 DatetimeIndex(['2018-01-31', '2018-02-28', '2018-03-31'],
1165 dtype='datetime64[ns]', freq='M')
1166 >>> idx.month_name()
1167 Index(['January', 'February', 'March'], dtype='object')
1168 """
1169 if self.tz is not None and not timezones.is_utc(self.tz):
1170 values = self._local_timestamps()
1171 else:
1172 values = self.asi8
1174 result = fields.get_date_name_field(values, "month_name", locale=locale)
1175 result = self._maybe_mask_results(result, fill_value=None)
1176 return result
1178 def day_name(self, locale=None):
1179 """
1180 Return the day names of the DatetimeIndex with the specified locale.
1182 .. versionadded:: 0.23.0
1184 Parameters
1185 ----------
1186 locale : str, optional
1187 Locale determining the language in which to return the day name.
1188 Default is English locale.
1190 Returns
1191 -------
1192 Index
1193 Index of day names.
1195 Examples
1196 --------
1197 >>> idx = pd.date_range(start='2018-01-01', freq='D', periods=3)
1198 >>> idx
1199 DatetimeIndex(['2018-01-01', '2018-01-02', '2018-01-03'],
1200 dtype='datetime64[ns]', freq='D')
1201 >>> idx.day_name()
1202 Index(['Monday', 'Tuesday', 'Wednesday'], dtype='object')
1203 """
1204 if self.tz is not None and not timezones.is_utc(self.tz):
1205 values = self._local_timestamps()
1206 else:
1207 values = self.asi8
1209 result = fields.get_date_name_field(values, "day_name", locale=locale)
1210 result = self._maybe_mask_results(result, fill_value=None)
1211 return result
1213 @property
1214 def time(self):
1215 """
1216 Returns numpy array of datetime.time. The time part of the Timestamps.
1217 """
1218 # If the Timestamps have a timezone that is not UTC,
1219 # convert them into their i8 representation while
1220 # keeping their timezone and not using UTC
1221 if self.tz is not None and not timezones.is_utc(self.tz):
1222 timestamps = self._local_timestamps()
1223 else:
1224 timestamps = self.asi8
1226 return tslib.ints_to_pydatetime(timestamps, box="time")
1228 @property
1229 def timetz(self):
1230 """
1231 Returns numpy array of datetime.time also containing timezone
1232 information. The time part of the Timestamps.
1233 """
1234 return tslib.ints_to_pydatetime(self.asi8, self.tz, box="time")
1236 @property
1237 def date(self):
1238 """
1239 Returns numpy array of python datetime.date objects (namely, the date
1240 part of Timestamps without timezone information).
1241 """
1242 # If the Timestamps have a timezone that is not UTC,
1243 # convert them into their i8 representation while
1244 # keeping their timezone and not using UTC
1245 if self.tz is not None and not timezones.is_utc(self.tz):
1246 timestamps = self._local_timestamps()
1247 else:
1248 timestamps = self.asi8
1250 return tslib.ints_to_pydatetime(timestamps, box="date")
1252 year = _field_accessor(
1253 "year",
1254 "Y",
1255 """
1256 The year of the datetime.
1257 """,
1258 )
1259 month = _field_accessor(
1260 "month",
1261 "M",
1262 """
1263 The month as January=1, December=12.
1264 """,
1265 )
1266 day = _field_accessor(
1267 "day",
1268 "D",
1269 """
1270 The day of the datetime.
1271 """,
1272 )
1273 hour = _field_accessor(
1274 "hour",
1275 "h",
1276 """
1277 The hours of the datetime.
1278 """,
1279 )
1280 minute = _field_accessor(
1281 "minute",
1282 "m",
1283 """
1284 The minutes of the datetime.
1285 """,
1286 )
1287 second = _field_accessor(
1288 "second",
1289 "s",
1290 """
1291 The seconds of the datetime.
1292 """,
1293 )
1294 microsecond = _field_accessor(
1295 "microsecond",
1296 "us",
1297 """
1298 The microseconds of the datetime.
1299 """,
1300 )
1301 nanosecond = _field_accessor(
1302 "nanosecond",
1303 "ns",
1304 """
1305 The nanoseconds of the datetime.
1306 """,
1307 )
1308 weekofyear = _field_accessor(
1309 "weekofyear",
1310 "woy",
1311 """
1312 The week ordinal of the year.
1313 """,
1314 )
1315 week = weekofyear
1316 _dayofweek_doc = """
1317 The day of the week with Monday=0, Sunday=6.
1319 Return the day of the week. It is assumed the week starts on
1320 Monday, which is denoted by 0 and ends on Sunday which is denoted
1321 by 6. This method is available on both Series with datetime
1322 values (using the `dt` accessor) and DatetimeIndex.
1324 Returns
1325 -------
1326 Series or Index
1327 Containing integers indicating the day number.
1329 See Also
1330 --------
1331 Series.dt.dayofweek : Alias.
1332 Series.dt.weekday : Alias.
1333 Series.dt.day_name : Returns the name of the day of the week.
1335 Examples
1336 --------
1337 >>> s = pd.date_range('2016-12-31', '2017-01-08', freq='D').to_series()
1338 >>> s.dt.dayofweek
1339 2016-12-31 5
1340 2017-01-01 6
1341 2017-01-02 0
1342 2017-01-03 1
1343 2017-01-04 2
1344 2017-01-05 3
1345 2017-01-06 4
1346 2017-01-07 5
1347 2017-01-08 6
1348 Freq: D, dtype: int64
1349 """
1350 dayofweek = _field_accessor("dayofweek", "dow", _dayofweek_doc)
1351 weekday = dayofweek
1353 dayofyear = _field_accessor(
1354 "dayofyear",
1355 "doy",
1356 """
1357 The ordinal day of the year.
1358 """,
1359 )
1360 quarter = _field_accessor(
1361 "quarter",
1362 "q",
1363 """
1364 The quarter of the date.
1365 """,
1366 )
1367 days_in_month = _field_accessor(
1368 "days_in_month",
1369 "dim",
1370 """
1371 The number of days in the month.
1372 """,
1373 )
1374 daysinmonth = days_in_month
1375 _is_month_doc = """
1376 Indicates whether the date is the {first_or_last} day of the month.
1378 Returns
1379 -------
1380 Series or array
1381 For Series, returns a Series with boolean values.
1382 For DatetimeIndex, returns a boolean array.
1384 See Also
1385 --------
1386 is_month_start : Return a boolean indicating whether the date
1387 is the first day of the month.
1388 is_month_end : Return a boolean indicating whether the date
1389 is the last day of the month.
1391 Examples
1392 --------
1393 This method is available on Series with datetime values under
1394 the ``.dt`` accessor, and directly on DatetimeIndex.
1396 >>> s = pd.Series(pd.date_range("2018-02-27", periods=3))
1397 >>> s
1398 0 2018-02-27
1399 1 2018-02-28
1400 2 2018-03-01
1401 dtype: datetime64[ns]
1402 >>> s.dt.is_month_start
1403 0 False
1404 1 False
1405 2 True
1406 dtype: bool
1407 >>> s.dt.is_month_end
1408 0 False
1409 1 True
1410 2 False
1411 dtype: bool
1413 >>> idx = pd.date_range("2018-02-27", periods=3)
1414 >>> idx.is_month_start
1415 array([False, False, True])
1416 >>> idx.is_month_end
1417 array([False, True, False])
1418 """
1419 is_month_start = _field_accessor(
1420 "is_month_start", "is_month_start", _is_month_doc.format(first_or_last="first")
1421 )
1423 is_month_end = _field_accessor(
1424 "is_month_end", "is_month_end", _is_month_doc.format(first_or_last="last")
1425 )
1427 is_quarter_start = _field_accessor(
1428 "is_quarter_start",
1429 "is_quarter_start",
1430 """
1431 Indicator for whether the date is the first day of a quarter.
1433 Returns
1434 -------
1435 is_quarter_start : Series or DatetimeIndex
1436 The same type as the original data with boolean values. Series will
1437 have the same name and index. DatetimeIndex will have the same
1438 name.
1440 See Also
1441 --------
1442 quarter : Return the quarter of the date.
1443 is_quarter_end : Similar property indicating the end of the quarter.
1445 Examples
1446 --------
1447 This method is available on Series with datetime values under
1448 the ``.dt`` accessor, and directly on DatetimeIndex.
1450 >>> df = pd.DataFrame({'dates': pd.date_range("2017-03-30",
1451 ... periods=4)})
1452 >>> df.assign(quarter=df.dates.dt.quarter,
1453 ... is_quarter_start=df.dates.dt.is_quarter_start)
1454 dates quarter is_quarter_start
1455 0 2017-03-30 1 False
1456 1 2017-03-31 1 False
1457 2 2017-04-01 2 True
1458 3 2017-04-02 2 False
1460 >>> idx = pd.date_range('2017-03-30', periods=4)
1461 >>> idx
1462 DatetimeIndex(['2017-03-30', '2017-03-31', '2017-04-01', '2017-04-02'],
1463 dtype='datetime64[ns]', freq='D')
1465 >>> idx.is_quarter_start
1466 array([False, False, True, False])
1467 """,
1468 )
1469 is_quarter_end = _field_accessor(
1470 "is_quarter_end",
1471 "is_quarter_end",
1472 """
1473 Indicator for whether the date is the last day of a quarter.
1475 Returns
1476 -------
1477 is_quarter_end : Series or DatetimeIndex
1478 The same type as the original data with boolean values. Series will
1479 have the same name and index. DatetimeIndex will have the same
1480 name.
1482 See Also
1483 --------
1484 quarter : Return the quarter of the date.
1485 is_quarter_start : Similar property indicating the quarter start.
1487 Examples
1488 --------
1489 This method is available on Series with datetime values under
1490 the ``.dt`` accessor, and directly on DatetimeIndex.
1492 >>> df = pd.DataFrame({'dates': pd.date_range("2017-03-30",
1493 ... periods=4)})
1494 >>> df.assign(quarter=df.dates.dt.quarter,
1495 ... is_quarter_end=df.dates.dt.is_quarter_end)
1496 dates quarter is_quarter_end
1497 0 2017-03-30 1 False
1498 1 2017-03-31 1 True
1499 2 2017-04-01 2 False
1500 3 2017-04-02 2 False
1502 >>> idx = pd.date_range('2017-03-30', periods=4)
1503 >>> idx
1504 DatetimeIndex(['2017-03-30', '2017-03-31', '2017-04-01', '2017-04-02'],
1505 dtype='datetime64[ns]', freq='D')
1507 >>> idx.is_quarter_end
1508 array([False, True, False, False])
1509 """,
1510 )
1511 is_year_start = _field_accessor(
1512 "is_year_start",
1513 "is_year_start",
1514 """
1515 Indicate whether the date is the first day of a year.
1517 Returns
1518 -------
1519 Series or DatetimeIndex
1520 The same type as the original data with boolean values. Series will
1521 have the same name and index. DatetimeIndex will have the same
1522 name.
1524 See Also
1525 --------
1526 is_year_end : Similar property indicating the last day of the year.
1528 Examples
1529 --------
1530 This method is available on Series with datetime values under
1531 the ``.dt`` accessor, and directly on DatetimeIndex.
1533 >>> dates = pd.Series(pd.date_range("2017-12-30", periods=3))
1534 >>> dates
1535 0 2017-12-30
1536 1 2017-12-31
1537 2 2018-01-01
1538 dtype: datetime64[ns]
1540 >>> dates.dt.is_year_start
1541 0 False
1542 1 False
1543 2 True
1544 dtype: bool
1546 >>> idx = pd.date_range("2017-12-30", periods=3)
1547 >>> idx
1548 DatetimeIndex(['2017-12-30', '2017-12-31', '2018-01-01'],
1549 dtype='datetime64[ns]', freq='D')
1551 >>> idx.is_year_start
1552 array([False, False, True])
1553 """,
1554 )
1555 is_year_end = _field_accessor(
1556 "is_year_end",
1557 "is_year_end",
1558 """
1559 Indicate whether the date is the last day of the year.
1561 Returns
1562 -------
1563 Series or DatetimeIndex
1564 The same type as the original data with boolean values. Series will
1565 have the same name and index. DatetimeIndex will have the same
1566 name.
1568 See Also
1569 --------
1570 is_year_start : Similar property indicating the start of the year.
1572 Examples
1573 --------
1574 This method is available on Series with datetime values under
1575 the ``.dt`` accessor, and directly on DatetimeIndex.
1577 >>> dates = pd.Series(pd.date_range("2017-12-30", periods=3))
1578 >>> dates
1579 0 2017-12-30
1580 1 2017-12-31
1581 2 2018-01-01
1582 dtype: datetime64[ns]
1584 >>> dates.dt.is_year_end
1585 0 False
1586 1 True
1587 2 False
1588 dtype: bool
1590 >>> idx = pd.date_range("2017-12-30", periods=3)
1591 >>> idx
1592 DatetimeIndex(['2017-12-30', '2017-12-31', '2018-01-01'],
1593 dtype='datetime64[ns]', freq='D')
1595 >>> idx.is_year_end
1596 array([False, True, False])
1597 """,
1598 )
1599 is_leap_year = _field_accessor(
1600 "is_leap_year",
1601 "is_leap_year",
1602 """
1603 Boolean indicator if the date belongs to a leap year.
1605 A leap year is a year with 366 days (instead of 365), including the
1606 29th of February as an intercalary day.
1607 Leap years are years which are multiples of four, with the exception
1608 of years divisible by 100 but not by 400.
1610 Returns
1611 -------
1612 Series or ndarray
1613 Booleans indicating if dates belong to a leap year.
1615 Examples
1616 --------
1617 This method is available on Series with datetime values under
1618 the ``.dt`` accessor, and directly on DatetimeIndex.
1620 >>> idx = pd.date_range("2012-01-01", "2015-01-01", freq="Y")
1621 >>> idx
1622 DatetimeIndex(['2012-12-31', '2013-12-31', '2014-12-31'],
1623 dtype='datetime64[ns]', freq='A-DEC')
1624 >>> idx.is_leap_year
1625 array([ True, False, False], dtype=bool)
1627 >>> dates_series = pd.Series(idx)
1628 >>> dates_series
1629 0 2012-12-31
1630 1 2013-12-31
1631 2 2014-12-31
1632 dtype: datetime64[ns]
1633 >>> dates_series.dt.is_leap_year
1634 0 True
1635 1 False
1636 2 False
1637 dtype: bool
1638 """,
1639 )
1641 def to_julian_date(self):
1642 """
1643 Convert Datetime Array to float64 ndarray of Julian Dates.
1644 Julian Date 0 is noon on January 1, 4713 BC.
1645 http://en.wikipedia.org/wiki/Julian_day
1646 """
1648 # http://mysite.verizon.net/aesir_research/date/jdalg2.htm
1649 year = np.asarray(self.year)
1650 month = np.asarray(self.month)
1651 day = np.asarray(self.day)
1652 testarr = month < 3
1653 year[testarr] -= 1
1654 month[testarr] += 12
1655 return (
1656 day
1657 + np.fix((153 * month - 457) / 5)
1658 + 365 * year
1659 + np.floor(year / 4)
1660 - np.floor(year / 100)
1661 + np.floor(year / 400)
1662 + 1_721_118.5
1663 + (
1664 self.hour
1665 + self.minute / 60.0
1666 + self.second / 3600.0
1667 + self.microsecond / 3600.0 / 1e6
1668 + self.nanosecond / 3600.0 / 1e9
1669 )
1670 / 24.0
1671 )
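# Worked check of the formula above: noon on 2000-01-01 (the J2000 epoch) is
# Julian Date 2451545.0. Step by step, with year=1999 and month=13 after the
# month<3 shift: 1 + fix((153*13 - 457)/5)=306 + 365*1999 + 499 - 19 + 4
# + 1721118.5 = 2451544.5; adding the 12h/24 time fraction gives 2451545.0.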
1674# -------------------------------------------------------------------
1675# Constructor Helpers
1678def sequence_to_dt64ns(
1679 data,
1680 dtype=None,
1681 copy=False,
1682 tz=None,
1683 dayfirst=False,
1684 yearfirst=False,
1685 ambiguous="raise",
1686):
1687 """
1688 Parameters
1689 ----------
1690 data : list-like
1691 dtype : dtype, str, or None, default None
1692 copy : bool, default False
1693 tz : tzinfo, str, or None, default None
1694 dayfirst : bool, default False
1695 yearfirst : bool, default False
1696 ambiguous : str, bool, or arraylike, default 'raise'
1697 See pandas._libs.tslibs.conversion.tz_localize_to_utc.
1699 Returns
1700 -------
1701 result : numpy.ndarray
1702 The sequence converted to a numpy array with dtype ``datetime64[ns]``.
1703 tz : tzinfo or None
1704 Either the user-provided tzinfo or one inferred from the data.
1705 inferred_freq : Tick or None
1706 The inferred frequency of the sequence.
1708 Raises
1709 ------
1710 TypeError : PeriodDtype data is passed
1711 """
1713 inferred_freq = None
1715 dtype = _validate_dt64_dtype(dtype)
1717 if not hasattr(data, "dtype"):
1718 # e.g. list, tuple
1719 if np.ndim(data) == 0:
1720 # i.e. generator
1721 data = list(data)
1722 data = np.asarray(data)
1723 copy = False
1724 elif isinstance(data, ABCSeries):
1725 data = data._values
1726 if isinstance(data, ABCPandasArray):
1727 data = data.to_numpy()
1729 if hasattr(data, "freq"):
1730 # i.e. DatetimeArray/Index
1731 inferred_freq = data.freq
1733 # if dtype has an embedded tz, capture it
1734 tz = validate_tz_from_dtype(dtype, tz)
1736 if isinstance(data, ABCIndexClass):
1737 if data.nlevels > 1:
1738 # Without this check, data._data below is None
1739 raise TypeError("Cannot create a DatetimeArray from a MultiIndex.")
1740 data = data._data
1742 # By this point we are assured to have either a numpy array or Index
1743 data, copy = maybe_convert_dtype(data, copy)
1745 if is_object_dtype(data) or is_string_dtype(data):
1746 # TODO: We do not have tests specific to string-dtypes,
1747 # also complex or categorical or other extension
1748 copy = False
1749 if lib.infer_dtype(data, skipna=False) == "integer":
1750 data = data.astype(np.int64)
1751 else:
1752 # data comes back here as either i8 to denote UTC timestamps
1753 # or M8[ns] to denote wall times
1754 data, inferred_tz = objects_to_datetime64ns(
1755 data, dayfirst=dayfirst, yearfirst=yearfirst
1756 )
1757 tz = maybe_infer_tz(tz, inferred_tz)
1759 # `data` may have originally been a Categorical[datetime64[ns, tz]],
1760 # so we need to handle these types.
1761 if is_datetime64tz_dtype(data):
1762 # DatetimeArray -> ndarray
1763 tz = maybe_infer_tz(tz, data.tz)
1764 result = data._data
1766 elif is_datetime64_dtype(data):
1767 # tz-naive DatetimeArray or ndarray[datetime64]
1768 data = getattr(data, "_data", data)
1769 if data.dtype != _NS_DTYPE:
1770 data = conversion.ensure_datetime64ns(data)
1772 if tz is not None:
1773 # Convert tz-naive to UTC
1774 tz = timezones.maybe_get_tz(tz)
1775 data = conversion.tz_localize_to_utc(
1776 data.view("i8"), tz, ambiguous=ambiguous
1777 )
1778 data = data.view(_NS_DTYPE)
1780 assert data.dtype == _NS_DTYPE, data.dtype
1781 result = data
1783 else:
1784 # must be integer dtype otherwise
1785 # assume this data are epoch timestamps
1786 if tz:
1787 tz = timezones.maybe_get_tz(tz)
1789 if data.dtype != _INT64_DTYPE:
1790 data = data.astype(np.int64, copy=False)
1791 result = data.view(_NS_DTYPE)
1793 if copy:
1794 # TODO: should this be deepcopy?
1795 result = result.copy()
1797 assert isinstance(result, np.ndarray), type(result)
1798 assert result.dtype == "M8[ns]", result.dtype
1800 # We have to call this again after possibly inferring a tz above
1801 validate_tz_from_dtype(dtype, tz)
1803 return result, tz, inferred_freq
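# Sketch of the three return values for plain string input (no tz in the data,
# no dtype, list input so no frequency is inferred here):
#     sequence_to_dt64ns(["2020-01-01", "2020-01-05"])
#     -> (array(['2020-01-01T00:00:00.000000000', '2020-01-05T00:00:00.000000000'],
#               dtype='datetime64[ns]'), None, None)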
1806def objects_to_datetime64ns(
1807 data,
1808 dayfirst,
1809 yearfirst,
1810 utc=False,
1811 errors="raise",
1812 require_iso8601=False,
1813 allow_object=False,
1814):
1815 """
1816 Convert data to array of timestamps.
1818 Parameters
1819 ----------
1820 data : np.ndarray[object]
1821 dayfirst : bool
1822 yearfirst : bool
1823 utc : bool, default False
1824 Whether to convert timezone-aware timestamps to UTC.
1825 errors : {'raise', 'ignore', 'coerce'}
1826 allow_object : bool
1827 Whether to return an object-dtype ndarray instead of raising if the
1828 data contains more than one timezone.
1830 Returns
1831 -------
1832 result : ndarray
1833 np.int64 dtype if returned values represent UTC timestamps
1834 np.datetime64[ns] if returned values represent wall times
1835 object if mixed timezones
1836 inferred_tz : tzinfo or None
1838 Raises
1839 ------
1840 ValueError : if data cannot be converted to datetimes
1841 """
1842 assert errors in ["raise", "ignore", "coerce"]
1844 # if str-dtype, convert
1845 data = np.array(data, copy=False, dtype=np.object_)
1847 try:
1848 result, tz_parsed = tslib.array_to_datetime(
1849 data,
1850 errors=errors,
1851 utc=utc,
1852 dayfirst=dayfirst,
1853 yearfirst=yearfirst,
1854 require_iso8601=require_iso8601,
1855 )
1856 except ValueError as e:
1857 try:
1858 values, tz_parsed = conversion.datetime_to_datetime64(data)
1859 # If tzaware, these values represent unix timestamps, so we
1860 # return them as i8 to distinguish from wall times
1861 return values.view("i8"), tz_parsed
1862 except (ValueError, TypeError):
1863 raise e
1865 if tz_parsed is not None:
1866 # We can take a shortcut since the datetime64 numpy array
1867 # is in UTC
1868 # Return i8 values to denote unix timestamps
1869 return result.view("i8"), tz_parsed
1870 elif is_datetime64_dtype(result):
1871 # returning M8[ns] denotes wall-times; since tz is None
1872 # the distinction is a thin one
1873 return result, tz_parsed
1874 elif is_object_dtype(result):
1875 # GH#23675 when called via `pd.to_datetime`, returning an object-dtype
1876 # array is allowed. When called via `pd.DatetimeIndex`, we can
1877 # only accept datetime64 dtype, so raise TypeError if object-dtype
1878 # is returned, as that indicates the values can be recognized as
1879 # datetimes but they have conflicting timezones/awareness
1880 if allow_object:
1881 return result, tz_parsed
1882 raise TypeError(result)
1883 else: # pragma: no cover
1884 # GH#23675 this TypeError should never be hit, whereas the TypeError
1885 # in the object-dtype branch above is reachable.
1886 raise TypeError(result)
1889def maybe_convert_dtype(data, copy):
1890 """
1891 Convert data based on dtype conventions, issuing deprecation warnings
1892 or errors where appropriate.
1894 Parameters
1895 ----------
1896 data : np.ndarray or pd.Index
1897 copy : bool
1899 Returns
1900 -------
1901 data : np.ndarray or pd.Index
1902 copy : bool
1904 Raises
1905 ------
1906 TypeError : PeriodDtype data is passed
1907 """
1908 if is_float_dtype(data):
1909 # Note: we must cast to datetime64[ns] here in order to treat these
1910 # as wall-times instead of UTC timestamps.
1911 data = data.astype(_NS_DTYPE)
1912 copy = False
1913 # TODO: deprecate this behavior to instead treat symmetrically
1914 # with integer dtypes. See discussion in GH#23675
1916 elif is_timedelta64_dtype(data):
1917 # GH#29794 enforcing deprecation introduced in GH#23539
1918 raise TypeError(f"dtype {data.dtype} cannot be converted to datetime64[ns]")
1919 elif is_period_dtype(data):
1920 # Note: without explicitly raising here, PeriodIndex
1921 # test_setops.test_join_does_not_recur fails
1922 raise TypeError(
1923 "Passing PeriodDtype data is invalid. Use `data.to_timestamp()` instead"
1924 )
1926 elif is_categorical_dtype(data):
1927 # GH#18664 preserve tz in going DTI->Categorical->DTI
1928 # TODO: cases where we need to do another pass through this func,
1929 # e.g. the categories are timedelta64s
1930 data = data.categories.take(data.codes, fill_value=NaT)._values
1931 copy = False
1933 elif is_extension_array_dtype(data) and not is_datetime64tz_dtype(data):
1934 # Includes categorical
1935 # TODO: We have no tests for these
1936 data = np.array(data, dtype=np.object_)
1937 copy = False
1939 return data, copy
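# Summary sketch of the dtype handling above:
#     float64 ndarray            -> cast to datetime64[ns] (treated as wall times)
#     timedelta64 / PeriodDtype  -> TypeError
#     Categorical                -> materialized via categories.take(codes)
#     other extension arrays     -> converted to an object ndarray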
1942# -------------------------------------------------------------------
1943# Validation and Inference
1946def maybe_infer_tz(tz, inferred_tz):
1947 """
1948 If a timezone is inferred from data, check that it is compatible with
1949 the user-provided timezone, if any.
1951 Parameters
1952 ----------
1953 tz : tzinfo or None
1954 inferred_tz : tzinfo or None
1956 Returns
1957 -------
1958 tz : tzinfo or None
1960 Raises
1961 ------
1962 TypeError : if both timezones are present but do not match
1963 """
1964 if tz is None:
1965 tz = inferred_tz
1966 elif inferred_tz is None:
1967 pass
1968 elif not timezones.tz_compare(tz, inferred_tz):
1969 raise TypeError(
1970 f"data is already tz-aware {inferred_tz}, unable to "
1971 f"set specified tz: {tz}"
1972 )
1973 return tz
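# Sketch of the three branches, with `utc` from pytz and `other` standing in
# for any non-equivalent tzinfo (hypothetical placeholder):
#     maybe_infer_tz(None, utc)    -> utc        # take the inferred tz
#     maybe_infer_tz(utc, None)    -> utc        # keep the user-supplied tz
#     maybe_infer_tz(utc, other)   -> TypeError  # conflicting timezones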
1976def _validate_dt64_dtype(dtype):
1977 """
1978 Check that a dtype, if passed, represents either a numpy datetime64[ns]
1979 dtype or a pandas DatetimeTZDtype.
1981 Parameters
1982 ----------
1983 dtype : object
1985 Returns
1986 -------
1987 dtype : None, numpy.dtype, or DatetimeTZDtype
1989 Raises
1990 ------
1991 ValueError : invalid dtype
1993 Notes
1994 -----
1995 Unlike validate_tz_from_dtype, this does _not_ allow non-existent
1996 tz errors to go through
1997 """
1998 if dtype is not None:
1999 dtype = pandas_dtype(dtype)
2000 if is_dtype_equal(dtype, np.dtype("M8")):
2001 # no precision, disallowed GH#24806
2002 msg = (
2003 "Passing in 'datetime64' dtype with no precision is not allowed. "
2004 "Please pass in 'datetime64[ns]' instead."
2005 )
2006 raise ValueError(msg)
2008 if (isinstance(dtype, np.dtype) and dtype != _NS_DTYPE) or not isinstance(
2009 dtype, (np.dtype, DatetimeTZDtype)
2010 ):
2011 raise ValueError(
2012 f"Unexpected value for 'dtype': '{dtype}'. "
2013 "Must be 'datetime64[ns]' or DatetimeTZDtype'."
2014 )
2015 return dtype
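# Sketch of accepted and rejected inputs (reprs abbreviated):
#     _validate_dt64_dtype(None)                   -> None
#     _validate_dt64_dtype("datetime64[ns]")       -> dtype('<M8[ns]')
#     _validate_dt64_dtype("datetime64[ns, UTC]")  -> DatetimeTZDtype(unit='ns', tz=UTC)
#     _validate_dt64_dtype("datetime64")           -> ValueError (no precision)
#     _validate_dt64_dtype("datetime64[us]")       -> ValueError (wrong unit)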
2018def validate_tz_from_dtype(dtype, tz):
2019 """
2020 If the given dtype is a DatetimeTZDtype, extract the implied
2021 tzinfo object from it and check that it does not conflict with the given
2022 tz.
2024 Parameters
2025 ----------
2026 dtype : dtype, str
2027 tz : None, tzinfo
2029 Returns
2030 -------
2031 tz : consensus tzinfo
2033 Raises
2034 ------
2035 ValueError : on tzinfo mismatch
2036 """
2037 if dtype is not None:
2038 if isinstance(dtype, str):
2039 try:
2040 dtype = DatetimeTZDtype.construct_from_string(dtype)
2041 except TypeError:
2042 # Things like `datetime64[ns]`, which is OK for the
2043 # constructors, but also nonsense, which should be validated
2044 # but not by us. We *do* allow non-existent tz errors to
2045 # go through
2046 pass
2047 dtz = getattr(dtype, "tz", None)
2048 if dtz is not None:
2049 if tz is not None and not timezones.tz_compare(tz, dtz):
2050 raise ValueError("cannot supply both a tz and a dtype with a tz")
2051 tz = dtz
2053 if tz is not None and is_datetime64_dtype(dtype):
2054 # We also need to check for the case where the user passed a
2055 # tz-naive dtype (i.e. datetime64[ns])
2056 if tz is not None and not timezones.tz_compare(tz, dtz):
2057 raise ValueError(
2058 "cannot supply both a tz and a "
2059 "timezone-naive dtype (i.e. datetime64[ns])"
2060 )
2062 return tz
2065def _infer_tz_from_endpoints(start, end, tz):
2066 """
2067 If a timezone is not explicitly given via `tz`, see if one can
2068 be inferred from the `start` and `end` endpoints. If more than one
2069 of these inputs provides a timezone, require that they all agree.
2071 Parameters
2072 ----------
2073 start : Timestamp
2074 end : Timestamp
2075 tz : tzinfo or None
2077 Returns
2078 -------
2079 tz : tzinfo or None
2081 Raises
2082 ------
2083 TypeError : if start and end timezones do not agree
2084 """
2085 try:
2086 inferred_tz = timezones.infer_tzinfo(start, end)
2087 except AssertionError:
2088 # infer_tzinfo raises AssertionError if passed mismatched timezones
2089 raise TypeError(
2090 "Start and end cannot both be tz-aware with different timezones"
2091 )
2093 inferred_tz = timezones.maybe_get_tz(inferred_tz)
2094 tz = timezones.maybe_get_tz(tz)
2096 if tz is not None and inferred_tz is not None:
2097 if not timezones.tz_compare(inferred_tz, tz):
2098 raise AssertionError("Inferred time zone not equal to passed time zone")
2100 elif inferred_tz is not None:
2101 tz = inferred_tz
2103 return tz
2106def _maybe_normalize_endpoints(start, end, normalize):
2107 _normalized = True
2109 if start is not None:
2110 if normalize:
2111 start = normalize_date(start)
2112 _normalized = True
2113 else:
2114 _normalized = _normalized and start.time() == _midnight
2116 if end is not None:
2117 if normalize:
2118 end = normalize_date(end)
2119 _normalized = True
2120 else:
2121 _normalized = _normalized and end.time() == _midnight
2123 return start, end, _normalized
2126def _maybe_localize_point(ts, is_none, is_not_none, freq, tz, ambiguous, nonexistent):
2127 """
2128 Localize a start or end Timestamp to the timezone of the corresponding
2129 start or end Timestamp
2131 Parameters
2132 ----------
2133 ts : start or end Timestamp to potentially localize
2134 is_none : argument that should be None
2135 is_not_none : argument that should not be None
2136 freq : Tick, DateOffset, or None
2137 tz : str, timezone object or None
2138 ambiguous: str, localization behavior for ambiguous times
2139 nonexistent: str, localization behavior for nonexistent times
2141 Returns
2142 -------
2143 ts : Timestamp
2144 """
2145 # Make sure start and end are timezone localized if:
2146 # 1) freq = a Timedelta-like frequency (Tick)
2147 # 2) freq = None i.e. generating a linspaced range
2148 if is_none is None and is_not_none is not None:
2149 # Note: We can't ambiguous='infer' a singular ambiguous time; however,
2150 # we have historically defaulted ambiguous=False
2151 ambiguous = ambiguous if ambiguous != "infer" else False
2152 localize_args = {"ambiguous": ambiguous, "nonexistent": nonexistent, "tz": None}
2153 if isinstance(freq, Tick) or freq is None:
2154 localize_args["tz"] = tz
2155 ts = ts.tz_localize(**localize_args)
2156 return ts