Coverage for /home/martinb/.local/share/virtualenvs/camcops/lib/python3.6/site-packages/pandas/core/tools/datetimes.py : 10%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1from collections import abc
2from datetime import datetime, time
3from functools import partial
4from itertools import islice
5from typing import Optional, TypeVar, Union
7import numpy as np
9from pandas._libs import tslib, tslibs
10from pandas._libs.tslibs import Timestamp, conversion, parsing
11from pandas._libs.tslibs.parsing import ( # noqa
12 DateParseError,
13 _format_is_iso,
14 _guess_datetime_format,
15 parse_time_string,
16)
17from pandas._libs.tslibs.strptime import array_strptime
18from pandas._typing import ArrayLike
20from pandas.core.dtypes.common import (
21 ensure_object,
22 is_datetime64_dtype,
23 is_datetime64_ns_dtype,
24 is_datetime64tz_dtype,
25 is_float,
26 is_integer,
27 is_integer_dtype,
28 is_list_like,
29 is_numeric_dtype,
30 is_scalar,
31)
32from pandas.core.dtypes.generic import (
33 ABCDataFrame,
34 ABCDatetimeIndex,
35 ABCIndex,
36 ABCIndexClass,
37 ABCSeries,
38)
39from pandas.core.dtypes.missing import notna
41from pandas.arrays import IntegerArray
42from pandas.core import algorithms
43from pandas.core.algorithms import unique
# ---------------------------------------------------------------------
# Type aliases used by the annotations in this module.

# Any 1-d container of date-likes accepted by the converters.
ArrayConvertible = Union[list, tuple, ArrayLike, ABCSeries]

# A single raw (not yet converted) value.
Scalar = Union[int, float, str]

# Type variable constrained to ``Scalar`` or ``datetime``.
DatetimeScalar = TypeVar("DatetimeScalar", Scalar, datetime)

# Either one date-like value or a container of them.  Nested Unions are
# flattened by ``typing``, so this equals
# Union[DatetimeScalar, list, tuple, ArrayLike, ABCSeries].
DatetimeScalarOrArrayConvertible = Union[DatetimeScalar, ArrayConvertible]

# ---------------------------------------------------------------------
def _guess_datetime_format_for_array(arr, **kwargs):
    """
    Guess a datetime format for `arr` from its first non-missing element.

    Parameters
    ----------
    arr : ndarray
        Values to inspect; it is indexed positionally.
    **kwargs
        Forwarded unchanged to ``_guess_datetime_format``.

    Returns
    -------
    The result of ``_guess_datetime_format`` for the first non-missing
    element, or None when every element is missing (or `arr` is empty).
    """
    valid_positions = notna(arr).nonzero()[0]
    if not len(valid_positions):
        return None
    return _guess_datetime_format(arr[valid_positions[0]], **kwargs)
def should_cache(
    arg: ArrayConvertible, unique_share: float = 0.7, check_count: Optional[int] = None
) -> bool:
    """
    Decide whether building a cache of converted values is worthwhile.

    The first `check_count` elements of `arg` are sampled; caching is
    recommended when the number of distinct sampled values does not exceed
    ``check_count * unique_share``.

    Parameters
    ----------
    arg: listlike, tuple, 1-d array, Series
    unique_share: float, default=0.7, optional
        0 < unique_share < 1
    check_count: int, optional
        0 <= check_count <= len(arg)

    Returns
    -------
    do_caching: bool

    Notes
    -----
    When `check_count` is not given: a sequence of 50 items or fewer is never
    cached; for up to 5000 items ten percent of the elements are sampled;
    for anything longer only the first 500 elements are sampled.
    All constants were chosen empirically.
    """
    if check_count is not None:
        # caller-supplied sample size: validate it, and bail out on zero
        assert (
            0 <= check_count <= len(arg)
        ), "check_count must be in next bounds: [0; len(arg)]"
        if check_count == 0:
            return False
    elif len(arg) <= 50:
        # in this case, the gain from caching is negligible
        return False
    elif len(arg) <= 5000:
        check_count = int(len(arg) * 0.1)
    else:
        check_count = 500

    assert 0 < unique_share < 1, "unique_share must be in next bounds: (0; 1)"

    sampled = set(islice(arg, check_count))
    if len(sampled) > check_count * unique_share:
        return False
    return True
def _maybe_cache(arg, format, cache, convert_listlike):
    """
    Build a lookup of unique input dates to their converted values.

    Parameters
    ----------
    arg : listlike, tuple, 1-d array, Series
    format : string
        Strftime format to parse time
    cache : boolean
        True attempts to create a cache of converted values
    convert_listlike : function
        Conversion function to apply on dates

    Returns
    -------
    cache_array : Series
        Converted values indexed by the unique inputs. Empty when caching is
        disabled, judged not worthwhile by ``should_cache``, or when `arg`
        contains no duplicates.
    """
    from pandas import Series

    no_cache = Series(dtype=object)

    # Perform a quicker unique check before paying for the full one
    if not cache or not should_cache(arg):
        return no_cache

    unique_dates = unique(arg)
    if len(unique_dates) >= len(arg):
        # every value is distinct, a cache would save nothing
        return no_cache

    converted = convert_listlike(unique_dates, format)
    return Series(converted, index=unique_dates)
def _box_as_indexlike(
    dt_array: ArrayLike, utc: Optional[bool] = None, name: Optional[str] = None
) -> Union[ABCIndex, ABCDatetimeIndex]:
    """
    Wrap an array of datetimes in a DatetimeIndex when its dtype is
    datetime64, otherwise in a generic Index.

    Parameters
    ----------
    dt_array: 1-d array
        Array of datetimes to be wrapped in an Index.
    utc : bool, default None
        If truthy, the DatetimeIndex is built with ``tz="utc"``; otherwise
        with ``tz=None``. Not used for the generic Index case.
    name : string, default None
        Name for a resulting index

    Returns
    -------
    result : datetime of converted dates
        - DatetimeIndex if convertible to sole datetime64 type
        - general Index otherwise
    """
    from pandas import DatetimeIndex, Index

    if not is_datetime64_dtype(dt_array):
        return Index(dt_array, name=name)

    return DatetimeIndex(dt_array, tz="utc" if utc else None, name=name)
def _convert_and_box_cache(
    arg: DatetimeScalarOrArrayConvertible,
    cache_array: ABCSeries,
    name: Optional[str] = None,
) -> ABCIndexClass:
    """
    Translate `arg` through a cache of converted dates and box the result
    in an Index.

    Parameters
    ----------
    arg : integer, float, string, datetime, list, tuple, 1-d array, Series
    cache_array : Series
        Cache of converted, unique dates
    name : string, default None
        Name for a DatetimeIndex

    Returns
    -------
    result : Index-like of converted dates
    """
    from pandas import Series

    # look every input value up in the cache
    looked_up = Series(arg).map(cache_array)
    return _box_as_indexlike(looked_up, utc=None, name=name)
def _return_parsed_timezone_results(result, timezones, tz, name):
    """
    Box the output of array_strptime when a %z or %Z directive was used.

    Parameters
    ----------
    result : ndarray
        int64 date representations of the dates
    timezones : ndarray
        pytz timezone objects
    tz : object
        None or pytz timezone object
    name : string, default None
        Name for a DatetimeIndex

    Returns
    -------
    tz_result : Index-like of parsed dates with timezone

    Raises
    ------
    ValueError
        If `tz` is not None.
    """
    from pandas import Index

    if tz is not None:
        raise ValueError(
            "Cannot pass a tz argument when "
            "parsing strings with timezone "
            "information."
        )

    # pair each parsed value with the zone parsed alongside it
    localized = [
        Timestamp(value).tz_localize(zone) for value, zone in zip(result, timezones)
    ]
    return Index(np.array(localized), name=name)
def _convert_listlike_datetimes(
    arg,
    format,
    name=None,
    tz=None,
    unit=None,
    errors=None,
    infer_datetime_format=None,
    dayfirst=None,
    yearfirst=None,
    exact=None,
):
    """
    Helper for to_datetime: convert a 1D list-like of dates.

    Parameters
    ----------
    arg : list, tuple, ndarray, Series, Index
        dates to be parsed
    format : string or None
        strftime format to parse with; None means no explicit format
    name : object
        None or string for the Index name
    tz : object
        None or 'utc'
    unit : string
        None or string of the frequency of the passed data
    errors : string
        error handling behaviors from to_datetime, 'raise', 'coerce', 'ignore'
    infer_datetime_format : boolean
        inferring format behavior from to_datetime
    dayfirst : boolean
        dayfirst parsing behavior from to_datetime
    yearfirst : boolean
        yearfirst parsing behavior from to_datetime
    exact : boolean
        exact format matching behavior from to_datetime

    Returns
    -------
    Index-like of parsed dates

    Raises
    ------
    ValueError
        If both `format` and `unit` are given, or parsing fails with
        ``errors='raise'``.
    TypeError
        If `arg` has more than one dimension.
    """
    from pandas import DatetimeIndex
    from pandas.core.arrays import DatetimeArray
    from pandas.core.arrays.datetimes import (
        maybe_convert_dtype,
        objects_to_datetime64ns,
    )

    if isinstance(arg, (list, tuple)):
        arg = np.array(arg, dtype="O")

    # ------------------------------------------------------------------
    # Shortcuts: every branch below either returns or raises.

    if is_datetime64tz_dtype(arg):
        if not isinstance(arg, (DatetimeArray, DatetimeIndex)):
            return DatetimeIndex(arg, tz=tz, name=name)
        if tz == "utc":
            return arg.tz_convert(None).tz_localize(tz)
        return arg

    if is_datetime64_ns_dtype(arg):
        if isinstance(arg, (DatetimeArray, DatetimeIndex)):
            return arg.tz_localize(tz) if tz else arg
        try:
            return DatetimeIndex(arg, tz=tz, name=name)
        except ValueError:
            # could not be boxed; hand the input back untouched
            return arg

    if unit is not None:
        if format is not None:
            raise ValueError("cannot specify both format and unit")
        arg = getattr(arg, "_values", arg)

        # GH 30050 pass an ndarray to tslib.array_with_unit_to_datetime
        # because it expects an ndarray argument
        mask = None
        if isinstance(arg, IntegerArray):
            # Explicitly pass NaT mask to array_with_unit_to_datetime
            mask = arg.isna()
            arg = arg._ndarray_values

        result, tz_parsed = tslib.array_with_unit_to_datetime(
            arg, mask, unit, errors=errors
        )

        if errors == "ignore":
            from pandas import Index

            result = Index(result, name=name)
        else:
            result = DatetimeIndex(result, name=name)

        # GH 23758: We may still need to localize the result with tz
        # GH 25546: Apply tz_parsed first (from arg), then tz (from caller)
        # result will be naive but in UTC
        try:
            result = result.tz_localize("UTC").tz_convert(tz_parsed)
        except AttributeError:
            # Regular Index from 'ignore' path
            return result

        if tz is not None:
            if result.tz is None:
                result = result.tz_localize(tz)
            else:
                result = result.tz_convert(tz)
        return result

    if getattr(arg, "ndim", 1) > 1:
        raise TypeError(
            "arg must be a string, datetime, list, tuple, 1-d array, or Series"
        )

    # ------------------------------------------------------------------
    # General path.

    # warn if passing timedelta64, raise for PeriodDtype
    # NB: this must come after unit transformation
    orig_arg = arg
    arg, _ = maybe_convert_dtype(arg, copy=False)
    arg = ensure_object(arg)

    if infer_datetime_format and format is None:
        format = _guess_datetime_format_for_array(arg, dayfirst=dayfirst)

    # There is a special fast-path for iso8601 formatted datetime strings,
    # so in those cases drop the format: going through it would be slower.
    require_iso8601 = False
    if format is not None and _format_is_iso(format):
        require_iso8601 = not infer_datetime_format
        format = None

    def _all_nat():
        # datetime64[ns] array shaped like the (object) input, all NaT
        filled = np.empty(arg.shape, dtype="M8[ns]")
        filled.view("i8").fill(tslibs.iNaT)
        return filled

    tz_parsed = None
    result = None

    if format is not None:
        try:
            # shortcut formatting here
            if format == "%Y%m%d":
                try:
                    # pass orig_arg as float-dtype may have been converted to
                    # datetime64[ns]
                    orig_arg = ensure_object(orig_arg)
                    result = _attempt_YYYYMMDD(orig_arg, errors=errors)
                except (ValueError, TypeError, tslibs.OutOfBoundsDatetime):
                    raise ValueError("cannot convert the input to '%Y%m%d' date format")

            # fallback
            if result is None:
                try:
                    result, timezones = array_strptime(
                        arg, format, exact=exact, errors=errors
                    )
                    if "%Z" in format or "%z" in format:
                        return _return_parsed_timezone_results(
                            result, timezones, tz, name
                        )
                except tslibs.OutOfBoundsDatetime:
                    # must be listed before ValueError so it is matched first
                    if errors == "raise":
                        raise
                    result = _all_nat() if errors == "coerce" else arg
                except ValueError:
                    # if format was inferred, try falling back
                    # to array_to_datetime - terminate here
                    # for specified formats
                    if not infer_datetime_format:
                        if errors == "raise":
                            raise
                        result = _all_nat() if errors == "coerce" else arg
        except ValueError as e:
            # Fallback to try to convert datetime objects if timezone-aware
            # datetime objects are found without passing `utc=True`
            try:
                values, tz = conversion.datetime_to_datetime64(arg)
                return DatetimeIndex._simple_new(values, name=name, tz=tz)
            except (ValueError, TypeError):
                raise e

    utc = tz == "utc"

    if result is None:
        assert format is None or infer_datetime_format
        result, tz_parsed = objects_to_datetime64ns(
            arg,
            dayfirst=dayfirst,
            yearfirst=yearfirst,
            utc=utc,
            errors=errors,
            require_iso8601=require_iso8601,
            allow_object=True,
        )

    if tz_parsed is not None:
        # We can take a shortcut since the datetime64 numpy array
        # is in UTC
        return DatetimeIndex._simple_new(result, name=name, tz=tz_parsed)

    return _box_as_indexlike(result, utc=utc, name=name)
def _adjust_to_origin(arg, origin, unit):
    """
    Helper for to_datetime: shift `arg` so that it is relative to the unix
    epoch instead of `origin`.

    Parameters
    ----------
    arg : list, tuple, ndarray, Series, Index
        date to be adjusted
    origin : 'julian' or Timestamp
        origin offset for the arg
    unit : string
        passed unit from to_datetime, must be 'D' when origin is 'julian'

    Returns
    -------
    ndarray or scalar of adjusted date(s)

    Raises
    ------
    ValueError
        For a non-'D' unit with origin='julian', a non-numeric `arg`, or an
        origin that is tz-aware or cannot be converted to a Timestamp.
    OutOfBoundsDatetime
        If the julian dates or the origin fall outside the Timestamp range.
    """
    if origin == "julian":
        epoch_julian = Timestamp(0).to_julian_date()
        if unit != "D":
            raise ValueError("unit must be 'D' for origin='julian'")
        try:
            shifted = arg - epoch_julian
        except TypeError:
            raise ValueError("incompatible 'arg' type for given 'origin'='julian'")

        # preemptively check this for a nice range
        upper = Timestamp.max.to_julian_date() - epoch_julian
        lower = Timestamp.min.to_julian_date() - epoch_julian
        if np.any(shifted > upper) or np.any(shifted < lower):
            raise tslibs.OutOfBoundsDatetime(
                f"{arg} is Out of Bounds for origin='julian'"
            )
        return shifted

    # arg must be numeric
    numeric_scalar = is_scalar(arg) and (is_integer(arg) or is_float(arg))
    if not numeric_scalar and not is_numeric_dtype(np.asarray(arg)):
        raise ValueError(
            f"'{arg}' is not compatible with origin='{origin}'; "
            "it must be numeric with a unit specified"
        )

    # we are going to offset back to unix / epoch time
    try:
        offset = Timestamp(origin)
    except tslibs.OutOfBoundsDatetime:
        raise tslibs.OutOfBoundsDatetime(f"origin {origin} is Out of Bounds")
    except ValueError:
        raise ValueError(f"origin {origin} cannot be converted to a Timestamp")

    if offset.tz is not None:
        raise ValueError(f"origin offset {offset} must be tz-naive")
    offset -= Timestamp(0)

    # convert the offset to the unit of the arg
    # this should be lossless in terms of precision
    offset = offset // tslibs.Timedelta(1, unit=unit)

    # scalars & ndarray-like can handle the addition; other list-likes
    # have to become an ndarray first
    needs_array = is_list_like(arg) and not isinstance(
        arg, (ABCSeries, ABCIndexClass, np.ndarray)
    )
    if needs_array:
        arg = np.asarray(arg)
    return arg + offset
def to_datetime(
    arg,
    errors="raise",
    dayfirst=False,
    yearfirst=False,
    utc=None,
    format=None,
    exact=True,
    unit=None,
    infer_datetime_format=False,
    origin="unix",
    cache=True,
):
    """
    Convert argument to datetime.

    Parameters
    ----------
    arg : int, float, str, datetime, list, tuple, 1-d array, Series DataFrame/dict-like
        The object to convert to a datetime.
    errors : {'ignore', 'raise', 'coerce'}, default 'raise'
        - If 'raise', then invalid parsing will raise an exception.
        - If 'coerce', then invalid parsing will be set as NaT.
        - If 'ignore', then invalid parsing will return the input.
    dayfirst : bool, default False
        Specify a date parse order if `arg` is str or its list-likes.
        If True, parses dates with the day first, eg 10/11/12 is parsed as
        2012-11-10.
        Warning: dayfirst=True is not strict, but will prefer to parse
        with day first (this is a known bug, based on dateutil behavior).
    yearfirst : bool, default False
        Specify a date parse order if `arg` is str or its list-likes.

        - If True parses dates with the year first, eg 10/11/12 is parsed as
          2010-11-12.
        - If both dayfirst and yearfirst are True, yearfirst is preceded (same
          as dateutil).

        Warning: yearfirst=True is not strict, but will prefer to parse
        with year first (this is a known bug, based on dateutil behavior).
    utc : bool, default None
        Return UTC DatetimeIndex if True (converting any tz-aware
        datetime.datetime objects as well).
    format : str, default None
        The strftime to parse time, eg "%d/%m/%Y", note that "%f" will parse
        all the way up to nanoseconds.
        See strftime documentation for more information on choices:
        https://docs.python.org/3/library/datetime.html#strftime-and-strptime-behavior.
    exact : bool, True by default
        Behaves as:
        - If True, require an exact format match.
        - If False, allow the format to match anywhere in the target string.

    unit : str, default 'ns'
        The unit of the arg (D,s,ms,us,ns) denote the unit, which is an
        integer or float number. This will be based off the origin.
        Example, with unit='ms' and origin='unix' (the default), this
        would calculate the number of milliseconds to the unix epoch start.
    infer_datetime_format : bool, default False
        If True and no `format` is given, attempt to infer the format of the
        datetime strings, and if it can be inferred, switch to a faster
        method of parsing them. In some cases this can increase the parsing
        speed by ~5-10x.
    origin : scalar, default 'unix'
        Define the reference date. The numeric values would be parsed as number
        of units (defined by `unit`) since this reference date.

        - If 'unix' (or POSIX) time; origin is set to 1970-01-01.
        - If 'julian', unit must be 'D', and origin is set to beginning of
          Julian Calendar. Julian day number 0 is assigned to the day starting
          at noon on January 1, 4713 BC.
        - If Timestamp convertible, origin is set to Timestamp identified by
          origin.
    cache : bool, default True
        If True, use a cache of unique, converted dates to apply the datetime
        conversion. May produce significant speed-up when parsing duplicate
        date strings, especially ones with timezone offsets. The cache is only
        used when there are at least 50 values. The presence of out-of-bounds
        values will render the cache unusable and may slow down parsing.

        .. versionadded:: 0.23.0

        .. versionchanged:: 0.25.0
            - changed default value from False to True.

    Returns
    -------
    datetime
        If parsing succeeded.
        Return type depends on input:

        - list-like: DatetimeIndex
        - Series: Series of datetime64 dtype
        - scalar: Timestamp

        In case when it is not possible to return designated types (e.g. when
        any element of input is before Timestamp.min or after Timestamp.max)
        return will have datetime.datetime type (or corresponding
        array/Series).

    See Also
    --------
    DataFrame.astype : Cast argument to a specified dtype.
    to_timedelta : Convert argument to timedelta.
    convert_dtypes : Convert dtypes.

    Examples
    --------
    Assembling a datetime from multiple columns of a DataFrame. The keys can be
    common abbreviations like ['year', 'month', 'day', 'minute', 'second',
    'ms', 'us', 'ns']) or plurals of the same

    >>> df = pd.DataFrame({'year': [2015, 2016],
    ...                    'month': [2, 3],
    ...                    'day': [4, 5]})
    >>> pd.to_datetime(df)
    0   2015-02-04
    1   2016-03-05
    dtype: datetime64[ns]

    If a date does not meet the `timestamp limitations
    <https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html
    #timeseries-timestamp-limits>`_, passing errors='ignore'
    will return the original input instead of raising any exception.

    Passing errors='coerce' will force an out-of-bounds date to NaT,
    in addition to forcing non-dates (or non-parseable dates) to NaT.

    >>> pd.to_datetime('13000101', format='%Y%m%d', errors='ignore')
    datetime.datetime(1300, 1, 1, 0, 0)
    >>> pd.to_datetime('13000101', format='%Y%m%d', errors='coerce')
    NaT

    Passing infer_datetime_format=True can often-times speedup a parsing
    if its not an ISO8601 format exactly, but in a regular format.

    >>> s = pd.Series(['3/11/2000', '3/12/2000', '3/13/2000'] * 1000)
    >>> s.head()
    0    3/11/2000
    1    3/12/2000
    2    3/13/2000
    3    3/11/2000
    4    3/12/2000
    dtype: object

    >>> %timeit pd.to_datetime(s, infer_datetime_format=True)  # doctest: +SKIP
    100 loops, best of 3: 10.4 ms per loop

    >>> %timeit pd.to_datetime(s, infer_datetime_format=False)  # doctest: +SKIP
    1 loop, best of 3: 471 ms per loop

    Using a unix epoch time

    >>> pd.to_datetime(1490195805, unit='s')
    Timestamp('2017-03-22 15:16:45')
    >>> pd.to_datetime(1490195805433502912, unit='ns')
    Timestamp('2017-03-22 15:16:45.433502912')

    .. warning:: For float arg, precision rounding might happen. To prevent
        unexpected behavior use a fixed-width exact type.

    Using a non-unix epoch origin

    >>> pd.to_datetime([1, 2, 3], unit='D',
    ...                origin=pd.Timestamp('1960-01-01'))
    DatetimeIndex(['1960-01-02', '1960-01-03', '1960-01-04'], \
dtype='datetime64[ns]', freq=None)
    """
    if arg is None:
        return None

    if origin != "unix":
        arg = _adjust_to_origin(arg, origin, unit)

    tz = "utc" if utc else None

    # list-like converter with every option except `format`/`name` bound
    convert_listlike = partial(
        _convert_listlike_datetimes,
        tz=tz,
        unit=unit,
        dayfirst=dayfirst,
        yearfirst=yearfirst,
        errors=errors,
        exact=exact,
        infer_datetime_format=infer_datetime_format,
    )

    # --- a Timestamp only ever needs its timezone adjusted
    if isinstance(arg, Timestamp):
        if tz is None:
            return arg
        if arg.tz is None:
            return arg.tz_localize(tz)
        return arg.tz_convert(tz)

    # --- Series: keep index and name, convert the values
    if isinstance(arg, ABCSeries):
        cache_array = _maybe_cache(arg, format, cache, convert_listlike)
        if cache_array.empty:
            values = convert_listlike(arg._values, format)
            return arg._constructor(values, index=arg.index, name=arg.name)
        return arg.map(cache_array)

    # --- DataFrame / mapping: assemble from year/month/day/... columns
    if isinstance(arg, (ABCDataFrame, abc.MutableMapping)):
        return _assemble_from_unit_mappings(arg, errors, tz)

    # --- Index: carry the name over to the result
    if isinstance(arg, ABCIndexClass):
        cache_array = _maybe_cache(arg, format, cache, convert_listlike)
        if cache_array.empty:
            return convert_listlike(arg, format, name=arg.name)
        return _convert_and_box_cache(arg, cache_array, name=arg.name)

    # --- any other list-like
    if is_list_like(arg):
        try:
            cache_array = _maybe_cache(arg, format, cache, convert_listlike)
        except tslibs.OutOfBoundsDatetime:
            # caching attempts to create a DatetimeIndex, which may raise
            # an OOB. If that's the desired behavior, then just reraise...
            if errors == "raise":
                raise
            # ... otherwise, continue without the cache.
            from pandas import Series

            cache_array = Series([], dtype=object)  # just an empty array
        if cache_array.empty:
            return convert_listlike(arg, format)
        return _convert_and_box_cache(arg, cache_array)

    # --- scalar: convert as a one-element array and unwrap
    return convert_listlike(np.array([arg]), format)[0]
# mappings for assembling units: accepted column name -> canonical unit
_unit_map = {
    "year": "year",
    "years": "year",
    "month": "month",
    "months": "month",
    "day": "day",
    "days": "day",
    "hour": "h",
    "hours": "h",
    "minute": "m",
    "minutes": "m",
    "second": "s",
    "seconds": "s",
    "ms": "ms",
    "millisecond": "ms",
    "milliseconds": "ms",
    "us": "us",
    "microsecond": "us",
    "microseconds": "us",
    "ns": "ns",
    "nanosecond": "ns",
    "nanoseconds": "ns",
}


def _assemble_from_unit_mappings(arg, errors, tz):
    """
    Assemble the unit specified fields from the arg (DataFrame).
    Return a Series for actual parsing.

    Parameters
    ----------
    arg : DataFrame
        Anything accepted by the DataFrame constructor; its column labels
        name the date/time components (see ``_unit_map``).
    errors : {'ignore', 'raise', 'coerce'}, default 'raise'

        - If 'raise', then invalid parsing will raise an exception
        - If 'coerce', then invalid parsing will be set as NaT
        - If 'ignore', then invalid parsing will return the input
    tz : None or 'utc'

    Returns
    -------
    Series

    Raises
    ------
    ValueError
        If column labels are duplicated, any of year/month/day is missing,
        an unrecognised column is present, or the values cannot be
        assembled into datetimes.
    """
    from pandas import to_timedelta, to_numeric, DataFrame

    arg = DataFrame(arg)
    if not arg.columns.is_unique:
        raise ValueError("cannot assemble with duplicate keys")

    # replace passed unit with _unit_map
    def f(value):
        if value in _unit_map:
            return _unit_map[value]

        # Retry case-insensitively.  Only strings can be lower-cased: any
        # other label (e.g. an int column) is passed through so that it is
        # reported by the checks below instead of raising AttributeError.
        if isinstance(value, str) and value.lower() in _unit_map:
            return _unit_map[value.lower()]

        return value

    unit = {k: f(k) for k in arg.keys()}
    unit_rev = {v: k for k, v in unit.items()}

    # we require at least Ymd
    missing = sorted({"year", "month", "day"} - set(unit_rev))
    if missing:
        required = ",".join(missing)
        raise ValueError(
            "to assemble mappings requires at least that "
            f"[year, month, day] be specified: [{required}] "
            "is missing"
        )

    # keys we don't recognize; labels may be of mixed type, so order and
    # render them by their string form (identical result for all-str labels)
    excess = sorted(set(unit_rev) - set(_unit_map.values()), key=str)
    if excess:
        excess = ",".join(map(str, excess))
        raise ValueError(
            f"extra keys have been passed to the datetime assemblage: [{excess}]"
        )

    def coerce(values):
        # we allow coercion to if errors allows
        values = to_numeric(values, errors=errors)

        # prevent overflow in case of int8 or int16
        if is_integer_dtype(values):
            values = values.astype("int64", copy=False)
        return values

    # pack year/month/day into a single YYYYMMDD number per row
    values = (
        coerce(arg[unit_rev["year"]]) * 10000
        + coerce(arg[unit_rev["month"]]) * 100
        + coerce(arg[unit_rev["day"]])
    )
    try:
        values = to_datetime(values, format="%Y%m%d", errors=errors, utc=tz)
    except (TypeError, ValueError) as err:
        raise ValueError(f"cannot assemble the datetimes: {err}") from err

    # add the sub-day components that were supplied
    for u in ["h", "m", "s", "ms", "us", "ns"]:
        value = unit_rev.get(u)
        if value is not None and value in arg:
            try:
                values += to_timedelta(coerce(arg[value]), unit=u, errors=errors)
            except (TypeError, ValueError) as err:
                raise ValueError(
                    f"cannot assemble the datetimes [{value}]: {err}"
                ) from err
    return values
def _attempt_YYYYMMDD(arg, errors):
    """
    Try to parse `arg` as YYYYMMDD/%Y%m%d values, coping with NaT-likes.

    `arg` is passed in as an object dtype, but could really be ints/strings
    with nan-like/or floats (e.g. with nan).

    Parameters
    ----------
    arg : passed value
    errors : 'raise','ignore','coerce'

    Returns
    -------
    The converted array from the first strategy that succeeds, or None when
    every strategy fails with ValueError, OverflowError or TypeError.
    """

    def calc(carg):
        # split the numbers into year / month / day and convert them
        carg = carg.astype(object)
        parsed = parsing.try_parse_year_month_day(
            carg / 10000, carg / 100 % 100, carg % 100
        )
        return tslib.array_to_datetime(parsed, errors=errors)[0]

    def calc_with_mask(carg, mask):
        # convert only the positions selected by ``mask``; the rest are NaT
        result = np.empty(carg.shape, dtype="M8[ns]")
        iresult = result.view("i8")
        iresult[~mask] = tslibs.iNaT

        masked_result = calc(carg[mask].astype(np.float64).astype(np.int64))
        result[mask] = masked_result.astype("M8[ns]")
        return result

    def from_intlike():
        # intlike / strings that are ints
        return calc(arg.astype(np.int64))

    def from_floats():
        # a float with actual np.nan
        as_float = arg.astype(np.float64)
        return calc_with_mask(as_float, notna(as_float))

    def from_nat_strings():
        # string with NaN-like
        keep = ~algorithms.isin(arg, list(tslib.nat_strings))
        return calc_with_mask(arg, keep)

    # strategies are tried in order; the first one that works wins
    for strategy in (from_intlike, from_floats, from_nat_strings):
        try:
            return strategy()
        except (ValueError, OverflowError, TypeError):
            continue

    return None
# Fixed time formats for time parsing
_time_formats = [
    "%H:%M",
    "%H%M",
    "%I:%M%p",
    "%I%M%p",
    "%H:%M:%S",
    "%H%M%S",
    "%I:%M:%S%p",
    "%I%M%S%p",
]


def _guess_time_format_for_array(arr):
    """
    Guess the time format of `arr` from its first non-missing element.

    Parameters
    ----------
    arr : ndarray
        Values to inspect; it is indexed positionally.

    Returns
    -------
    str or None
        The first entry of ``_time_formats`` that parses the element, or
        None when nothing matches, the element is not a string, or every
        element is missing.
    """
    # Try to guess the format based on the first non-NaN element
    non_nan_elements = notna(arr).nonzero()[0]
    if len(non_nan_elements):
        element = arr[non_nan_elements[0]]
        for time_format in _time_formats:
            try:
                datetime.strptime(element, time_format)
            except (ValueError, TypeError):
                # ValueError: the format does not match.
                # TypeError: the element is not a string; report "no guess"
                # rather than letting the error escape the guesser.
                continue
            return time_format

    return None
def to_time(arg, format=None, infer_time_format=False, errors="raise"):
    """
    Parse time strings to time objects using fixed strptime formats ("%H:%M",
    "%H%M", "%I:%M%p", "%I%M%p", "%H:%M:%S", "%H%M%S", "%I:%M:%S%p",
    "%I%M%S%p")

    Use infer_time_format if all the strings are in the same format to speed
    up conversion.

    Parameters
    ----------
    arg : string in time format, datetime.time, list, tuple, 1-d array, Series
    format : str, default None
        Format used to convert arg into a time object.  If None, fixed formats
        are used.
    infer_time_format: bool, default False
        Infer the time format based on the first non-NaN element.  If all
        strings are in the same format, this will speed up conversion.
    errors : {'ignore', 'raise', 'coerce'}, default 'raise'
        - If 'raise', then invalid parsing will raise an exception
        - If 'coerce', then invalid parsing will be set as None
        - If 'ignore', then invalid parsing will return the input

    Returns
    -------
    datetime.time
    """

    def _convert_listlike(arg, format):
        # Convert a 1-d collection; returns a list of time objects (or the
        # object-dtype input itself on the first failure with 'ignore').
        if isinstance(arg, (list, tuple)):
            arg = np.array(arg, dtype="O")
        elif getattr(arg, "ndim", 1) > 1:
            raise TypeError(
                "arg must be a string, datetime, list, tuple, 1-d array, or Series"
            )

        arg = ensure_object(arg)

        if infer_time_format and format is None:
            format = _guess_time_format_for_array(arg)

        parsed = []

        if format is not None:
            # one known format for every element
            for element in arg:
                try:
                    parsed.append(datetime.strptime(element, format).time())
                except (ValueError, TypeError):
                    if errors == "raise":
                        msg = (
                            f"Cannot convert {element} to a time with given "
                            f"format {format}"
                        )
                        raise ValueError(msg)
                    if errors == "ignore":
                        return arg
                    parsed.append(None)
            return parsed

        # no format: try each fixed format on each element
        candidates = _time_formats[:]
        promoted = False
        for element in arg:
            time_object = None
            for time_format in candidates:
                try:
                    time_object = datetime.strptime(element, time_format).time()
                except (ValueError, TypeError):
                    continue
                else:
                    if not promoted:
                        # Put the found format in front (done only once)
                        candidates.remove(time_format)
                        candidates.insert(0, time_format)
                        promoted = True
                    break

            if time_object is not None:
                parsed.append(time_object)
            elif errors == "raise":
                raise ValueError(f"Cannot convert arg {arg} to a time")
            elif errors == "ignore":
                return arg
            else:
                parsed.append(None)

        return parsed

    # nothing to do for None or an existing time object
    if arg is None or isinstance(arg, time):
        return arg

    if isinstance(arg, ABCSeries):
        values = _convert_listlike(arg._values, format)
        return arg._constructor(values, index=arg.index, name=arg.name)

    if isinstance(arg, ABCIndexClass) or is_list_like(arg):
        return _convert_listlike(arg, format)

    # scalar: convert as a one-element array and unwrap
    return _convert_listlike(np.array([arg]), format)[0]