Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1from collections import abc 

2from datetime import datetime, time 

3from functools import partial 

4from itertools import islice 

5from typing import Optional, TypeVar, Union 

6 

7import numpy as np 

8 

9from pandas._libs import tslib, tslibs 

10from pandas._libs.tslibs import Timestamp, conversion, parsing 

11from pandas._libs.tslibs.parsing import ( # noqa 

12 DateParseError, 

13 _format_is_iso, 

14 _guess_datetime_format, 

15 parse_time_string, 

16) 

17from pandas._libs.tslibs.strptime import array_strptime 

18from pandas._typing import ArrayLike 

19 

20from pandas.core.dtypes.common import ( 

21 ensure_object, 

22 is_datetime64_dtype, 

23 is_datetime64_ns_dtype, 

24 is_datetime64tz_dtype, 

25 is_float, 

26 is_integer, 

27 is_integer_dtype, 

28 is_list_like, 

29 is_numeric_dtype, 

30 is_scalar, 

31) 

32from pandas.core.dtypes.generic import ( 

33 ABCDataFrame, 

34 ABCDatetimeIndex, 

35 ABCIndex, 

36 ABCIndexClass, 

37 ABCSeries, 

38) 

39from pandas.core.dtypes.missing import notna 

40 

41from pandas.arrays import IntegerArray 

42from pandas.core import algorithms 

43from pandas.core.algorithms import unique 

44 

# ---------------------------------------------------------------------
# types used in annotations

# List-like containers accepted by the conversion helpers in this module.
ArrayConvertible = Union[list, tuple, ArrayLike, ABCSeries]
# Plain Python scalar kinds.
Scalar = Union[int, float, str]
# TypeVar constrained to either the ``Scalar`` union or ``datetime``.
DatetimeScalar = TypeVar("DatetimeScalar", Scalar, datetime)
# Either a single datetime-like scalar or one of the list-like containers.
DatetimeScalarOrArrayConvertible = Union[
    DatetimeScalar, list, tuple, ArrayLike, ABCSeries
]

54 

55 

56# --------------------------------------------------------------------- 

57 

58 

59def _guess_datetime_format_for_array(arr, **kwargs): 

60 # Try to guess the format based on the first non-NaN element 

61 non_nan_elements = notna(arr).nonzero()[0] 

62 if len(non_nan_elements): 

63 return _guess_datetime_format(arr[non_nan_elements[0]], **kwargs) 

64 

65 

def should_cache(
    arg: ArrayConvertible, unique_share: float = 0.7, check_count: Optional[int] = None
) -> bool:
    """
    Decide whether caching of converted values is worthwhile.

    The first `check_count` elements of `arg` are inspected. Caching is
    recommended when the number of distinct values among them does not exceed
    ``check_count * unique_share``.

    Parameters
    ----------
    arg : listlike, tuple, 1-d array, Series
        Values that are about to be converted.
    unique_share : float, default 0.7
        Must satisfy ``0 < unique_share < 1``.
    check_count : int, optional
        Number of leading elements to inspect; must satisfy
        ``0 <= check_count <= len(arg)``. When omitted it is derived from
        ``len(arg)`` (see Notes).

    Returns
    -------
    do_caching : bool

    Raises
    ------
    AssertionError
        If `check_count` or `unique_share` is outside its allowed bounds.

    Notes
    -----
    By default for a sequence of 50 items or fewer we don't do caching; for
    up to 5000 elements we take ten percent of all elements to check for a
    uniqueness share; if the sequence size is more than 5000, then we check
    only the first 500 elements.
    All constants were chosen empirically.

    Elements that cannot be hashed (for example nested lists) cannot serve as
    cache keys, so the function returns False for them instead of raising
    ``TypeError``.
    """
    if check_count is None:
        size = len(arg)
        # in this case, the gain from caching is negligible
        if size <= 50:
            return False
        check_count = int(size * 0.1) if size <= 5000 else 500
    else:
        # NOTE: kept as assertions because callers observe AssertionError here
        assert (
            0 <= check_count <= len(arg)
        ), "check_count must be in next bounds: [0; len(arg)]"
        if check_count == 0:
            return False

    assert 0 < unique_share < 1, "unique_share must be in next bounds: (0; 1)"

    try:
        unique_elements = set(islice(arg, check_count))
    except TypeError:
        # unhashable elements: a lookup cache cannot be built from them
        return False

    return len(unique_elements) <= check_count * unique_share

120 

121 

122def _maybe_cache(arg, format, cache, convert_listlike): 

123 """ 

124 Create a cache of unique dates from an array of dates 

125 

126 Parameters 

127 ---------- 

128 arg : listlike, tuple, 1-d array, Series 

129 format : string 

130 Strftime format to parse time 

131 cache : boolean 

132 True attempts to create a cache of converted values 

133 convert_listlike : function 

134 Conversion function to apply on dates 

135 

136 Returns 

137 ------- 

138 cache_array : Series 

139 Cache of converted, unique dates. Can be empty 

140 """ 

141 from pandas import Series 

142 

143 cache_array = Series(dtype=object) 

144 

145 if cache: 

146 # Perform a quicker unique check 

147 if not should_cache(arg): 

148 return cache_array 

149 

150 unique_dates = unique(arg) 

151 if len(unique_dates) < len(arg): 

152 cache_dates = convert_listlike(unique_dates, format) 

153 cache_array = Series(cache_dates, index=unique_dates) 

154 return cache_array 

155 

156 

157def _box_as_indexlike( 

158 dt_array: ArrayLike, utc: Optional[bool] = None, name: Optional[str] = None 

159) -> Union[ABCIndex, ABCDatetimeIndex]: 

160 """ 

161 Properly boxes the ndarray of datetimes to DatetimeIndex 

162 if it is possible or to generic Index instead 

163 

164 Parameters 

165 ---------- 

166 dt_array: 1-d array 

167 Array of datetimes to be wrapped in an Index. 

168 tz : object 

169 None or 'utc' 

170 name : string, default None 

171 Name for a resulting index 

172 

173 Returns 

174 ------- 

175 result : datetime of converted dates 

176 - DatetimeIndex if convertible to sole datetime64 type 

177 - general Index otherwise 

178 """ 

179 from pandas import DatetimeIndex, Index 

180 

181 if is_datetime64_dtype(dt_array): 

182 tz = "utc" if utc else None 

183 return DatetimeIndex(dt_array, tz=tz, name=name) 

184 return Index(dt_array, name=name) 

185 

186 

def _convert_and_box_cache(
    arg: DatetimeScalarOrArrayConvertible,
    cache_array: ABCSeries,
    name: Optional[str] = None,
) -> ABCIndexClass:
    """
    Translate dates through a pre-built cache and box the result as an Index.

    Parameters
    ----------
    arg : integer, float, string, datetime, list, tuple, 1-d array, Series
        Values to look up in the cache.
    cache_array : Series
        Converted dates indexed by their unique original values.
    name : string, default None
        Name given to the resulting index.

    Returns
    -------
    result : Index-like of converted dates
    """
    from pandas import Series

    # each input value is replaced by its cached conversion
    looked_up = Series(arg).map(cache_array)
    return _box_as_indexlike(looked_up, utc=None, name=name)

211 

212 

213def _return_parsed_timezone_results(result, timezones, tz, name): 

214 """ 

215 Return results from array_strptime if a %z or %Z directive was passed. 

216 

217 Parameters 

218 ---------- 

219 result : ndarray 

220 int64 date representations of the dates 

221 timezones : ndarray 

222 pytz timezone objects 

223 tz : object 

224 None or pytz timezone object 

225 name : string, default None 

226 Name for a DatetimeIndex 

227 

228 Returns 

229 ------- 

230 tz_result : Index-like of parsed dates with timezone 

231 """ 

232 if tz is not None: 

233 raise ValueError( 

234 "Cannot pass a tz argument when " 

235 "parsing strings with timezone " 

236 "information." 

237 ) 

238 tz_results = np.array( 

239 [Timestamp(res).tz_localize(zone) for res, zone in zip(result, timezones)] 

240 ) 

241 from pandas import Index 

242 

243 return Index(tz_results, name=name) 

244 

245 

def _convert_listlike_datetimes(
    arg,
    format,
    name=None,
    tz=None,
    unit=None,
    errors=None,
    infer_datetime_format=None,
    dayfirst=None,
    yearfirst=None,
    exact=None,
):
    """
    Helper function for to_datetime. Performs the conversions of 1D listlike
    of dates

    Parameters
    ----------
    arg : list, tuple, ndarray, Series, Index
        date to be parsed
    format : string or None
        strftime format used for parsing; None lets the parser decide.
        Cannot be combined with `unit`.
    name : object
        None or string for the Index name
    tz : object
        None or 'utc'
    unit : string
        None or string of the frequency of the passed data
    errors : string
        error handling behaviors from to_datetime, 'raise', 'coerce', 'ignore'
    infer_datetime_format : boolean
        inferring format behavior from to_datetime
    dayfirst : boolean
        dayfirst parsing behavior from to_datetime
    yearfirst : boolean
        yearfirst parsing behavior from to_datetime
    exact : boolean
        exact format matching behavior from to_datetime

    Returns
    -------
    Index-like of parsed dates

    Raises
    ------
    ValueError
        If both `format` and `unit` are given, or if the input cannot be
        converted with format '%Y%m%d'.
    TypeError
        If `arg` has more than one dimension.
    """
    # imported locally rather than at module level
    from pandas import DatetimeIndex
    from pandas.core.arrays import DatetimeArray
    from pandas.core.arrays.datetimes import (
        maybe_convert_dtype,
        objects_to_datetime64ns,
    )

    if isinstance(arg, (list, tuple)):
        arg = np.array(arg, dtype="O")

    # these are shortcutable
    if is_datetime64tz_dtype(arg):
        # already tz-aware: box if needed, and re-localize to UTC on request
        if not isinstance(arg, (DatetimeArray, DatetimeIndex)):
            return DatetimeIndex(arg, tz=tz, name=name)
        if tz == "utc":
            arg = arg.tz_convert(None).tz_localize(tz)
        return arg

    elif is_datetime64_ns_dtype(arg):
        # naive datetime64[ns]: box if needed, localize when tz was requested
        if not isinstance(arg, (DatetimeArray, DatetimeIndex)):
            try:
                return DatetimeIndex(arg, tz=tz, name=name)
            except ValueError:
                # fall through and return the input unchanged
                pass
        elif tz:
            # DatetimeArray, DatetimeIndex
            return arg.tz_localize(tz)

        return arg

    elif unit is not None:
        # numeric input interpreted as a count of `unit`
        if format is not None:
            raise ValueError("cannot specify both format and unit")
        arg = getattr(arg, "_values", arg)

        # GH 30050 pass an ndarray to tslib.array_with_unit_to_datetime
        # because it expects an ndarray argument
        if isinstance(arg, IntegerArray):
            # Explicitly pass NaT mask to array_with_unit_to_datetime
            mask = arg.isna()
            arg = arg._ndarray_values
        else:
            mask = None

        result, tz_parsed = tslib.array_with_unit_to_datetime(
            arg, mask, unit, errors=errors
        )

        if errors == "ignore":
            from pandas import Index

            result = Index(result, name=name)
        else:
            result = DatetimeIndex(result, name=name)
        # GH 23758: We may still need to localize the result with tz
        # GH 25546: Apply tz_parsed first (from arg), then tz (from caller)
        # result will be naive but in UTC
        try:
            result = result.tz_localize("UTC").tz_convert(tz_parsed)
        except AttributeError:
            # Regular Index from 'ignore' path
            return result
        if tz is not None:
            if result.tz is None:
                result = result.tz_localize(tz)
            else:
                result = result.tz_convert(tz)
        return result
    elif getattr(arg, "ndim", 1) > 1:
        raise TypeError(
            "arg must be a string, datetime, list, tuple, 1-d array, or Series"
        )

    # warn if passing timedelta64, raise for PeriodDtype
    # NB: this must come after unit transformation
    orig_arg = arg
    arg, _ = maybe_convert_dtype(arg, copy=False)

    arg = ensure_object(arg)
    require_iso8601 = False

    if infer_datetime_format and format is None:
        format = _guess_datetime_format_for_array(arg, dayfirst=dayfirst)

    if format is not None:
        # There is a special fast-path for iso8601 formatted
        # datetime strings, so in those cases don't use the inferred
        # format because this path makes process slower in this
        # special case
        format_is_iso8601 = _format_is_iso(format)
        if format_is_iso8601:
            # an explicitly given ISO format is still enforced by the parser
            require_iso8601 = not infer_datetime_format
            format = None

    tz_parsed = None
    result = None

    if format is not None:
        try:
            # shortcut formatting here
            if format == "%Y%m%d":
                try:
                    # pass orig_arg as float-dtype may have been converted to
                    # datetime64[ns]
                    orig_arg = ensure_object(orig_arg)
                    result = _attempt_YYYYMMDD(orig_arg, errors=errors)
                except (ValueError, TypeError, tslibs.OutOfBoundsDatetime):
                    raise ValueError("cannot convert the input to '%Y%m%d' date format")

            # fallback
            if result is None:
                try:
                    result, timezones = array_strptime(
                        arg, format, exact=exact, errors=errors
                    )
                    if "%Z" in format or "%z" in format:
                        return _return_parsed_timezone_results(
                            result, timezones, tz, name
                        )
                except tslibs.OutOfBoundsDatetime:
                    if errors == "raise":
                        raise
                    elif errors == "coerce":
                        # all-NaT result of the same shape as the input
                        result = np.empty(arg.shape, dtype="M8[ns]")
                        iresult = result.view("i8")
                        iresult.fill(tslibs.iNaT)
                    else:
                        # errors == "ignore": hand back the input
                        result = arg
                except ValueError:
                    # if format was inferred, try falling back
                    # to array_to_datetime - terminate here
                    # for specified formats
                    if not infer_datetime_format:
                        if errors == "raise":
                            raise
                        elif errors == "coerce":
                            # all-NaT result of the same shape as the input
                            result = np.empty(arg.shape, dtype="M8[ns]")
                            iresult = result.view("i8")
                            iresult.fill(tslibs.iNaT)
                        else:
                            # errors == "ignore": hand back the input
                            result = arg
        except ValueError as e:
            # Fallback to try to convert datetime objects if timezone-aware
            # datetime objects are found without passing `utc=True`
            try:
                values, tz = conversion.datetime_to_datetime64(arg)
                return DatetimeIndex._simple_new(values, name=name, tz=tz)
            except (ValueError, TypeError):
                # the fallback failed too: surface the original error
                raise e

    if result is None:
        # only reachable with no usable explicit format
        assert format is None or infer_datetime_format
        utc = tz == "utc"
        result, tz_parsed = objects_to_datetime64ns(
            arg,
            dayfirst=dayfirst,
            yearfirst=yearfirst,
            utc=utc,
            errors=errors,
            require_iso8601=require_iso8601,
            allow_object=True,
        )

    if tz_parsed is not None:
        # We can take a shortcut since the datetime64 numpy array
        # is in UTC
        return DatetimeIndex._simple_new(result, name=name, tz=tz_parsed)

    utc = tz == "utc"
    return _box_as_indexlike(result, utc=utc, name=name)

457 

458 

459def _adjust_to_origin(arg, origin, unit): 

460 """ 

461 Helper function for to_datetime. 

462 Adjust input argument to the specified origin 

463 

464 Parameters 

465 ---------- 

466 arg : list, tuple, ndarray, Series, Index 

467 date to be adjusted 

468 origin : 'julian' or Timestamp 

469 origin offset for the arg 

470 unit : string 

471 passed unit from to_datetime, must be 'D' 

472 

473 Returns 

474 ------- 

475 ndarray or scalar of adjusted date(s) 

476 """ 

477 if origin == "julian": 

478 original = arg 

479 j0 = Timestamp(0).to_julian_date() 

480 if unit != "D": 

481 raise ValueError("unit must be 'D' for origin='julian'") 

482 try: 

483 arg = arg - j0 

484 except TypeError: 

485 raise ValueError("incompatible 'arg' type for given 'origin'='julian'") 

486 

487 # preemptively check this for a nice range 

488 j_max = Timestamp.max.to_julian_date() - j0 

489 j_min = Timestamp.min.to_julian_date() - j0 

490 if np.any(arg > j_max) or np.any(arg < j_min): 

491 raise tslibs.OutOfBoundsDatetime( 

492 f"{original} is Out of Bounds for origin='julian'" 

493 ) 

494 else: 

495 # arg must be numeric 

496 if not ( 

497 (is_scalar(arg) and (is_integer(arg) or is_float(arg))) 

498 or is_numeric_dtype(np.asarray(arg)) 

499 ): 

500 raise ValueError( 

501 f"'{arg}' is not compatible with origin='{origin}'; " 

502 "it must be numeric with a unit specified" 

503 ) 

504 

505 # we are going to offset back to unix / epoch time 

506 try: 

507 offset = Timestamp(origin) 

508 except tslibs.OutOfBoundsDatetime: 

509 raise tslibs.OutOfBoundsDatetime(f"origin {origin} is Out of Bounds") 

510 except ValueError: 

511 raise ValueError(f"origin {origin} cannot be converted to a Timestamp") 

512 

513 if offset.tz is not None: 

514 raise ValueError(f"origin offset {offset} must be tz-naive") 

515 offset -= Timestamp(0) 

516 

517 # convert the offset to the unit of the arg 

518 # this should be lossless in terms of precision 

519 offset = offset // tslibs.Timedelta(1, unit=unit) 

520 

521 # scalars & ndarray-like can handle the addition 

522 if is_list_like(arg) and not isinstance( 

523 arg, (ABCSeries, ABCIndexClass, np.ndarray) 

524 ): 

525 arg = np.asarray(arg) 

526 arg = arg + offset 

527 return arg 

528 

529 

def to_datetime(
    arg,
    errors="raise",
    dayfirst=False,
    yearfirst=False,
    utc=None,
    format=None,
    exact=True,
    unit=None,
    infer_datetime_format=False,
    origin="unix",
    cache=True,
):
    """
    Convert argument to datetime.

    Parameters
    ----------
    arg : int, float, str, datetime, list, tuple, 1-d array, Series, DataFrame/dict-like
        The object to convert to a datetime.
    errors : {'ignore', 'raise', 'coerce'}, default 'raise'
        What to do with values that cannot be parsed:

        - 'raise' : invalid parsing raises an exception.
        - 'coerce' : invalid parsing is set as NaT.
        - 'ignore' : invalid parsing returns the input.
    dayfirst : bool, default False
        Date parse order when `arg` is str or a list-like of str.
        If True, parses dates with the day first, eg 10/11/12 is parsed as
        2012-11-10.
        Warning: dayfirst=True is not strict, but will prefer to parse
        with day first (this is a known bug, based on dateutil behavior).
    yearfirst : bool, default False
        Date parse order when `arg` is str or a list-like of str.

        - If True parses dates with the year first, eg 10/11/12 is parsed as
          2010-11-12.
        - If both dayfirst and yearfirst are True, yearfirst is preceded (same
          as dateutil).

        Warning: yearfirst=True is not strict, but will prefer to parse
        with year first (this is a known bug, based on dateutil behavior).
    utc : bool, default None
        Return UTC DatetimeIndex if True (converting any tz-aware
        datetime.datetime objects as well).
    format : str, default None
        The strftime format to parse with, eg "%d/%m/%Y". Note that "%f"
        will parse all the way up to nanoseconds.
        See strftime documentation for more information on choices:
        https://docs.python.org/3/library/datetime.html#strftime-and-strptime-behavior.
    exact : bool, True by default
        - If True, require an exact format match.
        - If False, allow the format to match anywhere in the target string.
    unit : str, optional
        The unit of a numeric `arg` (D, s, ms, us, ns), counted from
        `origin`. For example, with unit='ms' and origin='unix' (the
        default), `arg` is taken as the number of milliseconds since the
        unix epoch start.
    infer_datetime_format : bool, default False
        If True and no `format` is given, attempt to infer the format of the
        datetime strings, and if it can be inferred, switch to a faster
        method of parsing them. In some cases this can increase the parsing
        speed by ~5-10x.
    origin : scalar, default 'unix'
        The reference date. Numeric values are parsed as the number of units
        (defined by `unit`) since this reference date.

        - If 'unix' (or POSIX) time; origin is set to 1970-01-01.
        - If 'julian', unit must be 'D', and origin is set to beginning of
          Julian Calendar. Julian day number 0 is assigned to the day starting
          at noon on January 1, 4713 BC.
        - If Timestamp convertible, origin is set to Timestamp identified by
          origin.
    cache : bool, default True
        If True, use a cache of unique, converted dates to apply the datetime
        conversion. May produce significant speed-up when parsing duplicate
        date strings, especially ones with timezone offsets. The cache is only
        used when there are at least 50 values. The presence of out-of-bounds
        values will render the cache unusable and may slow down parsing.

        .. versionadded:: 0.23.0

        .. versionchanged:: 0.25.0
            - changed default value from False to True.

    Returns
    -------
    datetime
        If parsing succeeded.
        Return type depends on input:

        - list-like: DatetimeIndex
        - Series: Series of datetime64 dtype
        - scalar: Timestamp

        In case when it is not possible to return designated types (e.g. when
        any element of input is before Timestamp.min or after Timestamp.max)
        return will have datetime.datetime type (or corresponding
        array/Series).

    See Also
    --------
    DataFrame.astype : Cast argument to a specified dtype.
    to_timedelta : Convert argument to timedelta.
    convert_dtypes : Convert dtypes.

    Examples
    --------
    Assembling a datetime from multiple columns of a DataFrame. The keys can be
    common abbreviations like ['year', 'month', 'day', 'minute', 'second',
    'ms', 'us', 'ns']) or plurals of the same

    >>> df = pd.DataFrame({'year': [2015, 2016],
    ...                    'month': [2, 3],
    ...                    'day': [4, 5]})
    >>> pd.to_datetime(df)
    0   2015-02-04
    1   2016-03-05
    dtype: datetime64[ns]

    If a date does not meet the `timestamp limitations
    <https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html
    #timeseries-timestamp-limits>`_, passing errors='ignore'
    will return the original input instead of raising any exception.

    Passing errors='coerce' will force an out-of-bounds date to NaT,
    in addition to forcing non-dates (or non-parseable dates) to NaT.

    >>> pd.to_datetime('13000101', format='%Y%m%d', errors='ignore')
    datetime.datetime(1300, 1, 1, 0, 0)
    >>> pd.to_datetime('13000101', format='%Y%m%d', errors='coerce')
    NaT

    Passing infer_datetime_format=True can often-times speedup a parsing
    if its not an ISO8601 format exactly, but in a regular format.

    >>> s = pd.Series(['3/11/2000', '3/12/2000', '3/13/2000'] * 1000)
    >>> s.head()
    0    3/11/2000
    1    3/12/2000
    2    3/13/2000
    3    3/11/2000
    4    3/12/2000
    dtype: object

    >>> %timeit pd.to_datetime(s, infer_datetime_format=True)  # doctest: +SKIP
    100 loops, best of 3: 10.4 ms per loop

    >>> %timeit pd.to_datetime(s, infer_datetime_format=False)  # doctest: +SKIP
    1 loop, best of 3: 471 ms per loop

    Using a unix epoch time

    >>> pd.to_datetime(1490195805, unit='s')
    Timestamp('2017-03-22 15:16:45')
    >>> pd.to_datetime(1490195805433502912, unit='ns')
    Timestamp('2017-03-22 15:16:45.433502912')

    .. warning:: For float arg, precision rounding might happen. To prevent
        unexpected behavior use a fixed-width exact type.

    Using a non-unix epoch origin

    >>> pd.to_datetime([1, 2, 3], unit='D',
    ...                origin=pd.Timestamp('1960-01-01'))
    DatetimeIndex(['1960-01-02', '1960-01-03', '1960-01-04'], \
dtype='datetime64[ns]', freq=None)
    """
    if arg is None:
        return None

    if origin != "unix":
        arg = _adjust_to_origin(arg, origin, unit)

    tz = "utc" if utc else None
    # list-like converter with every option except `format` and `name` bound
    convert_listlike = partial(
        _convert_listlike_datetimes,
        tz=tz,
        unit=unit,
        dayfirst=dayfirst,
        yearfirst=yearfirst,
        errors=errors,
        exact=exact,
        infer_datetime_format=infer_datetime_format,
    )

    # Timestamp: only the timezone may need adjusting
    if isinstance(arg, Timestamp):
        if tz is None:
            return arg
        if arg.tz is not None:
            return arg.tz_convert(tz)
        return arg.tz_localize(tz)

    # Series: result keeps the original index and name
    if isinstance(arg, ABCSeries):
        cache_array = _maybe_cache(arg, format, cache, convert_listlike)
        if cache_array.empty:
            values = convert_listlike(arg._values, format)
            return arg._constructor(values, index=arg.index, name=arg.name)
        return arg.map(cache_array)

    # DataFrame / mapping: assemble from year/month/day(/...) columns
    if isinstance(arg, (ABCDataFrame, abc.MutableMapping)):
        return _assemble_from_unit_mappings(arg, errors, tz)

    # Index: result carries over the index name
    if isinstance(arg, ABCIndexClass):
        cache_array = _maybe_cache(arg, format, cache, convert_listlike)
        if cache_array.empty:
            return convert_listlike(arg, format, name=arg.name)
        return _convert_and_box_cache(arg, cache_array, name=arg.name)

    if is_list_like(arg):
        try:
            cache_array = _maybe_cache(arg, format, cache, convert_listlike)
        except tslibs.OutOfBoundsDatetime:
            # caching attempts to create a DatetimeIndex, which may raise
            # an OOB. If that's the desired behavior, then just reraise...
            if errors == "raise":
                raise
            # ... otherwise, continue without the cache.
            from pandas import Series

            cache_array = Series([], dtype=object)  # just an empty array
        if cache_array.empty:
            return convert_listlike(arg, format)
        return _convert_and_box_cache(arg, cache_array)

    # scalar: convert as a one-element array and unwrap
    return convert_listlike(np.array([arg]), format)[0]

759 

760 

761# mappings for assembling units 

762_unit_map = { 

763 "year": "year", 

764 "years": "year", 

765 "month": "month", 

766 "months": "month", 

767 "day": "day", 

768 "days": "day", 

769 "hour": "h", 

770 "hours": "h", 

771 "minute": "m", 

772 "minutes": "m", 

773 "second": "s", 

774 "seconds": "s", 

775 "ms": "ms", 

776 "millisecond": "ms", 

777 "milliseconds": "ms", 

778 "us": "us", 

779 "microsecond": "us", 

780 "microseconds": "us", 

781 "ns": "ns", 

782 "nanosecond": "ns", 

783 "nanoseconds": "ns", 

784} 

785 

786 

787def _assemble_from_unit_mappings(arg, errors, tz): 

788 """ 

789 assemble the unit specified fields from the arg (DataFrame) 

790 Return a Series for actual parsing 

791 

792 Parameters 

793 ---------- 

794 arg : DataFrame 

795 errors : {'ignore', 'raise', 'coerce'}, default 'raise' 

796 

797 - If 'raise', then invalid parsing will raise an exception 

798 - If 'coerce', then invalid parsing will be set as NaT 

799 - If 'ignore', then invalid parsing will return the input 

800 tz : None or 'utc' 

801 

802 Returns 

803 ------- 

804 Series 

805 """ 

806 from pandas import to_timedelta, to_numeric, DataFrame 

807 

808 arg = DataFrame(arg) 

809 if not arg.columns.is_unique: 

810 raise ValueError("cannot assemble with duplicate keys") 

811 

812 # replace passed unit with _unit_map 

813 def f(value): 

814 if value in _unit_map: 

815 return _unit_map[value] 

816 

817 # m is case significant 

818 if value.lower() in _unit_map: 

819 return _unit_map[value.lower()] 

820 

821 return value 

822 

823 unit = {k: f(k) for k in arg.keys()} 

824 unit_rev = {v: k for k, v in unit.items()} 

825 

826 # we require at least Ymd 

827 required = ["year", "month", "day"] 

828 req = sorted(set(required) - set(unit_rev.keys())) 

829 if len(req): 

830 required = ",".join(req) 

831 raise ValueError( 

832 "to assemble mappings requires at least that " 

833 f"[year, month, day] be specified: [{required}] " 

834 "is missing" 

835 ) 

836 

837 # keys we don't recognize 

838 excess = sorted(set(unit_rev.keys()) - set(_unit_map.values())) 

839 if len(excess): 

840 excess = ",".join(excess) 

841 raise ValueError( 

842 f"extra keys have been passed to the datetime assemblage: [{excess}]" 

843 ) 

844 

845 def coerce(values): 

846 # we allow coercion to if errors allows 

847 values = to_numeric(values, errors=errors) 

848 

849 # prevent overflow in case of int8 or int16 

850 if is_integer_dtype(values): 

851 values = values.astype("int64", copy=False) 

852 return values 

853 

854 values = ( 

855 coerce(arg[unit_rev["year"]]) * 10000 

856 + coerce(arg[unit_rev["month"]]) * 100 

857 + coerce(arg[unit_rev["day"]]) 

858 ) 

859 try: 

860 values = to_datetime(values, format="%Y%m%d", errors=errors, utc=tz) 

861 except (TypeError, ValueError) as err: 

862 raise ValueError(f"cannot assemble the datetimes: {err}") 

863 

864 for u in ["h", "m", "s", "ms", "us", "ns"]: 

865 value = unit_rev.get(u) 

866 if value is not None and value in arg: 

867 try: 

868 values += to_timedelta(coerce(arg[value]), unit=u, errors=errors) 

869 except (TypeError, ValueError) as err: 

870 raise ValueError(f"cannot assemble the datetimes [{value}]: {err}") 

871 return values 

872 

873 

874def _attempt_YYYYMMDD(arg, errors): 

875 """ 

876 try to parse the YYYYMMDD/%Y%m%d format, try to deal with NaT-like, 

877 arg is a passed in as an object dtype, but could really be ints/strings 

878 with nan-like/or floats (e.g. with nan) 

879 

880 Parameters 

881 ---------- 

882 arg : passed value 

883 errors : 'raise','ignore','coerce' 

884 """ 

885 

886 def calc(carg): 

887 # calculate the actual result 

888 carg = carg.astype(object) 

889 parsed = parsing.try_parse_year_month_day( 

890 carg / 10000, carg / 100 % 100, carg % 100 

891 ) 

892 return tslib.array_to_datetime(parsed, errors=errors)[0] 

893 

894 def calc_with_mask(carg, mask): 

895 result = np.empty(carg.shape, dtype="M8[ns]") 

896 iresult = result.view("i8") 

897 iresult[~mask] = tslibs.iNaT 

898 

899 masked_result = calc(carg[mask].astype(np.float64).astype(np.int64)) 

900 result[mask] = masked_result.astype("M8[ns]") 

901 return result 

902 

903 # try intlike / strings that are ints 

904 try: 

905 return calc(arg.astype(np.int64)) 

906 except (ValueError, OverflowError, TypeError): 

907 pass 

908 

909 # a float with actual np.nan 

910 try: 

911 carg = arg.astype(np.float64) 

912 return calc_with_mask(carg, notna(carg)) 

913 except (ValueError, OverflowError, TypeError): 

914 pass 

915 

916 # string with NaN-like 

917 try: 

918 mask = ~algorithms.isin(arg, list(tslib.nat_strings)) 

919 return calc_with_mask(arg, mask) 

920 except (ValueError, OverflowError, TypeError): 

921 pass 

922 

923 return None 

924 

925 

# Fixed time formats for time parsing. The order is significant: callers
# iterate this list and use the first format that parses a value.
_time_formats = [
    "%H:%M",
    "%H%M",
    "%I:%M%p",
    "%I%M%p",
    "%H:%M:%S",
    "%H%M%S",
    "%I:%M:%S%p",
    "%I%M%S%p",
]

937 

938 

939def _guess_time_format_for_array(arr): 

940 # Try to guess the format based on the first non-NaN element 

941 non_nan_elements = notna(arr).nonzero()[0] 

942 if len(non_nan_elements): 

943 element = arr[non_nan_elements[0]] 

944 for time_format in _time_formats: 

945 try: 

946 datetime.strptime(element, time_format) 

947 return time_format 

948 except ValueError: 

949 pass 

950 

951 return None 

952 

953 

def to_time(arg, format=None, infer_time_format=False, errors="raise"):
    """
    Parse time strings to time objects using fixed strptime formats ("%H:%M",
    "%H%M", "%I:%M%p", "%I%M%p", "%H:%M:%S", "%H%M%S", "%I:%M:%S%p",
    "%I%M%S%p")

    Use infer_time_format if all the strings are in the same format to speed
    up conversion.

    Parameters
    ----------
    arg : string in time format, datetime.time, list, tuple, 1-d array, Series
    format : str, default None
        Format used to convert arg into a time object. If None, fixed formats
        are used.
    infer_time_format : bool, default False
        Infer the time format based on the first non-NaN element. If all
        strings are in the same format, this will speed up conversion.
    errors : {'ignore', 'raise', 'coerce'}, default 'raise'
        - If 'raise', then invalid parsing will raise an exception
        - If 'coerce', then invalid parsing will be set as None
        - If 'ignore', then invalid parsing will return the input

    Returns
    -------
    datetime.time
        For scalar input. List-like input yields a list of times (a Series
        for Series input); None and datetime.time inputs are returned as-is.
    """

    def _convert_listlike(arg, format):
        # normalise the container to a 1-d object array
        if isinstance(arg, (list, tuple)):
            arg = np.array(arg, dtype="O")
        elif getattr(arg, "ndim", 1) > 1:
            raise TypeError(
                "arg must be a string, datetime, list, tuple, 1-d array, or Series"
            )

        arg = ensure_object(arg)

        if format is None and infer_time_format:
            format = _guess_time_format_for_array(arg)

        parsed = []

        if format is None:
            # try each fixed format for every element
            candidates = list(_time_formats)
            promoted = False
            for element in arg:
                time_object = None
                for candidate in candidates:
                    try:
                        time_object = datetime.strptime(element, candidate).time()
                    except (ValueError, TypeError):
                        continue
                    if not promoted:
                        # Put the found format in front
                        candidates.remove(candidate)
                        candidates.insert(0, candidate)
                        promoted = True
                    break

                if time_object is not None:
                    parsed.append(time_object)
                elif errors == "raise":
                    raise ValueError(f"Cannot convert arg {arg} to a time")
                elif errors == "ignore":
                    return arg
                else:
                    parsed.append(None)
            return parsed

        # a single known format applies to every element
        for element in arg:
            try:
                time_object = datetime.strptime(element, format).time()
            except (ValueError, TypeError):
                if errors == "raise":
                    msg = (
                        f"Cannot convert {element} to a time with given "
                        f"format {format}"
                    )
                    raise ValueError(msg)
                if errors == "ignore":
                    return arg
                time_object = None
            parsed.append(time_object)
        return parsed

    # nothing to convert
    if arg is None or isinstance(arg, time):
        return arg

    if isinstance(arg, ABCSeries):
        values = _convert_listlike(arg._values, format)
        return arg._constructor(values, index=arg.index, name=arg.name)

    if isinstance(arg, ABCIndexClass) or is_list_like(arg):
        return _convert_listlike(arg, format)

    # scalar: convert as a one-element array and unwrap
    return _convert_listlike(np.array([arg]), format)[0]