Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# -*- coding: utf-8 -*- 

2""" 

3This module offers a generic date/time string parser which is able to parse 

4most known formats to represent a date and/or time. 

5 

6This module attempts to be forgiving with regards to unlikely input formats, 

7returning a datetime object even for dates which are ambiguous. If an element 

8of a date/time stamp is omitted, the following rules are applied: 

9 

10- If AM or PM is left unspecified, a 24-hour clock is assumed, however, an hour 

11 on a 12-hour clock (``0 <= hour <= 12``) *must* be specified if AM or PM is 

12 specified. 

13- If a time zone is omitted, a timezone-naive datetime is returned. 

14 

15If any other elements are missing, they are taken from the 

16:class:`datetime.datetime` object passed to the parameter ``default``. If this 

17results in a day number exceeding the valid number of days per month, the 

18value falls back to the end of the month. 

19 

20Additional resources about date/time string formats can be found below: 

21 

22- `A summary of the international standard date and time notation 

23 <http://www.cl.cam.ac.uk/~mgk25/iso-time.html>`_ 

24- `W3C Date and Time Formats <http://www.w3.org/TR/NOTE-datetime>`_ 

25- `Time Formats (Planetary Rings Node) <https://pds-rings.seti.org:443/tools/time_formats.html>`_ 

26- `CPAN ParseDate module 

27 <http://search.cpan.org/~muir/Time-modules-2013.0912/lib/Time/ParseDate.pm>`_ 

28- `Java SimpleDateFormat Class 

29 <https://docs.oracle.com/javase/6/docs/api/java/text/SimpleDateFormat.html>`_ 

30""" 

31from __future__ import unicode_literals 

32 

33import datetime 

34import re 

35import string 

36import time 

37import warnings 

38 

39from calendar import monthrange 

40from io import StringIO 

41 

42import six 

43from six import integer_types, text_type 

44 

45from decimal import Decimal 

46 

47from warnings import warn 

48 

49from .. import relativedelta 

50from .. import tz 

51 

52__all__ = ["parse", "parserinfo", "ParserError"] 

53 

54 

55# TODO: pandas.core.tools.datetimes imports this explicitly. Might be worth 

56# making public and/or figuring out if there is something we can 

57# take off their plate. 

58class _timelex(object): 

59 # Fractional seconds are sometimes split by a comma 

60 _split_decimal = re.compile("([.,])") 

61 

62 def __init__(self, instream): 

63 if six.PY2: 

64 # In Python 2, we can't duck type properly because unicode has 

65 # a 'decode' function, and we'd be double-decoding 

66 if isinstance(instream, (bytes, bytearray)): 

67 instream = instream.decode() 

68 else: 

69 if getattr(instream, 'decode', None) is not None: 

70 instream = instream.decode() 

71 

72 if isinstance(instream, text_type): 

73 instream = StringIO(instream) 

74 elif getattr(instream, 'read', None) is None: 

75 raise TypeError('Parser must be a string or character stream, not ' 

76 '{itype}'.format(itype=instream.__class__.__name__)) 

77 

78 self.instream = instream 

79 self.charstack = [] 

80 self.tokenstack = [] 

81 self.eof = False 

82 

83 def get_token(self): 

84 """ 

85 This function breaks the time string into lexical units (tokens), which 

86 can be parsed by the parser. Lexical units are demarcated by changes in 

87 the character set, so any continuous string of letters is considered 

88 one unit, any continuous string of numbers is considered one unit. 

89 

90 The main complication arises from the fact that dots ('.') can be used 

91 both as separators (e.g. "Sep.20.2009") or decimal points (e.g. 

92 "4:30:21.447"). As such, it is necessary to read the full context of 

93 any dot-separated strings before breaking it into tokens; as such, this 

94 function maintains a "token stack", for when the ambiguous context 

95 demands that multiple tokens be parsed at once. 

96 """ 

97 if self.tokenstack: 

98 return self.tokenstack.pop(0) 

99 

100 seenletters = False 

101 token = None 

102 state = None 

103 

104 while not self.eof: 

105 # We only realize that we've reached the end of a token when we 

106 # find a character that's not part of the current token - since 

107 # that character may be part of the next token, it's stored in the 

108 # charstack. 

109 if self.charstack: 

110 nextchar = self.charstack.pop(0) 

111 else: 

112 nextchar = self.instream.read(1) 

113 while nextchar == '\x00': 

114 nextchar = self.instream.read(1) 

115 

116 if not nextchar: 

117 self.eof = True 

118 break 

119 elif not state: 

120 # First character of the token - determines if we're starting 

121 # to parse a word, a number or something else. 

122 token = nextchar 

123 if self.isword(nextchar): 

124 state = 'a' 

125 elif self.isnum(nextchar): 

126 state = '0' 

127 elif self.isspace(nextchar): 

128 token = ' ' 

129 break # emit token 

130 else: 

131 break # emit token 

132 elif state == 'a': 

133 # If we've already started reading a word, we keep reading 

134 # letters until we find something that's not part of a word. 

135 seenletters = True 

136 if self.isword(nextchar): 

137 token += nextchar 

138 elif nextchar == '.': 

139 token += nextchar 

140 state = 'a.' 

141 else: 

142 self.charstack.append(nextchar) 

143 break # emit token 

144 elif state == '0': 

145 # If we've already started reading a number, we keep reading 

146 # numbers until we find something that doesn't fit. 

147 if self.isnum(nextchar): 

148 token += nextchar 

149 elif nextchar == '.' or (nextchar == ',' and len(token) >= 2): 

150 token += nextchar 

151 state = '0.' 

152 else: 

153 self.charstack.append(nextchar) 

154 break # emit token 

155 elif state == 'a.': 

156 # If we've seen some letters and a dot separator, continue 

157 # parsing, and the tokens will be broken up later. 

158 seenletters = True 

159 if nextchar == '.' or self.isword(nextchar): 

160 token += nextchar 

161 elif self.isnum(nextchar) and token[-1] == '.': 

162 token += nextchar 

163 state = '0.' 

164 else: 

165 self.charstack.append(nextchar) 

166 break # emit token 

167 elif state == '0.': 

168 # If we've seen at least one dot separator, keep going, we'll 

169 # break up the tokens later. 

170 if nextchar == '.' or self.isnum(nextchar): 

171 token += nextchar 

172 elif self.isword(nextchar) and token[-1] == '.': 

173 token += nextchar 

174 state = 'a.' 

175 else: 

176 self.charstack.append(nextchar) 

177 break # emit token 

178 

179 if (state in ('a.', '0.') and (seenletters or token.count('.') > 1 or 

180 token[-1] in '.,')): 

181 l = self._split_decimal.split(token) 

182 token = l[0] 

183 for tok in l[1:]: 

184 if tok: 

185 self.tokenstack.append(tok) 

186 

187 if state == '0.' and token.count('.') == 0: 

188 token = token.replace(',', '.') 

189 

190 return token 

191 

192 def __iter__(self): 

193 return self 

194 

195 def __next__(self): 

196 token = self.get_token() 

197 if token is None: 

198 raise StopIteration 

199 

200 return token 

201 

202 def next(self): 

203 return self.__next__() # Python 2.x support 

204 

205 @classmethod 

206 def split(cls, s): 

207 return list(cls(s)) 

208 

209 @classmethod 

210 def isword(cls, nextchar): 

211 """ Whether or not the next character is part of a word """ 

212 return nextchar.isalpha() 

213 

214 @classmethod 

215 def isnum(cls, nextchar): 

216 """ Whether the next character is part of a number """ 

217 return nextchar.isdigit() 

218 

219 @classmethod 

220 def isspace(cls, nextchar): 

221 """ Whether the next character is whitespace """ 

222 return nextchar.isspace() 

223 

224 

225class _resultbase(object): 

226 

227 def __init__(self): 

228 for attr in self.__slots__: 

229 setattr(self, attr, None) 

230 

231 def _repr(self, classname): 

232 l = [] 

233 for attr in self.__slots__: 

234 value = getattr(self, attr) 

235 if value is not None: 

236 l.append("%s=%s" % (attr, repr(value))) 

237 return "%s(%s)" % (classname, ", ".join(l)) 

238 

239 def __len__(self): 

240 return (sum(getattr(self, attr) is not None 

241 for attr in self.__slots__)) 

242 

243 def __repr__(self): 

244 return self._repr(self.__class__.__name__) 

245 

246 

class parserinfo(object):
    """
    Class which handles what inputs are accepted. Subclass this to customize
    the language and acceptable values for each parameter.

    :param dayfirst:
        Whether to interpret the first value in an ambiguous 3-integer date
        (e.g. 01/05/09) as the day (``True``) or month (``False``). If
        ``yearfirst`` is set to ``True``, this distinguishes between YDM
        and YMD. Default is ``False``.

    :param yearfirst:
        Whether to interpret the first value in an ambiguous 3-integer date
        (e.g. 01/05/09) as the year. If ``True``, the first number is taken
        to be the year, otherwise the last number is taken to be the year.
        Default is ``False``.
    """

    # m from a.m/p.m, t from ISO T separator
    JUMP = [" ", ".", ",", ";", "-", "/", "'",
            "at", "on", "and", "ad", "m", "t", "of",
            "st", "nd", "rd", "th"]

    WEEKDAYS = [("Mon", "Monday"),
                ("Tue", "Tuesday"),     # TODO: "Tues"
                ("Wed", "Wednesday"),
                ("Thu", "Thursday"),    # TODO: "Thurs"
                ("Fri", "Friday"),
                ("Sat", "Saturday"),
                ("Sun", "Sunday")]
    MONTHS = [("Jan", "January"),
              ("Feb", "February"),      # TODO: "Febr"
              ("Mar", "March"),
              ("Apr", "April"),
              ("May", "May"),
              ("Jun", "June"),
              ("Jul", "July"),
              ("Aug", "August"),
              ("Sep", "Sept", "September"),
              ("Oct", "October"),
              ("Nov", "November"),
              ("Dec", "December")]
    HMS = [("h", "hour", "hours"),
           ("m", "minute", "minutes"),
           ("s", "second", "seconds")]
    AMPM = [("am", "a"),
            ("pm", "p")]
    UTCZONE = ["UTC", "GMT", "Z", "z"]
    PERTAIN = ["of"]
    TZOFFSET = {}
    # TODO: ERA = ["AD", "BC", "CE", "BCE", "Stardate",
    #              "Anno Domini", "Year of Our Lord"]

    def __init__(self, dayfirst=False, yearfirst=False):
        # Lowercased lookup tables built from the class-level vocabularies.
        self._jump = self._convert(self.JUMP)
        self._weekdays = self._convert(self.WEEKDAYS)
        self._months = self._convert(self.MONTHS)
        self._hms = self._convert(self.HMS)
        self._ampm = self._convert(self.AMPM)
        self._utczone = self._convert(self.UTCZONE)
        self._pertain = self._convert(self.PERTAIN)

        self.dayfirst = dayfirst
        self.yearfirst = yearfirst

        # Reference year (local time) used to expand two-digit years.
        self._year = time.localtime().tm_year
        self._century = self._year // 100 * 100

    def _convert(self, lst):
        """
        Build a lookup table from ``lst``.

        Each entry is either a single name or a tuple of aliases; every name
        is lowercased and mapped to the position of its entry in ``lst``.
        """
        dct = {}
        for index, entry in enumerate(lst):
            names = entry if isinstance(entry, tuple) else (entry,)
            for name in names:
                dct[name.lower()] = index
        return dct

    def jump(self, name):
        """Whether ``name`` (case-insensitive) is a skippable filler token."""
        return name.lower() in self._jump

    def weekday(self, name):
        """Return the 0-based index into ``WEEKDAYS`` for ``name``, or ``None``."""
        return self._weekdays.get(name.lower())

    def month(self, name):
        """Return the 1-based month number for ``name``, or ``None``."""
        index = self._months.get(name.lower())
        if index is None:
            return None
        return index + 1

    def hms(self, name):
        """Return the index into ``HMS`` (0=h, 1=m, 2=s) for ``name``, or ``None``."""
        return self._hms.get(name.lower())

    def ampm(self, name):
        """Return the index into ``AMPM`` (0=am, 1=pm) for ``name``, or ``None``."""
        return self._ampm.get(name.lower())

    def pertain(self, name):
        """Whether ``name`` (case-insensitive) is listed in ``PERTAIN``."""
        return name.lower() in self._pertain

    def utczone(self, name):
        """Whether ``name`` (case-insensitive) is listed in ``UTCZONE``."""
        return name.lower() in self._utczone

    def tzoffset(self, name):
        """
        Return the UTC offset configured for the zone ``name``.

        :return:
            ``0`` for any ``UTCZONE`` alias, otherwise the ``TZOFFSET`` entry
            for ``name`` or ``None`` if there is none.
        """
        # ``self._utczone`` is keyed by lowercased names, so the lookup must
        # be case-insensitive too; testing the raw name would miss "UTC" and
        # "GMT". ``None`` is tolerated as it always has been.
        if name is not None and self.utczone(name):
            return 0

        return self.TZOFFSET.get(name)

    def convertyear(self, year, century_specified=False):
        """
        Converts two-digit years to year within [-50, 49]
        range of self._year (current local time)
        """

        # Function contract is that the year is always positive
        assert year >= 0

        if year < 100 and not century_specified:
            # assume current century to start
            year += self._century

            if year >= self._year + 50:  # if too far in future
                year -= 100
            elif year < self._year - 50:  # if too far in past
                year += 100

        return year

    def validate(self, res):
        """
        Normalise a parse result in place and report whether it is acceptable.

        Expands a two-digit year and canonicalises UTC: a zero offset without
        a name, or a "Z"/"z" name, becomes ``("UTC", 0)``, and any other
        ``UTCZONE`` alias gets a zero offset. This implementation always
        returns ``True``.
        """
        # move to info
        if res.year is not None:
            res.year = self.convertyear(res.year, res.century_specified)

        if ((res.tzoffset == 0 and not res.tzname) or
                (res.tzname == 'Z' or res.tzname == 'z')):
            res.tzname = "UTC"
            res.tzoffset = 0
        elif res.tzoffset != 0 and res.tzname and self.utczone(res.tzname):
            res.tzoffset = 0
        return True

398 

399 

400class _ymd(list): 

401 def __init__(self, *args, **kwargs): 

402 super(self.__class__, self).__init__(*args, **kwargs) 

403 self.century_specified = False 

404 self.dstridx = None 

405 self.mstridx = None 

406 self.ystridx = None 

407 

408 @property 

409 def has_year(self): 

410 return self.ystridx is not None 

411 

412 @property 

413 def has_month(self): 

414 return self.mstridx is not None 

415 

416 @property 

417 def has_day(self): 

418 return self.dstridx is not None 

419 

420 def could_be_day(self, value): 

421 if self.has_day: 

422 return False 

423 elif not self.has_month: 

424 return 1 <= value <= 31 

425 elif not self.has_year: 

426 # Be permissive, assume leap year 

427 month = self[self.mstridx] 

428 return 1 <= value <= monthrange(2000, month)[1] 

429 else: 

430 month = self[self.mstridx] 

431 year = self[self.ystridx] 

432 return 1 <= value <= monthrange(year, month)[1] 

433 

434 def append(self, val, label=None): 

435 if hasattr(val, '__len__'): 

436 if val.isdigit() and len(val) > 2: 

437 self.century_specified = True 

438 if label not in [None, 'Y']: # pragma: no cover 

439 raise ValueError(label) 

440 label = 'Y' 

441 elif val > 100: 

442 self.century_specified = True 

443 if label not in [None, 'Y']: # pragma: no cover 

444 raise ValueError(label) 

445 label = 'Y' 

446 

447 super(self.__class__, self).append(int(val)) 

448 

449 if label == 'M': 

450 if self.has_month: 

451 raise ValueError('Month is already set') 

452 self.mstridx = len(self) - 1 

453 elif label == 'D': 

454 if self.has_day: 

455 raise ValueError('Day is already set') 

456 self.dstridx = len(self) - 1 

457 elif label == 'Y': 

458 if self.has_year: 

459 raise ValueError('Year is already set') 

460 self.ystridx = len(self) - 1 

461 

462 def _resolve_from_stridxs(self, strids): 

463 """ 

464 Try to resolve the identities of year/month/day elements using 

465 ystridx, mstridx, and dstridx, if enough of these are specified. 

466 """ 

467 if len(self) == 3 and len(strids) == 2: 

468 # we can back out the remaining stridx value 

469 missing = [x for x in range(3) if x not in strids.values()] 

470 key = [x for x in ['y', 'm', 'd'] if x not in strids] 

471 assert len(missing) == len(key) == 1 

472 key = key[0] 

473 val = missing[0] 

474 strids[key] = val 

475 

476 assert len(self) == len(strids) # otherwise this should not be called 

477 out = {key: self[strids[key]] for key in strids} 

478 return (out.get('y'), out.get('m'), out.get('d')) 

479 

480 def resolve_ymd(self, yearfirst, dayfirst): 

481 len_ymd = len(self) 

482 year, month, day = (None, None, None) 

483 

484 strids = (('y', self.ystridx), 

485 ('m', self.mstridx), 

486 ('d', self.dstridx)) 

487 

488 strids = {key: val for key, val in strids if val is not None} 

489 if (len(self) == len(strids) > 0 or 

490 (len(self) == 3 and len(strids) == 2)): 

491 return self._resolve_from_stridxs(strids) 

492 

493 mstridx = self.mstridx 

494 

495 if len_ymd > 3: 

496 raise ValueError("More than three YMD values") 

497 elif len_ymd == 1 or (mstridx is not None and len_ymd == 2): 

498 # One member, or two members with a month string 

499 if mstridx is not None: 

500 month = self[mstridx] 

501 # since mstridx is 0 or 1, self[mstridx-1] always 

502 # looks up the other element 

503 other = self[mstridx - 1] 

504 else: 

505 other = self[0] 

506 

507 if len_ymd > 1 or mstridx is None: 

508 if other > 31: 

509 year = other 

510 else: 

511 day = other 

512 

513 elif len_ymd == 2: 

514 # Two members with numbers 

515 if self[0] > 31: 

516 # 99-01 

517 year, month = self 

518 elif self[1] > 31: 

519 # 01-99 

520 month, year = self 

521 elif dayfirst and self[1] <= 12: 

522 # 13-01 

523 day, month = self 

524 else: 

525 # 01-13 

526 month, day = self 

527 

528 elif len_ymd == 3: 

529 # Three members 

530 if mstridx == 0: 

531 if self[1] > 31: 

532 # Apr-2003-25 

533 month, year, day = self 

534 else: 

535 month, day, year = self 

536 elif mstridx == 1: 

537 if self[0] > 31 or (yearfirst and self[2] <= 31): 

538 # 99-Jan-01 

539 year, month, day = self 

540 else: 

541 # 01-Jan-01 

542 # Give precedence to day-first, since 

543 # two-digit years is usually hand-written. 

544 day, month, year = self 

545 

546 elif mstridx == 2: 

547 # WTF!? 

548 if self[1] > 31: 

549 # 01-99-Jan 

550 day, year, month = self 

551 else: 

552 # 99-01-Jan 

553 year, day, month = self 

554 

555 else: 

556 if (self[0] > 31 or 

557 self.ystridx == 0 or 

558 (yearfirst and self[1] <= 12 and self[2] <= 31)): 

559 # 99-01-01 

560 if dayfirst and self[2] <= 12: 

561 year, day, month = self 

562 else: 

563 year, month, day = self 

564 elif self[0] > 12 or (dayfirst and self[1] <= 12): 

565 # 13-01-01 

566 day, month, year = self 

567 else: 

568 # 01-13-01 

569 month, day, year = self 

570 

571 return year, month, day 

572 

573 

574class parser(object): 

575 def __init__(self, info=None): 

576 self.info = info or parserinfo() 

577 

def parse(self, timestr, default=None,
          ignoretz=False, tzinfos=None, **kwargs):
    """
    Parse the date/time string into a :class:`datetime.datetime` object.

    :param timestr:
        Any date/time string using the supported formats.

    :param default:
        The default datetime object, if this is a datetime object and not
        ``None``, elements specified in ``timestr`` replace elements in the
        default object. When ``None``, today's date at midnight (local
        time) is used.

    :param ignoretz:
        If set ``True``, time zones in parsed strings are ignored and a
        naive :class:`datetime.datetime` object is returned.

    :param tzinfos:
        Additional time zone names / aliases which may be present in the
        string. This argument maps time zone names (and optionally offsets
        from those time zones) to time zones. This parameter can be a
        dictionary with timezone aliases mapping time zone names to time
        zones or a function taking two parameters (``tzname`` and
        ``tzoffset``) and returning a time zone.

        The timezones to which the names are mapped can be an integer
        offset from UTC in seconds or a :class:`tzinfo` object.

        .. doctest::
           :options: +NORMALIZE_WHITESPACE

            >>> from dateutil.parser import parse
            >>> from dateutil.tz import gettz
            >>> tzinfos = {"BRST": -7200, "CST": gettz("America/Chicago")}
            >>> parse("2012-01-19 17:21:00 BRST", tzinfos=tzinfos)
            datetime.datetime(2012, 1, 19, 17, 21, tzinfo=tzoffset(u'BRST', -7200))
            >>> parse("2012-01-19 17:21:00 CST", tzinfos=tzinfos)
            datetime.datetime(2012, 1, 19, 17, 21,
                              tzinfo=tzfile('/usr/share/zoneinfo/America/Chicago'))

        This parameter is ignored if ``ignoretz`` is set.

    :param \\*\\*kwargs:
        Keyword arguments as passed to ``_parse()``.

    :return:
        Returns a :class:`datetime.datetime` object or, if the
        ``fuzzy_with_tokens`` option is ``True``, returns a tuple, the
        first element being a :class:`datetime.datetime` object, the second
        a tuple containing the fuzzy tokens.

    :raises ParserError:
        Raised for invalid or unknown string format, if the provided
        :class:`tzinfo` is not in a valid format, or if an invalid date
        would be created.

    :raises TypeError:
        Raised for non-string or character stream input.

    :raises OverflowError:
        Raised if the parsed date exceeds the largest valid C integer on
        your system.
    """

    if default is None:
        default = datetime.datetime.now().replace(hour=0, minute=0,
                                                  second=0, microsecond=0)

    res, skipped_tokens = self._parse(timestr, **kwargs)

    if res is None:
        raise ParserError("Unknown string format: %s", timestr)

    if len(res) == 0:
        raise ParserError("String does not contain a date: %s", timestr)

    try:
        ret = self._build_naive(res, default)
    except ValueError as e:
        # Use str(e) rather than e.args[0]: the first argument of a
        # ValueError subclass need not be a string (or exist at all), and
        # concatenating it would raise a different error from inside
        # this handler.
        six.raise_from(ParserError(str(e) + ": %s", timestr), e)

    if not ignoretz:
        ret = self._build_tzaware(ret, res, tzinfos)

    if kwargs.get('fuzzy_with_tokens', False):
        return ret, skipped_tokens
    else:
        return ret

666 

class _result(_resultbase):
    # Fields filled in while parsing; each starts out as None.
    __slots__ = [
        "year", "month", "day", "weekday",
        "hour", "minute", "second", "microsecond",
        "tzname", "tzoffset", "ampm", "any_unused_tokens",
    ]

671 

def _parse(self, timestr, dayfirst=None, yearfirst=None, fuzzy=False,
           fuzzy_with_tokens=False):
    """
    Private method which performs the heavy lifting of parsing, called from
    ``parse()``, which passes on its ``kwargs`` to this function.

    :param timestr:
        The string to parse.

    :param dayfirst:
        Whether to interpret the first value in an ambiguous 3-integer date
        (e.g. 01/05/09) as the day (``True``) or month (``False``). If
        ``yearfirst`` is set to ``True``, this distinguishes between YDM
        and YMD. If set to ``None``, this value is retrieved from the
        current :class:`parserinfo` object (which itself defaults to
        ``False``).

    :param yearfirst:
        Whether to interpret the first value in an ambiguous 3-integer date
        (e.g. 01/05/09) as the year. If ``True``, the first number is taken
        to be the year, otherwise the last number is taken to be the year.
        If this is set to ``None``, the value is retrieved from the current
        :class:`parserinfo` object (which itself defaults to ``False``).

    :param fuzzy:
        Whether to allow fuzzy parsing, allowing for string like "Today is
        January 1, 2047 at 8:21:00AM".

    :param fuzzy_with_tokens:
        If ``True``, ``fuzzy`` is automatically set to True, and the parser
        will return a tuple where the first element is the parsed
        :class:`datetime.datetime` datetimestamp and the second element is
        a tuple containing the portions of the string which were ignored:

        .. doctest::

            >>> from dateutil.parser import parse
            >>> parse("Today is January 1, 2047 at 8:21:00AM", fuzzy_with_tokens=True)
            (datetime.datetime(2047, 1, 1, 8, 21), (u'Today is ', u' ', u'at '))

    :return:
        A 2-tuple ``(res, skipped_tokens)``. ``res`` is the filled-in
        ``_result``; ``skipped_tokens`` is a tuple only when
        ``fuzzy_with_tokens`` is set, otherwise ``None``. ``(None, None)``
        is returned when tokenising/interpreting raises ``IndexError`` or
        ``ValueError``, or when ``info.validate`` rejects the result.
    """
    if fuzzy_with_tokens:
        fuzzy = True

    info = self.info

    if dayfirst is None:
        dayfirst = info.dayfirst

    if yearfirst is None:
        yearfirst = info.yearfirst

    res = self._result()
    l = _timelex.split(timestr)         # Splits the timestr into tokens

    # Indices of tokens that were ignored (only meaningful in fuzzy mode)
    skipped_idxs = []

    # year/month/day list
    ymd = _ymd()

    len_l = len(l)
    i = 0
    # Every branch below leaves ``i`` on the last token it consumed; the
    # shared ``i += 1`` at the bottom of the loop then moves to the next one.
    # Out-of-range look-aheads raise IndexError, which is caught below and
    # reported as a failed parse.
    try:
        while i < len_l:

            # Check if it's a number
            value_repr = l[i]
            try:
                value = float(value_repr)
            except ValueError:
                value = None

            if value is not None:
                # Numeric token
                i = self._parse_numeric_token(l, i, info, ymd, res, fuzzy)

            # Check weekday
            elif info.weekday(l[i]) is not None:
                value = info.weekday(l[i])
                res.weekday = value

            # Check month name
            elif info.month(l[i]) is not None:
                value = info.month(l[i])
                ymd.append(value, 'M')

                if i + 1 < len_l:
                    if l[i + 1] in ('-', '/'):
                        # Jan-01[-99]
                        sep = l[i + 1]
                        ymd.append(l[i + 2])

                        if i + 3 < len_l and l[i + 3] == sep:
                            # Jan-01-99
                            ymd.append(l[i + 4])
                            i += 2

                        i += 2

                    elif (i + 4 < len_l and l[i + 1] == l[i + 3] == ' ' and
                          info.pertain(l[i + 2])):
                        # Jan of 01
                        # In this case, 01 is clearly year
                        if l[i + 4].isdigit():
                            # Convert it here to become unambiguous
                            value = int(l[i + 4])
                            year = str(info.convertyear(value))
                            ymd.append(year, 'Y')
                        else:
                            # Wrong guess
                            pass
                            # TODO: not hit in tests
                        i += 4

            # Check am/pm
            elif info.ampm(l[i]) is not None:
                value = info.ampm(l[i])
                val_is_ampm = self._ampm_valid(res.hour, res.ampm, fuzzy)

                if val_is_ampm:
                    res.hour = self._adjust_ampm(res.hour, value)
                    res.ampm = value

                elif fuzzy:
                    skipped_idxs.append(i)

            # Check for a timezone name
            elif self._could_be_tzname(res.hour, res.tzname, res.tzoffset, l[i]):
                res.tzname = l[i]
                res.tzoffset = info.tzoffset(res.tzname)

                # Check for something like GMT+3, or BRST+3. Notice
                # that it doesn't mean "I am 3 hours after GMT", but
                # "my time +3 is GMT". If found, we reverse the
                # logic so that timezone parsing code will get it
                # right.
                if i + 1 < len_l and l[i + 1] in ('+', '-'):
                    # Flip the sign token in place; the numbered-timezone
                    # branch picks it up on the next iteration.
                    l[i + 1] = ('+', '-')[l[i + 1] == '+']
                    res.tzoffset = None
                    if info.utczone(res.tzname):
                        # With something like GMT+3, the timezone
                        # is *not* GMT.
                        res.tzname = None

            # Check for a numbered timezone
            elif res.hour is not None and l[i] in ('+', '-'):
                signal = (-1, 1)[l[i] == '+']
                len_li = len(l[i + 1])

                # TODO: check that l[i + 1] is integer?
                if len_li == 4:
                    # -0300
                    hour_offset = int(l[i + 1][:2])
                    min_offset = int(l[i + 1][2:])
                elif i + 2 < len_l and l[i + 2] == ':':
                    # -03:00
                    hour_offset = int(l[i + 1])
                    min_offset = int(l[i + 3])  # TODO: Check that l[i+3] is minute-like?
                    i += 2
                elif len_li <= 2:
                    # -[0]3
                    hour_offset = int(l[i + 1][:2])
                    min_offset = 0
                else:
                    raise ValueError(timestr)

                # Offset is stored in seconds
                res.tzoffset = signal * (hour_offset * 3600 + min_offset * 60)

                # Look for a timezone name between parenthesis
                if (i + 5 < len_l and
                        info.jump(l[i + 2]) and l[i + 3] == '(' and
                        l[i + 5] == ')' and
                        3 <= len(l[i + 4]) and
                        self._could_be_tzname(res.hour, res.tzname,
                                              None, l[i + 4])):
                    # -0300 (BRST)
                    res.tzname = l[i + 4]
                    i += 4

                i += 1

            # Check jumps
            elif not (info.jump(l[i]) or fuzzy):
                raise ValueError(timestr)

            else:
                # A jump word, or (in fuzzy mode) an unrecognised token
                skipped_idxs.append(i)
            i += 1

        # Process year/month/day
        year, month, day = ymd.resolve_ymd(yearfirst, dayfirst)

        res.century_specified = ymd.century_specified
        res.year = year
        res.month = month
        res.day = day

    except (IndexError, ValueError):
        return None, None

    if not info.validate(res):
        return None, None

    if fuzzy_with_tokens:
        skipped_tokens = self._recombine_skipped(l, skipped_idxs)
        return res, tuple(skipped_tokens)
    else:
        return res, None

880 

def _parse_numeric_token(self, tokens, idx, info, ymd, res, fuzzy):
    """
    Interpret the numeric token at ``tokens[idx]`` together with whatever
    neighbouring tokens belong to it, updating ``ymd`` and ``res`` in place.

    :param tokens:
        The full token list.

    :param idx:
        Index of the numeric token to interpret.

    :param info:
        Vocabulary object; ``hms``, ``jump``, ``ampm`` and ``month`` are
        called on it here.

    :param ymd:
        Year/month/day accumulator to append date components to.

    :param res:
        Result object whose time fields are assigned.

    :param fuzzy:
        When true, a number that fits no pattern is ignored instead of
        raising.

    :return:
        The index of the last token consumed.

    :raises ValueError:
        If the token cannot be converted, or fits no pattern and ``fuzzy``
        is false.
    """
    # Token is a number
    value_repr = tokens[idx]
    try:
        value = self._to_decimal(value_repr)
    except Exception as e:
        six.raise_from(ValueError('Unknown numeric token'), e)

    # Length of the token text (not of the number), used to spot packed
    # formats such as YYMMDD or YYYYMMDDhhmm.
    len_li = len(value_repr)

    len_l = len(tokens)

    if (len(ymd) == 3 and len_li in (2, 4) and
        res.hour is None and
        (idx + 1 >= len_l or
         (tokens[idx + 1] != ':' and
          info.hms(tokens[idx + 1]) is None))):
        # 19990101T23[59]
        s = tokens[idx]
        res.hour = int(s[:2])

        if len_li == 4:
            res.minute = int(s[2:])

    elif len_li == 6 or (len_li > 6 and tokens[idx].find('.') == 6):
        # YYMMDD or HHMMSS[.ss]
        s = tokens[idx]

        if not ymd and '.' not in tokens[idx]:
            ymd.append(s[:2])
            ymd.append(s[2:4])
            ymd.append(s[4:])
        else:
            # 19990101T235959[.59]

            # TODO: Check if res attributes already set.
            res.hour = int(s[:2])
            res.minute = int(s[2:4])
            res.second, res.microsecond = self._parsems(s[4:])

    elif len_li in (8, 12, 14):
        # YYYYMMDD
        s = tokens[idx]
        ymd.append(s[:4], 'Y')
        ymd.append(s[4:6])
        ymd.append(s[6:8])

        if len_li > 8:
            # YYYYMMDDhhmm
            res.hour = int(s[8:10])
            res.minute = int(s[10:12])

            if len_li > 12:
                # YYYYMMDDhhmmss
                res.second = int(s[12:])

    elif self._find_hms_idx(idx, tokens, info, allow_jump=True) is not None:
        # HH[ ]h or MM[ ]m or SS[.ss][ ]s
        hms_idx = self._find_hms_idx(idx, tokens, info, allow_jump=True)
        (idx, hms) = self._parse_hms(idx, tokens, info, hms_idx)
        if hms is not None:
            # TODO: checking that hour/minute/second are not
            # already set?
            self._assign_hms(res, value_repr, hms)

    elif idx + 2 < len_l and tokens[idx + 1] == ':':
        # HH:MM[:SS[.ss]]
        res.hour = int(value)
        value = self._to_decimal(tokens[idx + 2])  # TODO: try/except for this?
        (res.minute, res.second) = self._parse_min_sec(value)

        if idx + 4 < len_l and tokens[idx + 3] == ':':
            res.second, res.microsecond = self._parsems(tokens[idx + 4])

            idx += 2

        idx += 2

    elif idx + 1 < len_l and tokens[idx + 1] in ('-', '/', '.'):
        # Separator-delimited date; the same separator must be repeated for
        # a third member to be picked up.
        sep = tokens[idx + 1]
        ymd.append(value_repr)

        if idx + 2 < len_l and not info.jump(tokens[idx + 2]):
            if tokens[idx + 2].isdigit():
                # 01-01[-01]
                ymd.append(tokens[idx + 2])
            else:
                # 01-Jan[-01]
                value = info.month(tokens[idx + 2])

                if value is not None:
                    ymd.append(value, 'M')
                else:
                    raise ValueError()

            if idx + 3 < len_l and tokens[idx + 3] == sep:
                # We have three members
                value = info.month(tokens[idx + 4])

                if value is not None:
                    ymd.append(value, 'M')
                else:
                    ymd.append(tokens[idx + 4])
                idx += 2

            idx += 1
        idx += 1

    elif idx + 1 >= len_l or info.jump(tokens[idx + 1]):
        if idx + 2 < len_l and info.ampm(tokens[idx + 2]) is not None:
            # 12 am
            hour = int(value)
            res.hour = self._adjust_ampm(hour, info.ampm(tokens[idx + 2]))
            idx += 1
        else:
            # Year, month or day
            ymd.append(value)
        idx += 1

    elif info.ampm(tokens[idx + 1]) is not None and (0 <= value < 24):
        # 12am
        hour = int(value)
        res.hour = self._adjust_ampm(hour, info.ampm(tokens[idx + 1]))
        idx += 1

    elif ymd.could_be_day(value):
        ymd.append(value)

    elif not fuzzy:
        raise ValueError()

    return idx

1011 

1012 def _find_hms_idx(self, idx, tokens, info, allow_jump): 

1013 len_l = len(tokens) 

1014 

1015 if idx+1 < len_l and info.hms(tokens[idx+1]) is not None: 

1016 # There is an "h", "m", or "s" label following this token. We take 

1017 # assign the upcoming label to the current token. 

1018 # e.g. the "12" in 12h" 

1019 hms_idx = idx + 1 

1020 

1021 elif (allow_jump and idx+2 < len_l and tokens[idx+1] == ' ' and 

1022 info.hms(tokens[idx+2]) is not None): 

1023 # There is a space and then an "h", "m", or "s" label. 

1024 # e.g. the "12" in "12 h" 

1025 hms_idx = idx + 2 

1026 

1027 elif idx > 0 and info.hms(tokens[idx-1]) is not None: 

1028 # There is a "h", "m", or "s" preceding this token. Since neither 

1029 # of the previous cases was hit, there is no label following this 

1030 # token, so we use the previous label. 

1031 # e.g. the "04" in "12h04" 

1032 hms_idx = idx-1 

1033 

1034 elif (1 < idx == len_l-1 and tokens[idx-1] == ' ' and 

1035 info.hms(tokens[idx-2]) is not None): 

1036 # If we are looking at the final token, we allow for a 

1037 # backward-looking check to skip over a space. 

1038 # TODO: Are we sure this is the right condition here? 

1039 hms_idx = idx - 2 

1040 

1041 else: 

1042 hms_idx = None 

1043 

1044 return hms_idx 

1045 

1046 def _assign_hms(self, res, value_repr, hms): 

1047 # See GH issue #427, fixing float rounding 

1048 value = self._to_decimal(value_repr) 

1049 

1050 if hms == 0: 

1051 # Hour 

1052 res.hour = int(value) 

1053 if value % 1: 

1054 res.minute = int(60*(value % 1)) 

1055 

1056 elif hms == 1: 

1057 (res.minute, res.second) = self._parse_min_sec(value) 

1058 

1059 elif hms == 2: 

1060 (res.second, res.microsecond) = self._parsems(value_repr) 

1061 

1062 def _could_be_tzname(self, hour, tzname, tzoffset, token): 

1063 return (hour is not None and 

1064 tzname is None and 

1065 tzoffset is None and 

1066 len(token) <= 5 and 

1067 (all(x in string.ascii_uppercase for x in token) 

1068 or token in self.info.UTCZONE)) 

1069 

1070 def _ampm_valid(self, hour, ampm, fuzzy): 

1071 """ 

1072 For fuzzy parsing, 'a' or 'am' (both valid English words) 

1073 may erroneously trigger the AM/PM flag. Deal with that 

1074 here. 

1075 """ 

1076 val_is_ampm = True 

1077 

1078 # If there's already an AM/PM flag, this one isn't one. 

1079 if fuzzy and ampm is not None: 

1080 val_is_ampm = False 

1081 

1082 # If AM/PM is found and hour is not, raise a ValueError 

1083 if hour is None: 

1084 if fuzzy: 

1085 val_is_ampm = False 

1086 else: 

1087 raise ValueError('No hour specified with AM or PM flag.') 

1088 elif not 0 <= hour <= 12: 

1089 # If AM/PM is found, it's a 12 hour clock, so raise 

1090 # an error for invalid range 

1091 if fuzzy: 

1092 val_is_ampm = False 

1093 else: 

1094 raise ValueError('Invalid hour specified for 12-hour clock.') 

1095 

1096 return val_is_ampm 

1097 

1098 def _adjust_ampm(self, hour, ampm): 

1099 if hour < 12 and ampm == 1: 

1100 hour += 12 

1101 elif hour == 12 and ampm == 0: 

1102 hour = 0 

1103 return hour 

1104 

1105 def _parse_min_sec(self, value): 

1106 # TODO: Every usage of this function sets res.second to the return 

1107 # value. Are there any cases where second will be returned as None and 

1108 # we *don't* want to set res.second = None? 

1109 minute = int(value) 

1110 second = None 

1111 

1112 sec_remainder = value % 1 

1113 if sec_remainder: 

1114 second = int(60 * sec_remainder) 

1115 return (minute, second) 

1116 

1117 def _parse_hms(self, idx, tokens, info, hms_idx): 

1118 # TODO: Is this going to admit a lot of false-positives for when we 

1119 # just happen to have digits and "h", "m" or "s" characters in non-date 

1120 # text? I guess hex hashes won't have that problem, but there's plenty 

1121 # of random junk out there. 

1122 if hms_idx is None: 

1123 hms = None 

1124 new_idx = idx 

1125 elif hms_idx > idx: 

1126 hms = info.hms(tokens[hms_idx]) 

1127 new_idx = hms_idx 

1128 else: 

1129 # Looking backwards, increment one. 

1130 hms = info.hms(tokens[hms_idx]) + 1 

1131 new_idx = idx 

1132 

1133 return (new_idx, hms) 

1134 

1135 # ------------------------------------------------------------------ 

1136 # Handling for individual tokens. These are kept as methods instead 

1137 # of functions for the sake of customizability via subclassing. 

1138 

1139 def _parsems(self, value): 

1140 """Parse a I[.F] seconds value into (seconds, microseconds).""" 

1141 if "." not in value: 

1142 return int(value), 0 

1143 else: 

1144 i, f = value.split(".") 

1145 return int(i), int(f.ljust(6, "0")[:6]) 

1146 

1147 def _to_decimal(self, val): 

1148 try: 

1149 decimal_value = Decimal(val) 

1150 # See GH 662, edge case, infinite value should not be converted 

1151 # via `_to_decimal` 

1152 if not decimal_value.is_finite(): 

1153 raise ValueError("Converted decimal value is infinite or NaN") 

1154 except Exception as e: 

1155 msg = "Could not convert %s to decimal" % val 

1156 six.raise_from(ValueError(msg), e) 

1157 else: 

1158 return decimal_value 

1159 

1160 # ------------------------------------------------------------------ 

1161 # Post-Parsing construction of datetime output. These are kept as 

1162 # methods instead of functions for the sake of customizability via 

1163 # subclassing. 

1164 

1165 def _build_tzinfo(self, tzinfos, tzname, tzoffset): 

1166 if callable(tzinfos): 

1167 tzdata = tzinfos(tzname, tzoffset) 

1168 else: 

1169 tzdata = tzinfos.get(tzname) 

1170 # handle case where tzinfo is paased an options that returns None 

1171 # eg tzinfos = {'BRST' : None} 

1172 if isinstance(tzdata, datetime.tzinfo) or tzdata is None: 

1173 tzinfo = tzdata 

1174 elif isinstance(tzdata, text_type): 

1175 tzinfo = tz.tzstr(tzdata) 

1176 elif isinstance(tzdata, integer_types): 

1177 tzinfo = tz.tzoffset(tzname, tzdata) 

1178 else: 

1179 raise TypeError("Offset must be tzinfo subclass, tz string, " 

1180 "or int offset.") 

1181 return tzinfo 

1182 

1183 def _build_tzaware(self, naive, res, tzinfos): 

1184 if (callable(tzinfos) or (tzinfos and res.tzname in tzinfos)): 

1185 tzinfo = self._build_tzinfo(tzinfos, res.tzname, res.tzoffset) 

1186 aware = naive.replace(tzinfo=tzinfo) 

1187 aware = self._assign_tzname(aware, res.tzname) 

1188 

1189 elif res.tzname and res.tzname in time.tzname: 

1190 aware = naive.replace(tzinfo=tz.tzlocal()) 

1191 

1192 # Handle ambiguous local datetime 

1193 aware = self._assign_tzname(aware, res.tzname) 

1194 

1195 # This is mostly relevant for winter GMT zones parsed in the UK 

1196 if (aware.tzname() != res.tzname and 

1197 res.tzname in self.info.UTCZONE): 

1198 aware = aware.replace(tzinfo=tz.UTC) 

1199 

1200 elif res.tzoffset == 0: 

1201 aware = naive.replace(tzinfo=tz.UTC) 

1202 

1203 elif res.tzoffset: 

1204 aware = naive.replace(tzinfo=tz.tzoffset(res.tzname, res.tzoffset)) 

1205 

1206 elif not res.tzname and not res.tzoffset: 

1207 # i.e. no timezone information was found. 

1208 aware = naive 

1209 

1210 elif res.tzname: 

1211 # tz-like string was parsed but we don't know what to do 

1212 # with it 

1213 warnings.warn("tzname {tzname} identified but not understood. " 

1214 "Pass `tzinfos` argument in order to correctly " 

1215 "return a timezone-aware datetime. In a future " 

1216 "version, this will raise an " 

1217 "exception.".format(tzname=res.tzname), 

1218 category=UnknownTimezoneWarning) 

1219 aware = naive 

1220 

1221 return aware 

1222 

1223 def _build_naive(self, res, default): 

1224 repl = {} 

1225 for attr in ("year", "month", "day", "hour", 

1226 "minute", "second", "microsecond"): 

1227 value = getattr(res, attr) 

1228 if value is not None: 

1229 repl[attr] = value 

1230 

1231 if 'day' not in repl: 

1232 # If the default day exceeds the last day of the month, fall back 

1233 # to the end of the month. 

1234 cyear = default.year if res.year is None else res.year 

1235 cmonth = default.month if res.month is None else res.month 

1236 cday = default.day if res.day is None else res.day 

1237 

1238 if cday > monthrange(cyear, cmonth)[1]: 

1239 repl['day'] = monthrange(cyear, cmonth)[1] 

1240 

1241 naive = default.replace(**repl) 

1242 

1243 if res.weekday is not None and not res.day: 

1244 naive = naive + relativedelta.relativedelta(weekday=res.weekday) 

1245 

1246 return naive 

1247 

1248 def _assign_tzname(self, dt, tzname): 

1249 if dt.tzname() != tzname: 

1250 new_dt = tz.enfold(dt, fold=1) 

1251 if new_dt.tzname() == tzname: 

1252 return new_dt 

1253 

1254 return dt 

1255 

1256 def _recombine_skipped(self, tokens, skipped_idxs): 

1257 """ 

1258 >>> tokens = ["foo", " ", "bar", " ", "19June2000", "baz"] 

1259 >>> skipped_idxs = [0, 1, 2, 5] 

1260 >>> _recombine_skipped(tokens, skipped_idxs) 

1261 ["foo bar", "baz"] 

1262 """ 

1263 skipped_tokens = [] 

1264 for i, idx in enumerate(sorted(skipped_idxs)): 

1265 if i > 0 and idx - 1 == skipped_idxs[i - 1]: 

1266 skipped_tokens[-1] = skipped_tokens[-1] + tokens[idx] 

1267 else: 

1268 skipped_tokens.append(tokens[idx]) 

1269 

1270 return skipped_tokens 

1271 

1272 

# Module-level parser instance created with the default arguments; parse()
# uses it whenever no ``parserinfo`` is supplied.
DEFAULTPARSER = parser()

1274 

1275 

def parse(timestr, parserinfo=None, **kwargs):
    """

    Parse a date/time string in one of the supported formats, using the
    settings carried by ``parserinfo``.

    :param timestr:
        The string holding the date/time stamp to parse.

    :param parserinfo:
        A :class:`parserinfo` object with the parameters for the parser.
        When ``None``, the shared default parser is used, which is built
        with the default arguments of the :class:`parserinfo` constructor.

    The following keyword arguments are accepted through ``**kwargs``:

    :param default:
        The default datetime object. If it is a datetime object and not
        ``None``, elements found in ``timestr`` replace the corresponding
        elements of this object.

    :param ignoretz:
        When ``True``, time zones found in the string are ignored and a
        naive :class:`datetime` object is returned.

    :param tzinfos:
        Additional time zone names / aliases which may be present in the
        string. This argument maps time zone names (and optionally offsets
        from those time zones) to time zones. It can be a dictionary
        mapping time zone names to time zones, or a function taking two
        parameters (``tzname`` and ``tzoffset``) and returning a time
        zone.

        The time zones the names map to can be an integer offset from UTC
        in seconds or a :class:`tzinfo` object.

        .. doctest::
           :options: +NORMALIZE_WHITESPACE

            >>> from dateutil.parser import parse
            >>> from dateutil.tz import gettz
            >>> tzinfos = {"BRST": -7200, "CST": gettz("America/Chicago")}
            >>> parse("2012-01-19 17:21:00 BRST", tzinfos=tzinfos)
            datetime.datetime(2012, 1, 19, 17, 21, tzinfo=tzoffset(u'BRST', -7200))
            >>> parse("2012-01-19 17:21:00 CST", tzinfos=tzinfos)
            datetime.datetime(2012, 1, 19, 17, 21,
                              tzinfo=tzfile('/usr/share/zoneinfo/America/Chicago'))

        This parameter is ignored if ``ignoretz`` is set.

    :param dayfirst:
        Whether the first value of an ambiguous 3-integer date
        (e.g. 01/05/09) is the day (``True``) or the month (``False``). If
        ``yearfirst`` is ``True``, this distinguishes between YDM and YMD.
        If ``None``, the value is taken from the current
        :class:`parserinfo` object (which itself defaults to ``False``).

    :param yearfirst:
        Whether the first value of an ambiguous 3-integer date
        (e.g. 01/05/09) is the year. If ``True``, the first number is taken
        to be the year, otherwise the last number is. If ``None``, the
        value is taken from the current :class:`parserinfo` object (which
        itself defaults to ``False``).

    :param fuzzy:
        Whether to allow fuzzy parsing, which accepts strings such as
        "Today is January 1, 2047 at 8:21:00AM".

    :param fuzzy_with_tokens:
        If ``True``, ``fuzzy`` is automatically set to True, and the parser
        returns a tuple whose first element is the parsed
        :class:`datetime.datetime` and whose second element is a tuple
        containing the portions of the string which were ignored:

        .. doctest::

            >>> from dateutil.parser import parse
            >>> parse("Today is January 1, 2047 at 8:21:00AM", fuzzy_with_tokens=True)
            (datetime.datetime(2047, 1, 1, 8, 21), (u'Today is ', u' ', u'at '))

    :return:
        A :class:`datetime.datetime` object or, if the
        ``fuzzy_with_tokens`` option is ``True``, a tuple whose first
        element is a :class:`datetime.datetime` object and whose second
        element is a tuple containing the fuzzy tokens.

    :raises ValueError:
        Raised for invalid or unknown string format, if the provided
        :class:`tzinfo` is not in a valid format, or if an invalid date
        would be created.

    :raises OverflowError:
        Raised if the parsed date exceeds the largest valid C integer on
        your system.
    """
    if not parserinfo:
        # No custom settings: reuse the shared module-level parser.
        return DEFAULTPARSER.parse(timestr, **kwargs)

    return parser(parserinfo).parse(timestr, **kwargs)

1375 

1376 

class _tzparser(object):
    """Parser for TZ-variable style time zone strings such as ``BRST+3BRDT``."""

    class _result(_resultbase):
        """Parsed zone: standard/DST abbreviations and offsets plus rules."""

        __slots__ = ["stdabbr", "stdoffset", "dstabbr", "dstoffset",
                     "start", "end"]

        class _attr(_resultbase):
            """One DST transition rule (used for both start and end)."""
            __slots__ = ["month", "week", "weekday",
                         "yday", "jyday", "day", "time"]

        def __repr__(self):
            return self._repr("")

        def __init__(self):
            _resultbase.__init__(self)
            self.start = self._attr()
            self.end = self._attr()

    def parse(self, tzstr):
        """
        Parse ``tzstr`` into a :class:`_result`.

        :param tzstr: The time zone string to parse.
        :return: A ``_result`` whose ``any_unused_tokens`` attribute tells
                 whether tokens other than ``","`` and ``":"`` were left
                 unconsumed, or ``None`` if the string cannot be parsed.
        """
        res = self._result()
        l = [x for x in re.split(r'([,:.]|[a-zA-Z]+|[0-9]+)',tzstr) if x]
        used_idxs = list()
        try:

            len_l = len(l)

            i = 0
            while i < len_l:
                # BRST+3[BRDT[+2]]
                j = i
                # Advance j over the tokens that form the abbreviation,
                # i.e. those without digits or offset punctuation.
                while j < len_l and not [x for x in l[j]
                                         if x in "0123456789:,-+"]:
                    j += 1
                if j != i:
                    # The first abbreviation is the standard one, the
                    # second is the DST one.
                    if not res.stdabbr:
                        offattr = "stdoffset"
                        res.stdabbr = "".join(l[i:j])
                    else:
                        offattr = "dstoffset"
                        res.dstabbr = "".join(l[i:j])

                    for ii in range(j):
                        used_idxs.append(ii)
                    i = j
                    if (i < len_l and (l[i] in ('+', '-') or l[i][0] in
                                       "0123456789")):
                        if l[i] in ('+', '-'):
                            # Yes, that's right. See the TZ variable
                            # documentation.
                            signal = (1, -1)[l[i] == '+']
                            used_idxs.append(i)
                            i += 1
                        else:
                            signal = -1
                        len_li = len(l[i])
                        if len_li == 4:
                            # -0300
                            setattr(res, offattr, (int(l[i][:2]) * 3600 +
                                                   int(l[i][2:]) * 60) * signal)
                        elif i + 1 < len_l and l[i + 1] == ':':
                            # -03:00
                            setattr(res, offattr,
                                    (int(l[i]) * 3600 +
                                     int(l[i + 2]) * 60) * signal)
                            used_idxs.append(i)
                            i += 2
                        elif len_li <= 2:
                            # -[0]3
                            setattr(res, offattr,
                                    int(l[i][:2]) * 3600 * signal)
                        else:
                            return None
                        used_idxs.append(i)
                        i += 1
                        if res.dstabbr:
                            break
                    else:
                        break

            if i < len_l:
                # ';' is accepted as an alternative to ',' from here on.
                for j in range(i, len_l):
                    if l[j] == ';':
                        l[j] = ','

                # Explicit check rather than ``assert`` so that validation
                # is not dropped under ``python -O``.
                if l[i] != ',':
                    raise ValueError("expected ',' after the offsets")

                i += 1

            if i >= len_l:
                pass
            elif (8 <= l.count(',') <= 9 and
                  not [y for x in l[i:] if x != ','
                       for y in x if y not in "0123456789+-"]):
                # GMT0BST,3,0,30,3600,10,0,26,7200[,3600]
                for x in (res.start, res.end):
                    x.month = int(l[i])
                    used_idxs.append(i)
                    i += 2
                    if l[i] == '-':
                        value = int(l[i + 1]) * -1
                        used_idxs.append(i)
                        i += 1
                    else:
                        value = int(l[i])
                    used_idxs.append(i)
                    i += 2
                    if value:
                        x.week = value
                        x.weekday = (int(l[i]) - 1) % 7
                    else:
                        x.day = int(l[i])
                    used_idxs.append(i)
                    i += 2
                    x.time = int(l[i])
                    used_idxs.append(i)
                    i += 2
                if i < len_l:
                    if l[i] in ('-', '+'):
                        signal = (-1, 1)[l[i] == "+"]
                        used_idxs.append(i)
                        i += 1
                    else:
                        signal = 1
                    used_idxs.append(i)
                    res.dstoffset = (res.stdoffset + int(l[i]) * signal)

                # This was a made-up format that is not in normal use
                warn(('Parsed time zone "%s" ' % tzstr) +
                     'is in a non-standard dateutil-specific format, which ' +
                     'is now deprecated; support for parsing this format ' +
                     'will be removed in future versions. It is recommended ' +
                     'that you switch to a standard format like the GNU ' +
                     'TZ variable format.', tz.DeprecatedTzFormatWarning)
            elif (l.count(',') == 2 and l[i:].count('/') <= 2 and
                  not [y for x in l[i:] if x not in (',', '/', 'J', 'M',
                                                     '.', '-', ':')
                       for y in x if y not in "0123456789"]):
                for x in (res.start, res.end):
                    if l[i] == 'J':
                        # non-leap year day (1 based)
                        used_idxs.append(i)
                        i += 1
                        x.jyday = int(l[i])
                    elif l[i] == 'M':
                        # month[-.]week[-.]weekday
                        used_idxs.append(i)
                        i += 1
                        x.month = int(l[i])
                        used_idxs.append(i)
                        i += 1
                        if l[i] not in ('-', '.'):
                            raise ValueError("expected '-' or '.' after month")
                        used_idxs.append(i)
                        i += 1
                        x.week = int(l[i])
                        if x.week == 5:
                            x.week = -1
                        used_idxs.append(i)
                        i += 1
                        if l[i] not in ('-', '.'):
                            raise ValueError("expected '-' or '.' after week")
                        used_idxs.append(i)
                        i += 1
                        x.weekday = (int(l[i]) - 1) % 7
                    else:
                        # year day (zero based)
                        x.yday = int(l[i]) + 1

                    used_idxs.append(i)
                    i += 1

                    if i < len_l and l[i] == '/':
                        used_idxs.append(i)
                        i += 1
                        # start time
                        len_li = len(l[i])
                        if len_li == 4:
                            # -0300
                            x.time = (int(l[i][:2]) * 3600 +
                                      int(l[i][2:]) * 60)
                        elif i + 1 < len_l and l[i + 1] == ':':
                            # -03:00
                            x.time = int(l[i]) * 3600 + int(l[i + 2]) * 60
                            used_idxs.append(i)
                            i += 2
                            if i + 1 < len_l and l[i + 1] == ':':
                                used_idxs.append(i)
                                i += 2
                                x.time += int(l[i])
                        elif len_li <= 2:
                            # -[0]3
                            x.time = (int(l[i][:2]) * 3600)
                        else:
                            return None
                        used_idxs.append(i)
                        i += 1

                    if not (i == len_l or l[i] == ','):
                        raise ValueError("expected ',' or end of string "
                                         "after a transition rule")

                    i += 1

                if i < len_l:
                    raise ValueError("unexpected trailing tokens")

        except (IndexError, ValueError, AssertionError):
            # Any malformed input ends up here and is reported as
            # "not parseable".
            return None

        unused_idxs = set(range(len_l)).difference(used_idxs)
        res.any_unused_tokens = not {l[n] for n in unused_idxs}.issubset({",",":"})
        return res

1586 

1587 

# Module-level _tzparser instance; _parsetz() delegates to it.
DEFAULTTZPARSER = _tzparser()

1589 

1590 

def _parsetz(tzstr):
    """Parse ``tzstr`` with the module-level time zone string parser."""
    shared_parser = DEFAULTTZPARSER
    return shared_parser.parse(tzstr)

1593 

1594 

class ParserError(ValueError):
    """Error class for representing failure to parse a datetime string.

    The first argument is treated as a %-format string and the remaining
    arguments as its values when the error is converted to a string.
    """
    def __str__(self):
        try:
            return self.args[0] % self.args[1:]
        except (TypeError, IndexError, ValueError):
            # No arguments, a non-string first argument, a mismatch between
            # placeholders and values, or a stray "%" in the message: fall
            # back to the plain exception text instead of raising from
            # within __str__.
            return super(ParserError, self).__str__()

    def __repr__(self):
        # Show the raw constructor arguments so that the repr reads like
        # the call that created the error.
        args = ", ".join(repr(arg) for arg in self.args)
        return "%s(%s)" % (self.__class__.__name__, args)

1605 

1606 

class UnknownTimezoneWarning(RuntimeWarning):
    """Warning issued when a parsed timezone cannot be turned into a tzinfo"""

1609# vim:ts=4:sw=4:et