Coverage for /home/martinb/.local/share/virtualenvs/camcops/lib/python3.6/site-packages/dateutil/parser/_parser.py : 14%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# -*- coding: utf-8 -*-
2"""
3This module offers a generic date/time string parser which is able to parse
4most known formats to represent a date and/or time.
6This module attempts to be forgiving with regards to unlikely input formats,
7returning a datetime object even for dates which are ambiguous. If an element
8of a date/time stamp is omitted, the following rules are applied:
10- If AM or PM is left unspecified, a 24-hour clock is assumed, however, an hour
11 on a 12-hour clock (``0 <= hour <= 12``) *must* be specified if AM or PM is
12 specified.
13- If a time zone is omitted, a timezone-naive datetime is returned.
15If any other elements are missing, they are taken from the
16:class:`datetime.datetime` object passed to the parameter ``default``. If this
17results in a day number exceeding the valid number of days per month, the
18value falls back to the end of the month.
20Additional resources about date/time string formats can be found below:
22- `A summary of the international standard date and time notation
23 <http://www.cl.cam.ac.uk/~mgk25/iso-time.html>`_
24- `W3C Date and Time Formats <http://www.w3.org/TR/NOTE-datetime>`_
25- `Time Formats (Planetary Rings Node) <https://pds-rings.seti.org:443/tools/time_formats.html>`_
26- `CPAN ParseDate module
27 <http://search.cpan.org/~muir/Time-modules-2013.0912/lib/Time/ParseDate.pm>`_
28- `Java SimpleDateFormat Class
29 <https://docs.oracle.com/javase/6/docs/api/java/text/SimpleDateFormat.html>`_
30"""
31from __future__ import unicode_literals
33import datetime
34import re
35import string
36import time
37import warnings
39from calendar import monthrange
40from io import StringIO
42import six
43from six import integer_types, text_type
45from decimal import Decimal
47from warnings import warn
49from .. import relativedelta
50from .. import tz
52__all__ = ["parse", "parserinfo", "ParserError"]
55# TODO: pandas.core.tools.datetimes imports this explicitly. Might be worth
56# making public and/or figuring out if there is something we can
57# take off their plate.
58class _timelex(object):
59 # Fractional seconds are sometimes split by a comma
60 _split_decimal = re.compile("([.,])")
62 def __init__(self, instream):
63 if six.PY2:
64 # In Python 2, we can't duck type properly because unicode has
65 # a 'decode' function, and we'd be double-decoding
66 if isinstance(instream, (bytes, bytearray)):
67 instream = instream.decode()
68 else:
69 if getattr(instream, 'decode', None) is not None:
70 instream = instream.decode()
72 if isinstance(instream, text_type):
73 instream = StringIO(instream)
74 elif getattr(instream, 'read', None) is None:
75 raise TypeError('Parser must be a string or character stream, not '
76 '{itype}'.format(itype=instream.__class__.__name__))
78 self.instream = instream
79 self.charstack = []
80 self.tokenstack = []
81 self.eof = False
83 def get_token(self):
84 """
85 This function breaks the time string into lexical units (tokens), which
86 can be parsed by the parser. Lexical units are demarcated by changes in
87 the character set, so any continuous string of letters is considered
88 one unit, any continuous string of numbers is considered one unit.
90 The main complication arises from the fact that dots ('.') can be used
91 both as separators (e.g. "Sep.20.2009") or decimal points (e.g.
92 "4:30:21.447"). As such, it is necessary to read the full context of
93 any dot-separated strings before breaking it into tokens; as such, this
94 function maintains a "token stack", for when the ambiguous context
95 demands that multiple tokens be parsed at once.
96 """
97 if self.tokenstack:
98 return self.tokenstack.pop(0)
100 seenletters = False
101 token = None
102 state = None
104 while not self.eof:
105 # We only realize that we've reached the end of a token when we
106 # find a character that's not part of the current token - since
107 # that character may be part of the next token, it's stored in the
108 # charstack.
109 if self.charstack:
110 nextchar = self.charstack.pop(0)
111 else:
112 nextchar = self.instream.read(1)
113 while nextchar == '\x00':
114 nextchar = self.instream.read(1)
116 if not nextchar:
117 self.eof = True
118 break
119 elif not state:
120 # First character of the token - determines if we're starting
121 # to parse a word, a number or something else.
122 token = nextchar
123 if self.isword(nextchar):
124 state = 'a'
125 elif self.isnum(nextchar):
126 state = '0'
127 elif self.isspace(nextchar):
128 token = ' '
129 break # emit token
130 else:
131 break # emit token
132 elif state == 'a':
133 # If we've already started reading a word, we keep reading
134 # letters until we find something that's not part of a word.
135 seenletters = True
136 if self.isword(nextchar):
137 token += nextchar
138 elif nextchar == '.':
139 token += nextchar
140 state = 'a.'
141 else:
142 self.charstack.append(nextchar)
143 break # emit token
144 elif state == '0':
145 # If we've already started reading a number, we keep reading
146 # numbers until we find something that doesn't fit.
147 if self.isnum(nextchar):
148 token += nextchar
149 elif nextchar == '.' or (nextchar == ',' and len(token) >= 2):
150 token += nextchar
151 state = '0.'
152 else:
153 self.charstack.append(nextchar)
154 break # emit token
155 elif state == 'a.':
156 # If we've seen some letters and a dot separator, continue
157 # parsing, and the tokens will be broken up later.
158 seenletters = True
159 if nextchar == '.' or self.isword(nextchar):
160 token += nextchar
161 elif self.isnum(nextchar) and token[-1] == '.':
162 token += nextchar
163 state = '0.'
164 else:
165 self.charstack.append(nextchar)
166 break # emit token
167 elif state == '0.':
168 # If we've seen at least one dot separator, keep going, we'll
169 # break up the tokens later.
170 if nextchar == '.' or self.isnum(nextchar):
171 token += nextchar
172 elif self.isword(nextchar) and token[-1] == '.':
173 token += nextchar
174 state = 'a.'
175 else:
176 self.charstack.append(nextchar)
177 break # emit token
179 if (state in ('a.', '0.') and (seenletters or token.count('.') > 1 or
180 token[-1] in '.,')):
181 l = self._split_decimal.split(token)
182 token = l[0]
183 for tok in l[1:]:
184 if tok:
185 self.tokenstack.append(tok)
187 if state == '0.' and token.count('.') == 0:
188 token = token.replace(',', '.')
190 return token
192 def __iter__(self):
193 return self
195 def __next__(self):
196 token = self.get_token()
197 if token is None:
198 raise StopIteration
200 return token
202 def next(self):
203 return self.__next__() # Python 2.x support
205 @classmethod
206 def split(cls, s):
207 return list(cls(s))
209 @classmethod
210 def isword(cls, nextchar):
211 """ Whether or not the next character is part of a word """
212 return nextchar.isalpha()
214 @classmethod
215 def isnum(cls, nextchar):
216 """ Whether the next character is part of a number """
217 return nextchar.isdigit()
219 @classmethod
220 def isspace(cls, nextchar):
221 """ Whether the next character is whitespace """
222 return nextchar.isspace()
225class _resultbase(object):
227 def __init__(self):
228 for attr in self.__slots__:
229 setattr(self, attr, None)
231 def _repr(self, classname):
232 l = []
233 for attr in self.__slots__:
234 value = getattr(self, attr)
235 if value is not None:
236 l.append("%s=%s" % (attr, repr(value)))
237 return "%s(%s)" % (classname, ", ".join(l))
239 def __len__(self):
240 return (sum(getattr(self, attr) is not None
241 for attr in self.__slots__))
243 def __repr__(self):
244 return self._repr(self.__class__.__name__)
247class parserinfo(object):
248 """
249 Class which handles what inputs are accepted. Subclass this to customize
250 the language and acceptable values for each parameter.
252 :param dayfirst:
253 Whether to interpret the first value in an ambiguous 3-integer date
254 (e.g. 01/05/09) as the day (``True``) or month (``False``). If
255 ``yearfirst`` is set to ``True``, this distinguishes between YDM
256 and YMD. Default is ``False``.
258 :param yearfirst:
259 Whether to interpret the first value in an ambiguous 3-integer date
260 (e.g. 01/05/09) as the year. If ``True``, the first number is taken
261 to be the year, otherwise the last number is taken to be the year.
262 Default is ``False``.
263 """
265 # m from a.m/p.m, t from ISO T separator
266 JUMP = [" ", ".", ",", ";", "-", "/", "'",
267 "at", "on", "and", "ad", "m", "t", "of",
268 "st", "nd", "rd", "th"]
270 WEEKDAYS = [("Mon", "Monday"),
271 ("Tue", "Tuesday"), # TODO: "Tues"
272 ("Wed", "Wednesday"),
273 ("Thu", "Thursday"), # TODO: "Thurs"
274 ("Fri", "Friday"),
275 ("Sat", "Saturday"),
276 ("Sun", "Sunday")]
277 MONTHS = [("Jan", "January"),
278 ("Feb", "February"), # TODO: "Febr"
279 ("Mar", "March"),
280 ("Apr", "April"),
281 ("May", "May"),
282 ("Jun", "June"),
283 ("Jul", "July"),
284 ("Aug", "August"),
285 ("Sep", "Sept", "September"),
286 ("Oct", "October"),
287 ("Nov", "November"),
288 ("Dec", "December")]
289 HMS = [("h", "hour", "hours"),
290 ("m", "minute", "minutes"),
291 ("s", "second", "seconds")]
292 AMPM = [("am", "a"),
293 ("pm", "p")]
294 UTCZONE = ["UTC", "GMT", "Z", "z"]
295 PERTAIN = ["of"]
296 TZOFFSET = {}
297 # TODO: ERA = ["AD", "BC", "CE", "BCE", "Stardate",
298 # "Anno Domini", "Year of Our Lord"]
300 def __init__(self, dayfirst=False, yearfirst=False):
301 self._jump = self._convert(self.JUMP)
302 self._weekdays = self._convert(self.WEEKDAYS)
303 self._months = self._convert(self.MONTHS)
304 self._hms = self._convert(self.HMS)
305 self._ampm = self._convert(self.AMPM)
306 self._utczone = self._convert(self.UTCZONE)
307 self._pertain = self._convert(self.PERTAIN)
309 self.dayfirst = dayfirst
310 self.yearfirst = yearfirst
312 self._year = time.localtime().tm_year
313 self._century = self._year // 100 * 100
315 def _convert(self, lst):
316 dct = {}
317 for i, v in enumerate(lst):
318 if isinstance(v, tuple):
319 for v in v:
320 dct[v.lower()] = i
321 else:
322 dct[v.lower()] = i
323 return dct
325 def jump(self, name):
326 return name.lower() in self._jump
328 def weekday(self, name):
329 try:
330 return self._weekdays[name.lower()]
331 except KeyError:
332 pass
333 return None
335 def month(self, name):
336 try:
337 return self._months[name.lower()] + 1
338 except KeyError:
339 pass
340 return None
342 def hms(self, name):
343 try:
344 return self._hms[name.lower()]
345 except KeyError:
346 return None
348 def ampm(self, name):
349 try:
350 return self._ampm[name.lower()]
351 except KeyError:
352 return None
354 def pertain(self, name):
355 return name.lower() in self._pertain
357 def utczone(self, name):
358 return name.lower() in self._utczone
360 def tzoffset(self, name):
361 if name in self._utczone:
362 return 0
364 return self.TZOFFSET.get(name)
366 def convertyear(self, year, century_specified=False):
367 """
368 Converts two-digit years to year within [-50, 49]
369 range of self._year (current local time)
370 """
372 # Function contract is that the year is always positive
373 assert year >= 0
375 if year < 100 and not century_specified:
376 # assume current century to start
377 year += self._century
379 if year >= self._year + 50: # if too far in future
380 year -= 100
381 elif year < self._year - 50: # if too far in past
382 year += 100
384 return year
386 def validate(self, res):
387 # move to info
388 if res.year is not None:
389 res.year = self.convertyear(res.year, res.century_specified)
391 if ((res.tzoffset == 0 and not res.tzname) or
392 (res.tzname == 'Z' or res.tzname == 'z')):
393 res.tzname = "UTC"
394 res.tzoffset = 0
395 elif res.tzoffset != 0 and res.tzname and self.utczone(res.tzname):
396 res.tzoffset = 0
397 return True
400class _ymd(list):
401 def __init__(self, *args, **kwargs):
402 super(self.__class__, self).__init__(*args, **kwargs)
403 self.century_specified = False
404 self.dstridx = None
405 self.mstridx = None
406 self.ystridx = None
408 @property
409 def has_year(self):
410 return self.ystridx is not None
412 @property
413 def has_month(self):
414 return self.mstridx is not None
416 @property
417 def has_day(self):
418 return self.dstridx is not None
420 def could_be_day(self, value):
421 if self.has_day:
422 return False
423 elif not self.has_month:
424 return 1 <= value <= 31
425 elif not self.has_year:
426 # Be permissive, assume leap year
427 month = self[self.mstridx]
428 return 1 <= value <= monthrange(2000, month)[1]
429 else:
430 month = self[self.mstridx]
431 year = self[self.ystridx]
432 return 1 <= value <= monthrange(year, month)[1]
434 def append(self, val, label=None):
435 if hasattr(val, '__len__'):
436 if val.isdigit() and len(val) > 2:
437 self.century_specified = True
438 if label not in [None, 'Y']: # pragma: no cover
439 raise ValueError(label)
440 label = 'Y'
441 elif val > 100:
442 self.century_specified = True
443 if label not in [None, 'Y']: # pragma: no cover
444 raise ValueError(label)
445 label = 'Y'
447 super(self.__class__, self).append(int(val))
449 if label == 'M':
450 if self.has_month:
451 raise ValueError('Month is already set')
452 self.mstridx = len(self) - 1
453 elif label == 'D':
454 if self.has_day:
455 raise ValueError('Day is already set')
456 self.dstridx = len(self) - 1
457 elif label == 'Y':
458 if self.has_year:
459 raise ValueError('Year is already set')
460 self.ystridx = len(self) - 1
462 def _resolve_from_stridxs(self, strids):
463 """
464 Try to resolve the identities of year/month/day elements using
465 ystridx, mstridx, and dstridx, if enough of these are specified.
466 """
467 if len(self) == 3 and len(strids) == 2:
468 # we can back out the remaining stridx value
469 missing = [x for x in range(3) if x not in strids.values()]
470 key = [x for x in ['y', 'm', 'd'] if x not in strids]
471 assert len(missing) == len(key) == 1
472 key = key[0]
473 val = missing[0]
474 strids[key] = val
476 assert len(self) == len(strids) # otherwise this should not be called
477 out = {key: self[strids[key]] for key in strids}
478 return (out.get('y'), out.get('m'), out.get('d'))
480 def resolve_ymd(self, yearfirst, dayfirst):
481 len_ymd = len(self)
482 year, month, day = (None, None, None)
484 strids = (('y', self.ystridx),
485 ('m', self.mstridx),
486 ('d', self.dstridx))
488 strids = {key: val for key, val in strids if val is not None}
489 if (len(self) == len(strids) > 0 or
490 (len(self) == 3 and len(strids) == 2)):
491 return self._resolve_from_stridxs(strids)
493 mstridx = self.mstridx
495 if len_ymd > 3:
496 raise ValueError("More than three YMD values")
497 elif len_ymd == 1 or (mstridx is not None and len_ymd == 2):
498 # One member, or two members with a month string
499 if mstridx is not None:
500 month = self[mstridx]
501 # since mstridx is 0 or 1, self[mstridx-1] always
502 # looks up the other element
503 other = self[mstridx - 1]
504 else:
505 other = self[0]
507 if len_ymd > 1 or mstridx is None:
508 if other > 31:
509 year = other
510 else:
511 day = other
513 elif len_ymd == 2:
514 # Two members with numbers
515 if self[0] > 31:
516 # 99-01
517 year, month = self
518 elif self[1] > 31:
519 # 01-99
520 month, year = self
521 elif dayfirst and self[1] <= 12:
522 # 13-01
523 day, month = self
524 else:
525 # 01-13
526 month, day = self
528 elif len_ymd == 3:
529 # Three members
530 if mstridx == 0:
531 if self[1] > 31:
532 # Apr-2003-25
533 month, year, day = self
534 else:
535 month, day, year = self
536 elif mstridx == 1:
537 if self[0] > 31 or (yearfirst and self[2] <= 31):
538 # 99-Jan-01
539 year, month, day = self
540 else:
541 # 01-Jan-01
542 # Give precedence to day-first, since
543 # two-digit years is usually hand-written.
544 day, month, year = self
546 elif mstridx == 2:
547 # WTF!?
548 if self[1] > 31:
549 # 01-99-Jan
550 day, year, month = self
551 else:
552 # 99-01-Jan
553 year, day, month = self
555 else:
556 if (self[0] > 31 or
557 self.ystridx == 0 or
558 (yearfirst and self[1] <= 12 and self[2] <= 31)):
559 # 99-01-01
560 if dayfirst and self[2] <= 12:
561 year, day, month = self
562 else:
563 year, month, day = self
564 elif self[0] > 12 or (dayfirst and self[1] <= 12):
565 # 13-01-01
566 day, month, year = self
567 else:
568 # 01-13-01
569 month, day, year = self
571 return year, month, day
574class parser(object):
575 def __init__(self, info=None):
576 self.info = info or parserinfo()
578 def parse(self, timestr, default=None,
579 ignoretz=False, tzinfos=None, **kwargs):
580 """
581 Parse the date/time string into a :class:`datetime.datetime` object.
583 :param timestr:
584 Any date/time string using the supported formats.
586 :param default:
587 The default datetime object, if this is a datetime object and not
588 ``None``, elements specified in ``timestr`` replace elements in the
589 default object.
591 :param ignoretz:
592 If set ``True``, time zones in parsed strings are ignored and a
593 naive :class:`datetime.datetime` object is returned.
595 :param tzinfos:
596 Additional time zone names / aliases which may be present in the
597 string. This argument maps time zone names (and optionally offsets
598 from those time zones) to time zones. This parameter can be a
599 dictionary with timezone aliases mapping time zone names to time
600 zones or a function taking two parameters (``tzname`` and
601 ``tzoffset``) and returning a time zone.
603 The timezones to which the names are mapped can be an integer
604 offset from UTC in seconds or a :class:`tzinfo` object.
606 .. doctest::
607 :options: +NORMALIZE_WHITESPACE
609 >>> from dateutil.parser import parse
610 >>> from dateutil.tz import gettz
611 >>> tzinfos = {"BRST": -7200, "CST": gettz("America/Chicago")}
612 >>> parse("2012-01-19 17:21:00 BRST", tzinfos=tzinfos)
613 datetime.datetime(2012, 1, 19, 17, 21, tzinfo=tzoffset(u'BRST', -7200))
614 >>> parse("2012-01-19 17:21:00 CST", tzinfos=tzinfos)
615 datetime.datetime(2012, 1, 19, 17, 21,
616 tzinfo=tzfile('/usr/share/zoneinfo/America/Chicago'))
618 This parameter is ignored if ``ignoretz`` is set.
620 :param \\*\\*kwargs:
621 Keyword arguments as passed to ``_parse()``.
623 :return:
624 Returns a :class:`datetime.datetime` object or, if the
625 ``fuzzy_with_tokens`` option is ``True``, returns a tuple, the
626 first element being a :class:`datetime.datetime` object, the second
627 a tuple containing the fuzzy tokens.
629 :raises ParserError:
630 Raised for invalid or unknown string format, if the provided
631 :class:`tzinfo` is not in a valid format, or if an invalid date
632 would be created.
634 :raises TypeError:
635 Raised for non-string or character stream input.
637 :raises OverflowError:
638 Raised if the parsed date exceeds the largest valid C integer on
639 your system.
640 """
642 if default is None:
643 default = datetime.datetime.now().replace(hour=0, minute=0,
644 second=0, microsecond=0)
646 res, skipped_tokens = self._parse(timestr, **kwargs)
648 if res is None:
649 raise ParserError("Unknown string format: %s", timestr)
651 if len(res) == 0:
652 raise ParserError("String does not contain a date: %s", timestr)
654 try:
655 ret = self._build_naive(res, default)
656 except ValueError as e:
657 six.raise_from(ParserError(e.args[0] + ": %s", timestr), e)
659 if not ignoretz:
660 ret = self._build_tzaware(ret, res, tzinfos)
662 if kwargs.get('fuzzy_with_tokens', False):
663 return ret, skipped_tokens
664 else:
665 return ret
667 class _result(_resultbase):
668 __slots__ = ["year", "month", "day", "weekday",
669 "hour", "minute", "second", "microsecond",
670 "tzname", "tzoffset", "ampm","any_unused_tokens"]
672 def _parse(self, timestr, dayfirst=None, yearfirst=None, fuzzy=False,
673 fuzzy_with_tokens=False):
674 """
675 Private method which performs the heavy lifting of parsing, called from
676 ``parse()``, which passes on its ``kwargs`` to this function.
678 :param timestr:
679 The string to parse.
681 :param dayfirst:
682 Whether to interpret the first value in an ambiguous 3-integer date
683 (e.g. 01/05/09) as the day (``True``) or month (``False``). If
684 ``yearfirst`` is set to ``True``, this distinguishes between YDM
685 and YMD. If set to ``None``, this value is retrieved from the
686 current :class:`parserinfo` object (which itself defaults to
687 ``False``).
689 :param yearfirst:
690 Whether to interpret the first value in an ambiguous 3-integer date
691 (e.g. 01/05/09) as the year. If ``True``, the first number is taken
692 to be the year, otherwise the last number is taken to be the year.
693 If this is set to ``None``, the value is retrieved from the current
694 :class:`parserinfo` object (which itself defaults to ``False``).
696 :param fuzzy:
697 Whether to allow fuzzy parsing, allowing for string like "Today is
698 January 1, 2047 at 8:21:00AM".
700 :param fuzzy_with_tokens:
701 If ``True``, ``fuzzy`` is automatically set to True, and the parser
702 will return a tuple where the first element is the parsed
703 :class:`datetime.datetime` datetimestamp and the second element is
704 a tuple containing the portions of the string which were ignored:
706 .. doctest::
708 >>> from dateutil.parser import parse
709 >>> parse("Today is January 1, 2047 at 8:21:00AM", fuzzy_with_tokens=True)
710 (datetime.datetime(2047, 1, 1, 8, 21), (u'Today is ', u' ', u'at '))
712 """
713 if fuzzy_with_tokens:
714 fuzzy = True
716 info = self.info
718 if dayfirst is None:
719 dayfirst = info.dayfirst
721 if yearfirst is None:
722 yearfirst = info.yearfirst
724 res = self._result()
725 l = _timelex.split(timestr) # Splits the timestr into tokens
727 skipped_idxs = []
729 # year/month/day list
730 ymd = _ymd()
732 len_l = len(l)
733 i = 0
734 try:
735 while i < len_l:
737 # Check if it's a number
738 value_repr = l[i]
739 try:
740 value = float(value_repr)
741 except ValueError:
742 value = None
744 if value is not None:
745 # Numeric token
746 i = self._parse_numeric_token(l, i, info, ymd, res, fuzzy)
748 # Check weekday
749 elif info.weekday(l[i]) is not None:
750 value = info.weekday(l[i])
751 res.weekday = value
753 # Check month name
754 elif info.month(l[i]) is not None:
755 value = info.month(l[i])
756 ymd.append(value, 'M')
758 if i + 1 < len_l:
759 if l[i + 1] in ('-', '/'):
760 # Jan-01[-99]
761 sep = l[i + 1]
762 ymd.append(l[i + 2])
764 if i + 3 < len_l and l[i + 3] == sep:
765 # Jan-01-99
766 ymd.append(l[i + 4])
767 i += 2
769 i += 2
771 elif (i + 4 < len_l and l[i + 1] == l[i + 3] == ' ' and
772 info.pertain(l[i + 2])):
773 # Jan of 01
774 # In this case, 01 is clearly year
775 if l[i + 4].isdigit():
776 # Convert it here to become unambiguous
777 value = int(l[i + 4])
778 year = str(info.convertyear(value))
779 ymd.append(year, 'Y')
780 else:
781 # Wrong guess
782 pass
783 # TODO: not hit in tests
784 i += 4
786 # Check am/pm
787 elif info.ampm(l[i]) is not None:
788 value = info.ampm(l[i])
789 val_is_ampm = self._ampm_valid(res.hour, res.ampm, fuzzy)
791 if val_is_ampm:
792 res.hour = self._adjust_ampm(res.hour, value)
793 res.ampm = value
795 elif fuzzy:
796 skipped_idxs.append(i)
798 # Check for a timezone name
799 elif self._could_be_tzname(res.hour, res.tzname, res.tzoffset, l[i]):
800 res.tzname = l[i]
801 res.tzoffset = info.tzoffset(res.tzname)
803 # Check for something like GMT+3, or BRST+3. Notice
804 # that it doesn't mean "I am 3 hours after GMT", but
805 # "my time +3 is GMT". If found, we reverse the
806 # logic so that timezone parsing code will get it
807 # right.
808 if i + 1 < len_l and l[i + 1] in ('+', '-'):
809 l[i + 1] = ('+', '-')[l[i + 1] == '+']
810 res.tzoffset = None
811 if info.utczone(res.tzname):
812 # With something like GMT+3, the timezone
813 # is *not* GMT.
814 res.tzname = None
816 # Check for a numbered timezone
817 elif res.hour is not None and l[i] in ('+', '-'):
818 signal = (-1, 1)[l[i] == '+']
819 len_li = len(l[i + 1])
821 # TODO: check that l[i + 1] is integer?
822 if len_li == 4:
823 # -0300
824 hour_offset = int(l[i + 1][:2])
825 min_offset = int(l[i + 1][2:])
826 elif i + 2 < len_l and l[i + 2] == ':':
827 # -03:00
828 hour_offset = int(l[i + 1])
829 min_offset = int(l[i + 3]) # TODO: Check that l[i+3] is minute-like?
830 i += 2
831 elif len_li <= 2:
832 # -[0]3
833 hour_offset = int(l[i + 1][:2])
834 min_offset = 0
835 else:
836 raise ValueError(timestr)
838 res.tzoffset = signal * (hour_offset * 3600 + min_offset * 60)
840 # Look for a timezone name between parenthesis
841 if (i + 5 < len_l and
842 info.jump(l[i + 2]) and l[i + 3] == '(' and
843 l[i + 5] == ')' and
844 3 <= len(l[i + 4]) and
845 self._could_be_tzname(res.hour, res.tzname,
846 None, l[i + 4])):
847 # -0300 (BRST)
848 res.tzname = l[i + 4]
849 i += 4
851 i += 1
853 # Check jumps
854 elif not (info.jump(l[i]) or fuzzy):
855 raise ValueError(timestr)
857 else:
858 skipped_idxs.append(i)
859 i += 1
861 # Process year/month/day
862 year, month, day = ymd.resolve_ymd(yearfirst, dayfirst)
864 res.century_specified = ymd.century_specified
865 res.year = year
866 res.month = month
867 res.day = day
869 except (IndexError, ValueError):
870 return None, None
872 if not info.validate(res):
873 return None, None
875 if fuzzy_with_tokens:
876 skipped_tokens = self._recombine_skipped(l, skipped_idxs)
877 return res, tuple(skipped_tokens)
878 else:
879 return res, None
881 def _parse_numeric_token(self, tokens, idx, info, ymd, res, fuzzy):
882 # Token is a number
883 value_repr = tokens[idx]
884 try:
885 value = self._to_decimal(value_repr)
886 except Exception as e:
887 six.raise_from(ValueError('Unknown numeric token'), e)
889 len_li = len(value_repr)
891 len_l = len(tokens)
893 if (len(ymd) == 3 and len_li in (2, 4) and
894 res.hour is None and
895 (idx + 1 >= len_l or
896 (tokens[idx + 1] != ':' and
897 info.hms(tokens[idx + 1]) is None))):
898 # 19990101T23[59]
899 s = tokens[idx]
900 res.hour = int(s[:2])
902 if len_li == 4:
903 res.minute = int(s[2:])
905 elif len_li == 6 or (len_li > 6 and tokens[idx].find('.') == 6):
906 # YYMMDD or HHMMSS[.ss]
907 s = tokens[idx]
909 if not ymd and '.' not in tokens[idx]:
910 ymd.append(s[:2])
911 ymd.append(s[2:4])
912 ymd.append(s[4:])
913 else:
914 # 19990101T235959[.59]
916 # TODO: Check if res attributes already set.
917 res.hour = int(s[:2])
918 res.minute = int(s[2:4])
919 res.second, res.microsecond = self._parsems(s[4:])
921 elif len_li in (8, 12, 14):
922 # YYYYMMDD
923 s = tokens[idx]
924 ymd.append(s[:4], 'Y')
925 ymd.append(s[4:6])
926 ymd.append(s[6:8])
928 if len_li > 8:
929 res.hour = int(s[8:10])
930 res.minute = int(s[10:12])
932 if len_li > 12:
933 res.second = int(s[12:])
935 elif self._find_hms_idx(idx, tokens, info, allow_jump=True) is not None:
936 # HH[ ]h or MM[ ]m or SS[.ss][ ]s
937 hms_idx = self._find_hms_idx(idx, tokens, info, allow_jump=True)
938 (idx, hms) = self._parse_hms(idx, tokens, info, hms_idx)
939 if hms is not None:
940 # TODO: checking that hour/minute/second are not
941 # already set?
942 self._assign_hms(res, value_repr, hms)
944 elif idx + 2 < len_l and tokens[idx + 1] == ':':
945 # HH:MM[:SS[.ss]]
946 res.hour = int(value)
947 value = self._to_decimal(tokens[idx + 2]) # TODO: try/except for this?
948 (res.minute, res.second) = self._parse_min_sec(value)
950 if idx + 4 < len_l and tokens[idx + 3] == ':':
951 res.second, res.microsecond = self._parsems(tokens[idx + 4])
953 idx += 2
955 idx += 2
957 elif idx + 1 < len_l and tokens[idx + 1] in ('-', '/', '.'):
958 sep = tokens[idx + 1]
959 ymd.append(value_repr)
961 if idx + 2 < len_l and not info.jump(tokens[idx + 2]):
962 if tokens[idx + 2].isdigit():
963 # 01-01[-01]
964 ymd.append(tokens[idx + 2])
965 else:
966 # 01-Jan[-01]
967 value = info.month(tokens[idx + 2])
969 if value is not None:
970 ymd.append(value, 'M')
971 else:
972 raise ValueError()
974 if idx + 3 < len_l and tokens[idx + 3] == sep:
975 # We have three members
976 value = info.month(tokens[idx + 4])
978 if value is not None:
979 ymd.append(value, 'M')
980 else:
981 ymd.append(tokens[idx + 4])
982 idx += 2
984 idx += 1
985 idx += 1
987 elif idx + 1 >= len_l or info.jump(tokens[idx + 1]):
988 if idx + 2 < len_l and info.ampm(tokens[idx + 2]) is not None:
989 # 12 am
990 hour = int(value)
991 res.hour = self._adjust_ampm(hour, info.ampm(tokens[idx + 2]))
992 idx += 1
993 else:
994 # Year, month or day
995 ymd.append(value)
996 idx += 1
998 elif info.ampm(tokens[idx + 1]) is not None and (0 <= value < 24):
999 # 12am
1000 hour = int(value)
1001 res.hour = self._adjust_ampm(hour, info.ampm(tokens[idx + 1]))
1002 idx += 1
1004 elif ymd.could_be_day(value):
1005 ymd.append(value)
1007 elif not fuzzy:
1008 raise ValueError()
1010 return idx
1012 def _find_hms_idx(self, idx, tokens, info, allow_jump):
1013 len_l = len(tokens)
1015 if idx+1 < len_l and info.hms(tokens[idx+1]) is not None:
1016 # There is an "h", "m", or "s" label following this token. We take
1017 # assign the upcoming label to the current token.
1018 # e.g. the "12" in 12h"
1019 hms_idx = idx + 1
1021 elif (allow_jump and idx+2 < len_l and tokens[idx+1] == ' ' and
1022 info.hms(tokens[idx+2]) is not None):
1023 # There is a space and then an "h", "m", or "s" label.
1024 # e.g. the "12" in "12 h"
1025 hms_idx = idx + 2
1027 elif idx > 0 and info.hms(tokens[idx-1]) is not None:
1028 # There is a "h", "m", or "s" preceding this token. Since neither
1029 # of the previous cases was hit, there is no label following this
1030 # token, so we use the previous label.
1031 # e.g. the "04" in "12h04"
1032 hms_idx = idx-1
1034 elif (1 < idx == len_l-1 and tokens[idx-1] == ' ' and
1035 info.hms(tokens[idx-2]) is not None):
1036 # If we are looking at the final token, we allow for a
1037 # backward-looking check to skip over a space.
1038 # TODO: Are we sure this is the right condition here?
1039 hms_idx = idx - 2
1041 else:
1042 hms_idx = None
1044 return hms_idx
1046 def _assign_hms(self, res, value_repr, hms):
1047 # See GH issue #427, fixing float rounding
1048 value = self._to_decimal(value_repr)
1050 if hms == 0:
1051 # Hour
1052 res.hour = int(value)
1053 if value % 1:
1054 res.minute = int(60*(value % 1))
1056 elif hms == 1:
1057 (res.minute, res.second) = self._parse_min_sec(value)
1059 elif hms == 2:
1060 (res.second, res.microsecond) = self._parsems(value_repr)
1062 def _could_be_tzname(self, hour, tzname, tzoffset, token):
1063 return (hour is not None and
1064 tzname is None and
1065 tzoffset is None and
1066 len(token) <= 5 and
1067 (all(x in string.ascii_uppercase for x in token)
1068 or token in self.info.UTCZONE))
1070 def _ampm_valid(self, hour, ampm, fuzzy):
1071 """
1072 For fuzzy parsing, 'a' or 'am' (both valid English words)
1073 may erroneously trigger the AM/PM flag. Deal with that
1074 here.
1075 """
1076 val_is_ampm = True
1078 # If there's already an AM/PM flag, this one isn't one.
1079 if fuzzy and ampm is not None:
1080 val_is_ampm = False
1082 # If AM/PM is found and hour is not, raise a ValueError
1083 if hour is None:
1084 if fuzzy:
1085 val_is_ampm = False
1086 else:
1087 raise ValueError('No hour specified with AM or PM flag.')
1088 elif not 0 <= hour <= 12:
1089 # If AM/PM is found, it's a 12 hour clock, so raise
1090 # an error for invalid range
1091 if fuzzy:
1092 val_is_ampm = False
1093 else:
1094 raise ValueError('Invalid hour specified for 12-hour clock.')
1096 return val_is_ampm
1098 def _adjust_ampm(self, hour, ampm):
1099 if hour < 12 and ampm == 1:
1100 hour += 12
1101 elif hour == 12 and ampm == 0:
1102 hour = 0
1103 return hour
1105 def _parse_min_sec(self, value):
1106 # TODO: Every usage of this function sets res.second to the return
1107 # value. Are there any cases where second will be returned as None and
1108 # we *don't* want to set res.second = None?
1109 minute = int(value)
1110 second = None
1112 sec_remainder = value % 1
1113 if sec_remainder:
1114 second = int(60 * sec_remainder)
1115 return (minute, second)
1117 def _parse_hms(self, idx, tokens, info, hms_idx):
1118 # TODO: Is this going to admit a lot of false-positives for when we
1119 # just happen to have digits and "h", "m" or "s" characters in non-date
1120 # text? I guess hex hashes won't have that problem, but there's plenty
1121 # of random junk out there.
1122 if hms_idx is None:
1123 hms = None
1124 new_idx = idx
1125 elif hms_idx > idx:
1126 hms = info.hms(tokens[hms_idx])
1127 new_idx = hms_idx
1128 else:
1129 # Looking backwards, increment one.
1130 hms = info.hms(tokens[hms_idx]) + 1
1131 new_idx = idx
1133 return (new_idx, hms)
1135 # ------------------------------------------------------------------
1136 # Handling for individual tokens. These are kept as methods instead
1137 # of functions for the sake of customizability via subclassing.
1139 def _parsems(self, value):
1140 """Parse a I[.F] seconds value into (seconds, microseconds)."""
1141 if "." not in value:
1142 return int(value), 0
1143 else:
1144 i, f = value.split(".")
1145 return int(i), int(f.ljust(6, "0")[:6])
1147 def _to_decimal(self, val):
1148 try:
1149 decimal_value = Decimal(val)
1150 # See GH 662, edge case, infinite value should not be converted
1151 # via `_to_decimal`
1152 if not decimal_value.is_finite():
1153 raise ValueError("Converted decimal value is infinite or NaN")
1154 except Exception as e:
1155 msg = "Could not convert %s to decimal" % val
1156 six.raise_from(ValueError(msg), e)
1157 else:
1158 return decimal_value
1160 # ------------------------------------------------------------------
1161 # Post-Parsing construction of datetime output. These are kept as
1162 # methods instead of functions for the sake of customizability via
1163 # subclassing.
1165 def _build_tzinfo(self, tzinfos, tzname, tzoffset):
1166 if callable(tzinfos):
1167 tzdata = tzinfos(tzname, tzoffset)
1168 else:
1169 tzdata = tzinfos.get(tzname)
1170 # handle case where tzinfo is paased an options that returns None
1171 # eg tzinfos = {'BRST' : None}
1172 if isinstance(tzdata, datetime.tzinfo) or tzdata is None:
1173 tzinfo = tzdata
1174 elif isinstance(tzdata, text_type):
1175 tzinfo = tz.tzstr(tzdata)
1176 elif isinstance(tzdata, integer_types):
1177 tzinfo = tz.tzoffset(tzname, tzdata)
1178 else:
1179 raise TypeError("Offset must be tzinfo subclass, tz string, "
1180 "or int offset.")
1181 return tzinfo
1183 def _build_tzaware(self, naive, res, tzinfos):
1184 if (callable(tzinfos) or (tzinfos and res.tzname in tzinfos)):
1185 tzinfo = self._build_tzinfo(tzinfos, res.tzname, res.tzoffset)
1186 aware = naive.replace(tzinfo=tzinfo)
1187 aware = self._assign_tzname(aware, res.tzname)
1189 elif res.tzname and res.tzname in time.tzname:
1190 aware = naive.replace(tzinfo=tz.tzlocal())
1192 # Handle ambiguous local datetime
1193 aware = self._assign_tzname(aware, res.tzname)
1195 # This is mostly relevant for winter GMT zones parsed in the UK
1196 if (aware.tzname() != res.tzname and
1197 res.tzname in self.info.UTCZONE):
1198 aware = aware.replace(tzinfo=tz.UTC)
1200 elif res.tzoffset == 0:
1201 aware = naive.replace(tzinfo=tz.UTC)
1203 elif res.tzoffset:
1204 aware = naive.replace(tzinfo=tz.tzoffset(res.tzname, res.tzoffset))
1206 elif not res.tzname and not res.tzoffset:
1207 # i.e. no timezone information was found.
1208 aware = naive
1210 elif res.tzname:
1211 # tz-like string was parsed but we don't know what to do
1212 # with it
1213 warnings.warn("tzname {tzname} identified but not understood. "
1214 "Pass `tzinfos` argument in order to correctly "
1215 "return a timezone-aware datetime. In a future "
1216 "version, this will raise an "
1217 "exception.".format(tzname=res.tzname),
1218 category=UnknownTimezoneWarning)
1219 aware = naive
1221 return aware
1223 def _build_naive(self, res, default):
1224 repl = {}
1225 for attr in ("year", "month", "day", "hour",
1226 "minute", "second", "microsecond"):
1227 value = getattr(res, attr)
1228 if value is not None:
1229 repl[attr] = value
1231 if 'day' not in repl:
1232 # If the default day exceeds the last day of the month, fall back
1233 # to the end of the month.
1234 cyear = default.year if res.year is None else res.year
1235 cmonth = default.month if res.month is None else res.month
1236 cday = default.day if res.day is None else res.day
1238 if cday > monthrange(cyear, cmonth)[1]:
1239 repl['day'] = monthrange(cyear, cmonth)[1]
1241 naive = default.replace(**repl)
1243 if res.weekday is not None and not res.day:
1244 naive = naive + relativedelta.relativedelta(weekday=res.weekday)
1246 return naive
1248 def _assign_tzname(self, dt, tzname):
1249 if dt.tzname() != tzname:
1250 new_dt = tz.enfold(dt, fold=1)
1251 if new_dt.tzname() == tzname:
1252 return new_dt
1254 return dt
1256 def _recombine_skipped(self, tokens, skipped_idxs):
1257 """
1258 >>> tokens = ["foo", " ", "bar", " ", "19June2000", "baz"]
1259 >>> skipped_idxs = [0, 1, 2, 5]
1260 >>> _recombine_skipped(tokens, skipped_idxs)
1261 ["foo bar", "baz"]
1262 """
1263 skipped_tokens = []
1264 for i, idx in enumerate(sorted(skipped_idxs)):
1265 if i > 0 and idx - 1 == skipped_idxs[i - 1]:
1266 skipped_tokens[-1] = skipped_tokens[-1] + tokens[idx]
1267 else:
1268 skipped_tokens.append(tokens[idx])
1270 return skipped_tokens
1273DEFAULTPARSER = parser()
1276def parse(timestr, parserinfo=None, **kwargs):
1277 """
1279 Parse a string in one of the supported formats, using the
1280 ``parserinfo`` parameters.
1282 :param timestr:
1283 A string containing a date/time stamp.
1285 :param parserinfo:
1286 A :class:`parserinfo` object containing parameters for the parser.
1287 If ``None``, the default arguments to the :class:`parserinfo`
1288 constructor are used.
1290 The ``**kwargs`` parameter takes the following keyword arguments:
1292 :param default:
1293 The default datetime object, if this is a datetime object and not
1294 ``None``, elements specified in ``timestr`` replace elements in the
1295 default object.
1297 :param ignoretz:
1298 If set ``True``, time zones in parsed strings are ignored and a naive
1299 :class:`datetime` object is returned.
1301 :param tzinfos:
1302 Additional time zone names / aliases which may be present in the
1303 string. This argument maps time zone names (and optionally offsets
1304 from those time zones) to time zones. This parameter can be a
1305 dictionary with timezone aliases mapping time zone names to time
1306 zones or a function taking two parameters (``tzname`` and
1307 ``tzoffset``) and returning a time zone.
1309 The timezones to which the names are mapped can be an integer
1310 offset from UTC in seconds or a :class:`tzinfo` object.
1312 .. doctest::
1313 :options: +NORMALIZE_WHITESPACE
1315 >>> from dateutil.parser import parse
1316 >>> from dateutil.tz import gettz
1317 >>> tzinfos = {"BRST": -7200, "CST": gettz("America/Chicago")}
1318 >>> parse("2012-01-19 17:21:00 BRST", tzinfos=tzinfos)
1319 datetime.datetime(2012, 1, 19, 17, 21, tzinfo=tzoffset(u'BRST', -7200))
1320 >>> parse("2012-01-19 17:21:00 CST", tzinfos=tzinfos)
1321 datetime.datetime(2012, 1, 19, 17, 21,
1322 tzinfo=tzfile('/usr/share/zoneinfo/America/Chicago'))
1324 This parameter is ignored if ``ignoretz`` is set.
1326 :param dayfirst:
1327 Whether to interpret the first value in an ambiguous 3-integer date
1328 (e.g. 01/05/09) as the day (``True``) or month (``False``). If
1329 ``yearfirst`` is set to ``True``, this distinguishes between YDM and
1330 YMD. If set to ``None``, this value is retrieved from the current
1331 :class:`parserinfo` object (which itself defaults to ``False``).
1333 :param yearfirst:
1334 Whether to interpret the first value in an ambiguous 3-integer date
1335 (e.g. 01/05/09) as the year. If ``True``, the first number is taken to
1336 be the year, otherwise the last number is taken to be the year. If
1337 this is set to ``None``, the value is retrieved from the current
1338 :class:`parserinfo` object (which itself defaults to ``False``).
1340 :param fuzzy:
1341 Whether to allow fuzzy parsing, allowing for string like "Today is
1342 January 1, 2047 at 8:21:00AM".
1344 :param fuzzy_with_tokens:
1345 If ``True``, ``fuzzy`` is automatically set to True, and the parser
1346 will return a tuple where the first element is the parsed
1347 :class:`datetime.datetime` datetimestamp and the second element is
1348 a tuple containing the portions of the string which were ignored:
1350 .. doctest::
1352 >>> from dateutil.parser import parse
1353 >>> parse("Today is January 1, 2047 at 8:21:00AM", fuzzy_with_tokens=True)
1354 (datetime.datetime(2047, 1, 1, 8, 21), (u'Today is ', u' ', u'at '))
1356 :return:
1357 Returns a :class:`datetime.datetime` object or, if the
1358 ``fuzzy_with_tokens`` option is ``True``, returns a tuple, the
1359 first element being a :class:`datetime.datetime` object, the second
1360 a tuple containing the fuzzy tokens.
1362 :raises ValueError:
1363 Raised for invalid or unknown string format, if the provided
1364 :class:`tzinfo` is not in a valid format, or if an invalid date
1365 would be created.
1367 :raises OverflowError:
1368 Raised if the parsed date exceeds the largest valid C integer on
1369 your system.
1370 """
1371 if parserinfo:
1372 return parser(parserinfo).parse(timestr, **kwargs)
1373 else:
1374 return DEFAULTPARSER.parse(timestr, **kwargs)
1377class _tzparser(object):
1379 class _result(_resultbase):
1381 __slots__ = ["stdabbr", "stdoffset", "dstabbr", "dstoffset",
1382 "start", "end"]
1384 class _attr(_resultbase):
1385 __slots__ = ["month", "week", "weekday",
1386 "yday", "jyday", "day", "time"]
1388 def __repr__(self):
1389 return self._repr("")
1391 def __init__(self):
1392 _resultbase.__init__(self)
1393 self.start = self._attr()
1394 self.end = self._attr()
1396 def parse(self, tzstr):
1397 res = self._result()
1398 l = [x for x in re.split(r'([,:.]|[a-zA-Z]+|[0-9]+)',tzstr) if x]
1399 used_idxs = list()
1400 try:
1402 len_l = len(l)
1404 i = 0
1405 while i < len_l:
1406 # BRST+3[BRDT[+2]]
1407 j = i
1408 while j < len_l and not [x for x in l[j]
1409 if x in "0123456789:,-+"]:
1410 j += 1
1411 if j != i:
1412 if not res.stdabbr:
1413 offattr = "stdoffset"
1414 res.stdabbr = "".join(l[i:j])
1415 else:
1416 offattr = "dstoffset"
1417 res.dstabbr = "".join(l[i:j])
1419 for ii in range(j):
1420 used_idxs.append(ii)
1421 i = j
1422 if (i < len_l and (l[i] in ('+', '-') or l[i][0] in
1423 "0123456789")):
1424 if l[i] in ('+', '-'):
1425 # Yes, that's right. See the TZ variable
1426 # documentation.
1427 signal = (1, -1)[l[i] == '+']
1428 used_idxs.append(i)
1429 i += 1
1430 else:
1431 signal = -1
1432 len_li = len(l[i])
1433 if len_li == 4:
1434 # -0300
1435 setattr(res, offattr, (int(l[i][:2]) * 3600 +
1436 int(l[i][2:]) * 60) * signal)
1437 elif i + 1 < len_l and l[i + 1] == ':':
1438 # -03:00
1439 setattr(res, offattr,
1440 (int(l[i]) * 3600 +
1441 int(l[i + 2]) * 60) * signal)
1442 used_idxs.append(i)
1443 i += 2
1444 elif len_li <= 2:
1445 # -[0]3
1446 setattr(res, offattr,
1447 int(l[i][:2]) * 3600 * signal)
1448 else:
1449 return None
1450 used_idxs.append(i)
1451 i += 1
1452 if res.dstabbr:
1453 break
1454 else:
1455 break
1458 if i < len_l:
1459 for j in range(i, len_l):
1460 if l[j] == ';':
1461 l[j] = ','
1463 assert l[i] == ','
1465 i += 1
1467 if i >= len_l:
1468 pass
1469 elif (8 <= l.count(',') <= 9 and
1470 not [y for x in l[i:] if x != ','
1471 for y in x if y not in "0123456789+-"]):
1472 # GMT0BST,3,0,30,3600,10,0,26,7200[,3600]
1473 for x in (res.start, res.end):
1474 x.month = int(l[i])
1475 used_idxs.append(i)
1476 i += 2
1477 if l[i] == '-':
1478 value = int(l[i + 1]) * -1
1479 used_idxs.append(i)
1480 i += 1
1481 else:
1482 value = int(l[i])
1483 used_idxs.append(i)
1484 i += 2
1485 if value:
1486 x.week = value
1487 x.weekday = (int(l[i]) - 1) % 7
1488 else:
1489 x.day = int(l[i])
1490 used_idxs.append(i)
1491 i += 2
1492 x.time = int(l[i])
1493 used_idxs.append(i)
1494 i += 2
1495 if i < len_l:
1496 if l[i] in ('-', '+'):
1497 signal = (-1, 1)[l[i] == "+"]
1498 used_idxs.append(i)
1499 i += 1
1500 else:
1501 signal = 1
1502 used_idxs.append(i)
1503 res.dstoffset = (res.stdoffset + int(l[i]) * signal)
1505 # This was a made-up format that is not in normal use
1506 warn(('Parsed time zone "%s"' % tzstr) +
1507 'is in a non-standard dateutil-specific format, which ' +
1508 'is now deprecated; support for parsing this format ' +
1509 'will be removed in future versions. It is recommended ' +
1510 'that you switch to a standard format like the GNU ' +
1511 'TZ variable format.', tz.DeprecatedTzFormatWarning)
1512 elif (l.count(',') == 2 and l[i:].count('/') <= 2 and
1513 not [y for x in l[i:] if x not in (',', '/', 'J', 'M',
1514 '.', '-', ':')
1515 for y in x if y not in "0123456789"]):
1516 for x in (res.start, res.end):
1517 if l[i] == 'J':
1518 # non-leap year day (1 based)
1519 used_idxs.append(i)
1520 i += 1
1521 x.jyday = int(l[i])
1522 elif l[i] == 'M':
1523 # month[-.]week[-.]weekday
1524 used_idxs.append(i)
1525 i += 1
1526 x.month = int(l[i])
1527 used_idxs.append(i)
1528 i += 1
1529 assert l[i] in ('-', '.')
1530 used_idxs.append(i)
1531 i += 1
1532 x.week = int(l[i])
1533 if x.week == 5:
1534 x.week = -1
1535 used_idxs.append(i)
1536 i += 1
1537 assert l[i] in ('-', '.')
1538 used_idxs.append(i)
1539 i += 1
1540 x.weekday = (int(l[i]) - 1) % 7
1541 else:
1542 # year day (zero based)
1543 x.yday = int(l[i]) + 1
1545 used_idxs.append(i)
1546 i += 1
1548 if i < len_l and l[i] == '/':
1549 used_idxs.append(i)
1550 i += 1
1551 # start time
1552 len_li = len(l[i])
1553 if len_li == 4:
1554 # -0300
1555 x.time = (int(l[i][:2]) * 3600 +
1556 int(l[i][2:]) * 60)
1557 elif i + 1 < len_l and l[i + 1] == ':':
1558 # -03:00
1559 x.time = int(l[i]) * 3600 + int(l[i + 2]) * 60
1560 used_idxs.append(i)
1561 i += 2
1562 if i + 1 < len_l and l[i + 1] == ':':
1563 used_idxs.append(i)
1564 i += 2
1565 x.time += int(l[i])
1566 elif len_li <= 2:
1567 # -[0]3
1568 x.time = (int(l[i][:2]) * 3600)
1569 else:
1570 return None
1571 used_idxs.append(i)
1572 i += 1
1574 assert i == len_l or l[i] == ','
1576 i += 1
1578 assert i >= len_l
1580 except (IndexError, ValueError, AssertionError):
1581 return None
1583 unused_idxs = set(range(len_l)).difference(used_idxs)
1584 res.any_unused_tokens = not {l[n] for n in unused_idxs}.issubset({",",":"})
1585 return res
1588DEFAULTTZPARSER = _tzparser()
1591def _parsetz(tzstr):
1592 return DEFAULTTZPARSER.parse(tzstr)
1595class ParserError(ValueError):
1596 """Error class for representing failure to parse a datetime string."""
1597 def __str__(self):
1598 try:
1599 return self.args[0] % self.args[1:]
1600 except (TypeError, IndexError):
1601 return super(ParserError, self).__str__()
1603 def __repr__(self):
1604 return "%s(%s)" % (self.__class__.__name__, str(self))
1607class UnknownTimezoneWarning(RuntimeWarning):
1608 """Raised when the parser finds a timezone it cannot parse into a tzinfo"""
1609# vim:ts=4:sw=4:et