Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1"""ISO 8601 date time string parsing 

2 

3Basic usage: 

4>>> import iso8601 

5>>> iso8601.parse_date("2007-01-25T12:00:00Z") 

6datetime.datetime(2007, 1, 25, 12, 0, tzinfo=<iso8601.Utc ...>) 

7>>> 

8 

9""" 

10 

11import datetime 

12from decimal import Decimal 

13import sys 

14import re 

15 

# Public names exported by a star-import of this module.
__all__ = ["parse_date", "ParseError", "UTC", "FixedOffset"]

# Base text type used for isinstance() checks on incoming date strings:
# ``str`` on Python 3, ``basestring`` on Python 2.  The name ``basestring``
# is only evaluated when the version test fails, i.e. on Python 2.
_basestring = str if sys.version_info >= (3, 0, 0) else basestring  # noqa: F821

23 

24 

# Adapted from http://delete.me.uk/2005/03/iso8601.html
#
# Compiled in verbose mode, so whitespace and ``#`` comments inside the
# pattern are ignored.  Named groups: year, monthdash/month, daydash/day,
# separator, hour, minute, second, second_fraction, timezone, tz_sign,
# tz_hour, tz_minute.  Month and day each have a dashed and a compact group;
# at most one of each pair is populated by a match.
ISO8601_REGEX = re.compile(
    r"""
    (?P<year>[0-9]{4})
    (
        (
            (-(?P<monthdash>[0-9]{1,2}))
            |
            (?P<month>[0-9]{2})
            (?!$)  # Don't allow YYYYMM
        )
        (
            (
                (-(?P<daydash>[0-9]{1,2}))
                |
                (?P<day>[0-9]{2})
            )
            (
                (
                    (?P<separator>[ T])
                    (?P<hour>[0-9]{2})
                    (:{0,1}(?P<minute>[0-9]{2})){0,1}
                    (
                        :{0,1}(?P<second>[0-9]{1,2})
                        ([.,](?P<second_fraction>[0-9]+)){0,1}
                    ){0,1}
                    (?P<timezone>
                        Z
                        |
                        (
                            (?P<tz_sign>[-+])
                            (?P<tz_hour>[0-9]{2})
                            :{0,1}
                            (?P<tz_minute>[0-9]{2}){0,1}
                        )
                    ){0,1}
                ){0,1}
            )
        ){0,1}  # YYYY-MM
    ){0,1}  # YYYY only
    $
    """,
    re.VERBOSE
)

69 

class ParseError(ValueError):
    """Error raised when a date string cannot be parsed.

    Subclasses ValueError, so handlers written for ValueError also catch it.
    """

72 

if sys.version_info >= (3, 2, 0):
    # datetime.timezone is available from Python 3.2, so rely on the stdlib
    # tzinfo implementation instead of the hand-written classes below.
    UTC = datetime.timezone.utc

    def FixedOffset(offset_hours, offset_minutes, name):
        """Build a tzinfo with a fixed offset from UTC.

        :param offset_hours: Hours part of the offset (negative for west)
        :param offset_minutes: Minutes part of the offset (negative for west)
        :param name: Name associated with the offset, returned by tzname()
        :returns: A datetime.timezone instance
        :raises: ValueError (from datetime.timezone) when the combined offset
                 is not strictly between -24 and +24 hours.

        """
        return datetime.timezone(
            datetime.timedelta(
                hours=offset_hours, minutes=offset_minutes),
            name)
else:
    # Yoinked from python docs
    ZERO = datetime.timedelta(0)

    class Utc(datetime.tzinfo):
        """UTC Timezone

        """
        def utcoffset(self, dt):
            return ZERO

        def tzname(self, dt):
            return "UTC"

        def dst(self, dt):
            return ZERO

        def __repr__(self):
            return "<iso8601.Utc>"

    UTC = Utc()

    class FixedOffset(datetime.tzinfo):
        """Fixed offset in hours and minutes from UTC

        Two instances compare (and hash) equal when both their offset and
        their name are equal.

        """
        def __init__(self, offset_hours, offset_minutes, name):
            # The raw constructor arguments are kept so __getinitargs__ can
            # hand them back unchanged.
            self.__offset_hours = offset_hours
            self.__offset_minutes = offset_minutes
            self.__offset = datetime.timedelta(
                hours=offset_hours, minutes=offset_minutes)
            self.__name = name

        def __eq__(self, other):
            if isinstance(other, FixedOffset):
                return (
                    (other.__offset == self.__offset)
                    and
                    (other.__name == self.__name)
                )
            return NotImplemented

        def __ne__(self, other):
            # Python 2 does not derive != from ==, so without this method
            # two equal offsets would still report as "not equal".
            result = self.__eq__(other)
            if result is NotImplemented:
                return result
            return not result

        def __hash__(self):
            # Keep hashing consistent with __eq__: equal instances must
            # produce equal hashes.
            return hash((self.__offset, self.__name))

        def __getinitargs__(self):
            # NOTE(review): presumably consumed when pickling tzinfo
            # instances -- confirm against the datetime.tzinfo docs.
            return (self.__offset_hours, self.__offset_minutes, self.__name)

        def utcoffset(self, dt):
            return self.__offset

        def tzname(self, dt):
            return self.__name

        def dst(self, dt):
            return ZERO

        def __repr__(self):
            return "<FixedOffset %r %r>" % (self.__name, self.__offset)

135 

136 

def to_int(d, key, default_to_zero=False, default=None, required=True):
    """Look up ``key`` in ``d`` and return it converted to an int.

    :param d: Mapping to read from (anything offering ``.get``)
    :param key: Key to look up in ``d``
    :param default_to_zero: If the value is None or empty, treat it as zero
    :param default: Used in place of a missing or falsy value in the dict
    :param required: When true, a value that is still None after applying
                     ``default`` raises ParseError; when false, None is
                     returned instead
    :returns: The value as an int, or None when nothing was found and
              ``required`` is false
    :raises: ParseError when no value is available and ``required`` is true

    """
    raw = d.get(key)
    if not raw:
        # Missing, None and empty values all fall back to the default.
        raw = default
    if default_to_zero and raw in ("", None):
        return 0
    if raw is not None:
        return int(raw)
    if required:
        raise ParseError("Unable to read %s from %s" % (key, d))
    return None

152 

def parse_timezone(matches, default_timezone=UTC):
    """Turn the timezone groups of a regex match into a tzinfo object.

    :param matches: Dict of named groups; the keys read are "timezone",
                    "tz_sign", "tz_hour" and "tz_minute"
    :param default_timezone: Returned as-is when no timezone was matched
    :returns: UTC for "Z", ``default_timezone`` when the timezone group is
              None, otherwise a FixedOffset named like "+HH:MM"

    """
    tz_text = matches["timezone"]
    if tz_text == "Z":
        return UTC
    if tz_text is None:
        # This isn't strictly correct, but it's common to encounter dates
        # without timezones so the default (which defaults to UTC) is
        # assumed.  Addresses issue 4.
        return default_timezone

    sign = matches["tz_sign"]
    hours = to_int(matches, "tz_hour")
    minutes = to_int(matches, "tz_minute", default_to_zero=True)
    # The name is built from the unsigned components before any negation.
    description = "%s%02d:%02d" % (sign, hours, minutes)
    # A "-" sign applies to both components of the offset.
    direction = -1 if sign == "-" else 1
    return FixedOffset(direction * hours, direction * minutes, description)

173 

def parse_date(datestring, default_timezone=UTC):
    """Parses ISO 8601 dates into datetime objects

    The timezone is parsed from the date string. However it is quite common to
    have dates without a timezone (not strictly correct). In this case the
    default timezone specified in default_timezone is used. This is UTC by
    default.

    :param datestring: The date to parse as a string
    :param default_timezone: A datetime tzinfo instance to use when no timezone
                             is specified in the datestring. If this is set to
                             None then a naive datetime object is returned.
    :returns: A datetime.datetime instance
    :raises: ParseError when datestring is not a string, when it does not
             match the ISO 8601 pattern, or when there is a problem building
             the timezone or the datetime instance from the matched values.

    """
    if not isinstance(datestring, _basestring):
        # Wrap the value in a 1-tuple: a bare tuple argument would otherwise
        # be unpacked by the % operator and raise TypeError, not ParseError.
        raise ParseError("Expecting a string %r" % (datestring,))
    m = ISO8601_REGEX.match(datestring)
    if not m:
        raise ParseError("Unable to parse date string %r" % (datestring,))
    groups = m.groupdict()

    # Interpret the fraction digits as "0.<digits>" seconds and scale to
    # microseconds; int() drops anything finer than one microsecond.
    microsecond = int(
        Decimal("0.%s" % (groups["second_fraction"] or 0))
        * Decimal("1000000.0"))

    try:
        # Timezone construction happens inside the try block so that an
        # unusable offset is reported as ParseError like every other
        # failure to build the result.
        tz = parse_timezone(groups, default_timezone=default_timezone)
        return datetime.datetime(
            year=to_int(groups, "year"),
            # Month and day: use the compact group if present, else the
            # dashed group, else 1.
            month=to_int(groups, "month", default=to_int(groups, "monthdash", required=False, default=1)),
            day=to_int(groups, "day", default=to_int(groups, "daydash", required=False, default=1)),
            hour=to_int(groups, "hour", default_to_zero=True),
            minute=to_int(groups, "minute", default_to_zero=True),
            second=to_int(groups, "second", default_to_zero=True),
            microsecond=microsecond,
            tzinfo=tz,
        )
    except Exception as e:
        raise ParseError(e)

214 raise ParseError(e)