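# NOTE(review): the lines below appear to be viewer chrome from an HTML
# coverage report that was captured together with the module source; they
# are not part of the module itself.
#
# Hide keyboard shortcuts
#
# Hot-keys on this page
#
#   r m x p   toggle line displays
#   j k       next/prev highlighted chunk
#   0         (zero) top of page
#   1         (one) first highlighted chunk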

"""
Tools for working with dates
"""

import datetime
import re

import numpy as np
from pandas import to_datetime

from statsmodels.compat.python import lrange, lzip, lmap, asstr

10 

# Maps a quarter label -- decimal "1".."4" or Roman "I".."IV" -- to the
# (month, day) of the last day of that quarter.  Keys are upper case, so
# callers must upper-case Roman numerals before the lookup.
_quarter_to_day = {
    "1": (3, 31),
    "2": (6, 30),
    "3": (9, 30),
    "4": (12, 31),
    "I": (3, 31),
    "II": (6, 30),
    "III": (9, 30),
    "IV": (12, 31),
}

21 

22 

# Number of days in each month of a non-leap year, January first.  The
# leap day is not represented here; date_parser adds it for February.
_mdays = [31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]

# (month number, last day of month) pairs for months 1 through 12.
_months_with_days = list(zip(range(1, 13), _mdays))

# Maps a month label -- decimal "1".."12" or Roman "I".."XII" -- to its
# (month number, last day of month) pair.  Keys are upper case.
_month_to_day = dict(zip(map(str, range(1, 13)), _months_with_days))
_month_to_day.update(dict(zip(["I", "II", "III", "IV", "V", "VI",
                               "VII", "VIII", "IX", "X", "XI", "XII"],
                              _months_with_days)))

29 

# regex patterns
#
# The functions in this module match all three with
# re.IGNORECASE | re.VERBOSE, so whitespace and "#" comments inside the
# triple-quoted patterns are ignored and letters match in either case.

# A bare year of one to four digits.
_y_pattern = r'^\d?\d?\d?\d$'

_q_pattern = r'''
^                       # beginning of string
\d?\d?\d?\d             # one to four digit year, leading zeros allowed

(:?q)                   # separator: "q", optionally preceded by ":"

([1-4]|(I{1,3}|IV))     # quarter as 1-4 or Roman numeral I-IV

$                       # end of string
'''

_m_pattern = r'''
^                       # beginning of string
\d?\d?\d?\d             # one to four digit year, leading zeros allowed

(:?m)                   # separator: "m", optionally preceded by ":"

(
    (1[0-2]|[1-9])                      # month as 1-12, no leading zero
  | (I?X|XI{1,2}|I?V|VI{1,3}|I{1,3})    # or Roman numeral I-XII
)

$                       # end of string
'''

55 

56 

# NOTE: see also ts.extras.isleapyear, which accepts a sequence
def _is_leap(year):
    """
    Return True if `year` is a leap year under the Gregorian rule.

    Parameters
    ----------
    year : int or str
        The year; anything accepted by ``int()``.

    Returns
    -------
    bool
        True when the year is divisible by 4 and is either not divisible
        by 100 or divisible by 400.
    """
    year = int(year)
    return year % 4 == 0 and (year % 100 != 0 or year % 400 == 0)

61 

62 

def date_parser(timestr, parserinfo=None, **kwargs):
    """
    Parse a date string, including abbreviated period dates.

    Handles monthly dates of the form 1999m4, 1999:m4, 1999:mIV, 1999mIV
    and the same for quarterly data with q instead of m.  It is not case
    sensitive.  A bare year such as 1999 is also accepted.  Abbreviated
    dates resolve to the last day of the period, so annual data defaults
    to the end of the year.  Any other string is handed to
    ``pandas.to_datetime``.

    Parameters
    ----------
    timestr : str
        The date string to parse.
    parserinfo : object, optional
        Accepted but not used by this function.
    **kwargs
        Forwarded to ``pandas.to_datetime`` when `timestr` is not an
        abbreviated date; ignored otherwise.

    Returns
    -------
    datetime.datetime
        For abbreviated dates.  For other strings, whatever
        ``pandas.to_datetime`` returns.

    Raises
    ------
    ValueError
        If `timestr` looks like an abbreviated date but its month or
        quarter label is not a known one.
    """
    flags = re.IGNORECASE | re.VERBOSE
    if re.search(_q_pattern, timestr, flags):
        y, q = timestr.replace(":", "").lower().split('q')
        # The lookup tables are keyed by upper-case labels.
        month_day = _quarter_to_day.get(q.upper())
        year = int(y)
    elif re.search(_m_pattern, timestr, flags):
        y, m = timestr.replace(":", "").lower().split('m')
        month_day = _month_to_day.get(m.upper())
        year = int(y)
    elif re.search(_y_pattern, timestr, flags):
        month_day = (12, 31)
        year = int(timestr)
    else:
        return to_datetime(timestr, **kwargs)

    if month_day is None:
        # The pattern matched but the label is not in the lookup table;
        # report it as a bad date rather than leaking a KeyError.
        raise ValueError("Date %s not understood" % timestr)

    month, day = month_day
    # Only monthly dates can land on February; quarter and year ends never
    # do, so this adjustment is a no-op for them.
    if month == 2 and _is_leap(year):
        day += 1

    return datetime.datetime(year, month, day)

88 

89 

def date_range_str(start, end=None, length=None):
    """
    Returns a list of abbreviated date strings.

    Parameters
    ----------
    start : str
        The first abbreviated date, for instance, '1965q1' or '1965m1'.
        A bare year such as '1965' gives an annual range.  The period
        must be written in decimal digits; Roman numerals are not
        supported here.
    end : str, optional
        The last abbreviated date, inclusive, in the same form as
        `start`.  Takes precedence over `length`.
    length : int, optional
        The number of dates to return if `end` is None.

    Returns
    -------
    date_range : list
        List of strings such as '1965m1' or '1965q1', or plain years such
        as '1965' for an annual range.  Empty if `end` precedes `start`
        or `length` is not positive.

    Raises
    ------
    ValueError
        If `start` is not an abbreviated date, if a date cannot be split
        into an integer year and period, if a period is outside the range
        of the frequency, or if neither `end` nor `length` is given.
    """
    flags = re.IGNORECASE | re.VERBOSE
    start = start.lower()
    if re.search(_m_pattern, start, flags):
        annual_freq = 12
        split = 'm'
    elif re.search(_q_pattern, start, flags):
        annual_freq = 4
        split = 'q'
    elif re.search(_y_pattern, start, flags):
        annual_freq = 1
        split = 'a'
        # Give annual dates a dummy period so they parse like the others.
        start += 'a1'
        if end:
            end += 'a1'
    else:
        raise ValueError("Date %s not understood" % start)

    def _split_period(datestr):
        # "1965:q2" -> (1965, 2); int() rejects non-decimal parts.
        year, offset = [int(part)
                        for part in datestr.replace(":", "").split(split)]
        if not 1 <= offset <= annual_freq:
            raise ValueError("Date %s not understood" % datestr)
        return year, offset

    yr1, offset1 = _split_period(start)
    if end is not None:
        yr2, offset2 = _split_period(end.lower())
        nperiods = (yr2 - yr1) * annual_freq + (offset2 - offset1) + 1
    elif length is not None:
        nperiods = length
    else:
        raise ValueError("Either end or length must be given")

    # Number the periods consecutively across years so that year
    # boundaries need no special handling.
    first = yr1 * annual_freq + (offset1 - 1)
    date_arr_range = []
    for period in range(first, first + nperiods):
        year, offset = divmod(period, annual_freq)
        if split == 'a':
            date_arr_range.append(str(year))
        else:
            date_arr_range.append('%d%s%d' % (year, split, offset + 1))
    return date_arr_range

145 

146 

def dates_from_str(dates):
    """
    Turns a sequence of date strings into a list of datetimes.

    Parameters
    ----------
    dates : array_like
        A sequence of abbreviated dates as string. For instance,
        '1996m1' or '1996Q1'. The datetime dates are at the end of the
        period.

    Returns
    -------
    date_list : list
        One entry per input string, each the result of ``date_parser``.
    """
    return [date_parser(date) for date in dates]

164 

165 

def dates_from_range(start, end=None, length=None):
    """
    Builds a range of abbreviated dates and returns them as datetimes.

    Parameters
    ----------
    start : str
        The first abbreviated date, for instance, '1965q1' or '1965m1'
    end : str, optional
        The last abbreviated date if length is None.
    length : int, optional
        The length of the returned list if end is None.

    Examples
    --------
    >>> dates_from_range('1960m1', length=3)
    [datetime.datetime(1960, 1, 31, 0, 0), datetime.datetime(1960, 2, 29, 0, 0), datetime.datetime(1960, 3, 31, 0, 0)]

    Returns
    -------
    date_list : list
        A list of datetime types, one per period, each at the end of its
        period.
    """
    dates = date_range_str(start, end, length)
    return dates_from_str(dates)