Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1 

2import numpy as np 

3from numpy import (dot, eye, diag_indices, zeros, ones, diag, 

4 asarray, r_) 

5from numpy.linalg import solve 

6 

7 

8# def denton(indicator, benchmark, freq="aq", **kwarg): 

9# """ 

10# Denton's method to convert low-frequency to high frequency data. 

11# 

12# Parameters 

13# ---------- 

14# benchmark : array_like 

15# The higher frequency benchmark. A 1d or 2d data series in columns. 

16# If 2d, then M series are assumed. 

17# indicator 

18# A low-frequency indicator series. It is assumed that there are no 

19# pre-sample indicators. Ie., the first indicators line up with 

20# the first benchmark. 

21# freq : str {"aq","qm", "other"} 

22# "aq" - Benchmarking an annual series to quarterly. 

23# "mq" - Benchmarking a quarterly series to monthly. 

24# "other" - Custom stride. A kwarg, k, must be supplied. 

25# kwargs : 

26# k : int 

27# The number of high-frequency observations that sum to make an 

28# aggregate low-frequency observation. `k` is used with 

29# `freq` == "other". 

30# Returns 

31# ------- 

32# benchmarked series : ndarray 

33# 

34# Notes 

35# ----- 

36# Denton's method minimizes the distance given by the penalty function, in 

37# a least squares sense, between the unknown benchmarked series and the 

38# indicator series subject to the condition that the sum of the benchmarked 

39# series is equal to the benchmark. 

40# 

41# 

42# References 

43# ---------- 

44# Bloem, A.M, Dippelsman, R.J. and Maehle, N.O. 2001 Quarterly National 

45# Accounts Manual--Concepts, Data Sources, and Compilation. IMF. 

46# http://www.imf.org/external/pubs/ft/qna/2000/Textbook/index.htm 

47# Denton, F.T. 1971. "Adjustment of monthly or quarterly series to annual 

48# totals: an approach based on quadratic minimization." Journal of the 

49# American Statistical Association. 99-102. 

50# 

51# """ 

52# # check arrays and make 2d 

53# indicator = np.asarray(indicator) 

54# if indicator.ndim == 1: 

55# indicator = indicator[:,None] 

56# benchmark = np.asarray(benchmark) 

57# if benchmark.ndim == 1: 

58# benchmark = benchmark[:,None] 

59# 

60# # get dimensions 

61# N = len(indicator) # total number of high-freq 

62# m = len(benchmark) # total number of low-freq 

63# 

64# # number of low-freq observations for aggregate measure 

65# # 4 for annual to quarter and 3 for quarter to monthly 

66# if freq == "aq": 

67# k = 4 

68# elif freq == "qm": 

69# k = 3 

70# elif freq == "other": 

71# k = kwargs.get("k") 

72# if not k: 

73# raise ValueError("k must be supplied with freq=\"other\"") 

74# else: 

75# raise ValueError("freq %s not understood" % freq) 

76# 

77# n = k*m # number of indicator series with a benchmark for back-series 

78# # if k*m != n, then we are going to extrapolate q observations 

79# 

80# B = block_diag(*(np.ones((k,1)),)*m) 

81# 

82# r = benchmark - B.T.dot(indicator) 

83#TODO: take code in the string at the end and implement Denton's original 

84# method with a few of the penalty functions. 

85 

86 

87def dentonm(indicator, benchmark, freq="aq", **kwargs): 

88 """ 

89 Modified Denton's method to convert low-frequency to high-frequency data. 

90 

91 Uses proportionate first-differences as the penalty function. See notes. 

92 

93 Parameters 

94 ---------- 

95 indicator : array_like 

96 A low-frequency indicator series. It is assumed that there are no 

97 pre-sample indicators. Ie., the first indicators line up with 

98 the first benchmark. 

99 benchmark : array_like 

100 The higher frequency benchmark. A 1d or 2d data series in columns. 

101 If 2d, then M series are assumed. 

102 freq : str {"aq","qm", "other"} 

103 The frequency to use in the conversion. 

104 

105 * "aq" - Benchmarking an annual series to quarterly. 

106 * "mq" - Benchmarking a quarterly series to monthly. 

107 * "other" - Custom stride. A kwarg, k, must be supplied. 

108 **kwargs 

109 Additional keyword argument. For example: 

110 

111 * k, an int, the number of high-frequency observations that sum to make 

112 an aggregate low-frequency observation. `k` is used with 

113 `freq` == "other". 

114 

115 Returns 

116 ------- 

117 transformed : ndarray 

118 The transformed series. 

119 

120 Examples 

121 -------- 

122 >>> indicator = [50,100,150,100] * 5 

123 >>> benchmark = [500,400,300,400,500] 

124 >>> benchmarked = dentonm(indicator, benchmark, freq="aq") 

125 

126 Notes 

127 ----- 

128 Denton's method minimizes the distance given by the penalty function, in 

129 a least squares sense, between the unknown benchmarked series and the 

130 indicator series subject to the condition that the sum of the benchmarked 

131 series is equal to the benchmark. The modification allows that the first 

132 value not be pre-determined as is the case with Denton's original method. 

133 If the there is no benchmark provided for the last few indicator 

134 observations, then extrapolation is performed using the last 

135 benchmark-indicator ratio of the previous period. 

136 

137 Minimizes sum((X[t]/I[t] - X[t-1]/I[t-1])**2) 

138 

139 s.t. 

140 

141 sum(X) = A, for each period. Where X is the benchmarked series, I is 

142 the indicator, and A is the benchmark. 

143 

144 References 

145 ---------- 

146 Bloem, A.M, Dippelsman, R.J. and Maehle, N.O. 2001 Quarterly National 

147 Accounts Manual--Concepts, Data Sources, and Compilation. IMF. 

148 http://www.imf.org/external/pubs/ft/qna/2000/Textbook/index.htm 

149 Cholette, P. 1988. "Benchmarking systems of socio-economic time series." 

150 Statistics Canada, Time Series Research and Analysis Division, 

151 Working Paper No TSRA-88-017E. 

152 Denton, F.T. 1971. "Adjustment of monthly or quarterly series to annual 

153 totals: an approach based on quadratic minimization." Journal of the 

154 American Statistical Association. 99-102. 

155 """ 

156# penalty : str 

157# Penalty function. Can be "D1", "D2", "D3", "D4", "D5". 

158# X is the benchmarked series and I is the indicator. 

159# D1 - sum((X[t] - X[t-1]) - (I[t] - I[ti-1])**2) 

160# D2 - sum((ln(X[t]/X[t-1]) - ln(I[t]/I[t-1]))**2) 

161# D3 - sum((X[t]/X[t-1] / I[t]/I[t-1])**2) 

162# D4 - sum((X[t]/I[t] - X[t-1]/I[t-1])**2) 

163# D5 - sum((X[t]/I[t] / X[t-1]/I[t-1] - 1)**2) 

164#NOTE: only D4 is the only one implemented, see IMF chapter 6. 

165 

166 # check arrays and make 2d 

167 indicator = asarray(indicator) 

168 if indicator.ndim == 1: 

169 indicator = indicator[:,None] 

170 benchmark = asarray(benchmark) 

171 if benchmark.ndim == 1: 

172 benchmark = benchmark[:,None] 

173 

174 # get dimensions 

175 N = len(indicator) # total number of high-freq 

176 m = len(benchmark) # total number of low-freq 

177 

178 # number of low-freq observations for aggregate measure 

179 # 4 for annual to quarter and 3 for quarter to monthly 

180 if freq == "aq": 

181 k = 4 

182 elif freq == "qm": 

183 k = 3 

184 elif freq == "other": 

185 k = kwargs.get("k") 

186 if not k: 

187 raise ValueError("k must be supplied with freq=\"other\"") 

188 else: 

189 raise ValueError("freq %s not understood" % freq) 

190 

191 n = k*m # number of indicator series with a benchmark for back-series 

192 # if k*m != n, then we are going to extrapolate q observations 

193 if N > n: 

194 q = N - n 

195 else: 

196 q = 0 

197 

198 # make the aggregator matrix 

199 #B = block_diag(*(ones((k,1)),)*m) 

200 B = np.kron(np.eye(m), ones((k,1))) 

201 

202 # following the IMF paper, we can do 

203 Zinv = diag(1./indicator.squeeze()[:n]) 

204 # this is D in Denton's notation (not using initial value correction) 

205# D = eye(n) 

206 # make off-diagonal = -1 

207# D[((np.diag_indices(n)[0])[:-1]+1,(np.diag_indices(n)[1])[:-1])] = -1 

208 # account for starting conditions 

209# H = D[1:,:] 

210# HTH = dot(H.T,H) 

211 # just make HTH 

212 HTH = eye(n) 

213 diag_idx0, diag_idx1 = diag_indices(n) 

214 HTH[diag_idx0[1:-1], diag_idx1[1:-1]] += 1 

215 HTH[diag_idx0[:-1]+1, diag_idx1[:-1]] = -1 

216 HTH[diag_idx0[:-1], diag_idx1[:-1]+1] = -1 

217 

218 W = dot(dot(Zinv,HTH),Zinv) 

219 

220 # make partitioned matrices 

221 # TODO: break this out so that we can simplify the linalg? 

222 I = zeros((n+m, n+m)) # noqa:E741 

223 I[:n,:n] = W 

224 I[:n,n:] = B 

225 I[n:,:n] = B.T 

226 

227 A = zeros((m+n,1)) # zero first-order constraints 

228 A[-m:] = benchmark # adding up constraints 

229 X = solve(I,A) 

230 X = X[:-m] # drop the lagrange multipliers 

231 

232 # handle extrapolation 

233 if q > 0: 

234 # get last Benchmark-Indicator ratio 

235 bi = X[n-1]/indicator[n-1] 

236 extrapolated = bi * indicator[n:] 

237 X = r_[X,extrapolated] 

238 

239 return X.squeeze() 

240 

241 

242if __name__ == "__main__": 

243 #these will be the tests 

244 # from IMF paper 

245 

246 # quarterly data 

247 indicator = np.array([98.2, 100.8, 102.2, 100.8, 99.0, 101.6, 

248 102.7, 101.5, 100.5, 103.0, 103.5, 101.5]) 

249 # two annual observations 

250 benchmark = np.array([4000.,4161.4]) 

251 x_imf = dentonm(indicator, benchmark, freq="aq") 

252 

253 imf_stata = np.array([969.8, 998.4, 1018.3, 1013.4, 1007.2, 1042.9, 

254 1060.3, 1051.0, 1040.6, 1066.5, 1071.7, 1051.0]) 

255 np.testing.assert_almost_equal(imf_stata, x_imf, 1) 

256 

257 # Denton example 

258 zQ = np.array([50,100,150,100] * 5) 

259 Y = np.array([500,400,300,400,500]) 

260 x_denton = dentonm(zQ, Y, freq="aq") 

261 x_stata = np.array([64.334796,127.80616,187.82379,120.03526,56.563894, 

262 105.97568,147.50144,89.958987,40.547201,74.445963, 

263 108.34473,76.66211,42.763347,94.14664,153.41596, 

264 109.67405,58.290761,122.62556,190.41409,128.66959]) 

265 

266 

267""" 

268# Examples from the Denton 1971 paper 

269k = 4 

270m = 5 

271n = m*k 

272 

273zQ = [50,100,150,100] * m 

274Y = [500,400,300,400,500] 

275 

276A = np.eye(n) 

277B = block_diag(*(np.ones((k,1)),)*m) 

278 

279r = Y - B.T.dot(zQ) 

280#Ainv = inv(A) 

281Ainv = A # shortcut for identity 

282C = Ainv.dot(B).dot(inv(B.T.dot(Ainv).dot(B))) 

283x = zQ + C.dot(r) 

284 

285# minimize first difference d(x-z) 

286R = linalg.tri(n, dtype=float) # R is tril so actually R.T in paper 

287Ainv = R.dot(R.T) 

288C = Ainv.dot(B).dot(inv(B.T.dot(Ainv).dot(B))) 

289x1 = zQ + C.dot(r) 

290 

291# minimize the second difference d**2(x-z) 

292Ainv = R.dot(Ainv).dot(R.T) 

293C = Ainv.dot(B).dot(inv(B.T.dot(Ainv).dot(B))) 

294x12 = zQ + C.dot(r) 

295 

296 

297# # do it proportionately (x-z)/z 

298Z = np.diag(zQ) 

299Ainv = np.eye(n) 

300C = Z.dot(Ainv).dot(Z).dot(B).dot(inv(B.T.dot(Z).dot(Ainv).dot(Z).dot(B))) 

301x11 = zQ + C.dot(r) 

302 

303# do it proportionately with differencing d((x-z)/z) 

304Ainv = R.dot(R.T) 

305C = Z.dot(Ainv).dot(Z).dot(B).dot(inv(B.T.dot(Z).dot(Ainv).dot(Z).dot(B))) 

306x111 = zQ + C.dot(r) 

307 

308x_stata = np.array([64.334796,127.80616,187.82379,120.03526,56.563894, 

309 105.97568,147.50144,89.958987,40.547201,74.445963, 

310 108.34473,76.66211,42.763347,94.14664,153.41596, 

311 109.67405,58.290761,122.62556,190.41409,128.66959]) 

312"""