Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1""" 

2Module of kernels that are able to handle continuous as well as categorical 

3variables (both ordered and unordered). 

4 

5This is a slight deviation from the current approach in 

6statsmodels.nonparametric.kernels where each kernel is a class object. 

7 

8Having kernel functions rather than classes makes extension to a multivariate 

9kernel density estimation much easier. 

10 

11NOTE: As it is, this module does not interact with the existing API 

12""" 

13 

14import numpy as np 

15from scipy.special import erf 

16 

17 

18#TODO: 

19# - make sure we only receive int input for wang-ryzin and aitchison-aitken 

20# - Check for the scalar Xi case everywhere 

21 

22 

def aitchison_aitken(h, Xi, x, num_levels=None):
    r"""
    The Aitchison-Aitken kernel, used for unordered discrete random variables.

    Parameters
    ----------
    h : 1-D ndarray, shape (K,)
        The bandwidths used to estimate the value of the kernel function.
    Xi : 2-D ndarray of ints, shape (nobs, K)
        The value of the training set.
    x : 1-D ndarray, shape (K,)
        The value at which the kernel density is being estimated.
    num_levels : int, optional
        Gives the user the option to specify the number of levels for the
        random variable.  If None, the number of levels is calculated from
        the data.

    Returns
    -------
    kernel_value : ndarray, shape (nobs, K)
        The value of the kernel function at each training point for each var.

    Notes
    -----
    See p.18 of [2]_ for details.  The value of the kernel L if :math:`X_{i}=x`
    is :math:`1-\lambda`, otherwise it is :math:`\frac{\lambda}{c-1}`.
    Here :math:`c` is the number of levels plus one of the RV.

    References
    ----------
    .. [*] J. Aitchison and C.G.G. Aitken, "Multivariate binary discrimination
           by the kernel method", Biometrika, vol. 63, pp. 413-420, 1976.
    .. [*] Racine, Jeff. "Nonparametric Econometrics: A Primer," Foundation
           and Trends in Econometrics: Vol 3: No 1, pp1-88., 2008.
    """
    Xi = Xi.reshape(Xi.size)  # seems needed in case Xi is scalar
    if num_levels is None:
        # Infer the number of levels from the observed support of Xi.
        num_levels = np.asarray(np.unique(Xi).size)

    # Off-diagonal mass lambda/(c-1); exact matches get 1 - lambda below.
    kernel_value = np.ones(Xi.size) * h / (num_levels - 1)
    idx = Xi == x
    kernel_value[idx] = (idx * (1 - h))[idx]
    return kernel_value

66 

67 

def wang_ryzin(h, Xi, x):
    r"""
    The Wang-Ryzin kernel, used for ordered discrete random variables.

    Parameters
    ----------
    h : scalar or 1-D ndarray, shape (K,)
        The bandwidths used to estimate the value of the kernel function.
    Xi : ndarray of ints, shape (nobs, K)
        The value of the training set.
    x : scalar or 1-D ndarray of shape (K,)
        The value at which the kernel density is being estimated.

    Returns
    -------
    kernel_value : ndarray, shape (nobs, K)
        The value of the kernel function at each training point for each var.

    Notes
    -----
    See p. 19 in [1]_ for details.  The kernel L equals :math:`1-\lambda`
    when :math:`X_{i}=x` and :math:`\frac{1-\lambda}{2}\lambda^{|X_{i}-x|}`
    otherwise, with :math:`\lambda` the bandwidth.

    References
    ----------
    .. [*] Racine, Jeff. "Nonparametric Econometrics: A Primer," Foundation
           and Trends in Econometrics: Vol 3: No 1, pp1-88., 2008.
           http://dx.doi.org/10.1561/0800000009
    .. [*] M.-C. Wang and J. van Ryzin, "A class of smooth estimators for
           discrete distributions", Biometrika, vol. 68, pp. 301-309, 1981.
    """
    Xi = Xi.reshape(Xi.size)  # flatten; handles the scalar-Xi case
    off_point = 0.5 * (1 - h) * h ** np.abs(Xi - x)
    # An exact match carries kernel mass 1 - lambda rather than half of it.
    return np.where(Xi == x, 1 - h, off_point)

106 

107 

def gaussian(h, Xi, x):
    """
    Gaussian Kernel for continuous variables

    Parameters
    ----------
    h : 1-D ndarray, shape (K,)
        The bandwidths used to estimate the value of the kernel function.
    Xi : 1-D ndarray, shape (K,)
        The value of the training set.
    x : 1-D ndarray, shape (K,)
        The value at which the kernel density is being estimated.

    Returns
    -------
    kernel_value : ndarray, shape (nobs, K)
        The value of the kernel function at each training point for each var.
    """
    # Standard normal density evaluated at the bandwidth-scaled distance.
    z = (Xi - x) / h
    return np.exp(-0.5 * z ** 2) / np.sqrt(2 * np.pi)

126 

127 

def tricube(h, Xi, x):
    """
    Tricube Kernel for continuous variables

    Parameters
    ----------
    h : 1-D ndarray, shape (K,)
        The bandwidths used to estimate the value of the kernel function.
    Xi : 1-D ndarray, shape (K,)
        The value of the training set.
    x : 1-D ndarray, shape (K,)
        The value at which the kernel density is being estimated.

    Returns
    -------
    kernel_value : ndarray, shape (nobs, K)
        The value of the kernel function at each training point for each var.
    """
    u = (Xi - x) / h
    # Points outside the support |u| <= 1 must receive weight 0.  Mapping
    # them to |u| = 1 makes (1 - |u|**3)**3 vanish; the previous u = 0
    # wrongly assigned them the kernel's peak value 70/81.
    u[np.abs(u) > 1] = 1
    return (70. / 81) * (1 - np.abs(u)**3)**3

148 

149 

def gaussian_convolution(h, Xi, x):
    """Calculates the Gaussian Convolution Kernel"""
    # Gaussian with doubled variance, i.e. the convolution of two Gaussians.
    sq_dist = (Xi - x) ** 2
    return np.exp(-sq_dist / (h ** 2 * 4.)) / np.sqrt(4 * np.pi)

153 

154 

def wang_ryzin_convolution(h, Xi, Xj):
    # Analogue of the Gaussian convolution case for the Wang-Ryzin kernel.
    # Not an exact convolution; the sum runs over the observed support of Xi.
    total = np.zeros(Xi.size)
    for level in np.unique(Xi):
        total += wang_ryzin(h, Xi, level) * wang_ryzin(h, Xj, level)

    return total

164 

165 

def aitchison_aitken_convolution(h, Xi, Xj):
    # Convolution-style sum of Aitchison-Aitken kernels over the observed
    # support of Xi; the level count is shared so both factors agree.
    levels = np.unique(Xi)
    n_levels = levels.size
    total = np.zeros(Xi.size)
    for level in levels:
        total += (aitchison_aitken(h, Xi, level, num_levels=n_levels) *
                  aitchison_aitken(h, Xj, level, num_levels=n_levels))

    return total

175 

176 

def gaussian_cdf(h, Xi, x):
    # Bandwidth-scaled Gaussian CDF evaluated at x for each training point.
    z = (x - Xi) / (h * np.sqrt(2))
    return 0.5 * h * (1 + erf(z))

179 

180 

def aitchison_aitken_cdf(h, Xi, x_u):
    # Accumulate kernel mass for all observed levels not exceeding x_u.
    x_u = int(x_u)
    levels = np.unique(Xi)
    n_levels = levels.size
    total = np.zeros(Xi.size)
    for level in levels:
        if level <= x_u:  # FIXME: why a comparison for unordered variables?
            total += aitchison_aitken(h, Xi, level, num_levels=n_levels)

    return total

191 

192 

def wang_ryzin_cdf(h, Xi, x_u):
    # CDF-style accumulation: sum kernel mass over observed levels <= x_u.
    total = np.zeros(Xi.size)
    for level in np.unique(Xi):
        if level <= x_u:
            total += wang_ryzin(h, Xi, level)

    return total

200 

201 

def d_gaussian(h, Xi, x):
    # The derivative of the Gaussian Kernel.
    # NOTE(review): carries a factor of 2 relative to the naive analytic
    # derivative of `gaussian` w.r.t. x -- confirm against callers before
    # changing.
    diff = Xi - x
    return 2 * diff * gaussian(h, Xi, x) / h**2

205 

206 

def aitchison_aitken_reg(h, Xi, x):
    """
    A version for the Aitchison-Aitken kernel for nonparametric regression.

    Suggested by Li and Racine.
    """
    # Weight 1 at an exact match, bandwidth h otherwise.
    kernel_value = np.ones(Xi.size)
    mismatch = Xi != x
    kernel_value[mismatch] = (mismatch * h)[mismatch]
    return kernel_value

218 

219 

def wang_ryzin_reg(h, Xi, x):
    """
    A version for the Wang-Ryzin kernel for nonparametric regression.

    Suggested by Li and Racine in [1] ch.4
    """
    # Weight decays geometrically in the ordered distance |Xi - x|.
    return np.power(h, np.abs(Xi - x))