1"""numerical differentiation function, gradient, Jacobian, and Hessian
3Author : josef-pkt
4License : BSD
6Notes
7-----
8These are simple forward differentiation, so that we have them available
9without dependencies.
11* Jacobian should be faster than numdifftools because it does not use loop over
12 observations.
13* numerical precision will vary and depend on the choice of stepsizes
14"""

# TODO:
# * some cleanup
# * check numerical accuracy (and bugs) with numdifftools and analytical
#   derivatives
#   - linear least squares case: (hess - 2*X'X) is 1e-8 or so
#   - gradient and Hessian agree with numdifftools when evaluated away from
#     the minimum
#   - forward gradient and Jacobian evaluated at the minimum are inaccurate,
#     centered (+/- epsilon) is ok
# * dot product of Jacobian is different from Hessian, either wrong example
#   or a bug (unlikely), or a real difference
#
#
# What are the conditions under which the Jacobian dot product and the
# Hessian are the same?
#
# See also:
#
# BHHH: Greene p481 17.4.6, MLE Jacobian = d loglike / d beta, where loglike
#   is a vector with one element per observation
#   see also example 17.4 where J'J is very different from the Hessian
#   also, does it hold only at the minimum, and what is the relationship to
#   the covariance of the Jacobian matrix
# http://projects.scipy.org/scipy/ticket/1157
# https://en.wikipedia.org/wiki/Levenberg%E2%80%93Marquardt_algorithm
#   objective: sum((y - f(beta, x))**2), Jacobian = d f/d beta
#   and not d objective/d beta as in MLE Greene
#   similar: http://crsouza.blogspot.com/2009/11/neural-network-learning-by-levenberg_18.html#hessian
#
# in the example: if J = d x*beta / d beta then J'J == X'X
#   similar to https://en.wikipedia.org/wiki/Levenberg%E2%80%93Marquardt_algorithm
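#
# Editor's sketch (not part of the original module), illustrating the
# J'J == X'X remark above using numpy and approx_fprime defined below;
# `X` and `beta0` are made-up names:
#
#     >>> rng = np.random.default_rng(0)
#     >>> X = rng.standard_normal((50, 3))
#     >>> beta0 = np.zeros(3)
#     >>> J = approx_fprime(beta0, lambda b: X.dot(b))  # Jacobian of X @ beta
#     >>> np.allclose(J.T.dot(J), X.T.dot(X))  # J'J == X'X up to fd rounding
#     True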
import numpy as np

from statsmodels.compat.pandas import Appender, Substitution

# NOTE: we only do double precision internally so far
EPS = np.MachAr().eps

_hessian_docs = """
    Calculate Hessian with finite difference derivative approximation

    Parameters
    ----------
    x : array_like
        value at which function derivative is evaluated
    f : function
        function of one array f(x, `*args`, `**kwargs`)
    epsilon : float or array_like, optional
        Stepsize used. If None, then the stepsize is automatically chosen
        according to EPS**(1/%(scale)s)*x.
    args : tuple
        Arguments for function `f`.
    kwargs : dict
        Keyword arguments for function `f`.
    %(extra_params)s

    Returns
    -------
    hess : ndarray
        array of partial second derivatives, Hessian
    %(extra_returns)s

    Notes
    -----
    Equation (%(equation_number)s) in Ridout. Computes the Hessian as::

      %(equation)s

    where e[j] is a vector with element j == 1 and the rest are zero and
    d[i] is epsilon[i].

    References
    ----------
    Ridout, M.S. (2009). Statistical applications of the complex-step method
    of numerical differentiation. The American Statistician, 63, 66-74.
"""


def _get_epsilon(x, s, epsilon, n):
    if epsilon is None:
        h = EPS**(1. / s) * np.maximum(np.abs(x), 0.1)
    else:
        if np.isscalar(epsilon):
            h = np.empty(n)
            h.fill(epsilon)
        else:  # pragma : no cover
            h = np.asarray(epsilon)
            if h.shape != x.shape:
                raise ValueError("If h is not a scalar it must have the same"
                                 " shape as x.")
    return h
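
# Editor's note (added for illustration, not in the original source): with
# epsilon=None the automatic step is EPS**(1/s) * max(|x|, 0.1), e.g. for a
# forward difference (s=2) roughly sqrt(EPS) ~ 1.5e-8 when x is around 1:
#
#     >>> _get_epsilon(np.array([1.0, 100.0]), 2, None, 2)
#     array([1.49011612e-08, 1.49011612e-06])   # approximately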


def approx_fprime(x, f, epsilon=None, args=(), kwargs={}, centered=False):
    '''
    Gradient of function, or Jacobian if function f returns 1d array

    Parameters
    ----------
    x : ndarray
        parameters at which the derivative is evaluated
    f : function
        `f(*((x,)+args), **kwargs)` returning either one value or 1d array
    epsilon : float, optional
        Stepsize. If None, the optimal stepsize is used. This is
        EPS**(1/2)*x for `centered` == False and EPS**(1/3)*x for
        `centered` == True.
    args : tuple
        Tuple of additional arguments for function `f`.
    kwargs : dict
        Dictionary of additional keyword arguments for function `f`.
    centered : bool
        Whether to use a central difference. If False, a forward difference
        is used.

    Returns
    -------
    grad : ndarray
        gradient or Jacobian

    Notes
    -----
    If f returns a 1d array, it returns a Jacobian. If a 2d array is returned
    by f (e.g., with a value for each observation), it returns a 3d array
    with the Jacobian of each observation with shape xk x nobs x xk. I.e.,
    the Jacobian of the first observation would be [:, 0, :]
    '''
    n = len(x)
    # TODO: add scaled stepsize
    f0 = f(*((x,)+args), **kwargs)
    dim = np.atleast_1d(f0).shape  # it could be a scalar
    grad = np.zeros((n,) + dim, np.promote_types(float, x.dtype))
    ei = np.zeros((n,), float)
    if not centered:
        epsilon = _get_epsilon(x, 2, epsilon, n)
        for k in range(n):
            ei[k] = epsilon[k]
            grad[k, :] = (f(*((x+ei,) + args), **kwargs) - f0)/epsilon[k]
            ei[k] = 0.0
    else:
        epsilon = _get_epsilon(x, 3, epsilon, n) / 2.
        for k in range(len(x)):
            ei[k] = epsilon[k]
            grad[k, :] = (f(*((x+ei,)+args), **kwargs) -
                          f(*((x-ei,)+args), **kwargs))/(2 * epsilon[k])
            ei[k] = 0.0
    return grad.squeeze().T
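
# Example (editor's addition, not part of the original module): forward vs.
# centered differences for a function with a known gradient; the quadratic
# below is made up for illustration.
#
#     >>> x0 = np.array([1.0, 2.0])
#     >>> approx_fprime(x0, lambda b: np.sum(b ** 2))                 # forward
#     >>> approx_fprime(x0, lambda b: np.sum(b ** 2), centered=True)  # centered
#     # both are close to the analytic gradient 2 * x0 == [2., 4.];
#     # the centered version is typically more accurate at extrema.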


def approx_fprime_cs(x, f, epsilon=None, args=(), kwargs={}):
    '''
    Calculate gradient or Jacobian with complex step derivative approximation

    Parameters
    ----------
    x : ndarray
        parameters at which the derivative is evaluated
    f : function
        `f(*((x,)+args), **kwargs)` returning either one value or 1d array
    epsilon : float, optional
        Stepsize. If None, the optimal stepsize EPS*x is used. See note.
    args : tuple
        Tuple of additional arguments for function `f`.
    kwargs : dict
        Dictionary of additional keyword arguments for function `f`.

    Returns
    -------
    partials : ndarray
        array of partial derivatives, Gradient or Jacobian

    Notes
    -----
    The complex-step derivative has truncation error O(epsilon**2), so
    truncation error can be eliminated by choosing epsilon to be very small.
    The complex-step derivative avoids the problem of round-off error with
    small epsilon because there is no subtraction.
    '''
    # From Guilherme P. de Freitas, numpy mailing list
    # May 04 2010 thread "Improvement of performance"
    # http://mail.scipy.org/pipermail/numpy-discussion/2010-May/050250.html
    n = len(x)
    epsilon = _get_epsilon(x, 1, epsilon, n)
    increments = np.identity(n) * 1j * epsilon
    # TODO: see if this can be vectorized, but usually dim is small
    partials = [f(x+ih, *args, **kwargs).imag / epsilon[i]
                for i, ih in enumerate(increments)]
    return np.array(partials).T
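
# Example (editor's addition): for analytic functions the complex step is
# accurate essentially to machine precision, since no subtraction occurs.
# The function must accept complex input (np.exp does); the example is
# made up for illustration.
#
#     >>> x0 = np.array([0.5, 1.5])
#     >>> approx_fprime_cs(x0, lambda b: np.exp(b).sum())
#     # matches the analytic gradient np.exp(x0) to ~machine precision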


def approx_hess_cs(x, f, epsilon=None, args=(), kwargs={}):
    '''Calculate Hessian with complex-step derivative approximation

    Parameters
    ----------
    x : array_like
        value at which function derivative is evaluated
    f : function
        function of one array f(x)
    epsilon : float
        stepsize, if None, then the stepsize is automatically chosen

    Returns
    -------
    hess : ndarray
        array of partial second derivatives, Hessian

    Notes
    -----
    based on equation 10 in
    Ridout, M.S. (2009). Statistical applications of the complex-step method
    of numerical differentiation. The American Statistician, 63, 66-74.

    The stepsize is the same for the complex and the finite difference part.
    '''
    # TODO: might want to consider lowering the step for pure derivatives
    n = len(x)
    h = _get_epsilon(x, 3, epsilon, n)
    ee = np.diag(h)
    hess = np.outer(h, h)

    for i in range(n):
        for j in range(i, n):
            hess[i, j] = (f(*((x + 1j*ee[i, :] + ee[j, :],) + args), **kwargs)
                          - f(*((x + 1j*ee[i, :] - ee[j, :],) + args),
                              **kwargs)).imag/2./hess[i, j]
            hess[j, i] = hess[i, j]

    return hess
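
# Example (editor's addition): the complex-step Hessian of a quadratic form
# recovers the analytic Hessian 2*A; `A` and `quad` are made-up names.
#
#     >>> A = np.array([[2.0, 0.5], [0.5, 1.0]])
#     >>> quad = lambda b: b.dot(A).dot(b)
#     >>> approx_hess_cs(np.array([1.0, -1.0]), quad)
#     # close to 2 * A for symmetric A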


@Substitution(
    scale="3",
    extra_params="""return_grad : bool
        Whether or not to also return the gradient
""",
    extra_returns="""grad : ndarray
        Gradient if return_grad == True
""",
    equation_number="7",
    equation="""1/(d_j*d_k) * ((f(x + d[j]*e[j] + d[k]*e[k]) - f(x + d[j]*e[j])) -
                 (f(x + d[k]*e[k]) - f(x)))
"""
)
@Appender(_hessian_docs)
def approx_hess1(x, f, epsilon=None, args=(), kwargs={}, return_grad=False):
    n = len(x)
    h = _get_epsilon(x, 3, epsilon, n)
    ee = np.diag(h)

    f0 = f(*((x,)+args), **kwargs)
    # Compute forward step
    g = np.zeros(n)
    for i in range(n):
        g[i] = f(*((x+ee[i, :],)+args), **kwargs)

    hess = np.outer(h, h)  # this is now epsilon**2
    # Compute "double" forward step
    for i in range(n):
        for j in range(i, n):
            hess[i, j] = (f(*((x + ee[i, :] + ee[j, :],) + args), **kwargs) -
                          g[i] - g[j] + f0)/hess[i, j]
            hess[j, i] = hess[i, j]
    if return_grad:
        grad = (g - f0)/h
        return hess, grad
    else:
        return hess
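
# Example (editor's addition): forward-difference Hessian of the quadratic
# form used above; with return_grad=True the forward-difference gradient is
# returned as well. `A` is a made-up name.
#
#     >>> A = np.array([[2.0, 0.5], [0.5, 1.0]])
#     >>> hess, grad = approx_hess1(np.array([1.0, -1.0]),
#     ...                           lambda b: b.dot(A).dot(b),
#     ...                           return_grad=True)
#     # hess is close to 2 * A; grad is close to 2 * A.dot([1.0, -1.0])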


@Substitution(
    scale="3",
    extra_params="""return_grad : bool
        Whether or not to also return the gradient
""",
    extra_returns="""grad : ndarray
        Gradient if return_grad == True
""",
    equation_number="8",
    equation="""1/(2*d_j*d_k) * ((f(x + d[j]*e[j] + d[k]*e[k]) - f(x + d[j]*e[j])) -
                 (f(x + d[k]*e[k]) - f(x)) +
                 (f(x - d[j]*e[j] - d[k]*e[k]) - f(x - d[j]*e[j])) -
                 (f(x - d[k]*e[k]) - f(x)))
"""
)
@Appender(_hessian_docs)
def approx_hess2(x, f, epsilon=None, args=(), kwargs={}, return_grad=False):
    n = len(x)
    # NOTE: Ridout suggests using eps**(1/4)*theta
    h = _get_epsilon(x, 3, epsilon, n)
    ee = np.diag(h)
    f0 = f(*((x,)+args), **kwargs)
    # Compute forward and backward steps
    g = np.zeros(n)
    gg = np.zeros(n)
    for i in range(n):
        g[i] = f(*((x+ee[i, :],)+args), **kwargs)
        gg[i] = f(*((x-ee[i, :],)+args), **kwargs)

    hess = np.outer(h, h)  # this is now epsilon**2
    # Compute "double" forward and backward steps
    for i in range(n):
        for j in range(i, n):
            hess[i, j] = (f(*((x + ee[i, :] + ee[j, :],) + args), **kwargs) -
                          g[i] - g[j] + f0 +
                          f(*((x - ee[i, :] - ee[j, :],) + args), **kwargs) -
                          gg[i] - gg[j] + f0)/(2 * hess[i, j])
            hess[j, i] = hess[i, j]
    if return_grad:
        grad = (g - f0)/h
        return hess, grad
    else:
        return hess
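
# Example (editor's addition): approx_hess2 averages the forward and backward
# double-step differences, which cancels the leading odd-order error term of
# the one-sided formula; the cubic below is made up for illustration.
#
#     >>> x0 = np.array([1.0, 2.0])
#     >>> approx_hess2(x0, lambda b: (b ** 3).sum())
#     # close to the analytic Hessian diag(6 * x0) == diag([6., 12.])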


@Substitution(
    scale="4",
    extra_params="",
    extra_returns="",
    equation_number="9",
    equation="""1/(4*d_j*d_k) * ((f(x + d[j]*e[j] + d[k]*e[k]) - f(x + d[j]*e[j]
                                                                   - d[k]*e[k])) -
                 (f(x - d[j]*e[j] + d[k]*e[k]) - f(x - d[j]*e[j]
                                                   - d[k]*e[k])))"""
)
@Appender(_hessian_docs)
def approx_hess3(x, f, epsilon=None, args=(), kwargs={}):
    n = len(x)
    h = _get_epsilon(x, 4, epsilon, n)
    ee = np.diag(h)
    hess = np.outer(h, h)

    for i in range(n):
        for j in range(i, n):
            hess[i, j] = (f(*((x + ee[i, :] + ee[j, :],) + args), **kwargs)
                          - f(*((x + ee[i, :] - ee[j, :],) + args), **kwargs)
                          - (f(*((x - ee[i, :] + ee[j, :],) + args), **kwargs)
                             - f(*((x - ee[i, :] - ee[j, :],) + args), **kwargs))
                          )/(4.*hess[i, j])
            hess[j, i] = hess[i, j]
    return hess


approx_hess = approx_hess3
approx_hess.__doc__ += "\n    This is an alias for approx_hess3"
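
# Example (editor's addition): approx_hess is approx_hess3, the central
# difference formula, which is usually the most accurate of the three
# finite-difference Hessians; the quadratic is made up for illustration.
#
#     >>> A = np.array([[2.0, 0.5], [0.5, 1.0]])
#     >>> approx_hess(np.array([0.3, -0.7]), lambda b: b.dot(A).dot(b))
#     # close to 2 * A; compare with approx_hess1 / approx_hess2 above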