1"""
2Functions
3---------
4.. autosummary::
5 :toctree: generated/
7 line_search_armijo
8 line_search_wolfe1
9 line_search_wolfe2
10 scalar_search_wolfe1
11 scalar_search_wolfe2
13"""
14from warnings import warn
16from scipy.optimize import minpack2
17import numpy as np
19__all__ = ['LineSearchWarning', 'line_search_wolfe1', 'line_search_wolfe2',
20 'scalar_search_wolfe1', 'scalar_search_wolfe2',
21 'line_search_armijo']


class LineSearchWarning(RuntimeWarning):
    pass


#------------------------------------------------------------------------------
# Minpack's Wolfe line and scalar searches
#------------------------------------------------------------------------------

def line_search_wolfe1(f, fprime, xk, pk, gfk=None,
                       old_fval=None, old_old_fval=None,
                       args=(), c1=1e-4, c2=0.9, amax=50, amin=1e-8,
                       xtol=1e-14):
    """
    As `scalar_search_wolfe1`, but do a line search along direction `pk`.

    Parameters
    ----------
    f : callable
        Function `f(x)`
    fprime : callable
        Gradient of `f`
    xk : array_like
        Current point
    pk : array_like
        Search direction
    gfk : array_like, optional
        Gradient of `f` at point `xk`
    old_fval : float, optional
        Value of `f` at point `xk`
    old_old_fval : float, optional
        Value of `f` at point preceding `xk`

    The rest of the parameters are the same as for `scalar_search_wolfe1`.

    Returns
    -------
    stp, f_count, g_count, fval, old_fval
        Step size, number of function and gradient evaluations, and the
        objective values at the new point and at `xk`.
    gval : array
        Gradient of `f` at the final point

    """
    if gfk is None:
        gfk = fprime(xk)

    if isinstance(fprime, tuple):
        eps = fprime[1]
        fprime = fprime[0]
        newargs = (f, eps) + args
        gradient = False
    else:
        newargs = args
        gradient = True

    gval = [gfk]
    gc = [0]
    fc = [0]

    def phi(s):
        fc[0] += 1
        return f(xk + s*pk, *args)

    def derphi(s):
        gval[0] = fprime(xk + s*pk, *newargs)
        if gradient:
            gc[0] += 1
        else:
            fc[0] += len(xk) + 1
        return np.dot(gval[0], pk)

    derphi0 = np.dot(gfk, pk)

    stp, fval, old_fval = scalar_search_wolfe1(
            phi, derphi, old_fval, old_old_fval, derphi0,
            c1=c1, c2=c2, amax=amax, amin=amin, xtol=xtol)

    return stp, fc[0], gc[0], fval, old_fval, gval[0]
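

# Illustrative usage sketch (not part of the scipy API; the helper name
# `_example_line_search_wolfe1` and the quadratic test problem are
# assumptions added for exposition).  It runs the MINPACK-based Wolfe
# search on f(x) = x.x along the steepest-descent direction.
def _example_line_search_wolfe1():
    def f(x):
        return np.dot(x, x)          # simple convex objective

    def fprime(x):
        return 2.0 * x               # its gradient

    x0 = np.array([1.0, 1.0])
    pk = -fprime(x0)                 # steepest-descent direction
    stp, fc, gc, fval, old_fval, gval = line_search_wolfe1(f, fprime, x0, pk)
    # stp is the accepted step length (or None), fc/gc are evaluation
    # counts, fval/old_fval the new and previous objective values, and
    # gval the gradient at x0 + stp*pk.
    return stp, fval, gval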


def scalar_search_wolfe1(phi, derphi, phi0=None, old_phi0=None, derphi0=None,
                         c1=1e-4, c2=0.9,
                         amax=50, amin=1e-8, xtol=1e-14):
    """
    Scalar function search for alpha that satisfies strong Wolfe conditions

    The underlying search direction is assumed to be a descent direction,
    i.e. ``derphi(0) < 0``; the search is over ``alpha > 0``.

    Parameters
    ----------
    phi : callable phi(alpha)
        Function at point `alpha`
    derphi : callable phi'(alpha)
        Objective function derivative. Returns a scalar.
    phi0 : float, optional
        Value of phi at 0
    old_phi0 : float, optional
        Value of phi at previous point
    derphi0 : float, optional
        Value of derphi at 0
    c1 : float, optional
        Parameter for Armijo condition rule.
    c2 : float, optional
        Parameter for curvature condition rule.
    amax, amin : float, optional
        Maximum and minimum step size
    xtol : float, optional
        Relative tolerance for an acceptable step.

    Returns
    -------
    alpha : float
        Step size, or None if no suitable step was found
    phi : float
        Value of `phi` at the new point `alpha`
    phi0 : float
        Value of `phi` at `alpha=0`

    Notes
    -----
    Uses routine DCSRCH from MINPACK.

    """

    if phi0 is None:
        phi0 = phi(0.)
    if derphi0 is None:
        derphi0 = derphi(0.)

    if old_phi0 is not None and derphi0 != 0:
        alpha1 = min(1.0, 1.01*2*(phi0 - old_phi0)/derphi0)
        if alpha1 < 0:
            alpha1 = 1.0
    else:
        alpha1 = 1.0

    phi1 = phi0
    derphi1 = derphi0
    isave = np.zeros((2,), np.intc)
    dsave = np.zeros((13,), float)
    task = b'START'

    maxiter = 100
    for i in range(maxiter):
        stp, phi1, derphi1, task = minpack2.dcsrch(alpha1, phi1, derphi1,
                                                   c1, c2, xtol, task,
                                                   amin, amax, isave, dsave)
        if task[:2] == b'FG':
            alpha1 = stp
            phi1 = phi(stp)
            derphi1 = derphi(stp)
        else:
            break
    else:
        # maxiter reached, the line search did not converge
        stp = None

    if task[:5] == b'ERROR' or task[:4] == b'WARN':
        stp = None  # failed

    return stp, phi1, phi0
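

# Illustrative usage sketch (not part of the scipy API; the helper name
# `_example_scalar_search_wolfe1` and the 1-D test function are assumptions
# for exposition).  The scalar search works purely on a function of the
# step length alpha.
def _example_scalar_search_wolfe1():
    phi = lambda alpha: (alpha - 1.0)**2          # minimum at alpha = 1
    derphi = lambda alpha: 2.0 * (alpha - 1.0)    # derphi(0) = -2 < 0, descent
    alpha, phi_new, phi0 = scalar_search_wolfe1(phi, derphi)
    # alpha satisfies the strong Wolfe conditions for (phi, derphi);
    # phi_new = phi(alpha), phi0 = phi(0).
    return alpha, phi_new, phi0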


line_search = line_search_wolfe1


#------------------------------------------------------------------------------
# Pure-Python Wolfe line and scalar searches
#------------------------------------------------------------------------------

def line_search_wolfe2(f, myfprime, xk, pk, gfk=None, old_fval=None,
                       old_old_fval=None, args=(), c1=1e-4, c2=0.9, amax=None,
                       extra_condition=None, maxiter=10):
    """Find alpha that satisfies strong Wolfe conditions.

    Parameters
    ----------
    f : callable f(x,*args)
        Objective function.
    myfprime : callable f'(x,*args)
        Objective function gradient.
    xk : ndarray
        Starting point.
    pk : ndarray
        Search direction.
    gfk : ndarray, optional
        Gradient value for x=xk (xk being the current parameter
        estimate). Will be recomputed if omitted.
    old_fval : float, optional
        Function value for x=xk. Will be recomputed if omitted.
    old_old_fval : float, optional
        Function value for the point preceding x=xk.
    args : tuple, optional
        Additional arguments passed to objective function.
    c1 : float, optional
        Parameter for Armijo condition rule.
    c2 : float, optional
        Parameter for curvature condition rule.
    amax : float, optional
        Maximum step size.
    extra_condition : callable, optional
        A callable of the form ``extra_condition(alpha, x, f, g)``
        returning a boolean. Arguments are the proposed step ``alpha``
        and the corresponding ``x``, ``f`` and ``g`` values. The line
        search accepts the value of ``alpha`` only if this callable
        returns ``True``. If the callable returns ``False`` for the step
        length, the algorithm will continue with new iterates. The
        callable is only called for iterates satisfying the strong Wolfe
        conditions.
    maxiter : int, optional
        Maximum number of iterations to perform.

    Returns
    -------
    alpha : float or None
        Alpha for which ``x_new = x0 + alpha * pk``,
        or None if the line search algorithm did not converge.
    fc : int
        Number of function evaluations made.
    gc : int
        Number of gradient evaluations made.
    new_fval : float or None
        New function value ``f(x_new)=f(x0+alpha*pk)``,
        or None if the line search algorithm did not converge.
    old_fval : float
        Old function value ``f(x0)``.
    new_slope : float or None
        The local slope along the search direction at the
        new value ``<myfprime(x_new), pk>``,
        or None if the line search algorithm did not converge.

    Notes
    -----
    Uses the line search algorithm to enforce strong Wolfe
    conditions. See Wright and Nocedal, 'Numerical Optimization',
    1999, pp. 59-61.

    Examples
    --------
    >>> import numpy as np
    >>> from scipy.optimize import line_search

    An objective function and its gradient are defined.

    >>> def obj_func(x):
    ...     return (x[0])**2 + (x[1])**2
    >>> def obj_grad(x):
    ...     return [2*x[0], 2*x[1]]

    We can find alpha that satisfies strong Wolfe conditions.

    >>> start_point = np.array([1.8, 1.7])
    >>> search_gradient = np.array([-1.0, -1.0])
    >>> line_search(obj_func, obj_grad, start_point, search_gradient)
    (1.0, 2, 1, 1.1300000000000001, 6.13, [1.6, 1.4])

    """
    fc = [0]
    gc = [0]
    gval = [None]
    gval_alpha = [None]

    def phi(alpha):
        fc[0] += 1
        return f(xk + alpha * pk, *args)

    if isinstance(myfprime, tuple):
        def derphi(alpha):
            fc[0] += len(xk) + 1
            eps = myfprime[1]
            fprime = myfprime[0]
            newargs = (f, eps) + args
            gval[0] = fprime(xk + alpha * pk, *newargs)  # store for later use
            gval_alpha[0] = alpha
            return np.dot(gval[0], pk)
    else:
        fprime = myfprime

        def derphi(alpha):
            gc[0] += 1
            gval[0] = fprime(xk + alpha * pk, *args)  # store for later use
            gval_alpha[0] = alpha
            return np.dot(gval[0], pk)

    if gfk is None:
        gfk = fprime(xk, *args)
    derphi0 = np.dot(gfk, pk)

    if extra_condition is not None:
        # Add the current gradient as argument, to avoid needless
        # re-evaluation
        def extra_condition2(alpha, phi):
            if gval_alpha[0] != alpha:
                derphi(alpha)
            x = xk + alpha * pk
            return extra_condition(alpha, x, phi, gval[0])
    else:
        extra_condition2 = None

    alpha_star, phi_star, old_fval, derphi_star = scalar_search_wolfe2(
            phi, derphi, old_fval, old_old_fval, derphi0, c1, c2, amax,
            extra_condition2, maxiter=maxiter)

    if derphi_star is None:
        warn('The line search algorithm did not converge', LineSearchWarning)
    else:
        # derphi_star is a number (the slope along pk) -- replace it with
        # the most recently calculated gradient, which was used in computing
        # it (derphi = gfk*pk).  This is the gradient at the next step, so
        # there is no need to compute it again in the outer loop.
        derphi_star = gval[0]

    return alpha_star, fc[0], gc[0], phi_star, old_fval, derphi_star
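

# Illustrative sketch of the `extra_condition` hook (not part of the scipy
# API; the helper name `_example_extra_condition` and the bound of 10.0 are
# assumptions for exposition).  The callable vetoes Wolfe points that leave
# a box around the origin, so the search keeps iterating until it finds an
# acceptable step or gives up.
def _example_extra_condition():
    def f(x):
        return np.dot(x, x)

    def fprime(x):
        return 2.0 * x

    xk = np.array([1.8, 1.7])
    pk = np.array([-1.0, -1.0])

    def stay_in_box(alpha, x, fval, g):
        # accept the step only if the trial point stays inside |x_i| < 10
        return np.all(np.abs(x) < 10.0)

    return line_search_wolfe2(f, fprime, xk, pk,
                              extra_condition=stay_in_box)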


def scalar_search_wolfe2(phi, derphi, phi0=None,
                         old_phi0=None, derphi0=None,
                         c1=1e-4, c2=0.9, amax=None,
                         extra_condition=None, maxiter=10):
    """Find alpha that satisfies strong Wolfe conditions.

    The underlying search direction is assumed to be a descent direction,
    i.e. ``derphi(0) < 0``; the search is over ``alpha > 0``.

    Parameters
    ----------
    phi : callable phi(alpha)
        Objective scalar function.
    derphi : callable phi'(alpha)
        Objective function derivative. Returns a scalar.
    phi0 : float, optional
        Value of phi at 0.
    old_phi0 : float, optional
        Value of phi at previous point.
    derphi0 : float, optional
        Value of derphi at 0.
    c1 : float, optional
        Parameter for Armijo condition rule.
    c2 : float, optional
        Parameter for curvature condition rule.
    amax : float, optional
        Maximum step size.
    extra_condition : callable, optional
        A callable of the form ``extra_condition(alpha, phi_value)``
        returning a boolean. The line search accepts the value
        of ``alpha`` only if this callable returns ``True``.
        If the callable returns ``False`` for the step length,
        the algorithm will continue with new iterates.
        The callable is only called for iterates satisfying
        the strong Wolfe conditions.
    maxiter : int, optional
        Maximum number of iterations to perform.

    Returns
    -------
    alpha_star : float or None
        Best alpha, or None if the line search algorithm did not converge.
    phi_star : float
        phi at alpha_star.
    phi0 : float
        phi at 0.
    derphi_star : float or None
        derphi at alpha_star, or None if the line search algorithm
        did not converge.

    Notes
    -----
    Uses the line search algorithm to enforce strong Wolfe
    conditions. See Wright and Nocedal, 'Numerical Optimization',
    1999, pp. 59-61.

    """

    if phi0 is None:
        phi0 = phi(0.)

    if derphi0 is None:
        derphi0 = derphi(0.)

    alpha0 = 0
    if old_phi0 is not None and derphi0 != 0:
        alpha1 = min(1.0, 1.01*2*(phi0 - old_phi0)/derphi0)
    else:
        alpha1 = 1.0

    if alpha1 < 0:
        alpha1 = 1.0

    if amax is not None:
        alpha1 = min(alpha1, amax)

    phi_a1 = phi(alpha1)
    #derphi_a1 = derphi(alpha1) evaluated below

    phi_a0 = phi0
    derphi_a0 = derphi0

    if extra_condition is None:
        extra_condition = lambda alpha, phi: True

    for i in range(maxiter):
        if alpha1 == 0 or (amax is not None and alpha0 == amax):
            # alpha1 == 0: This shouldn't happen. Perhaps the increment has
            # slipped below machine precision?
            alpha_star = None
            phi_star = phi0
            phi0 = old_phi0
            derphi_star = None

            if alpha1 == 0:
                msg = 'Rounding errors prevent the line search from converging'
            else:
                msg = "The line search algorithm could not find a solution " + \
                      "less than or equal to amax: %s" % amax

            warn(msg, LineSearchWarning)
            break

        if (phi_a1 > phi0 + c1 * alpha1 * derphi0) or \
           ((phi_a1 >= phi_a0) and (i > 1)):
            alpha_star, phi_star, derphi_star = \
                        _zoom(alpha0, alpha1, phi_a0,
                              phi_a1, derphi_a0, phi, derphi,
                              phi0, derphi0, c1, c2, extra_condition)
            break

        derphi_a1 = derphi(alpha1)
        if (abs(derphi_a1) <= -c2*derphi0):
            if extra_condition(alpha1, phi_a1):
                alpha_star = alpha1
                phi_star = phi_a1
                derphi_star = derphi_a1
                break

        if (derphi_a1 >= 0):
            alpha_star, phi_star, derphi_star = \
                        _zoom(alpha1, alpha0, phi_a1,
                              phi_a0, derphi_a1, phi, derphi,
                              phi0, derphi0, c1, c2, extra_condition)
            break

        alpha2 = 2 * alpha1  # increase by factor of two on each iteration
        if amax is not None:
            alpha2 = min(alpha2, amax)
        alpha0 = alpha1
        alpha1 = alpha2
        phi_a0 = phi_a1
        phi_a1 = phi(alpha1)
        derphi_a0 = derphi_a1

    else:
        # stopping test maxiter reached
        alpha_star = alpha1
        phi_star = phi_a1
        derphi_star = None
        warn('The line search algorithm did not converge', LineSearchWarning)

    return alpha_star, phi_star, phi0, derphi_star
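

# Illustrative usage sketch (not part of the scipy API; the helper name
# `_example_scalar_search_wolfe2` and the test function are assumptions for
# exposition).  Unlike the MINPACK-based variant above, this search is pure
# Python and also returns the derivative at the accepted step.
def _example_scalar_search_wolfe2():
    phi = lambda alpha: (alpha - 2.0)**2          # minimum at alpha = 2
    derphi = lambda alpha: 2.0 * (alpha - 2.0)    # derphi(0) = -4 < 0, descent
    alpha_star, phi_star, phi0, derphi_star = scalar_search_wolfe2(phi, derphi)
    return alpha_star, phi_star, phi0, derphi_star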


def _cubicmin(a, fa, fpa, b, fb, c, fc):
    """
    Finds the minimizer for a cubic polynomial that goes through the
    points (a,fa), (b,fb), and (c,fc) with derivative at a of fpa.

    If no minimizer can be found, return None.

    """
    # f(x) = A *(x-a)^3 + B*(x-a)^2 + C*(x-a) + D
    with np.errstate(divide='raise', over='raise', invalid='raise'):
        try:
            C = fpa
            db = b - a
            dc = c - a
            denom = (db * dc) ** 2 * (db - dc)
            d1 = np.empty((2, 2))
            d1[0, 0] = dc ** 2
            d1[0, 1] = -db ** 2
            d1[1, 0] = -dc ** 3
            d1[1, 1] = db ** 3
            [A, B] = np.dot(d1, np.asarray([fb - fa - C * db,
                                            fc - fa - C * dc]).flatten())
            A /= denom
            B /= denom
            radical = B * B - 3 * A * C
            xmin = a + (-B + np.sqrt(radical)) / (3 * A)
        except ArithmeticError:
            return None
    if not np.isfinite(xmin):
        return None
    return xmin
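

# Sanity-check sketch for `_cubicmin` (not part of the scipy API; the helper
# name `_example_cubicmin` and the sample cubic are assumptions for
# exposition).  The data come from f(x) = x**3 - 3*x + 2, whose local
# minimum is at x = 1, so the fitted cubic is exact and the minimizer is
# recovered.
def _example_cubicmin():
    g = lambda x: x**3 - 3*x + 2
    dg = lambda x: 3*x**2 - 3
    a, b, c = 0.0, 2.0, 3.0
    xmin = _cubicmin(a, g(a), dg(a), b, g(b), c, g(c))
    return xmin   # approximately 1.0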


def _quadmin(a, fa, fpa, b, fb):
    """
    Finds the minimizer for a quadratic polynomial that goes through
    the points (a,fa), (b,fb) with derivative at a of fpa.

    """
    # f(x) = B*(x-a)^2 + C*(x-a) + D
    with np.errstate(divide='raise', over='raise', invalid='raise'):
        try:
            D = fa
            C = fpa
            db = b - a * 1.0
            B = (fb - D - C * db) / (db * db)
            xmin = a - C / (2.0 * B)
        except ArithmeticError:
            return None
    if not np.isfinite(xmin):
        return None
    return xmin
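

# Sanity-check sketch for `_quadmin` (not part of the scipy API; the helper
# name `_example_quadmin` and the sample parabola are assumptions for
# exposition).  The data come from f(x) = (x - 1)**2 + 1, whose minimum is
# at x = 1, so the fitted quadratic is exact and the minimizer is recovered.
def _example_quadmin():
    g = lambda x: (x - 1.0)**2 + 1.0
    dg = lambda x: 2.0 * (x - 1.0)
    a, b = 0.0, 3.0
    xmin = _quadmin(a, g(a), dg(a), b, g(b))
    return xmin   # approximately 1.0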


def _zoom(a_lo, a_hi, phi_lo, phi_hi, derphi_lo,
          phi, derphi, phi0, derphi0, c1, c2, extra_condition):
    """Zoom stage of approximate linesearch satisfying strong Wolfe conditions.

    Part of the optimization algorithm in `scalar_search_wolfe2`.

    Notes
    -----
    Implements Algorithm 3.6 (zoom) in Wright and Nocedal,
    'Numerical Optimization', 1999, pp. 61.

    """

    maxiter = 10
    i = 0
    delta1 = 0.2  # cubic interpolant check
    delta2 = 0.1  # quadratic interpolant check
    phi_rec = phi0
    a_rec = 0
    while True:
        # Interpolate to find a trial step length between a_lo and a_hi.
        # The interpolation method is chosen adaptively: use cubic
        # interpolation first; if the result is within delta * dalpha of
        # an end point or outside the interval bounded by a_lo and a_hi,
        # fall back to quadratic interpolation; if the result is still
        # too close, use bisection.

        dalpha = a_hi - a_lo
        if dalpha < 0:
            a, b = a_hi, a_lo
        else:
            a, b = a_lo, a_hi

        # Minimizer of the cubic interpolant
        # (uses phi_lo, derphi_lo, phi_hi, and the most recent value of phi).
        #
        # If the result is too close to the end points (or out of the
        # interval), use quadratic interpolation with phi_lo, derphi_lo
        # and phi_hi; if the result is still too close to the end points
        # (or out of the interval), use bisection.

        if (i > 0):
            cchk = delta1 * dalpha
            a_j = _cubicmin(a_lo, phi_lo, derphi_lo, a_hi, phi_hi,
                            a_rec, phi_rec)
        if (i == 0) or (a_j is None) or (a_j > b - cchk) or (a_j < a + cchk):
            qchk = delta2 * dalpha
            a_j = _quadmin(a_lo, phi_lo, derphi_lo, a_hi, phi_hi)
            if (a_j is None) or (a_j > b-qchk) or (a_j < a+qchk):
                a_j = a_lo + 0.5*dalpha

        # Check new value of a_j

        phi_aj = phi(a_j)
        if (phi_aj > phi0 + c1*a_j*derphi0) or (phi_aj >= phi_lo):
            phi_rec = phi_hi
            a_rec = a_hi
            a_hi = a_j
            phi_hi = phi_aj
        else:
            derphi_aj = derphi(a_j)
            if abs(derphi_aj) <= -c2*derphi0 and extra_condition(a_j, phi_aj):
                a_star = a_j
                val_star = phi_aj
                valprime_star = derphi_aj
                break
            if derphi_aj*(a_hi - a_lo) >= 0:
                phi_rec = phi_hi
                a_rec = a_hi
                a_hi = a_lo
                phi_hi = phi_lo
            else:
                phi_rec = phi_lo
                a_rec = a_lo
                a_lo = a_j
                phi_lo = phi_aj
                derphi_lo = derphi_aj
        i += 1
        if (i > maxiter):
            # Failed to find a conforming step size
            a_star = None
            val_star = None
            valprime_star = None
            break
    return a_star, val_star, valprime_star


#------------------------------------------------------------------------------
# Armijo line and scalar searches
#------------------------------------------------------------------------------

def line_search_armijo(f, xk, pk, gfk, old_fval, args=(), c1=1e-4, alpha0=1):
    """Minimize over alpha the function ``f(xk + alpha*pk)``.

    Parameters
    ----------
    f : callable
        Function to be minimized.
    xk : array_like
        Current point.
    pk : array_like
        Search direction.
    gfk : array_like
        Gradient of `f` at point `xk`.
    old_fval : float
        Value of `f` at point `xk`.
    args : tuple, optional
        Optional arguments.
    c1 : float, optional
        Value to control stopping criterion.
    alpha0 : scalar, optional
        Value of `alpha` at start of the optimization.

    Returns
    -------
    alpha
    f_count
    f_val_at_alpha

    Notes
    -----
    Uses the interpolation algorithm (Armijo backtracking) as suggested by
    Wright and Nocedal in 'Numerical Optimization', 1999, pp. 56-57

    """
    xk = np.atleast_1d(xk)
    fc = [0]

    def phi(alpha1):
        fc[0] += 1
        return f(xk + alpha1*pk, *args)

    if old_fval is None:
        phi0 = phi(0.)
    else:
        phi0 = old_fval  # f(xk) was already computed by the caller

    derphi0 = np.dot(gfk, pk)
    alpha, phi1 = scalar_search_armijo(phi, phi0, derphi0, c1=c1,
                                       alpha0=alpha0)
    return alpha, fc[0], phi1
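

# Illustrative usage sketch (not part of the scipy API; the helper name
# `_example_line_search_armijo` and the quadratic test problem are
# assumptions for exposition).  Armijo backtracking only enforces the
# sufficient-decrease condition, so it is cheaper than the Wolfe searches.
def _example_line_search_armijo():
    def f(x):
        return np.dot(x, x)

    x0 = np.array([1.0, 1.0])
    gfk = 2.0 * x0                       # gradient at x0
    pk = -gfk                            # descent direction
    alpha, fc, fval = line_search_armijo(f, x0, pk, gfk, old_fval=f(x0))
    return alpha, fc, fval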


def line_search_BFGS(f, xk, pk, gfk, old_fval, args=(), c1=1e-4, alpha0=1):
    """
    Compatibility wrapper for `line_search_armijo`
    """
    r = line_search_armijo(f, xk, pk, gfk, old_fval, args=args, c1=c1,
                           alpha0=alpha0)
    return r[0], r[1], 0, r[2]


def scalar_search_armijo(phi, phi0, derphi0, c1=1e-4, alpha0=1, amin=0):
    """Minimize over alpha the function ``phi(alpha)``.

    Uses the interpolation algorithm (Armijo backtracking) as suggested by
    Wright and Nocedal in 'Numerical Optimization', 1999, pp. 56-57

    The underlying search direction is assumed to be a descent direction,
    i.e. ``derphi0 < 0``; the search is over ``alpha > 0``.

    Returns
    -------
    alpha
    phi1

    """
    phi_a0 = phi(alpha0)
    if phi_a0 <= phi0 + c1*alpha0*derphi0:
        return alpha0, phi_a0

    # Otherwise, compute the minimizer of a quadratic interpolant:

    alpha1 = -(derphi0) * alpha0**2 / 2.0 / (phi_a0 - phi0 - derphi0 * alpha0)
    phi_a1 = phi(alpha1)

    if (phi_a1 <= phi0 + c1*alpha1*derphi0):
        return alpha1, phi_a1

    # Otherwise, loop with cubic interpolation until we find an alpha which
    # satisfies the first Wolfe condition (since we are backtracking, we will
    # assume that the value of alpha is not too small and satisfies the second
    # condition).

    while alpha1 > amin:       # we are assuming the direction is descent
        factor = alpha0**2 * alpha1**2 * (alpha1-alpha0)
        a = alpha0**2 * (phi_a1 - phi0 - derphi0*alpha1) - \
            alpha1**2 * (phi_a0 - phi0 - derphi0*alpha0)
        a = a / factor
        b = -alpha0**3 * (phi_a1 - phi0 - derphi0*alpha1) + \
            alpha1**3 * (phi_a0 - phi0 - derphi0*alpha0)
        b = b / factor

        alpha2 = (-b + np.sqrt(abs(b**2 - 3 * a * derphi0))) / (3.0*a)
        phi_a2 = phi(alpha2)

        if (phi_a2 <= phi0 + c1*alpha2*derphi0):
            return alpha2, phi_a2

        if (alpha1 - alpha2) > alpha1 / 2.0 or (1 - alpha2/alpha1) < 0.96:
            alpha2 = alpha1 / 2.0

        alpha0 = alpha1
        alpha1 = alpha2
        phi_a0 = phi_a1
        phi_a1 = phi_a2

    # Failed to find a suitable step length
    return None, phi_a1
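

# Illustrative usage sketch (not part of the scipy API; the helper name
# `_example_scalar_search_armijo` and the 1-D test function are assumptions
# for exposition).  phi0 and derphi0 must be supplied by the caller.
def _example_scalar_search_armijo():
    phi = lambda alpha: (alpha - 0.1)**2
    phi0 = phi(0.0)                       # 0.01
    derphi0 = -0.2                        # phi'(0), negative: descent
    alpha, phi1 = scalar_search_armijo(phi, phi0, derphi0)
    return alpha, phi1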


#------------------------------------------------------------------------------
# Non-monotone line search for DF-SANE
#------------------------------------------------------------------------------

def _nonmonotone_line_search_cruz(f, x_k, d, prev_fs, eta,
                                  gamma=1e-4, tau_min=0.1, tau_max=0.5):
    """
    Nonmonotone backtracking line search as described in [1]_

    Parameters
    ----------
    f : callable
        Function returning a tuple ``(f, F)`` where ``f`` is the value
        of a merit function and ``F`` the residual.
    x_k : ndarray
        Initial position.
    d : ndarray
        Search direction.
    prev_fs : list of float
        List of previous merit function values. Should have
        ``len(prev_fs) <= M`` where ``M`` is the nonmonotonicity window
        parameter.
    eta : float
        Allowed merit function increase, see [1]_
    gamma, tau_min, tau_max : float, optional
        Search parameters, see [1]_

    Returns
    -------
    alpha : float
        Step length
    xp : ndarray
        Next position
    fp : float
        Merit function value at next position
    Fp : ndarray
        Residual at next position

    References
    ----------
    .. [1] "Spectral residual method without gradient information for solving
           large-scale nonlinear systems of equations." W. La Cruz,
           J.M. Martinez, M. Raydan. Math. Comp. **75**, 1429 (2006).

    """
    f_k = prev_fs[-1]
    f_bar = max(prev_fs)

    alpha_p = 1
    alpha_m = 1
    alpha = 1

    while True:
        xp = x_k + alpha_p * d
        fp, Fp = f(xp)

        if fp <= f_bar + eta - gamma * alpha_p**2 * f_k:
            alpha = alpha_p
            break

        alpha_tp = alpha_p**2 * f_k / (fp + (2*alpha_p - 1)*f_k)

        xp = x_k - alpha_m * d
        fp, Fp = f(xp)

        if fp <= f_bar + eta - gamma * alpha_m**2 * f_k:
            alpha = -alpha_m
            break

        alpha_tm = alpha_m**2 * f_k / (fp + (2*alpha_m - 1)*f_k)

        alpha_p = np.clip(alpha_tp, tau_min * alpha_p, tau_max * alpha_p)
        alpha_m = np.clip(alpha_tm, tau_min * alpha_m, tau_max * alpha_m)

    return alpha, xp, fp, Fp
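

# Illustrative usage sketch (not part of the scipy API; the helper name
# `_example_cruz` and the linear residual are assumptions for exposition).
# The merit function is ||F(x)||**2 and `prev_fs` holds the recent history
# used for the nonmonotone acceptance test.
def _example_cruz():
    def merit(x):
        F = x                     # residual of the trivial system F(x) = x = 0
        return np.dot(F, F), F

    x_k = np.array([2.0, 3.0])
    d = -x_k                      # direction pointing at the root
    prev_fs = [merit(x_k)[0]]     # history window of merit values
    alpha, xp, fp, Fp = _nonmonotone_line_search_cruz(merit, x_k, d,
                                                      prev_fs, eta=0.1)
    return alpha, xp, fp, Fp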


def _nonmonotone_line_search_cheng(f, x_k, d, f_k, C, Q, eta,
                                   gamma=1e-4, tau_min=0.1, tau_max=0.5,
                                   nu=0.85):
    """
    Nonmonotone line search from [1]_

    Parameters
    ----------
    f : callable
        Function returning a tuple ``(f, F)`` where ``f`` is the value
        of a merit function and ``F`` the residual.
    x_k : ndarray
        Initial position.
    d : ndarray
        Search direction.
    f_k : float
        Initial merit function value.
    C, Q : float
        Control parameters. On the first iteration, give values
        Q=1.0, C=f_k
    eta : float
        Allowed merit function increase, see [1]_
    nu, gamma, tau_min, tau_max : float, optional
        Search parameters, see [1]_

    Returns
    -------
    alpha : float
        Step length
    xp : ndarray
        Next position
    fp : float
        Merit function value at next position
    Fp : ndarray
        Residual at next position
    C : float
        New value for the control parameter C
    Q : float
        New value for the control parameter Q

    References
    ----------
    .. [1] W. Cheng & D.-H. Li, ''A derivative-free nonmonotone line
           search and its application to the spectral residual
           method'', IMA J. Numer. Anal. 29, 814 (2009).

    """
    alpha_p = 1
    alpha_m = 1
    alpha = 1

    while True:
        xp = x_k + alpha_p * d
        fp, Fp = f(xp)

        if fp <= C + eta - gamma * alpha_p**2 * f_k:
            alpha = alpha_p
            break

        alpha_tp = alpha_p**2 * f_k / (fp + (2*alpha_p - 1)*f_k)

        xp = x_k - alpha_m * d
        fp, Fp = f(xp)

        if fp <= C + eta - gamma * alpha_m**2 * f_k:
            alpha = -alpha_m
            break

        alpha_tm = alpha_m**2 * f_k / (fp + (2*alpha_m - 1)*f_k)

        alpha_p = np.clip(alpha_tp, tau_min * alpha_p, tau_max * alpha_p)
        alpha_m = np.clip(alpha_tm, tau_min * alpha_m, tau_max * alpha_m)

    # Update C and Q
    Q_next = nu * Q + 1
    C = (nu * Q * (C + eta) + fp) / Q_next
    Q = Q_next

    return alpha, xp, fp, Fp, C, Q
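

# Illustrative usage sketch (not part of the scipy API; the helper name
# `_example_cheng` and the linear residual are assumptions for exposition).
# Instead of a history window, this variant carries the running control
# parameters C and Q between outer iterations (start with C = f_k, Q = 1.0).
def _example_cheng():
    def merit(x):
        F = x                     # residual of the trivial system F(x) = x = 0
        return np.dot(F, F), F

    x_k = np.array([2.0, 3.0])
    d = -x_k
    f_k = merit(x_k)[0]
    alpha, xp, fp, Fp, C, Q = _nonmonotone_line_search_cheng(
        merit, x_k, d, f_k, C=f_k, Q=1.0, eta=0.1)
    return alpha, xp, fp, Fp, C, Q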