1"""Trust-region optimization.""" 

2import math 

3 

4import numpy as np 

5import scipy.linalg 

6from .optimize import (_check_unknown_options, wrap_function, _status_message, 

7 OptimizeResult, _prepare_scalar_function) 

8 

9__all__ = [] 

10 

11 

12class BaseQuadraticSubproblem(object): 

13 """ 

14 Base/abstract class defining the quadratic model for trust-region 

15 minimization. Child classes must implement the ``solve`` method. 

16 

17 Values of the objective function, Jacobian and Hessian (if provided) at 

18 the current iterate ``x`` are evaluated on demand and then stored as 

19 attributes ``fun``, ``jac``, ``hess``. 

20 """ 

21 

22 def __init__(self, x, fun, jac, hess=None, hessp=None): 

23 self._x = x 

24 self._f = None 

25 self._g = None 

26 self._h = None 

27 self._g_mag = None 

28 self._cauchy_point = None 

29 self._newton_point = None 

30 self._fun = fun 

31 self._jac = jac 

32 self._hess = hess 

33 self._hessp = hessp 

34 
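    # Evaluating the instance at a step ``p`` returns the value of the
    # quadratic model of the objective around the current iterate,
    #     m(p) = fun + jac.T p + 0.5 * p.T B p,
    # where the product ``B p`` is supplied by ``hessp`` below.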

    def __call__(self, p):
        return self.fun + np.dot(self.jac, p) + 0.5 * np.dot(p, self.hessp(p))

    @property
    def fun(self):
        """Value of objective function at current iteration."""
        if self._f is None:
            self._f = self._fun(self._x)
        return self._f

    @property
    def jac(self):
        """Value of Jacobian of objective function at current iteration."""
        if self._g is None:
            self._g = self._jac(self._x)
        return self._g

    @property
    def hess(self):
        """Value of Hessian of objective function at current iteration."""
        if self._h is None:
            self._h = self._hess(self._x)
        return self._h

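    # Hessian-vector product: use the user-supplied ``hessp`` callable if
    # one was given, otherwise fall back to a dense matrix-vector product
    # with the full Hessian.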

    def hessp(self, p):
        if self._hessp is not None:
            return self._hessp(self._x, p)
        else:
            return np.dot(self.hess, p)

    @property
    def jac_mag(self):
        """Magnitude of Jacobian of objective function at current iteration."""
        if self._g_mag is None:
            self._g_mag = scipy.linalg.norm(self.jac)
        return self._g_mag

    def get_boundaries_intersections(self, z, d, trust_radius):
        """
        Solve the scalar quadratic equation ``||z + t d|| == trust_radius``.
        This is like a line-sphere intersection.
        Return the two values of t, sorted from low to high.
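
        Examples
        --------
        An illustrative sketch (``fun`` and ``jac`` are unused by this
        method, so ``None`` placeholders suffice): intersecting the line
        ``z + t*d`` with a circle of radius 1 about the origin.

        >>> import numpy as np
        >>> sub = BaseQuadraticSubproblem(np.zeros(2), None, None)
        >>> ta, tb = sub.get_boundaries_intersections(
        ...     np.zeros(2), np.array([1.0, 0.0]), trust_radius=1.0)
        >>> float(ta), float(tb)
        (-1.0, 1.0)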

77 """ 

78 a = np.dot(d, d) 

79 b = 2 * np.dot(z, d) 

80 c = np.dot(z, z) - trust_radius**2 

81 sqrt_discriminant = math.sqrt(b*b - 4*a*c) 

82 

83 # The following calculation is mathematically 

84 # equivalent to: 

85 # ta = (-b - sqrt_discriminant) / (2*a) 

86 # tb = (-b + sqrt_discriminant) / (2*a) 

87 # but produce smaller round off errors. 

88 # Look at Matrix Computation p.97 

89 # for a better justification. 

90 aux = b + math.copysign(sqrt_discriminant, b) 

91 ta = -aux / (2*a) 

92 tb = -2*c / aux 

93 return sorted([ta, tb]) 

    def solve(self, trust_radius):
        raise NotImplementedError('The solve method should be implemented by '
                                  'the child class')

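
# An illustrative sketch (hypothetical; not part of this module's API): the
# simplest possible child class, which "solves" the subproblem by returning
# the Cauchy point, i.e. the minimizer of the quadratic model along the
# steepest-descent direction, clipped to the trust-region boundary.  Real
# subproblem solvers (dogleg, Steihaug CG, ...) refine this point; any such
# class can be passed to ``_minimize_trust_region`` via ``subproblem=``.
class _ExampleCauchyPointSubproblem(BaseQuadraticSubproblem):
    def solve(self, trust_radius):
        g, g_mag = self.jac, self.jac_mag
        # Curvature of the model along the steepest-descent direction.
        gBg = np.dot(g, self.hessp(g))
        if gBg <= 0:
            # Non-positive curvature: step all the way to the boundary.
            tau = 1.0
        else:
            tau = min(g_mag**3 / (trust_radius * gBg), 1.0)
        p = -tau * (trust_radius / g_mag) * g
        hits_boundary = (tau == 1.0)
        return p, hits_boundary
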

def _minimize_trust_region(fun, x0, args=(), jac=None, hess=None, hessp=None,
                           subproblem=None, initial_trust_radius=1.0,
                           max_trust_radius=1000.0, eta=0.15, gtol=1e-4,
                           maxiter=None, disp=False, return_all=False,
                           callback=None, inexact=True, **unknown_options):
    """
    Minimization of scalar function of one or more variables using a
    trust-region algorithm.

    Options for the trust-region algorithm are:
        initial_trust_radius : float
            Initial trust radius.
        max_trust_radius : float
            Never propose steps that are longer than this value.
        eta : float
            Acceptance stringency for proposed steps: a step is accepted
            only if the ratio of actual to predicted reduction exceeds
            ``eta``.
        gtol : float
            Gradient norm must be less than `gtol`
            before successful termination.
        maxiter : int
            Maximum number of iterations to perform.
        disp : bool
            If True, print convergence message.
        inexact : bool
            Accuracy with which to solve subproblems. If True, fewer
            nonlinear iterations are needed, but more matrix-vector
            products are required. Only effective for method trust-krylov.

    This function is called by the `minimize` function.
    It is not supposed to be called directly.
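
    Examples
    --------
    A minimal sketch of indirect use via `minimize` (``'trust-ncg'`` is one
    of the trust-region methods that dispatches here), using SciPy's
    Rosenbrock test helpers:

    >>> import numpy as np
    >>> from scipy.optimize import minimize, rosen, rosen_der, rosen_hess
    >>> res = minimize(rosen, np.array([1.3, 0.7]), method='trust-ncg',
    ...                jac=rosen_der, hess=rosen_hess)
    >>> res.success
    True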

130 """ 

131 _check_unknown_options(unknown_options) 

132 

133 if jac is None: 

134 raise ValueError('Jacobian is currently required for trust-region ' 

135 'methods') 

136 if hess is None and hessp is None: 

137 raise ValueError('Either the Hessian or the Hessian-vector product ' 

138 'is currently required for trust-region methods') 

139 if subproblem is None: 

140 raise ValueError('A subproblem solving strategy is required for ' 

141 'trust-region methods') 

142 if not (0 <= eta < 0.25): 

143 raise Exception('invalid acceptance stringency') 

144 if max_trust_radius <= 0: 

145 raise Exception('the max trust radius must be positive') 

146 if initial_trust_radius <= 0: 

147 raise ValueError('the initial trust radius must be positive') 

148 if initial_trust_radius >= max_trust_radius: 

149 raise ValueError('the initial trust radius must be less than the ' 

150 'max trust radius') 

151 

152 # force the initial guess into a nice format 

153 x0 = np.asarray(x0).flatten() 

    # A ScalarFunction representing the problem. This caches calls to fun,
    # jac, hess.
    sf = _prepare_scalar_function(fun, x0, jac=jac, hess=hess, args=args)
    fun = sf.fun
    jac = sf.grad
    if hess is not None:
        hess = sf.hess
    # ScalarFunction doesn't represent hessp
    nhessp, hessp = wrap_function(hessp, args)

    # limit the number of iterations
    if maxiter is None:
        maxiter = len(x0)*200

    # init the search status
    warnflag = 0

    # initialize the search
    trust_radius = initial_trust_radius
    x = x0
    if return_all:
        allvecs = [x]
    m = subproblem(x, fun, jac, hess, hessp)
    k = 0

    # search for the function min
    # do not even start if the gradient is small enough
    while m.jac_mag >= gtol:

        # Solve the sub-problem.
        # This gives us the proposed step relative to the current position
        # and it tells us whether the proposed step
        # has reached the trust region boundary or not.
        try:
            p, hits_boundary = m.solve(trust_radius)
        except np.linalg.LinAlgError:
            warnflag = 3
            break

        # calculate the predicted value at the proposed point
        predicted_value = m(p)

        # define the local approximation at the proposed point
        x_proposed = x + p
        m_proposed = subproblem(x_proposed, fun, jac, hess, hessp)

        # evaluate the ratio defined in equation (4.4)
        actual_reduction = m.fun - m_proposed.fun
        predicted_reduction = m.fun - predicted_value
        if predicted_reduction <= 0:
            warnflag = 2
            break
        rho = actual_reduction / predicted_reduction

        # update the trust radius according to the actual/predicted ratio
        if rho < 0.25:
            trust_radius *= 0.25
        elif rho > 0.75 and hits_boundary:
            trust_radius = min(2*trust_radius, max_trust_radius)

        # if the ratio is high enough then accept the proposed step
        if rho > eta:
            x = x_proposed
            m = m_proposed

        # append the best guess, call back, increment the iteration count
        if return_all:
            allvecs.append(np.copy(x))
        if callback is not None:
            callback(np.copy(x))
        k += 1

        # check if the gradient is small enough to stop
        if m.jac_mag < gtol:
            warnflag = 0
            break

        # check if we have looked at enough iterations
        if k >= maxiter:
            warnflag = 1
            break

    # print some stuff if requested
    status_messages = (
            _status_message['success'],
            _status_message['maxiter'],
            'A bad approximation caused failure to predict improvement.',
            'A linalg error occurred, such as a non-psd Hessian.',
    )
    if disp:
        if warnflag == 0:
            print(status_messages[warnflag])
        else:
            print('Warning: ' + status_messages[warnflag])
        print("         Current function value: %f" % m.fun)
        print("         Iterations: %d" % k)
        print("         Function evaluations: %d" % sf.nfev)
        print("         Gradient evaluations: %d" % sf.ngev)
        print("         Hessian evaluations: %d" % (sf.nhev + nhessp[0]))

    result = OptimizeResult(x=x, success=(warnflag == 0), status=warnflag,
                            fun=m.fun, jac=m.jac, nfev=sf.nfev, njev=sf.ngev,
                            nhev=sf.nhev + nhessp[0], nit=k,
                            message=status_messages[warnflag])

    if hess is not None:
        result['hess'] = m.hess

    if return_all:
        result['allvecs'] = allvecs

    return result