Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1"""Sparse block 1-norm estimator. 

2""" 

3 

4import numpy as np 

5from scipy.sparse.linalg import aslinearoperator 

6 

7 

8__all__ = ['onenormest'] 

9 

10 

11def onenormest(A, t=2, itmax=5, compute_v=False, compute_w=False): 

12 """ 

13 Compute a lower bound of the 1-norm of a sparse matrix. 

14 

15 Parameters 

16 ---------- 

17 A : ndarray or other linear operator 

18 A linear operator that can be transposed and that can 

19 produce matrix products. 

20 t : int, optional 

21 A positive parameter controlling the tradeoff between 

22 accuracy versus time and memory usage. 

23 Larger values take longer and use more memory 

24 but give more accurate output. 

25 itmax : int, optional 

26 Use at most this many iterations. 

27 compute_v : bool, optional 

28 Request a norm-maximizing linear operator input vector if True. 

29 compute_w : bool, optional 

30 Request a norm-maximizing linear operator output vector if True. 

31 

32 Returns 

33 ------- 

34 est : float 

35 An underestimate of the 1-norm of the sparse matrix. 

36 v : ndarray, optional 

37 The vector such that ||Av||_1 == est*||v||_1. 

38 It can be thought of as an input to the linear operator 

39 that gives an output with particularly large norm. 

40 w : ndarray, optional 

41 The vector Av which has relatively large 1-norm. 

42 It can be thought of as an output of the linear operator 

43 that is relatively large in norm compared to the input. 

44 

45 Notes 

46 ----- 

47 This is algorithm 2.4 of [1]. 

48 

49 In [2] it is described as follows. 

50 "This algorithm typically requires the evaluation of 

51 about 4t matrix-vector products and almost invariably 

52 produces a norm estimate (which is, in fact, a lower 

53 bound on the norm) correct to within a factor 3." 

54 

55 .. versionadded:: 0.13.0 

56 

57 References 

58 ---------- 

59 .. [1] Nicholas J. Higham and Francoise Tisseur (2000), 

60 "A Block Algorithm for Matrix 1-Norm Estimation, 

61 with an Application to 1-Norm Pseudospectra." 

62 SIAM J. Matrix Anal. Appl. Vol. 21, No. 4, pp. 1185-1201. 

63 

64 .. [2] Awad H. Al-Mohy and Nicholas J. Higham (2009), 

65 "A new scaling and squaring algorithm for the matrix exponential." 

66 SIAM J. Matrix Anal. Appl. Vol. 31, No. 3, pp. 970-989. 

67 

68 Examples 

69 -------- 

70 >>> from scipy.sparse import csc_matrix 

71 >>> from scipy.sparse.linalg import onenormest 

72 >>> A = csc_matrix([[1., 0., 0.], [5., 8., 2.], [0., -1., 0.]], dtype=float) 

73 >>> A.todense() 

74 matrix([[ 1., 0., 0.], 

75 [ 5., 8., 2.], 

76 [ 0., -1., 0.]]) 

77 >>> onenormest(A) 

78 9.0 

79 >>> np.linalg.norm(A.todense(), ord=1) 

80 9.0 

81 """ 

82 

83 # Check the input. 

84 A = aslinearoperator(A) 

85 if A.shape[0] != A.shape[1]: 

86 raise ValueError('expected the operator to act like a square matrix') 

87 

88 # If the operator size is small compared to t, 

89 # then it is easier to compute the exact norm. 

90 # Otherwise estimate the norm. 

91 n = A.shape[1] 

92 if t >= n: 

93 A_explicit = np.asarray(aslinearoperator(A).matmat(np.identity(n))) 

94 if A_explicit.shape != (n, n): 

95 raise Exception('internal error: ', 

96 'unexpected shape ' + str(A_explicit.shape)) 

97 col_abs_sums = abs(A_explicit).sum(axis=0) 

98 if col_abs_sums.shape != (n, ): 

99 raise Exception('internal error: ', 

100 'unexpected shape ' + str(col_abs_sums.shape)) 

101 argmax_j = np.argmax(col_abs_sums) 

102 v = elementary_vector(n, argmax_j) 

103 w = A_explicit[:, argmax_j] 

104 est = col_abs_sums[argmax_j] 

105 else: 

106 est, v, w, nmults, nresamples = _onenormest_core(A, A.H, t, itmax) 

107 

108 # Report the norm estimate along with some certificates of the estimate. 

109 if compute_v or compute_w: 

110 result = (est,) 

111 if compute_v: 

112 result += (v,) 

113 if compute_w: 

114 result += (w,) 

115 return result 

116 else: 

117 return est 

118 

119 

120def _blocked_elementwise(func): 

121 """ 

122 Decorator for an elementwise function, to apply it blockwise along 

123 first dimension, to avoid excessive memory usage in temporaries. 

124 """ 

125 block_size = 2**20 

126 

127 def wrapper(x): 

128 if x.shape[0] < block_size: 

129 return func(x) 

130 else: 

131 y0 = func(x[:block_size]) 

132 y = np.zeros((x.shape[0],) + y0.shape[1:], dtype=y0.dtype) 

133 y[:block_size] = y0 

134 del y0 

135 for j in range(block_size, x.shape[0], block_size): 

136 y[j:j+block_size] = func(x[j:j+block_size]) 

137 return y 

138 return wrapper 

139 

140 

141@_blocked_elementwise 

142def sign_round_up(X): 

143 """ 

144 This should do the right thing for both real and complex matrices. 

145 

146 From Higham and Tisseur: 

147 "Everything in this section remains valid for complex matrices 

148 provided that sign(A) is redefined as the matrix (aij / |aij|) 

149 (and sign(0) = 1) transposes are replaced by conjugate transposes." 

150 

151 """ 

152 Y = X.copy() 

153 Y[Y == 0] = 1 

154 Y /= np.abs(Y) 

155 return Y 

156 

157 

158@_blocked_elementwise 

159def _max_abs_axis1(X): 

160 return np.max(np.abs(X), axis=1) 

161 

162 

163def _sum_abs_axis0(X): 

164 block_size = 2**20 

165 r = None 

166 for j in range(0, X.shape[0], block_size): 

167 y = np.sum(np.abs(X[j:j+block_size]), axis=0) 

168 if r is None: 

169 r = y 

170 else: 

171 r += y 

172 return r 

173 

174 

175def elementary_vector(n, i): 

176 v = np.zeros(n, dtype=float) 

177 v[i] = 1 

178 return v 

179 

180 

181def vectors_are_parallel(v, w): 

182 # Columns are considered parallel when they are equal or negative. 

183 # Entries are required to be in {-1, 1}, 

184 # which guarantees that the magnitudes of the vectors are identical. 

185 if v.ndim != 1 or v.shape != w.shape: 

186 raise ValueError('expected conformant vectors with entries in {-1,1}') 

187 n = v.shape[0] 

188 return np.dot(v, w) == n 

189 

190 

191def every_col_of_X_is_parallel_to_a_col_of_Y(X, Y): 

192 for v in X.T: 

193 if not any(vectors_are_parallel(v, w) for w in Y.T): 

194 return False 

195 return True 

196 

197 

198def column_needs_resampling(i, X, Y=None): 

199 # column i of X needs resampling if either 

200 # it is parallel to a previous column of X or 

201 # it is parallel to a column of Y 

202 n, t = X.shape 

203 v = X[:, i] 

204 if any(vectors_are_parallel(v, X[:, j]) for j in range(i)): 

205 return True 

206 if Y is not None: 

207 if any(vectors_are_parallel(v, w) for w in Y.T): 

208 return True 

209 return False 

210 

211 

212def resample_column(i, X): 

213 X[:, i] = np.random.randint(0, 2, size=X.shape[0])*2 - 1 

214 

215 

216def less_than_or_close(a, b): 

217 return np.allclose(a, b) or (a < b) 

218 

219 

220def _algorithm_2_2(A, AT, t): 

221 """ 

222 This is Algorithm 2.2. 

223 

224 Parameters 

225 ---------- 

226 A : ndarray or other linear operator 

227 A linear operator that can produce matrix products. 

228 AT : ndarray or other linear operator 

229 The transpose of A. 

230 t : int, optional 

231 A positive parameter controlling the tradeoff between 

232 accuracy versus time and memory usage. 

233 

234 Returns 

235 ------- 

236 g : sequence 

237 A non-negative decreasing vector 

238 such that g[j] is a lower bound for the 1-norm 

239 of the column of A of jth largest 1-norm. 

240 The first entry of this vector is therefore a lower bound 

241 on the 1-norm of the linear operator A. 

242 This sequence has length t. 

243 ind : sequence 

244 The ith entry of ind is the index of the column A whose 1-norm 

245 is given by g[i]. 

246 This sequence of indices has length t, and its entries are 

247 chosen from range(n), possibly with repetition, 

248 where n is the order of the operator A. 

249 

250 Notes 

251 ----- 

252 This algorithm is mainly for testing. 

253 It uses the 'ind' array in a way that is similar to 

254 its usage in algorithm 2.4. This algorithm 2.2 may be easier to test, 

255 so it gives a chance of uncovering bugs related to indexing 

256 which could have propagated less noticeably to algorithm 2.4. 

257 

258 """ 

259 A_linear_operator = aslinearoperator(A) 

260 AT_linear_operator = aslinearoperator(AT) 

261 n = A_linear_operator.shape[0] 

262 

263 # Initialize the X block with columns of unit 1-norm. 

264 X = np.ones((n, t)) 

265 if t > 1: 

266 X[:, 1:] = np.random.randint(0, 2, size=(n, t-1))*2 - 1 

267 X /= float(n) 

268 

269 # Iteratively improve the lower bounds. 

270 # Track extra things, to assert invariants for debugging. 

271 g_prev = None 

272 h_prev = None 

273 k = 1 

274 ind = range(t) 

275 while True: 

276 Y = np.asarray(A_linear_operator.matmat(X)) 

277 g = _sum_abs_axis0(Y) 

278 best_j = np.argmax(g) 

279 g.sort() 

280 g = g[::-1] 

281 S = sign_round_up(Y) 

282 Z = np.asarray(AT_linear_operator.matmat(S)) 

283 h = _max_abs_axis1(Z) 

284 

285 # If this algorithm runs for fewer than two iterations, 

286 # then its return values do not have the properties indicated 

287 # in the description of the algorithm. 

288 # In particular, the entries of g are not 1-norms of any 

289 # column of A until the second iteration. 

290 # Therefore we will require the algorithm to run for at least 

291 # two iterations, even though this requirement is not stated 

292 # in the description of the algorithm. 

293 if k >= 2: 

294 if less_than_or_close(max(h), np.dot(Z[:, best_j], X[:, best_j])): 

295 break 

296 ind = np.argsort(h)[::-1][:t] 

297 h = h[ind] 

298 for j in range(t): 

299 X[:, j] = elementary_vector(n, ind[j]) 

300 

301 # Check invariant (2.2). 

302 if k >= 2: 

303 if not less_than_or_close(g_prev[0], h_prev[0]): 

304 raise Exception('invariant (2.2) is violated') 

305 if not less_than_or_close(h_prev[0], g[0]): 

306 raise Exception('invariant (2.2) is violated') 

307 

308 # Check invariant (2.3). 

309 if k >= 3: 

310 for j in range(t): 

311 if not less_than_or_close(g[j], g_prev[j]): 

312 raise Exception('invariant (2.3) is violated') 

313 

314 # Update for the next iteration. 

315 g_prev = g 

316 h_prev = h 

317 k += 1 

318 

319 # Return the lower bounds and the corresponding column indices. 

320 return g, ind 

321 

322 

323def _onenormest_core(A, AT, t, itmax): 

324 """ 

325 Compute a lower bound of the 1-norm of a sparse matrix. 

326 

327 Parameters 

328 ---------- 

329 A : ndarray or other linear operator 

330 A linear operator that can produce matrix products. 

331 AT : ndarray or other linear operator 

332 The transpose of A. 

333 t : int, optional 

334 A positive parameter controlling the tradeoff between 

335 accuracy versus time and memory usage. 

336 itmax : int, optional 

337 Use at most this many iterations. 

338 

339 Returns 

340 ------- 

341 est : float 

342 An underestimate of the 1-norm of the sparse matrix. 

343 v : ndarray, optional 

344 The vector such that ||Av||_1 == est*||v||_1. 

345 It can be thought of as an input to the linear operator 

346 that gives an output with particularly large norm. 

347 w : ndarray, optional 

348 The vector Av which has relatively large 1-norm. 

349 It can be thought of as an output of the linear operator 

350 that is relatively large in norm compared to the input. 

351 nmults : int, optional 

352 The number of matrix products that were computed. 

353 nresamples : int, optional 

354 The number of times a parallel column was observed, 

355 necessitating a re-randomization of the column. 

356 

357 Notes 

358 ----- 

359 This is algorithm 2.4. 

360 

361 """ 

362 # This function is a more or less direct translation 

363 # of Algorithm 2.4 from the Higham and Tisseur (2000) paper. 

364 A_linear_operator = aslinearoperator(A) 

365 AT_linear_operator = aslinearoperator(AT) 

366 if itmax < 2: 

367 raise ValueError('at least two iterations are required') 

368 if t < 1: 

369 raise ValueError('at least one column is required') 

370 n = A.shape[0] 

371 if t >= n: 

372 raise ValueError('t should be smaller than the order of A') 

373 # Track the number of big*small matrix multiplications 

374 # and the number of resamplings. 

375 nmults = 0 

376 nresamples = 0 

377 # "We now explain our choice of starting matrix. We take the first 

378 # column of X to be the vector of 1s [...] This has the advantage that 

379 # for a matrix with nonnegative elements the algorithm converges 

380 # with an exact estimate on the second iteration, and such matrices 

381 # arise in applications [...]" 

382 X = np.ones((n, t), dtype=float) 

383 # "The remaining columns are chosen as rand{-1,1}, 

384 # with a check for and correction of parallel columns, 

385 # exactly as for S in the body of the algorithm." 

386 if t > 1: 

387 for i in range(1, t): 

388 # These are technically initial samples, not resamples, 

389 # so the resampling count is not incremented. 

390 resample_column(i, X) 

391 for i in range(t): 

392 while column_needs_resampling(i, X): 

393 resample_column(i, X) 

394 nresamples += 1 

395 # "Choose starting matrix X with columns of unit 1-norm." 

396 X /= float(n) 

397 # "indices of used unit vectors e_j" 

398 ind_hist = np.zeros(0, dtype=np.intp) 

399 est_old = 0 

400 S = np.zeros((n, t), dtype=float) 

401 k = 1 

402 ind = None 

403 while True: 

404 Y = np.asarray(A_linear_operator.matmat(X)) 

405 nmults += 1 

406 mags = _sum_abs_axis0(Y) 

407 est = np.max(mags) 

408 best_j = np.argmax(mags) 

409 if est > est_old or k == 2: 

410 if k >= 2: 

411 ind_best = ind[best_j] 

412 w = Y[:, best_j] 

413 # (1) 

414 if k >= 2 and est <= est_old: 

415 est = est_old 

416 break 

417 est_old = est 

418 S_old = S 

419 if k > itmax: 

420 break 

421 S = sign_round_up(Y) 

422 del Y 

423 # (2) 

424 if every_col_of_X_is_parallel_to_a_col_of_Y(S, S_old): 

425 break 

426 if t > 1: 

427 # "Ensure that no column of S is parallel to another column of S 

428 # or to a column of S_old by replacing columns of S by rand{-1,1}." 

429 for i in range(t): 

430 while column_needs_resampling(i, S, S_old): 

431 resample_column(i, S) 

432 nresamples += 1 

433 del S_old 

434 # (3) 

435 Z = np.asarray(AT_linear_operator.matmat(S)) 

436 nmults += 1 

437 h = _max_abs_axis1(Z) 

438 del Z 

439 # (4) 

440 if k >= 2 and max(h) == h[ind_best]: 

441 break 

442 # "Sort h so that h_first >= ... >= h_last 

443 # and re-order ind correspondingly." 

444 # 

445 # Later on, we will need at most t+len(ind_hist) largest 

446 # entries, so drop the rest 

447 ind = np.argsort(h)[::-1][:t+len(ind_hist)].copy() 

448 del h 

449 if t > 1: 

450 # (5) 

451 # Break if the most promising t vectors have been visited already. 

452 if np.in1d(ind[:t], ind_hist).all(): 

453 break 

454 # Put the most promising unvisited vectors at the front of the list 

455 # and put the visited vectors at the end of the list. 

456 # Preserve the order of the indices induced by the ordering of h. 

457 seen = np.in1d(ind, ind_hist) 

458 ind = np.concatenate((ind[~seen], ind[seen])) 

459 for j in range(t): 

460 X[:, j] = elementary_vector(n, ind[j]) 

461 

462 new_ind = ind[:t][~np.in1d(ind[:t], ind_hist)] 

463 ind_hist = np.concatenate((ind_hist, new_ind)) 

464 k += 1 

465 v = elementary_vector(n, ind_best) 

466 return est, v, w, nmults, nresamples