Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# -*- coding: utf-8 -*- 

2""" 

3This file contains a Python version of the gradient projection rotation 

4algorithms (GPA) developed by Bernaards, C.A. and Jennrich, R.I. 

5The code is based on code developed Bernaards, C.A. and Jennrich, R.I. 

6and is ported and made available with permission of the authors. 

7 

8References 

9---------- 

10[1] Bernaards, C.A. and Jennrich, R.I. (2005) Gradient Projection Algorithms 

11and Software for Arbitrary Rotation Criteria in Factor Analysis. Educational 

12and Psychological Measurement, 65 (5), 676-696. 

13 

14[2] Jennrich, R.I. (2001). A simple general procedure for orthogonal rotation. 

15Psychometrika, 66, 289-306. 

16 

17[3] Jennrich, R.I. (2002). A simple general method for oblique rotation. 

18Psychometrika, 67, 7-19. 

19 

20[4] http://www.stat.ucla.edu/research/gpa/matlab.net 

21 

22[5] http://www.stat.ucla.edu/research/gpa/GPderfree.txt 

23""" 

24 

25import numpy as np 

26 

27 

28def GPA(A, ff=None, vgQ=None, T=None, max_tries=501, 

29 rotation_method='orthogonal', tol=1e-5): 

30 r""" 

31 The gradient projection algorithm (GPA) minimizes a target function 

32 :math:`\phi(L)`, where :math:`L` is a matrix with rotated factors. 

33 

34 For orthogonal rotation methods :math:`L=AT`, where :math:`T` is an 

35 orthogonal matrix. For oblique rotation matrices :math:`L=A(T^*)^{-1}`, 

36 where :math:`T` is a normal matrix, i.e., :math:`TT^*=T^*T`. Oblique 

37 rotations relax the orthogonality constraint in order to gain simplicity 

38 in the interpretation. 

39 

40 Parameters 

41 ---------- 

42 A : numpy matrix 

43 non rotated factors 

44 T : numpy matrix (default identity matrix) 

45 initial guess of rotation matrix 

46 ff : function (defualt None) 

47 criterion :math:`\phi` to optimize. Should have A, T, L as keyword 

48 arguments 

49 and mapping to a float. Only used (and required) if vgQ is not 

50 provided. 

51 vgQ : function (defualt None) 

52 criterion :math:`\phi` to optimize and its derivative. Should have 

53 A, T, L as keyword arguments and mapping to a tuple containing a 

54 float and vector. Can be omitted if ff is provided. 

55 max_tries : int (default 501) 

56 maximum number of iterations 

57 rotation_method : str 

58 should be one of {orthogonal, oblique} 

59 tol : float 

60 stop criterion, algorithm stops if Frobenius norm of gradient is 

61 smaller then tol 

62 """ 

63 # pre processing 

64 if rotation_method not in ['orthogonal', 'oblique']: 

65 raise ValueError('rotation_method should be one of ' 

66 '{orthogonal, oblique}') 

67 if vgQ is None: 

68 if ff is None: 

69 raise ValueError('ff should be provided if vgQ is not') 

70 derivative_free = True 

71 Gff = lambda x: Gf(x, lambda y: ff(T=y, A=A, L=None)) 

72 else: 

73 derivative_free = False 

74 if T is None: 

75 T = np.eye(A.shape[1]) 

76 # pre processing for iteration 

77 al = 1 

78 table = [] 

79 # pre processing for iteration: initialize f and G 

80 if derivative_free: 

81 f = ff(T=T, A=A, L=None) 

82 G = Gff(T) 

83 elif rotation_method == 'orthogonal': # and not derivative_free 

84 L = A.dot(T) 

85 f, Gq = vgQ(L=L) 

86 G = (A.T).dot(Gq) 

87 else: # i.e. rotation_method == 'oblique' and not derivative_free 

88 Ti = np.linalg.inv(T) 

89 L = A.dot(Ti.T) 

90 f, Gq = vgQ(L=L) 

91 G = -((L.T).dot(Gq).dot(Ti)).T 

92 # iteration 

93 for i_try in range(0, max_tries): 

94 # determine Gp 

95 if rotation_method == 'orthogonal': 

96 M = (T.T).dot(G) 

97 S = (M + M.T)/2 

98 Gp = G - T.dot(S) 

99 else: # i.e. if rotation_method == 'oblique': 

100 Gp = G-T.dot(np.diag(np.sum(T*G, axis=0))) 

101 s = np.linalg.norm(Gp, 'fro') 

102 table.append([i_try, f, np.log10(s), al]) 

103 # if we are close stop 

104 if s < tol: 

105 break 

106 # update T 

107 al = 2*al 

108 for i in range(11): 

109 # determine Tt 

110 X = T - al*Gp 

111 if rotation_method == 'orthogonal': 

112 U, D, V = np.linalg.svd(X, full_matrices=False) 

113 Tt = U.dot(V) 

114 else: # i.e. if rotation_method == 'oblique': 

115 v = 1/np.sqrt(np.sum(X**2, axis=0)) 

116 Tt = X.dot(np.diag(v)) 

117 # calculate objective using Tt 

118 if derivative_free: 

119 ft = ff(T=Tt, A=A, L=None) 

120 elif rotation_method == 'orthogonal': # and not derivative_free 

121 L = A.dot(Tt) 

122 ft, Gq = vgQ(L=L) 

123 else: # i.e. rotation_method == 'oblique' and not derivative_free 

124 Ti = np.linalg.inv(Tt) 

125 L = A.dot(Ti.T) 

126 ft, Gq = vgQ(L=L) 

127 # if sufficient improvement in objective -> use this T 

128 if ft < f-.5*s**2*al: 

129 break 

130 al = al/2 

131 # post processing for next iteration 

132 T = Tt 

133 f = ft 

134 if derivative_free: 

135 G = Gff(T) 

136 elif rotation_method == 'orthogonal': # and not derivative_free 

137 G = (A.T).dot(Gq) 

138 else: # i.e. rotation_method == 'oblique' and not derivative_free 

139 G = -((L.T).dot(Gq).dot(Ti)).T 

140 # post processing 

141 Th = T 

142 Lh = rotateA(A, T, rotation_method=rotation_method) 

143 Phi = (T.T).dot(T) 

144 return Lh, Phi, Th, table 

145 

146 

147def Gf(T, ff): 

148 """ 

149 Subroutine for the gradient of f using numerical derivatives. 

150 """ 

151 k = T.shape[0] 

152 ep = 1e-4 

153 G = np.zeros((k, k)) 

154 for r in range(k): 

155 for s in range(k): 

156 dT = np.zeros((k, k)) 

157 dT[r, s] = ep 

158 G[r, s] = (ff(T+dT)-ff(T-dT))/(2*ep) 

159 return G 

160 

161 

162def rotateA(A, T, rotation_method='orthogonal'): 

163 r""" 

164 For orthogonal rotation methods :math:`L=AT`, where :math:`T` is an 

165 orthogonal matrix. For oblique rotation matrices :math:`L=A(T^*)^{-1}`, 

166 where :math:`T` is a normal matrix, i.e., :math:`TT^*=T^*T`. Oblique 

167 rotations relax the orthogonality constraint in order to gain simplicity 

168 in the interpretation. 

169 """ 

170 if rotation_method == 'orthogonal': 

171 L = A.dot(T) 

172 elif rotation_method == 'oblique': 

173 L = A.dot(np.linalg.inv(T.T)) 

174 else: # i.e. if rotation_method == 'oblique': 

175 raise ValueError('rotation_method should be one of ' 

176 '{orthogonal, oblique}') 

177 return L 

178 

179 

180def oblimin_objective(L=None, A=None, T=None, gamma=0, 

181 rotation_method='orthogonal', 

182 return_gradient=True): 

183 r""" 

184 Objective function for the oblimin family for orthogonal or 

185 oblique rotation wich minimizes: 

186 

187 .. math:: 

188 \phi(L) = \frac{1}{4}(L\circ L,(I-\gamma C)(L\circ L)N), 

189 

190 where :math:`L` is a :math:`p\times k` matrix, :math:`N` is 

191 :math:`k\times k` 

192 matrix with zeros on the diagonal and ones elsewhere, :math:`C` is a 

193 :math:`p\times p` matrix with elements equal to :math:`1/p`, 

194 :math:`(X,Y)=\operatorname{Tr}(X^*Y)` is the Frobenius norm and 

195 :math:`\circ` 

196 is the element-wise product or Hadamard product. 

197 

198 The gradient is given by 

199 

200 .. math:: 

201 L\circ\left[(I-\gamma C) (L \circ L)N\right]. 

202 

203 Either :math:`L` should be provided or :math:`A` and :math:`T` should be 

204 provided. 

205 

206 For orthogonal rotations :math:`L` satisfies 

207 

208 .. math:: 

209 L = AT, 

210 

211 where :math:`T` is an orthogonal matrix. For oblique rotations :math:`L` 

212 satisfies 

213 

214 .. math:: 

215 L = A(T^*)^{-1}, 

216 

217 where :math:`T` is a normal matrix. 

218 

219 The oblimin family is parametrized by the parameter :math:`\gamma`. For 

220 orthogonal rotations: 

221 

222 * :math:`\gamma=0` corresponds to quartimax, 

223 * :math:`\gamma=\frac{1}{2}` corresponds to biquartimax, 

224 * :math:`\gamma=1` corresponds to varimax, 

225 * :math:`\gamma=\frac{1}{p}` corresponds to equamax. 

226 For oblique rotations rotations: 

227 

228 * :math:`\gamma=0` corresponds to quartimin, 

229 * :math:`\gamma=\frac{1}{2}` corresponds to biquartimin. 

230 

231 Parameters 

232 ---------- 

233 L : numpy matrix (default None) 

234 rotated factors, i.e., :math:`L=A(T^*)^{-1}=AT` 

235 A : numpy matrix (default None) 

236 non rotated factors 

237 T : numpy matrix (default None) 

238 rotation matrix 

239 gamma : float (default 0) 

240 a parameter 

241 rotation_method : str 

242 should be one of {orthogonal, oblique} 

243 return_gradient : bool (default True) 

244 toggles return of gradient 

245 """ 

246 if L is None: 

247 assert(A is not None and T is not None) 

248 L = rotateA(A, T, rotation_method=rotation_method) 

249 p, k = L.shape 

250 L2 = L**2 

251 N = np.ones((k, k))-np.eye(k) 

252 if np.isclose(gamma, 0): 

253 X = L2.dot(N) 

254 else: 

255 C = np.ones((p, p))/p 

256 X = (np.eye(p) - gamma*C).dot(L2).dot(N) 

257 phi = np.sum(L2*X)/4 

258 if return_gradient: 

259 Gphi = L*X 

260 return phi, Gphi 

261 else: 

262 return phi 

263 

264 

265def orthomax_objective(L=None, A=None, T=None, gamma=0, return_gradient=True): 

266 r""" 

267 Objective function for the orthomax family for orthogonal 

268 rotation wich minimizes the following objective: 

269 

270 .. math:: 

271 \phi(L) = -\frac{1}{4}(L\circ L,(I-\gamma C)(L\circ L)), 

272 

273 where :math:`0\leq\gamma\leq1`, :math:`L` is a :math:`p\times k` matrix, 

274 :math:`C` is a :math:`p\times p` matrix with elements equal to 

275 :math:`1/p`, 

276 :math:`(X,Y)=\operatorname{Tr}(X^*Y)` is the Frobenius norm and 

277 :math:`\circ` is the element-wise product or Hadamard product. 

278 

279 Either :math:`L` should be provided or :math:`A` and :math:`T` should be 

280 provided. 

281 

282 For orthogonal rotations :math:`L` satisfies 

283 

284 .. math:: 

285 L = AT, 

286 

287 where :math:`T` is an orthogonal matrix. 

288 

289 The orthomax family is parametrized by the parameter :math:`\gamma`: 

290 

291 * :math:`\gamma=0` corresponds to quartimax, 

292 * :math:`\gamma=\frac{1}{2}` corresponds to biquartimax, 

293 * :math:`\gamma=1` corresponds to varimax, 

294 * :math:`\gamma=\frac{1}{p}` corresponds to equamax. 

295 

296 Parameters 

297 ---------- 

298 L : numpy matrix (default None) 

299 rotated factors, i.e., :math:`L=A(T^*)^{-1}=AT` 

300 A : numpy matrix (default None) 

301 non rotated factors 

302 T : numpy matrix (default None) 

303 rotation matrix 

304 gamma : float (default 0) 

305 a parameter 

306 return_gradient : bool (default True) 

307 toggles return of gradient 

308 """ 

309 assert 0 <= gamma <= 1, "Gamma should be between 0 and 1" 

310 if L is None: 

311 assert(A is not None and T is not None) 

312 L = rotateA(A, T, rotation_method='orthogonal') 

313 p, k = L.shape 

314 L2 = L**2 

315 if np.isclose(gamma, 0): 

316 X = L2 

317 else: 

318 C = np.ones((p, p))/p 

319 X = (np.eye(p)-gamma*C).dot(L2) 

320 phi = -np.sum(L2*X)/4 

321 if return_gradient: 

322 Gphi = -L*X 

323 return phi, Gphi 

324 else: 

325 return phi 

326 

327 

328def CF_objective(L=None, A=None, T=None, kappa=0, 

329 rotation_method='orthogonal', 

330 return_gradient=True): 

331 r""" 

332 Objective function for the Crawford-Ferguson family for orthogonal 

333 and oblique rotation wich minimizes the following objective: 

334 

335 .. math:: 

336 \phi(L) =\frac{1-\kappa}{4} (L\circ L,(L\circ L)N) 

337 -\frac{1}{4}(L\circ L,M(L\circ L)), 

338 

339 where :math:`0\leq\kappa\leq1`, :math:`L` is a :math:`p\times k` matrix, 

340 :math:`N` is :math:`k\times k` matrix with zeros on the diagonal and ones 

341 elsewhere, 

342 :math:`M` is :math:`p\times p` matrix with zeros on the diagonal and ones 

343 elsewhere 

344 :math:`(X,Y)=\operatorname{Tr}(X^*Y)` is the Frobenius norm and 

345 :math:`\circ` is the element-wise product or Hadamard product. 

346 

347 The gradient is given by 

348 

349 .. math:: 

350 d\phi(L) = (1-\kappa) L\circ\left[(L\circ L)N\right] 

351 -\kappa L\circ \left[M(L\circ L)\right]. 

352 

353 Either :math:`L` should be provided or :math:`A` and :math:`T` should be 

354 provided. 

355 

356 For orthogonal rotations :math:`L` satisfies 

357 

358 .. math:: 

359 L = AT, 

360 

361 where :math:`T` is an orthogonal matrix. For oblique rotations :math:`L` 

362 satisfies 

363 

364 .. math:: 

365 L = A(T^*)^{-1}, 

366 

367 where :math:`T` is a normal matrix. 

368 

369 For orthogonal rotations the oblimin (and orthomax) family of rotations is 

370 equivalent to the Crawford-Ferguson family. To be more precise: 

371 

372 * :math:`\kappa=0` corresponds to quartimax, 

373 * :math:`\kappa=\frac{1}{p}` corresponds to variamx, 

374 * :math:`\kappa=\frac{k-1}{p+k-2}` corresponds to parsimax, 

375 * :math:`\kappa=1` corresponds to factor parsimony. 

376 

377 Parameters 

378 ---------- 

379 L : numpy matrix (default None) 

380 rotated factors, i.e., :math:`L=A(T^*)^{-1}=AT` 

381 A : numpy matrix (default None) 

382 non rotated factors 

383 T : numpy matrix (default None) 

384 rotation matrix 

385 gamma : float (default 0) 

386 a parameter 

387 rotation_method : str 

388 should be one of {orthogonal, oblique} 

389 return_gradient : bool (default True) 

390 toggles return of gradient 

391 """ 

392 assert 0 <= kappa <= 1, "Kappa should be between 0 and 1" 

393 if L is None: 

394 assert(A is not None and T is not None) 

395 L = rotateA(A, T, rotation_method=rotation_method) 

396 p, k = L.shape 

397 L2 = L**2 

398 X = None 

399 if not np.isclose(kappa, 1): 

400 N = np.ones((k, k)) - np.eye(k) 

401 X = (1 - kappa)*L2.dot(N) 

402 if not np.isclose(kappa, 0): 

403 M = np.ones((p, p)) - np.eye(p) 

404 if X is None: 

405 X = kappa*M.dot(L2) 

406 else: 

407 X += kappa*M.dot(L2) 

408 phi = np.sum(L2 * X) / 4 

409 if return_gradient: 

410 Gphi = L*X 

411 return phi, Gphi 

412 else: 

413 return phi 

414 

415 

416def vgQ_target(H, L=None, A=None, T=None, rotation_method='orthogonal'): 

417 r""" 

418 Subroutine for the value of vgQ using orthogonal or oblique rotation 

419 towards a target matrix, i.e., we minimize: 

420 

421 .. math:: 

422 \phi(L) =\frac{1}{2}\|L-H\|^2 

423 

424 and the gradient is given by 

425 

426 .. math:: 

427 d\phi(L)=L-H. 

428 

429 Either :math:`L` should be provided or :math:`A` and :math:`T` should be 

430 provided. 

431 

432 For orthogonal rotations :math:`L` satisfies 

433 

434 .. math:: 

435 L = AT, 

436 

437 where :math:`T` is an orthogonal matrix. For oblique rotations :math:`L` 

438 satisfies 

439 

440 .. math:: 

441 L = A(T^*)^{-1}, 

442 

443 where :math:`T` is a normal matrix. 

444 

445 Parameters 

446 ---------- 

447 H : numpy matrix 

448 target matrix 

449 L : numpy matrix (default None) 

450 rotated factors, i.e., :math:`L=A(T^*)^{-1}=AT` 

451 A : numpy matrix (default None) 

452 non rotated factors 

453 T : numpy matrix (default None) 

454 rotation matrix 

455 rotation_method : str 

456 should be one of {orthogonal, oblique} 

457 """ 

458 if L is None: 

459 assert(A is not None and T is not None) 

460 L = rotateA(A, T, rotation_method=rotation_method) 

461 q = np.linalg.norm(L-H, 'fro')**2 

462 Gq = 2*(L-H) 

463 return q, Gq 

464 

465 

466def ff_target(H, L=None, A=None, T=None, rotation_method='orthogonal'): 

467 r""" 

468 Subroutine for the value of f using (orthogonal or oblique) rotation 

469 towards a target matrix, i.e., we minimize: 

470 

471 .. math:: 

472 \phi(L) =\frac{1}{2}\|L-H\|^2. 

473 

474 Either :math:`L` should be provided or :math:`A` and :math:`T` should be 

475 provided. For orthogonal rotations :math:`L` satisfies 

476 

477 .. math:: 

478 L = AT, 

479 

480 where :math:`T` is an orthogonal matrix. For oblique rotations 

481 :math:`L` satisfies 

482 

483 .. math:: 

484 L = A(T^*)^{-1}, 

485 

486 where :math:`T` is a normal matrix. 

487 

488 Parameters 

489 ---------- 

490 H : numpy matrix 

491 target matrix 

492 L : numpy matrix (default None) 

493 rotated factors, i.e., :math:`L=A(T^*)^{-1}=AT` 

494 A : numpy matrix (default None) 

495 non rotated factors 

496 T : numpy matrix (default None) 

497 rotation matrix 

498 rotation_method : str 

499 should be one of {orthogonal, oblique} 

500 """ 

501 if L is None: 

502 assert(A is not None and T is not None) 

503 L = rotateA(A, T, rotation_method=rotation_method) 

504 return np.linalg.norm(L-H, 'fro')**2 

505 

506 

507def vgQ_partial_target(H, W=None, L=None, A=None, T=None): 

508 r""" 

509 Subroutine for the value of vgQ using orthogonal rotation towards a partial 

510 target matrix, i.e., we minimize: 

511 

512 .. math:: 

513 \phi(L) =\frac{1}{2}\|W\circ(L-H)\|^2, 

514 

515 where :math:`\circ` is the element-wise product or Hadamard product and 

516 :math:`W` is a matrix whose entries can only be one or zero. The gradient 

517 is given by 

518 

519 .. math:: 

520 d\phi(L)=W\circ(L-H). 

521 

522 Either :math:`L` should be provided or :math:`A` and :math:`T` should be 

523 provided. 

524 

525 For orthogonal rotations :math:`L` satisfies 

526 

527 .. math:: 

528 L = AT, 

529 

530 where :math:`T` is an orthogonal matrix. 

531 

532 Parameters 

533 ---------- 

534 H : numpy matrix 

535 target matrix 

536 W : numpy matrix (default matrix with equal weight one for all entries) 

537 matrix with weights, entries can either be one or zero 

538 L : numpy matrix (default None) 

539 rotated factors, i.e., :math:`L=A(T^*)^{-1}=AT` 

540 A : numpy matrix (default None) 

541 non rotated factors 

542 T : numpy matrix (default None) 

543 rotation matrix 

544 """ 

545 if W is None: 

546 return vgQ_target(H, L=L, A=A, T=T) 

547 if L is None: 

548 assert(A is not None and T is not None) 

549 L = rotateA(A, T, rotation_method='orthogonal') 

550 q = np.linalg.norm(W*(L-H), 'fro')**2 

551 Gq = 2*W*(L-H) 

552 return q, Gq 

553 

554 

555def ff_partial_target(H, W=None, L=None, A=None, T=None): 

556 r""" 

557 Subroutine for the value of vgQ using orthogonal rotation towards a partial 

558 target matrix, i.e., we minimize: 

559 

560 .. math:: 

561 \phi(L) =\frac{1}{2}\|W\circ(L-H)\|^2, 

562 

563 where :math:`\circ` is the element-wise product or Hadamard product and 

564 :math:`W` is a matrix whose entries can only be one or zero. Either 

565 :math:`L` should be provided or :math:`A` and :math:`T` should be provided. 

566 

567 For orthogonal rotations :math:`L` satisfies 

568 

569 .. math:: 

570 L = AT, 

571 

572 where :math:`T` is an orthogonal matrix. 

573 

574 Parameters 

575 ---------- 

576 H : numpy matrix 

577 target matrix 

578 W : numpy matrix (default matrix with equal weight one for all entries) 

579 matrix with weights, entries can either be one or zero 

580 L : numpy matrix (default None) 

581 rotated factors, i.e., :math:`L=A(T^*)^{-1}=AT` 

582 A : numpy matrix (default None) 

583 non rotated factors 

584 T : numpy matrix (default None) 

585 rotation matrix 

586 """ 

587 if W is None: 

588 return ff_target(H, L=L, A=A, T=T) 

589 if L is None: 

590 assert(A is not None and T is not None) 

591 L = rotateA(A, T, rotation_method='orthogonal') 

592 q = np.linalg.norm(W*(L-H), 'fro')**2 

593 return q