Coverage for /home/martinb/.local/share/virtualenvs/camcops/lib/python3.6/site-packages/statsmodels/multivariate/factor_rotation/_gpa_rotation.py : 7%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# -*- coding: utf-8 -*-
2"""
3This file contains a Python version of the gradient projection rotation
4algorithms (GPA) developed by Bernaards, C.A. and Jennrich, R.I.
5The code is based on code developed Bernaards, C.A. and Jennrich, R.I.
6and is ported and made available with permission of the authors.
8References
9----------
10[1] Bernaards, C.A. and Jennrich, R.I. (2005) Gradient Projection Algorithms
11and Software for Arbitrary Rotation Criteria in Factor Analysis. Educational
12and Psychological Measurement, 65 (5), 676-696.
14[2] Jennrich, R.I. (2001). A simple general procedure for orthogonal rotation.
15Psychometrika, 66, 289-306.
17[3] Jennrich, R.I. (2002). A simple general method for oblique rotation.
18Psychometrika, 67, 7-19.
20[4] http://www.stat.ucla.edu/research/gpa/matlab.net
22[5] http://www.stat.ucla.edu/research/gpa/GPderfree.txt
23"""
25import numpy as np
28def GPA(A, ff=None, vgQ=None, T=None, max_tries=501,
29 rotation_method='orthogonal', tol=1e-5):
30 r"""
31 The gradient projection algorithm (GPA) minimizes a target function
32 :math:`\phi(L)`, where :math:`L` is a matrix with rotated factors.
34 For orthogonal rotation methods :math:`L=AT`, where :math:`T` is an
35 orthogonal matrix. For oblique rotation matrices :math:`L=A(T^*)^{-1}`,
36 where :math:`T` is a normal matrix, i.e., :math:`TT^*=T^*T`. Oblique
37 rotations relax the orthogonality constraint in order to gain simplicity
38 in the interpretation.
40 Parameters
41 ----------
42 A : numpy matrix
43 non rotated factors
44 T : numpy matrix (default identity matrix)
45 initial guess of rotation matrix
46 ff : function (defualt None)
47 criterion :math:`\phi` to optimize. Should have A, T, L as keyword
48 arguments
49 and mapping to a float. Only used (and required) if vgQ is not
50 provided.
51 vgQ : function (defualt None)
52 criterion :math:`\phi` to optimize and its derivative. Should have
53 A, T, L as keyword arguments and mapping to a tuple containing a
54 float and vector. Can be omitted if ff is provided.
55 max_tries : int (default 501)
56 maximum number of iterations
57 rotation_method : str
58 should be one of {orthogonal, oblique}
59 tol : float
60 stop criterion, algorithm stops if Frobenius norm of gradient is
61 smaller then tol
62 """
63 # pre processing
64 if rotation_method not in ['orthogonal', 'oblique']:
65 raise ValueError('rotation_method should be one of '
66 '{orthogonal, oblique}')
67 if vgQ is None:
68 if ff is None:
69 raise ValueError('ff should be provided if vgQ is not')
70 derivative_free = True
71 Gff = lambda x: Gf(x, lambda y: ff(T=y, A=A, L=None))
72 else:
73 derivative_free = False
74 if T is None:
75 T = np.eye(A.shape[1])
76 # pre processing for iteration
77 al = 1
78 table = []
79 # pre processing for iteration: initialize f and G
80 if derivative_free:
81 f = ff(T=T, A=A, L=None)
82 G = Gff(T)
83 elif rotation_method == 'orthogonal': # and not derivative_free
84 L = A.dot(T)
85 f, Gq = vgQ(L=L)
86 G = (A.T).dot(Gq)
87 else: # i.e. rotation_method == 'oblique' and not derivative_free
88 Ti = np.linalg.inv(T)
89 L = A.dot(Ti.T)
90 f, Gq = vgQ(L=L)
91 G = -((L.T).dot(Gq).dot(Ti)).T
92 # iteration
93 for i_try in range(0, max_tries):
94 # determine Gp
95 if rotation_method == 'orthogonal':
96 M = (T.T).dot(G)
97 S = (M + M.T)/2
98 Gp = G - T.dot(S)
99 else: # i.e. if rotation_method == 'oblique':
100 Gp = G-T.dot(np.diag(np.sum(T*G, axis=0)))
101 s = np.linalg.norm(Gp, 'fro')
102 table.append([i_try, f, np.log10(s), al])
103 # if we are close stop
104 if s < tol:
105 break
106 # update T
107 al = 2*al
108 for i in range(11):
109 # determine Tt
110 X = T - al*Gp
111 if rotation_method == 'orthogonal':
112 U, D, V = np.linalg.svd(X, full_matrices=False)
113 Tt = U.dot(V)
114 else: # i.e. if rotation_method == 'oblique':
115 v = 1/np.sqrt(np.sum(X**2, axis=0))
116 Tt = X.dot(np.diag(v))
117 # calculate objective using Tt
118 if derivative_free:
119 ft = ff(T=Tt, A=A, L=None)
120 elif rotation_method == 'orthogonal': # and not derivative_free
121 L = A.dot(Tt)
122 ft, Gq = vgQ(L=L)
123 else: # i.e. rotation_method == 'oblique' and not derivative_free
124 Ti = np.linalg.inv(Tt)
125 L = A.dot(Ti.T)
126 ft, Gq = vgQ(L=L)
127 # if sufficient improvement in objective -> use this T
128 if ft < f-.5*s**2*al:
129 break
130 al = al/2
131 # post processing for next iteration
132 T = Tt
133 f = ft
134 if derivative_free:
135 G = Gff(T)
136 elif rotation_method == 'orthogonal': # and not derivative_free
137 G = (A.T).dot(Gq)
138 else: # i.e. rotation_method == 'oblique' and not derivative_free
139 G = -((L.T).dot(Gq).dot(Ti)).T
140 # post processing
141 Th = T
142 Lh = rotateA(A, T, rotation_method=rotation_method)
143 Phi = (T.T).dot(T)
144 return Lh, Phi, Th, table
147def Gf(T, ff):
148 """
149 Subroutine for the gradient of f using numerical derivatives.
150 """
151 k = T.shape[0]
152 ep = 1e-4
153 G = np.zeros((k, k))
154 for r in range(k):
155 for s in range(k):
156 dT = np.zeros((k, k))
157 dT[r, s] = ep
158 G[r, s] = (ff(T+dT)-ff(T-dT))/(2*ep)
159 return G
162def rotateA(A, T, rotation_method='orthogonal'):
163 r"""
164 For orthogonal rotation methods :math:`L=AT`, where :math:`T` is an
165 orthogonal matrix. For oblique rotation matrices :math:`L=A(T^*)^{-1}`,
166 where :math:`T` is a normal matrix, i.e., :math:`TT^*=T^*T`. Oblique
167 rotations relax the orthogonality constraint in order to gain simplicity
168 in the interpretation.
169 """
170 if rotation_method == 'orthogonal':
171 L = A.dot(T)
172 elif rotation_method == 'oblique':
173 L = A.dot(np.linalg.inv(T.T))
174 else: # i.e. if rotation_method == 'oblique':
175 raise ValueError('rotation_method should be one of '
176 '{orthogonal, oblique}')
177 return L
180def oblimin_objective(L=None, A=None, T=None, gamma=0,
181 rotation_method='orthogonal',
182 return_gradient=True):
183 r"""
184 Objective function for the oblimin family for orthogonal or
185 oblique rotation wich minimizes:
187 .. math::
188 \phi(L) = \frac{1}{4}(L\circ L,(I-\gamma C)(L\circ L)N),
190 where :math:`L` is a :math:`p\times k` matrix, :math:`N` is
191 :math:`k\times k`
192 matrix with zeros on the diagonal and ones elsewhere, :math:`C` is a
193 :math:`p\times p` matrix with elements equal to :math:`1/p`,
194 :math:`(X,Y)=\operatorname{Tr}(X^*Y)` is the Frobenius norm and
195 :math:`\circ`
196 is the element-wise product or Hadamard product.
198 The gradient is given by
200 .. math::
201 L\circ\left[(I-\gamma C) (L \circ L)N\right].
203 Either :math:`L` should be provided or :math:`A` and :math:`T` should be
204 provided.
206 For orthogonal rotations :math:`L` satisfies
208 .. math::
209 L = AT,
211 where :math:`T` is an orthogonal matrix. For oblique rotations :math:`L`
212 satisfies
214 .. math::
215 L = A(T^*)^{-1},
217 where :math:`T` is a normal matrix.
219 The oblimin family is parametrized by the parameter :math:`\gamma`. For
220 orthogonal rotations:
222 * :math:`\gamma=0` corresponds to quartimax,
223 * :math:`\gamma=\frac{1}{2}` corresponds to biquartimax,
224 * :math:`\gamma=1` corresponds to varimax,
225 * :math:`\gamma=\frac{1}{p}` corresponds to equamax.
226 For oblique rotations rotations:
228 * :math:`\gamma=0` corresponds to quartimin,
229 * :math:`\gamma=\frac{1}{2}` corresponds to biquartimin.
231 Parameters
232 ----------
233 L : numpy matrix (default None)
234 rotated factors, i.e., :math:`L=A(T^*)^{-1}=AT`
235 A : numpy matrix (default None)
236 non rotated factors
237 T : numpy matrix (default None)
238 rotation matrix
239 gamma : float (default 0)
240 a parameter
241 rotation_method : str
242 should be one of {orthogonal, oblique}
243 return_gradient : bool (default True)
244 toggles return of gradient
245 """
246 if L is None:
247 assert(A is not None and T is not None)
248 L = rotateA(A, T, rotation_method=rotation_method)
249 p, k = L.shape
250 L2 = L**2
251 N = np.ones((k, k))-np.eye(k)
252 if np.isclose(gamma, 0):
253 X = L2.dot(N)
254 else:
255 C = np.ones((p, p))/p
256 X = (np.eye(p) - gamma*C).dot(L2).dot(N)
257 phi = np.sum(L2*X)/4
258 if return_gradient:
259 Gphi = L*X
260 return phi, Gphi
261 else:
262 return phi
265def orthomax_objective(L=None, A=None, T=None, gamma=0, return_gradient=True):
266 r"""
267 Objective function for the orthomax family for orthogonal
268 rotation wich minimizes the following objective:
270 .. math::
271 \phi(L) = -\frac{1}{4}(L\circ L,(I-\gamma C)(L\circ L)),
273 where :math:`0\leq\gamma\leq1`, :math:`L` is a :math:`p\times k` matrix,
274 :math:`C` is a :math:`p\times p` matrix with elements equal to
275 :math:`1/p`,
276 :math:`(X,Y)=\operatorname{Tr}(X^*Y)` is the Frobenius norm and
277 :math:`\circ` is the element-wise product or Hadamard product.
279 Either :math:`L` should be provided or :math:`A` and :math:`T` should be
280 provided.
282 For orthogonal rotations :math:`L` satisfies
284 .. math::
285 L = AT,
287 where :math:`T` is an orthogonal matrix.
289 The orthomax family is parametrized by the parameter :math:`\gamma`:
291 * :math:`\gamma=0` corresponds to quartimax,
292 * :math:`\gamma=\frac{1}{2}` corresponds to biquartimax,
293 * :math:`\gamma=1` corresponds to varimax,
294 * :math:`\gamma=\frac{1}{p}` corresponds to equamax.
296 Parameters
297 ----------
298 L : numpy matrix (default None)
299 rotated factors, i.e., :math:`L=A(T^*)^{-1}=AT`
300 A : numpy matrix (default None)
301 non rotated factors
302 T : numpy matrix (default None)
303 rotation matrix
304 gamma : float (default 0)
305 a parameter
306 return_gradient : bool (default True)
307 toggles return of gradient
308 """
309 assert 0 <= gamma <= 1, "Gamma should be between 0 and 1"
310 if L is None:
311 assert(A is not None and T is not None)
312 L = rotateA(A, T, rotation_method='orthogonal')
313 p, k = L.shape
314 L2 = L**2
315 if np.isclose(gamma, 0):
316 X = L2
317 else:
318 C = np.ones((p, p))/p
319 X = (np.eye(p)-gamma*C).dot(L2)
320 phi = -np.sum(L2*X)/4
321 if return_gradient:
322 Gphi = -L*X
323 return phi, Gphi
324 else:
325 return phi
328def CF_objective(L=None, A=None, T=None, kappa=0,
329 rotation_method='orthogonal',
330 return_gradient=True):
331 r"""
332 Objective function for the Crawford-Ferguson family for orthogonal
333 and oblique rotation wich minimizes the following objective:
335 .. math::
336 \phi(L) =\frac{1-\kappa}{4} (L\circ L,(L\circ L)N)
337 -\frac{1}{4}(L\circ L,M(L\circ L)),
339 where :math:`0\leq\kappa\leq1`, :math:`L` is a :math:`p\times k` matrix,
340 :math:`N` is :math:`k\times k` matrix with zeros on the diagonal and ones
341 elsewhere,
342 :math:`M` is :math:`p\times p` matrix with zeros on the diagonal and ones
343 elsewhere
344 :math:`(X,Y)=\operatorname{Tr}(X^*Y)` is the Frobenius norm and
345 :math:`\circ` is the element-wise product or Hadamard product.
347 The gradient is given by
349 .. math::
350 d\phi(L) = (1-\kappa) L\circ\left[(L\circ L)N\right]
351 -\kappa L\circ \left[M(L\circ L)\right].
353 Either :math:`L` should be provided or :math:`A` and :math:`T` should be
354 provided.
356 For orthogonal rotations :math:`L` satisfies
358 .. math::
359 L = AT,
361 where :math:`T` is an orthogonal matrix. For oblique rotations :math:`L`
362 satisfies
364 .. math::
365 L = A(T^*)^{-1},
367 where :math:`T` is a normal matrix.
369 For orthogonal rotations the oblimin (and orthomax) family of rotations is
370 equivalent to the Crawford-Ferguson family. To be more precise:
372 * :math:`\kappa=0` corresponds to quartimax,
373 * :math:`\kappa=\frac{1}{p}` corresponds to variamx,
374 * :math:`\kappa=\frac{k-1}{p+k-2}` corresponds to parsimax,
375 * :math:`\kappa=1` corresponds to factor parsimony.
377 Parameters
378 ----------
379 L : numpy matrix (default None)
380 rotated factors, i.e., :math:`L=A(T^*)^{-1}=AT`
381 A : numpy matrix (default None)
382 non rotated factors
383 T : numpy matrix (default None)
384 rotation matrix
385 gamma : float (default 0)
386 a parameter
387 rotation_method : str
388 should be one of {orthogonal, oblique}
389 return_gradient : bool (default True)
390 toggles return of gradient
391 """
392 assert 0 <= kappa <= 1, "Kappa should be between 0 and 1"
393 if L is None:
394 assert(A is not None and T is not None)
395 L = rotateA(A, T, rotation_method=rotation_method)
396 p, k = L.shape
397 L2 = L**2
398 X = None
399 if not np.isclose(kappa, 1):
400 N = np.ones((k, k)) - np.eye(k)
401 X = (1 - kappa)*L2.dot(N)
402 if not np.isclose(kappa, 0):
403 M = np.ones((p, p)) - np.eye(p)
404 if X is None:
405 X = kappa*M.dot(L2)
406 else:
407 X += kappa*M.dot(L2)
408 phi = np.sum(L2 * X) / 4
409 if return_gradient:
410 Gphi = L*X
411 return phi, Gphi
412 else:
413 return phi
416def vgQ_target(H, L=None, A=None, T=None, rotation_method='orthogonal'):
417 r"""
418 Subroutine for the value of vgQ using orthogonal or oblique rotation
419 towards a target matrix, i.e., we minimize:
421 .. math::
422 \phi(L) =\frac{1}{2}\|L-H\|^2
424 and the gradient is given by
426 .. math::
427 d\phi(L)=L-H.
429 Either :math:`L` should be provided or :math:`A` and :math:`T` should be
430 provided.
432 For orthogonal rotations :math:`L` satisfies
434 .. math::
435 L = AT,
437 where :math:`T` is an orthogonal matrix. For oblique rotations :math:`L`
438 satisfies
440 .. math::
441 L = A(T^*)^{-1},
443 where :math:`T` is a normal matrix.
445 Parameters
446 ----------
447 H : numpy matrix
448 target matrix
449 L : numpy matrix (default None)
450 rotated factors, i.e., :math:`L=A(T^*)^{-1}=AT`
451 A : numpy matrix (default None)
452 non rotated factors
453 T : numpy matrix (default None)
454 rotation matrix
455 rotation_method : str
456 should be one of {orthogonal, oblique}
457 """
458 if L is None:
459 assert(A is not None and T is not None)
460 L = rotateA(A, T, rotation_method=rotation_method)
461 q = np.linalg.norm(L-H, 'fro')**2
462 Gq = 2*(L-H)
463 return q, Gq
466def ff_target(H, L=None, A=None, T=None, rotation_method='orthogonal'):
467 r"""
468 Subroutine for the value of f using (orthogonal or oblique) rotation
469 towards a target matrix, i.e., we minimize:
471 .. math::
472 \phi(L) =\frac{1}{2}\|L-H\|^2.
474 Either :math:`L` should be provided or :math:`A` and :math:`T` should be
475 provided. For orthogonal rotations :math:`L` satisfies
477 .. math::
478 L = AT,
480 where :math:`T` is an orthogonal matrix. For oblique rotations
481 :math:`L` satisfies
483 .. math::
484 L = A(T^*)^{-1},
486 where :math:`T` is a normal matrix.
488 Parameters
489 ----------
490 H : numpy matrix
491 target matrix
492 L : numpy matrix (default None)
493 rotated factors, i.e., :math:`L=A(T^*)^{-1}=AT`
494 A : numpy matrix (default None)
495 non rotated factors
496 T : numpy matrix (default None)
497 rotation matrix
498 rotation_method : str
499 should be one of {orthogonal, oblique}
500 """
501 if L is None:
502 assert(A is not None and T is not None)
503 L = rotateA(A, T, rotation_method=rotation_method)
504 return np.linalg.norm(L-H, 'fro')**2
507def vgQ_partial_target(H, W=None, L=None, A=None, T=None):
508 r"""
509 Subroutine for the value of vgQ using orthogonal rotation towards a partial
510 target matrix, i.e., we minimize:
512 .. math::
513 \phi(L) =\frac{1}{2}\|W\circ(L-H)\|^2,
515 where :math:`\circ` is the element-wise product or Hadamard product and
516 :math:`W` is a matrix whose entries can only be one or zero. The gradient
517 is given by
519 .. math::
520 d\phi(L)=W\circ(L-H).
522 Either :math:`L` should be provided or :math:`A` and :math:`T` should be
523 provided.
525 For orthogonal rotations :math:`L` satisfies
527 .. math::
528 L = AT,
530 where :math:`T` is an orthogonal matrix.
532 Parameters
533 ----------
534 H : numpy matrix
535 target matrix
536 W : numpy matrix (default matrix with equal weight one for all entries)
537 matrix with weights, entries can either be one or zero
538 L : numpy matrix (default None)
539 rotated factors, i.e., :math:`L=A(T^*)^{-1}=AT`
540 A : numpy matrix (default None)
541 non rotated factors
542 T : numpy matrix (default None)
543 rotation matrix
544 """
545 if W is None:
546 return vgQ_target(H, L=L, A=A, T=T)
547 if L is None:
548 assert(A is not None and T is not None)
549 L = rotateA(A, T, rotation_method='orthogonal')
550 q = np.linalg.norm(W*(L-H), 'fro')**2
551 Gq = 2*W*(L-H)
552 return q, Gq
555def ff_partial_target(H, W=None, L=None, A=None, T=None):
556 r"""
557 Subroutine for the value of vgQ using orthogonal rotation towards a partial
558 target matrix, i.e., we minimize:
560 .. math::
561 \phi(L) =\frac{1}{2}\|W\circ(L-H)\|^2,
563 where :math:`\circ` is the element-wise product or Hadamard product and
564 :math:`W` is a matrix whose entries can only be one or zero. Either
565 :math:`L` should be provided or :math:`A` and :math:`T` should be provided.
567 For orthogonal rotations :math:`L` satisfies
569 .. math::
570 L = AT,
572 where :math:`T` is an orthogonal matrix.
574 Parameters
575 ----------
576 H : numpy matrix
577 target matrix
578 W : numpy matrix (default matrix with equal weight one for all entries)
579 matrix with weights, entries can either be one or zero
580 L : numpy matrix (default None)
581 rotated factors, i.e., :math:`L=A(T^*)^{-1}=AT`
582 A : numpy matrix (default None)
583 non rotated factors
584 T : numpy matrix (default None)
585 rotation matrix
586 """
587 if W is None:
588 return ff_target(H, L=L, A=A, T=T)
589 if L is None:
590 assert(A is not None and T is not None)
591 L = rotateA(A, T, rotation_method='orthogonal')
592 q = np.linalg.norm(W*(L-H), 'fro')**2
593 return q