Coverage for partipy/selection.py: 69%
29 statements
« prev ^ index » next coverage.py v7.8.0, created at 2025-05-09 10:26 +0200
« prev ^ index » next coverage.py v7.8.0, created at 2025-05-09 10:26 +0200
1import numpy as np
2from scipy.linalg import cho_factor, cho_solve
5def _invert_SPD_mtx(mtx):
6 # Cholesky factorization
7 c, lower = cho_factor(mtx)
9 # Inverse using Cholesky
10 identity = np.eye(mtx.shape[0])
11 cov_inv = cho_solve((c, lower), identity)
12 return cov_inv
def compute_IC_approx(X, X_tilde, n_archetypes):
    """
    Compute the approximate information-theoretic criterion used to assess goodness-of-fit.

    Reference: Suleman, A., 2017. Validation of archetypal analysis, 2017 IEEE International
    Conference on Fuzzy Systems (FUZZ-IEEE), pp. 1-6. https://doi.org/10.1109/FUZZ-IEEE.2017.8015385
    (see Equation 13)

    The value returned is::

        log(||X - X_tilde||_F^2 / (n_samples * n_features))
            + 2 * (2 * n_archetypes - 1) / tr(cov(X_tilde) @ cov(X)^-1)

    where both covariances are the biased (divide-by-``n_samples``) estimates
    taken over the columns.

    Parameters
    ----------
    X : np.ndarray
        Data matrix of shape (n_samples, n_features).
    X_tilde : np.ndarray
        Approximation of the data matrix by archetypal analysis,
        i.e. X_tilde = A B X = A Z. Must have the same shape as ``X``.
    n_archetypes : int
        Number of archetypes.

    Returns
    -------
    IC : float
        Value of the criterion.

    Raises
    ------
    numpy.linalg.LinAlgError
        Raised by ``cho_factor`` if the covariance of ``X`` cannot be
        Cholesky-factorized (e.g. it is not positive definite).
    """
    assert X.shape == X_tilde.shape, "X and X_tilde must have the same shape"
    n_samples, n_features = X.shape

    # np.cov squeezes single-feature input to a 0-d array; promote to (1, 1)
    # so the factorization below also works when n_features == 1.
    X_cov = np.atleast_2d(np.cov(X, rowvar=False, bias=True))
    X_tilde_cov = np.atleast_2d(np.cov(X_tilde, rowvar=False, bias=True))
    assert X_cov.shape == (n_features, n_features)
    assert X_tilde_cov.shape == (n_features, n_features)

    # tr(X_tilde_cov @ X_cov^-1) == tr(X_cov^-1 @ X_tilde_cov), so solve against
    # the Cholesky factor instead of forming the explicit inverse.
    cov_ratio_trace = np.trace(cho_solve(cho_factor(X_cov), X_tilde_cov))

    mean_sq_residual = np.square(np.linalg.norm(X - X_tilde)) / (n_samples * n_features)
    penalty = 2 * ((2 * n_archetypes - 1) / cov_ratio_trace)
    return np.log(mean_sq_residual) + penalty
def compute_IC(X, X_tilde, n_archetypes):
    """
    Compute the information-theoretic criterion used to assess goodness-of-fit.

    Reference: Suleman, A., 2017. Validation of archetypal analysis, 2017 IEEE International
    Conference on Fuzzy Systems (FUZZ-IEEE), pp. 1-6. https://doi.org/10.1109/FUZZ-IEEE.2017.8015385
    (see Equation 12)

    The value returned is::

        log(||X - X_tilde||_F^2 / (n_samples * n_features))
            + 2 * (K_mu + K_beta + 1) / (n_features * tr(cov(X_tilde) @ cov(X)^-1))

    with ``K_mu = n_features * (n_archetypes - 1)`` and
    ``K_beta = n_archetypes * (n_features - 1)``. Both covariances are the
    biased (divide-by-``n_samples``) estimates taken over the columns.

    Parameters
    ----------
    X : np.ndarray
        Data matrix of shape (n_samples, n_features).
    X_tilde : np.ndarray
        Approximation of the data matrix by archetypal analysis,
        i.e. X_tilde = A B X = A Z. Must have the same shape as ``X``.
    n_archetypes : int
        Number of archetypes.

    Returns
    -------
    IC : float
        Value of the criterion.

    Raises
    ------
    numpy.linalg.LinAlgError
        Raised by ``cho_factor`` if the covariance of ``X`` cannot be
        Cholesky-factorized (e.g. it is not positive definite).
    """
    assert X.shape == X_tilde.shape, "X and X_tilde must have the same shape"
    n_samples, n_features = X.shape

    # np.cov squeezes single-feature input to a 0-d array; promote to (1, 1)
    # so the factorization below also works when n_features == 1.
    X_cov = np.atleast_2d(np.cov(X, rowvar=False, bias=True))
    X_tilde_cov = np.atleast_2d(np.cov(X_tilde, rowvar=False, bias=True))
    assert X_cov.shape == (n_features, n_features)
    assert X_tilde_cov.shape == (n_features, n_features)

    # tr(X_tilde_cov @ X_cov^-1) == tr(X_cov^-1 @ X_tilde_cov), so solve against
    # the Cholesky factor instead of forming the explicit inverse.
    cov_ratio_trace = np.trace(cho_solve(cho_factor(X_cov), X_tilde_cov))

    K_mu = n_features * (n_archetypes - 1)
    K_beta = n_archetypes * (n_features - 1)

    mean_sq_residual = np.square(np.linalg.norm(X - X_tilde)) / (n_samples * n_features)
    penalty = 2 * ((K_mu + K_beta + 1) / (n_features * cov_ratio_trace))
    return np.log(mean_sq_residual) + penalty