Coverage for partipy/selection.py: 69%

29 statements  

« prev     ^ index     » next       coverage.py v7.8.0, created at 2025-05-09 10:26 +0200

1import numpy as np 

2from scipy.linalg import cho_factor, cho_solve 

3 

4 

5def _invert_SPD_mtx(mtx): 

6 # Cholesky factorization 

7 c, lower = cho_factor(mtx) 

8 

9 # Inverse using Cholesky 

10 identity = np.eye(mtx.shape[0]) 

11 cov_inv = cho_solve((c, lower), identity) 

12 return cov_inv 

13 

14 

def compute_IC_approx(X, X_tilde, n_archetypes):
    """
    Compute the approximate information-theoretic criterion to assess goodness-of-fit

    Reference: Suleman, A., 2017. Validation of archetypal analysis, 2017 IEEE International Conference on Fuzzy Systems (FUZZ-IEEE) pp. 1-6. https://doi.org/10.1109/FUZZ-IEEE.2017.8015385
    (see Equation 13)

    Parameters
    ----------
    X : np.ndarray
        Data matrix of shape (n_samples, n_features).
    X_tilde : np.ndarray
        Approximation of data matrix by archetypal analysis, i.e. X_tilde = A B X = A Z.
        Must have the same shape as X.
    n_archetypes : int
        Number of archetypes.

    Returns
    -------
    IC : float

    Raises
    ------
    ValueError
        If X and X_tilde do not have the same shape.
    """
    # Explicit check instead of ``assert`` so validation survives ``python -O``.
    if X.shape != X_tilde.shape:
        raise ValueError(
            f"X and X_tilde must have the same shape, got {X.shape} and {X_tilde.shape}"
        )
    n_samples, n_features = X.shape

    # np.cov squeezes its result to a 0-d array when there is a single feature;
    # promote back to (n_features, n_features) so inversion and ``@`` work.
    X_cov = np.atleast_2d(np.cov(X, rowvar=False, bias=True))
    X_tilde_cov = np.atleast_2d(np.cov(X_tilde, rowvar=False, bias=True))
    X_cov_inv = _invert_SPD_mtx(X_cov)

    # Log of the mean squared reconstruction error (squared Frobenius norm / #entries)
    log_mse = np.log(np.square(np.linalg.norm(X - X_tilde)) / (n_samples * n_features))
    # Complexity penalty, scaled by trace(cov(X_tilde) @ cov(X)^-1)
    penalty = 2 * ((2 * n_archetypes - 1) / np.trace(X_tilde_cov @ X_cov_inv))
    IC = log_mse + penalty
    return IC

46 

47 

def compute_IC(X, X_tilde, n_archetypes):
    """
    Compute the information-theoretic criterion to assess goodness-of-fit

    Reference: Suleman, A., 2017. Validation of archetypal analysis, 2017 IEEE International Conference on Fuzzy Systems (FUZZ-IEEE) pp. 1-6. https://doi.org/10.1109/FUZZ-IEEE.2017.8015385
    (see Equation 12)

    Parameters
    ----------
    X : np.ndarray
        Data matrix of shape (n_samples, n_features).
    X_tilde : np.ndarray
        Approximation of data matrix by archetypal analysis, i.e. X_tilde = A B X = A Z.
        Must have the same shape as X.
    n_archetypes : int
        Number of archetypes.

    Returns
    -------
    IC : float

    Raises
    ------
    ValueError
        If X and X_tilde do not have the same shape.
    """
    # Explicit check instead of ``assert`` so validation survives ``python -O``.
    if X.shape != X_tilde.shape:
        raise ValueError(
            f"X and X_tilde must have the same shape, got {X.shape} and {X_tilde.shape}"
        )
    n_samples, n_features = X.shape

    # np.cov squeezes its result to a 0-d array when there is a single feature;
    # promote back to (n_features, n_features) so inversion and ``@`` work.
    X_cov = np.atleast_2d(np.cov(X, rowvar=False, bias=True))
    X_tilde_cov = np.atleast_2d(np.cov(X_tilde, rowvar=False, bias=True))
    X_cov_inv = _invert_SPD_mtx(X_cov)

    # Parameter-count terms entering the penalty numerator
    K_mu = n_features * (n_archetypes - 1)
    K_beta = n_archetypes * (n_features - 1)

    # Log of the mean squared reconstruction error (squared Frobenius norm / #entries)
    log_mse = np.log(np.square(np.linalg.norm(X - X_tilde)) / (n_samples * n_features))
    # Complexity penalty, scaled by n_features * trace(cov(X_tilde) @ cov(X)^-1)
    penalty = 2 * ((K_mu + K_beta + 1) / (n_features * np.trace(X_tilde_cov @ X_cov_inv)))
    IC = log_mse + penalty
    return IC