Coverage for ParTIpy/initialize.py: 7%

56 statements  

« prev     ^ index     » next       coverage.py v7.6.12, created at 2025-03-16 10:22 +0100

1import numpy as np 

2from scipy.sparse import csr_matrix 

3 

4 

5def _random_init(X: np.ndarray, n_archetypes: int, exclude: None | list = None, seed: int = 42) -> np.ndarray: 

6 if exclude is None: 

7 exclude = [] 

8 B = np.eye(N=n_archetypes, M=X.shape[0]) 

9 return B 

10 

11 

12def _furthest_sum_init(X: np.ndarray, n_archetypes: int, exclude: None | list = None, seed: int = 42) -> np.ndarray: 

13 """Furthest sum algorithm, to efficiently generat initial archetypes. 

14 

15 Parameters 

16 ---------- 

17 X: numpy 2d-array 

18 data matrix with shape (n_samples x n_features) 

19 

20 n_archetypes: int 

21 number of candidate archetypes to extract. 

22 

23 exclude: numpy 1d-array 

24 entries in X that can not be used as candidates. 

25 

26 seed: int 

27 reproducibility 

28 

29 Output 

30 ------ 

31 B : numpy 2d-array 

32 B matrix with shape (n_archetypes, n_samples). 

33 """ 

34 np.random.seed(seed) 

35 

36 if exclude is None: 

37 exclude = [] 

38 

39 def max_ind_val(input_list): 

40 return max(zip(range(len(input_list)), input_list, strict=False), key=lambda x: x[1]) 

41 

42 K = X.T 

43 D, N = K.shape 

44 index = np.array(range(N)) 

45 index[exclude] = 0 

46 i = [int(np.ceil(N * np.random.rand()))] 

47 index[i] = -1 

48 ind_t = i 

49 sum_dist = np.zeros((1, N), np.complex128) 

50 

51 if N > n_archetypes * D: 

52 Kt = K 

53 Kt2 = np.sum(Kt**2, axis=0) 

54 for k in range(1, n_archetypes + 11): 

55 if k > n_archetypes - 1: 

56 Kq = Kt[:, i[0]] @ Kt 

57 sum_dist -= np.lib.scimath.sqrt(Kt2 - 2 * Kq + Kt2[i[0]]) 

58 index[i[0]] = i[0] 

59 i = i[1:] 

60 t = np.where(index != -1)[0] 

61 Kq = Kt[:, ind_t].T @ Kt 

62 sum_dist += np.lib.scimath.sqrt(Kt2 - 2 * Kq + Kt2[ind_t]) 

63 ind, _ = max_ind_val(sum_dist[:, t][0].real) 

64 ind_t = t[ind] 

65 i.append(ind_t) # type: ignore 

66 index[ind_t] = -1 

67 else: 

68 if D != N or np.sum(K - K.T) != 0: # Generate kernel if K not one 

69 Kt = K 

70 K = Kt.T @ Kt 

71 K = np.lib.scimath.sqrt(np.matlib.repmat(np.diag(K), N, 1) - 2 * K + np.matlib.repmat((np.diag(K)).T, 1, N)) 

72 

73 Kt2 = np.diag(K) # Horizontal 

74 for k in range(1, n_archetypes + 11): 

75 if k > n_archetypes - 1: 

76 sum_dist -= np.lib.scimath.sqrt(Kt2 - 2 * K[i[0], :] + Kt2[i[0]]) 

77 index[i[0]] = i[0] 

78 i = i[1:] 

79 t = np.where(index != -1)[0] 

80 sum_dist += np.lib.scimath.sqrt(Kt2 - 2 * K[ind_t, :] + Kt2[ind_t]) 

81 ind, _ = max_ind_val(sum_dist[:, t][0].real) 

82 ind_t = t[ind] 

83 i.append(ind_t) # type: ignore 

84 index[ind_t] = -1 

85 

86 B = csr_matrix((np.ones(len(i)), (i, range(n_archetypes))), shape=(N, n_archetypes)).toarray().T 

87 B = np.ascontiguousarray(B) 

88 return B