Coverage for ParTIpy/initialize.py: 7%
56 statements
« prev ^ index » next coverage.py v7.6.12, created at 2025-03-16 10:22 +0100
« prev ^ index » next coverage.py v7.6.12, created at 2025-03-16 10:22 +0100
1import numpy as np
2from scipy.sparse import csr_matrix
5def _random_init(X: np.ndarray, n_archetypes: int, exclude: None | list = None, seed: int = 42) -> np.ndarray:
6 if exclude is None:
7 exclude = []
8 B = np.eye(N=n_archetypes, M=X.shape[0])
9 return B
12def _furthest_sum_init(X: np.ndarray, n_archetypes: int, exclude: None | list = None, seed: int = 42) -> np.ndarray:
13 """Furthest sum algorithm, to efficiently generat initial archetypes.
15 Parameters
16 ----------
17 X: numpy 2d-array
18 data matrix with shape (n_samples x n_features)
20 n_archetypes: int
21 number of candidate archetypes to extract.
23 exclude: numpy 1d-array
24 entries in X that can not be used as candidates.
26 seed: int
27 reproducibility
29 Output
30 ------
31 B : numpy 2d-array
32 B matrix with shape (n_archetypes, n_samples).
33 """
34 np.random.seed(seed)
36 if exclude is None:
37 exclude = []
39 def max_ind_val(input_list):
40 return max(zip(range(len(input_list)), input_list, strict=False), key=lambda x: x[1])
42 K = X.T
43 D, N = K.shape
44 index = np.array(range(N))
45 index[exclude] = 0
46 i = [int(np.ceil(N * np.random.rand()))]
47 index[i] = -1
48 ind_t = i
49 sum_dist = np.zeros((1, N), np.complex128)
51 if N > n_archetypes * D:
52 Kt = K
53 Kt2 = np.sum(Kt**2, axis=0)
54 for k in range(1, n_archetypes + 11):
55 if k > n_archetypes - 1:
56 Kq = Kt[:, i[0]] @ Kt
57 sum_dist -= np.lib.scimath.sqrt(Kt2 - 2 * Kq + Kt2[i[0]])
58 index[i[0]] = i[0]
59 i = i[1:]
60 t = np.where(index != -1)[0]
61 Kq = Kt[:, ind_t].T @ Kt
62 sum_dist += np.lib.scimath.sqrt(Kt2 - 2 * Kq + Kt2[ind_t])
63 ind, _ = max_ind_val(sum_dist[:, t][0].real)
64 ind_t = t[ind]
65 i.append(ind_t) # type: ignore
66 index[ind_t] = -1
67 else:
68 if D != N or np.sum(K - K.T) != 0: # Generate kernel if K not one
69 Kt = K
70 K = Kt.T @ Kt
71 K = np.lib.scimath.sqrt(np.matlib.repmat(np.diag(K), N, 1) - 2 * K + np.matlib.repmat((np.diag(K)).T, 1, N))
73 Kt2 = np.diag(K) # Horizontal
74 for k in range(1, n_archetypes + 11):
75 if k > n_archetypes - 1:
76 sum_dist -= np.lib.scimath.sqrt(Kt2 - 2 * K[i[0], :] + Kt2[i[0]])
77 index[i[0]] = i[0]
78 i = i[1:]
79 t = np.where(index != -1)[0]
80 sum_dist += np.lib.scimath.sqrt(Kt2 - 2 * K[ind_t, :] + Kt2[ind_t])
81 ind, _ = max_ind_val(sum_dist[:, t][0].real)
82 ind_t = t[ind]
83 i.append(ind_t) # type: ignore
84 index[ind_t] = -1
86 B = csr_matrix((np.ones(len(i)), (i, range(n_archetypes))), shape=(N, n_archetypes)).toarray().T
87 B = np.ascontiguousarray(B)
88 return B