Coverage for src/driada/dimensionality/effective.py: 92.31%
26 statements
« prev ^ index » next coverage.py v7.9.2, created at 2025-07-25 15:40 +0300
« prev ^ index » next coverage.py v7.9.2, created at 2025-07-25 15:40 +0300
1from .utils import *
2from scipy.stats import entropy
3import warnings
4from ..utils.data import correlation_matrix
# Threshold on the n/t ratio of multivariate time series data: when n/t exceeds
# this value the cov/corr spectrum may be significantly distorted, so spectrum
# correction is recommended.
DATA_SHAPE_THR = 0.01
10def _eff_dim(corr_eigs, q=2):
11 if q < 0:
12 raise ValueError('Renyi entropy is undefined for q<0')
14 norm_corr_eigs = corr_eigs / np.sum(corr_eigs)
15 if q == 1:
16 # standard entropy
17 return 2**entropy(norm_corr_eigs, base=2)
19 elif q == 2:
20 # quadratic entropy
21 return (sum(corr_eigs)**2)/sum([e**2 for e in corr_eigs])
23 elif q == np.inf:
24 # min-entropy
25 return np.sum(corr_eigs)/np.max(corr_eigs)
27 else:
28 return 1.0/(1.0 - q)*np.log(np.sum([p**q for p in norm_corr_eigs]))
def eff_dim(data, enable_correction, q=2, **correction_kwargs):
    """Estimate the effective dimension of a 2-D data array.

    The correlation matrix of ``data`` is computed, its eigenvalue spectrum
    is obtained (optionally through ``correct_cov_spectrum``), and the
    spectrum is passed to ``_eff_dim``.

    Parameters
    ----------
    data : array with a two-element ``shape`` ``(n, t)``
        Input data. NOTE(review): presumably n variables by t time points,
        judging by the DATA_SHAPE_THR comment; confirm against callers.
    enable_correction : bool
        If true, use the last element returned by ``correct_cov_spectrum``
        as the spectrum; otherwise use the plain eigenvalues of the
        correlation matrix.
    q : float, optional
        Order forwarded to ``_eff_dim``.
    **correction_kwargs
        Extra keyword arguments forwarded to ``correct_cov_spectrum``.

    Returns
    -------
    The value returned by ``_eff_dim`` for the chosen spectrum.

    Warns
    -----
    UserWarning
        If ``n / t`` exceeds ``DATA_SHAPE_THR`` and correction is disabled.
    """
    n, t = data.shape
    shape_ratio = n / t
    if shape_ratio > DATA_SHAPE_THR and not enable_correction:
        # stacklevel=2 attributes the warning to the caller of eff_dim.
        warnings.warn(
            f'N/T is {shape_ratio}, which is bigger than {DATA_SHAPE_THR}. '
            'Spectrum correction is recommended',
            stacklevel=2,
        )

    cmat = correlation_matrix(data)
    if enable_correction:
        # Only the final element of the correction output is used.
        final_eigs = correct_cov_spectrum(n, t, cmat, **correction_kwargs)[-1]
    else:
        final_eigs = eigh(cmat, eigvals_only=True)

    return _eff_dim(final_eigs, q=q)