Coverage for src/driada/dimensionality/effective.py: 92.31%

26 statements  

« prev     ^ index     » next       coverage.py v7.9.2, created at 2025-07-25 15:40 +0300

1from .utils import * 

2from scipy.stats import entropy 

3import warnings 

4from ..utils.data import correlation_matrix 

5 

# Threshold on the n/t ratio of multivariate time series data: above this
# value the cov/corr spectrum may be significantly distorted, so spectrum
# correction is recommended.
DATA_SHAPE_THR = 0.01

8 

9 

def _eff_dim(corr_eigs, q=2):
    """Return the effective dimension of an eigenvalue spectrum.

    The eigenvalues are normalised to sum to one and the exponential of the
    order-``q`` Renyi entropy of that distribution is returned. Every branch
    returns the same kind of quantity, so a spectrum of ``k`` equal
    eigenvalues yields ``k`` for any admissible ``q``.

    Parameters
    ----------
    corr_eigs : array-like
        Eigenvalues of the matrix under study.
    q : float, default 2
        Order of the Renyi entropy. ``q == 1`` uses the Shannon entropy,
        ``q == 2`` the quadratic entropy, ``q == np.inf`` the min-entropy;
        any other non-negative value uses the general Renyi formula.

    Returns
    -------
    float
        Effective dimension of the spectrum.

    Raises
    ------
    ValueError
        If ``q`` is negative.
    """
    if q < 0:
        raise ValueError('Renyi entropy is undefined for q<0')

    corr_eigs = np.asarray(corr_eigs)
    total = np.sum(corr_eigs)
    norm_corr_eigs = corr_eigs / total

    if q == 1:
        # Shannon entropy in bits, mapped back to a dimension via 2**H.
        return 2 ** entropy(norm_corr_eigs, base=2)

    if q == 2:
        # Quadratic entropy: (sum e)^2 / sum e^2, i.e. 1 / sum p^2.
        return total ** 2 / np.sum(corr_eigs ** 2)

    if q == np.inf:
        # Min-entropy: 1 / max p.
        return total / np.max(corr_eigs)

    # General order: H_q = log(sum p^q) / (1 - q). The entropy has to be
    # exponentiated so the result is a dimension, like the branches above,
    # rather than the raw entropy value.
    renyi_entropy = np.log(np.sum(norm_corr_eigs ** q)) / (1.0 - q)
    return np.exp(renyi_entropy)

29 

30 

def eff_dim(data, enable_correction, q=2, **correction_kwargs):
    """Estimate the effective dimensionality of ``data`` from its correlation spectrum.

    The correlation matrix of ``data`` is computed with ``correlation_matrix``;
    its eigenvalues (optionally corrected) are passed to ``_eff_dim``.

    Parameters
    ----------
    data : array-like with a two-element ``shape``
        Input data of shape ``(n, t)``.
        NOTE(review): the module comment on ``DATA_SHAPE_THR`` speaks of
        "n/t in multivariate time series data", so presumably ``n`` is the
        number of variables and ``t`` the number of time points - confirm
        against ``correlation_matrix``.
    enable_correction : bool
        If true, the spectrum is obtained from ``correct_cov_spectrum`` and
        the last element of its result is used. Otherwise the eigenvalues of
        the correlation matrix are used directly.
    q : float, default 2
        Order of the Renyi entropy forwarded to ``_eff_dim``.
    **correction_kwargs
        Extra keyword arguments forwarded to ``correct_cov_spectrum``.

    Returns
    -------
    float
        Whatever ``_eff_dim`` returns for the selected spectrum.

    Warns
    -----
    UserWarning
        If ``n / t`` exceeds ``DATA_SHAPE_THR`` while correction is disabled.
    """
    n, t = data.shape
    shape_ratio = 1.0 * n / t
    if shape_ratio > DATA_SHAPE_THR and not enable_correction:
        # stacklevel=2 attributes the warning to the caller of eff_dim.
        warnings.warn(
            f'N/T is {shape_ratio}, which is bigger than {DATA_SHAPE_THR}. '
            'Spectrum correction is recommended',
            stacklevel=2,
        )

    cmat = correlation_matrix(data)
    if enable_correction:
        corrected_eigs = correct_cov_spectrum(n, t, cmat, **correction_kwargs)
        # Presumably the last entry is the final corrected spectrum -
        # verify against correct_cov_spectrum.
        final_eigs = corrected_eigs[-1]
    else:
        final_eigs = eigh(cmat, eigvals_only=True)

    return _eff_dim(final_eigs, q=q)

44 

45 

46