Coverage for emd/imftools.py: 69%
198 statements
« prev ^ index » next coverage.py v7.6.11, created at 2025-03-08 15:44 +0000
« prev ^ index » next coverage.py v7.6.11, created at 2025-03-08 15:44 +0000
1#!/usr/bin/python
3# vim: set expandtab ts=4 sw=4:
5"""
6Functions for handling and assessing IMFs.
8Supporting tools for IMFs estimated using the emd.sift submodule.
10"""
12import logging
14import numpy as np
15from tabulate import tabulate
17from ._sift_core import (get_padded_extrema, interp_envelope,
18 zero_crossing_count)
19from .support import ensure_2d, ensure_equal_dims
21# Housekeeping for logging
22logger = logging.getLogger(__name__)
def amplitude_normalise(X, thresh=1e-10, clip=False, interp_method='pchip',
                        max_iters=3):
    """Normalise the amplitude envelope of an IMF to be 1.

    Multiple runs of normalisation are carried out until the desired threshold
    is reached. This uses the method described as part of the AM-FM transform
    [1]_

    Parameters
    ----------
    X : ndarray
        Input array of IMFs to be normalised. Either 2d, or 3d with the IMFs
        indexed along the second and third dimensions.
    thresh : float
        Threshold for stopping normalisation (Default value = 1e-10)
    clip : bool
        Whether to clip the output between -1 and 1 (Default value = False)
    interp_method : {'pchip','mono_pchip','splrep'}
        Method used to interpolate envelopes (Default value = 'pchip')
    max_iters : int
        Maximum number of iterations of normalisation to perform
        (Default value = 3)

    Returns
    -------
    ndarray
        Amplitude normalised IMFs, a copy with the same number of dimensions
        as the input.

    Notes
    -----
    An IMF is left untouched from the point at which ``interp_envelope``
    returns None. One summary message is logged per IMF once its
    normalisation has stopped; the reported value is ``nan`` if no
    convergence value could be computed for that IMF.

    References
    ----------
    .. [1] Huang, N. E., Wu, Z., Long, S. R., Arnold, K. C., Chen, X., & Blank,
       K. (2009). On Instantaneous Frequency. Advances in Adaptive Data Analysis,
       1(2), 177–229. https://doi.org/10.1142/s1793536909000096

    """
    logger.info('STARTED: Amplitude-Normalise')

    if X.ndim == 2:
        logger.debug('Normalising {0} samples across {1} IMFs'.format(*X.shape))
    else:
        logger.debug('Normalising {0} samples across {1} first-level and {2} second-level IMFs'.format(*X.shape))
    logger.debug('Using {0} interpolation with threshold of {1} and max_iters {2}'.format(interp_method,
                                                                                         thresh,
                                                                                         max_iters))

    # Don't normalise in place
    X = X.copy()

    # Work on a 3d view so that 2d and 3d inputs share a single code path
    orig_dim = X.ndim
    if X.ndim == 2:
        X = X[:, :, None]

    extrema_opts = {'method': 'numpypad'}  # Rilling doesn't make sense for combined extrema
    for iimf in range(X.shape[1]):
        for jimf in range(X.shape[2]):

            env = interp_envelope(X[:, iimf, jimf], mode='combined',
                                  interp_method=interp_method,
                                  extrema_opts=extrema_opts)

            iters = 0
            # Reset per IMF so the log message below never reports an unbound
            # name or a stale value from a previously processed IMF.
            iter_val = np.nan
            while (env is not None) and (iters < max_iters):
                iters += 1

                X[:, iimf, jimf] = X[:, iimf, jimf] / env
                env = interp_envelope(X[:, iimf, jimf], mode='combined',
                                      interp_method=interp_method,
                                      extrema_opts=extrema_opts)

                # No envelope available any more - nothing left to measure
                if env is None:
                    break

                # Distance of the envelope from a constant value of one
                iter_val = np.abs(env.sum() - env.shape[0])
                if iter_val < thresh:
                    break

            logger.info('Normalise of IMF-{0}-{1} complete in {2} iters (val={3})'.format(iimf,
                                                                                          jimf,
                                                                                          iters,
                                                                                          iter_val))

    if clip:
        logger.debug('Clipping signal to -1:1 range')
        # Make absolutely sure nothing daft is happening
        X = np.clip(X, -1, 1)

    if orig_dim == 2:
        X = X[:, :, 0]

    logger.info('COMPLETED: Amplitude-Normalise')
    return X
def wrap_phase(IP, ncycles=1, mode='2pi'):
    """Wrap a phase time-course.

    Parameters
    ----------
    IP : ndarray
        Input array of unwrapped phase values
    ncycles : int
        Number of cycles per wrap (Default value = 1)
    mode : {'2pi','-pi2pi'}
        Flag to indicate the values to wrap phase within (Default value = '2pi').
        '2pi' wraps into ``[0, 2*pi*ncycles)`` and '-pi2pi' wraps into
        ``[-pi*ncycles, pi*ncycles)``.

    Returns
    -------
    ndarray
        Wrapped phase time-course. This is a new floating point array, the
        input is not modified.

    Raises
    ------
    ValueError
        If ``ncycles`` is not a positive integer or ``mode`` is not recognised.

    Notes
    -----
    Non-finite phase values are not changed by this operation. eg np.nans in
    the input will be present and unchanged in the output.

    """
    # Check the type first so that non-numeric input gives the intended
    # ValueError rather than a TypeError from the comparison.
    if (not isinstance(ncycles, (int, np.integer))) or (ncycles < 1):
        raise ValueError("'ncycles' must be a positive integer value - input was '{0}'".format(ncycles))

    if mode not in ['2pi', '-pi2pi']:
        raise ValueError("Invalid mode value")

    # Wrapping length
    phase_len = ncycles * 2 * np.pi

    # Shift applied before and removed after the remainder so that the wrapped
    # range is centred on zero in '-pi2pi' mode.
    offset = np.pi * ncycles if mode == '-pi2pi' else 0.0

    # Work on a floating point copy so the caller's array is never overwritten
    phases = np.array(IP, copy=True)
    if not np.issubdtype(phases.dtype, np.floating):
        phases = phases.astype(float)

    # Only finite samples are wrapped, nan and inf pass through untouched
    finite = np.isfinite(phases)
    phases[finite] = np.remainder(phases[finite] + offset, phase_len) - offset

    return phases
163# --------------------------
164# Assess IMF 'quality'
def is_imf(imf, avg_tol=5e-2, envelope_opts=None, extrema_opts=None):
    """Determine whether a signal is a 'true IMF'.

    Two criteria are tested. Firstly, the number of extrema and number of
    zero-crossings must differ by zero or one. Secondly, the mean of the upper
    and lower envelopes must be within a tolerance of zero.

    Parameters
    ----------
    imf : 2d array
        Array of signals to check [nsamples x nimfs]
    avg_tol : float
        Tolerance of acceptance for criterion two. The summed absolute value
        of the mean of the upper and lower envelope, divided by the summed
        absolute value of the signal being checked, must be below avg_tol.
    envelope_opts : dict
        Dictionary of envelope estimation options, must be identical to options
        used when estimating IMFs.
    extrema_opts : dict
        Dictionary of extrema estimation options, must be identical to options
        used when estimating IMFs.

    Returns
    -------
    array [nimfs x 2]
        Boolean array indicating whether each IMF passed each test. Column 0
        holds the extrema/zero-crossing test and column 1 the envelope-mean
        test. Both entries stay False for an IMF whose envelopes could not be
        estimated.

    Notes
    -----
    These are VERY strict criteria to apply to real data. The tests may
    indicate a fail if the sift doesn't converge well in a short segment of the
    signal when the majority of the IMF is well behaved.

    The tests are only valid if called with identical envelope_opts and
    extrema_opts as were used in the sift estimation.

    """
    from scipy.signal import find_peaks
    imf = ensure_2d([imf], ['imf'], 'is_imf')

    if envelope_opts is None:
        envelope_opts = {}

    checks = np.zeros((imf.shape[1], 2), dtype=bool)

    for ii in range(imf.shape[1]):
        signal = imf[:, ii]

        # Extrema and zero-crossings differ by <=1
        num_zc = zero_crossing_count(signal)
        num_ext = find_peaks(signal)[0].shape[0] + find_peaks(-signal)[0].shape[0]

        # Mean of envelopes should be zero
        upper = interp_envelope(signal, mode='upper',
                                **envelope_opts, extrema_opts=extrema_opts)
        lower = interp_envelope(signal, mode='lower',
                                **envelope_opts, extrema_opts=extrema_opts)

        # Without both envelopes neither test can pass - leave the row False
        if upper is None or lower is None:
            logger.debug('IMF-{0} False - no peaks detected'.format(ii))
            continue

        # Find local mean
        avg = np.mean([upper, lower], axis=0)
        avg_sum = np.sum(np.abs(avg))
        imf_sum = np.sum(np.abs(signal))
        diff = avg_sum / imf_sum

        # TODO: Could probably add a Rilling-like criterion here. ie - is_imf
        # is true if (1-alpha)% of time is within some thresh
        # np.all collapses the length-one comparison into a plain boolean
        checks[ii, 0] = np.all(np.abs(np.diff((num_zc, num_ext))) <= 1)
        checks[ii, 1] = diff < avg_tol

        msg = 'IMF-{0} {1} - {2} extrema and {3} zero-crossings. Avg of envelopes is {4:.4}/{5:.4} ({6:.4}%)'
        msg = msg.format(ii, np.all(checks[ii, :]), num_ext, num_zc, avg_sum, imf_sum, 100*diff)
        logger.debug(msg)

    return checks
def check_decreasing_freq(IF, mode='proportion'):
    """Similar to method 1 in http://dx.doi.org/10.11601/ijates.v5i1.139.

    A sample is flagged when the instantaneous frequency of an IMF is lower
    than that of the next IMF along the second axis, ie the difference between
    adjacent columns of IF is positive.

    Parameters
    ----------
    IF : ndarray
        nsamples x nimfs array of instantaneous frequency values
    mode : {'proportion', 'sum', 'full'}
        Flag indicating whether the proportion of overlapping samples
        ('proportion', default), the total number of overlapping samples
        ('sum') or the full nsamples x nimfs-1 array ('full') will be returned

    Returns
    -------
    metric : ndarray
        For 'proportion' and 'sum', an nimfs-1 length vector containing the
        proportion or count of samples in which the IF of adjacent pairs of
        IMFs overlapped. For 'full', the nsamples x nimfs-1 boolean array of
        flagged samples.

    Raises
    ------
    ValueError
        If ``mode`` is not one of the recognised options.

    """
    if mode not in ['proportion', 'sum', 'full']:
        raise ValueError("Invalid mode value")

    # Find frequency differences between adjacent IMFs and flag the positive ones
    overlapping = np.diff(IF, axis=1) > 0

    if mode == 'full':
        return overlapping

    metric = np.nansum(overlapping, axis=0)

    if mode == 'proportion':
        # Normalise by the total number of samples
        metric = metric / overlapping.shape[0]

    return metric
def est_orthogonality(imf):
    """Compute the index of orthogonality from a set of IMFs.

    Each entry is the absolute inner product of a pair of IMFs divided by the
    product of their Euclidean norms. Method is described in equation 6.5 of
    Huang et al (1998) [1]_.

    Parameters
    ----------
    imf : ndarray
        Input array of IMFs, one IMF per column

    Returns
    -------
    ndarray
        Matrix of orthogonality values [nimfs x nimfs]

    References
    ----------
    .. [1] Huang, N. E., Shen, Z., Long, S. R., Wu, M. C., Shih, H. H., Zheng,
       Q., … Liu, H. H. (1998). The empirical mode decomposition and the Hilbert
       spectrum for nonlinear and non-stationary time series analysis. Proceedings
       of the Royal Society of London. Series A: Mathematical, Physical and
       Engineering Sciences, 454(1971), 903–995.
       https://doi.org/10.1098/rspa.1998.0193

    """
    nimfs = imf.shape[1]

    # Norm of every IMF, evaluated once rather than inside the pairwise loop
    norms = [np.sqrt(np.sum(imf[:, kk] * imf[:, kk])) for kk in range(nimfs)]

    ortho = np.full((nimfs, nimfs), np.nan)
    for row in range(nimfs):
        for col in range(nimfs):
            inner = np.sum(imf[:, row] * imf[:, col])
            ortho[row, col] = np.abs(inner) / (norms[col] * norms[row])

    return ortho
def pseudo_mode_mixing_index(imf):
    """Compute the Pseudo Mode Mixing Index from a set of IMFs.

    For each pair of adjacent IMFs the dot product is divided by the summed
    squared norms of the pair (plus a small constant) and negative values are
    set to zero. See Section VI in Wang et al (2018) [1]_

    Parameters
    ----------
    imf : ndarray
        Input array of IMFs, one IMF per column

    Returns
    -------
    ndarray
        Vector of PSMI [nimfs,]. The final entry has no following IMF to pair
        with and is always zero.

    References
    ----------
    .. [1] Wang, Y.-H., Hu, K., & Lo, M.-T. (2018). Uniform Phase Empirical
       Mode Decomposition: An Optimal Hybridization of Masking Signal and Ensemble
       Approaches. IEEE Access, 6, 34819–34833.
       https://doi.org/10.1109/access.2018.2847634

    """
    nimfs = imf.shape[1]
    psmi = np.zeros((nimfs,))

    for ii in range(nimfs - 1):
        current = imf[:, ii]
        following = imf[:, ii + 1]

        overlap = np.dot(current, following)
        # Small constant keeps the ratio defined for all-zero pairs
        energy = np.linalg.norm(current)**2 + np.linalg.norm(following)**2 + 1e-8

        psmi[ii] = np.maximum(overlap / energy, 0)

    return psmi
def assess_harmonic_criteria(IP, IF, IA, num_segments=None, base_imf=None, print_result=True):
    """Assess IMFs for potential harmonic relationships.

    This function implements tests for the criteria defining when signals can
    be considered 'harmonically related' as introduced in [1]_. Broadly,
    harmonically related signals are defined as having an integer frequency
    ratio, constant phase relationship, and a well-defined joint instantaneous
    frequency

    Three criteria are assessed by splitting the time-series into approximately
    equally sized segments and computing metrics within each segment.

    Parameters
    ----------
    IP, IF, IA : ndarray of equal shape
        Instantaneous Phase, Frequency and Amplitude estimates for a set of
        IMFs. These are typically the outputs from emd.spectra.frequency_transform.
    num_segments : int
        Number of segments to split the time series into to enable statistical
        assessment. Defaults to 20 if None.
    base_imf : int
        Index of IMF to be considered the potential 'fundamental' oscillation.
        Defaults to the last IMF if None. Only IMFs with a lower index are
        compared against it.
    print_result : bool
        Flag indicating whether to print a summary table of results.

    Returns
    -------
    df
        Pandas DataFrame containing a range of summary and comparison metrics,
        one row per IMF compared against the base IMF.

    Notes
    -----
    In detail, this function compares each IMF to a 'base' IMF to see if it can
    be considered a potential harmonic. Each pair of IMFs are assessed for:

    1) An integer frequency ratio. The distribution of frequency ratios across
    segments is compared to its closest integer value with a 1-sample t-test

    2) Consistent phase relationship. The instantaneous phase time-courses are
    assessed for temporal dependence using a Distance Correlation t-statistic.

    3) The af ratio is less than 1. The product of the amplitude ratio and
    frequency ratio of the two IMFs should be less than 1 according to a
    1-sided 1-sample t-test.

    References
    ----------
    .. [1] Fabus, M. S., Woolrich, M. W., Warnaby, C. W., & Quinn, A. J.
       (2022). Understanding Harmonic Structures Through Instantaneous Frequency.
       IEEE Open Journal of Signal Processing. doi: 10.1109/OJSP.2022.3198012.

    """
    # Housekeeping
    import dcor
    import pandas as pd
    from scipy.stats import ttest_1samp
    IP, IF, IA = ensure_2d([IP, IF, IA], ['IP', 'IF', 'IA'], 'assess_harmonic_criteria')
    ensure_equal_dims((IP, IF, IA), ('IP', 'IF', 'IA'), 'assess_harmonic_criteria')

    if base_imf is None:
        base_imf = IP.shape[1] - 1

    # Keep only the base IMF and the IMFs before it
    IP = IP.copy()[:, :base_imf+1]
    IF = IF.copy()[:, :base_imf+1]
    IA = IA.copy()[:, :base_imf+1]

    if num_segments is None:
        num_segments = 20

    IPs = np.array_split(IP, num_segments, axis=0)
    IFs = np.array_split(IF, num_segments, axis=0)
    IAs = np.array_split(IA, num_segments, axis=0)

    vals, counts = np.unique([xx.shape[0] for xx in IPs], return_counts=True)
    msg = 'Splitting data into {0} segments with lengths {1} and counts {2}'
    logger.info(msg.format(num_segments, vals, counts))

    # Per-segment averages, arrays of shape [num_segments x nimfs]
    IFms = np.array([ff.mean(axis=0) for ff in IFs])
    IAms = np.array([aa.mean(axis=0) for aa in IAs])

    fratios = np.zeros((base_imf, num_segments))
    a_s = np.zeros((base_imf, num_segments))
    afs = np.zeros((base_imf, num_segments))
    dcor_pvals = np.zeros((base_imf, 2))
    fratio_pvals = np.zeros(base_imf)
    af_pvals = np.zeros(base_imf)

    for ii in range(base_imf):
        # Freq ratios
        fratios[ii, :] = IFms[:, ii] / IFms[:, base_imf]
        # Amp ratio
        a_s[ii, :] = IAms[:, ii] / IAms[:, base_imf]
        # af value
        afs[ii, :] = a_s[ii, :] * fratios[ii, :]

        # Test 1: significant Phase-Phase Correlation
        dcorr = dcor.distance_correlation(IP[:, ii], IP[:, base_imf])
        p_dcor, _ = dcor.independence.distance_correlation_t_test(IP[:, ii], IP[:, base_imf])
        dcor_pvals[ii, :] = dcorr, p_dcor

        # Test 2: frequency ratio not different from nearest integer
        ftarget = np.round(fratios[ii, :].mean())
        _, fratio_pvals[ii] = ttest_1samp(fratios[ii, :], ftarget)
        # Test 3: af < 1
        _, af_pvals[ii] = ttest_1samp(afs[ii, :], 1, alternative='less')

    info = {'InstFreq Mean': IFms.mean(axis=0)[:base_imf],
            'InstFreq StDev': IFms.std(axis=0)[:base_imf],
            'InstFreq Ratio': fratios.mean(axis=1),
            'Integer IF p-value': fratio_pvals,
            'InstAmp Mean': IAms.mean(axis=0)[:base_imf],
            'InstAmp StDev': IAms.std(axis=0)[:base_imf],
            'InstAmp Ratio': a_s.mean(axis=1),
            'af Value': afs.mean(axis=1),
            'af < 1 p-value': af_pvals,
            'DistCorr': dcor_pvals[:, 0],
            'DistCorr p-value': dcor_pvals[:, 1]}

    df = pd.DataFrame.from_dict(info)

    if print_result:
        tabs = []
        for ii in range(base_imf):
            tabs.append([f'IMF-{ii}',
                         df['DistCorr'][ii],
                         df['DistCorr p-value'][ii],
                         df['InstFreq Ratio'][ii],
                         df['Integer IF p-value'][ii],
                         df['af Value'][ii],
                         df['af < 1 p-value'][ii]])
        heads = ['IMF', 'Phase DistCorr', 'p-value', 'InstFreq Ratio', 'p-value', 'af Ratio', 'p-value']

        print(tabulate(tabs, headers=heads, tablefmt='orgtbl'))

    return df
def assess_joint_if(imf, freq_transform_args=None, return_mode='full'):
    """Assess whether two signals have a well formed joint instantaneous frequency.

    Each pair of adjacent IMFs is summed and the instantaneous frequency of the
    summed signal is estimated with emd.spectra.frequency_transform using the
    'hilbert' method.

    Parameters
    ----------
    imf : ndarray
        Array of intrinsic mode functions.
    freq_transform_args : {None, dict}
        Optional dictionary of keyword arguments to be passed to
        emd.spectra.frequency_transform
    return_mode : {'binary', 'full'}
        Whether to return the full joint instantaneous frequency ('full',
        default) or a binarised array that is True for samples with a negative
        joint instantaneous frequency ('binary').

    Returns
    -------
    joint_if : ndarray
        Array of joint instantaneous frequency values or binary values
        indicating whether the joint instantaneous frequency was less than
        zero. There is one column per pair of adjacent IMFs.

    References
    ----------
    .. [1] Fabus, M. S., Woolrich, M. W., Warnaby, C. W., & Quinn, A. J.
       (2022). Understanding Harmonic Structures Through Instantaneous Frequency.
       IEEE Open Journal of Signal Processing. doi: 10.1109/OJSP.2022.3198012.

    """
    # Import from spectra inside function to avoid circular imports. Strictly,
    # emd.spectra depends on emd.imftools but not the other way around
    from .spectra import frequency_transform

    # Housekeeping
    imf = ensure_2d([imf], ['imf'], 'assess_joint_if')

    if freq_transform_args is None:
        freq_transform_args = {}

    joint_if = np.zeros_like(imf[:, :-1])

    for pair in range(imf.shape[1] - 1):
        # Sum each IMF with its immediate predecessor
        summed = imf[:, pair + 1] + imf[:, pair]
        # NOTE(review): the positional 1 is presumably the sample rate - confirm
        _, pair_freq, _ = frequency_transform(summed, 1, 'hilbert', **freq_transform_args)

        joint_if[:, pair] = pair_freq[:, 0]

    if return_mode == 'binary':
        joint_if = joint_if < 0

    return joint_if
546# --------------------------
547# Epoching
def find_extrema_locked_epochs(X, winsize, lock_to='peaks', percentile=None):
    """Define epochs around peaks or troughs within the data.

    Parameters
    ----------
    X : ndarray
        Input time-series
    winsize : int
        Width of window to extract around each extrema
    lock_to : {'peaks','troughs','combined'}
        Flag to select which extrema to lock to (Default value = 'peaks')
    percentile : float
        Optional threshold, only extrema whose value is strictly greater than
        this percentile of all extrema values are kept (Default value = None)

    Returns
    -------
    ndarray
        Array of start and end indices for epochs around extrema, one row per
        epoch. Epochs that start before sample zero or end after the final
        sample are dropped.

    Raises
    ------
    ValueError
        If ``lock_to`` is not one of the recognised options.

    """
    if lock_to not in ['peaks', 'troughs', 'combined']:
        raise ValueError("Invalid lock_to value")

    locs, pks = get_padded_extrema(X, pad_width=0, mode=lock_to)

    if percentile is not None:
        # NOTE(review): the comparison uses the extrema values as returned -
        # confirm this selects by magnitude when locking to troughs
        keep = pks > np.percentile(pks, percentile)
        locs = locs[keep]
        pks = pks[keep]

    half_width = int(winsize / 2)

    # Candidate epochs, one [start, end] row per extremum
    trls = np.vstack((locs - half_width, locs + half_width)).T

    # Drop epochs which start before 0
    starts_early = trls[:, 0] < 0
    trls = trls[np.logical_not(starts_early), :]

    # Drop epochs which end after X.shape[0]
    ends_late = trls[:, 1] > X.shape[0]
    trls = trls[np.logical_not(ends_late), :]

    return trls
def apply_epochs(X, trls):
    """Apply a set of epochs to a continuous dataset.

    Parameters
    ----------
    X : ndarray
        Input dataset to be epoched [nsamples x nchannels]
    trls : ndarray
        2D array of start and end indices for each epoch. The second dimension
        should be of len==2 and contain start and end indices in order. All
        epochs are expected to have the length of the first epoch.

    Returns
    -------
    ndarray
        Epoched time-series [epoch_length x nchannels x nepochs]. Real input
        gives a float64 output, complex input gives a complex output. An
        array of shape [0 x nchannels x 0] is returned if trls contains no
        epochs.

    """
    trls = np.asarray(trls)

    # float64 for real input (as before) while keeping complex data complex
    out_dtype = np.result_type(X.dtype, np.float64)

    # No epochs defined - there is no first row to take the epoch length from
    if trls.shape[0] == 0:
        return np.zeros((0, X.shape[1], 0), dtype=out_dtype)

    epoch_len = trls[0, 1] - trls[0, 0]
    Y = np.zeros((epoch_len, X.shape[1], trls.shape[0]), dtype=out_dtype)
    for ii, trl in enumerate(trls):
        Y[:, :, ii] = X[trl[0]:trl[1], :]

    return Y
622# Circular statistics
623#
624# These functions are a work in progress and not currently tested.
625# Mostly based on equations from wikipedia.
626# https://en.wikipedia.org/wiki/Circular_mean
627#
628# Everything works in radians to match the instantaneous phase estimates.
def _radians_to_complex(IP, IA=None):
    """Convert phase in radians to circular/complex coordinates.

    Parameters
    ----------
    IP : ndarray
        Phase values in radians
    IA : ndarray
        Optional amplitude for each phase value, unit amplitude is used if
        None (Default value = None)

    Returns
    -------
    ndarray
        Complex values with magnitude IA and angle IP

    """
    amplitude = np.ones_like(IP) if IA is None else IA
    ensure_equal_dims([IP, amplitude], ['IP', 'IA'], 'ip_to_complex')

    # Exponential form, equivalent to IA * (np.cos(IP) + 1j * np.sin(IP))
    return amplitude * np.exp(1j * IP)
def ip_mean_resultant_vector(IP, IA=None, axis=0):
    """Compute the mean resultant vector of a set of phase values.

    Parameters
    ----------
    IP : ndarray
        Phase values in radians
    IA : ndarray
        Optional amplitude for each phase value, unit amplitude is used if
        None (Default value = None)
    axis : int
        Axis to average over (Default value = 0)

    Returns
    -------
    ndarray
        Complex mean of the phases expressed in complex coordinates

    """
    amplitude = np.ones_like(IP) if IA is None else IA
    return _radians_to_complex(IP, IA=amplitude).mean(axis=axis)
def ip_circular_mean(IP, IA=None, axis=0):
    """Compute the circular mean of a set of phase values.

    This is the angle, in radians, of the mean resultant vector computed from
    IP and the optional amplitudes IA along the specified axis.
    """
    return np.angle(ip_mean_resultant_vector(IP, IA=IA, axis=axis))
def ip_circular_variance(IP, IA=None, axis=0):
    """Compute the circular variance of a set of phase values.

    This is one minus the magnitude of the mean resultant vector computed
    from IP and the optional amplitudes IA along the specified axis.
    """
    # https://en.wikipedia.org/wiki/Directional_statistics#Standard_deviation
    resultant = ip_mean_resultant_vector(IP, IA=IA, axis=axis)
    resultant_length = np.abs(resultant)

    return 1 - resultant_length