#!/usr/bin/python

# vim: set expandtab ts=4 sw=4:

"""
Routines relating to frequency transforms and power-spectra.

Frequency Transform Routines:
    frequency_transform
    quadrature_transform
    phase_from_complex_signal
    freq_from_phase
    phase_from_freq
    phase_angle

Power Spectra:
    hilberthuang
    holospectrum
    hilbertmarginal

Power Spectra Helpers:
    define_hist_bins
    define_hist_bins_from_data

"""

import logging

import numpy as np

from . import cycles, imftools
from ._sift_core import interp_envelope
from .support import ensure_2d, ensure_equal_dims, ensure_vector

# Housekeeping for logging
logger = logging.getLogger(__name__)

# Sentinel value for observations outside histogram range
DROP_SENTINAL = np.iinfo(np.int32).min

##


def frequency_transform(imf, sample_rate, method, smooth_freq=3,
                        smooth_phase=5):
    """Compute instantaneous phase, frequency and amplitude from a set of IMFs.

    Several approaches are implemented from [1]_ and [2]_.

    Parameters
    ----------
    imf : ndarray
        Input array of IMFs.
    sample_rate : float
        Sampling frequency of the signal in Hz
    method : {'hilbert','quad','direct_quad','nht'}
        The method for computing the frequency stats
    smooth_freq : int
        Length of the Savitzky-Golay window used when smoothing the
        instantaneous frequency (Default value = 3)
    smooth_phase : int
        Length of window when smoothing the unwrapped phase (Default value = 5)

    Returns
    -------
    IP : ndarray
        Array of instantaneous phase estimates
    IF : ndarray
        Array of instantaneous frequency estimates
    IA : ndarray
        Array of instantaneous amplitude estimates

    References
    ----------
    .. [1] Huang, N. E., Shen, Z., Long, S. R., Wu, M. C., Shih, H. H., Zheng,
       Q., … Liu, H. H. (1998). The empirical mode decomposition and the Hilbert
       spectrum for nonlinear and non-stationary time series analysis. Proceedings
       of the Royal Society of London. Series A: Mathematical, Physical and
       Engineering Sciences, 454(1971), 903–995.
       https://doi.org/10.1098/rspa.1998.0193
    .. [2] Huang, N. E., Wu, Z., Long, S. R., Arnold, K. C., Chen, X., & Blank,
       K. (2009). On Instantaneous Frequency. Advances in Adaptive Data Analysis,
       1(2), 177–229. https://doi.org/10.1142/s1793536909000096

    """
    from scipy.signal import hilbert
    logger.info('STARTED: compute frequency stats')

    imf = ensure_2d([imf], ['imf'], 'frequency_transform')
    logger.debug('computing on {0} samples over {1} imfs at sample rate {2}'.format(imf.shape[0],
                                                                                    imf.shape[1],
                                                                                    sample_rate))

    # Each case here should compute the analytic form of the imfs and the
    # instantaneous amplitude.
    if method == 'hilbert':
        logger.info('Using Hilbert transform')

        analytic_signal = hilbert(imf, axis=0)

        # Estimate instantaneous amplitudes directly from analytic signal
        iamp = np.abs(analytic_signal)

    elif method == 'nht':
        logger.info('Using Amplitude-Normalised Hilbert transform')

        n_imf = imftools.amplitude_normalise(imf)
        analytic_signal = hilbert(n_imf, axis=0)

        orig_dim = imf.ndim
        if imf.ndim == 2:
            imf = imf[:, :, None]

        # Estimate inst amplitudes with spline interpolation
        iamp = np.zeros_like(imf)
        for ii in range(imf.shape[1]):
            for jj in range(imf.shape[2]):
                iamp[:, ii, jj] = interp_envelope(imf[:, ii, jj],
                                                  mode='upper')

        if orig_dim == 2:
            iamp = iamp[:, :, 0]

    elif method == 'ctrl':
        logger.info('Using Control Points - CURRENTLY BROKEN')

        orig_dim = imf.ndim
        if imf.ndim == 2:
            imf = imf[:, :, None]

        # Get phase from control points
        iphase = np.zeros_like(imf)
        for ii in range(imf.shape[1]):
            for jj in range(imf.shape[2]):
                good_cycles = cycles.get_cycle_inds_from_waveform(imf[:, ii, jj], cycle_start='asc')
                ctrl = cycles.get_control_points(imf[:, ii, jj], good_cycles)
                iphase[:, ii, jj] = phase_from_control_points(ctrl, good_cycles)
                iphase[:, ii, jj] = np.unwrap(iphase[:, ii, jj])

        # Estimate inst amplitudes with spline interpolation
        iamp = np.zeros_like(imf)
        for ii in range(imf.shape[1]):
            for jj in range(imf.shape[2]):
                iamp[:, ii, jj] = interp_envelope(imf[:, ii, jj],
                                                  mode='upper')

        if orig_dim == 2:
            iamp = iamp[:, :, 0]
            iphase = iphase[:, :, 0]

    elif method == 'quad':
        logger.info('Using Quadrature transform')

        analytic_signal = quadrature_transform(imf)

        orig_dim = imf.ndim
        if imf.ndim == 2:
            imf = imf[:, :, None]

        # Estimate inst amplitudes with spline interpolation
        iamp = np.zeros_like(imf)
        for ii in range(imf.shape[1]):
            for jj in range(imf.shape[2]):
                iamp[:, ii, jj] = interp_envelope(imf[:, ii, jj],
                                                  mode='upper')

        if orig_dim == 2:
            iamp = iamp[:, :, 0]

    elif method == 'direct_quad':
        logger.info('Using Direct-Quadrature transform')
        raise ValueError('direct_quad method is broken!')

        n_imf = imftools.amplitude_normalise(imf.copy())
        iphase = np.unwrap(phase_angle(n_imf))

        iamp = np.zeros_like(imf)
        for ii in range(imf.shape[1]):
            iamp[:, ii] = interp_envelope(imf[:, ii, None], mode='combined')

    else:
        logger.error("Method '{0}' not recognised".format(method))
        raise ValueError("Method '{0}' not recognised\nPlease use one of 'hilbert','nht' or 'quad'".format(method))

    if method != 'ctrl':
        # Compute unwrapped phase for frequency estimation
        iphase = phase_from_complex_signal(analytic_signal,
                                           smoothing=smooth_phase,
                                           ret_phase='unwrapped')

    # Compute inst. freq from phase
    ifreq = freq_from_phase(iphase, sample_rate, savgol_width=smooth_freq)

    # Return wrapped phase
    iphase = imftools.wrap_phase(iphase)

    logger.info('COMPLETED: compute frequency stats. Returning {0} imfs'.format(iphase.shape[1]))
    return iphase, ifreq, iamp
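
# Illustrative usage sketch (hypothetical names: `x` is any 1d signal sampled at
# 512Hz and sifted with emd.sift.sift before the transform is applied):
#
#     imf = emd.sift.sift(x)
#     IP, IF, IA = frequency_transform(imf, 512, 'hilbert')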


#%% -----------------------------------------------------
# Frequency stat utils


def quadrature_transform(X, fix_zerocrossings=False):
    """Compute the quadrature transform on a set of time-series.

    This algorithm is defined in equation 34 of [1]_. The return is a complex
    array with the input data as the real part and the quadrature transform as
    the imaginary part.

    Parameters
    ----------
    X : ndarray
        Array containing time-series to transform
    fix_zerocrossings : bool
        Flag indicating whether to apply an experimental correction to samples
        around zero-crossings (Default value = False)

    Returns
    -------
    quad_signal : ndarray
        Complex valued array containing the quadrature transformed signal

    References
    ----------
    .. [1] Huang, N. E., Wu, Z., Long, S. R., Arnold, K. C., Chen, X., & Blank,
       K. (2009). On Instantaneous Frequency. Advances in Adaptive Data Analysis,
       1(2), 177–229. https://doi.org/10.1142/s1793536909000096

    """
    nX = imftools.amplitude_normalise(X.copy(), clip=False)
    nX = nX / (np.abs(nX).max() + 1e-8)  # Clip any remaining points outside -1 < x < 1

    # Avoid occasional 'invalid value encountered' RuntimeWarning in sqrt using
    # where argument in ufunc
    tmp = 1 - nX**2
    good_vals = (tmp != 0) & (np.isnan(tmp) == False)  # noqa: E712
    imagX = np.sqrt(tmp, out=tmp, where=good_vals)

    # Add warning here....
    if np.all(np.isreal(imagX)) == False:  # noqa: E712
        imagX = imagX.real

    mask = ((np.diff(nX, axis=0) > 0) * -2) + 1
    mask[mask == 0] = -1
    mask = np.r_[mask, mask[-1, None, :]]

    q = imagX * mask

    if fix_zerocrossings:
        q = _fix_quadrature_zero_crossings(q)

    return nX + 1j * q
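
# Illustrative usage sketch (assumes `imf` is a [samples x imfs] array of IMFs):
#
#     quad = quadrature_transform(imf)
#     IP = phase_from_complex_signal(quad, ret_phase='unwrapped')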


def _fix_quadrature_zero_crossings(quad):
    """Numerical 'fix' for instability around zero in quadrature signals.

    EXPERIMENTAL WORK-IN-PROGRESS FUNCTION!! Use with caution.

    Replaces sample closest to zero with the average of the four surrounding
    points. This is needed as the direct quadrature method involves squaring
    the raw signal - this is normally fine but explodes when close to zero.

    """
    quad_fix = quad.copy()

    for ii in range(quad.shape[1]):
        # Find all zero crossings
        zc = np.where(np.diff(np.sign(quad[:, ii]), axis=0) != 0)[0]
        # Drop crossings we don't have surrounding samples for
        zc = zc[(zc >= 1) & (zc < quad.shape[0]-3)]
        # Delay-embedding array around zero-crossing
        zz = np.vstack((quad[zc-1, ii],
                        quad[zc, ii],
                        quad[zc+1, ii],
                        quad[zc+2, ii],
                        quad[zc+3, ii])).T

        # Take a copy for output and replace 'fixed' zero-crossing point.
        quad_fix[zc+1, ii] = zz[:, np.array((0, 4))].mean(axis=1)
        quad_fix[zc, ii] = np.average(zz[:, np.array((0, 4))], weights=[3/4, 1/4], axis=1)
        quad_fix[zc+2, ii] = np.average(zz[:, np.array((0, 4))], weights=[1/4, 3/4], axis=1)

    from scipy.ndimage import median_filter
    quad_fix = median_filter(quad_fix, (7, 1))

    return quad_fix


def phase_from_complex_signal(complex_signal, smoothing=None,
                              ret_phase='wrapped', phase_jump='ascending'):
    """Compute the instantaneous phase from a complex signal.

    The complex input may be obtained from either the Hilbert Transform or by
    Direct Quadrature.

    Parameters
    ----------
    complex_signal : complex ndarray
        Complex valued input array
    smoothing : int
        Integer window length used in phase smoothing (Default value = None)
    ret_phase : {'wrapped','unwrapped'}
        Flag indicating whether to return the wrapped or unwrapped phase (Default value = 'wrapped')
    phase_jump : {'ascending','peak','descending','trough'}
        Flag indicating where in the cycle the phase jump should be (Default value = 'ascending')

    Returns
    -------
    IP : ndarray
        Array of instantaneous phase values

    """
    # Compute unwrapped phase
    iphase = np.unwrap(np.angle(complex_signal), axis=0)

    orig_dim = iphase.ndim
    if iphase.ndim == 2:
        iphase = iphase[:, :, None]

    # Apply smoothing if requested
    from scipy.signal import medfilt
    if smoothing is not None:
        for ii in range(iphase.shape[1]):
            for jj in range(iphase.shape[2]):
                iphase[:, ii, jj] = medfilt(iphase[:, ii, jj], smoothing)

    if orig_dim == 2:
        iphase = iphase[:, :, 0]

    # Set phase jump point to requested part of cycle
    if phase_jump == 'ascending':
        iphase = iphase + np.pi / 2
    elif phase_jump == 'peak':
        pass  # do nothing
    elif phase_jump == 'descending':
        iphase = iphase - np.pi / 2
    elif phase_jump == 'trough':
        iphase = iphase + np.pi

    if ret_phase == 'wrapped':
        return imftools.wrap_phase(iphase)
    elif ret_phase == 'unwrapped':
        return iphase
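
# Illustrative usage sketch (assumes `imf` is a [samples x imfs] array of IMFs):
#
#     from scipy.signal import hilbert
#     IP_wrapped = phase_from_complex_signal(hilbert(imf, axis=0))
#     IP_unwrapped = phase_from_complex_signal(hilbert(imf, axis=0), ret_phase='unwrapped')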


def freq_from_phase(iphase, sample_rate, savgol_width=3):
    """Compute the instantaneous frequency from the instantaneous phase.

    A Savitzky-Golay filter is used to compute the derivative of the phase and
    can be smoothed by specifying a longer savgol_width (minimum value=3).

    Parameters
    ----------
    iphase : ndarray
        Input array containing the unwrapped instantaneous phase time-course
    sample_rate : float
        The sampling frequency of the data
    savgol_width : int >= 3
        The window length of the Savitzky-Golay filter window

    Returns
    -------
    IF : ndarray
        Array containing the instantaneous frequencies

    """
    from scipy.signal import savgol_filter

    # Differential of instantaneous phase
    iphase = savgol_filter(iphase, savgol_width, 1, deriv=1, axis=0)

    # Convert to freq
    ifrequency = iphase / (2.0 * np.pi) * sample_rate

    return ifrequency
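
# Illustrative usage sketch (assumes `IP` holds unwrapped phase sampled at 512Hz):
#
#     IF = freq_from_phase(IP, 512)
#     IF_smooth = freq_from_phase(IP, 512, savgol_width=11)  # heavier smoothing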


def phase_from_freq(ifrequency, sample_rate, phase_start=-np.pi):
    """Compute the instantaneous phase of a signal from its instantaneous frequency.

    Parameters
    ----------
    ifrequency : ndarray
        Input array containing the instantaneous frequencies of a signal
    sample_rate : float
        The sampling frequency of the data
    phase_start : float
        Start value of the phase output (Default value = -np.pi)

    Returns
    -------
    IP : ndarray
        The instantaneous phase of the signal

    """
    iphase_diff = (ifrequency / sample_rate) * (2 * np.pi)

    iphase = phase_start + np.cumsum(iphase_diff, axis=0)

    return iphase
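
# Illustrative usage sketch (assumes `IF` holds instantaneous frequencies of a
# signal sampled at 512Hz) - integrating the frequency recovers an unwrapped phase:
#
#     IP = phase_from_freq(IF, 512, phase_start=0)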


def phase_from_control_points(ctrl, cycles):
    """Compute instantaneous phase from control points."""
    from scipy import interpolate as interp

    cycles = ensure_vector([cycles],
                           ['cycles'],
                           'phase_from_control_points')

    ip = np.zeros_like(cycles, dtype=float)
    phase_y = np.array([0, np.pi / 2, np.pi, 3 * np.pi / 2, 2 * np.pi])

    for jj in range(1, cycles.max() + 1):

        if np.any(np.isnan(ctrl[jj-1, :])):
            continue

        f = interp.interp1d(ctrl[jj-1, :], phase_y, kind='linear')
        ph = f(np.arange(0, ctrl[jj-1, -1] + 1))

        ip[cycles == jj] = ph

    return ip


def direct_quadrature(fm):
    """Compute the quadrature transform on a set of time-series.

    This algorithm is defined in equation 35 of [1]_.

    Section 3.2 of 'on instantaneous frequency'

    THIS IS IN DEVELOPMENT

    Parameters
    ----------
    fm : ndarray
        Input signal containing a frequency-modulated signal

    References
    ----------
    .. [1] Huang, N. E., Wu, Z., Long, S. R., Arnold, K. C., Chen, X., & Blank,
       K. (2009). On Instantaneous Frequency. Advances in Adaptive Data Analysis,
       1(2), 177–229. https://doi.org/10.1142/s1793536909000096

    """
    ph = phase_angle(fm)

    # We'll have occasional nans where fm==1 or -1
    inds = np.argwhere(np.isnan(ph))

    vals = (ph[inds[:, 0] - 1, :] + ph[inds[:, 0] + 1, :]) / 2
    ph[inds[:, 0]] = vals

    return ph


def phase_angle(fm):
    """Compute the phase angle of a set of time-series.

    This algorithm is defined in equation 35 of [1]_.

    THIS IS IN DEVELOPMENT

    Parameters
    ----------
    fm : ndarray
        Array containing frequency-modulated time-series to transform

    Returns
    -------
    phase_angle : ndarray
        Array containing the phase angle of each input time-series

    References
    ----------
    .. [1] Huang, N. E., Wu, Z., Long, S. R., Arnold, K. C., Chen, X., & Blank,
       K. (2009). On Instantaneous Frequency. Advances in Adaptive Data Analysis,
       1(2), 177–229. https://doi.org/10.1142/s1793536909000096

    """
    return np.arctan(fm / np.lib.scimath.sqrt(1 - np.power(fm, 2)))


#%% -----------------------------------------------------
# Time-frequency spectra


def hilberthuang(IF, IA, edges=None,
                 sum_time=True,
                 sum_imfs=True,
                 mode='power',
                 sample_rate=1,
                 scaling=None,
                 return_sparse=False,
                 return_Gb_limit=10):
    """Compute a Hilbert-Huang transform (HHT).

    The Hilbert-Huang transform takes the instantaneous frequency and
    instantaneous amplitude of a time-series and represents the energy of a
    signal across time and frequency [1]_.

    The full Hilbert-Huang array is 3-dimensional [nfrequencies x ntimes x nimfs].
    By default, the returned spectrum is summed across time and IMFs, returning
    only the frequency dimension - this behaviour can be tuned with the
    sum_time and sum_imfs arguments. Setting return_sparse to True is strongly
    recommended when returning very large arrays.

    Parameters
    ----------
    IF : ndarray
        2D first level instantaneous frequencies
    IA : ndarray
        2D first level instantaneous amplitudes
    edges : {ndarray, tuple or None}
        Definition of the frequency bins used in the spectrum. This may be:

        * array_like vector of bin edge values (as defined by
          emd.spectra.define_hist_bins)

        * a tuple of values that can be passed to emd.spectra.define_hist_bins
          (eg edges=(1,50,49) will define 49 bins between 1 and 50Hz)

        * None in which case a sensible set of bins will be defined from the
          input data (this is the default option)
    sum_time : boolean
        Flag indicating whether to sum across time dimension
    sum_imfs : boolean
        Flag indicating whether to sum across IMF dimension
    mode : {'power','amplitude'}
        Flag indicating whether to sum the power or amplitudes (Default value = 'power')
    scaling : {'density', 'spectrum', None}
        Switch specifying the normalisation or scaling applied to the spectrum.
    sample_rate : float
        Sampling rate of the data used in 'density' scaling
    return_sparse : bool
        Flag indicating whether to return the full or sparse form (Default value = False)
    return_Gb_limit : {float, None}
        Maximum array size in Gb that will be returned if a non-sparse/dense
        array is being returned (default = 10). If the function return would
        exceed this size, the function will raise an error. If set to None,
        then no limit is imposed. Sparse arrays are always returned.

    Returns
    -------
    f : ndarray
        Vector of histogram bin centers for each frequency
    hht : ndarray
        2D array containing the Hilbert-Huang Transform

    Notes
    -----
    Run a HHT with an automatically generated set of histogram bins:

    >>> f, hht = emd.spectra.hilberthuang(IF, IA, sample_rate=512)

    Run a HHT and return the spectrum for each IMF separately

    >>> f, hht = emd.spectra.hilberthuang(IF, IA, sample_rate=512, sum_imfs=False)

    Run a HHT with 49 bins, linearly spaced between 1 and 50Hz

    >>> f, hht = emd.spectra.hilberthuang(IF, IA, edges=(1, 50, 49), sample_rate=512)

    Run a HHT with 49 bins, logarithmically spaced between 1 and 50Hz

    >>> f, hht = emd.spectra.hilberthuang(IF, IA, edges=(1, 50, 49, 'log'), sample_rate=512)

    Run a HHT with an externally generated set of histogram bin edges

    >>> my_edges = np.array([0.5, 2, 5, 11, 22])
    >>> f, hht = emd.spectra.hilberthuang(IF, IA, edges=my_edges, sample_rate=512)

    Run a HHT and return the full time dimension - the HHT is summed over time by default

    >>> f, hht = emd.spectra.hilberthuang(IF, IA, edges=(1, 50, 49), sample_rate=512, sum_time=False)

    Run a HHT and return a memory efficient sparse array - this is strongly
    recommended for very large HHTs

    >>> f, hht = emd.spectra.hilberthuang(IF, IA, edges=(1, 50, 49), sample_rate=512,
    ...                                   sum_time=False, return_sparse=True)

    If return_sparse is set to True the returned array is a sparse matrix in
    COOrdinate form using the sparse package (sparse.COO). This is much more
    memory efficient than the full form but may not behave as expected in all
    functions expecting full arrays.

    References
    ----------
    .. [1] Huang, N. E., Shen, Z., Long, S. R., Wu, M. C., Shih, H. H., Zheng,
       Q., … Liu, H. H. (1998). The empirical mode decomposition and the Hilbert
       spectrum for nonlinear and non-stationary time series analysis. Proceedings
       of the Royal Society of London. Series A: Mathematical, Physical and
       Engineering Sciences, 454(1971), 903–995.
       https://doi.org/10.1098/rspa.1998.0193

    """
    # Housekeeping
    IF, IA = ensure_2d([IF, IA], ['IF', 'IA'], 'hilberthuang')
    ensure_equal_dims((IF, IA), ('IF', 'IA'), 'hilberthuang')

    logger.info('STARTED: compute Hilbert-Huang Transform')
    logger.debug('computing on {0} samples over {1} IMFs '.format(IF.shape[0],
                                                                  IF.shape[1]))

    edges, bins = _histogram_bin_relay(edges, IF.flatten())
    logger.debug('Freq bins: {0} to {1} in {2} steps'.format(edges[0],
                                                             edges[-1],
                                                             len(edges)))

    # Begin computation
    spec = _base_spectra(IF, IA, edges)

    sum_dims = np.where([0, sum_time, sum_imfs])[0]

    spec = _post_process_spectra(spec, sum_dims=sum_dims,
                                 mode=mode, time_dim=1,
                                 sample_rate=sample_rate, scaling=scaling,
                                 return_sparse=return_sparse, return_Gb_limit=return_Gb_limit)

    logger.info('COMPLETED: Hilbert-Huang Transform - output size {0}'.format(spec.shape))
    return bins, spec


def holospectrum(IF, IF2, IA2,
                 edges=None, edges2=None,
                 sum_time=True,
                 sum_first_imfs=True,
                 sum_second_imfs=True,
                 mode='power',
                 sample_rate=1,
                 scaling=None,
                 return_sparse=False,
                 return_Gb_limit=10):
    """Compute a Holospectrum.

    Holospectra are computed from the first and second layer frequency
    statistics of a dataset. The Holospectrum represents the energy of a signal
    across time, carrier frequency and amplitude-modulation frequency [1]_.

    The full Holospectrum is a 5-dimensional array:
    [nfrequencies x namplitude_frequencies x ntimes x nfirst_imfs x nsecond_imfs]
    By default, the returned holospectrum is summed across time and IMFs,
    returning only the first two dimensions - this behaviour can be tuned with
    the sum_time, sum_first_imfs and sum_second_imfs arguments.

    WARNING: returning the full Holospectrum can create some enormous arrays!
    Setting return_sparse=True is VERY strongly recommended if you want to work
    with the raw time and IMF dimensions.

    Parameters
    ----------
    IF : ndarray
        2D first level instantaneous frequencies
    IF2 : ndarray
        3D second level instantaneous frequencies
    IA2 : ndarray
        3D second level instantaneous amplitudes
    edges : {ndarray, tuple or None}
        Definition of the frequency bins used for carrier frequencies in the
        spectrum. This may be:

        * array_like vector of bin edge values (as defined by
          emd.spectra.define_hist_bins)

        * a tuple of values that can be passed to emd.spectra.define_hist_bins
          (eg edges=(1,50,49) will define 49 bins between 1 and 50Hz)

        * None in which case a sensible set of bins will be defined from the
          input data (this is the default option)
    edges2 : {ndarray, tuple or None}
        Definition of the frequency bins used for amplitude modulation
        frequencies in the spectrum. The options are the same as for `edges`.
    sum_time : boolean
        Flag indicating whether to sum across time dimension
    sum_first_imfs : boolean
        Flag indicating whether to sum across first-layer IMF dimension
    sum_second_imfs : boolean
        Flag indicating whether to sum across the second-layer IMF dimension
    mode : {'power','amplitude'}
        Flag indicating whether to sum the power or amplitude (Default value = 'power')
    scaling : {'density', 'spectrum', None}
        Switch specifying the normalisation or scaling applied to the spectrum.
    sample_rate : float
        Sampling rate of the data used in 'density' scaling
    return_sparse : boolean
        Flag indicating whether to return a sparse or dense (normal numpy) array.
    return_Gb_limit : {float, None}
        Maximum array size in Gb that will be returned if a non-sparse/dense
        array is being returned (default = 10). If the function return would
        exceed this size, the function will raise an error. If set to None,
        then no limit is imposed. Sparse arrays are always returned.

    Returns
    -------
    f_carrier : ndarray
        Vector of histogram bin centers for each carrier (first-level) frequency
    f_am : ndarray
        Vector of histogram bin centers for each amplitude modulation
        (second-level) frequency
    holo : ndarray
        Holospectrum of input data.

    Notes
    -----
    Run a Holospectrum with an automatically generated set of histogram bins:

    >>> fcarrier, fam, holo = emd.spectra.holospectrum(IF, IF2, IA2, sample_rate=512)

    Run a Holospectrum and return the spectrum for each first and second level IMF separately

    >>> fcarrier, fam, holo = emd.spectra.holospectrum(IF, IF2, IA2, sample_rate=512,
    ...                                                sum_first_imfs=False, sum_second_imfs=False)

    Run a Holospectrum with 49 carrier frequency bins linearly spaced between 1
    and 50Hz and 32 amplitude modulation frequency bins logarithmically spaced
    between 0.1 and 20Hz

    >>> fcarrier, fam, holo = emd.spectra.holospectrum(IF, IF2, IA2, sample_rate=512,
    ...                                                edges=(1, 50, 49),
    ...                                                edges2=(0.1, 20, 32, 'log'))

    Run a Holospectrum without summing over the time dimension and return the
    result in a memory efficient sparse array - this is strongly recommended
    for very large Holospectra

    >>> fcarrier, fam, holo = emd.spectra.holospectrum(IF, IF2, IA2, sample_rate=512,
    ...                                                edges=(1, 50, 49),
    ...                                                edges2=(0.1, 20, 32, 'log'),
    ...                                                sum_time=False, return_sparse=True)

    If return_sparse is set to True the returned array is a sparse matrix in
    COOrdinate form using the sparse package (sparse.COO). This is much more
    memory efficient than the full form but may not behave as expected in all
    functions expecting full arrays.

    References
    ----------
    .. [1] Huang, N. E., Hu, K., Yang, A. C. C., Chang, H.-C., Jia, D., Liang,
       W.-K., … Wu, Z. (2016). On Holo-Hilbert spectral analysis: a full
       informational spectral representation for nonlinear and non-stationary
       data. Philosophical Transactions of the Royal Society A: Mathematical,
       Physical and Engineering Sciences, 374(2065), 20150206.
       https://doi.org/10.1098/rsta.2015.0206

    """
    # Housekeeping
    logger.info('STARTED: compute Holospectrum')

    out = ensure_2d((IF, IF2, IA2), ('IF', 'IF2', 'IA2'), 'holospectrum')
    IF, IF2, IA2 = out
    ensure_equal_dims((IF, IF2, IA2), ('IF', 'IF2', 'IA2'), 'holospectrum', dim=0)
    ensure_equal_dims((IF, IF2, IA2), ('IF', 'IF2', 'IA2'), 'holospectrum', dim=1)

    msg = 'computing on {0} samples over {1} first-level IMFs and {2} second level IMFs'
    logger.debug(msg.format(IF2.shape[0], IF2.shape[1], IF2.shape[2]))

    edges, bins = _histogram_bin_relay(edges, IF.flatten())
    logger.debug('First level freq bins: {0} to {1} in {2} steps'.format(edges[0],
                                                                         edges[-1],
                                                                         len(edges)))
    edges2, bins2 = _histogram_bin_relay(edges2, IF2.flatten())
    logger.debug('Second level freq bins: {0} to {1} in {2} steps'.format(edges2[0],
                                                                          edges2[-1],
                                                                          len(edges2)))

    # Begin computation
    holo = _higher_order_spectra(IF, IF2, IA2, edges, edges2)

    sum_dims = np.where([0, 0, sum_time, sum_first_imfs, sum_second_imfs])[0]

    holo = _post_process_spectra(holo, sum_dims=sum_dims,
                                 mode=mode, time_dim=2,
                                 sample_rate=sample_rate, scaling=scaling,
                                 return_sparse=return_sparse, return_Gb_limit=return_Gb_limit)

    logger.info('COMPLETED: Holospectrum - output size {0}'.format(holo.shape))
    return bins, bins2, holo


def hilbertmarginal(IF, IA, order=2,
                    freq_edges=None, amp_edges=None,
                    sum_time=True, sum_imfs=True,
                    sample_rate=1, scaling=None,
                    return_sparse=False,
                    return_Gb_limit=10):
    """Compute a generalised Hilbert marginal spectrum.

    This is an experimental function which probably implements the method
    introduced in Huang et al (2008) [1]_. This creates a 2D
    amplitude-frequency representation of the signal.

    Parameters
    ----------
    IF : ndarray
        2D first level instantaneous frequencies
    IA : ndarray
        2D first level instantaneous amplitudes
    order : int
        Power to which amplitude is raised before spectrum computation.
    freq_edges : {ndarray, tuple or None}
        Definition of the frequency bins used in the spectrum. This may be:

        * array_like vector of bin edge values (as defined by
          emd.spectra.define_hist_bins)

        * a tuple of values that can be passed to emd.spectra.define_hist_bins
          (eg edges=(1,50,49) will define 49 bins between 1 and 50Hz)

        * None in which case a sensible set of bins will be defined from the
          input data (this is the default option)
    amp_edges : {ndarray, tuple or None}
        Definition of amplitude bins used in spectrum. Format options are the
        same as for `freq_edges`.
    sum_time : boolean
        Flag indicating whether to sum across time dimension
    sum_imfs : boolean
        Flag indicating whether to sum across IMF dimension
    sample_rate : float
        Sampling rate of the data used in 'density' scaling
    scaling : {'density', 'spectrum', None}
        Switch specifying the normalisation or scaling applied to the spectrum.
    return_sparse : bool
        Flag indicating whether to return the full or sparse form (Default value = False)
    return_Gb_limit : {float, None}
        Maximum array size in Gb that will be returned if a non-sparse/dense
        array is being returned (default = 10). If the function return would
        exceed this size, the function will raise an error. If set to None,
        then no limit is imposed. Sparse arrays are always returned.

    Returns
    -------
    a : ndarray
        Vector of histogram bin centers for each amplitude
    f : ndarray
        Vector of histogram bin centers for each frequency
    spec : ndarray
        2D array containing the Hilbert marginal spectrum

    References
    ----------
    .. [1] Huang, Y. X., Schmitt, F. G., Lu, Z. M., & Liu, Y. L. (2008). An
       amplitude-frequency study of turbulent scaling intermittency using
       Empirical Mode Decomposition and Hilbert Spectral Analysis. In EPL
       (Europhysics Letters) (Vol. 84, Issue 4, p. 40010). IOP Publishing.
       https://doi.org/10.1209/0295-5075/84/40010

    """
    logger.info('STARTED: compute Hilbert-Marginal spectrum')

    IF, IA = ensure_2d([IF, IA], ['IF', 'IA'], 'hilbertmarginal')
    ensure_equal_dims((IF, IA), ('IF', 'IA'), 'hilbertmarginal')

    freq_edges, freq_bins = _histogram_bin_relay(freq_edges, IF.flatten())
    logger.debug('Freq bins: {0} to {1} in {2} steps'.format(freq_edges[0],
                                                             freq_edges[-1],
                                                             len(freq_edges)))

    amp_edges, amp_bins = _histogram_bin_relay(amp_edges, IA.flatten())
    logger.debug('Amp bins: {0} to {1} in {2} steps'.format(amp_edges[0],
                                                            amp_edges[-1],
                                                            len(amp_edges)))

    # Compute HOS - distribution of amplitude across amplitude, frequency,
    # time and IMF
    hima = _higher_order_spectra(IA,
                                 IF[:, :, None],
                                 IA[:, :, None],
                                 amp_edges, freq_edges)
    # hima is a sparse array of dimensions
    # [len(edges), len(edges2), num_samples, num_imfs, 1]

    # Get amplitude values in broadcastable shape
    A_values = np.reshape(IA, (1, 1, IA.shape[0], IA.shape[1], 1))**order

    # Scale hima by amplitude values and resolution
    dA = np.diff(amp_bins)[0]
    hima = hima * A_values * dA

    # Create PDF histogram
    sum_dims = np.where([0, 0, sum_time, sum_imfs, 1])[0]
    hima = hima.sum(axis=sum_dims)
    hima = hima / IF.size

    # Post-process - summing has already been done
    hima = _post_process_spectra(hima, mode=None, sample_rate=sample_rate,
                                 scaling=scaling, return_sparse=return_sparse,
                                 return_Gb_limit=return_Gb_limit)

    logger.info('COMPLETED: Hilbert-Marginal Spectrum - output size {0}'.format(hima.shape))
    return amp_bins, freq_bins, hima
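
# Illustrative usage sketch (assumes `IF` and `IA` come from frequency_transform
# on data sampled at 512Hz):
#
#     amp_bins, freq_bins, hm = hilbertmarginal(IF, IA, order=2, sample_rate=512)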


def _post_process_spectra(spec, sum_dims=None,
                          mode='power',
                          scaling=None,
                          time_dim=1,
                          sample_rate=1,
                          return_sparse=False,
                          return_Gb_limit=10):
    """Apply standard processes to input spectrum.

    This function implements a set of processing options common to all
    hilbert-huang based spectra.

    Parameters
    ----------
    spec : ndarray
        2 or 3d input spectrum, usually a sparse array
    sum_dims : int or list of int
        Dimensions of the spectrum to be summed over before returning
    mode : {'power', 'amplitude'}
        Switch specifying whether the distribution should return amplitude or
        power (amplitude squared) values.
    scaling : {'density', 'spectrum', None}
        Switch specifying the normalisation or scaling applied to the spectrum.
    time_dim : int
        Axis index of the dimension across time. This is used when applying
        some normalisations or scalings.
    sample_rate : float
        Sampling rate of the data used in 'density' scaling
    return_sparse : boolean
        Flag indicating whether to return a sparse or dense (normal numpy) array.
    return_Gb_limit : {float, None}
        Maximum array size in Gb that will be returned if a non-sparse/dense
        array is being returned (default = 10). If the function return would
        exceed this size, the function will raise an error. If set to None,
        then no limit is imposed. Sparse arrays are always returned.

    Returns
    -------
    ndarray
        processed power spectrum

    See Also
    --------
    hilberthuang, holospectrum, hilbertmarginal

    """
    # No housekeeping here - assume that inputs have been sanitised by higher level functions.
    if mode == 'power':
        logger.debug('Squaring amplitude to compute power')
        spec = spec**2

    if scaling == 'density':
        logger.debug("Applying scaling: 'density'.")
        spec = spec / (sample_rate * spec.shape[time_dim])
    elif scaling == 'spectrum':
        logger.debug("Applying scaling: 'spectrum'.")
        spec = spec / spec.shape[time_dim]
    elif scaling is None:
        pass
    else:
        logger.error('Unknown scaling: {0}'.format(scaling))
        raise ValueError('Unknown scaling: {0}'.format(scaling))

    if (sum_dims is not None) and (len(sum_dims) > 0):
        orig_dim = spec.shape
        spec = spec.sum(axis=sum_dims)
        msg = "Summing across dimensions {0}. Input dims ({1}) -> output dims ({2})"
        logger.debug(msg.format(sum_dims, orig_dim, spec.shape))

    # Sparse arrays don't have an itemsize attr so assume 8 bytes per value for now
    byte_size = spec.size * 8
    if return_sparse is False:
        if (return_Gb_limit is not None) and ((byte_size / (1024**3)) > return_Gb_limit):
            msg = "Converting the output to dense format will create a very large array\n"
            msg += "This spectrum is about to return a {0}Gb numpy array - the limit is set to {1}Gb.\n"
            msg += "Please either set 'return_sparse' to True to get a memory-efficient sparse array, or \n"
            msg += "change 'return_Gb_limit' if you really want the dense array..."
            logger.warning(msg.format(byte_size / (1024**3), return_Gb_limit))
            raise RuntimeError(msg.format(byte_size / (1024**3), return_Gb_limit))
        spec = spec.todense()
        logger.debug('Converting output to dense array - size {0}Gb'.format(byte_size / (1024**3)))
    else:
        logger.debug('Returning a sparse array - size {0}Gb'.format(byte_size / (1024**3)))

    return spec


def _base_spectra(X, Z, x_edges):
    """Compute a 2-dimensional Hilbert-Huang distribution.

    This is a helper function for constructing a sparse array representation of
    a two dimensional distribution of power. This function would not normally
    be called by the user.

    Parameters
    ----------
    X : ndarray
        2d array of values defining the first dimension, usually [samples x imfs]
    Z : ndarray
        2d array of amplitude or power values matching the size of input X
    x_edges : ndarray
        Vector array containing bin edges for input X

    Returns
    -------
    sparse_array
        Sparse array representation of two dimensional distribution.

    See Also
    --------
    hilberthuang

    """
    # No housekeeping here - assume that inputs have been sanitised by higher level functions.

    # Find bin indices for first dimension
    x_inds = _digitize(X, x_edges)
    # Find bin indices for time dimension - cast to match input shape
    t_inds = np.broadcast_to(np.arange(x_inds.shape[0])[:, np.newaxis],
                             x_inds.shape)
    # Find bin indices for IMF dimension - cast to match input shape
    i_inds = np.broadcast_to(np.arange(X.shape[1])[np.newaxis, :],
                             x_inds.shape)

    # Create vectorised COO coordinate array
    coords = np.c_[x_inds.flatten(),
                   t_inds.flatten(),
                   i_inds.flatten()].T

    # Vectorise amplitude values to match coordinates
    Z = Z.flatten()

    # Drop observations which lie outside specified bin edges
    drops = np.any(coords == DROP_SENTINAL, axis=0)
    coords = np.delete(coords, drops, axis=1)
    Z = np.delete(Z, drops)

    # Compute final shape
    final_shape = (x_edges.shape[0]-1,
                   x_inds.shape[0],
                   x_inds.shape[1])

    # Create sparse spectrum
    from sparse import COO
    s = COO(coords, Z, shape=final_shape)

    return s


def _higher_order_spectra(X, Y, Z, x_edges, y_edges):
    """Compute a 3-dimensional Hilbert-Huang distribution.

    This is a helper function for constructing a sparse array representation of
    a three dimensional distribution of power. This would not normally be
    called by the user.

    Parameters
    ----------
    X : ndarray
        2d array of values defining the first dimension, usually [samples x imfs]
    Y : ndarray
        3d array of values defining the second dimension, usually [samples x imfs x imfs]
    Z : ndarray
        3d array of amplitude or power values matching the size of input Y
    x_edges : ndarray
        Vector array containing bin edges for input X
    y_edges : ndarray
        Vector array containing bin edges for input Y

    Returns
    -------
    sparse_array
        Sparse array representation of three dimensional distribution.

    See Also
    --------
    holospectrum, hilbertmarginal

    """
    # No housekeeping here - assume that inputs have been sanitised by higher level functions.

    # Find bin indices for user specified dimensions
    x_inds = _digitize(X, x_edges)
    y_inds = _digitize(Y, y_edges)

    x_inds = np.broadcast_to(x_inds[:, :, np.newaxis], y_inds.shape)
    # Find bin indices for time dimension - cast to match input shape
    t_inds = np.broadcast_to(np.arange(x_inds.shape[0])[:, np.newaxis, np.newaxis],
                             y_inds.shape)
    # Find bin indices for first IMF dimension - cast to match input shape
    i_inds = np.broadcast_to(np.arange(X.shape[1])[np.newaxis, :, np.newaxis],
                             y_inds.shape)
    # Find bin indices for second IMF dimension - cast to match input shape
    j_inds = np.broadcast_to(np.arange(Y.shape[2])[np.newaxis, np.newaxis, :],
                             y_inds.shape)

    # Create vectorised COO coordinate array
    coords = np.c_[x_inds.flatten(),
                   y_inds.flatten(),
                   t_inds.flatten(),
                   i_inds.flatten(),
                   j_inds.flatten()].T

    # Vectorise amplitude values to match coordinates
    Z = Z.flatten()

    # Drop observations which lie outside specified bin edges
    drops = np.any(coords == DROP_SENTINAL, axis=0)
    Z = np.delete(Z, drops)
    coords = np.delete(coords, drops, axis=1)

    # Compute final shape
    final_shape = (x_edges.shape[0]-1,
                   y_edges.shape[0]-1,
                   x_inds.shape[0],
                   x_inds.shape[1],
                   y_inds.shape[2])

    # Create sparse spectrum
    from sparse import COO
    s = COO(coords, Z, shape=final_shape)

    return s


def _digitize(vals, edges):
    """Return index of values into a set of defined bins.

    Parameters
    ----------
    vals : array_like
        Array of values to be binned
    edges : array_like
        Array containing the edges of bins. N edges define N-1 bins. Each bin
        is inclusive of its lower edge and exclusive of its upper edge.

    Returns
    -------
    ndarray
        Array containing index of each data point in vals into bins defined by edges

    Notes
    -----
    This function is a wrapper for np.digitize but has important differences.

    1. This function returns a sentinel value for observations outside the
       range of the specified bin edges
    2. The bin edges in this function are inclusive on the lower end and
       exclusive on the top.

    """
    drops = np.logical_or(vals < edges[0], vals >= edges[-1])
    inds = np.digitize(vals, edges) - 1
    inds[drops] = DROP_SENTINAL
    return inds
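
# Illustrative example with made-up values - edges [1, 2, 3, 4] define three bins
# that are inclusive of their lower edge; values outside [1, 4) map to DROP_SENTINAL:
#
#     _digitize(np.array([0.5, 1.0, 2.5, 5.0]), np.array([1., 2., 3., 4.]))
#     # -> array([DROP_SENTINAL, 0, 1, DROP_SENTINAL])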


#%% -----------------------------------------------------
# Utilities


def define_hist_bins(data_min, data_max, nbins, scale='linear'):
    """Define the bin edges and centre values for use in a histogram.

    Parameters
    ----------
    data_min : float
        Value for minimum edge
    data_max : float
        Value for maximum edge
    nbins : int
        Number of bins to create
    scale : {'linear','log'}
        Flag indicating whether to use a linear or log spacing between bins (Default value = 'linear')

    Returns
    -------
    edges : ndarray
        1D array of bin edges
    centres : ndarray
        1D array of bin centres

    Notes
    -----
    An example creating histogram bins between 1 Hz and 5 Hz with four linearly
    spaced bins.

    >>> edges, centres = emd.spectra.define_hist_bins(1, 5, 4)
    >>> print(edges)
    [1. 2. 3. 4. 5.]
    >>> print(centres)
    [1.5 2.5 3.5 4.5]

    """
    if scale == 'log':
        p = np.log([data_min, data_max])
        edges = np.linspace(p[0], p[1], nbins + 1)
        edges = np.exp(edges)
    elif scale == 'linear':
        edges = np.linspace(data_min, data_max, nbins + 1)
    else:
        raise ValueError('scale \'{0}\' not recognised. please use \'log\' or \'linear\'.'.format(scale))

    # Get centre frequency for the bins
    centres = np.array([(edges[ii] + edges[ii + 1]) / 2 for ii in range(len(edges) - 1)])

    return edges, centres


def define_hist_bins_from_data(X, nbins=None, mode='sqrt', scale='linear', tol=1e-3, max_bins=2048):
    """Find the bin edges and centre frequencies for use in a histogram.

    If nbins is defined, mode is ignored.

    Parameters
    ----------
    X : ndarray
        Dataset whose summary stats will define the histogram
    nbins : int
        Number of bins to create, if undefined this is derived from the data (Default value = None)
    mode : {'sqrt'}
        Method for deriving number of bins if nbins is undefined (Default value = 'sqrt')
    scale : {'linear','log'}
        (Default value = 'linear')
    tol : float
        Small padding applied below the data minimum and above the data maximum (Default value = 1e-3)
    max_bins : int
        Upper limit on the number of bins (Default value = 2048)

    Returns
    -------
    edges : ndarray
        1D array of bin edges
    centres : ndarray
        1D array of bin centres

    """
    data_min = X.min() - tol
    data_max = X.max() + tol

    if nbins is None:
        if mode == 'sqrt':
            nbins = np.sqrt(X.shape[0]).astype(int)
        else:
            raise ValueError('mode \'{0}\' not recognised, please use \'sqrt\''.format(mode))

    # Don't exceed max_bin number
    nbins = nbins if nbins < max_bins else max_bins

    return define_hist_bins(data_min, data_max, nbins, scale=scale)
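
# Illustrative example with made-up data - 100 samples give sqrt(100) = 10 bins,
# padded by `tol` at either end:
#
#     X = np.linspace(0, 10, 100)
#     edges, centres = define_hist_bins_from_data(X)  # 11 edges from -0.001 to 10.001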


def _histogram_bin_relay(params, data=None):
    """Relay function which does-the-right-thing with histogram bin inputs.

    Parameters
    ----------
    params : None or tuple(start, stop, nsteps) or np.ndarray
        Parameters given by user, if:

        * None - bins automatically computed using define_hist_bins_from_data
        * Tuple of length three or four - bins computed by passing params to define_hist_bins
        * numpy.ndarray - input defines bin edges, bin centres are computed.
    data : ndarray
        Optional data used to compute bins if params=None

    Returns
    -------
    ndarray
        Array of bin edges
    ndarray
        Array of bin centres

    """
    if params is None:
        # User didn't say anything - guess bins
        edges, bins = define_hist_bins_from_data(data.flatten())
    elif isinstance(params, tuple) and len(params) in [3, 4]:
        # User specified meta bins - make actual bins
        edges, bins = define_hist_bins(*params)
    elif isinstance(params, (list, tuple, np.ndarray)):
        # User provided actual bin edges - use them
        edges = np.array(params)
        bins = _compute_centres_from_edges(edges)
    else:
        raise ValueError('Inputs not recognised....')

    return edges, bins
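
# Illustrative example - the three accepted input forms all return an
# (edges, centres) pair (`IF` is an assumed array of instantaneous frequencies):
#
#     edges, centres = _histogram_bin_relay((1, 5, 4))                # tuple passed to define_hist_bins
#     edges, centres = _histogram_bin_relay(np.array([1., 2., 4.]))   # explicit bin edges
#     edges, centres = _histogram_bin_relay(None, data=IF)            # bins guessed from data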


def _compute_centres_from_edges(edges, method='mean'):
    """Compute bin centres from an array of bin edges."""
    if method == 'geometric':
        bins = np.sqrt(edges[1:] * edges[:-1])
    elif method == 'mean':
        bins = (edges[1:] + edges[:-1]) / 2
    else:
        msg = 'method \'{0}\' not recognised. please use \'mean\' or \'geometric\'.'
        raise ValueError(msg.format(method))

    return bins