Coverage for emd/spectra.py: 58%

303 statements  

coverage.py v7.6.10, created at 2025-01-09 10:07 +0000

1#!/usr/bin/python 

2 

3# vim: set expandtab ts=4 sw=4: 

4 

5""" 

6Routines relating to frequency transforms and power-spectra. 

7 

8Frequency Transform Routines: 

9 frequency_transform 

10 quadrature_transform 

11 phase_from_complex_signal 

12 freq_from_phase 

13 phase_from_freq 

14 phase_angle 

15 

16Power Spectra: 

17 holospectrum 

18 hilberthuang 

19 hilbertmarginal

20 

21Power Spectra Helpers: 

22 define_hist_bins 

23 define_hist_bins_from_data 

24 

25""" 

26 

27import logging 

28 

29import numpy as np 

30 

31from . import cycles, imftools 

32from ._sift_core import interp_envelope 

33from .support import ensure_2d, ensure_equal_dims, ensure_vector 

34 

35# Housekeeping for logging 

36logger = logging.getLogger(__name__) 

37 

38# Sentinel value for observations outside histogram range

39DROP_SENTINAL = np.iinfo(np.int32).min 

40 

41## 

42 

43 

44def frequency_transform(imf, sample_rate, method, smooth_freq=3, 

45 smooth_phase=5): 

46 """Compute instantaneous phase, frequency and amplitude from a set of IMFs. 

47 

48 Several approaches are implemented from [1]_ and [2]_. 

49 

50 Parameters 

51 ---------- 

52 imf : ndarray 

53 Input array of IMFs. 

54 sample_rate : float 

55 Sampling frequency of the signal in Hz 

56 method : {'hilbert','quad','direct_quad','nht'} 

57 The method for computing the frequency stats 

smooth_freq : int
Length of the Savitzky-Golay filter window used when computing instantaneous frequency (Default value = 3)

58 smooth_phase : int

59 Length of window when smoothing the unwrapped phase (Default value = 5)

60 

61 Returns 

62 ------- 

63 IP : ndarray 

64 Array of instantaneous phase estimates 

65 IF : ndarray 

66 Array of instantaneous frequency estimates 

67 IA : ndarray 

68 Array of instantaneous amplitude estimates 

69 
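Notes
-----
A minimal usage sketch (here ``x`` is assumed to be a 1D time-series sampled
at 512Hz and its IMFs come from a prior sift):

>>> imf = emd.sift.sift(x)
>>> IP, IF, IA = emd.spectra.frequency_transform(imf, 512, 'hilbert')
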

70 References 

71 ---------- 

72 .. [1] Huang, N. E., Shen, Z., Long, S. R., Wu, M. C., Shih, H. H., Zheng, 

73 Q., … Liu, H. H. (1998). The empirical mode decomposition and the Hilbert 

74 spectrum for nonlinear and non-stationary time series analysis. Proceedings 

75 of the Royal Society of London. Series A: Mathematical, Physical and 

76 Engineering Sciences, 454(1971), 903–995. 

77 https://doi.org/10.1098/rspa.1998.0193 

78 .. [2] Huang, N. E., Wu, Z., Long, S. R., Arnold, K. C., Chen, X., & Blank, 

79 K. (2009). On Instantaneous Frequency. Advances in Adaptive Data Analysis, 

80 1(2), 177–229. https://doi.org/10.1142/s1793536909000096 

81 

82 """ 

83 from scipy.signal import hilbert 

84 logger.info('STARTED: compute frequency stats') 

85 

86 imf = ensure_2d([imf], ['imf'], 'frequency_transform') 

87 logger.debug('computing on {0} samples over {1} imfs at sample rate {2}'.format(imf.shape[0], 

88 imf.shape[1], 

89 sample_rate)) 

90 

91 # Each case here should compute the analytic form of the imfs and the 

92 # instantaneous amplitude. 

93 if method == 'hilbert': 

94 logger.info('Using Hilbert transform') 

95 

96 analytic_signal = hilbert(imf, axis=0) 

97 

98 # Estimate instantaneous amplitudes directly from analytic signal 

99 iamp = np.abs(analytic_signal) 

100 

101 elif method == 'nht': 

102 logger.info('Using Amplitude-Normalised Hilbert transform') 

103 

104 n_imf = imftools.amplitude_normalise(imf) 

105 analytic_signal = hilbert(n_imf, axis=0) 

106 

107 orig_dim = imf.ndim 

108 if imf.ndim == 2: 

109 imf = imf[:, :, None] 

110 

111 # Estimate inst amplitudes with spline interpolation 

112 iamp = np.zeros_like(imf) 

113 for ii in range(imf.shape[1]): 

114 for jj in range(imf.shape[2]): 

115 iamp[:, ii, jj] = interp_envelope(imf[:, ii, jj], 

116 mode='upper') 

117 if orig_dim == 2: 

118 iamp = iamp[:, :, 0] 

119 

120 elif method == 'ctrl': 

121 logger.info('Using Control Points - CURRENTLY BROKEN') 

122 

123 orig_dim = imf.ndim 

124 if imf.ndim == 2: 

125 imf = imf[:, :, None] 

126 

127 # Get phase from control points 

128 iphase = np.zeros_like(imf) 

129 for ii in range(imf.shape[1]): 

130 for jj in range(imf.shape[2]): 

131 good_cycles = cycles.get_cycle_inds_from_waveform(imf[:, ii, jj], cycle_start='asc') 

132 ctrl = cycles.get_control_points(imf[:, ii, jj], good_cycles) 

133 iphase[:, ii, jj] = phase_from_control_points(ctrl, good_cycles) 

134 iphase[:, ii, jj] = np.unwrap(iphase[:, ii, jj]) 

135 

136 # Estimate inst amplitudes with spline interpolation 

137 iamp = np.zeros_like(imf) 

138 for ii in range(imf.shape[1]): 

139 for jj in range(imf.shape[2]): 

140 iamp[:, ii, jj] = interp_envelope(imf[:, ii, jj], 

141 mode='upper') 

142 

143 if orig_dim == 2: 

144 iamp = iamp[:, :, 0] 

145 iphase = iphase[:, :, 0] 

146 

147 elif method == 'quad': 

148 logger.info('Using Quadrature transform') 

149 

150 analytic_signal = quadrature_transform(imf) 

151 

152 orig_dim = imf.ndim 

153 if imf.ndim == 2: 

154 imf = imf[:, :, None] 

155 

156 # Estimate inst amplitudes with spline interpolation 

157 iamp = np.zeros_like(imf) 

158 for ii in range(imf.shape[1]): 

159 for jj in range(imf.shape[2]): 

160 iamp[:, ii, jj] = interp_envelope(imf[:, ii, jj], 

161 mode='upper') 

162 

163 if orig_dim == 2: 

164 iamp = iamp[:, :, 0] 

165 

166 elif method == 'direct_quad': 

167 logger.info('Using Direct-Quadrature transform') 

168 raise ValueError('direct_quad method is broken!') 

169 

170 n_imf = imftools.amplitude_normalise(imf.copy()) 

171 iphase = np.unwrap(phase_angle(n_imf)) 

172 

173 iamp = np.zeros_like(imf) 

174 for ii in range(imf.shape[1]): 

175 iamp[:, ii] = interp_envelope(imf[:, ii, None], mode='combined') 

176 

177 else: 

178 logger.error("Method '{0}' not recognised".format(method)) 

179 raise ValueError("Method '{0}' not recognised\nPlease use one of 'hilbert','nht' or 'quad'".format(method)) 

180 

181 if method != 'ctrl': 

182 # Compute unwrapped phase for frequency estimation 

183 iphase = phase_from_complex_signal(analytic_signal, 

184 smoothing=smooth_phase, 

185 ret_phase='unwrapped') 

186 

187 # Compute inst. freq from phase 

188 ifreq = freq_from_phase(iphase, sample_rate, savgol_width=smooth_freq) 

189 

190 # Return wrapped phase 

191 iphase = imftools.wrap_phase(iphase) 

192 

193 logger.info('COMPLETED: compute frequency stats. Returning {0} imfs'.format(iphase.shape[1])) 

194 return iphase, ifreq, iamp 

195 

196#%% ----------------------------------------------------- 

197# Frequency stat utils 

198 

199 

200def quadrature_transform(X, fix_zerocrossings=False): 

201 """Compute the quadrature transform on a set of time-series. 

202 

203 This algorithm is defined in equation 34 of [1]_. The return is a complex 

204 array with the input data as the real part and the quadrature transform as 

205 the imaginary part. 

206 

207 Parameters 

208 ---------- 

209 X : ndarray

210 Array containing time-series to transform

fix_zerocrossings : bool
Flag indicating whether to apply an experimental numerical fix to samples near zero-crossings (Default value = False)

211 

212 Returns 

213 ------- 

214 quad_signal : ndarray 

215 Complex valued array containing the quadrature transformed signal 

216 
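Notes
-----
A brief sketch (assuming ``imf`` is a [samples x imfs] array of oscillatory
modes, for example the output of emd.sift.sift):

>>> quad = emd.spectra.quadrature_transform(imf)
>>> IP = emd.spectra.phase_from_complex_signal(quad)
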

217 References 

218 ---------- 

219 .. [1] Huang, N. E., Wu, Z., Long, S. R., Arnold, K. C., Chen, X., & Blank, 

220 K. (2009). On Instantaneous Frequency. Advances in Adaptive Data Analysis, 

221 1(2), 177–229. https://doi.org/10.1142/s1793536909000096 

222 

223 """ 

224 nX = imftools.amplitude_normalise(X.copy(), clip=False) 

225 nX = nX / (np.abs(nX).max() + 1e-8) # Rescale so no remaining points lie outside -1 < x < 1

226 

227 # Avoid occasional 'invalid value encountered' RuntimeWarning in sqrt using 

228 # where argument in ufunc 

229 tmp = 1 - nX**2 

230 good_vals = (tmp != 0) & (np.isnan(tmp) == False) # noqa: E712 

231 imagX = np.sqrt(tmp, out=tmp, where=good_vals) 

232 

233 # Add warning here.... 

234 if np.all(np.isreal(imagX)) == False: # noqa: E712 

235 imagX = imagX.real 

236 

237 mask = ((np.diff(nX, axis=0) > 0) * -2) + 1 

238 mask[mask == 0] = -1 

239 mask = np.r_[mask, mask[-1, None, :]] 

240 

241 q = imagX * mask 

242 

243 if fix_zerocrossings: 

244 q = _fix_quadrature_zero_crossings(q) 

245 

246 return nX + 1j * q 

247 

248 

249def _fix_quadrature_zero_crossings(quad): 

250 """Numerical 'fix' for instability around zero in quadrature signals. 

251 

252 EXPERIMENTAL WORK-IN-PROGRESS FUNCTION!! Use with caution. 

253 

254 Replaces sample closest to zero with the average of the four surrounding 

255 points. This is needed as the direct quadrature method involves squaring 

256 the raw signal - this is normally fine but explodes when close to zero. 

257 

258 """ 

259 quad_fix = quad.copy() 

260 

261 for ii in range(quad.shape[1]): 

262 # Find all zero crossings 

263 zc = np.where(np.diff(np.sign(quad[:, ii]), axis=0) != 0)[0] 

264 # Drop crossings we don't have surrounding samples for 

265 zc = zc[(zc >= 1) & (zc < quad.shape[0]-3)] 

266 # Delay-embedding array around zero-crossing 

267 zz = np.vstack((quad[zc-1, ii], 

268 quad[zc, ii], 

269 quad[zc+1, ii], 

270 quad[zc+2, ii], 

271 quad[zc+3, ii])).T 

272 

273 # Take a copy for output and replace 'fixed' zero-crossing point. 

274 quad_fix[zc+1, ii] = zz[:, np.array((0, 4))].mean(axis=1) 

275 quad_fix[zc, ii] = np.average(zz[:, np.array((0, 4))], weights=[3/4, 1/4], axis=1) 

276 quad_fix[zc+2, ii] = np.average(zz[:, np.array((0, 4))], weights=[1/4, 3/4], axis=1) 

277 

278 from scipy.ndimage import median_filter 

279 quad_fix = median_filter(quad_fix, (7, 1)) 

280 

281 return quad_fix 

282 

283 

284def phase_from_complex_signal(complex_signal, smoothing=None, 

285 ret_phase='wrapped', phase_jump='ascending'): 

286 """Compute the instantaneous phase from a complex signal. 

287 

288 The complex input may be obtained from either the Hilbert Transform or by 

289 Direct Quadrature. 

290 

291 Parameters 

292 ---------- 

293 complex_signal : complex ndarray 

294 Complex valued input array 

295 smoothing : int 

296 Integer window length used in phase smoothing (Default value = None) 

297 ret_phase : {'wrapped','unwrapped'} 

298 Flag indicating whether to return the wrapped or unwrapped phase (Default value = 'wrapped') 

299 phase_jump : {'ascending','peak','descending','trough'} 

300 Flag indicating where in the cycle the phase jump should be (Default value = 'ascending') 

301 

302 Returns 

303 ------- 

304 IP : ndarray 

305 Array of instantaneous phase values 

306 
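Notes
-----
A short example (assuming ``imf`` is a [samples x imfs] array; the analytic
signal is built here with scipy's Hilbert transform):

>>> from scipy.signal import hilbert
>>> analytic = hilbert(imf, axis=0)
>>> IP = emd.spectra.phase_from_complex_signal(analytic, ret_phase='unwrapped')
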

307 """ 

308 # Compute unwrapped phase 

309 iphase = np.unwrap(np.angle(complex_signal), axis=0) 

310 

311 orig_dim = iphase.ndim 

312 if iphase.ndim == 2: 

313 iphase = iphase[:, :, None] 

314 

315 # Apply smoothing if requested 

316 from scipy.signal import medfilt 

317 if smoothing is not None: 

318 for ii in range(iphase.shape[1]): 

319 for jj in range(iphase.shape[2]): 

320 iphase[:, ii, jj] = medfilt(iphase[:, ii, jj], smoothing) 

321 

322 if orig_dim == 2: 

323 iphase = iphase[:, :, 0] 

324 

325 # Set phase jump point to requested part of cycle 

326 if phase_jump == 'ascending': 

327 iphase = iphase + np.pi / 2 

328 elif phase_jump == 'peak': 

329 pass # do nothing 

330 elif phase_jump == 'descending': 

331 iphase = iphase - np.pi / 2 

332 elif phase_jump == 'trough': 

333 iphase = iphase + np.pi 

334 

335 if ret_phase == 'wrapped': 

336 return imftools.wrap_phase(iphase) 

337 elif ret_phase == 'unwrapped': 

338 return iphase 

339 

340 

341def freq_from_phase(iphase, sample_rate, savgol_width=3): 

342 """Compute the instantaneous frequency from the instantaneous phase. 

343 

344 A savitsky-golay filter is used to compute the derivative of the phase and 

345 can be smoothed by specifying a longer savgol_width (minimum value=3). 

346 

347 Parameters 

348 ---------- 

349 iphase : ndarray 

350 Input array containing the unwrapped instantaneous phase time-course 

351 sample_rate : float 

352 The sampling frequency of the data 

353 savgol_width : int >= 3 

354 The window length of the Savitsky-Golay filter window 

355 

356 Returns 

357 ------- 

358 IF : ndarray 

359 Array containing the instantaneous frequencies 

360 
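Notes
-----
A short example (assuming ``unwrapped_phase`` holds unwrapped instantaneous
phase values from data sampled at 512Hz):

>>> IF = emd.spectra.freq_from_phase(unwrapped_phase, 512)
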

361 """ 

362 from scipy.signal import savgol_filter 

363 

364 # Differential of instantaneous phase 

365 iphase = savgol_filter(iphase, savgol_width, 1, deriv=1, axis=0) 

366 

367 # Convert to freq 

368 ifrequency = iphase / (2.0 * np.pi) * sample_rate 

369 

370 return ifrequency 

371 

372 

373def phase_from_freq(ifrequency, sample_rate, phase_start=-np.pi): 

374 """Compute the instantaneous phase of a signal from its instantaneous frequency. 

375 

376 Parameters 

377 ---------- 

378 ifrequency : ndarray 

379 Input array containing the instantaneous frequencies of a signal 

380 sample_rate : float 

381 The sampling frequency of the data 

382 phase_start : float 

383 Start value of the phase output (Default value = -np.pi) 

384 

385 Returns 

386 ------- 

387 IP : ndarray 

388 The instantaneous phase of the signal 

389 
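Notes
-----
A short example approximately inverting freq_from_phase (assuming ``IF``
contains instantaneous frequencies from data sampled at 512Hz):

>>> IP = emd.spectra.phase_from_freq(IF, 512)
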

390 """ 

391 iphase_diff = (ifrequency / sample_rate) * (2 * np.pi) 

392 

393 iphase = phase_start + np.cumsum(iphase_diff, axis=0) 

394 

395 return iphase 

396 

397 

398def phase_from_control_points(ctrl, cycles): 

399 """Compute instantaneous phase from control points.""" 

400 from scipy import interpolate as interp 

401 

402 cycles = ensure_vector([cycles], 

403 ['cycles'], 

404 'phase_from_control_points') 

405 

406 ip = np.zeros_like(cycles, dtype=float) 

407 phase_y = np.array([0, np.pi / 2, np.pi, 3 * np.pi / 2, 2 * np.pi]) 

408 

409 for jj in range(1, cycles.max() + 1): 

410 

411 if np.any(np.isnan(ctrl[jj-1, :])): 

412 continue 

413 

414 f = interp.interp1d(ctrl[jj-1, :], phase_y, kind='linear') 

415 ph = f(np.arange(0, ctrl[jj-1, -1] + 1)) 

416 

417 ip[cycles == jj] = ph 

418 

419 return ip 

420 

421 

422def direct_quadrature(fm): 

423 """Compute the quadrature transform on a set of time-series. 

424 

425 This algorithm is defined in equation 35 of [1]_.

426 

427 Section 3.2 of 'on instantaneous frequency' 

428 

429 THIS IS IN DEVELOPMENT 

430 

431 Parameters 

432 ---------- 

433 fm : ndarray 

434 Input signal containing a frequency-modulated signal 

435 

436 References 

437 ---------- 

438 .. [1] Huang, N. E., Wu, Z., Long, S. R., Arnold, K. C., Chen, X., & Blank, 

439 K. (2009). On Instantaneous Frequency. Advances in Adaptive Data Analysis, 

440 1(2), 177–229. https://doi.org/10.1142/s1793536909000096 

441 

442 """ 

443 ph = phase_angle(fm) 

444 

445 # We'll have occasional nans where fm==1 or -1 

446 inds = np.argwhere(np.isnan(ph)) 

447 

448 vals = (ph[inds[:, 0] - 1, :] + ph[inds[:, 0] + 1, :]) / 2 

449 ph[inds[:, 0]] = vals 

450 

451 return ph 

452 

453 

454def phase_angle(fm): 

455 """Compute the phase angle of a set of time-series. 

456 

457 This algorithm is defined in equation 35 of [1]_. 

458 

459 THIS IS IN DEVELOPMENT 

460 

461 Parameters 

462 ---------- 

463 fm : ndarray

464 Array containing the frequency-modulated time-series to transform

465 

466 Returns 

467 ------- 

468 IP : ndarray

469 Array containing the phase angle of the input signal

470 

471 References 

472 ---------- 

473 .. [1] Huang, N. E., Wu, Z., Long, S. R., Arnold, K. C., Chen, X., & Blank, 

474 K. (2009). On Instantaneous Frequency. Advances in Adaptive Data Analysis, 

475 1(2), 177–229. https://doi.org/10.1142/s1793536909000096 

476 

477 """ 

478 return np.arctan(fm / np.lib.scimath.sqrt(1 - np.power(fm, 2))) 

479 

480#%% ----------------------------------------------------- 

481# Time-frequency spectra 

482 

483 

484def hilberthuang(IF, IA, edges=None, 

485 sum_time=True, 

486 sum_imfs=True, 

487 mode='power', 

488 sample_rate=1, 

489 scaling=None, 

490 return_sparse=False, 

491 return_Gb_limit=10): 

492 """Compute a Hilbert-Huang transform (HHT). 

493 

494 The Hilbert-Huang transform takes the instantaneous frequency and

495 instantaneous amplitude of a time-series and represents the energy of a 

496 signal across time and frequency [1]_. 

497 

498 The full Hilbert-Huang array is 3-dimensional [nfrequencies x ntimes x nimfs]. 

499 By default, the returned spectrum is summed across time and IMFs,

500 returning only the frequency dimension - this behaviour can be tuned with

501 the sum_time and sum_imfs arguments. Setting return_sparse to True is

502 strongly recommended when returning very large arrays.

503 

504 Parameters 

505 ---------- 

506 IF : ndarray 

507 2D first level instantaneous frequencies 

508 IA : ndarray 

509 2D first level instantaneous amplitudes 

510 edges : {ndarray, tuple or None} 

511 Definition of the frequency bins used in the spectrum. This may be: 

512 

513 * array_like vector of bin edge values (as defined by 

514 emd.spectra.define_hist_bins) 

515 

516 * a tuple of values that can be passed to emd.spectra.define_hist_bins 

517 (eg edges=(1,50,49) will define 49 bins between 1 and 50Hz) 

518 

519 * None in which case a sensible set of bins will be defined from the 

520 input data (this is the default option) 

521 sum_time : boolean 

522 Flag indicating whether to sum across time dimension 

523 sum_imfs : boolean 

524 Flag indicating whether to sum across IMF dimension 

525 mode : {'power','amplitude'} 

526 Flag indicating whether to sum the power or amplitudes (Default value = 'power') 

527 scaling : {'density', 'spectrum', None} 

528 Switch specifying the normalisation or scaling applied to the spectrum. 

529 sample_rate : float 

530 Sampling rate of the data used in 'density' scaling 

531 return_sparse : bool 

532 Flag indicating whether to return the full or sparse form (Default value = False)

533 return_Gb_limit : {float, None} 

534 Maximum array size in Gb that will be returned if a non-sparse/dense 

535 array is being returned (default = 10). If the function return would 

536 exceed this size, the function will raise an error. If set to None, 

537 then no limit is imposed. Sparse arrays are always returned. 

538 

539 Returns 

540 ------- 

541 f : ndarray 

542 Vector of histogram bin centers for each frequency 

543 hht : ndarray 

544 2D array containing the Hilbert-Huang Transform 

545 

546 Notes 

547 ----- 

548 Run a HHT with an automatically generated set of histogram bins: 

549 

550 >>> f, hht = emd.spectra.hilberthuang(IF, IA, sample_rate=512) 

551 

552 Run a HHT and return the spectrum for each IMF separately 

553 

554 >>> f, hht = emd.spectra.hilberthuang(IF, IA, sample_rate=512, sum_imfs=False)

555 

556 Run a HHT with 49 bins, linearly spaced between 1 and 50Hz 

557 

558 >>> f, hht = emd.spectra.hilberthuang(IF, IA, edges=(1, 50, 49), sample_rate=512) 

559 

560 Run a HHT with 49 bins, logarithmically spaced between 1 and 50Hz

561 

562 >>> f, hht = emd.spectra.hilberthuang(IF, IA, edges=(1, 50, 49, 'log'), sample_rate=512) 

563 

564 Run a HHT with an externally generated set of histogram bin edges 

565 

566 >>> my_edges = np.array([0.5, 2, 5, 11, 22]) 

567 >>> f, hht = emd.spectra.hilberthuang(IF, IA, edges=my_edges, sample_rate=512)

568 

569 Run a HHT and return the full time dimension - the HHT is summed over time by default 

570 

571 >>> f, hht = emd.spectra.hilberthuang(IF, IA, edges=(1, 50, 49), sample_rate=512, sum_time=False) 

572 

573 Run a HHT and return a memory efficient sparse array - this is strongly 

574 recommended for very large HHTs 

575 

576 >>> f, hht = emd.spectra.hilberthuang(IF, IA, edges=(1, 50, 49), sample_rate=512, 

577 >>> sum_time=False, return_sparse=True) 

578 

579 If return_sparse is set to True the returned array is a sparse matrix in 

580 COOrdinate form using the sparse package (sparse.COO). This is much more memory

581 efficient than the full form but may not behave as expected in all 

582 functions expecting full arrays. 

583 

584 References 

585 ---------- 

586 .. [1] Huang, N. E., Shen, Z., Long, S. R., Wu, M. C., Shih, H. H., Zheng, 

587 Q., … Liu, H. H. (1998). The empirical mode decomposition and the Hilbert 

588 spectrum for nonlinear and non-stationary time series analysis. Proceedings 

589 of the Royal Society of London. Series A: Mathematical, Physical and 

590 Engineering Sciences, 454(1971), 903–995. 

591 https://doi.org/10.1098/rspa.1998.0193 

592 

593 """ 

594 # Housekeeping

595 IF, IA = ensure_2d([IF, IA], ['IF', 'IA'], 'hilberthuang') 

596 ensure_equal_dims((IF, IA), ('IF', 'IA'), 'hilberthuang') 

597 

598 logger.info('STARTED: compute Hilbert-Huang Transform') 

599 logger.debug('computing on {0} samples over {1} IMFs '.format(IF.shape[0], 

600 IF.shape[1])) 

601 edges, bins = _histogram_bin_relay(edges, IF.flatten()) 

602 logger.debug('Freq bins: {0} to {1} in {2} steps'.format(edges[0], 

603 edges[-1], 

604 len(edges))) 

605 

606 # Begin computation 

607 spec = _base_spectra(IF, IA, edges) 

608 

609 sum_dims = np.where([0, sum_time, sum_imfs])[0] 

610 

611 spec = _post_process_spectra(spec, sum_dims=sum_dims, 

612 mode=mode, time_dim=1, 

613 sample_rate=sample_rate, scaling=scaling, 

614 return_sparse=return_sparse, return_Gb_limit=return_Gb_limit)

615 

616 logger.info('COMPLETED: Hilbert-Huang Transform - output size {0}'.format(spec.shape)) 

617 return bins, spec 

618 

619 

620def holospectrum(IF, IF2, IA2, 

621 edges=None, edges2=None, 

622 sum_time=True, 

623 sum_first_imfs=True, 

624 sum_second_imfs=True, 

625 mode='power', 

626 sample_rate=1, 

627 scaling=None, 

628 return_sparse=False, 

629 return_Gb_limit=10): 

630 """Compute a Holospectrum. 

631 

632 Holospectra are computed from the first and second layer frequency

633 statistics of a dataset. The Holospectrum represents the energy of a signal 

634 across time, carrier frequency and amplitude-modulation frequency [1]_. 

635 

636 The full Holospectrum is a 5-dimensional array:

637 [nfrequencies x namplitude_frequencies x time x first_imfs x second_imfs]

638 By default, the returned holospectrum is summed across time and IMFs, 

639 returning only the first two dimensions - this behaviour can be tuned with 

640 the sum_time, sum_first_imfs and sum_second_imfs arguments. 

641 

642 WARNING: returning the full Holospectrum can create some enormous arrays! 

643 Setting return_sparse=True is VERY strongly recommended if you want to work 

644 with the raw time and IMF dimensions. 

645 

646 Parameters 

647 ---------- 

648 IF : ndarray 

649 2D first level instantaneous frequencies 

650 IF2 : ndarray 

651 3D second level instantaneous frequencies 

652 IA2 : ndarray 

653 3D second level instantaneous amplitudes 

654 edges : {ndarray, tuple or None} 

655 Definition of the frequency bins used for carrier frequencies in the 

656 spectrum. This may be: 

657 

658 * array_like vector of bin edge values (as defined by 

659 emd.spectra.define_hist_bins) 

660 

661 * a tuple of values that can be passed to emd.spectra.define_hist_bins 

662 (eg edges=(1,50,49) will define 49 bins between 1 and 50Hz) 

663 

664 * None in which case a sensible set of bins will be defined from the 

665 input data (this is the default option) 

666 edges2 : {ndarray, tuple or None} 

667 Definition of the frequency bins used for amplitude modulation 

668 frequencies in the spectrum. The options are the same as for `edges`. 

669 sum_time : boolean 

670 Flag indicating whether to sum across time dimension 

671 sum_first_imfs : boolean 

672 Flag indicating whether to sum across first-layer IMF dimension 

673 sum_second_imfs : boolean 

674 Flag indicating whether to sum across the second-layer IMF dimension 

675 mode : {'power','amplitude'} 

676 Flag indicating whether to sum the power or amplitude (Default value = 'power') 

677 scaling : {'density', 'spectrum', None} 

678 Switch specifying the normalisation or scaling applied to the spectrum. 

679 sample_rate : float 

680 Sampling rate of the data used in 'density' scaling 

681 return_sparse : boolean 

682 Flag indicating whether to return a sparse or dense (normal numpy) array. 

683 return_Gb_limit : {float, None} 

684 Maximum array size in Gb that will be returned if a non-sparse/dense 

685 array is being returned (default = 10). If the function return would 

686 exceed this size, the function will raise an error. If set to None, 

687 then no limit is imposed. Sparse arrays are always returned. 

688 

689 Returns 

690 ------- 

691 f_carrier : ndarray 

692 Vector of histogram bin centers for each carrier (first-level) frequency 

693 f_am : ndarray 

694 Vector of histogram bin centers for each amplitude modulation 

695 (second-level) frequency 

696 holo : ndarray 

697 Holospectrum of input data. 

698 

699 Notes 

700 ----- 

701 Run a Holospectrum with an automatically generated set of histogram bins: 

702 

703 >>> fcarrier, fam, holo = emd.spectra.holospectrum(IF, IF2, IA2, sample_rate=512)

704 

705 Run a Holospectrum and return the spectrum for each first and second level IMF separately 

706 

707 >>> fcarrier, fam, holo = emd.spectra.holospectrum(IF, IF2, IA2, sample_rate=512,

708 >>> sum_first_imfs=False, sum_second_imfs=False) 

709 

710 Run a Holospectrum with 49 carrier frequency bins linearly spaced between 1 

711 and 50Hz and 32 amplitude modulation frequency bins logarithmically spaced

712 between 0.1 and 20Hz 

713 

714 >>> fcarrier, fam, holo = emd.spectra.holospectrum(IF, IF2, IA2, sample_rate=512,

715 >>> edges=(1, 50, 49), 

716 >>> edges2=(0.1, 20, 32, 'log')) 

717 

718 Run a Holospectrum without summing over the time dimensions and return the 

719 result in a memory efficient sparse array - this is strongly recommended 

720 for very large HHTs 

721 

722 >>> fcarrier, fam, holo = emd.spectra.holospectrum(IF, IF2, IA2, sample_rate=512,

723 >>> edges=(1, 50, 49),

724 >>> edges2=(0.1, 20, 32, 'log'),

725 >>> sum_time=False, return_sparse=True) 

726 

727 If return_sparse is set to True the returned array is a sparse matrix in 

728 COOrdinate form using the sparse package (sparse.COO). This is much more memory

729 efficient than the full form but may not behave as expected in all 

730 functions expecting full arrays. 

731 

732 References 

733 ---------- 

734 .. [1] Huang, N. E., Hu, K., Yang, A. C. C., Chang, H.-C., Jia, D., Liang, 

735 W.-K., … Wu, Z. (2016). On Holo-Hilbert spectral analysis: a full 

736 informational spectral representation for nonlinear and non-stationary 

737 data. Philosophical Transactions of the Royal Society A: Mathematical, 

738 Physical and Engineering Sciences, 374(2065), 20150206. 

739 https://doi.org/10.1098/rsta.2015.0206 

740 

741 """ 

742 # Housekeeping 

743 logger.info('STARTED: compute Holospectrum') 

744 

745 out = ensure_2d((IF, IF2, IA2), ('IF', 'IF2', 'IA2'), 'holospectrum') 

746 IF, IF2, IA2 = out 

747 ensure_equal_dims((IF, IF2, IA2), ('IF', 'IF2', 'IA2'), 'holospectrum', dim=0) 

748 ensure_equal_dims((IF, IF2, IA2), ('IF', 'IF2', 'IA2'), 'holospectrum', dim=1) 

749 

750 msg = 'computing on {0} samples over {1} first-level IMFs and {2} second level IMFs' 

751 logger.debug(msg.format(IF2.shape[0], IF2.shape[1], IF2.shape[2])) 

752 

753 edges, bins = _histogram_bin_relay(edges, IF.flatten()) 

754 logger.debug('First level freq bins: {0} to {1} in {2} steps'.format(edges[0], 

755 edges[-1], 

756 len(edges))) 

757 edges2, bins2 = _histogram_bin_relay(edges2, IF2.flatten()) 

758 logger.debug('Second level freq bins: {0} to {1} in {2} steps'.format(edges2[0], 

759 edges2[-1], 

760 len(edges2))) 

761 

762 # Begin computation 

763 holo = _higher_order_spectra(IF, IF2, IA2, edges, edges2) 

764 

765 sum_dims = np.where([0, 0, sum_time, sum_first_imfs, sum_second_imfs])[0] 

766 

767 holo = _post_process_spectra(holo, sum_dims=sum_dims, 

768 mode=mode, time_dim=2,

769 sample_rate=sample_rate, scaling=scaling,

770 return_sparse=return_sparse, return_Gb_limit=return_Gb_limit)

771 

772 logger.info('COMPLETED: Holospectrum - output size {0}'.format(holo.shape)) 

773 return bins, bins2, holo 

774 

775 

776def hilbertmarginal(IF, IA, order=2, 

777 freq_edges=None, amp_edges=None, 

778 sum_time=True, sum_imfs=True, 

779 sample_rate=1, scaling=None, 

780 return_sparse=False, 

781 return_Gb_limit=10): 

782 """Compute a generalised Hilbert marginal spectrum. 

783 

784 This is an experimental function which probably implements the method 

785 introduced in Huang et al (2008) [1]_. This creates a 2D

786 amplitude-frequency representation of the signal. 

787 

788 Parameters 

789 ---------- 

790 IF : ndarray 

791 2D first level instantaneous frequencies 

792 IA : ndarray 

793 2D first level instantaneous amplitudes 

794 order : int 

795 Power to which amplitude is raised before spectrum computation. 

796 freq_edges : {ndarray, tuple or None}

797 Definition of the frequency bins used in the spectrum. This may be: 

798 

799 * array_like vector of bin edge values (as defined by 

800 emd.spectra.define_hist_bins) 

801 

802 * a tuple of values that can be passed to emd.spectra.define_hist_bins 

803 (eg edges=(1,50,49) will define 49 bins between 1 and 50Hz) 

804 

805 * None in which case a sensible set of bins will be defined from the 

806 input data (this is the default option) 

807 amp_edges : {ndarray, tuple or None} 

808 Definition of amplitude bins used in spectrum. Format options are the 

809 same as for `freq_edges`. 

810 sum_time : boolean 

811 Flag indicating whether to sum across time dimension 

812 sum_imfs : boolean 

813 Flag indicating whether to sum across IMF dimension 

814 sample_rate : float 

815 Sampling rate of the data used in 'density' scaling 

816 scaling : {'density', 'spectrum', None} 

817 Switch specifying the normalisation or scaling applied to the spectrum. 

818 return_sparse : bool 

819 Flag indicating whether to return the full or sparse form (Default value = False)

820 return_Gb_limit : {float, None} 

821 Maximum array size in Gb that will be returned if a non-sparse/dense 

822 array is being returned (default = 10). If the function return would 

823 exceed this size, the function will raise an error. If set to None, 

824 then no limit is imposed. Sparse arrays are always returned. 

825 

826 Returns 

827 ------- 

828 a : ndarray 

829 Vector of histogram bin centers for each amplitude 

830 f : ndarray 

831 Vector of histogram bin centers for each frequency 

832 hima : ndarray

833 2D array containing the Hilbert marginal spectrum

834 
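Notes
-----
A tentative usage sketch (IF and IA as returned by
emd.spectra.frequency_transform; note that this function is experimental):

>>> a, f, hima = emd.spectra.hilbertmarginal(IF, IA, sample_rate=512)
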

835 References 

836 ---------- 

837 .. [1] Huang, Y. X., Schmitt, F. G., Lu, Z. M., & Liu, Y. L. (2008). An 

838 amplitude-frequency study of turbulent scaling intermittency using 

839 Empirical Mode Decomposition and Hilbert Spectral Analysis. In EPL 

840 (Europhysics Letters) (Vol. 84, Issue 4, p. 40010). IOP Publishing. 

841 https://doi.org/10.1209/0295-5075/84/40010 

842 

843 """ 

844 logger.info('STARTED: compute Hilbert-Marginal spectrum') 

845 

846 IF, IA = ensure_2d([IF, IA], ['IF', 'IA'], 'hilbertmarginal')

847 ensure_equal_dims((IF, IA), ('IF', 'IA'), 'hilbertmarginal')

848 

849 freq_edges, freq_bins = _histogram_bin_relay(freq_edges, IF.flatten()) 

850 logger.debug('Freq bins: {0} to {1} in {2} steps'.format(freq_edges[0], 

851 freq_edges[-1], 

852 len(freq_edges))) 

853 

854 amp_edges, amp_bins = _histogram_bin_relay(amp_edges, IA.flatten()) 

855 logger.debug('Amp bins: {0} to {1} in {2} steps'.format(amp_edges[0], 

856 amp_edges[-1], 

857 len(amp_edges))) 

858 

859 # Compute HOS - distribution of amplitude across amplitude, frequency, 

860 # time and IMF 

861 hima = _higher_order_spectra(IA, 

862 IF[:, :, None], 

863 IA[:, :, None], 

864 amp_edges, freq_edges) 

865 # hima is a spase array of dimensions 

866 # [len(edges), len(edges2), num_samples, num_imfs, 1] 

867 

868 # Get amplitude values in broadcastable shape 

869 A_values = np.reshape(IA, (1, 1, IA.shape[0], IA.shape[1], 1))**order 

870 

871 # Scale hima by amplitude values and resolution 

872 dA = np.diff(amp_bins)[0] 

873 hima = hima * A_values * dA 

874 

875 # Create PDF histogram 

876 sum_dims = np.where([0, 0, sum_time, sum_imfs, 1])[0] 

877 hima = hima.sum(axis=sum_dims) 

878 hima = hima / IF.size 

879 

880 # Post-process - summing has already been done

881 hima = _post_process_spectra(hima, mode=None, sample_rate=sample_rate, 

882 scaling=scaling, return_sparse=return_sparse, 

883 return_Gb_limit=return_Gb_limit) 

884 

885 logger.info('COMPLETED: Hilbert-Marginal Spectrum - output size {0}'.format(hima.shape)) 

886 return amp_bins, freq_bins, hima 

887 

888 

889def _post_process_spectra(spec, sum_dims=None, 

890 mode='power', 

891 scaling=None, 

892 time_dim=1, 

893 sample_rate=1, 

894 return_sparse=False, 

895 return_Gb_limit=10): 

896 """Apply standard processes to input spectrum. 

897 

898 This function implements a set of processing options common to all 

899 hilbert-huang based spectra. 

900 

901 Parameters 

902 ---------- 

903 spec : ndarray 

904 2 or 3d input spectrum, usually a sparse array 

905 sum_dims : int or list of int 

906 Flag indicating whether to sum across time dimension 

907 mode : {'power', 'amplitude'} 

908 Switch specifying whether the distribution should return amplitude or 

909 power (amplitude squared) values. 

910 scaling : {'density', 'spectrum', None} 

911 Switch specifying the normalisation or scaling applied to the spectrum. 

912 time_dim : int 

913 Axis index of the dimension across time. This is used when applying 

914 some normalisations or scalings. 

915 return_sparse : boolean 

916 Flag indicating whether to return a sparse or dense (normal numpy) array. 

917 return_Gb_limit : {float, None} 

918 Maximum array size in Gb that will be returned if a non-sparse/dense 

919 array is being returned (default = 10). If the function return would 

920 exceed this size, the function will raise an error. If set to None, 

921 then no limit is imposed. Sparse arrays are always returned. 

922 

923 Returns 

924 ------- 

925 ndarray 

926 processed power spectrum 

927 

928 See Also 

929 -------- 

930 hilberthuang, holospectrum, hilbertmarginal 

931 

932 """ 

933 # No housekeeping here - assume that inputs have been sanitised by higher level functions. 

934 if mode == 'power': 

935 logger.debug('Squaring amplitude to compute power') 

936 spec = spec**2 

937 

938 if scaling == 'density': 

939 logger.debug("Applying scaling: 'density'.") 

940 spec = spec / (sample_rate * spec.shape[time_dim]) 

941 elif scaling == 'spectrum': 

942 logger.debug("Applying scaling: 'spectrum'.") 

943 spec = spec / spec.shape[time_dim] 

944 elif scaling is None: 

945 pass 

946 else: 

947 logger.error('Unknown scaling: {0}'.format(scaling)) 

948 raise ValueError('Unknown scaling: {0}'.format(scaling)) 

949 

950 if (sum_dims is not None) and (len(sum_dims) > 0): 

951 orig_dim = spec.shape 

952 spec = spec.sum(axis=sum_dims) 

953 msg = "Summing across dimensions {0}. Input dims ({1}) -> output dims ({2})" 

954 logger.debug(msg.format(sum_dims, orig_dim, spec.shape)) 

955 

956 byte_size = spec.size * 8 # sparse arrays don't have itemsize attr so assuming 8 for now

957 if return_sparse is False:

958 if (return_Gb_limit is not None) and (byte_size / (1024**3)) > return_Gb_limit:

959 msg = "Converting the output to dense format will create a very large array\n"

960 msg += "This spectrum is about to return a {0}Gb numpy array - the limit is set to {1}Gb.\n"

961 msg += "please either set 'return_sparse' to True to get a memory-efficient sparse array, or \n"

962 msg += "change 'return_Gb_limit' if you really want the dense array..."

963 logger.warning(msg.format(byte_size / (1024**3), return_Gb_limit))

964 raise RuntimeError(msg.format(byte_size / (1024**3), return_Gb_limit))

965 spec = spec.todense()

966 msg = 'Converting output to dense array - size {0}Gb'

967 logger.debug(msg.format(byte_size / (1024**3)))

968 else:

969 msg = 'Returning a sparse array - size {0}Gb'

970 logger.debug(msg.format(byte_size / (1024**3)))

971 

972 return spec 

973 

974 

975def _base_spectra(X, Z, x_edges): 

976 """Compute a 2-dimensional Hilbert-Huang distribution. 

977 

978 This is a helper function for constructing a sparse array representation of 

979 a two dimensional distribution of power. This function would not normally 

980 be called by the user. 

981 

982 Parameters 

983 ---------- 

984 X : ndarray 

985 2d array of values defining the first dimension, usually [samples x imfs] 

986 Z : ndarray 

987 2d array of amplitude or power values matching the size of input X 

988 x_edges : ndarray 

989 Vector array containing bin edges for input X 

990 

991 Returns 

992 ------- 

993 sparse_array 

994 Sparse array representation of two dimensional distribution. 

995 

996 See Also 

997 -------- 

998 hilberthuang 

999 
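Notes
-----
A rough sketch of how this helper is used internally (random values stand in
for instantaneous frequencies and amplitudes here):

>>> X = np.random.rand(1000, 3) * 10   # values for the binned dimension
>>> Z = np.random.rand(1000, 3)        # matching amplitude values
>>> edges, centres = define_hist_bins(0, 10, 20)
>>> spec = _base_spectra(X, Z, edges)  # sparse.COO with shape (20, 1000, 3)
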

1000 """ 

1001 # No housekeeping here - assume that inputs have been sanitised by higher level functions. 

1002 

1003 # Find bin indices for first dimension 

1004 x_inds = _digitize(X, x_edges) 

1005 # Find bin indices for time dimension - cast to match input shape 

1006 t_inds = np.broadcast_to(np.arange(x_inds.shape[0])[:, np.newaxis], 

1007 x_inds.shape) 

1008 # Find bin indices for IMF dimension - cast to match input shape 

1009 i_inds = np.broadcast_to(np.arange(X.shape[1])[np.newaxis, :], 

1010 x_inds.shape) 

1011 

1012 # Create vectorised COO coordinate array 

1013 coords = np.c_[x_inds.flatten(), 

1014 t_inds.flatten(), 

1015 i_inds.flatten()].T 

1016 

1017 # Vectorise amplitude values to match coordinates 

1018 Z = Z.flatten() 

1019 

1020 # Drop observations which lie outside specified bin edges 

1021 drops = np.any(coords == DROP_SENTINAL, axis=0) 

1022 coords = np.delete(coords, drops, axis=1) 

1023 Z = np.delete(Z, drops) 

1024 

1025 # Compute final shape 

1026 final_shape = (x_edges.shape[0]-1, 

1027 x_inds.shape[0], 

1028 x_inds.shape[1]) 

1029 

1030 # Create sparse spectrum 

1031 from sparse import COO 

1032 s = COO(coords, Z, shape=final_shape) 

1033 

1034 return s 

1035 

1036 

1037def _higher_order_spectra(X, Y, Z, x_edges, y_edges): 

1038 """Compute a 3-dimensional Hilbert-Huang distribution. 

1039 

1040 This is a helper function for constructing a sparse array representation of 

1041 a three dimensional distribution of power. This would not normally be 

1042 called by the user. 

1043 

1044 Parameters 

1045 ---------- 

1046 X : ndarray 

1047 2d array of values defining the first dimension, usually [samples x imfs] 

1048 Y : ndarray 

1049 3d array of values defining the second dimension, usually [samples x imfs x imfs] 

1050 Z : ndarray 

1051 3d array of amplitude or power values matching the size of input Y 

1052 x_edges : ndarray 

1053 Vector array containing bin edges for input X 

1054 y_edges : ndarray 

1055 Vector array containing bin edges for input Y 

1056 

1057 Returns 

1058 ------- 

1059 sparse_array 

1060 Sparse array representation of three dimensional distribution. 

1061 

1062 See Also 

1063 -------- 

1064 holospectrum, hilbertmarginal 

1065 

1066 """ 

1067 # No housekeeping here - assume that inputs have been sanitised by higher level functions. 

1068 

1069 # Find bin indices for user specified dimensions 

1070 x_inds = _digitize(X, x_edges) 

1071 y_inds = _digitize(Y, y_edges) 

1072 

1073 x_inds = np.broadcast_to(x_inds[:, :, np.newaxis], y_inds.shape) 

1074 # Find bin indices for time dimension - cast to match input shape 

1075 t_inds = np.broadcast_to(np.arange(x_inds.shape[0])[:, np.newaxis, np.newaxis], 

1076 y_inds.shape) 

1077 # Find bin indices for first IMF dimension - cast to match input shape 

1078 i_inds = np.broadcast_to(np.arange(X.shape[1])[np.newaxis, :, np.newaxis], 

1079 y_inds.shape) 

1080 # Find bin indices for second IMF dimension - cast to match input shape 

1081 j_inds = np.broadcast_to(np.arange(Y.shape[2])[np.newaxis, np.newaxis, :], 

1082 y_inds.shape) 

1083 

1084 # Create vectorised COO coordinate array 

1085 coords = np.c_[x_inds.flatten(), 

1086 y_inds.flatten(), 

1087 t_inds.flatten(), 

1088 i_inds.flatten(), 

1089 j_inds.flatten()].T 

1090 

1091 # Vectorise amplitude values to match coordinates 

1092 Z = Z.flatten() 

1093 

1094 # Drop observations which lie outside specified bin edges 

1095 drops = np.any(coords == DROP_SENTINAL, axis=0) 

1096 Z = np.delete(Z, drops) 

1097 coords = np.delete(coords, drops, axis=1) 

1098 

1099 # Compute final shape 

1100 final_shape = (x_edges.shape[0]-1, 

1101 y_edges.shape[0]-1, 

1102 x_inds.shape[0], 

1103 x_inds.shape[1], 

1104 y_inds.shape[2]) 

1105 

1106 # Create sparse spectrum 

1107 from sparse import COO 

1108 s = COO(coords, Z, shape=final_shape) 

1109 

1110 return s 

1111 

1112 

1113def _digitize(vals, edges): 

1114 """Return index of values into a set of defined bins. 

1115 

1116 Parameters 

1117 ---------- 

1118 vals : array_like 

1119 Array of values to be binned 

1120 edges : array_like 

1121 Array containing the edges of bins. N edges define N-1 bins. The overall

1122 range is inclusive of the lowest edge and exclusive of the highest edge.

1123 

1124 Returns 

1125 ------- 

1126 ndarray 

1127 Array containing index of each data point in vals into bins defined by edges 

1128 

1129 Notes 

1130 ----- 

1131 This function is a wrapper for np.digitize but has important differences. 

1132 1. This function returns a sentinel value for observations outside the 

1133 range of the specified bin edges 

1134 2. The bin edges in this function are inclusive on the lower end and

1135 exclusive on the top. 

1136 
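A small illustrative sketch (values outside the outermost edges are mapped to
the DROP_SENTINAL value rather than to an edge bin):

>>> inds = _digitize(np.array([0.5, 1.5, 5.0]), np.array([1.0, 2.0, 3.0]))
>>> # inds is now [DROP_SENTINAL, 0, DROP_SENTINAL]
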

1137 """ 

1138 drops = np.logical_or(vals < edges[0], vals >= edges[-1]) 

1139 inds = np.digitize(vals, edges) - 1 

1140 inds[drops] = DROP_SENTINAL 

1141 return inds 

1142 

1143 

1144#%% ----------------------------------------------------- 

1145# Utilities 

1146 

1147def define_hist_bins(data_min, data_max, nbins, scale='linear'): 

1148 """Define the bin edges and centre values for use in a histogram. 

1149 

1150 Parameters 

1151 ---------- 

1152 data_min : float 

1153 Value for minimum edge 

1154 data_max : float 

1155 Value for maximum edge 

1156 nbins : int 

1157 Number of bins to create 

1158 scale : {'linear','log'} 

1159 Flag indicating whether to use a linear or log spacing between bins (Default value = 'linear') 

1160 

1161 Returns 

1162 ------- 

1163 edges : ndarray 

1164 1D array of bin edges 

1165 centres : ndarray 

1166 1D array of bin centres 

1167 

1168 Notes 

1169 ----- 

1170 An example creating histogram bins between 1 Hz and 5 Hz with four linearly 

1171 spaced bins. 

1172 

1173 >>> edges,centres = emd.spectra.define_hist_bins(1, 5, 4) 

1174 >>> print(edges) 

1175 [1. 2. 3. 4. 5.] 

1176 >>> print(centres) 

1177 [1.5 2.5 3.5 4.5] 

1178 
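The same bins can be spaced evenly in log-space by setting scale='log' (a
quick sketch, output omitted):

>>> edges, centres = emd.spectra.define_hist_bins(1, 8, 3, scale='log')
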

1179 """ 

1180 if scale == 'log': 

1181 p = np.log([data_min, data_max]) 

1182 edges = np.linspace(p[0], p[1], nbins + 1) 

1183 edges = np.exp(edges) 

1184 elif scale == 'linear': 

1185 edges = np.linspace(data_min, data_max, nbins + 1) 

1186 else: 

1187 raise ValueError('scale \'{0}\' not recognised. please use \'log\' or \'linear\'.'.format(scale))

1188 

1189 # Get centre frequency for the bins

1190 centres = np.array([(edges[ii] + edges[ii + 1]) / 2 for ii in range(len(edges) - 1)]) 

1191 

1192 return edges, centres 

1193 

1194 

1195def define_hist_bins_from_data(X, nbins=None, mode='sqrt', scale='linear', tol=1e-3, max_bins=2048): 

1196 """Find the bin edges and centre frequencies for use in a histogram. 

1197 

1198 If nbins is defined, mode is ignored 

1199 

1200 Parameters 

1201 ---------- 

1202 X : ndarray 

1203 Dataset whose summary stats will define the histogram 

1204 nbins : int 

1205 Number of bins to create, if undefined this is derived from the data (Default value = None) 

1206 mode : {'sqrt'} 

1207 Method for deriving number of bins if nbins is undefined (Default value = 'sqrt') 

1208 scale : {'linear','log'}

1209 Flag indicating whether to use a linear or log spacing between bins (Default value = 'linear')

tol : float
Small padding subtracted from the data minimum and added to the data maximum when defining the outermost edges (Default value = 1e-3)
max_bins : int
Maximum number of bins to create (Default value = 2048)

1210 

1211 Returns 

1212 ------- 

1213 edges : ndarray 

1214 1D array of bin edges 

1215 centres : ndarray 

1216 1D array of bin centres 

1217 
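Notes
-----
A short sketch (assuming ``IF`` is an array of instantaneous frequency
estimates to be binned):

>>> edges, centres = emd.spectra.define_hist_bins_from_data(IF.flatten(), nbins=64)
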

1218 """ 

1219 data_min = X.min() - tol 

1220 data_max = X.max() + tol 

1221 

1222 if nbins is None: 

1223 if mode == 'sqrt': 

1224 nbins = np.sqrt(X.shape[0]).astype(int) 

1225 else: 

1226 raise ValueError('mode {0} not recognised, please use \'sqrt\''.format(mode))

1227 

1228 # Don't exceed max_bin number 

1229 nbins = nbins if nbins < max_bins else max_bins 

1230 

1231 return define_hist_bins(data_min, data_max, nbins, scale=scale) 

1232 

1233 

1234def _histogram_bin_relay(params, data=None): 

1235 """Relay function which does-the-right-thing with histogram bin inputs. 

1236 

1237 Parameters 

1238 ---------- 

1239 params : None or tuple(start, stop, nsteps) or np.ndarray 

1240 Parameters given by user, if: 

1241 None - bins automatically computed using define_hist_bins_from_data 

1242 Tuple of length three - bins computed by passing params to define_hist_bins 

1243 numpy.ndarray - input defines bin edges, bin centres are computed. 

1244 data : ndarray 

1245 Optional data used to compute bins if params=None 

1246 

1247 Returns 

1248 ------- 

1249 ndarray 

1250 Array of bin edges 

1251 ndarray 

1252 Array of bin centres 

1253 
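A short sketch of the three accepted input forms (``IF`` here is a
hypothetical array of instantaneous frequencies):

>>> edges, bins = _histogram_bin_relay((1, 50, 49))                    # tuple passed to define_hist_bins
>>> edges, bins = _histogram_bin_relay(np.array([0.5, 2, 5, 11, 22]))  # explicit bin edges
>>> edges, bins = _histogram_bin_relay(None, data=IF)                  # bins guessed from data
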

1254 """ 

1255 if params is None: 

1256 # User didn't say anything - guess bins 

1257 edges, bins = define_hist_bins_from_data(data.flatten()) 

1258 elif isinstance(params, tuple) and len(params) in [3, 4]: 

1259 # User specified meta bins - make actual bins 

1260 edges, bins = define_hist_bins(*params) 

1261 elif isinstance(params, (list, tuple, np.ndarray)): 

1262 # User provided actual bin edges - use them 

1263 edges = np.array(params) 

1264 bins = _compute_centres_from_edges(edges) 

1265 else: 

1266 raise ValueError('Inputs not recognised....')

1267 

1268 return edges, bins 

1269 

1270 

1271def _compute_centres_from_edges(edges, method='mean'): 

1272 """Compute bin centres from an array of bin edges.""" 

1273 if method == 'geometric': 

1274 bins = np.sqrt(edges[1:] * edges[:-1]) 

1275 elif method == 'mean': 

1276 bins = (edges[1:] + edges[:-1]) / 2 

1277 else: 

1278 msg = 'method \'{0}\' not recognised. please use \'mean\' or \'geometric\'.' 

1279 raise ValueError(msg.format(method))

1280 

1281 return bins