Coverage for C: \ Users \ peaco \ OneDrive \ Documents \ GitHub \ mth5 \ mth5 \ groups \ fourier_coefficients.py: 67%

202 statements  

« prev     ^ index     » next       coverage.py v7.13.1, created at 2026-01-27 20:09 -0800

1# -*- coding: utf-8 -*- 

2""" 

3Fourier Coefficient group management for MTH5 format. 

4 

5This module provides classes for organizing and managing Fourier Coefficient 

6data at multiple decimation levels, including utilities for data import/export 

7with different formats (numpy, xarray, pandas). 

8 

9:copyright: 

10 Jared Peacock (jpeacock@usgs.gov) 

11 

12""" 

13 

14from __future__ import annotations 

15 

16from typing import Optional 

17 

18import h5py 

19import mt_metadata.processing.fourier_coefficients as fc 

20 

21# ============================================================================= 

22# Imports 

23# ============================================================================= 

24import numpy as np 

25import pandas as pd 

26import xarray as xr 

27 

28from mth5.groups import BaseGroup, FCChannelDataset 

29from mth5.helpers import validate_name 

30from mth5.utils.exceptions import MTH5Error 

31 

32 

33# from mth5.groups import FCGroup 

34 

35 

36# ============================================================================= 

37"""fc -> FCMasterGroup -> FCGroup -> DecimationLevelGroup -> ChannelGroup -> FCChannelDataset""" 

38 

39 

class MasterFCGroup(BaseGroup):
    """
    Top-level HDF5 group that holds every Fourier Coefficient (FC) run.

    The class adds no state of its own: each method is a thin wrapper around
    a helper inherited from ``BaseGroup`` (``_add_group``, ``_get_group``,
    ``_remove_group``), always using :class:`FCGroup` as the child type.

    Hierarchy
    ---------
    MasterFCGroup -> FCGroup (processing runs) -> FCDecimationGroup
    (decimation levels) -> FCChannelDataset (individual channels)

    Parameters
    ----------
    group : h5py.Group
        HDF5 group object backing the master FC container.
    **kwargs
        Forwarded unchanged to ``BaseGroup.__init__``.

    Examples
    --------
    The file must be writable to add a group:

    >>> import h5py
    >>> from mth5.groups.fourier_coefficients import MasterFCGroup
    >>> with h5py.File('data.h5', 'a') as f:
    ...     master = MasterFCGroup(f['FC'])
    ...     fc_group = master.add_fc_group('processing_run_1')
    """

    def __init__(self, group: h5py.Group, **kwargs) -> None:
        # Nothing to set up beyond what BaseGroup already does.
        super().__init__(group, **kwargs)

    @property
    def fc_summary(self) -> pd.DataFrame:
        """
        Placeholder for a summary of all FC processing runs.

        Returns
        -------
        None
            The property has no implementation yet, so it evaluates to
            ``None`` despite the ``pd.DataFrame`` annotation.

        Examples
        --------
        >>> master = MasterFCGroup(h5_group)
        >>> master.fc_summary is None
        True
        """
        return None

    def add_fc_group(
        self,
        fc_name: str,
        fc_metadata: Optional[fc.Decimation] = None,
    ) -> FCGroup:
        """
        Create (or fetch through ``BaseGroup._add_group``) an FC run group.

        Parameters
        ----------
        fc_name : str
            Name of the FC group, usually the processing-run identifier.
        fc_metadata : fc.Decimation, optional
            Metadata handed to ``_add_group`` as ``group_metadata``.
            Default is None.

        Returns
        -------
        FCGroup
            Whatever ``_add_group`` returns for the :class:`FCGroup` type.

        Notes
        -----
        ``_add_group`` is called with ``match="id"``; presumably this is the
        metadata attribute compared against ``fc_name`` -- confirm against
        ``BaseGroup``.

        Examples
        --------
        >>> master = MasterFCGroup(h5_group)
        >>> fc_group = master.add_fc_group('processing_run_1')
        """
        new_group = self._add_group(
            fc_name,
            FCGroup,
            group_metadata=fc_metadata,
            match="id",
        )
        return new_group

    def get_fc_group(self, fc_name: str) -> FCGroup:
        """
        Look up an existing FC run group by name.

        Parameters
        ----------
        fc_name : str
            Name of the FC group to retrieve.

        Returns
        -------
        FCGroup
            The group returned by ``BaseGroup._get_group``.

        Notes
        -----
        Error handling for a missing group lives in ``_get_group``
        (presumably an ``MTH5Error`` -- confirm against ``BaseGroup``).

        Examples
        --------
        >>> master = MasterFCGroup(h5_group)
        >>> fc_group = master.get_fc_group('processing_run_1')
        """
        existing_group = self._get_group(fc_name, FCGroup)
        return existing_group

    def remove_fc_group(self, fc_name: str) -> None:
        """
        Delete an FC run group through ``BaseGroup._remove_group``.

        Parameters
        ----------
        fc_name : str
            Name of the FC group to remove.

        Notes
        -----
        Error handling for a missing group lives in ``_remove_group``
        (presumably an ``MTH5Error`` -- confirm against ``BaseGroup``).

        Examples
        --------
        >>> master = MasterFCGroup(h5_group)
        >>> master.remove_fc_group('processing_run_1')
        """
        self._remove_group(fc_name)
        return None

166 

167 

class FCDecimationGroup(BaseGroup):
    """
    Container for a single decimation level of Fourier Coefficient data.

    Every child dataset of the underlying HDF5 group is one channel
    (:class:`FCChannelDataset`). The class offers loaders from numpy, pandas
    and xarray objects plus helpers to add, fetch and remove channels.

    Data Assumptions
    ----------------
    Stated by the original authors (not enforced by the code):

    1. Data uniformly sampled in frequency domain
    2. Data uniformly sampled in time domain
    3. FFT moving window has uniform step size

    Parameters
    ----------
    group : h5py.Group
        HDF5 group object for this decimation level.
    decimation_level_metadata : fc.Decimation, optional
        Metadata for the decimation level, passed to ``BaseGroup`` as
        ``group_metadata``. Default is None.
    **kwargs
        Additional keyword arguments passed to ``BaseGroup``.

    Examples
    --------
    >>> decimation = FCDecimationGroup(h5_group, decimation_level_metadata=metadata)
    >>> channel = decimation.add_channel('Ex', fc_data=fc_array)
    """

    def __init__(
        self,
        group: h5py.Group,
        decimation_level_metadata: Optional[fc.Decimation] = None,
        **kwargs,
    ) -> None:
        super().__init__(group, group_metadata=decimation_level_metadata, **kwargs)

    @BaseGroup.metadata.getter
    def metadata(self):
        """
        Return the level metadata with ``channels`` rebuilt from the file.

        The channel list is cleared and refilled from ``groups_list`` on
        every access so it reflects the channels currently stored, and the
        HDF5 reference of this group is refreshed.
        """

        self._metadata.channels = []
        for ch in self.groups_list:
            ch_group = self.get_channel(ch)
            self._metadata.channels.append(ch_group.metadata)
        self._metadata.hdf5_reference = self.hdf5_group.ref
        return self._metadata

    @property
    def channel_summary(self) -> pd.DataFrame:
        """
        Build a table describing every FC channel in this decimation level.

        Children whose ``mth5_type`` attribute is ``"FCChannel"`` are listed;
        a child missing any required attribute is skipped with a debug log.

        Returns
        -------
        pd.DataFrame
            One row per channel with columns:

            - component : str (max 20 chars), ``component`` attribute
            - start : datetime64[ns], ``time_period.start`` with any
              ``+HH:MM`` offset suffix stripped
            - end : datetime64[ns], ``time_period.end`` handled the same way
            - n_frequency : int64, ``shape[0]`` of the dataset
            - n_windows : int64, ``shape[1]`` of the dataset
            - sample_rate_decimation_level : float64
            - sample_rate_window_step : float64
            - units : str (max 25 chars)
            - hdf5_reference : h5py reference to the channel dataset

        Examples
        --------
        >>> decimation = FCDecimationGroup(h5_group)
        >>> summary = decimation.channel_summary
        >>> print(summary[['component', 'n_frequency', 'n_windows']])
        """

        ch_list = []
        for group in self.hdf5_group.values():
            try:
                ch_type = group.attrs["mth5_type"]
                if ch_type in ["FCChannel"]:
                    ch_list.append(
                        (
                            group.attrs["component"],
                            # numpy cannot parse the "+00:00" offset, drop it
                            group.attrs["time_period.start"].split("+")[0],
                            group.attrs["time_period.end"].split("+")[0],
                            # NOTE(review): from_xarray stores data time-first,
                            # which would make shape[0] the window count, not
                            # the frequency count -- confirm the intended
                            # axis order before relying on these two columns.
                            group.shape[0],
                            group.shape[1],
                            group.attrs["sample_rate_decimation_level"],
                            group.attrs["sample_rate_window_step"],
                            group.attrs["units"],
                            group.ref,
                        )
                    )
            except KeyError as error:
                self.logger.debug(f"Cannot find a key: {error}")
        ch_summary = np.array(
            ch_list,
            dtype=np.dtype(
                [
                    ("component", "U20"),
                    ("start", "datetime64[ns]"),
                    ("end", "datetime64[ns]"),
                    ("n_frequency", np.int64),
                    ("n_windows", np.int64),
                    ("sample_rate_decimation_level", np.float64),
                    ("sample_rate_window_step", np.float64),
                    ("units", "U25"),
                    ("hdf5_reference", h5py.ref_dtype),
                ]
            ),
        )

        return pd.DataFrame(ch_summary)

    def from_dataframe(
        self,
        df: pd.DataFrame,
        channel_key: str,
        time_key: str = "time",
        frequency_key: str = "frequency",
    ) -> None:
        """
        Load Fourier Coefficient data from a pandas DataFrame.

        Every column of ``df`` is converted to ``complex128``, turned into an
        xarray object via ``Series.to_xarray`` and stored as a channel named
        after the column. The caller's DataFrame is not modified.

        Parameters
        ----------
        df : pd.DataFrame
            Input DataFrame containing the coefficient data.
        channel_key : str
            Currently unused; all columns are loaded.
        time_key : str, default='time'
            Currently unused.
        frequency_key : str, default='frequency'
            Currently unused.

        Raises
        ------
        TypeError
            If df is not a pandas DataFrame.

        Examples
        --------
        >>> decimation = FCDecimationGroup(h5_group)
        >>> decimation.from_dataframe(df, channel_key='Ex', time_key='time')
        """

        if not isinstance(df, pd.DataFrame):
            msg = f"Must input a pandas dataframe not {type(df)}"
            self.logger.error(msg)
            raise TypeError(msg)
        for col in df.columns:
            # astype returns a new Series, so the input frame stays untouched
            # (the previous implementation overwrote df[col] in place).
            coefficients = df[col].astype(np.complex128)
            xrds = coefficients.to_xarray()
            self.add_channel(col, fc_data=xrds.to_numpy())

    @staticmethod
    def _orient_time_first(data_array: xr.DataArray) -> Optional[np.ndarray]:
        """
        Return the values of ``data_array`` with the time axis first.

        The axis is identified by comparing the size of the ``time``
        coordinate with the array shape. ``None`` is returned when neither
        the first nor the second axis matches.
        """
        values = data_array.to_numpy()
        n_times = data_array.time.size
        if values.ndim > 0 and n_times == values.shape[0]:
            return values
        if values.ndim > 1 and n_times == values.shape[1]:
            return values.T
        return None

    def from_xarray(
        self,
        data_array: xr.Dataset | xr.DataArray,
        sample_rate_decimation_level: float,
    ) -> None:
        """
        Load Fourier Coefficient data from an xarray DataArray or Dataset.

        Channel metadata (time period, frequency range, window step, units)
        is read from the xarray object. A DataArray becomes one channel named
        after ``data_array.name``; a Dataset becomes one channel per data
        variable, replacing any channel of the same name already listed in
        ``channel_summary``.

        Parameters
        ----------
        data_array : xr.DataArray or xr.Dataset
            Input object with ``time`` and ``frequency`` coordinates.
        sample_rate_decimation_level : float
            Sample rate of the decimation level, stored in the channel
            metadata.

        Raises
        ------
        TypeError
            If data_array is not an xarray Dataset or DataArray.

        Notes
        -----
        - Data are stored time-first; an array whose second axis matches the
          time coordinate is transposed. A Dataset variable matching neither
          axis is skipped with a warning and any existing channel of that
          name is left in place.
        - ``sample_rate_window_step`` is set to the spacing between the first
          two time coordinates expressed in seconds. With a single time
          window the spacing is undefined and the metadata default is kept.
          NOTE(review): the field name suggests a rate while the stored value
          is a spacing -- confirm against the metadata definition.
        - ``units`` is copied from the xarray attributes when present.

        Examples
        --------
        >>> import xarray as xr
        >>> import numpy as np
        >>> decimation = FCDecimationGroup(h5_group)
        >>> times = np.arange('2023-01-01', '2023-01-02', dtype='datetime64[s]')
        >>> freqs = np.linspace(0.01, 100, 256)
        >>> shape = (len(times), len(freqs))
        >>> values = np.random.randn(*shape) + 1j * np.random.randn(*shape)
        >>> xr_data = xr.DataArray(
        ...     values,
        ...     dims=['time', 'frequency'],
        ...     coords={'time': times, 'frequency': freqs},
        ...     name='Ex'
        ... )
        >>> decimation.from_xarray(xr_data, sample_rate_decimation_level=0.5)
        """

        if not isinstance(data_array, (xr.Dataset, xr.DataArray)):
            msg = f"Must input a xarray Dataset or DataArray not {type(data_array)}"
            self.logger.error(msg)
            raise TypeError(msg)
        ch_metadata = fc.FCChannel()
        ch_metadata.time_period.start = data_array.time[0].values
        ch_metadata.time_period.end = data_array.time[-1].values
        ch_metadata.sample_rate_decimation_level = sample_rate_decimation_level
        ch_metadata.frequency_min = data_array.coords["frequency"].data.min()
        ch_metadata.frequency_max = data_array.coords["frequency"].data.max()
        time_values = data_array.coords["time"].data
        if time_values.size > 1:
            step_size = time_values[1] - time_values[0]
            ch_metadata.sample_rate_window_step = step_size / np.timedelta64(1, "s")
        else:
            # A single window has no step; indexing [1] used to raise here.
            self.logger.debug(
                "Only one time window, cannot estimate the window step"
            )
        try:
            ch_metadata.units = data_array.units
        except AttributeError:
            self.logger.debug("Could not find 'units' in xarray")
        if isinstance(data_array, xr.DataArray):
            # Mirror the Dataset branch, which records the channel name as
            # the metadata component.
            if isinstance(data_array.name, str):
                ch_metadata.component = data_array.name
            fc_values = self._orient_time_first(data_array)
            if fc_values is None:
                # Keep the historical behaviour of storing the array as given.
                fc_values = data_array.to_numpy()
            self.add_channel(
                data_array.name,
                fc_data=fc_values,
                fc_metadata=ch_metadata,
            )
            return

        for ch in data_array.data_vars:
            variable = data_array[ch]
            fc_values = self._orient_time_first(variable)
            if fc_values is None:
                self.logger.warning(
                    f"Skipping {ch}: shape {variable.shape} does not match "
                    f"the time coordinate of size {variable.time.size}"
                )
                continue
            ch_metadata.component = ch
            if ch in self.channel_summary.component.to_list():
                self.remove_channel(ch)
            self.add_channel(
                ch,
                fc_data=fc_values,
                fc_metadata=ch_metadata,
                dtype=variable.dtype,
            )

    def to_xarray(self, channels: Optional[list[str]] = None) -> xr.Dataset:
        """
        Create an xarray Dataset from Fourier Coefficient channels.

        Parameters
        ----------
        channels : list[str], optional
            Channel names to include. If None, every name in ``groups_list``
            is used. Default is None.

        Returns
        -------
        xr.Dataset
            Dataset with one data variable per channel, each produced by the
            channel's own ``to_xarray`` method.

        Examples
        --------
        >>> decimation = FCDecimationGroup(h5_group)
        >>> xr_data = decimation.to_xarray()
        >>> subset = decimation.to_xarray(channels=['Ex', 'Ey'])
        """

        if channels is None:
            channels = self.groups_list
        ch_dict = {}
        for ch in channels:
            ch_ds = self.get_channel(ch)
            ch_dict[ch] = ch_ds.to_xarray()
        return xr.Dataset(ch_dict)

    def from_numpy_array(
        self,
        nd_array: np.ndarray,
        ch_name: str | list[str],
    ) -> None:
        """
        Load Fourier Coefficient data from a numpy array.

        A 2-D array is stored as one channel; a 3-D array is split along its
        first axis into one channel per entry of ``ch_name``.

        Parameters
        ----------
        nd_array : np.ndarray
            Input array, either 2-D (single channel) or 3-D with the channel
            index first.
        ch_name : str or list[str]
            Channel name (2-D array) or one name per channel (3-D array).

        Raises
        ------
        TypeError
            If nd_array is not a numpy ndarray.
        ValueError
            If the array is neither 2-D nor 3-D, or if a 3-D array is not
            accompanied by exactly one name per channel.

        Examples
        --------
        >>> decimation = FCDecimationGroup(h5_group)
        >>> data_2d = np.random.randn(256, 100) + 1j * np.random.randn(256, 100)
        >>> decimation.from_numpy_array(data_2d, ch_name='Ex')
        >>> data_3d = np.random.randn(2, 256, 100) + 1j * np.random.randn(2, 256, 100)
        >>> decimation.from_numpy_array(data_3d, ch_name=['Ex', 'Ey'])
        """

        if not isinstance(nd_array, np.ndarray):
            msg = f"Must input a numpy ndarray not {type(nd_array)}"
            self.logger.error(msg)
            raise TypeError(msg)
        if nd_array.ndim == 3:
            # A bare string would be iterated character by character.
            if isinstance(ch_name, str) or len(ch_name) != nd_array.shape[0]:
                msg = (
                    "ch_name must list one name per channel "
                    f"({nd_array.shape[0]} expected) for a 3-D array"
                )
                self.logger.error(msg)
                raise ValueError(msg)
            # Iterate the channels themselves; the previous code zipped over
            # the integer nd_array.shape[0], which raises TypeError.
            for ch, ch_data in zip(ch_name, nd_array):
                self.add_channel(ch, fc_data=ch_data)
        elif nd_array.ndim == 2:
            self.add_channel(ch_name, fc_data=nd_array)
        else:
            raise ValueError(
                "input array must be shaped (n_frequencies, n_windows) or "
                "(n_channels, n_frequencies, n_windows)"
            )

    def add_channel(
        self,
        fc_name: str,
        fc_data: Optional[np.ndarray] = None,
        fc_metadata: Optional[fc.FCChannel] = None,
        max_shape: tuple = (None, None),
        chunks: bool = True,
        dtype: type = complex,
        **kwargs,
    ) -> FCChannelDataset:
        """
        Add a Fourier Coefficient channel to the decimation level.

        Creates an HDF5 dataset named ``fc_name`` (after ``validate_name``)
        and wraps it in an :class:`FCChannelDataset`. If creation fails with
        ``OSError``, ``RuntimeError`` or ``ValueError`` (for example because
        the dataset already exists) the error is logged and the existing
        channel of that name is returned instead.

        Parameters
        ----------
        fc_name : str
            Name for the channel (usually a component name like 'Ex').
        fc_data : np.ndarray, optional
            Data to store. ``xr.DataArray``, ``xr.Dataset`` and
            ``pd.DataFrame`` also pass the type check. If None, a ``(1, 1)``
            array of zeros is written and chunking is forced on.
        fc_metadata : fc.FCChannel, optional
            Metadata for the channel. If None, ``fc.FCChannel(name=fc_name)``
            is used.
        max_shape : tuple, default=(None, None)
            Passed to h5py as ``maxshape``.
        chunks : bool, default=True
            Passed to h5py as ``chunks``.
        dtype : type, default=complex
            Data type for the dataset.
        **kwargs
            Accepted for interface compatibility; currently not forwarded to
            dataset creation (``self.dataset_options`` is used instead).

        Returns
        -------
        FCChannelDataset
            The newly created channel, or the existing one on failure.

        Raises
        ------
        TypeError
            If fc_data is not one of the accepted types.
        MTH5Error
            If creation failed and no channel named ``fc_name`` exists.

        Notes
        -----
        NOTE(review): the axis order is not consistent across this class --
        ``from_xarray`` stores data time-first while ``channel_summary``
        reports ``shape[0]`` as the frequency count. Confirm the intended
        layout before relying on either.

        Examples
        --------
        >>> decimation = FCDecimationGroup(h5_group)
        >>> metadata = fc.FCChannel(component='Ex')
        >>> fc_data = np.random.randn(100, 256) + 1j * np.random.randn(100, 256)
        >>> channel = decimation.add_channel('Ex', fc_data=fc_data, fc_metadata=metadata)
        >>> empty = decimation.add_channel('Ey')
        """

        fc_name = validate_name(fc_name)

        if fc_metadata is None:
            fc_metadata = fc.FCChannel(name=fc_name)
        if fc_data is not None:
            if not isinstance(
                fc_data, (np.ndarray, xr.DataArray, xr.Dataset, pd.DataFrame)
            ):
                msg = (
                    "Need to input a numpy.array, xarray.DataArray, "
                    f"xr.Dataset, pd.DataFrame not {type(fc_data)}"
                )
                # Not inside an except block, so there is no traceback for
                # logger.exception to attach.
                self.logger.error(msg)
                raise TypeError(msg)
        else:
            chunks = True
            fc_data = np.zeros((1, 1), dtype=dtype)
        try:
            dataset = self.hdf5_group.create_dataset(
                fc_name,
                data=fc_data,
                dtype=dtype,
                chunks=chunks,
                maxshape=max_shape,
                **self.dataset_options,
            )

            fc_dataset = FCChannelDataset(dataset, dataset_metadata=fc_metadata)
        except (OSError, RuntimeError, ValueError) as error:
            self.logger.error(error)
            msg = f"estimate {fc_name} already exists, returning existing group."
            self.logger.debug(msg)

            # The dataset key is fc_name; the metadata component may be unset
            # or different, so it is not a reliable lookup key.
            fc_dataset = self.get_channel(fc_name)
        return fc_dataset

    def get_channel(self, fc_name: str) -> FCChannelDataset:
        """
        Retrieve a Fourier Coefficient channel by name.

        The channel metadata is rebuilt from the attributes stored on the
        HDF5 dataset.

        Parameters
        ----------
        fc_name : str
            Name of the channel (passed through ``validate_name``).

        Returns
        -------
        FCChannelDataset
            The requested channel dataset.

        Raises
        ------
        MTH5Error
            If the channel does not exist (``KeyError``) or h5py reports an
            ``OSError`` while reading it.

        Examples
        --------
        >>> decimation = FCDecimationGroup(h5_group)
        >>> channel = decimation.get_channel('Ex')
        """
        fc_name = validate_name(fc_name)

        try:
            fc_dataset = self.hdf5_group[fc_name]
            fc_metadata = fc.FCChannel(**dict(fc_dataset.attrs))
            return FCChannelDataset(fc_dataset, dataset_metadata=fc_metadata)
        except KeyError:
            msg = f"{fc_name} does not exist, check groups_list for existing names"
            self.logger.error(msg)
            raise MTH5Error(msg)
        except OSError as error:
            self.logger.error(error)
            raise MTH5Error(error)

    def remove_channel(self, fc_name: str) -> None:
        """
        Remove a Fourier Coefficient channel from the decimation level.

        The name is looked up exactly as ``add_channel`` and ``get_channel``
        store it; if that key is absent the lower-cased name is tried, which
        was the only lookup performed by earlier versions.

        Parameters
        ----------
        fc_name : str
            Name of the channel to remove.

        Raises
        ------
        MTH5Error
            If the channel does not exist.

        Notes
        -----
        Deleting a channel does not reduce the HDF5 file size; it only
        removes the reference to the data. To shrink the file, copy the data
        worth keeping into a new file.

        Examples
        --------
        >>> decimation = FCDecimationGroup(h5_group)
        >>> decimation.remove_channel('Ex')
        """
        channel_key = validate_name(fc_name)
        if channel_key not in self.hdf5_group:
            # Backward compatibility with the former lower-case-only lookup.
            channel_key = validate_name(fc_name.lower())

        try:
            del self.hdf5_group[channel_key]
            self.logger.info(
                "Deleting a estimate does not reduce the HDF5 "
                "file size it simply remove the reference. If "
                "file size reduction is your goal, simply copy"
                " what you want into another file."
            )
        except KeyError:
            msg = f"{channel_key} does not exist, check groups_list for existing names"
            self.logger.error(msg)
            raise MTH5Error(msg)

    def update_metadata(self) -> None:
        """
        Update decimation level metadata from all channels.

        When ``channel_summary`` is not empty, the time period is set to the
        earliest start and latest end over all channels, and both sample
        rates are taken from the first unique value found in the summary.
        The metadata is written to HDF5 in every case.

        Examples
        --------
        >>> decimation = FCDecimationGroup(h5_group)
        >>> decimation.add_channel('Ex', fc_data=data_ex)
        >>> decimation.add_channel('Ey', fc_data=data_ey)
        >>> decimation.update_metadata()
        """
        # The property builds a fresh DataFrame, no defensive copy needed.
        channel_summary = self.channel_summary

        if not channel_summary.empty:
            self._metadata.time_period.start = channel_summary.start.min().isoformat()
            self._metadata.time_period.end = channel_summary.end.max().isoformat()
            self._metadata.sample_rate_decimation_level = (
                channel_summary.sample_rate_decimation_level.unique()[0]
            )
            self._metadata.sample_rate_window_step = (
                channel_summary.sample_rate_window_step.unique()[0]
            )
        self.write_metadata()

    def add_feature(
        self,
        feature_name: str,
        feature_data: Optional[np.ndarray] = None,
        feature_metadata: Optional[dict] = None,
        max_shape: tuple = (None, None, None),
        chunks: bool = True,
        **kwargs,
    ) -> None:
        """
        Placeholder for adding a feature dataset to the decimation level.

        Not implemented: nothing is written to the file. A warning is logged
        so the no-op is visible to the caller.

        Parameters
        ----------
        feature_name : str
            Name for the feature dataset (only used in the warning).
        feature_data : np.ndarray, optional
            Ignored.
        feature_metadata : dict, optional
            Ignored.
        max_shape : tuple, default=(None, None, None)
            Ignored.
        chunks : bool, default=True
            Ignored.
        **kwargs
            Ignored.

        Notes
        -----
        The original authors list power, SNR, coherency, weights and data
        quality flags as intended feature types.

        Examples
        --------
        >>> decimation = FCDecimationGroup(h5_group)
        >>> decimation.add_feature('snr', feature_data=snr_data)  # no-op
        """
        self.logger.warning(
            f"add_feature is not implemented, feature '{feature_name}' was not written"
        )

833 

834 

835class FCGroup(BaseGroup): 

836 """ 

837 Manage a set of Fourier Coefficients from a single processing run. 

838 

839 Holds Fourier Coefficient estimations organized by decimation level. 

840 Each decimation level contains channels (Ex, Ey, Hz, etc.) with complex 

841 frequency or time-frequency representations of the input signal. 

842 

843 All channels must use the same calibration. Recalibration requires 

844 rerunning the Fourier Coefficient estimation. 

845 

846 Attributes 

847 ---------- 

848 hdf5_group : h5py.Group 

849 The HDF5 group containing decimation levels 

850 metadata : fc.Decimation 

851 Decimation metadata including time period, sample rates, and channels 

852 

853 Notes 

854 ----- 

855 Processing run structure: 

856 

857 - Multiple decimation levels at different sample rates 

858 - Each decimation level contains multiple channels 

859 - Each channel contains complex Fourier coefficients 

860 - Time period and sample rates define the estimation window 

861 

862 Examples 

863 -------- 

864 >>> with h5py.File('data.h5', 'r') as f: 

865 ... fc_run = FCGroup(f['Fourier_Coefficients/run_1']) 

866 ... print(fc_run.decimation_level_summary) 

867 """ 

868 

869 def __init__( 

870 self, 

871 group: h5py.Group, 

872 decimation_level_metadata: Optional[fc.Decimation] = None, 

873 **kwargs, 

874 ) -> None: 

875 """ 

876 Initialize FCGroup. 

877 

878 Parameters 

879 ---------- 

880 group : h5py.Group 

881 The HDF5 group containing decimation levels. 

882 decimation_level_metadata : fc.Decimation, optional 

883 Metadata object for the processing run. Default is None. 

884 **kwargs 

885 Additional keyword arguments passed to BaseGroup. 

886 """ 

887 super().__init__(group, group_metadata=decimation_level_metadata, **kwargs) 

888 

889 @BaseGroup.metadata.getter 

890 def metadata(self) -> fc.Decimation: 

891 """ 

892 Get processing run metadata including all decimation levels. 

893 

894 Collects metadata from all decimation level groups and aggregates 

895 into a single Decimation metadata object. 

896 

897 Returns 

898 ------- 

899 fc.Decimation 

900 Metadata containing time period, sample rates, and all decimation 

901 level information. 

902 

903 Notes 

904 ----- 

905 This getter automatically populates: 

906 

907 - Time period (start and end) 

908 - List of all decimation levels and their metadata 

909 - HDF5 reference to this group 

910 

911 Examples 

912 -------- 

913 >>> fc_run = FCGroup(h5_group) 

914 >>> metadata = fc_run.metadata 

915 >>> print(metadata.time_period.start) 

916 2023-01-01T00:00:00 

917 """ 

918 self._metadata.channels = [] 

919 for dl in self.groups_list: 

920 dl_group = self.get_decimation_level(dl) 

921 self._metadata.levels.append(dl_group.metadata) 

922 self._metadata.hdf5_reference = self.hdf5_group.ref 

923 return self._metadata 

924 

925 @property 

926 def decimation_level_summary(self) -> pd.DataFrame: 

927 """ 

928 Get a summary of all decimation levels in this processing run. 

929 

930 Returns information about each decimation level including sample rate, 

931 decimation level value, and time span. 

932 

933 Returns 

934 ------- 

935 pd.DataFrame 

936 Summary with columns: 

937 

938 - decimation_level: Integer decimation level identifier 

939 - start: ISO format start time of this decimation level 

940 - end: ISO format end time of this decimation level 

941 - hdf5_reference: Reference to the HDF5 group 

942 

943 Notes 

944 ----- 

945 Each row represents a single decimation level containing multiple 

946 channels with Fourier coefficients at different sample rates. 

947 

948 Examples 

949 -------- 

950 >>> fc_run = FCGroup(h5_group) 

951 >>> summary = fc_run.decimation_level_summary 

952 >>> print(summary[['decimation_level', 'start', 'end']]) 

953 decimation_level start end 

954 0 0 2023-01-01T00:00:00.000000 2023-01-01T01:00:00.000000 

955 1 1 2023-01-01T00:00:00.000000 2023-01-01T02:00:00.000000 

956 """ 

957 

958 ch_list = [] 

959 for key, group in self.hdf5_group.items(): 

960 try: 

961 ch_type = group.attrs["mth5_type"] 

962 if ch_type in ["FCDecimation"]: 

963 ch_list.append( 

964 ( 

965 group.attrs["decimation_level"], 

966 group.attrs["time_period.start"].split("+")[0], 

967 group.attrs["time_period.end"].split("+")[0], 

968 group.ref, 

969 ) 

970 ) 

971 except KeyError as error: 

972 self.logger.debug(f"Could not find key: {error}") 

973 

974 ch_summary = np.array( 

975 ch_list, 

976 dtype=np.dtype( 

977 [ 

978 ("component", "U20"), 

979 ("start", "datetime64[ns]"), 

980 ("end", "datetime64[ns]"), 

981 ("hdf5_reference", h5py.ref_dtype), 

982 ] 

983 ), 

984 ) 

985 

986 return pd.DataFrame(ch_summary) 

987 

def add_decimation_level(
    self,
    decimation_level_name: str,
    decimation_level_metadata: Optional[dict | fc.Decimation] = None,
) -> FCDecimationGroup:
    """
    Create a decimation level group inside this processing run.

    The work is delegated to ``self._add_group``, which is asked to build
    an ``FCDecimationGroup`` and is given ``"decimation_level"`` as its
    ``match`` argument.

    Parameters
    ----------
    decimation_level_name : str
        Name under which the decimation level is stored.
    decimation_level_metadata : dict | fc.Decimation, optional
        Metadata describing the decimation level, either as a plain
        dictionary or as an ``fc.Decimation`` instance. Defaults to None.

    Returns
    -------
    FCDecimationGroup
        The group object handed back by ``self._add_group``.

    Examples
    --------
    >>> fc_run = FCGroup(h5_group)
    >>> metadata = fc.Decimation(decimation_level=0)
    >>> decimation = fc_run.add_decimation_level('0', metadata)
    """
    new_level = self._add_group(
        decimation_level_name,
        FCDecimationGroup,
        group_metadata=decimation_level_metadata,
        match="decimation_level",
    )
    return new_level

1025 

def get_decimation_level(self, decimation_level_name: str) -> FCDecimationGroup:
    """
    Look up an existing decimation level group by its name.

    The lookup is delegated to ``self._get_group`` with
    ``FCDecimationGroup`` as the requested group class.

    Parameters
    ----------
    decimation_level_name : str
        Name of the decimation level to fetch.

    Returns
    -------
    FCDecimationGroup
        The group object handed back by ``self._get_group``.

    Examples
    --------
    >>> fc_run = FCGroup(h5_group)
    >>> decimation = fc_run.get_decimation_level('0')
    >>> channels = decimation.groups_list
    """
    requested_level = self._get_group(decimation_level_name, FCDecimationGroup)
    return requested_level

1047 

def remove_decimation_level(self, decimation_level_name: str) -> None:
    """
    Drop a decimation level from this processing run.

    The removal is delegated to ``self._remove_group``.

    Parameters
    ----------
    decimation_level_name : str
        Name of the decimation level that should be removed.

    Notes
    -----
    The whole decimation level goes away, including every channel stored
    inside it. When only a single channel should be deleted, call
    FCDecimationGroup.remove_channel() on the level instead.

    Examples
    --------
    >>> fc_run = FCGroup(h5_group)
    >>> fc_run.remove_decimation_level('0')
    """
    self._remove_group(decimation_level_name)
    return None

1072 

def update_metadata(self) -> None:
    """
    Refresh the run's time period from its decimation levels.

    Reads ``self.decimation_level_summary`` and, when it contains rows,
    stores the smallest ``start`` and the largest ``end`` (as ISO format
    strings) on ``self._metadata.time_period`` before calling
    ``self.write_metadata()``.

    Notes
    -----
    Values taken from the summary:

    - ``start`` column minimum -> ``time_period.start``
    - ``end`` column maximum -> ``time_period.end``

    Intended to be called once decimation levels have been added or
    removed.

    Examples
    --------
    >>> fc_run = FCGroup(h5_group)
    >>> fc_run.add_decimation_level('0', metadata0)
    >>> fc_run.add_decimation_level('1', metadata1)
    >>> fc_run.update_metadata()
    """
    level_table = self.decimation_level_summary.copy()

    # NOTE(review): an empty summary is assumed to skip the metadata write
    # as well as the time period update -- confirm against the original
    # indentation of the write_metadata() call.
    if level_table.empty:
        return

    earliest = level_table.start.min()
    self._metadata.time_period.start = earliest.isoformat()

    latest = level_table.end.max()
    self._metadata.time_period.end = latest.isoformat()

    self.write_metadata()

1105 

def supports_aurora_processing_config(
    self,
    processing_config: "aurora.config.metadata.processing.Processing",
    remote: bool,
) -> bool:
    """
    Check if all required decimation levels exist for Aurora processing.

    Performs an all-or-nothing check: the result is True only when every
    decimation level in ``processing_config.decimations`` can be paired
    with a distinct decimation level stored in this group.

    Parameters
    ----------
    processing_config : aurora.config.metadata.processing.Processing
        Aurora processing configuration. This method reads its
        ``num_decimation_levels`` and ``decimations`` attributes.
    remote : bool
        Forwarded unchanged to
        ``is_consistent_with_archived_fc_parameters`` for every comparison.

    Returns
    -------
    bool
        True if every required decimation level found a consistent
        archived counterpart, False otherwise. Always a plain Python
        ``bool`` (never ``numpy.bool_``).

    Notes
    -----
    Validation logic:

    1. Take a private copy of the names in ``self.groups_list``.
    2. Walk the required levels in order; as soon as the previous level
       turned out to be missing, return False without checking the rest.
    3. For each required level, compare it against the metadata of every
       archived level not yet claimed, using
       ``is_consistent_with_archived_fc_parameters``.
    4. The first consistent archived level is claimed (removed from the
       candidate list) so it cannot satisfy a second required level.
    5. Return True only if all required levels were matched.

    Examples
    --------
    >>> fc_run = FCGroup(h5_group)
    >>> config = aurora.config.metadata.processing.Processing(...)
    >>> if fc_run.supports_aurora_processing_config(config, remote=False):
    ...     # All decimation levels are available
    ...     pass
    """
    # Work on a private copy: matched ids are removed below, and whatever
    # list ``groups_list`` hands back must not be altered as a side effect.
    unclaimed_fc_decimation_ids = list(self.groups_list)
    levels_present = np.full(processing_config.num_decimation_levels, False)

    for i, aurora_decimation_level in enumerate(processing_config.decimations):
        # Quit checking as soon as the previous required level was not found.
        if i > 0 and not levels_present[i - 1]:
            return False

        # Compare against every archived decimation that is still unclaimed.
        for fc_decimation_id in unclaimed_fc_decimation_ids:
            fc_decimation = self.get_decimation_level(fc_decimation_id).metadata
            is_match = aurora_decimation_level.is_consistent_with_archived_fc_parameters(
                fc_decimation=fc_decimation, remote=remote
            )
            levels_present[i] = is_match
            if levels_present[i]:
                # Removing during iteration is safe only because the loop
                # is left immediately afterwards.
                unclaimed_fc_decimation_ids.remove(fc_decimation_id)
                break

    # ndarray.all() yields numpy.bool_; convert so the annotated ``bool``
    # contract holds on every return path.
    return bool(levels_present.all())