Coverage for C: \ Users \ peaco \ OneDrive \ Documents \ GitHub \ mth5 \ mth5 \ groups \ features.py: 42%
216 statements
« prev ^ index » next coverage.py v7.13.1, created at 2026-01-27 20:09 -0800
« prev ^ index » next coverage.py v7.13.1, created at 2026-01-27 20:09 -0800
1# -*- coding: utf-8 -*-
2"""
3Created on Fri Dec 13 12:40:34 2024
5@author: jpeacock
6"""
8from __future__ import annotations
10# =============================================================================
11# Imports
12# =============================================================================
13from typing import Optional
15import h5py
16import numpy as np
17import pandas as pd
18import xarray as xr
19from mt_metadata.features import FeatureDecimationChannel
20from mt_metadata.processing.fourier_coefficients.decimation import Decimation
22from mth5.groups import BaseGroup, FeatureChannelDataset, RunGroup
23from mth5.helpers import validate_name
24from mth5.utils.exceptions import MTH5Error
27# =============================================================================
28"""Feature hierarchy: MasterFeaturesGroup -> FeatureGroup -> FeatureFCRunGroup | FeatureTSRunGroup -> (FC only) FeatureDecimationGroup -> FeatureChannelDataset"""
# Accepted spellings of the ``domain`` argument that select a time series
# feature run (FeatureTSRunGroup) in FeatureGroup.
30TIME_DOMAIN = ["ts", "time", "time series", "time_series"]
# Accepted spellings of the ``domain`` argument that select a Fourier
# Coefficient feature run (FeatureFCRunGroup) in FeatureGroup.
31FREQUENCY_DOMAIN = ["fc", "frequency", "fourier", "fourier_domain"]
class MasterFeaturesGroup(BaseGroup):
    """
    Top-level container that holds one :class:`FeatureGroup` per feature.

    All group bookkeeping is delegated to ``BaseGroup``; this class only fixes
    the child type (:class:`FeatureGroup`) used when adding or fetching groups.
    Features may be derived from Fourier Coefficients or from time series.

    Hierarchy
    ---------
    MasterFeaturesGroup -> FeatureGroup -> feature run group ->

        - FC: FeatureDecimationGroup -> FeatureChannelDataset
        - Time series: FeatureChannelDataset

    Parameters
    ----------
    group : h5py.Group
        HDF5 group backing this container.
    **kwargs
        Forwarded unchanged to ``BaseGroup.__init__``.

    Examples
    --------
    >>> import h5py
    >>> from mth5.groups.features import MasterFeaturesGroup
    >>> with h5py.File('data.h5', 'r') as f:
    ...     master = MasterFeaturesGroup(f['features'])
    ...     feature_list = master.groups_list
    """

    def __init__(self, group: h5py.Group, **kwargs) -> None:
        super().__init__(group, **kwargs)

    def add_feature_group(
        self,
        feature_name: str,
        feature_metadata: Optional[FeatureDecimationChannel] = None,
    ) -> FeatureGroup:
        """
        Create a :class:`FeatureGroup` underneath this container.

        Parameters
        ----------
        feature_name : str
            Name of the new feature group. Any validation of the name is done
            by ``BaseGroup._add_group``.
        feature_metadata : FeatureDecimationChannel, optional
            Metadata stored with the new group. Default is None.

        Returns
        -------
        FeatureGroup
            The group returned by ``BaseGroup._add_group``.

        Examples
        --------
        >>> master = MasterFeaturesGroup(h5_group)
        >>> feature = master.add_feature_group('coherency')
        """
        # Feature groups are matched on the metadata "name" attribute.
        options = {"group_metadata": feature_metadata, "match": "name"}
        return self._add_group(feature_name, FeatureGroup, **options)

    def get_feature_group(self, feature_name: str) -> FeatureGroup:
        """
        Fetch an existing :class:`FeatureGroup` by name.

        Parameters
        ----------
        feature_name : str
            Name of the feature group to look up.

        Returns
        -------
        FeatureGroup
            The group returned by ``BaseGroup._get_group``.

        Raises
        ------
        MTH5Error
            Expected from ``BaseGroup._get_group`` when the group is missing
            (that helper is defined outside this file).

        Examples
        --------
        >>> master = MasterFeaturesGroup(h5_group)
        >>> feature = master.get_feature_group('coherency')
        """
        group_class = FeatureGroup
        return self._get_group(feature_name, group_class)

    def remove_feature_group(self, feature_name: str) -> None:
        """
        Delete a feature group via ``BaseGroup._remove_group``.

        Parameters
        ----------
        feature_name : str
            Name of the feature group to delete.

        Raises
        ------
        MTH5Error
            Expected from ``BaseGroup._remove_group`` when the group is
            missing (that helper is defined outside this file).

        Notes
        -----
        Deleting an object from an HDF5 file removes the reference but does
        not shrink the file; copy the data you want to keep into a new file
        to reclaim the space.

        Examples
        --------
        >>> master = MasterFeaturesGroup(h5_group)
        >>> master.remove_feature_group('coherency')
        """
        self._remove_group(feature_name)
class FeatureGroup(BaseGroup):
    """
    Container for one feature and all of its run groups.

    A feature run is stored either as a :class:`FeatureFCRunGroup` (Fourier
    Coefficients) or a :class:`FeatureTSRunGroup` (time series); the choice is
    driven by a ``domain`` string.

    Hierarchy
    ---------
    FeatureGroup -> feature run group ->

        - FC: FeatureDecimationGroup -> FeatureChannelDataset
        - TS: FeatureChannelDataset

    Parameters
    ----------
    group : h5py.Group
        HDF5 group backing this feature.
    feature_metadata : optional
        Metadata describing the feature; passed to ``BaseGroup`` as
        ``group_metadata``.
    **kwargs
        Forwarded unchanged to ``BaseGroup.__init__``.

    Examples
    --------
    >>> feature = FeatureGroup(h5_group, feature_metadata=metadata)
    >>> run_group = feature.add_feature_run_group('run_1', domain='fc')
    """

    def __init__(
        self,
        group: h5py.Group,
        feature_metadata: Optional[object] = None,
        **kwargs,
    ) -> None:
        super().__init__(group, group_metadata=feature_metadata, **kwargs)

    @staticmethod
    def _run_group_class(domain: str) -> type:
        """
        Map a domain alias onto the run-group class that stores it.

        Shared by the add and get methods so both accept the same aliases and
        raise the same error.

        Raises
        ------
        ValueError
            If ``domain`` is in neither FREQUENCY_DOMAIN nor TIME_DOMAIN.
        """
        if domain in FREQUENCY_DOMAIN:
            return FeatureFCRunGroup
        if domain in TIME_DOMAIN:
            return FeatureTSRunGroup
        raise ValueError(
            f"feature_type {domain} not supported. Use either 'fc' "
            "for Fourier Coefficent or 'ts' for time series."
        )

    def add_feature_run_group(
        self,
        feature_name: str,
        feature_run_metadata: Optional[object] = None,
        domain: str = "fc",
    ) -> object:
        """
        Add a run group (FC or TS) to this feature.

        Parameters
        ----------
        feature_name : str
            Name of the new run group.
        feature_run_metadata : optional
            Metadata for the run. When given, its ``domain`` attribute
            overrides the ``domain`` argument. Default is None.
        domain : str, default='fc'
            One of:

            - 'fc', 'frequency', 'fourier', 'fourier_domain': Fourier Coefficients
            - 'ts', 'time', 'time series', 'time_series': Time series

        Returns
        -------
        FeatureFCRunGroup or FeatureTSRunGroup
            The group returned by ``BaseGroup._add_group``.

        Raises
        ------
        AttributeError
            If ``feature_run_metadata`` is given but has no ``domain``.
        ValueError
            If the resolved domain is not recognized.

        Examples
        --------
        >>> feature = FeatureGroup(h5_group)
        >>> fc_run = feature.add_feature_run_group('processing_run_1', domain='fc')
        >>> ts_run = feature.add_feature_run_group('ts_analysis', domain='ts')
        """
        if feature_run_metadata is not None:
            try:
                domain = feature_run_metadata.domain
            except AttributeError as error:
                raise AttributeError(
                    "Could not find attribute 'domain' in metadata object"
                ) from error

        return self._add_group(
            feature_name,
            self._run_group_class(domain),
            group_metadata=feature_run_metadata,
            match="id",
        )

    def get_feature_run_group(
        self,
        feature_name: str,
        domain: str = "frequency",
    ) -> object:
        """
        Fetch an existing run group by name and domain.

        Parameters
        ----------
        feature_name : str
            Name of the run group to look up.
        domain : str, default='frequency'
            One of:

            - 'fc', 'frequency', 'fourier', 'fourier_domain': Fourier Coefficients
            - 'ts', 'time', 'time series', 'time_series': Time series

        Returns
        -------
        FeatureFCRunGroup or FeatureTSRunGroup
            The group returned by ``BaseGroup._get_group``.

        Raises
        ------
        ValueError
            If ``domain`` is not recognized.
        MTH5Error
            Expected from ``BaseGroup._get_group`` when the group is missing
            (that helper is defined outside this file).

        Examples
        --------
        >>> feature = FeatureGroup(h5_group)
        >>> fc_run = feature.get_feature_run_group('processing_run_1', domain='fc')
        """
        return self._get_group(feature_name, self._run_group_class(domain))

    def remove_feature_run_group(self, feature_name: str) -> None:
        """
        Delete a run group via ``BaseGroup._remove_group``.

        Parameters
        ----------
        feature_name : str
            Name of the run group to delete.

        Raises
        ------
        MTH5Error
            Expected from ``BaseGroup._remove_group`` when the group is
            missing (that helper is defined outside this file).

        Notes
        -----
        Deleting an object from an HDF5 file removes the reference but does
        not shrink the file.

        Examples
        --------
        >>> feature = FeatureGroup(h5_group)
        >>> feature.remove_feature_run_group('processing_run_1')
        """
        self._remove_group(feature_name)
class FeatureTSRunGroup(BaseGroup):
    """
    Run group holding time series features.

    Channel handling is delegated to an internal ``RunGroup`` built on the
    same HDF5 group, so the feature hierarchy can reuse the time series
    channel machinery.

    Parameters
    ----------
    group : h5py.Group
        HDF5 group backing this run.
    feature_run_metadata : optional
        Metadata for the run; passed to ``BaseGroup`` as ``group_metadata``.
    **kwargs
        Forwarded unchanged to ``BaseGroup.__init__``.

    Notes
    -----
    Every instance creates its own ``RunGroup``; the original author flagged
    that this may be slow.

    Examples
    --------
    >>> ts_run = FeatureTSRunGroup(h5_group, feature_run_metadata=metadata)
    >>> channel = ts_run.add_feature_channel('Ex', 'electric', data)
    """

    def __init__(
        self,
        group: h5py.Group,
        feature_run_metadata: Optional[object] = None,
        **kwargs,
    ) -> None:
        super().__init__(group, group_metadata=feature_run_metadata, **kwargs)

        # Delegate channel operations to a RunGroup wrapping the same group.
        # NOTE(review): the keyword ``feature_run_metadata=None`` is passed as
        # written in the original; confirm RunGroup expects this keyword.
        self._run_group = RunGroup(group, feature_run_metadata=None)

    def add_feature_channel(
        self,
        channel_name: str,
        channel_type: str,
        data: Optional[np.ndarray] = None,
        channel_dtype: str = "int32",
        shape: Optional[tuple] = None,
        max_shape: tuple = (None,),
        chunks: bool = True,
        channel_metadata: Optional[object] = None,
        **kwargs,
    ) -> object:
        """
        Add a time series channel through the wrapped ``RunGroup``.

        Every argument is handed to ``RunGroup.add_channel`` unchanged.

        Parameters
        ----------
        channel_name : str
            Name of the channel.
        channel_type : str
            Channel type (e.g. 'electric', 'magnetic').
        data : np.ndarray, optional
            Initial data. Default is None.
        channel_dtype : str, default='int32'
            Data type of the channel.
        shape : tuple, optional
            Shape of the dataset. Default is None.
        max_shape : tuple, default=(None,)
            Maximum shape of the dataset.
        chunks : bool, default=True
            Chunking option for the dataset.
        channel_metadata : optional
            Channel metadata object. Default is None.
        **kwargs
            Extra keyword arguments for ``RunGroup.add_channel``.

        Returns
        -------
        object
            Whatever ``RunGroup.add_channel`` returns.

        Examples
        --------
        >>> ts_run = FeatureTSRunGroup(h5_group)
        >>> channel = ts_run.add_feature_channel(
        ...     'Ex', 'electric', data=np.arange(1000))
        """
        dataset_options = {
            "channel_dtype": channel_dtype,
            "shape": shape,
            "max_shape": max_shape,
            "chunks": chunks,
            "channel_metadata": channel_metadata,
        }
        return self._run_group.add_channel(
            channel_name,
            channel_type,
            data,
            **dataset_options,
            **kwargs,
        )

    def get_feature_channel(self, channel_name: str) -> object:
        """
        Fetch a channel through the wrapped ``RunGroup``.

        Parameters
        ----------
        channel_name : str
            Name of the channel to look up.

        Returns
        -------
        object
            Whatever ``RunGroup.get_channel`` returns.

        Raises
        ------
        MTH5Error
            Expected from ``RunGroup.get_channel`` when the channel is
            missing (defined outside this file).

        Examples
        --------
        >>> ts_run = FeatureTSRunGroup(h5_group)
        >>> channel = ts_run.get_feature_channel('Ex')
        """
        run_group = self._run_group
        return run_group.get_channel(channel_name)

    def remove_feature_channel(self, channel_name: str) -> None:
        """
        Delete a channel through the wrapped ``RunGroup``.

        Parameters
        ----------
        channel_name : str
            Name of the channel to delete.

        Raises
        ------
        MTH5Error
            Expected from ``RunGroup.remove_channel`` when the channel is
            missing (defined outside this file).

        Examples
        --------
        >>> ts_run = FeatureTSRunGroup(h5_group)
        >>> ts_run.remove_feature_channel('Ex')
        """
        run_group = self._run_group
        run_group.remove_channel(channel_name)
class FeatureFCRunGroup(BaseGroup):
    """
    Run group holding Fourier Coefficient features, one child per decimation level.

    Hierarchy
    ---------
    FeatureFCRunGroup -> FeatureDecimationGroup -> FeatureChannelDataset

    Attributes
    ----------
    metadata : Decimation
        Run-level metadata. The original documentation lists the intended
        content as: decimation levels, earliest start time, latest end time,
        method (fft, wavelet, ...), channels used, starting sample rate,
        bands used and type (TS or FC).

    Parameters
    ----------
    group : h5py.Group
        HDF5 group backing this run.
    feature_run_metadata : Decimation, optional
        Metadata for the run; passed to ``BaseGroup`` as ``group_metadata``.
    **kwargs
        Forwarded unchanged to ``BaseGroup.__init__``.

    Examples
    --------
    >>> fc_run = FeatureFCRunGroup(h5_group, feature_run_metadata=metadata)
    >>> decimation = fc_run.add_decimation_level('level_0', dec_metadata)
    """

    def __init__(
        self,
        group: h5py.Group,
        feature_run_metadata: Optional[Decimation] = None,
        **kwargs,
    ) -> None:
        super().__init__(group, group_metadata=feature_run_metadata, **kwargs)

    @BaseGroup.metadata.getter
    def metadata(self) -> Decimation:
        """Return the run metadata after refreshing its HDF5 reference."""
        # Aggregating the decimation-level metadata here is currently disabled:
        # self._metadata.channels = []
        # for dl in self.groups_list:
        #     dl_group = self.get_decimation_level(dl)
        #     self._metadata.levels.append(dl_group.metadata)
        self._metadata.hdf5_reference = self.hdf5_group.ref
        return self._metadata

    @property
    def decimation_level_summary(self) -> pd.DataFrame:
        """
        Tabulate the decimation levels stored in this run.

        Only children whose ``mth5_type`` attribute is ``"FeatureDecimation"``
        are listed. Children missing any required attribute are skipped and
        the missing key is logged at debug level.

        Returns
        -------
        pd.DataFrame
            One row per decimation level with columns:

            - name : str
                Value of the ``decimation_level`` attribute
            - start : datetime64[ns]
                ``time_period.start`` with any ``+`` UTC-offset suffix removed
            - end : datetime64[ns]
                ``time_period.end`` with any ``+`` UTC-offset suffix removed
            - hdf5_reference : h5py.ref_dtype
                HDF5 reference to the decimation level group

        Examples
        --------
        >>> fc_run = FeatureFCRunGroup(h5_group)
        >>> summary = fc_run.decimation_level_summary
        >>> print(summary[['name', 'start', 'end']])
        """
        summary_dtype = np.dtype(
            [
                ("name", "U20"),
                ("start", "datetime64[ns]"),
                ("end", "datetime64[ns]"),
                ("hdf5_reference", h5py.ref_dtype),
            ]
        )

        rows = []
        for _, member in self.hdf5_group.items():
            try:
                if member.attrs["mth5_type"] not in ["FeatureDecimation"]:
                    continue
                row = (
                    member.attrs["decimation_level"],
                    member.attrs["time_period.start"].split("+")[0],
                    member.attrs["time_period.end"].split("+")[0],
                    member.ref,
                )
                rows.append(row)
            except KeyError as error:
                self.logger.debug(f"Could not find key: {error}")

        return pd.DataFrame(np.array(rows, dtype=summary_dtype))

    def add_decimation_level(
        self,
        decimation_level_name: str,
        feature_decimation_level_metadata: Optional[object] = None,
    ) -> FeatureDecimationGroup:
        """
        Create a :class:`FeatureDecimationGroup` underneath this run.

        Parameters
        ----------
        decimation_level_name : str
            Name of the new decimation level.
        feature_decimation_level_metadata : optional
            Metadata stored with the new level. Default is None.

        Returns
        -------
        FeatureDecimationGroup
            The group returned by ``BaseGroup._add_group``.

        Examples
        --------
        >>> fc_run = FeatureFCRunGroup(h5_group)
        >>> decimation = fc_run.add_decimation_level('level_0', dec_metadata)
        """
        # Decimation levels are matched on the metadata "id" attribute.
        options = {
            "group_metadata": feature_decimation_level_metadata,
            "match": "id",
        }
        return self._add_group(
            decimation_level_name, FeatureDecimationGroup, **options
        )

    def get_decimation_level(
        self, decimation_level_name: str
    ) -> FeatureDecimationGroup:
        """
        Fetch an existing decimation level by name.

        Parameters
        ----------
        decimation_level_name : str
            Name of the decimation level to look up.

        Returns
        -------
        FeatureDecimationGroup
            The group returned by ``BaseGroup._get_group``.

        Raises
        ------
        MTH5Error
            Expected from ``BaseGroup._get_group`` when the level is missing
            (that helper is defined outside this file).

        Examples
        --------
        >>> fc_run = FeatureFCRunGroup(h5_group)
        >>> decimation = fc_run.get_decimation_level('level_0')
        """
        group_class = FeatureDecimationGroup
        return self._get_group(decimation_level_name, group_class)

    def remove_decimation_level(self, decimation_level_name: str) -> None:
        """
        Delete a decimation level via ``BaseGroup._remove_group``.

        Parameters
        ----------
        decimation_level_name : str
            Name of the decimation level to delete.

        Raises
        ------
        MTH5Error
            Expected from ``BaseGroup._remove_group`` when the level is
            missing (that helper is defined outside this file).

        Examples
        --------
        >>> fc_run = FeatureFCRunGroup(h5_group)
        >>> fc_run.remove_decimation_level('level_0')
        """
        self._remove_group(decimation_level_name)

    def update_metadata(self) -> None:
        """
        Refresh the run's time period from its decimation levels and write it.

        The start becomes the earliest level start and the end becomes the
        latest level end. When there are no decimation levels the time period
        is left as is.

        Examples
        --------
        >>> fc_run = FeatureFCRunGroup(h5_group)
        >>> fc_run.update_metadata()
        """
        summary = self.decimation_level_summary.copy()
        if not summary.empty:
            earliest = summary.start.min()
            latest = summary.end.max()
            self._metadata.time_period.start = earliest.isoformat()
            self._metadata.time_period.end = latest.isoformat()
        self.write_metadata()
721 # def supports_aurora_processing_config(
722 # self, processing_config, remote
723 # ) -> bool:
724 # """
726 # An "all-or-nothing" check: Return True if every (valid) decimation needed to satisfy the processing_config
727 # is available in the FCGroup (self) otherwise return False (and we will build all FCs).
729 # Logic:
730 # 1. Get a list of all fc groups in the FCGroup (self)
731 # 2. Loop the processing_config decimations, checking if there is a corresponding, already built FCDecimation
732 # in the FCGroup.
734 # Parameters
735 # ----------
736 # processing_config: aurora.config.metadata.processing.Processing
737 # remote: bool
739 # Returns
740 # -------
742 # """
743 # pre_existing_fc_decimation_ids_to_check = self.groups_list
744 # levels_present = np.full(processing_config.num_decimation_levels, False)
745 # for i, dec_level in enumerate(processing_config.decimations):
747 # # Quit checking if dec_level wasn't there
748 # if i > 0:
749 # if not levels_present[i - 1]:
750 # return False
752 # # iterate over existing decimations
753 # for fc_decimation_id in pre_existing_fc_decimation_ids_to_check:
754 # fc_dec_group = self.get_decimation_level(fc_decimation_id)
755 # fc_decimation = fc_dec_group.metadata
756 # levels_present[i] = fc_decimation.has_fcs_for_aurora_processing(
757 # dec_level, remote
758 # )
760 # if levels_present[i]:
761 # pre_existing_fc_decimation_ids_to_check.remove(
762 # fc_decimation_id
763 # ) # no need to check this one again
764 # break # break inner for-loop over decimations
766 # return levels_present.all()
769class FeatureDecimationGroup(BaseGroup):
770 """
771 Container for a single decimation level with multiple Fourier Coefficient channels.
773 This class manages Fourier Coefficient data organized by frequency, time, and channel.
774 Data is assumed to be uniformly sampled in both frequency and time domains.
776 Hierarchy
777 ---------
778 FeatureDecimationGroup -> FeatureChannelDataset (multiple channels)
780 Data Assumptions
781 ----------------
782 1. Data are uniformly sampled in frequency domain
783 2. Data are uniformly sampled in time domain
784 3. FFT moving window has uniform step size
786 Attributes
787 ----------
788 start time : datetime
789 Start time of the decimation level
790 end time : datetime
791 End time of the decimation level
792 channels : list
793 List of channel names in this decimation level
794 decimation_factor : int
795 Factor by which data was decimated
796 decimation_level : int
797 Level index in decimation hierarchy
798 decimation_sample_rate : float
799 Sample rate after decimation (Hz)
800 method : str
801 Method used (FFT, wavelet, etc.)
802 anti_alias_filter : optional
803 Anti-aliasing filter used
804 prewhitening_type : optional
805 Type of prewhitening applied
806 harmonics_kept : list or 'all'
807 Harmonic indices kept in the data
808 window : dict
809 Window parameters (length, overlap, type, sample rate)
810 bands : list
811 Frequency bands in the data
813 Parameters
814 ----------
815 group : h5py.Group
816 HDF5 group object for this FeatureDecimationGroup.
817 decimation_level_metadata : optional
818 Metadata for the decimation level. Default is None.
819 **kwargs
820 Additional keyword arguments passed to BaseGroup.
822 Examples
823 --------
824 >>> decimation = FeatureDecimationGroup(h5_group, metadata)
825 >>> channel = decimation.add_channel('Ex', fc_data=fc_array, fc_metadata=ch_metadata)
826 """
def __init__(
    self,
    group: h5py.Group,
    decimation_level_metadata: Optional[object] = None,
    **kwargs,
) -> None:
    """
    Initialize the decimation level on top of ``BaseGroup``.

    Parameters
    ----------
    group : h5py.Group
        HDF5 group backing this decimation level.
    decimation_level_metadata : optional
        Metadata for the level; passed to ``BaseGroup`` as ``group_metadata``.
    **kwargs
        Forwarded unchanged to ``BaseGroup.__init__``.
    """
    super().__init__(
        group,
        group_metadata=decimation_level_metadata,
        **kwargs,
    )
@BaseGroup.metadata.getter
def metadata(self):
    """
    Return the decimation level metadata with fresh channel information.

    The ``channels`` list is rebuilt on every access from the metadata of
    each entry in ``groups_list``, and the HDF5 reference is refreshed.
    """
    # Start from an empty list so removed channels do not linger.
    self._metadata.channels = []
    for channel_name in self.groups_list:
        channel_metadata = self.get_channel(channel_name).metadata
        self._metadata.channels.append(channel_metadata)

    self._metadata.hdf5_reference = self.hdf5_group.ref
    return self._metadata
@property
def channel_summary(self) -> pd.DataFrame:
    """
    Tabulate the channels stored in this decimation level.

    Only children whose ``mth5_type`` attribute is ``"FCChannel"`` are
    listed. Children missing any required attribute are skipped and the
    missing key is logged at debug level.

    Returns
    -------
    pd.DataFrame
        One row per channel with columns:

        - name : str
            Value of the ``name`` attribute
        - start : datetime64[ns]
            ``time_period.start`` with any ``+`` UTC-offset suffix removed
        - end : datetime64[ns]
            ``time_period.end`` with any ``+`` UTC-offset suffix removed
        - n_frequency : int64
            Size of the dataset's first axis
        - n_windows : int64
            Size of the dataset's second axis
        - sample_rate_decimation_level : float64
            Value of the attribute of the same name
        - sample_rate_window_step : float64
            Value of the attribute of the same name
        - units : str
            Value of the ``units`` attribute
        - hdf5_reference : h5py.ref_dtype
            HDF5 reference to the channel dataset

    Notes
    -----
    NOTE(review): ``from_xarray`` stores arrays with time as the first
    axis, so confirm that labelling axis 0 as ``n_frequency`` and axis 1 as
    ``n_windows`` is the intended orientation.

    Examples
    --------
    >>> decimation = FeatureDecimationGroup(h5_group)
    >>> summary = decimation.channel_summary
    >>> print(summary[['name', 'n_frequency', 'n_windows']])
    """
    summary_dtype = np.dtype(
        [
            ("name", "U20"),
            ("start", "datetime64[ns]"),
            ("end", "datetime64[ns]"),
            ("n_frequency", np.int64),
            ("n_windows", np.int64),
            ("sample_rate_decimation_level", np.float64),
            ("sample_rate_window_step", np.float64),
            ("units", "U25"),
            ("hdf5_reference", h5py.ref_dtype),
        ]
    )

    rows = []
    for _, member in self.hdf5_group.items():
        try:
            if member.attrs["mth5_type"] not in ["FCChannel"]:
                continue
            attrs = member.attrs
            row = (
                attrs["name"],
                attrs["time_period.start"].split("+")[0],
                attrs["time_period.end"].split("+")[0],
                member.shape[0],
                member.shape[1],
                attrs["sample_rate_decimation_level"],
                attrs["sample_rate_window_step"],
                attrs["units"],
                member.ref,
            )
            rows.append(row)
        except KeyError as error:
            self.logger.debug(f"Cannot find a key: {error}")

    return pd.DataFrame(np.array(rows, dtype=summary_dtype))
def from_dataframe(
    self,
    df: pd.DataFrame,
    channel_key: str,
    time_key: str = "time",
    frequency_key: str = "frequency",
) -> None:
    """
    Load Fourier Coefficient data from a pandas DataFrame.

    Every column of ``df`` is cast to ``complex128``, reshaped through
    ``Series.to_xarray`` (so a MultiIndex becomes one axis per index level)
    and stored as a channel named after the column. The input DataFrame is
    not modified.

    Parameters
    ----------
    df : pd.DataFrame
        DataFrame whose columns hold the coefficient values.
    channel_key : str
        Currently unused; all columns are loaded.
    time_key : str, default='time'
        Currently unused.
    frequency_key : str, default='frequency'
        Currently unused.

    Raises
    ------
    TypeError
        If df is not a pandas DataFrame.

    Examples
    --------
    >>> decimation = FeatureDecimationGroup(h5_group)
    >>> decimation.from_dataframe(df, channel_key='Ex', time_key='time')
    """
    if not isinstance(df, pd.DataFrame):
        msg = f"Must input a pandas dataframe not {type(df)}"
        self.logger.error(msg)
        raise TypeError(msg)

    for col in df.columns:
        # Cast a copy of the column; assigning back into ``df`` would
        # silently change the dtypes of the caller's DataFrame.
        coefficients = df[col].astype(np.complex128)
        self.add_channel(col, fc_data=coefficients.to_xarray().to_numpy())
def from_xarray(
    self,
    data_array: xr.DataArray | xr.Dataset,
    sample_rate_decimation_level: float,
) -> None:
    """
    Load Fourier Coefficient data from an xarray DataArray or Dataset.

    Channel metadata (time period, frequency range, window step and, when
    present, units) is read from the xarray object. A DataArray is stored as
    one channel named ``data_array.name``. For a Dataset, each data variable
    becomes one channel and replaces any channel of the same name that is
    listed in ``channel_summary``.

    Parameters
    ----------
    data_array : xr.DataArray or xr.Dataset
        Input with 'time' and 'frequency' coordinates.
    sample_rate_decimation_level : float
        Sample rate of the decimation level.

    Raises
    ------
    TypeError
        If data_array is not an xarray Dataset or DataArray.

    Notes
    -----
    - Dataset variables are stored time-first; a variable whose second axis
      matches the time coordinate is transposed. A variable matching neither
      axis is skipped with a warning and any existing channel of that name
      is left untouched.
    - A DataArray is stored as given, without transposition.
    - The window step is taken from the first two time samples. With a
      single time sample it cannot be computed and
      ``sample_rate_window_step`` keeps its default.

    Examples
    --------
    >>> import numpy as np
    >>> import xarray as xr
    >>> decimation = FeatureDecimationGroup(h5_group)
    >>> times = np.arange('2023-01-01', '2023-01-02', dtype='datetime64[s]')
    >>> freqs = np.linspace(0.01, 100, 256)
    >>> values = np.zeros((len(times), len(freqs)), dtype=complex)
    >>> xr_data = xr.DataArray(
    ...     values,
    ...     dims=['time', 'frequency'],
    ...     coords={'time': times, 'frequency': freqs},
    ...     name='Ex',
    ...     attrs={'units': 'mV/km'},
    ... )
    >>> decimation.from_xarray(xr_data, sample_rate_decimation_level=0.5)
    """
    if not isinstance(data_array, (xr.Dataset, xr.DataArray)):
        msg = f"Must input a xarray Dataset or DataArray not {type(data_array)}"
        self.logger.error(msg)
        raise TypeError(msg)

    ch_metadata = FeatureDecimationChannel()
    ch_metadata.time_period.start = data_array.time[0].values
    ch_metadata.time_period.end = data_array.time[-1].values
    ch_metadata.sample_rate_decimation_level = sample_rate_decimation_level
    ch_metadata.frequency_min = data_array.coords["frequency"].data.min()
    ch_metadata.frequency_max = data_array.coords["frequency"].data.max()

    time_values = data_array.coords["time"].data
    if time_values.size > 1:
        step_size = time_values[1] - time_values[0]
        ch_metadata.sample_rate_window_step = step_size / np.timedelta64(1, "s")
    else:
        # One window has no step; indexing time[1] would raise IndexError.
        self.logger.debug(
            "Only one time sample in xarray, cannot compute window step"
        )

    try:
        ch_metadata.units = data_array.units
    except AttributeError:
        self.logger.debug("Could not find 'units' in xarray")

    if isinstance(data_array, xr.DataArray):
        self.add_channel(
            data_array.name,
            fc_data=data_array.to_numpy(),
            fc_metadata=ch_metadata,
        )
        return

    # Variable names are unique, so the existing channel names can be read
    # once instead of rebuilding the summary for every variable.
    existing_channels = set(self.channel_summary.name.to_list())
    for ch in data_array.data_vars.keys():
        variable = data_array[ch]
        n_times = variable.time.size

        # time index should be the first index
        if n_times == variable.shape[0]:
            fc_data = variable.to_numpy()
        elif variable.ndim > 1 and n_times == variable.shape[1]:
            fc_data = variable.to_numpy().T
        else:
            self.logger.warning(
                f"Skipping {ch}: shape {variable.shape} does not match "
                f"the time coordinate of length {n_times}"
            )
            continue

        ch_metadata.name = ch
        # Remove the old channel only once the replacement is known to be
        # usable, so a bad variable cannot delete existing data.
        if ch in existing_channels:
            self.remove_channel(ch)
        self.add_channel(
            ch,
            fc_data=fc_data,
            fc_metadata=ch_metadata,
            dtype=variable.dtype,
        )
def to_xarray(self, channels: Optional[list] = None) -> xr.Dataset:
    """
    Build an xarray Dataset from channels of this decimation level.

    Each requested channel is fetched with ``get_channel`` and converted
    with its own ``to_xarray``; the results become the data variables of
    the returned Dataset, keyed by channel name.

    Parameters
    ----------
    channels : list, optional
        Channel names to include. When None (default), every name in
        ``groups_list`` is used.

    Returns
    -------
    xr.Dataset
        Dataset with one data variable per channel.

    Examples
    --------
    >>> decimation = FeatureDecimationGroup(h5_group)
    >>> xr_data = decimation.to_xarray()
    >>> subset = decimation.to_xarray(channels=['Ex', 'Ey'])
    """
    names = self.groups_list if channels is None else channels
    variables = {name: self.get_channel(name).to_xarray() for name in names}
    return xr.Dataset(variables)
def from_numpy_array(
    self,
    nd_array: np.ndarray,
    ch_name: str | list,
) -> None:
    """
    Load Fourier Coefficient data from a numpy array.

    A 2D array is stored as a single channel. A 3D array is split along its
    first axis and each slice is stored under the matching name in
    ``ch_name``.

    Parameters
    ----------
    nd_array : np.ndarray
        Array shaped (n_frequencies, n_windows) for one channel or
        (n_channels, n_frequencies, n_windows) for several channels.
    ch_name : str or list
        Channel name (2D array) or list of channel names (3D array). A
        single string is also accepted for a 3D array with one channel.

    Raises
    ------
    TypeError
        If nd_array is not a numpy ndarray.
    ValueError
        If the array is neither 2D nor 3D, or if the number of names does
        not match the number of channels in a 3D array.

    Examples
    --------
    >>> decimation = FeatureDecimationGroup(h5_group)

    Load single channel:

    >>> data_2d = np.random.randn(256, 100) + 1j * np.random.randn(256, 100)
    >>> decimation.from_numpy_array(data_2d, ch_name='Ex')

    Load multiple channels:

    >>> data_3d = np.random.randn(2, 256, 100) + 1j * np.random.randn(2, 256, 100)
    >>> decimation.from_numpy_array(data_3d, ch_name=['Ex', 'Ey'])
    """
    if not isinstance(nd_array, np.ndarray):
        msg = f"Must input a numpy ndarray not {type(nd_array)}"
        self.logger.error(msg)
        raise TypeError(msg)

    if nd_array.ndim == 3:
        # A bare string would otherwise be iterated character by character.
        names = [ch_name] if isinstance(ch_name, str) else list(ch_name)
        n_channels = nd_array.shape[0]
        if len(names) != n_channels:
            msg = (
                f"Got {len(names)} channel name(s) for {n_channels} "
                "channel(s) in the input array"
            )
            self.logger.error(msg)
            raise ValueError(msg)
        # Iterating the array yields one (n_frequencies, n_windows) slice
        # per channel; the previous zip over ``shape[0]`` (an int) always
        # raised TypeError.
        for name, channel_data in zip(names, nd_array):
            self.add_channel(name, fc_data=channel_data)
    elif nd_array.ndim == 2:
        self.add_channel(ch_name, fc_data=nd_array)
    else:
        raise ValueError(
            "input array must be shaped (n_frequencies, n_windows) or "
            "(n_channels, n_frequencies, n_windows)"
        )
def add_channel(
    self,
    fc_name: str,
    fc_data: Optional[np.ndarray | xr.DataArray | xr.Dataset | pd.DataFrame] = None,
    fc_metadata: Optional[FeatureDecimationChannel] = None,
    max_shape: tuple = (None, None),
    chunks: bool = True,
    dtype: type = complex,
    **kwargs,
) -> FeatureChannelDataset:
    """
    Add a Fourier Coefficient channel to the decimation level.

    Creates an HDF5 dataset named ``fc_name`` in this group and wraps it in
    a FeatureChannelDataset. If the dataset cannot be created (for example
    because the name is already taken), the error is logged and the
    existing channel of that name is returned instead.

    Parameters
    ----------
    fc_name : str
        Name for the Fourier Coefficient channel. It is passed through
        ``validate_name`` before use.
    fc_data : np.ndarray, xr.DataArray, xr.Dataset, pd.DataFrame, optional
        Input data. Default is None, in which case a (1, 1) array of zeros
        of ``dtype`` is written and chunking is forced on.
    fc_metadata : FeatureDecimationChannel, optional
        Metadata for the channel. Default is None, in which case a
        FeatureDecimationChannel named ``fc_name`` is created.
    max_shape : tuple, default=(None, None)
        Passed to HDF5 as ``maxshape``.
    chunks : bool, default=True
        Whether to use HDF5 chunking.
    dtype : type, default=complex
        Data type for the dataset (e.g., complex, float, int).
    **kwargs
        Accepted for interface compatibility; currently not forwarded to
        the HDF5 dataset creation call.

    Returns
    -------
    FeatureChannelDataset
        The newly created channel, or the existing channel named
        ``fc_name`` when creation failed.

    Raises
    ------
    TypeError
        If fc_data is not one of the supported types.
    MTH5Error
        If creation failed and no existing channel named ``fc_name``
        could be retrieved.

    Notes
    -----
    Data layout assumes (time, frequency) organization:

    - time index: window start times
    - frequency index: harmonic indices or float values
    - data: complex Fourier coefficients

    Examples
    --------
    >>> decimation = FeatureDecimationGroup(h5_group)
    >>> metadata = FeatureDecimationChannel(name='Ex')

    Create from numpy array:

    >>> fc_data = np.random.randn(100, 256) + 1j * np.random.randn(100, 256)
    >>> channel = decimation.add_channel('Ex', fc_data=fc_data, fc_metadata=metadata)

    Create empty channel (expandable):

    >>> channel = decimation.add_channel('Ex', fc_metadata=metadata)
    """
    fc_name = validate_name(fc_name)

    if fc_metadata is None:
        fc_metadata = FeatureDecimationChannel(name=fc_name)

    if fc_data is not None:
        if not isinstance(
            fc_data, (np.ndarray, xr.DataArray, xr.Dataset, pd.DataFrame)
        ):
            msg = (
                "Need to input a numpy.array, xarray.DataArray, "
                f"xr.Dataset, pd.DataFrame not {type(fc_data)}"
            )
            # No exception is active here, so use error() rather than
            # exception(), which would log an empty traceback.
            self.logger.error(msg)
            raise TypeError(msg)
    else:
        # Placeholder data; chunking is forced on for this case.
        chunks = True
        fc_data = np.zeros((1, 1), dtype=dtype)

    try:
        dataset = self.hdf5_group.create_dataset(
            fc_name,
            data=fc_data,
            dtype=dtype,
            chunks=chunks,
            maxshape=max_shape,
            **self.dataset_options,
        )

        fc_dataset = FeatureChannelDataset(dataset, dataset_metadata=fc_metadata)
    except (OSError, RuntimeError, ValueError) as error:
        self.logger.error(error)
        msg = f"estimate {fc_name} already exists, returning existing group."
        self.logger.debug(msg)

        # Look up the name the dataset was created under, which may differ
        # from the name stored in the supplied metadata.
        fc_dataset = self.get_channel(fc_name)
    return fc_dataset
def get_channel(self, fc_name: str) -> FeatureChannelDataset:
    """
    Look up an existing Fourier Coefficient channel in this group.

    The name is normalised with ``validate_name``, the matching HDF5
    dataset is read from the group, and its attributes are used to build
    the channel metadata.

    Parameters
    ----------
    fc_name : str
        Name of the channel to retrieve.

    Returns
    -------
    FeatureChannelDataset
        Wrapper around the stored dataset, carrying metadata rebuilt from
        the dataset attributes.

    Raises
    ------
    MTH5Error
        If the lookup raises KeyError (no such channel) or OSError.

    Examples
    --------
    >>> decimation = FeatureDecimationGroup(h5_group)
    >>> channel = decimation.get_channel('Ex')
    >>> data = channel.to_numpy()
    """
    fc_name = validate_name(fc_name)

    try:
        h5_dataset = self.hdf5_group[fc_name]
        attributes = dict(h5_dataset.attrs)
        channel_metadata = FeatureDecimationChannel(**attributes)
        return FeatureChannelDataset(
            h5_dataset, dataset_metadata=channel_metadata
        )
    except KeyError:
        msg = f"{fc_name} does not exist, check groups_list for existing names"
        self.logger.error(msg)
        raise MTH5Error(msg)
    except OSError as error:
        self.logger.error(error)
        raise MTH5Error(error)
def remove_channel(self, fc_name: str) -> None:
    """
    Remove a Fourier Coefficient channel from the decimation level.

    Deletes the channel from the HDF5 group. Note that this removes the
    reference but does not reduce file size.

    The name is first looked up exactly as ``add_channel`` and
    ``get_channel`` would store it (after ``validate_name``). If no such
    entry exists, the lower-cased name is tried, which is the key this
    method has historically used.

    Parameters
    ----------
    fc_name : str
        Name of the channel to remove.

    Raises
    ------
    MTH5Error
        If the channel does not exist.

    Notes
    -----
    To reduce HDF5 file size, copy desired data to a new file.

    Examples
    --------
    >>> decimation = FeatureDecimationGroup(h5_group)
    >>> decimation.remove_channel('Ex')
    """
    exact_name = validate_name(fc_name)
    if exact_name in self.hdf5_group:
        fc_name = exact_name
    else:
        # Legacy behaviour: fall back to the lower-cased key.
        fc_name = validate_name(fc_name.lower())

    try:
        del self.hdf5_group[fc_name]
        self.logger.info(
            "Deleting a estimate does not reduce the HDF5 "
            "file size it simply remove the reference. If "
            "file size reduction is your goal, simply copy"
            " what you want into another file."
        )
    except KeyError:
        msg = f"{fc_name} does not exist, check groups_list for existing names"
        self.logger.error(msg)
        raise MTH5Error(msg)
def update_metadata(self) -> None:
    """
    Refresh the decimation-level metadata from the channel summary.

    Works on a copy of ``self.channel_summary``. When that table is not
    empty, the time period is set to the earliest ``start`` and the latest
    ``end`` (as ISO strings), and both sample-rate fields are set to the
    first unique value found in their columns. The metadata is then
    written with ``write_metadata``.

    Examples
    --------
    >>> decimation = FeatureDecimationGroup(h5_group)
    >>> decimation.update_metadata()
    """
    summary = self.channel_summary.copy()
    has_channels = not summary.empty

    if has_channels:
        metadata = self._metadata
        metadata.time_period.start = summary.start.min().isoformat()
        metadata.time_period.end = summary.end.max().isoformat()

        decimation_rates = summary.sample_rate_decimation_level.unique()
        metadata.sample_rate_decimation_level = decimation_rates[0]

        window_step_rates = summary.sample_rate_window_step.unique()
        metadata.sample_rate_window_step = window_step_rates[0]

    # NOTE(review): assumed to run even when the summary is empty - confirm
    # against the original indentation.
    self.write_metadata()
1381 def add_weights(
1382 self,
1383 weight_name: str,
1384 weight_data: Optional[np.ndarray] = None,
1385 weight_metadata: Optional[object] = None,
1386 max_shape: tuple = (None, None, None),
1387 chunks: bool = True,
1388 **kwargs,
1389 ) -> None:
1390 """
1391 Add weight or masking data for Fourier Coefficients.
1393 Creates a dataset to store weights or masks for quality control,
1394 frequency band selection, or time window filtering.
1396 Parameters
1397 ----------
1398 weight_name : str
1399 Name for the weight dataset.
1400 weight_data : np.ndarray, optional
1401 Weight values. Default is None.
1402 weight_metadata : optional
1403 Metadata for the weight dataset. Default is None.
1404 max_shape : tuple, default=(None, None, None)
1405 Maximum shape for expandable dimensions.
1406 chunks : bool, default=True
1407 Whether to use HDF5 chunking.
1408 **kwargs
1409 Additional keyword arguments for HDF5 dataset creation.
1411 Notes
1412 -----
1413 Weight datasets can track:
1415 - weight_channel: Per-channel weights
1416 - weight_band: Per-frequency-band weights
1417 - weight_time: Per-time-window weights
1419 This method is a placeholder for future implementation.
1421 Examples
1422 --------
1423 >>> decimation = FeatureDecimationGroup(h5_group)
1424 >>> decimation.add_weights('coherency_weights', weight_data=weights)
1425 """