Coverage for C:\Users\peaco\OneDrive\Documents\GitHub\mth5\mth5\groups\base.py: 83%
173 statements
« prev ^ index » next coverage.py v7.13.1, created at 2026-01-27 20:09 -0800
« prev ^ index » next coverage.py v7.13.1, created at 2026-01-27 20:09 -0800
1# -*- coding: utf-8 -*-
2"""
3Base Group Class
4====================
6Contains all the base functions that will be used by group classes.
8Created on Fri May 29 15:09:48 2020
10:copyright:
11 Jared Peacock (jpeacock@usgs.gov)
13:license:
14 MIT
15"""
16# =============================================================================
17# Imports
18# =============================================================================
19from __future__ import annotations
21import inspect
22import weakref
23from typing import Any, Type
25import h5py
26from loguru import logger
27from mt_metadata import timeseries as metadata
28from mt_metadata.base import MetadataBase
29from mt_metadata.features import (
30 Feature,
31 FeatureDecimationChannel,
32 FeatureFCRun,
33 FeatureTSRun,
34)
35from mt_metadata.processing.fourier_coefficients import Decimation, FC, FCChannel
36from mt_metadata.transfer_functions.tf import TransferFunction
38from mth5.helpers import (
39 add_attributes_to_metadata_class_pydantic,
40 get_tree,
41 read_attrs_to_dict,
42 to_numpy_type,
43 validate_name,
44)
45from mth5.utils.exceptions import MTH5Error
# Registry mapping a metadata class name to the class itself.  It starts with
# every class exposed by ``mt_metadata.timeseries`` and is then extended with
# the transfer-function, Fourier-coefficient and feature classes under the
# names the group classes look them up by.
meta_classes = {
    **dict(inspect.getmembers(metadata, inspect.isclass)),
    "TransferFunction": TransferFunction,
    "FCDecimation": Decimation,
    "FCChannel": FCChannel,
    "FC": FC,
    "Feature": Feature,
    "FeatureTSRun": FeatureTSRun,
    "FeatureFCRun": FeatureFCRun,
    # NOTE(review): maps to the Fourier-coefficient ``Decimation`` class,
    # exactly as before -- confirm this is the intended class.
    "FeatureDecimation": Decimation,
    "FeatureDecimationChannel": FeatureDecimationChannel,
}
61# =============================================================================
62#
63# =============================================================================
class BaseGroup:
    """
    Base class for HDF5 group management with metadata handling.

    Wraps an HDF5 group (or dataset) and pairs it with a metadata object
    whose values are read from, and written to, the HDF5 attributes of the
    wrapped object.

    Parameters
    ----------
    group : h5py.Group or h5py.Dataset
        HDF5 group or dataset object to wrap.
    group_metadata : MetadataBase, optional
        Metadata to copy into the group's metadata object and write to the
        file. Default is None.
    **kwargs : dict
        Additional keyword arguments to set as instance attributes.

    Attributes
    ----------
    hdf5_group : h5py.Group or h5py.Dataset
        The wrapped HDF5 object.  This is a regular (strong) reference; it
        is only set when ``group`` is an ``h5py.Group`` or ``h5py.Dataset``.
    metadata : MetadataBase
        Metadata object associated with the group.
    logger : loguru.Logger
        Logger instance for tracking operations.
    compression : str, optional
        HDF5 compression method (e.g., 'gzip').
    compression_opts : int, optional
        Compression options/level.
    shuffle : bool
        Enable HDF5 shuffle filter. Default is False.
    fletcher32 : bool
        Enable HDF5 Fletcher32 checksum. Default is False.

    Notes
    -----
    - Metadata changes should be written using `write_metadata()`.
    - ``==`` comparisons are not implemented and raise `MTH5Error`.
    - This is a base class intended to be inherited by more specific group
      types (e.g. ``SurveyGroup``, ``StationGroup``, ``RunGroup``).  The
      metadata class is selected from the subclass name with the ``Group``
      suffix removed.

    Examples
    --------
    Create and manage a group with metadata

    >>> import h5py
    >>> with h5py.File('data.h5', 'r+') as f:
    ...     group = f.create_group('MyGroup')
    ...     base_obj = BaseGroup(group)
    ...     print(base_obj)
    ...     # Set and write metadata
    ...     base_obj.metadata.id = 'MyGroup'
    ...     base_obj.write_metadata()
    ...     print(base_obj.groups_list)

    """

    def __init__(
        self,
        group: h5py.Group | h5py.Dataset,
        group_metadata: MetadataBase | None = None,
        **kwargs: Any,
    ) -> None:
        self.compression = None
        self.compression_opts = None
        self.shuffle = False
        self.fletcher32 = False
        self._has_read_metadata = False

        self.logger = logger

        # The previous ``weakref.ref(group)()`` created a weak reference and
        # dereferenced it immediately, which yields the object itself.  The
        # attribute has therefore always been a strong reference; assign it
        # directly so the code says what it does.  ``isinstance`` is already
        # False for ``None``.
        if isinstance(group, (h5py.Group, h5py.Dataset)):
            self.hdf5_group = group
        # initialize metadata (reads ``self.hdf5_group.ref``)
        self._initialize_metadata()

        # if metadata was supplied, copy it in and make sure it is written
        # to the file.
        if group_metadata is not None:
            self.metadata = group_metadata
            self.write_metadata()

        # any other keywords become instance attributes; this happens after
        # the metadata has been written, exactly as before.
        for key, value in kwargs.items():
            setattr(self, key, value)

    def __str__(self) -> str:
        """
        Generate a string representation of the group hierarchy.

        Returns
        -------
        str
            Result of ``get_tree`` for the wrapped HDF5 object, or a message
            stating that the file is closed when accessing the object raises
            ``ValueError``.
        """
        try:
            # Accessing ``ref`` is only a probe: it raises ValueError when
            # the underlying file can no longer be accessed.
            self.hdf5_group.ref

            return get_tree(self.hdf5_group)
        except ValueError:
            msg = "MTH5 file is closed and cannot be accessed."
            self.logger.warning(msg)
            return msg

    def __repr__(self) -> str:
        """
        Return the string representation of the group.

        Returns
        -------
        str
            String representation identical to ``__str__``.
        """
        return self.__str__()

    def __eq__(self, other: object) -> bool:
        """
        Check equality with another group (not implemented).

        Parameters
        ----------
        other : object
            Object to compare with.

        Raises
        ------
        MTH5Error
            Always; equality comparison is not yet implemented.

        Examples
        --------
        >>> group1 == group2
        MTH5Error: Cannot test equals yet
        """
        raise MTH5Error("Cannot test equals yet")

    # Iterate over key, value pairs
    def __iter__(self):
        """
        Iterate over key-value pairs in the HDF5 group.

        Returns
        -------
        iterator
            Iterator over the ``(name, object)`` pairs produced by
            ``self.hdf5_group.items()``.

        Examples
        --------
        >>> for name, obj in base_obj:
        ...     print(f"{name}: {type(obj)}")
        """
        return iter(self.hdf5_group.items())

    @property
    def _class_name(self) -> str:
        """
        Extract the class name up to the first occurrence of 'Group'.

        Returns
        -------
        str
            Class name prefix (e.g., 'Survey' for ``SurveyGroup``).

        Examples
        --------
        >>> print(survey_obj._class_name)
        'Survey'
        """
        return self.__class__.__name__.split("Group")[0]

    def _initialize_metadata(self) -> None:
        """
        Initialize the metadata object and its MTH5-specific attributes.

        Looks up ``self._class_name`` in ``meta_classes`` (falling back to
        ``MetadataBase`` when there is no entry, and always using
        ``MetadataBase`` for ``Standards``), passes the class through
        ``add_attributes_to_metadata_class_pydantic`` and stores the result
        in ``self._metadata``.  ``mth5_type`` and ``hdf5_reference`` are then
        set from the class name and the wrapped object's ``ref``.

        Notes
        -----
        Called automatically during ``__init__``; requires
        ``self.hdf5_group`` to be set.
        """
        metadata_obj = MetadataBase
        if self._class_name not in ["Standards"]:
            metadata_obj = meta_classes.get(self._class_name, MetadataBase)
        # add 2 attributes that will help with querying using the new Pydantic approach
        self._metadata = add_attributes_to_metadata_class_pydantic(metadata_obj)

        # set mth5 specific parameters
        self._metadata.mth5_type = self._class_name
        self._metadata.hdf5_reference = self.hdf5_group.ref

    @property
    def metadata(self) -> MetadataBase:
        """
        Get the metadata object, reading HDF5 attributes if needed.

        Returns
        -------
        MetadataBase
            The metadata object held by this group.

        Notes
        -----
        ``read_metadata()`` is called on access for as long as
        ``_has_read_metadata`` is False.  That flag is only set once a read
        actually finds metadata, so a group without stored attributes is
        re-read on every access.

        Examples
        --------
        >>> meta = base_obj.metadata
        >>> print(meta.id)
        'MyGroup'
        """
        if not self._has_read_metadata:
            self.read_metadata()
        return self._metadata

    @metadata.setter
    def metadata(self, metadata_object: MetadataBase) -> None:
        """
        Copy the values of ``metadata_object`` into this group's metadata.

        Parameters
        ----------
        metadata_object : MetadataBase
            Metadata container to copy from.

        Raises
        ------
        MTH5Error
            If ``metadata_object`` is neither an instance of the current
            metadata type nor of ``MetadataBase``.

        Notes
        -----
        Direct field assignment is used to preserve complex objects like
        Provenance that may lose information during to_dict/from_dict
        conversion.  Only fields that already exist on the current metadata
        object are copied.

        Examples
        --------
        >>> from mt_metadata.timeseries import Survey
        >>> survey_meta = Survey()
        >>> survey_meta.id = 'NewSurvey'
        >>> survey_obj.metadata = survey_meta
        """
        # NOTE(review): because MetadataBase is in the tuple, any
        # MetadataBase instance passes this check -- confirm whether a
        # stricter check is wanted.
        if not isinstance(metadata_object, (type(self._metadata), MetadataBase)):
            # ``.get`` keeps the intended MTH5Error from being masked by a
            # KeyError for classes that have no entry in ``meta_classes``.
            expected_type = meta_classes.get(
                self._class_name, type(self._metadata)
            )
            msg = (
                f"Metadata must be of type {expected_type} "
                f"not {type(metadata_object)}"
            )
            self.logger.error(msg)
            raise MTH5Error(msg)

        # Instead of round-trip conversion, directly copy the metadata fields
        # to preserve complex objects like Provenance that may lose information
        # during to_dict/from_dict conversion
        if hasattr(metadata_object, "__dict__"):
            for field_name, field_value in metadata_object.__dict__.items():
                if hasattr(self._metadata, field_name):
                    setattr(self._metadata, field_name, field_value)
        else:
            # Fallback to the original conversion method
            self._metadata.from_dict(metadata_object.to_dict())

    @property
    def groups_list(self) -> list[str]:
        """
        Get the names of all members of the HDF5 group.

        Returns
        -------
        list of str
            The keys of the wrapped HDF5 object.

        Examples
        --------
        >>> print(base_obj.groups_list)
        ['Station_001', 'Station_002']
        """
        return list(self.hdf5_group.keys())

    @property
    def dataset_options(self) -> dict[str, Any]:
        """
        Get the HDF5 dataset creation options.

        Returns
        -------
        dict
            Dictionary containing the ``compression``, ``compression_opts``,
            ``shuffle`` and ``fletcher32`` settings of this object.

        Examples
        --------
        >>> options = base_obj.dataset_options
        >>> print(options)
        {'compression': 'gzip', 'compression_opts': 4,
         'shuffle': True, 'fletcher32': False}
        """
        return {
            "compression": self.compression,
            "compression_opts": self.compression_opts,
            "shuffle": self.shuffle,
            "fletcher32": self.fletcher32,
        }

    def read_metadata(self) -> None:
        """
        Read metadata from HDF5 group attributes into the metadata object.

        The attributes of the wrapped HDF5 object are converted with
        ``read_attrs_to_dict`` and loaded through ``from_dict``.

        Notes
        -----
        If no metadata is found, a debug message is logged, the metadata
        object is left untouched and ``_has_read_metadata`` stays False.

        Examples
        --------
        >>> base_obj.read_metadata()
        >>> print(base_obj.metadata.id)
        'MyGroup'
        """
        meta_dict = read_attrs_to_dict(dict(self.hdf5_group.attrs), self._metadata)
        # Defensive check: skip if meta_dict is empty
        if not meta_dict:
            self.logger.debug(
                f"No metadata found for {self._class_name}, skipping from_dict."
            )
            return
        self._metadata.from_dict({self._class_name: meta_dict})
        self._has_read_metadata = True

    def write_metadata(self) -> None:
        """
        Write metadata from the metadata object to HDF5 group attributes.

        Every item of ``self.metadata.to_dict(single=True)`` is converted
        with ``to_numpy_type`` and written with ``attrs.create``.

        Raises
        ------
        KeyError
            If writing fails with a KeyError whose message does not contain
            "no write intent".
        ValueError
            If writing fails with a ValueError whose message does not
            contain "Unable to synchronously create group".

        Notes
        -----
        - The two recognised messages are treated as read-only mode: a
          warning is logged instead of raising.
        - Other errors are re-raised unchanged, so the original traceback
          and message are preserved.

        Examples
        --------
        >>> base_obj.metadata.id = 'UpdatedGroup'
        >>> base_obj.write_metadata()
        """
        try:
            for key, value in self.metadata.to_dict(single=True).items():
                value = to_numpy_type(value)
                self.hdf5_group.attrs.create(key, value)
                # log only after the attribute has actually been written
                self.logger.debug(f"wrote metadata {key} = {value}")
        except KeyError as key_error:
            if "no write intent" in str(key_error):
                self.logger.warning("File is in read-only mode, cannot write metadata.")
            else:
                raise
        except ValueError as value_error:
            if "Unable to synchronously create group" in str(value_error):
                self.logger.warning("File is in read-only mode, cannot write metadata.")
            else:
                raise

    def initialize_group(self, **kwargs: Any) -> None:
        """
        Initialize group by setting attributes and writing metadata.

        Parameters
        ----------
        **kwargs : dict
            Key-value pairs to set as instance attributes before the
            metadata is written.

        Examples
        --------
        >>> base_obj.initialize_group(
        ...     compression='gzip',
        ...     compression_opts=4,
        ...     shuffle=True
        ... )
        """
        for key, value in kwargs.items():
            setattr(self, key, value)
        self.write_metadata()

    def _add_group(
        self,
        name: str,
        group_class: Type,
        group_metadata: MetadataBase | None = None,
        match: str = "id",
    ) -> BaseGroup | None:
        """
        Add a new group to the HDF5 file.

        Creates a new subgroup and wraps it in ``group_class``.  If a group
        with that name already exists, the existing group is returned
        instead.

        Parameters
        ----------
        name : str
            Name of the group to create; passed through ``validate_name``.
        group_class : type
            Group class to instantiate for the new group.  It is called with
            the new HDF5 group and ``self.dataset_options`` as keywords.
        group_metadata : MetadataBase, optional
            Metadata to assign to the new group. Default is None, in which
            case the ``match`` attribute of the new metadata is set to
            ``name``.
        match : str, optional
            Metadata field to set to the group name when no metadata is
            given; also used in the mismatch error message. Default is 'id'.

        Returns
        -------
        BaseGroup or None
            Instance of ``group_class`` for the new/existing group, or None
            if the file is in read-only mode.

        Raises
        ------
        MTH5Error
            If ``validate_name(group_metadata.id)`` differs from the
            validated ``name``.
        ValueError
            If creating the group fails for a reason other than read-only
            mode or an existing name.

        Notes
        -----
        - The name comparison always uses ``group_metadata.id``, regardless
          of ``match``.  NOTE(review): confirm whether it should compare
          against the ``match`` attribute instead.
        - When the group already exists a new wrapper object is built by
          ``_get_group``; repeated calls therefore return distinct Python
          objects that wrap the same HDF5 group.

        Examples
        --------
        >>> from mt_metadata.timeseries import Station
        >>> station_meta = Station(id='MT_001')
        >>> station = survey_obj._add_group(
        ...     'MT_001',
        ...     StationGroup,
        ...     group_metadata=station_meta
        ... )
        >>> print(station.metadata.id)
        'MT_001'
        """
        name = validate_name(name)

        # Pre-bind so the error handling below can tell whether the wrapper
        # was created before a failure occurred.
        return_obj = None
        try:
            if group_metadata is not None:
                if validate_name(group_metadata.id) != name:
                    msg = (
                        f"{group_class.__name__} name {name} must be "
                        f"the same as group_metadata.{match} "
                        f"{group_metadata.id}"
                    )
                    self.logger.error(msg)
                    raise MTH5Error(msg)
            new_group = self.hdf5_group.create_group(name)
            return_obj = group_class(new_group, **self.dataset_options)
            if group_metadata is None:
                return_obj._metadata.update_attribute(match, name)
            else:
                return_obj.metadata = group_metadata
                # need to add the hdf5 reference to the metadata
                return_obj.metadata.hdf5_reference = new_group.ref
            return_obj.write_metadata()
            if hasattr(return_obj, "initialize_group"):
                return_obj.initialize_group()
        except ValueError as error:
            error_text = str(error)
            if "no write intent" in error_text:
                self.logger.warning(
                    f"File is in read-only mode, cannot create group {name}"
                )
                return None
            if "name already exists" in error_text:
                msg = (
                    f"{group_class.__name__} {name} already exists, "
                    "returning existing group."
                )
                self.logger.info(msg)
                return self._get_group(name, group_class)
            if return_obj is None:
                # Nothing was created.  Previously this path ended in an
                # UnboundLocalError that hid the real cause; surface the
                # original error instead.
                raise
            # The group was created but a later step failed.  The wrapper has
            # always been returned in this situation; keep doing so, but no
            # longer silently.
            self.logger.warning(
                f"{group_class.__name__} {name} was created but could not be "
                f"fully initialized: {error}"
            )
        return return_obj

    def _get_group(self, name: str, group_class: Type) -> BaseGroup:
        """
        Get an existing group from the HDF5 file.

        Parameters
        ----------
        name : str
            Name of the group to retrieve; passed through ``validate_name``.
        group_class : type
            Group class to instantiate for the retrieved group.

        Returns
        -------
        BaseGroup
            Instance of ``group_class`` for the retrieved group, with
            ``read_metadata()`` already called.

        Raises
        ------
        MTH5Error
            If a KeyError is raised while looking up or wrapping the group.

        Examples
        --------
        >>> station = survey_obj._get_group('MT_001', StationGroup)
        >>> print(station.metadata.id)
        'MT_001'
        """
        name = validate_name(name)
        try:
            # get the group and be sure to read the metadata
            group = group_class(self.hdf5_group[name], **self.dataset_options)
            group.read_metadata()
            return group
        except KeyError as key_error:
            msg = f"Error: {name} does not exist, check groups_list for existing names"
            self.logger.debug(msg)
            raise MTH5Error(msg) from key_error

    def _remove_group(self, name: str) -> None:
        """
        Remove a group from the HDF5 file.

        Parameters
        ----------
        name : str
            Name of the group to remove; passed through ``validate_name``.

        Raises
        ------
        MTH5Error
            If deletion raises a KeyError whose message does not contain
            "Couldn't delete link".

        Notes
        -----
        - A KeyError containing "Couldn't delete link" is treated as
          read-only mode and only logged as a warning.  NOTE(review): confirm
          that h5py does not use the same message prefix for a missing name.
        - Removing a group does not reduce the HDF5 file size, it only
          removes the reference (see the message logged on success).

        Examples
        --------
        >>> survey_obj._remove_group('MT_001')
        >>> print('MT_001' in survey_obj.groups_list)
        False
        """
        name = validate_name(name)
        try:
            del self.hdf5_group[name]
            self.logger.info(
                "Deleting a group does not reduce the HDF5 "
                "file size, it simply removes the reference. If "
                "file size reduction is your goal, simply copy"
                " what you want into another file."
            )
        except KeyError as key_error:
            if "Couldn't delete link" in str(key_error):
                self.logger.warning(f"File is in read-only mode, cannot delete {name}")
            else:
                msg = f"{name} does not exist. Check groups_list for existing names"
                self.logger.debug(msg)
                raise MTH5Error(msg) from key_error

    def rename_group(self, new_name: str) -> None:
        """
        Rename the current group in the HDF5 file.

        Moves this group to ``new_name`` under its current parent and then
        updates ``metadata.hdf5_reference`` from the wrapped object.

        Parameters
        ----------
        new_name : str
            New name for the group; passed through ``validate_name``.

        Raises
        ------
        MTH5Error
            If the move raises a ValueError whose message does not contain
            "no write intent".

        Notes
        -----
        A "no write intent" error (read-only file) is logged as a warning
        and does not raise.

        Examples
        --------
        >>> survey_obj.rename_group('NewSurveyName')
        >>> print(survey_obj.hdf5_group.name)
        '/NewSurveyName'
        """
        new_name = validate_name(new_name)
        try:
            parent_group = self.hdf5_group.parent
            parent_group.move(self.hdf5_group.name, new_name)
            self.logger.info(f"Renamed group to {new_name}")
            # Update hdf5 reference in metadata
            self.metadata.hdf5_reference = self.hdf5_group.ref
        except ValueError as error:
            if "no write intent" in str(error):
                self.logger.warning("File is in read-only mode, cannot rename group.")
            else:
                msg = f"Failed to rename group to {new_name}: {error}"
                self.logger.error(msg)
                raise MTH5Error(msg) from error