Coverage for C: \ Users \ peaco \ OneDrive \ Documents \ GitHub \ mth5 \ mth5 \ mth5.py: 78%
624 statements
« prev ^ index » next coverage.py v7.13.1, created at 2026-01-27 20:09 -0800
« prev ^ index » next coverage.py v7.13.1, created at 2026-01-27 20:09 -0800
1# -*- coding: utf-8 -*-
2"""
3==================
4MTH5
5==================
7MTH5 deals with reading and writing an MTH5 file, which are HDF5 files
8developed for magnetotelluric (MT) data. The code is based on h5py and
9numpy. The main purpose is to provide an object-oriented interface for
10managing MT data in the HDF5 format.
12This module implements the MTH5 class which provides a container for the
13hierarchical structure of MT data collection:
15- Version 0.1.0: Survey → Stations → Runs → Channels
16- Version 0.2.0: Experiment → Surveys → Stations → Runs → Channels
18All timeseries data are stored as individual channels with appropriate
19metadata for electric, magnetic, and auxiliary data.
21Created on Sun Dec 9 20:50:41 2018
23:copyright: Jared Peacock (jpeacock@usgs.gov)
25:license: MIT
27Notes
28-----
29For detailed information about the MTH5 format and metadata standards,
30see https://github.com/kujaku11/MTarchive/
32Examples
33--------
34Create a new MTH5 file and add a station:
36>>> from mth5 import mth5
37>>> mth5_obj = mth5.MTH5(file_version='0.2.0')
38>>> mth5_obj.open_mth5('test.mth5', 'w')
39>>> survey = mth5_obj.add_survey('survey_001')
40>>> station = mth5_obj.add_station('MT001', survey='survey_001')
42See Also
43--------
44h5py : HDF5 library used for file I/O
45mt_metadata : Metadata standards for MT data
46"""
48from __future__ import annotations
50# =============================================================================
51# Imports
52# =============================================================================
53from pathlib import Path
54from platform import platform
56import h5py
57from loguru import logger
58from mt_metadata.common.mttime import get_now_utc
59from mt_metadata.timeseries import Experiment
60from mt_metadata.transfer_functions.core import TF
62from mth5 import __version__ as mth5_version
63from mth5 import (
64 ACCEPTABLE_DATA_LEVELS,
65 ACCEPTABLE_FILE_SUFFIXES,
66 ACCEPTABLE_FILE_TYPES,
67 ACCEPTABLE_FILE_VERSIONS,
68 CHANNEL_DTYPE,
69 FC_DTYPE,
70 groups,
71 helpers,
72 TF_DTYPE,
73)
74from mth5.tables import ChannelSummaryTable, FCSummaryTable, TFSummaryTable
75from mth5.utils.exceptions import MTH5Error
78# =============================================================================
79# MT HDF5 file
80# =============================================================================
83class MTH5:
84 """
85 MTH5 is the main container for the HDF5 file format developed for MT data
87 It uses the metadata standards developed by the
88 `IRIS PASSCAL software group
89 <https://www.iris.edu/hq/about_iris/governance/mt_soft>`_
90 and defined in the
91 `metadata documentation
92 <https://github.com/kujaku11/MTarchive/blob/tables/docs/mt_metadata_guide.pdf>`_.
94 MTH5 is built with h5py and therefore numpy. The structure follows the
95 different levels of MT data collection:
97 For version 0.1.0:
99 - Survey
101 - Reports
102 - Standards
103 - Filters
104 - Stations
106 - Run
108 - Channel
110 For version 0.2.0:
112 - Experiment
114 - Reports
115 - Standards
116 - Surveys
118 - Reports
119 - Standards
120 - Filters
121 - Stations
123 - Run
125 -Channel
128 All timeseries data are stored as individual channels with the appropriate
129 metadata defined for the given channel, i.e. electric, magnetic, auxiliary.
131 Each level is represented as a mth5 group class object which has methods
132 to add, remove, and get a group from the level below. Each group has a
133 metadata attribute that is the appropriate metadata class object. For
134 instance the SurveyGroup has an attribute metadata that is a
135 :class:`mth5.metadata.Survey` object. Metadata is stored in the HDF5 group
136 attributes as (key, value) pairs.
138 All groups are represented by their structure tree and can be shown
139 at any time from the command line.
141 Each level has a summary array of the contents of the levels below to
142 hopefully make searching easier.
144 :param filename: name of the to be or existing file
145 :type filename: string or :class:`pathlib.Path`
146 :param compression: compression type. Supported lossless compressions are
148 * 'lzf' - Available with every installation of h5py
149 (C source code also available). Low to
150 moderate compression, very fast. No options.
151 * 'gzip' - Available with every installation of HDF5,
152 so it’s best where portability is required.
153 Good compression, moderate speed.
154 compression_opts sets the compression level
155 and may be an integer from 0 to 9,
156 default is 3.
157 * 'szip' - Patent-encumbered filter used in the NASA
158 community. Not available with all
159 installations of HDF5 due to legal reasons.
160 Consult the HDF5 docs for filter options.
162 :param compression_opts: compression options, see above
163 :type compression_opts: string or int depending on compression type
164 :param shuffle: Block-oriented compressors like GZIP or LZF work better
165 when presented with runs of similar values. Enabling the
166 shuffle filter rearranges the bytes in the chunk and may
167 improve compression ratio. No significant speed penalty,
168 lossless.
169 :type shuffle: boolean
170 :param fletcher32: Adds a checksum to each chunk to detect data corruption.
171 Attempts to read corrupted chunks will fail with an
172 error. No significant speed penalty. Obviously
173 shouldn’t be used with lossy compression filters.
174 :type fletcher32: boolean
175 :param data_level: level the data are stored following levels defined by
176 `NASA ESDS <https://earthdata.nasa.gov/collaborate/open-data-services-and-software/data-information-policy/data-levels>`_
178 * 0 - Raw data
179 * 1 - Raw data with response information and full metadata
180 * 2 - Derived product, raw data has been manipulated
182 :type data_level: integer, defaults to 1
183 :param file_version: Version of the file [ '0.1.0' | '0.2.0' ], defaults to "0.2.0"
184 :type file_version: string, optional
186 :Usage:
188 * Open a new file and show initialized file
190 >>> from mth5 import mth5
191 >>> mth5_obj = mth5.MTH5(file_version='0.1.0')
192 >>> # Have a look at the dataset options
193 >>> mth5_obj.dataset_options
194 {'compression': 'gzip',
195 'compression_opts': 3,
196 'shuffle': True,
197 'fletcher32': True}
198 >>> mth5_obj.open_mth5(r"/home/mtdata/mt01.mth5", 'w')
199 >>> mth5_obj
200 /:
201 ====================
202 |- Group: Survey
203 ----------------
204 |- Group: Filters
205 -----------------
206 --> Dataset: summary
207 ......................
208 |- Group: Reports
209 -----------------
210 --> Dataset: summary
211 ......................
212 |- Group: Standards
213 -------------------
214 --> Dataset: summary
215 ......................
216 |- Group: Stations
217 ------------------
218 --> Dataset: summary
219 ......................
222 * Add metadata for survey from a dictionary
224 >>> survey_dict = {'survey':{'acquired_by': 'me', 'archive_id': 'MTCND'}}
225 >>> survey = mth5_obj.survey_group
226 >>> survey.metadata.from_dict(survey_dict)
227 >>> survey.metadata
228 {
229 "survey": {
230 "acquired_by.author": "me",
231 "acquired_by.comments": null,
232 "archive_id": "MTCND"
233 ...}
234 }
236 * Add a station from the convenience function
238 >>> station = mth5_obj.add_station('MT001')
239 >>> mth5_obj
240 /:
241 ====================
242 |- Group: Survey
243 ----------------
244 |- Group: Filters
245 -----------------
246 --> Dataset: summary
247 ......................
248 |- Group: Reports
249 -----------------
250 --> Dataset: summary
251 ......................
252 |- Group: Standards
253 -------------------
254 --> Dataset: summary
255 ......................
256 |- Group: Stations
257 ------------------
258 |- Group: MT001
259 ---------------
260 --> Dataset: summary
261 ......................
262 --> Dataset: summary
263 ......................
264 >>> station
265 /Survey/Stations/MT001:
266 ====================
267 --> Dataset: summary
268 ......................
270 >>> data.schedule_01.ex[0:10] = np.nan
271 >>> data.calibration_hx[...] = np.logspace(-4, 4, 20)
273 .. note:: if replacing an entire array with a new one you need to use [...]
274 otherwise the data will not be updated.
276 .. warning:: You can only replace entire arrays with arrays of the same
277 size. Otherwise you need to delete the existing data and
278 make a new dataset.
280 .. seealso:: https://www.hdfgroup.org/ and https://www.h5py.org/
282 """
def __init__(
    self,
    filename=None,
    compression="gzip",
    compression_opts=4,
    shuffle=True,
    fletcher32=True,
    data_level=1,
    file_version="0.2.0",
):
    """
    Configure an MTH5 container; no file is opened here.

    Parameters
    ----------
    filename : str | Path, optional
        Name of the file to use later; passed through the ``filename``
        setter.
    compression : str, default 'gzip'
        Compression type, validated by ``helpers.validate_compression``.
    compression_opts : str | int, default 4
        Compression options, validated together with ``compression``.
    shuffle : bool, default True
        Stored as the ``shuffle`` dataset option.
    fletcher32 : bool, default True
        Stored as the ``fletcher32`` dataset option.
    data_level : int, default 1
        Data level; passed through the ``data_level`` setter.
    file_version : str, default '0.2.0'
        File version; passed through the ``file_version`` setter.
    """
    self.logger = logger

    # Name-mangled attributes so the handle and the storage options are not
    # changed by accident from outside the class.
    self.__hdf5_obj = None
    validated = helpers.validate_compression(compression, compression_opts)
    self.__compression, self.__compression_opts = validated
    self.__shuffle = shuffle
    self.__fletcher32 = fletcher32

    # Each of these assignments runs the matching property setter.
    self.data_level = data_level
    self.filename = filename
    self.file_version = file_version
    self.file_type = "mth5"

    self._set_default_groups()
def __str__(self) -> str:
    """Return the tree of the open HDF5 file, or a notice when it is closed."""
    if not self.h5_is_read():
        return "HDF5 file is closed and cannot be accessed."
    return helpers.get_tree(self.__hdf5_obj)
def __repr__(self) -> str:
    """Return the same text as ``__str__``."""
    text = self.__str__()
    return text
322 def __enter__(self) -> MTH5:
323 """Enter context manager."""
324 return self
def __exit__(
    self,
    exc_type: type[BaseException] | None,
    exc_val: BaseException | None,
    exc_tb,
) -> bool:
    """
    Leave the ``with`` block by closing the file.

    Returns
    -------
    bool
        Always False, so an exception raised inside the block propagates.
    """
    self.close_mth5()
    # False -> never suppress the exception that ended the with-block.
    return False
@property
def dataset_options(self) -> dict[str, str | int | bool]:
    """
    HDF5 dataset storage options held by this object.

    Returns
    -------
    dict[str, str | int | bool]
        Keys ``compression``, ``compression_opts``, ``shuffle`` and
        ``fletcher32``, in that order.

    Examples
    --------
    >>> mth5_obj = MTH5()
    >>> opts = mth5_obj.dataset_options
    >>> print(opts['compression'])
    'gzip'
    """
    return dict(
        compression=self.__compression,
        compression_opts=self.__compression_opts,
        shuffle=self.__shuffle,
        fletcher32=self.__fletcher32,
    )
@property
def file_attributes(self):
    """
    Root-level attributes describing the file.

    Returns
    -------
    dict
        File type and version, the access platform and time, the mth5
        software name and version, and the data level. The platform string
        and time stamp are computed on every access.
    """
    attributes = {"file.type": "MTH5", "file.version": self.file_version}
    attributes["file.access.platform"] = platform()
    attributes["file.access.time"] = get_now_utc()
    attributes["mth5.software.version"] = mth5_version
    attributes["mth5.software.name"] = "mth5"
    attributes["data_level"] = self.data_level
    return attributes
@property
def filename(self):
    """
    Path of the HDF5 file.

    When a file is open the path comes from the open handle; otherwise a
    warning is logged and the stored name (possibly None) is returned.
    """
    if not self.h5_is_read():
        msg = (
            "MTH5 file is not open or has not been created yet. "
            "Returning default name"
        )
        self.logger.warning(msg)
        return self.__filename
    return Path(self.__hdf5_obj.filename)

@filename.setter
def filename(self, value):
    """
    Store the file name, forcing an accepted extension.

    ``None`` clears the stored name. Any other value is converted to a
    :class:`pathlib.Path`; a suffix outside ``ACCEPTABLE_FILE_SUFFIXES`` is
    replaced with ``.h5`` and the change is logged.
    """
    self.__filename = None
    if value is None:
        return

    path = value if isinstance(value, Path) else Path(value)
    if path.suffix in ACCEPTABLE_FILE_SUFFIXES:
        self.__filename = path
        return

    msg = (
        f"file extension {path.suffix} is not correct. "
        "Changing to default .h5"
    )
    self.logger.info(msg)
    self.__filename = path.with_suffix(".h5")
@property
def file_type(self):
    """
    File type, read from the open file when possible.

    Falls back to the value stored on the object when no file is open or
    when the file has no ``file.type`` attribute yet (a file that is still
    being initialized).
    """
    if not self.h5_is_read():
        return self.__file_type
    try:
        return self.__hdf5_obj.attrs["file.type"]
    except KeyError:
        return self.__file_type

@file_type.setter
def file_type(self, value):
    """
    Validate and store the file type, mirroring it to an open file.

    Raises
    ------
    ValueError
        If ``value`` is not a string or not in ``ACCEPTABLE_FILE_TYPES``.
    """
    msg = None
    if not isinstance(value, str):
        msg = f"Input file type must be a string not {type(value)}"
    elif value not in ACCEPTABLE_FILE_TYPES:
        msg = f"Input file.type is not valid, must be {ACCEPTABLE_FILE_TYPES}"
    if msg is not None:
        self.logger.error(msg)
        raise ValueError(msg)

    self.__file_type = value
    if self.h5_is_read():
        self.__hdf5_obj.attrs["file.type"] = value
@property
def file_version(self):
    """
    MTH5 file version, read from the open file when possible.

    Falls back to the value stored on the object when no file is open or
    when the file has no ``file.version`` attribute yet (a file that is
    still being initialized).
    """
    if not self.h5_is_read():
        return self.__file_version
    try:
        return self.__hdf5_obj.attrs["file.version"]
    except KeyError:
        return self.__file_version

@file_version.setter
def file_version(self, value):
    """
    Validate and store the file version, then refresh the default groups.

    Raises
    ------
    ValueError
        If ``value`` is not a string or not in ``ACCEPTABLE_FILE_VERSIONS``.
    """
    if not isinstance(value, str):
        msg = f"Input file version must be a string not {type(value)}"
        self.logger.error(msg)
        raise ValueError(msg)
    if value not in ACCEPTABLE_FILE_VERSIONS:
        msg = f"Input file.version is not valid, must be {ACCEPTABLE_FILE_VERSIONS}"
        self.logger.error(msg)
        raise ValueError(msg)
    self.__file_version = value

    # Write the attribute to an open file BEFORE refreshing the default
    # groups: the getter prefers the value stored in the file, so the old
    # order computed the defaults from the previous version.
    if self.h5_is_read():
        self.__hdf5_obj.attrs["file.version"] = value
    self._set_default_groups()
@property
def software_name(self):
    """
    Name of the software that wrote the file.

    Read from the ``mth5.software.name`` attribute of an open file;
    ``"mth5"`` when no file is open.
    """
    if not self.h5_is_read():
        return "mth5"
    return self.__hdf5_obj.attrs["mth5.software.name"]
@property
def data_level(self):
    """
    Data level, read from the open file when possible.

    Falls back to the value stored on the object when no file is open or
    when the file has no ``data_level`` attribute.
    """
    if not self.h5_is_read():
        return self.__data_level
    try:
        return self.__hdf5_obj.attrs["data_level"]
    except KeyError:
        return self.__data_level

@data_level.setter
def data_level(self, value):
    """
    Validate and store the data level, mirroring it to an open file.

    Raises
    ------
    ValueError
        If ``value`` is not an integer or not in ``ACCEPTABLE_DATA_LEVELS``.
    """
    if not isinstance(value, int):
        # The message previously said "file type" (copied from the
        # file_type setter); it now names the attribute being set.
        msg = f"Input data level must be an integer not {type(value)}"
        self.logger.error(msg)
        raise ValueError(msg)
    if value not in ACCEPTABLE_DATA_LEVELS:
        msg = f"Input data_level is not valid, must be {ACCEPTABLE_DATA_LEVELS}"
        self.logger.error(msg)
        raise ValueError(msg)
    self.__data_level = value

    if self.h5_is_read():
        self.__hdf5_obj.attrs["data_level"] = value
def _set_default_groups(self):
    """
    Set the root group name, root path and default subgroups.

    Version 0.1.0 is rooted at ``Survey``; version 0.2.0 at ``Experiment``.
    Any other version leaves the attributes untouched.
    """
    version = self.file_version
    if version in ["0.1.0"]:
        root_name = "Survey"
        subgroups = ["Stations", "Reports", "Filters", "Standards"]
    elif version in ["0.2.0"]:
        root_name = "Experiment"
        subgroups = ["Surveys", "Reports", "Standards"]
    else:
        return

    self._default_root_name = root_name
    self._default_subgroup_names = subgroups
    self._root_path = f"/{root_name}"
@property
def experiment_group(self):
    """
    Convenience property for the /Experiment group.

    Returns None, after logging why, when the file is closed or the file
    version is not 0.2.0.
    """
    if not self.h5_is_read():
        self.logger.info("File is closed cannot access /Experiment")
        return None

    if self.file_version in ["0.2.0"]:
        h5_group = self.__hdf5_obj[f"{self._root_path}"]
        return groups.ExperimentGroup(h5_group, **self.dataset_options)

    self.logger.info(
        f"File version {self.file_version} does not have an Experiment Group"
    )
    return None
@property
def survey_group(self):
    """
    Convenience property for the /Survey group (file version 0.1.0 only).

    Returns None, after logging why, when the file is closed or the file
    version is 0.2.0.
    """
    version = self.file_version
    if version in ["0.1.0"]:
        if not self.h5_is_read():
            self.logger.info("File is closed cannot access /Survey")
            return None
        h5_group = self.__hdf5_obj[f"{self._root_path}"]
        return groups.SurveyGroup(h5_group, **self.dataset_options)

    if version in ["0.2.0"]:
        self.logger.info(
            f"File version {self.file_version} does not have a survey_group, try surveys_group"
        )
    return None
@property
def surveys_group(self):
    """
    Convenience property for the /Surveys group (file version 0.2.0 only).

    Returns None, after logging why, when the file is closed or the file
    version is 0.1.0.
    """
    version = self.file_version
    if version in ["0.1.0"]:
        self.logger.info(
            f"File version {self.file_version} does not have a surveys_group, try survey_group"
        )
        return None

    if version in ["0.2.0"]:
        if self.h5_is_read():
            h5_group = self.__hdf5_obj[f"{self._root_path}/Surveys"]
            return groups.MasterSurveyGroup(h5_group, **self.dataset_options)
        self.logger.info("File is closed cannot access /Surveys")
    return None
@property
def reports_group(self):
    """
    Convenience property for the Reports group below the root group.

    Returns None, after logging, when the file is closed.
    """
    if not self.h5_is_read():
        self.logger.info("File is closed cannot access /Reports")
        return None
    h5_group = self.__hdf5_obj[f"{self._root_path}/Reports"]
    return groups.ReportsGroup(h5_group, **self.dataset_options)
@property
def filters_group(self):
    """
    Convenience property for the /Survey/Filters group (version 0.1.0 only).

    Returns None, after logging why, when the file is closed or the file
    version is not 0.1.0.
    """
    if not self.h5_is_read():
        self.logger.info("File is closed cannot access /Filters")
        return None

    if self.file_version in ["0.1.0"]:
        h5_group = self.__hdf5_obj[f"{self._root_path}/Filters"]
        return groups.FiltersGroup(h5_group, **self.dataset_options)

    self.logger.info(
        "File version 0.2.0 does not have a FiltersGroup at the experiment level"
    )
    return None
@property
def standards_group(self):
    """
    Convenience property for the Standards group below the root group.

    Returns None, after logging, when the file is closed.
    """
    if not self.h5_is_read():
        self.logger.info("File is closed cannot access /Standards")
        return None
    h5_group = self.__hdf5_obj[f"{self._root_path}/Standards"]
    return groups.StandardsGroup(h5_group, **self.dataset_options)
@property
def stations_group(self):
    """
    Convenience property for the /Survey/Stations group (version 0.1.0 only).

    Returns None, after logging why, when the file is closed or the file
    version is not 0.1.0.
    """
    if not self.h5_is_read():
        self.logger.info("File is closed cannot access /Stations")
        return None

    if self.file_version in ["0.1.0"]:
        h5_group = self.__hdf5_obj[f"{self._root_path}/Stations"]
        return groups.MasterStationGroup(h5_group, **self.dataset_options)

    self.logger.info(
        f"File version {self.file_version} does not have a Stations. "
        "try surveys_group."
    )
    return None
@property
def station_list(self):
    """
    Names of the stations in the file.

    An empty list when the file is closed. For version 0.1.0 the names come
    from the stations group; for version 0.2.0 they are collected survey by
    survey.
    """
    if not self.h5_is_read():
        return []

    if self.file_version in ["0.1.0"]:
        return self.stations_group.groups_list

    if self.file_version in ["0.2.0"]:
        names = []
        for survey_name in self.surveys_group.groups_list:
            survey_group = self.surveys_group.get_survey(survey_name)
            names.extend(survey_group.stations_group.groups_list)
        return names
def open_mth5(
    self,
    filename: str | Path | None = None,
    mode: str = "a",
    **kwargs,
) -> MTH5:
    """
    Open an MTH5 file.

    Opens an existing MTH5 file or creates a new one. Validates file structure
    and initializes summary datasets if needed.

    Parameters
    ----------
    filename : str | Path, optional
        Path to MTH5 file. If None, uses stored filename.
    mode : str, default 'a'
        File opening mode:

            * 'r'  : Read-only
            * 'a'  : Read/write, create if doesn't exist
            * 'w'  : Write, overwrite if exists
            * 'x'  : Write, fail if exists
            * 'w-' : Write, fail if exists (same as 'x')
            * 'r+' : Read/write, file must exist

    **kwargs
        Additional arguments passed to h5py.File()

    Returns
    -------
    MTH5
        Returns self for method chaining.

    Raises
    ------
    MTH5Error
        If no filename is available, the file is invalid, the mode is not
        understood, or overwriting fails and no open handle is left.

    Examples
    --------
    Open an existing file for reading:

    >>> mth5_obj = MTH5()
    >>> mth5_obj.open_mth5('data.mth5', 'r')

    Create a new file:

    >>> mth5_obj = MTH5(file_version='0.2.0')
    >>> mth5_obj.open_mth5('new_file.mth5', 'w')

    See Also
    --------
    close_mth5 : Close the MTH5 file
    """
    if filename is not None:
        self.__filename = filename if isinstance(filename, Path) else Path(filename)
    if self.__filename is None:
        # Previously this surfaced as AttributeError on None.exists().
        msg = "No filename given and none stored, cannot open an MTH5 file"
        self.logger.error(msg)
        raise MTH5Error(msg)

    if self.__filename.exists():
        if mode in ["w"]:
            self.logger.warning(
                f"{self.__filename.name} will be overwritten in 'w' mode"
            )
            try:
                # kwargs are forwarded here as they are for a new file.
                self._initialize_file(mode, **kwargs)
            except OSError as error:
                msg = (
                    f"{error}. Need to close any references to {self.__filename} first. "
                    "Then reopen the file in the preferred mode"
                )
                self.logger.exception(msg)
                # A previously opened handle may still be usable; without
                # one the summary check below would fail on None.
                if not self.h5_is_read():
                    raise MTH5Error(msg) from error
        elif mode in ["a", "w-", "x", "r+"]:
            self.__hdf5_obj = h5py.File(self.__filename, mode=mode, **kwargs)
            self._set_default_groups()
            if not self.validate_file():
                msg = "Input file is not a valid MTH5 file"
                self.logger.error(msg)
                raise MTH5Error(msg)
        elif mode in ["r"]:
            self.__hdf5_obj = h5py.File(self.__filename, mode=mode, **kwargs)
            self._set_default_groups()
            # Result deliberately ignored: problems are logged by
            # validate_file but do not block read-only access.
            self.validate_file()
        else:
            msg = f"mode {mode} is not understood"
            self.logger.error(msg)
            raise MTH5Error(msg)
    else:
        if mode in ["a", "w", "w-", "x"]:
            self._initialize_file(mode=mode, **kwargs)
        else:
            msg = f"Cannot open new file in mode {mode} "
            self.logger.error(msg)
            raise MTH5Error(msg)

    # TODO need to add a validation step to check for version and legit file
    if "channel_summary" not in self.__hdf5_obj[self._root_path].keys():
        # Datasets can only be created in a writable file.
        if self.h5_is_write():
            self._initialize_summary()
        else:
            self.logger.warning(
                "File has no channel_summary and is not writable, "
                "summary tables were not created"
            )
    return self
def _initialize_file(self, mode: str = "w", **kwargs) -> None:
    """
    Open the HDF5 file and write the default MTH5 layout into it.

    Parameters
    ----------
    mode : str, default 'w'
        File opening mode for h5py.
    **kwargs
        Additional arguments passed to h5py.File()

    Notes
    -----
    The root group and its subgroups depend on the file version:

    - v0.1.0: Survey, Filters, Reports, Standards, Stations
    - v0.2.0: Experiment, Surveys, Reports, Standards
    """
    self.__hdf5_obj = h5py.File(self.__filename, mode, **kwargs)

    # file-level metadata
    self.__hdf5_obj.attrs.update(self.file_attributes)

    root_name = self._default_root_name
    root = self.__hdf5_obj.create_group(root_name)
    if root_name == "Survey":
        # version 0.1.0: the root itself is a survey and gets a default id
        survey = groups.SurveyGroup(root)
        survey.metadata.id = "default_survey"
        survey.write_metadata()

    for group_name in self._default_subgroup_names:
        try:
            self.__hdf5_obj.create_group(f"{root_name}/{group_name}")
        except ValueError:
            # creation failed with ValueError (presumably the group already
            # exists); initialize whatever is there
            pass
        subgroup = getattr(self, f"{group_name.lower()}_group")
        subgroup.initialize_group()

    self._initialize_summary()

    self.logger.info(
        f"Initialized MTH5 {self.file_version} file {self.filename} in mode {mode}"
    )
def _initialize_summary(self) -> None:
    """
    Create the channel, transfer function and Fourier coefficient summary
    datasets under the root group.

    Each dataset starts with one row and can grow without limit. A
    ``ValueError`` from creating a dataset is ignored so the remaining
    datasets are still attempted.
    """
    summary_dtypes = (
        ("channel_summary", CHANNEL_DTYPE),
        ("tf_summary", TF_DTYPE),
        ("fc_summary", FC_DTYPE),
    )
    for dataset_name, dataset_dtype in summary_dtypes:
        try:
            self.__hdf5_obj[self._default_root_name].create_dataset(
                dataset_name,
                shape=(1,),
                maxshape=(None,),
                dtype=dataset_dtype,
                **self.dataset_options,
            )
        except ValueError:
            pass
def validate_file(self) -> bool:
    """
    Validate an open MTH5 file.

    Checks the file type, file version and data level against the
    acceptable values, then checks that every group below the root (names
    containing "summary" are skipped) is one of the default subgroups.

    Returns
    -------
    bool
        True if file is valid, False otherwise (including a closed file).

    Examples
    --------
    >>> mth5_obj = MTH5()
    >>> mth5_obj.open_mth5('test.mth5', 'r')
    >>> is_valid = mth5_obj.validate_file()
    """
    if not self.h5_is_read():
        self.logger.warning("HDF5 file is not open")
        return False

    if self.file_type not in ACCEPTABLE_FILE_TYPES:
        self.logger.error(f"Unacceptable file type {self.file_type}")
        return False
    if self.file_version not in ACCEPTABLE_FILE_VERSIONS:
        self.logger.error(f"Unacceptable file version {self.file_version}")
        return False
    if self.data_level not in ACCEPTABLE_DATA_LEVELS:
        self.logger.error(f"Unacceptable data_level {self.data_level}")
        return False

    # pick the root group that matches the file version
    if self.file_version in ["0.1.0"]:
        root_group = self.survey_group
    elif self.file_version in ["0.2.0"]:
        root_group = self.experiment_group
    else:
        return True

    for group_name in root_group.groups_list:
        if "summary" not in group_name and (
            group_name not in self._default_subgroup_names
        ):
            self.logger.error(f"Unacceptable group {group_name}")
            return False
    return True
def close_mth5(self) -> None:
    """
    Close MTH5 file.

    For a writable file the channel, transfer function and Fourier
    coefficient summary tables are refreshed first. The file is then
    flushed and closed. An ``AttributeError`` or ``ValueError`` along the
    way is logged and ``helpers.close_open_files()`` is called instead.

    Examples
    --------
    >>> mth5_obj = MTH5()
    >>> mth5_obj.open_mth5('test.mth5', 'w')
    >>> mth5_obj.close_mth5()

    Notes
    -----
    Can be called automatically using context manager:

    >>> with MTH5().open_mth5('test.mth5', 'w') as m:
    ...     # do work
    ...     pass  # file closed automatically
    """
    try:
        writable = self.h5_is_write()
        if writable:
            # bring the summary tables up to date before closing
            self.channel_summary.summarize()
            self.tf_summary.summarize()
            try:
                self.fc_summary.summarize()
            except KeyError:
                self.logger.info("Legacy file has no fc_summary dataset.")

        self.__hdf5_obj.flush()
        self.logger.info(f"Flushing and closing {str(self.filename)}")
        self.__hdf5_obj.close()
    except (AttributeError, ValueError) as error:
        self.logger.error(f"Error in close_mth5: {error}")
        helpers.close_open_files()
def h5_is_write(self) -> bool:
    """
    Check if HDF5 file is open in write mode.

    Returns
    -------
    bool
        True when the handle is an ``h5py.File`` whose mode contains "w" or
        "+"; False otherwise, including when reading the mode raises
        ``ValueError``.

    Examples
    --------
    >>> mth5_obj = MTH5()
    >>> mth5_obj.open_mth5('test.mth5', 'w')
    >>> mth5_obj.h5_is_write()
    True
    """
    if not isinstance(self.__hdf5_obj, h5py.File):
        return False
    try:
        file_mode = self.__hdf5_obj.mode
    except ValueError:
        return False
    return "w" in file_mode or "+" in file_mode
def h5_is_read(self) -> bool:
    """
    Check if HDF5 file is open and readable.

    Returns
    -------
    bool
        True when the handle is an ``h5py.File`` reporting one of the known
        modes; False otherwise, including when reading the mode raises
        ``ValueError``.

    Examples
    --------
    >>> mth5_obj = MTH5()
    >>> mth5_obj.open_mth5('test.mth5', 'r')
    >>> mth5_obj.h5_is_read()
    True
    """
    if not isinstance(self.__hdf5_obj, h5py.File):
        return False
    try:
        file_mode = self.__hdf5_obj.mode
    except ValueError:
        return False
    return file_mode in ["r", "r+", "a", "w", "w-", "x"]
def has_group(self, group_name):
    """
    Check whether an object with the given name exists in the open file.

    The name is compared for equality with every name that
    ``h5py.File.visit`` passes to its callback.
    NOTE(review): per the h5py documentation these are paths relative to
    the file root without a leading slash -- confirm against callers.

    :param group_name: name to look for
    :type group_name: string
    :return: True if a visited name equals ``group_name``; False otherwise,
        including when the file is closed
    :rtype: bool
    """
    if not self.h5_is_read():
        return False

    def has_name(name):
        # visit() stops at the first non-None return value, so a
        # non-matching name must yield None rather than False.
        return True if name == group_name else None

    return bool(self.__hdf5_obj.visit(has_name))
def _make_h5_path(
    self, survey=None, station=None, run=None, channel=None, tf_id=None
):
    """
    Build the HDF5 path for a survey, station, run, channel or transfer
    function.

    Every name given is passed through ``helpers.validate_name``. A
    transfer function id takes precedence over a run, and both are only
    used when a station is given.

    :raises ValueError: if the file version is 0.2.0 and no survey is given
    """
    version = self.file_version
    if version == "0.1.0":
        h5_path = self._root_path
    elif version == "0.2.0":
        if survey is None:
            raise ValueError("Survey must be input for file type 0.2.0")
        h5_path = f"{self._root_path}/Surveys/{helpers.validate_name(survey)}"

    if station is None:
        return h5_path
    h5_path = f"{h5_path}/Stations/{helpers.validate_name(station)}"

    if tf_id is not None:
        return f"{h5_path}/Transfer_Functions/{helpers.validate_name(tf_id)}"

    if run is not None:
        h5_path = f"{h5_path}/{helpers.validate_name(run)}"
        if channel is not None:
            h5_path = f"{h5_path}/{helpers.validate_name(channel)}"
    return h5_path
def get_reference_path(self, h5_reference):
    """
    Get the HDF5 path from a reference

    :param h5_reference: reference used to index the open HDF5 file
    :type h5_reference: HDF5 reference
    :return: ``name`` attribute of the referenced object
    :rtype: string

    """
    return self.__hdf5_obj[h5_reference].name
def from_reference(self, h5_reference):
    """
    Get an HDF5 group, dataset, etc from a reference

    The ``mth5_type`` attribute of the referenced object selects the
    wrapper class. A transfer function is returned through
    ``to_tf_object()``. When a ``KeyError`` occurs, a message is logged and
    the raw HDF5 object is returned.

    :param h5_reference: reference used to index the open HDF5 file
    :type h5_reference: HDF5 reference
    :return: wrapped group or dataset, transfer function object, or the raw
        HDF5 object
    :rtype: depends on ``mth5_type``

    """
    wrappers = {
        "survey": groups.SurveyGroup,
        "station": groups.StationGroup,
        "run": groups.RunGroup,
        "electric": groups.ElectricDataset,
        "magnetic": groups.MagneticDataset,
        "auxiliary": groups.AuxiliaryDataset,
        "transferfunction": groups.TransferFunctionGroup,
    }

    # in the future should allow this to return the proper container.
    referenced = self.__hdf5_obj[h5_reference]
    mth5_type = referenced.attrs["mth5_type"].lower()

    try:
        wrapped = wrappers[mth5_type](referenced)
        return wrapped.to_tf_object() if mth5_type == "transferfunction" else wrapped
    except KeyError:
        self.logger.info(
            f"Could not identify the MTH5 type {mth5_type}, " "returning h5 group."
        )
        return referenced
def to_experiment(self, has_data=True):
    """
    Create an :class:`mt_metadata.timeseries.Experiment` object from the
    metadata contained in the MTH5 file.

    :param has_data: if True, runs whose ``has_data`` entry in the run
        summary is False are removed from the returned experiment
    :type has_data: bool, defaults to True
    :returns: :class:`mt_metadata.timeseries.Experiment`, or None when the
        file is not open

    """
    if not self.h5_is_read():
        return None

    if self.file_version in ["0.1.0"]:
        experiment = Experiment()
        experiment.surveys.append(self.survey_group.metadata)
    elif self.file_version in ["0.2.0"]:
        experiment = self.experiment_group.metadata

    # remove runs that have no data.
    if has_data:
        # run_summary is rebuilt on every access, so fetch it only once.
        run_summary = self.run_summary
        no_data_runs = run_summary[run_summary.has_data == False]  # noqa: E712
        if not no_data_runs.empty:
            for row in no_data_runs.itertuples():
                experiment.surveys[row.survey].stations[row.station].remove_run(
                    row.run
                )

    return experiment
def from_experiment(self, experiment, survey_index=0, update=False):
    """
    Fill out an MTH5 from a :class:`mt_metadata.timeseries.Experiment` object

    For file version 0.1.0 only the survey at ``survey_index`` is written:
    its metadata, stations, runs, channels and filters. For file version
    0.2.0 every survey in the experiment is written and ``survey_index`` is
    not used. Nothing is done when the file is not open for writing.

    :param experiment: Experiment metadata
    :type experiment: :class:`mt_metadata.timeseries.Experiment`
    :param survey_index: Index of the survey to write (version 0.1.0)
    :type survey_index: int, defaults to 0
    :param update: if True, existing station, run and channel metadata are
        updated from the experiment and written back
    :type update: bool, defaults to False

    """
    if not self.h5_is_write():
        return

    if self.file_version in ["0.1.0"]:
        # Take metadata, stations AND filters from the same survey; the
        # stations and filters were previously always read from surveys[0].
        survey = experiment.surveys[survey_index]
        sg = self.survey_group
        # Use skip_none=True to filter out None mth5_type values
        sg.metadata.from_dict(survey.to_dict(), skip_none=True)
        sg.write_metadata()
        self._write_stations_from_metadata(survey.stations, update)
        sg.update_metadata()
        for _, mt_filter in survey.filters.items():
            self.filters_group.add_filter(mt_filter)
    elif self.file_version in ["0.2.0"]:
        for survey in experiment.surveys:
            sg = self.add_survey(survey.id, survey_metadata=survey)
            self._write_stations_from_metadata(
                survey.stations, update, survey=sg.metadata.id
            )
            sg.update_metadata()
            for _, mt_filter in survey.filters.items():
                sg.filters_group.add_filter(mt_filter)

def _write_stations_from_metadata(self, stations, update, survey=None):
    """Add every station, run and channel in ``stations`` to the file."""
    # ``survey`` is only forwarded when given (file version 0.2.0).
    station_kwargs = {} if survey is None else {"survey": survey}
    for station in stations:
        mt_station = self.add_station(
            station.id, station_metadata=station, **station_kwargs
        )
        if update:
            mt_station.metadata.update(station)
            mt_station.write_metadata()
        for run in station.runs:
            mt_run = mt_station.add_run(run.id, run_metadata=run)
            if update:
                mt_run.metadata.update(run)
                mt_run.write_metadata()
            for channel in run.channels:
                mt_ch = mt_run.add_channel(
                    channel.component,
                    channel.type,
                    None,
                    channel_metadata=channel,
                )
                if update:
                    mt_ch.metadata.update(channel)
                    mt_ch.write_metadata()
            # refresh the run, then the station, once their children exist
            mt_run.update_metadata()
        mt_station.update_metadata()
@property
def channel_summary(self) -> ChannelSummaryTable:
    """
    Channel summary table of the open file.

    Wraps the HDF5 object stored at ``<root path>/channel_summary`` in a
    :class:`ChannelSummaryTable`.

    Returns
    -------
    ChannelSummaryTable
        Table wrapper around the ``channel_summary`` dataset.

    Examples
    --------
    >>> mth5_obj = MTH5()
    >>> mth5_obj.open_mth5('test.mth5', 'r')
    >>> summary = mth5_obj.channel_summary
    """
    table_path = f"{self._root_path}/channel_summary"
    return ChannelSummaryTable(self.__hdf5_obj[table_path])
@property
def fc_summary(self) -> FCSummaryTable:
    """
    Fourier coefficient summary table of the open file.

    Wraps the HDF5 object stored at ``<root path>/fc_summary`` in a
    :class:`FCSummaryTable`.

    Returns
    -------
    FCSummaryTable
        Table wrapper around the ``fc_summary`` dataset.
    """
    table_path = f"{self._root_path}/fc_summary"
    return FCSummaryTable(self.__hdf5_obj[table_path])
@property
def run_summary(self):
    """
    Run summary derived from the channel summary.

    The summary is built with ``self.channel_summary.to_run_summary()`` and
    a ``mth5_path`` column holding the POSIX form of ``self.filename`` is
    added so downstream processing classes know which file the runs came
    from.

    Returns
    -------
    pandas.DataFrame
        Summary of runs with the ``mth5_path`` column added.
    """
    summary = self.channel_summary.to_run_summary()
    summary["mth5_path"] = self.filename.as_posix()
    return summary
@property
def tf_summary(self) -> TFSummaryTable:
    """
    Transfer function summary table of the open file.

    Wraps the HDF5 object stored at ``<root path>/tf_summary`` in a
    :class:`TFSummaryTable`.

    Returns
    -------
    TFSummaryTable
        Table wrapper around the ``tf_summary`` dataset.
    """
    table_path = f"{self._root_path}/tf_summary"
    return TFSummaryTable(self.__hdf5_obj[table_path])
def add_survey(self, survey_name, survey_metadata=None):
    """
    Add a survey, with metadata if given, by delegating to
    ``self.surveys_group.add_survey``. The documented path of the new
    group is ``/Experiment/Surveys/survey_name``.

    If the survey already exists, that survey is returned and nothing
    is added.

    :param survey_name: Name of the survey, should be the same as
        metadata.id
    :type survey_name: string
    :param survey_metadata: survey metadata container, defaults to None
    :type survey_metadata: :class:`mth5.metadata.survey`, optional
    :return: A convenience class for the added survey
    :rtype: :class:`mth5_groups.SurveyGroup`

    :Example: ::

        >>> from mth5 import mth5
        >>> mth5_obj = mth5.MTH5()
        >>> mth5_obj.open_mth5(r"/test.mth5", mode='a')
        >>> # one option
        >>> new_survey = mth5_obj.add_survey('MT001')
        >>> # another option
        >>> new_survey = mth5_obj.experiment_group.surveys_group.add_survey('MT001')

    """
    surveys = self.surveys_group
    return surveys.add_survey(survey_name, survey_metadata=survey_metadata)
def get_survey(self, survey_name):
    """
    Get the survey group named ``survey_name``.

    The HDF5 path is built with ``self._make_h5_path(survey=survey_name)``;
    the group found there is wrapped in a ``SurveyGroup`` and its metadata
    is read before it is returned.

    :param survey_name: existing survey name
    :type survey_name: string
    :return: convenience survey class
    :rtype: :class:`mth5.mth5_groups.SurveyGroup`
    :raises MTH5Error: if a ``KeyError`` occurs while opening the survey,
        i.e. the survey is not found. A warning is logged first.

    :Example:

        >>> from mth5 import mth5
        >>> mth5_obj = mth5.MTH5()
        >>> mth5_obj.open_mth5(r"/test.mth5", mode='a')
        >>> # one option
        >>> existing_survey = mth5_obj.get_survey('MT001')
        >>> # another option
        >>> existing_survey = mth5_obj.experiment_group.surveys_group.get_survey('MT001')

    """
    survey_path = self._make_h5_path(survey=survey_name)
    try:
        survey_group = groups.SurveyGroup(
            self.__hdf5_obj[survey_path],
            **self.dataset_options,
        )
        survey_group.read_metadata()
    except KeyError:
        msg = f"{survey_path} does not exist, check survey_list for existing names."
        self.logger.warning(msg)
        raise MTH5Error(msg)
    return survey_group
def remove_survey(self, survey_name):
    """
    Remove a survey from the file.

    The HDF5 path is built with ``self._make_h5_path(survey=survey_name)``
    and the entry at that path is deleted from the open HDF5 object.

    .. note:: Deleting a survey is not as simple as del(survey). In HDF5
        this does not free up memory, it simply removes the reference
        to that survey. The common way to get around this is to
        copy what you want into a new file, or overwrite the survey.

    :param survey_name: existing survey name
    :type survey_name: string
    :raises MTH5Error: if the survey path does not exist in the file.
        A warning is logged first.

    :Example: ::

        >>> from mth5 import mth5
        >>> mth5_obj = mth5.MTH5()
        >>> mth5_obj.open_mth5(r"/test.mth5", mode='a')
        >>> # one option
        >>> mth5_obj.remove_survey('MT001')
        >>> # another option
        >>> mth5_obj.experiment_group.surveys_group.remove_survey('MT001')

    """
    survey_path = self._make_h5_path(survey=survey_name)
    try:
        del self.__hdf5_obj[survey_path]
    except KeyError as error:
        msg = f"{survey_path} does not exist, check survey_list for existing names"
        self.logger.warning(msg)
        raise MTH5Error(msg) from error
    else:
        # remind the user that the file does not shrink after a delete
        self.logger.info(
            "Deleting a survey does not reduce the HDF5 "
            "file size, it simply removes the reference. If "
            "file size reduction is your goal, simply copy "
            "what you want into another file."
        )
def add_station(
    self,
    station_name: str,
    station_metadata=None,
    survey: str | None = None,
) -> groups.StationGroup:
    """
    Convenience function to add a station.

    For file version 0.1.0 the station is added through
    ``self.stations_group``. For file version 0.2.0 the survey is fetched
    with ``get_survey`` and the station is added to that survey's stations
    group. For any other file version nothing is added and ``None`` is
    returned.

    Parameters
    ----------
    station_name : str
        Name of the station.
    station_metadata : mt_metadata.timeseries.Station, optional
        Station metadata container. Default is None.
    survey : str, optional
        Survey ID. Required for file version 0.2.0. Default is None.

    Returns
    -------
    groups.StationGroup
        The station group returned by the stations group.

    Raises
    ------
    ValueError
        If the file version is 0.2.0 and ``survey`` is None. The message
        is logged as an error first.

    Examples
    --------
    Add a station to a v0.2.0 file:

    >>> mth5_obj = MTH5(file_version='0.2.0')
    >>> mth5_obj.open_mth5('test.mth5', 'w')
    >>> station = mth5_obj.add_station('MT001', survey='survey_001')

    See Also
    --------
    get_station : Retrieve existing station
    remove_station : Delete a station
    """
    version = self.file_version
    if version == "0.1.0":
        stations = self.stations_group
    elif version == "0.2.0":
        if survey is None:
            msg = f"Need to input 'survey' for file version {version}"
            self.logger.error(msg)
            raise ValueError(msg)
        stations = self.get_survey(survey).stations_group
    else:
        return None
    return stations.add_station(station_name, station_metadata=station_metadata)
def get_station(
    self,
    station_name: str,
    survey: str | None = None,
) -> groups.StationGroup:
    """
    Get an existing station from the MTH5 file.

    The HDF5 path is built with ``self._make_h5_path``; the group found
    there is wrapped in a ``StationGroup`` and its metadata is read before
    it is returned.

    Parameters
    ----------
    station_name : str
        Name of the station to retrieve.
    survey : str, optional
        Survey ID. Required for file version 0.2.0. Default is None.

    Returns
    -------
    groups.StationGroup
        The requested station group object.

    Raises
    ------
    MTH5Error
        If a ``KeyError`` occurs while opening the station, i.e. the
        station cannot be found.

    Examples
    --------
    Get a station:

    >>> mth5_obj = MTH5()
    >>> mth5_obj.open_mth5('test.mth5', 'r')
    >>> station = mth5_obj.get_station('MT001', survey='survey_001')

    See Also
    --------
    add_station : Create a new station
    remove_station : Delete a station
    """
    station_path = self._make_h5_path(survey=survey, station=station_name)
    try:
        station_group = groups.StationGroup(
            self.__hdf5_obj[station_path], **self.dataset_options
        )
        station_group.read_metadata()
    except KeyError:
        raise MTH5Error(f"Could not find station {station_name}")
    return station_group
def remove_station(self, station_name, survey=None):
    """
    Convenience function to remove a station from the file.

    Names are passed through ``helpers.validate_name`` first. For file
    version 0.1.0 the station is removed through ``self.stations_group``;
    for file version 0.2.0 it is removed from the stations group of the
    given survey. For any other file version nothing is done.

    .. note:: Deleting a station is not as simple as del(station). In HDF5
        this does not free up memory, it simply removes the reference
        to that station. The common way to get around this is to
        copy what you want into a new file, or overwrite the station.

    :param station_name: existing station name
    :type station_name: string
    :param survey: existing survey name, needed for file version >= 0.2.0
    :type survey: string
    :raises ValueError: if the file version is 0.2.0 and ``survey`` is None

    :Example:

        >>> mth5_obj.remove_station('MT001')

    """
    station_name = helpers.validate_name(station_name)
    version = self.file_version
    if version == "0.1.0":
        stations = self.stations_group
    elif version == "0.2.0":
        if survey is None:
            msg = f"Need to input 'survey' for file version {version}"
            self.logger.error(msg)
            raise ValueError(msg)
        survey = helpers.validate_name(survey)
        stations = self.get_survey(survey).stations_group
    else:
        return None
    return stations.remove_station(station_name)
def add_run(
    self,
    station_name: str,
    run_name: str,
    run_metadata=None,
    survey: str | None = None,
) -> groups.RunGroup:
    """
    Add a run to a given station.

    The station is fetched with ``get_station`` and the run is added
    through that station group's ``add_run``.

    Parameters
    ----------
    station_name : str
        Existing station name.
    run_name : str
        Name of the run (typically archive_id followed by a-z).
    run_metadata : mt_metadata.timeseries.Run, optional
        Run metadata container. Default is None.
    survey : str, optional
        Survey ID. Required for file version 0.2.0. Default is None.

    Returns
    -------
    groups.RunGroup
        The run group returned by the station group.

    Examples
    --------
    Add a run to a station:

    >>> mth5_obj = MTH5()
    >>> mth5_obj.open_mth5('test.mth5', 'w')
    >>> run = mth5_obj.add_run('MT001', 'MT001a', survey='survey_001')

    See Also
    --------
    get_run : Retrieve existing run
    remove_run : Delete a run
    """
    station_group = self.get_station(station_name, survey=survey)
    return station_group.add_run(run_name, run_metadata=run_metadata)
def get_run(self, station_name, run_name, survey=None):
    """
    Get a run from a run name for a given station.

    The HDF5 path is built with ``self._make_h5_path``; the group found
    there is wrapped in a ``RunGroup`` and its metadata is read before it
    is returned.

    :param station_name: existing station name
    :type station_name: string
    :param run_name: existing run name
    :type run_name: string
    :param survey: existing survey name, needed for file version >= 0.2.0
    :type survey: string
    :return: Run object
    :rtype: :class:`mth5.mth5_groups.RunGroup`
    :raises MTH5Error: if a ``KeyError`` occurs while opening the run,
        i.e. the run is not found

    :Example:

        >>> existing_run = mth5_obj.get_run('MT001', 'MT001a')

    """
    run_path = self._make_h5_path(
        survey=survey, station=station_name, run=run_name
    )
    try:
        run_group = groups.RunGroup(self.__hdf5_obj[run_path], **self.dataset_options)
        run_group.read_metadata()
    except KeyError:
        raise MTH5Error(f"Could not find {run_path}")
    return run_group
def remove_run(self, station_name, run_name, survey=None):
    """
    Remove a run from the station.

    The station is fetched with ``get_station`` and the run is removed
    through that station group's ``remove_run``.

    .. note:: Deleting a run is not as simple as del(run). In HDF5
        this does not free up memory, it simply removes the reference
        to that run. The common way to get around this is to
        copy what you want into a new file, or overwrite the run.

    :param station_name: existing station name
    :type station_name: string
    :param run_name: existing run name
    :type run_name: string
    :param survey: existing survey name, needed for file version >= 0.2.0
    :type survey: string

    :Example:

        >>> mth5_obj.remove_run('MT001', 'MT001a')

    """
    station_group = self.get_station(station_name, survey=survey)
    return station_group.remove_run(run_name)
def add_channel(
    self,
    station_name: str,
    run_name: str,
    channel_name: str,
    channel_type: str,
    data,
    channel_dtype: str = "int32",
    max_shape: tuple[int | None, ...] = (None,),
    chunks: bool = True,
    channel_metadata=None,
    survey: str | None = None,
) -> groups.ElectricDataset | groups.MagneticDataset | groups.AuxiliaryDataset:
    """
    Add a channel to a given run and station.

    The run is fetched with ``get_run`` and the channel is added through
    that run group's ``add_channel``. The entries of
    ``self.dataset_options`` are forwarded as extra keyword arguments.

    Parameters
    ----------
    station_name : str
        Existing station name.
    run_name : str
        Existing run name.
    channel_name : str
        Name of the channel (component, e.g., 'Ex', 'Hy').
    channel_type : str
        Type of channel: 'electric', 'magnetic', or 'auxiliary'.
    data : ndarray
        Channel data array.
    channel_dtype : str, default 'int32'
        NumPy data type for storage.
    max_shape : tuple[int | None, ...], default (None,)
        Maximum shape (allows resizing). None allows unlimited growth.
    chunks : bool, default True
        Enable HDF5 chunking.
    channel_metadata : mt_metadata.timeseries.Electric | Magnetic | Auxiliary, optional
        Channel metadata container. Default is None.
    survey : str, optional
        Survey ID. Required for file version 0.2.0. Default is None.

    Returns
    -------
    groups.ElectricDataset | groups.MagneticDataset | groups.AuxiliaryDataset
        The channel dataset object returned by the run group.

    Raises
    ------
    MTH5Error
        If the run cannot be found (raised by ``get_run``).

    Examples
    --------
    Add an electric field channel:

    >>> import numpy as np
    >>> mth5_obj = MTH5()
    >>> mth5_obj.open_mth5('test.mth5', 'w')
    >>> data = np.random.random(1000)
    >>> ch = mth5_obj.add_channel('MT001', 'MT001a', 'Ex', 'electric',
    ...                           data, survey='survey_001')

    See Also
    --------
    get_channel : Retrieve existing channel
    remove_channel : Delete a channel
    """
    run_group = self.get_run(station_name, run_name, survey=survey)
    return run_group.add_channel(
        channel_name,
        channel_type,
        data,
        channel_metadata=channel_metadata,
        channel_dtype=channel_dtype,
        max_shape=max_shape,
        chunks=chunks,
        **self.dataset_options,
    )
def get_channel(self, station_name, run_name, channel_name, survey=None):
    """
    Get a channel from an existing name. Returns the appropriate
    container.

    The run path is built with ``self._make_h5_path``; the run found there
    is wrapped in a ``RunGroup``, its metadata is read, and the channel is
    fetched from it after ``channel_name`` is passed through
    ``helpers.validate_name``.

    :param station_name: existing station name
    :type station_name: string
    :param run_name: existing run name
    :type run_name: string
    :param channel_name: name of the channel
    :type channel_name: string
    :param survey: existing survey name, needed for file version >= 0.2.0
    :type survey: string
    :return: Channel container
    :rtype: [ :class:`mth5.mth5_groups.ElectricDatset` |
              :class:`mth5.mth5_groups.MagneticDatset` |
              :class:`mth5.mth5_groups.AuxiliaryDatset` ]
    :raises MTH5Error: If the run or the channel is not found

    :Example:

        >>> existing_channel = mth5_obj.get_channel(station_name,
        ...                                         run_name,
        ...                                         channel_name)

    """
    run_path = self._make_h5_path(survey=survey, station=station_name, run=run_name)
    not_found_msg = f"Could not find channel, {run_path}/{channel_name}"
    # a missing run must surface as MTH5Error, just like a missing channel
    try:
        run_h5 = self.__hdf5_obj[run_path]
    except KeyError as error:
        raise MTH5Error(not_found_msg) from error
    rg = groups.RunGroup(run_h5, **self.dataset_options)
    rg.read_metadata()
    try:
        return rg.get_channel(helpers.validate_name(channel_name))
    except (AttributeError, KeyError) as error:
        raise MTH5Error(not_found_msg) from error
def remove_channel(self, station_name, run_name, channel_name, survey=None):
    """
    Remove a channel from a given run and station.

    All three names are passed through ``helpers.validate_name``; the
    station is fetched with ``get_station``, the run with the station
    group's ``get_run``, and the channel is removed through the run
    group's ``remove_channel``.

    .. note:: Deleting a channel is not as simple as del(channel). In HDF5
        this does not free up memory, it simply removes the reference
        to that channel. The common way to get around this is to
        copy what you want into a new file, or overwrite the channel.

    :param station_name: existing station name
    :type station_name: string
    :param run_name: existing run name
    :type run_name: string
    :param channel_name: existing channel name
    :type channel_name: string
    :param survey: existing survey name, needed for file version >= 0.2.0
    :type survey: string

    :Example:

        >>> mth5_obj.remove_channel('MT001', 'MT001a', 'Ex')

    """
    station_id, run_id, channel_id = (
        helpers.validate_name(name)
        for name in (station_name, run_name, channel_name)
    )
    station_group = self.get_station(station_id, survey=survey)
    run_group = station_group.get_run(run_id)
    return run_group.remove_channel(channel_id)
def add_transfer_function(self, tf_object, update_metadata=True):
    """
    Add a transfer function to the file, creating the survey, station,
    runs and channels it refers to when they do not exist yet.

    Steps:

    1. A survey id of ``"0"`` is replaced by ``"unknown_survey"`` and the
       id is passed through ``helpers.validate_name``.
    2. File version 0.2.0: the survey with that id is fetched, or added
       if it does not exist. For ``"unknown_survey"`` every survey group
       whose name contains ``"unknown_survey"`` is compared against the
       transfer function's survey metadata; if none matches, a new
       ``unknown_survey_NNN`` survey is created.
       Other file versions: ``self.survey_group`` is used and, if its id
       is ``None`` or ``"default_survey"``, its metadata is updated from
       the transfer function.
    3. The station is fetched or added, and for every processed run that
       is missing, the run and its channels are added without data.
    4. The transfer function group is added; if that fails because the
       group already exists, the existing group is returned.

    :param tf_object: transfer function to add
    :type tf_object: TF
    :param update_metadata: if True, call ``update_metadata`` on the
        survey group after the transfer function has been added
    :type update_metadata: bool, defaults to True
    :return: the added (or already existing) transfer function group
    :raises ValueError: if ``tf_object`` is not a ``TF`` instance

    """
    if not isinstance(tf_object, TF):
        msg = f"Input must be a TF object not {type(tf_object)}"
        self.logger.error(msg)
        raise ValueError(msg)

    if tf_object.survey_metadata.id == "0":
        tf_object.survey_metadata.id = "unknown_survey"
    tf_object.survey_metadata.id = helpers.validate_name(
        tf_object.survey_metadata.id
    )

    if self.file_version == "0.2.0":
        try:
            # need to check survey metadata to make sure it matches,
            # if it doesn't need to make a new survey group so that
            # when a TF is pulled it gets the proper survey metadata.
            survey_group = self.get_survey(tf_object.survey_metadata.id)
            if tf_object.survey_metadata.id in ["unknown_survey"]:
                ignored_keys = ("hdf5_reference", "mth5_type", "id")
                # initialised so the check below never reads an unbound name
                match = False
                for sg_id in self.surveys_group.groups_list:
                    if "unknown_survey" not in sg_id:
                        continue
                    match = True
                    survey_group = self.get_survey(sg_id)
                    sg_dict = survey_group.metadata.to_dict(
                        single=True, required=False
                    )
                    tf_survey_dict = tf_object.survey_metadata.to_dict(single=True)
                    for key, value in tf_survey_dict.items():
                        if key in ignored_keys:
                            continue
                        if sg_dict[key] != value:
                            match = False
                            break
                    if match:
                        break
                # create a new survey group with a new id, this is likely
                # not the best way to do this, it should be strongly
                # encouraged that the user assign a survey id.
                if not match:
                    count = 1
                    survey_id = f"unknown_survey_{count:03}"
                    while survey_id in self.surveys_group.groups_list:
                        count += 1
                        survey_id = f"unknown_survey_{count:03}"
                    tf_object.survey_metadata.id = survey_id
                    survey_group = self.add_survey(
                        tf_object.survey_metadata.id,
                        survey_metadata=tf_object.survey_metadata,
                    )
        except MTH5Error:
            survey_group = self.add_survey(
                tf_object.survey_metadata.id,
                survey_metadata=tf_object.survey_metadata,
            )
    else:
        survey_group = self.survey_group
        # might need a better test here
        if survey_group.metadata.id in [None, "default_survey"]:
            survey_group.metadata.update(tf_object.survey_metadata)
            survey_group.write_metadata()

    try:
        station_group = survey_group.stations_group.get_station(
            tf_object.station_metadata.id
        )
    except MTH5Error:
        station_group = survey_group.stations_group.add_station(
            tf_object.station_metadata.id,
            station_metadata=tf_object.to_ts_station_metadata(),
        )

    ## need to check for runs and channels
    if tf_object.station_metadata.transfer_function.runs_processed in [
        [],
        [""],
    ]:
        tf_object.station_metadata.transfer_function.runs_processed = (
            tf_object.station_metadata.run_list
        )
    for run_id in tf_object.station_metadata.transfer_function.runs_processed:
        if run_id in ["", None, "None"]:
            continue
        try:
            station_group.get_run(run_id)
        except MTH5Error:
            # the run is missing: create it and its channels from the
            # transfer function's metadata
            run = tf_object.station_metadata.get_run(run_id)
            if run is None:
                run = tf_object.station_metadata.runs[0].copy()
                run.id = run_id
            run_group = station_group.add_run(run_id, run_metadata=run)

            for ch in run.channels:
                try:
                    run_group.get_channel(ch.component)
                except MTH5Error:
                    run_group.add_channel(
                        ch.component,
                        ch.type,
                        None,
                        channel_metadata=ch,
                    )

    try:
        tf_group = station_group.transfer_functions_group.add_transfer_function(
            tf_object.tf_id, tf_object=tf_object
        )
        # need to update time_period from TF here
    except (OSError, RuntimeError, ValueError):
        msg = f"TF {tf_object.tf_id} already exists, returning existing group."
        self.logger.debug(msg)
        tf_group = station_group.transfer_functions_group.get_transfer_function(
            tf_object.tf_id
        )

    if update_metadata:
        survey_group.update_metadata()
    return tf_group
def get_transfer_function(self, station_id, tf_id, survey=None):
    """
    Get a transfer function.

    The transfer function summary table is searched first for a row with
    the given station, the given ``tf_id`` and survey ``"unknown_survey"``;
    if one is found the object behind its HDF5 reference is returned via
    ``self.from_reference``. If no such row exists, the HDF5 path is built
    from ``survey``, ``station_id`` and ``tf_id`` and the group found
    there is converted with ``to_tf_object``.

    :param station_id: existing station name
    :type station_id: string
    :param tf_id: transfer function id
    :type tf_id: string
    :param survey: existing survey name, used to build the HDF5 path
    :type survey: string
    :return: the requested transfer function
    :raises MTH5Error: if the transfer function path is not in the file

    """
    try:
        summary = self.tf_summary.to_dataframe()
        is_match = (
            (summary.station == station_id)
            & (summary.tf_id == tf_id)
            & (summary.survey == "unknown_survey")
        )
        # .iloc[0] raises IndexError when no row matches
        reference = summary.loc[is_match].iloc[0].hdf5_reference
        return self.from_reference(reference)
    except IndexError:
        tf_path = self._make_h5_path(
            survey=survey, station=station_id, tf_id=tf_id
        )
        try:
            tf_group = groups.TransferFunctionGroup(
                self.__hdf5_obj[tf_path], **self.dataset_options
            )
            tf_group.read_metadata()
            return tf_group.to_tf_object()
        except KeyError:
            raise MTH5Error(f"Could not find {tf_path}")
def remove_transfer_function(self, station_id, tf_id, survey=None):
    """
    Remove a transfer function.

    The station is fetched with ``get_station`` and the transfer function
    is removed through that station's transfer functions group.

    :param station_id: existing station name
    :type station_id: string
    :param tf_id: id of the transfer function to remove
    :type tf_id: string
    :param survey: existing survey name, passed on to ``get_station``
    :type survey: string

    """
    tf_groups = self.get_station(station_id, survey=survey).transfer_functions_group
    tf_groups.remove_transfer_function(tf_id)
1912def _default_table_names() -> list[str]:
1913 """
1914 Get the default MTH5 summary table names.
1916 Returns
1917 -------
1918 list[str]
1919 List of default MTH5 summary table names:
1920 ['channel_summary', 'fc_summary', 'tf_summary']
1922 Examples
1923 --------
1924 >>> names = _default_table_names()
1925 >>> print(names)
1926 ['channel_summary', 'fc_summary', 'tf_summary']
1927 """
1928 return ["channel_summary", "fc_summary", "tf_summary"]