Coverage for C: \ Users \ peaco \ OneDrive \ Documents \ GitHub \ mth5 \ mth5 \ groups \ run.py: 78%
270 statements
« prev ^ index » next coverage.py v7.13.1, created at 2026-01-27 20:09 -0800
« prev ^ index » next coverage.py v7.13.1, created at 2026-01-27 20:09 -0800
1# -*- coding: utf-8 -*-
2"""
3Created on Sat May 27 09:59:03 2023
5@author: jpeacock
6"""
8# =============================================================================
9# Imports
10# =============================================================================
11from __future__ import annotations
13import inspect
14from typing import Any, Optional
16import h5py
17import numpy as np
18import pandas as pd
19from mt_metadata import timeseries as metadata
21from mth5 import CHUNK_SIZE
22from mth5.groups import (
23 AuxiliaryDataset,
24 BaseGroup,
25 ChannelDataset,
26 ElectricDataset,
27 MagneticDataset,
28)
29from mth5.helpers import read_attrs_to_dict, to_numpy_type, validate_name
30from mth5.timeseries import ChannelTS, RunTS
31from mth5.utils.exceptions import MTH5Error
34meta_classes = dict(inspect.getmembers(metadata, inspect.isclass))
35# =============================================================================
38# =============================================================================
39# Run Group
40# =============================================================================
41class RunGroup(BaseGroup):
42 """
43 Container for a single MT measurement run with multiple channels.
45 Manages time series data and metadata for one measurement run within a station.
46 A run can contain multiple channels of electric, magnetic, and auxiliary data.
47 This class provides methods to add, retrieve, and manage individual channels,
48 along with convenient access to station and survey metadata.
50 The run group is located at ``/Survey/Stations/{station_name}/{run_name}`` in
51 the HDF5 file hierarchy.
53 Attributes
54 ----------
55 metadata : mt_metadata.timeseries.Run
56 Run metadata including sample rate, time period, and channel information.
57 channel_summary : pd.DataFrame
58 Summary table of all channels in the run.
59 groups_list : list[str]
60 List of channel names in the run.
62 Parameters
63 ----------
64 group : h5py.Group
65 HDF5 group for the run, should have path like
66 ``/Survey/Stations/{station_name}/{run_name}``
67 run_metadata : mt_metadata.timeseries.Run, optional
68 Metadata container for the run. Default is None.
69 **kwargs : Any
70 Additional keyword arguments passed to BaseGroup.
72 Notes
73 -----
74 Key behaviors:
76 - Channels can be of type: electric, magnetic, or auxiliary
77 - All metadata updates should use the metadata object for validation
78 - Call write_metadata() after modifying metadata to persist changes
79 - Channel metadata is cached for performance during repeated access
80 - Deleting a channel removes the reference but doesn't reduce file size
82 Examples
83 --------
84 Access run from an open MTH5 file:
86 >>> from mth5 import mth5
87 >>> mth5_obj = mth5.MTH5()
88 >>> mth5_obj.open_mth5(r"/test.mth5", mode='a')
89 >>> run = mth5_obj.stations_group.get_station('MT001').get_run('MT001a')
91 Check available channels:
93 >>> run.groups_list
94 ['Ex', 'Ey', 'Hx', 'Hy']
96 Access HDF5 group directly:
98 >>> run.hdf5_group.ref
99 <HDF5 Group Reference>
101 Update metadata and persist to file:
103 >>> run.metadata.sample_rate = 512.0
104 >>> run.write_metadata()
106 Add a channel:
108 >>> import numpy as np
109 >>> data = np.random.rand(4096)
110 >>> ex = run.add_channel('Ex', 'electric', data=data)
112 This class provides methods to add and get channels. A summary table of
113 all existing channels in the run is also provided as a convenience look up
114 table to make searching easier.
116 :param group: HDF5 group for a run, should have a path
117 ``/Survey/Stations/station_name/run_name``
118 :type group: :class:`h5py.Group`
119 :param run_metadata: metadata container, defaults to None
120 :type run_metadata: :class:`mt_metadata.timeseries.Run`, optional
122 :Access RunGroup from an open MTH5 file:
124 >>> from mth5 import mth5
125 >>> mth5_obj = mth5.MTH5()
126 >>> mth5_obj.open_mth5(r"/test.mth5", mode='a')
127 >>> run = mth5_obj.stations_group.get_station('MT001').get_run('MT001a')
129 :Check what channels exist:
131 >>> run.groups_list
132 ['Ex', 'Ey', 'Hx', 'Hy']
134 To access the hdf5 group directly use `RunGroup.hdf5_group`
136 >>> run.hdf5_group.ref
137 <HDF5 Group Reference>
139 .. note:: All attributes should be input into the metadata object, that
140 way all input will be validated against the metadata standards.
141 If you change attributes in the metadata object, you should run the
142 `RunGroup.write_metadata()` method. This is a temporary
143 solution; an automatic updater for changed metadata is in progress.
145 >>> run.metadata.existing_attribute = 'update_existing_attribute'
146 >>> run.write_metadata()
148 If you want to add a new attribute this should be done using the
149 `metadata.add_base_attribute` method.
151 >>> run.metadata.add_base_attribute('new_attribute',
152 >>> ... 'new_attribute_value',
153 >>> ... {'type':str,
154 >>> ... 'required':True,
155 >>> ... 'style':'free form',
156 >>> ... 'description': 'new attribute desc.',
157 >>> ... 'units':None,
158 >>> ... 'options':[],
159 >>> ... 'alias':[],
160 >>> ... 'example':'new attribute'})
162 :Add a channel:
164 >>> new_channel = run.add_channel('Ex', 'electric',
165 >>> ... data=numpy.random.rand(4096))
166 >>> new_run
167 /Survey/Stations/MT001/MT001a:
168 =======================================
169 --> Dataset: summary
170 ......................
171 --> Dataset: Ex
172 ......................
173 --> Dataset: Ey
174 ......................
175 --> Dataset: Hx
176 ......................
177 --> Dataset: Hy
178 ......................
181 :Add a channel with metadata:
183 >>> from mth5.metadata import Electric
184 >>> ex_metadata = Electric()
185 >>> ex_metadata.time_period.start = '2020-01-01T12:30:00'
186 >>> ex_metadata.time_period.end = '2020-01-03T16:30:00'
187 >>> new_ex = run.add_channel('Ex', 'electric',
188 >>> ... channel_metadata=ex_metadata)
189 >>> # to look at the metadata
190 >>> new_ex.metadata
191 {
192 "electric": {
193 "ac.end": 1.2,
194 "ac.start": 2.3,
195 ...
196 }
197 }
200 .. seealso:: `mth5.metadata` for details on how to add metadata from
201 various files and python objects.
203 :Remove a channel:
205 >>> run.remove_channel('Ex')
206 >>> run
207 /Survey/Stations/MT001/MT001a:
208 =======================================
209 --> Dataset: summary
210 ......................
211 --> Dataset: Ey
212 ......................
213 --> Dataset: Hx
214 ......................
215 --> Dataset: Hy
216 ......................
218 .. note:: Deleting a channel is not as simple as del(channel). In HDF5
219 this does not free up memory, it simply removes the reference
220 to that channel. The common way to get around this is to
221 copy what you want into a new file, or overwrite the channel.
223 :Get a channel:
225 >>> existing_ex = run.get_channel('Ex')
226 >>> existing_ex
227 Channel Electric:
228 -------------------
229 component: Ex
230 data type: electric
231 data format: float32
232 data shape: (4096,)
233 start: 1980-01-01T00:00:00+00:00
234 end: 1980-01-01T00:32:+08:00
235 sample rate: 8
238 :summary Table:
240 A summary table is provided to make searching easier. The table
241 summarizes all channels within a run. To see what names are in the
242 summary table:
244 >>> run.summary_table.dtype.descr
245 [('component', ('|S5', {'h5py_encoding': 'ascii'})),
246 ('start', ('|S32', {'h5py_encoding': 'ascii'})),
247 ('end', ('|S32', {'h5py_encoding': 'ascii'})),
248 ('n_samples', '<i4'),
249 ('measurement_type', ('|S12', {'h5py_encoding': 'ascii'})),
250 ('units', ('|S25', {'h5py_encoding': 'ascii'})),
251 ('hdf5_reference', ('|O', {'ref': h5py.h5r.Reference}))]
254 .. note:: When a channel is added an entry is added to the summary table,
255 where the information is pulled from the metadata.
257 >>> new_run.summary_table
258 index | component | start | end | n_samples | measurement_type | units |
259 hdf5_reference
260 --------------------------------------------------------------------------
261 -------------
262 """
def __init__(
    self,
    group: h5py.Group,
    run_metadata: Optional[metadata.Run] = None,
    **kwargs: Any,
) -> None:
    """
    Initialize RunGroup.

    Sets the list of non-channel sub-group names, delegates to
    ``BaseGroup.__init__`` and creates an empty channel metadata cache.

    Parameters
    ----------
    group : h5py.Group
        HDF5 group for the run.
    run_metadata : mt_metadata.timeseries.Run, optional
        Metadata container for the run, forwarded to ``BaseGroup`` as
        ``group_metadata``. Default is None.
    **kwargs : Any
        Additional keyword arguments passed to BaseGroup.
    """
    # Names of child groups that are not channels; the channel loops in
    # this class skip them.
    # NOTE(review): set before super().__init__ -- presumably because the base
    # initializer may already read properties that consult it; confirm.
    self._non_channel_groups = ["Features"]
    super().__init__(group, group_metadata=run_metadata, **kwargs)
    # Channel metadata cache to share objects between add_channel and metadata property
    self._channel_metadata_cache: dict[
        str, metadata.Electric | metadata.Magnetic | metadata.Auxiliary
    ] = {}
@property
def station_metadata(self) -> metadata.Station:
    """
    Station metadata built from the parent HDF5 group, with this run added.

    The parent group's attributes are copied, ``run_list`` is replaced by
    a one-element list holding this run's id, and the result is loaded
    into a new ``metadata.Station`` to which this run's metadata is added.

    Returns
    -------
    metadata.Station
        Station metadata object containing this run's information.

    Examples
    --------
    >>> from mth5 import mth5
    >>> mth5_obj = mth5.MTH5()
    >>> mth5_obj.open_mth5("example.h5", mode='r')
    >>> run = mth5_obj.get_run("MT001", "MT001a")
    >>> station_meta = run.station_metadata
    >>> print(station_meta.id)
    MT001
    """
    raw_attrs = dict(self.hdf5_group.parent.attrs)
    raw_attrs["run_list"] = [self.metadata.id]

    station = metadata.Station()
    # A second, throw-away Station serves as the template for attribute
    # conversion, exactly as many objects are created as before.
    station.from_dict(
        {"station": read_attrs_to_dict(raw_attrs, metadata.Station())}
    )
    station.add_run(self.metadata)

    return station
@property
def survey_metadata(self) -> metadata.Survey:
    """
    Survey metadata built from the HDF5 hierarchy, with this station added.

    The attributes are read from the group three levels above the run
    group, loaded into a new ``metadata.Survey``, and the value of
    :attr:`station_metadata` is added to it.

    Returns
    -------
    metadata.Survey
        Survey metadata object containing the full hierarchy.

    Examples
    --------
    >>> from mth5 import mth5
    >>> mth5_obj = mth5.MTH5()
    >>> mth5_obj.open_mth5("example.h5", mode='r')
    >>> run = mth5_obj.get_run("MT001", "MT001a")
    >>> survey_meta = run.survey_metadata
    >>> print(survey_meta.id)
    CONUS_South
    """
    survey_group = self.hdf5_group.parent.parent.parent
    survey_attrs = read_attrs_to_dict(dict(survey_group.attrs), metadata.Survey())

    survey = metadata.Survey()
    survey.from_dict({"survey": survey_attrs})
    survey.add_station(self.station_metadata)
    return survey
def _read_channel_metadata_from_hdf5(
    self, channel_name: str
) -> metadata.Electric | metadata.Magnetic | metadata.Auxiliary:
    """
    Build a channel metadata object from the attributes stored in HDF5.

    The metadata class is looked up in ``meta_classes`` by the capitalized
    value of the channel's ``type`` attribute.

    Parameters
    ----------
    channel_name : str
        Name of the channel to read metadata for.

    Returns
    -------
    metadata.Electric | metadata.Magnetic | metadata.Auxiliary
        Channel metadata object of appropriate type.

    Examples
    --------
    >>> run = mth5_obj.get_run("MT001", "MT001a")
    >>> ex_meta = run._read_channel_metadata_from_hdf5("ex")
    >>> print(ex_meta.type)
    electric
    """
    raw_attrs = dict(self.hdf5_group[channel_name].attrs)

    # Template instance used while converting the raw attributes.
    template = meta_classes[raw_attrs["type"].capitalize()]()
    attrs = read_attrs_to_dict(raw_attrs, template)

    # The class is looked up again from the converted attributes.
    channel_meta = meta_classes[attrs["type"].capitalize()]()
    channel_meta.from_dict(attrs)
    return channel_meta
def recache_channel_metadata(self) -> None:
    """
    Reset the channel metadata cache and refill it from HDF5.

    The cache is replaced by an empty dict, then every name in
    ``groups_list`` that is not listed in ``_non_channel_groups`` is read
    with ``_read_channel_metadata_from_hdf5`` and stored under its name.

    Examples
    --------
    >>> run = mth5_obj.get_run("MT001", "MT001a")
    >>> run.recache_channel_metadata()
    >>> # Cache is now synchronized with HDF5 storage
    """
    fresh_cache = {}
    # Install the empty cache first so the attribute is already reset
    # while the channels are being read.
    self._channel_metadata_cache = fresh_cache
    for name in self.groups_list:
        if name not in self._non_channel_groups:
            fresh_cache[name] = self._read_channel_metadata_from_hdf5(name)
@BaseGroup.metadata.getter
def metadata(self) -> metadata.Run:
    """
    Get run metadata including all channel information.

    On first access the run attributes are read via ``read_metadata()``.
    The channel list of the run metadata is then rebuilt from
    ``groups_list`` (skipping ``_non_channel_groups``) in two situations:
    when the run time period differs from the first channel's time period
    (the channel cache is refreshed from HDF5 first), and when the set of
    group names differs from the set of channel components already held.
    Finally ``hdf5_reference`` is set to this group's reference.

    Returns
    -------
    metadata.Run
        Run metadata object with all channels included.

    Examples
    --------
    >>> run = mth5_obj.get_run("MT001", "MT001a")
    >>> run_meta = run.metadata
    >>> print(run_meta.channels_recorded_electric)
    ['ex', 'ey']
    >>> print(run_meta.sample_rate)
    256.0
    """
    # Lazily load the run-level attributes the first time the property is read.
    if not self._has_read_metadata:
        self.read_metadata()
        self._has_read_metadata = True

    # A mismatch between the run time period and the first channel's time
    # period triggers a refresh of the cached channel metadata from HDF5.
    if len(self._metadata.channels) > 0:
        if (
            self._metadata.time_period.start
            != self._metadata.channels[0].time_period.start
        ) or (
            self._metadata.time_period.end
            != self._metadata.channels[0].time_period.end
        ):
            self.recache_channel_metadata()

            # Clear and rebuild the channels list
            self._metadata._empty_channels_recorded()
            self._metadata.channels = []

            for ch in self.groups_list:
                if ch in self._non_channel_groups:
                    continue
                if ch in self._channel_metadata_cache:
                    # Reuse cached metadata to prevent duplicate processing
                    cached_metadata = self._channel_metadata_cache[ch]
                    self._metadata.add_channel(cached_metadata)
                else:
                    # Create new metadata if not cached
                    ch_metadata = self._read_channel_metadata_from_hdf5(ch)
                    # Cache the metadata for future use
                    self._channel_metadata_cache[ch] = ch_metadata
                    self._metadata.add_channel(ch_metadata)

    # Only rebuild channels if they haven't been built yet or if the group list has changed
    # NOTE(review): groups_list is compared unfiltered, so a non-channel
    # group such as "Features" makes both the length and the set comparison
    # differ and forces a rebuild on every access -- confirm this is intended.
    if not self._metadata.channels or len(self._metadata.channels) != len(
        self.groups_list
    ):
        # Get current channel names from the groups and existing channels
        current_group_names = set(self.groups_list)
        existing_channel_names = set(ch.component for ch in self._metadata.channels)

        # Only rebuild if there's actually a difference in the channel sets
        if current_group_names != existing_channel_names:
            # Clear and rebuild the channels list
            self._metadata._empty_channels_recorded()
            self._metadata.channels = []

            # Walk every sub-group, skipping the known non-channel ones
            for ch in self.groups_list:
                # Skip non-channel groups
                if ch in self._non_channel_groups:
                    continue
                if ch in self._channel_metadata_cache:
                    # Reuse cached metadata to prevent duplicate processing
                    cached_metadata = self._channel_metadata_cache[ch]
                    self._metadata.add_channel(cached_metadata)
                else:
                    # Create new metadata if not cached
                    ch_metadata = self._read_channel_metadata_from_hdf5(ch)
                    # Cache the metadata for future use
                    self._channel_metadata_cache[ch] = ch_metadata
                    self._metadata.add_channel(ch_metadata)
        # If channel sets are identical, skip rebuilding to prevent duplicates
    self._metadata.hdf5_reference = self.hdf5_group.ref
    return self._metadata
@property
def channel_summary(self) -> pd.DataFrame:
    """
    Summarize every channel stored in the run as a DataFrame.

    Each member of the run's HDF5 group whose ``type`` attribute is
    ``electric``, ``magnetic`` or ``auxiliary`` contributes one row.
    Members missing any of the required attributes are skipped.

    Returns
    -------
    pandas.DataFrame
        DataFrame with columns: component, start, end, n_samples,
        sample_rate, measurement_type, units, hdf5_reference.

    Examples
    --------
    >>> run = mth5_obj.get_run("MT001", "MT001a")
    >>> summary = run.channel_summary
    >>> print(summary[['component', 'sample_rate', 'n_samples']])
      component  sample_rate  n_samples
    0        ex        256.0      65536
    1        ey        256.0      65536
    2        hx        256.0      65536
    3        hy        256.0      65536
    """
    channel_types = ["electric", "magnetic", "auxiliary"]
    rows = []
    for entry in self.hdf5_group.values():
        try:
            attrs = entry.attrs
            kind = attrs["type"]
            if kind not in channel_types:
                continue
            rows.append(
                (
                    attrs["component"],
                    # keep only the part of the time stamp before any "+"
                    attrs["time_period.start"].split("+")[0],
                    attrs["time_period.end"].split("+")[0],
                    entry.size,
                    attrs["sample_rate"],
                    kind,
                    attrs["units"],
                    entry.ref,
                )
            )
        except KeyError:
            # a required attribute is absent -> not summarized
            continue

    summary_dtype = np.dtype(
        [
            ("component", "U20"),
            ("start", "datetime64[ns]"),
            ("end", "datetime64[ns]"),
            ("n_samples", int),
            ("sample_rate", float),
            ("measurement_type", "U12"),
            ("units", "U25"),
            ("hdf5_reference", h5py.ref_dtype),
        ]
    )
    return pd.DataFrame(np.array(rows, dtype=summary_dtype))
def write_metadata(self) -> None:
    """
    Persist the run metadata as attributes of the HDF5 group.

    The metadata is flattened with ``to_dict(single=True)``; every value
    is passed through ``to_numpy_type`` and written with
    ``hdf5_group.attrs.create``.

    Examples
    --------
    >>> run = mth5_obj.get_run("MT001", "MT001a")
    >>> run.metadata.sample_rate = 512.0
    >>> run.write_metadata()
    >>> # Metadata is now persisted to HDF5 file
    """
    flat_metadata = self.metadata.to_dict(single=True)
    for attr_name, raw_value in flat_metadata.items():
        self.hdf5_group.attrs.create(attr_name, to_numpy_type(raw_value))
def add_channel(
    self,
    channel_name,
    channel_type,
    data,
    channel_dtype="int32",
    shape=None,
    max_shape=(None,),
    chunks=True,
    channel_metadata=None,
    **kwargs,
):
    """
    Add a channel to the run.

    A dataset named after the validated, lower-cased ``channel_name`` is
    created in the run's HDF5 group and wrapped in the dataset class that
    matches ``channel_type``. If creating the dataset raises ``OSError``,
    ``RuntimeError`` or ``ValueError`` the channel is assumed to exist
    already and the existing channel is returned instead.

    Parameters
    ----------
    channel_name : str
        Name of the channel (e.g., 'ex', 'ey', 'hx', 'hy', 'hz').
    channel_type : str
        Type of channel: 'electric', 'magnetic', or 'auxiliary'.
    data : numpy.ndarray or None
        Time series data for the channel. If None, an empty resizable
        dataset will be created.
    channel_dtype : str, optional
        Data type for the channel if data is None, by default "int32".
    shape : tuple of int, optional
        Initial shape of the dataset. If None and data is None, shape
        is estimated from metadata or set to (1,), by default None.
    max_shape : tuple of int or None, optional
        Maximum shape the dataset can be resized to. Use None for
        unlimited growth in that dimension, by default (None,).
    chunks : bool or int, optional
        Enable chunked storage. If True, uses automatic chunking.
        If int, uses that chunk size, by default True. Overridden with
        None when ``data`` has fewer than 1024 samples.
    channel_metadata : mt_metadata.timeseries.Electric, Magnetic, or Auxiliary, optional
        Metadata object for the channel, by default None.
    **kwargs : dict
        Additional keyword arguments; each one is set as an attribute
        on this RunGroup instance.

    Returns
    -------
    ElectricDataset or MagneticDataset or AuxiliaryDataset
        The created channel dataset object.

    Raises
    ------
    MTH5Error
        If channel_type is not one of: electric, magnetic, auxiliary.

    Examples
    --------
    Add a channel with data:

    >>> import numpy as np
    >>> from mth5 import mth5
    >>> mth5_obj = mth5.MTH5()
    >>> mth5_obj.open_mth5("example.h5", mode='a')
    >>> run = mth5_obj.get_run("MT001", "MT001a")
    >>> data = np.random.rand(4096)
    >>> ex = run.add_channel('ex', 'electric', data)
    >>> print(ex.metadata.component)
    ex

    Add a channel with metadata:

    >>> from mt_metadata.timeseries import Electric
    >>> ex_meta = Electric()
    >>> ex_meta.time_period.start = '2020-01-01T12:30:00'
    >>> ex_meta.sample_rate = 256.0
    >>> ex = run.add_channel('ex', 'electric', None,
    ...                      channel_metadata=ex_meta)
    >>> print(ex.metadata.sample_rate)
    256.0

    Add a channel with custom shape:

    >>> ex = run.add_channel('ex', 'electric', None,
    ...                      shape=(8192,), channel_dtype='float32')
    >>> print(ex.hdf5_dataset.shape)
    (8192,)
    """
    channel_name = validate_name(channel_name.lower())
    # Fallback size used when nothing better can be estimated below.
    estimate_size = (1,)
    # Extra keyword arguments become attributes of this RunGroup.
    for key, value in kwargs.items():
        setattr(self, key, value)
    # Arrays with fewer than 1024 samples are written with chunks=None.
    if data is not None:
        if data.size < 1024:
            chunks = None
    try:
        if data is not None:
            channel_group = self.hdf5_group.create_dataset(
                channel_name,
                data=data,
                dtype=data.dtype,
                chunks=chunks,
                maxshape=max_shape,
                **self.dataset_options,
            )
        # initialize a resizable data array
        # need to set the chunk size to something useful, if the chunk
        # size is 1 this causes performance issues and bloating of the
        # hdf5 file. Set to 8196 for now.
        else:
            # NOTE(review): the nesting of the two `else` branches below is
            # a best-effort reading of the original layout -- confirm.
            if shape is None:
                if channel_metadata is not None:
                    # can estimate a size, this will help with allocating
                    # and set the chunk size to a realistic value
                    if (
                        channel_metadata.time_period.start
                        != channel_metadata.time_period.end
                    ):
                        if channel_metadata.sample_rate > 0:
                            # (end - start) * sample_rate, truncated to int
                            estimate_size = (
                                int(
                                    (
                                        channel_metadata.time_period.end
                                        - channel_metadata.time_period.start
                                    )
                                    * channel_metadata.sample_rate
                                ),
                            )
                    else:
                        estimate_size = (1,)
                        chunks = CHUNK_SIZE
                else:
                    estimate_size = (1,)
                    chunks = CHUNK_SIZE
                # Guard against an implausibly large allocation.
                if estimate_size[0] > 2**31:
                    estimate_size = (1,)
                    self.logger.warning(
                        "Estimated size is too large. Check start and end "
                        "times, initializing with size (1,)"
                    )
            else:
                estimate_size = shape
            ## Create the dataset
            channel_group = self.hdf5_group.create_dataset(
                channel_name,
                shape=estimate_size,
                maxshape=max_shape,
                dtype=channel_dtype,
                chunks=chunks,
                **self.dataset_options,
            )
        # The validated channel name wins over the component in the metadata.
        if channel_metadata:
            if channel_metadata.component != channel_name:
                self.logger.warning(
                    f"Channel name {channel_name} != "
                    f"channel_metadata.component "
                    f"{channel_metadata.component}, setting to {channel_name}"
                )
                channel_metadata.component = channel_name
        # NOTE(review): an invalid channel_type is only detected here, after
        # the dataset has been created above, and MTH5Error is not caught by
        # the except clause below -- the new dataset appears to be left in
        # the file; confirm.
        if channel_type.lower() in ["magnetic"]:
            channel_obj = MagneticDataset(
                channel_group, dataset_metadata=channel_metadata
            )
        elif channel_type.lower() in ["electric"]:
            channel_obj = ElectricDataset(
                channel_group, dataset_metadata=channel_metadata
            )
        elif channel_type.lower() in ["auxiliary"]:
            channel_obj = AuxiliaryDataset(
                channel_group, dataset_metadata=channel_metadata
            )
        else:
            msg = (
                "`channel_type` must be in [ electric | magnetic | "
                f"auxiliary ]. Input was {channel_type}"
            )
            self.logger.error(msg)
            raise MTH5Error(msg)
    except (OSError, RuntimeError, ValueError):
        # Any of these errors is treated as "the channel already exists".
        msg = f"channel {channel_name} already exists, returning existing group."
        self.logger.debug(msg)
        channel_obj = self.get_channel(channel_name)

        # NOTE(review): the metadata update is assumed to belong to the
        # `data is not None` branch -- confirm against the original layout.
        if data is not None:
            self.logger.debug(f"Replacing data with new shape {data.shape}")
            channel_obj.replace_dataset(data)

            self.logger.debug("Updating metadata")
            channel_obj.metadata.update(channel_metadata)
            channel_obj.write_metadata()
            self.logger.debug(f"Done with {channel_name}")
    # need to make sure the channel name is passed.
    if channel_obj.metadata.component != channel_name:
        channel_obj.metadata.component = channel_name
        channel_obj.write_metadata()

    # Cache the processed channel metadata to prevent duplicate processing in metadata property
    # Use the channel object's metadata which has already been processed through from_dict
    self._channel_metadata_cache[channel_name] = channel_obj.metadata

    return channel_obj
def get_channel(
    self, channel_name: str
) -> ElectricDataset | MagneticDataset | AuxiliaryDataset | ChannelDataset:
    """
    Return the dataset container for an existing channel.

    The name is lower-cased and validated, the dataset is looked up in
    the run's HDF5 group, and it is wrapped in the container class that
    matches its ``mth5_type`` attribute (``ChannelDataset`` for any other
    value). The container's metadata is read before it is returned.

    Parameters
    ----------
    channel_name : str
        Name of the channel to retrieve (e.g., 'ex', 'ey', 'hx').

    Returns
    -------
    ElectricDataset or MagneticDataset or AuxiliaryDataset or ChannelDataset
        Channel dataset object containing the channel data and metadata.

    Raises
    ------
    MTH5Error
        If the channel does not exist in the run.

    Examples
    --------
    Asking for a channel that is not in the run raises:

    >>> from mth5 import mth5
    >>> mth5_obj = mth5.MTH5()
    >>> mth5_obj.open_mth5("example.h5", mode='r')
    >>> run = mth5_obj.get_run("MT001", "MT001a")
    >>> ex = run.get_channel('ex')
    MTH5Error: ex does not exist, check groups_list for existing names

    Check which channels are available:

    >>> run.groups_list
    ['ey', 'hx', 'hz']

    Retrieve one of them:

    >>> ey = run.get_channel('ey')
    >>> print(ey)
    Channel Electric:
    -------------------
        component:        ey
        data type:        electric
        data format:      float32
        data shape:       (4096,)
        start:            1980-01-01T00:00:00+00:00
        end:              1980-01-01T00:00:01+00:00
        sample rate:      4096
    """
    channel_name = validate_name(channel_name.lower())
    try:
        ch_dataset = self.hdf5_group[channel_name]
    except KeyError:
        msg = (
            f"{channel_name} does not exist, check groups_list "
            "for existing names"
        )
        self.logger.debug(msg)
        raise MTH5Error(msg)

    # Map the stored type tag onto its container class; anything else
    # falls back to the generic ChannelDataset.
    container_by_type = {
        "electric": ElectricDataset,
        "magnetic": MagneticDataset,
        "auxiliary": AuxiliaryDataset,
    }
    type_tag = ch_dataset.attrs["mth5_type"].lower()
    container_class = container_by_type.get(type_tag, ChannelDataset)

    channel = container_class(ch_dataset)
    channel.read_metadata()

    return channel
def remove_channel(self, channel_name: str) -> None:
    """
    Remove a channel from the run.

    The name is lower-cased and validated, the dataset is deleted from
    the run's HDF5 group and its entry, if any, is dropped from the
    channel metadata cache.

    Deleting a channel is not as simple as del(channel). In HDF5,
    this does not free up memory; it simply removes the reference
    to that channel. The common way to get around this is to
    copy what you want into a new file, or overwrite the channel.

    Parameters
    ----------
    channel_name : str
        Name of the existing channel to remove.

    Raises
    ------
    MTH5Error
        If the channel does not exist in the run.

    Notes
    -----
    Deleting a channel does not reduce the HDF5 file size. It simply
    removes the reference. If file size reduction is your goal, copy
    what you want into another file.

    Todo: Need to remove summary table entry as well.

    Examples
    --------
    >>> from mth5 import mth5
    >>> mth5_obj = mth5.MTH5()
    >>> mth5_obj.open_mth5(r"/test.mth5", mode='a')
    >>> run = mth5_obj.stations_group.get_station('MT001').get_run('MT001a')
    >>> run.remove_channel('ex')
    """

    channel_name = validate_name(channel_name.lower())

    # Only the HDF5 deletion can signal a missing channel, so it is the
    # only statement guarded by the KeyError handler.
    try:
        del self.hdf5_group[channel_name]
    except KeyError as error:
        msg = (
            f"{channel_name} does not exist, "
            "check groups_list for existing names"
        )
        self.logger.debug("Error: " + msg)
        raise MTH5Error(msg) from error

    # Remove from metadata cache if present
    self._channel_metadata_cache.pop(channel_name, None)
    # The adjacent literals previously concatenated to "HDF5file size it
    # simply remove"; the message now reads as intended.
    self.logger.info(
        "Deleting a channel does not reduce the HDF5 "
        "file size, it simply removes the reference. If "
        "file size reduction is your goal, simply copy"
        " what you want into another file."
    )
def has_data(self) -> bool:
    """
    Check whether every channel in the run reports having data.

    Each entry of ``groups_list`` that is neither ``"summary"`` nor one
    of ``_non_channel_groups`` is fetched with :meth:`get_channel` and
    asked once for its ``has_data()`` status. When at least one channel
    reports no data, the per-channel results are logged at info level.

    Returns
    -------
    bool
        True if all channels report data (also when the run holds no
        channels at all), False if any channel reports none.

    Notes
    -----
    A channel is considered to have data if its has_data() method
    returns True.

    Examples
    --------
    >>> run = mth5_obj.get_run("MT001", "MT001a")
    >>> if run.has_data():
    ...     print("Run contains valid data")
    ...     runts = run.to_runts()
    """
    # Non-channel sub-groups (e.g. "Features") are skipped, in line with
    # the other channel loops of this class, since they are not channels
    # that get_channel can return.
    skipped = {"summary", *self._non_channel_groups}

    reports = []
    all_have_data = True
    for name in self.groups_list:
        if name in skipped:
            continue
        ch_obj = self.get_channel(name)
        # Ask once and reuse the answer; the check may have to inspect
        # the stored data.
        channel_has_data = ch_obj.has_data()
        reports.append(f"{ch_obj.metadata.component}: {channel_has_data}")
        if not channel_has_data:
            all_have_data = False

    if not all_have_data:
        self.logger.info(", ".join(reports))
    return all_have_data
def to_runts(
    self,
    start: Optional[str] = None,
    end: Optional[str] = None,
    n_samples: Optional[int] = None,
) -> RunTS:
    """
    Convert run to a RunTS timeseries object.

    Every channel of the run is converted to a time series object and
    the collection is wrapped in a ``RunTS`` together with the run,
    station and survey metadata.

    Parameters
    ----------
    start : str, optional
        Start time for time slice in ISO format (e.g., '2023-01-01T12:00:00').
        If None, the whole channel is converted with ``to_channel_ts()``.
        Default is None.
    end : str, optional
        End time for time slice in ISO format. Only used if start is
        specified; forwarded to the channel's ``time_slice``.
        Default is None.
    n_samples : int, optional
        Number of samples to extract from start. Only used if start is
        specified; forwarded to the channel's ``time_slice``.
        Default is None.

    Returns
    -------
    RunTS
        RunTS object containing all channels with run, station and
        survey metadata.

    Notes
    -----
    - Includes run, station, and survey metadata in the output
    - Skips the 'summary' entry and every name in ``_non_channel_groups``,
      none of which is a channel
    - If start is specified, performs time slicing; otherwise returns full data

    Examples
    --------
    Convert entire run to RunTS:

    >>> run = mth5_obj.get_run("MT001", "MT001a")
    >>> runts = run.to_runts()
    >>> print(runts.channels)
    ['ex', 'ey', 'hx', 'hy']

    Time slice the run:

    >>> runts = run.to_runts(start='2023-01-01T12:00:00',
    ...                       end='2023-01-01T13:00:00')
    >>> print(runts.ex.ts.shape)
    (1024,)
    """
    # Non-channel sub-groups (e.g. "Features") are skipped, in line with
    # the other channel loops of this class, since they are not channels
    # that get_channel can return.
    skipped = {"summary", *self._non_channel_groups}

    ch_list = []
    for name in self.groups_list:
        if name in skipped:
            continue
        ch_obj = self.get_channel(name)

        if start is not None:
            ts_obj = ch_obj.time_slice(start, end=end, n_samples=n_samples)
        else:
            ts_obj = ch_obj.to_channel_ts()
        ch_list.append(ts_obj)
    return RunTS(
        ch_list,
        run_metadata=self.metadata,
        station_metadata=self.station_metadata,
        survey_metadata=self.survey_metadata,
    )
def from_runts(
    self, run_ts_obj: RunTS, **kwargs: Any
) -> list[ElectricDataset | MagneticDataset | AuxiliaryDataset]:
    """
    Create channel datasets from a RunTS timeseries object.

    Updates the run metadata from ``run_ts_obj.run_metadata``, then
    passes every channel of the RunTS object to :meth:`from_channel_ts`
    and finally calls ``update_metadata()``.

    Parameters
    ----------
    run_ts_obj : RunTS
        RunTS object containing multiple channels and metadata.
    **kwargs : Any
        Additional keyword arguments. Accepted but not used in this
        method.

    Returns
    -------
    list[ElectricDataset | MagneticDataset | AuxiliaryDataset]
        List of created channel dataset objects.

    Raises
    ------
    MTH5Error
        If input is not a RunTS object.

    Notes
    -----
    - Updates run metadata from input object
    - Compares each channel's station and run ids with this group's ids,
      warns on a mismatch and overwrites the channel's id
    - Channel creation itself is delegated to ``from_channel_ts``

    Examples
    --------
    >>> from mth5.timeseries import RunTS
    >>> run = mth5_obj.get_run("MT001", "MT001a")
    >>> runts = RunTS.from_file("timeseries_data.txt")
    >>> channels = run.from_runts(runts)
    >>> print(f"Created {len(channels)} channels")
    Created 4 channels
    """

    if not isinstance(run_ts_obj, RunTS):
        msg = f"Input must be a mth5.timeseries.RunTS object not {type(run_ts_obj)}"
        self.logger.error(msg)
        raise MTH5Error(msg)
    # Written to the private attribute directly rather than through the
    # `metadata` property.
    self._metadata.update(run_ts_obj.run_metadata)

    channels = []

    for comp in run_ts_obj.channels:
        ch = getattr(run_ts_obj, comp)

        # NOTE(review): the id reassignments below are assumed to sit inside
        # the innermost `if` (next to the warning) -- confirm against the
        # original layout.
        # An id of "0" is excluded from the mismatch handling.
        if ch.station_metadata.id is not None:
            if ch.station_metadata.id != self.station_metadata.id:
                if ch.station_metadata.id not in ["0", None]:
                    self.logger.warning(
                        f"Channel station.id {ch.station_metadata.id} != "
                        f" group station.id {self.station_metadata.id}. "
                        f"Setting to ch.station_metadata.id to {self.station_metadata.id}"
                    )
                    ch.station_metadata.id = self.station_metadata.id
        if ch.run_metadata.id is not None:
            if ch.run_metadata.id != self.metadata.id:
                if ch.run_metadata.id not in ["0", None]:
                    self.logger.warning(
                        f"Channel run.id {ch.run_metadata.id} != "
                        f" group run.id {self.metadata.id}. "
                        f"Setting to ch.run_metadata.id to {self.metadata.id}"
                    )
                    ch.run_metadata.id = self.metadata.id

        channels.append(self.from_channel_ts(ch))
    self.update_metadata()
    return channels
def from_channel_ts(
    self, channel_ts_obj: ChannelTS
) -> ElectricDataset | MagneticDataset | AuxiliaryDataset:
    """
    Store a single ChannelTS object as a channel of this run.

    Any filters listed in ``channel_ts_obj.channel_response.filters_list``
    are added to the ``"Filters"`` group found three levels above this run's
    HDF5 group.  The channel is then created with :meth:`add_channel` using
    the component, channel type, time series and channel metadata of
    ``channel_ts_obj``, and the component is recorded in the run metadata.

    Parameters
    ----------
    channel_ts_obj : ChannelTS
        Channel time series and metadata to write.

    Returns
    -------
    ElectricDataset | MagneticDataset | AuxiliaryDataset
        The object returned by :meth:`add_channel`.

    Raises
    ------
    MTH5Error
        If ``channel_ts_obj`` is not a ChannelTS instance.

    Notes
    -----
    - For channel types "electric", "magnetic" and "auxiliary" the component
      is added to the matching ``channels_recorded_*`` attribute of
      ``self.metadata``: a new one-element list is assigned when the
      attribute is None, otherwise the component is appended if absent.
    - Any other channel type leaves the recorded-channel lists untouched.

    Examples
    --------
    >>> run = mth5_obj.get_run("MT001", "MT001a")
    >>> ex = run.from_channel_ts(channel)  # channel: an existing ChannelTS
    """
    if not isinstance(channel_ts_obj, ChannelTS):
        msg = f"Input must be a mth5.timeseries.ChannelTS object not {type(channel_ts_obj)}"
        self.logger.error(msg)
        raise MTH5Error(msg)

    # Filters are written before the channel itself.
    response_filters = channel_ts_obj.channel_response.filters_list
    if response_filters != []:
        # Imported inside the method rather than at module level.
        from mth5.groups import FiltersGroup

        filters_group = FiltersGroup(
            self.hdf5_group.parent.parent.parent["Filters"]
        )
        for response_filter in response_filters:
            filters_group.add_filter(response_filter)

    dataset = self.add_channel(
        channel_ts_obj.component,
        channel_ts_obj.channel_metadata.type,
        channel_ts_obj.ts,
        channel_metadata=channel_ts_obj.channel_metadata,
    )

    # Work out which channels_recorded_* attribute matches the channel type.
    # The type is read after add_channel() has run.
    channel_type = channel_ts_obj.channel_metadata.type
    recorded_attr = None
    for type_name in ("electric", "magnetic", "auxiliary"):
        if channel_type == type_name:
            recorded_attr = "channels_recorded_" + type_name
            break

    if recorded_attr is not None:
        component = channel_ts_obj.component
        # The attribute is fetched from self.metadata at each step instead
        # of being cached in a local.
        if getattr(self.metadata, recorded_attr) is None:
            setattr(self.metadata, recorded_attr, [component])
        elif component not in getattr(self.metadata, recorded_attr):
            getattr(self.metadata, recorded_attr).append(component)

    return dataset
def update_run_metadata(self) -> None:
    """
    Deprecated entry point that only redirects callers.

    .. deprecated::
        Use :meth:`update_metadata` instead.

    Raises
    ------
    DeprecationWarning
        Raised as an exception on every call; nothing else is done.
    """
    message = "'update_run_metadata' has been deprecated use 'update_metadata()'"
    raise DeprecationWarning(message)
def update_metadata(self) -> None:
    """
    Refresh run time period and sample rate from the channel summary.

    Works on a copy of ``self.channel_summary`` and then calls
    ``self.write_metadata()``.

    Notes
    -----
    Values written to ``self._metadata``:

    - ``time_period.start``: ISO string of the minimum of the summary's
      ``start`` column.
    - ``time_period.end``: ISO string of the maximum of the summary's
      ``end`` column.
    - ``sample_rate``: first entry of the unique values of the summary's
      ``sample_rate`` column; any further distinct rates are ignored.

    If taking that first entry raises ``IndexError``, a critical message is
    logged and ``sample_rate`` is set to 0.

    NOTE(review): start and end are computed before the ``IndexError``
    guard, so an empty summary reaches them first -- confirm that the
    resulting values are acceptable for the metadata object.

    Examples
    --------
    >>> run = mth5_obj.get_run("MT001", "MT001a")
    >>> run.update_metadata()
    """
    summary = self.channel_summary.copy()

    # Start is assigned before end is computed.
    earliest = summary.start.min()
    self._metadata.time_period.start = earliest.isoformat()
    latest = summary.end.max()
    self._metadata.time_period.end = latest.isoformat()

    try:
        first_rate = summary.sample_rate.unique()[0]
        self._metadata.sample_rate = first_rate
    except IndexError:
        # No sample rate could be taken from the summary.
        self.logger.critical(
            "There maybe no channels associated with this run -- setting sample_rate to 0"
        )
        self._metadata.sample_rate = 0

    self.write_metadata()
def plot(
    self,
    start: Optional[str] = None,
    end: Optional[str] = None,
    n_samples: Optional[int] = None,
) -> Any:
    """
    Plot the run by converting it to a RunTS object first.

    Calls ``self.to_runts(start=start, end=end, n_samples=n_samples)`` and
    returns whatever the resulting object's ``plot()`` method returns.

    Parameters
    ----------
    start : str, optional
        Forwarded unchanged to ``to_runts``. Default is None.
    end : str, optional
        Forwarded unchanged to ``to_runts``. Default is None.
    n_samples : int, optional
        Forwarded unchanged to ``to_runts``. Default is None.

    Returns
    -------
    Any
        The return value of the RunTS object's ``plot()`` call.

    Examples
    --------
    >>> run = mth5_obj.get_run("MT001", "MT001a")
    >>> fig = run.plot()
    >>> fig = run.plot(start='2023-01-01T12:00:00',
    ...                end='2023-01-01T13:00:00')
    """
    return self.to_runts(start=start, end=end, n_samples=n_samples).plot()