Coverage for C:\Users\peaco\OneDrive\Documents\GitHub\mth5\mth5\groups\station.py: 72%
177 statements
« prev ^ index » next coverage.py v7.13.1, created at 2026-01-27 20:09 -0800
« prev ^ index » next coverage.py v7.13.1, created at 2026-01-27 20:09 -0800
1# -*- coding: utf-8 -*-
2from __future__ import annotations
5"""Station-level HDF5 helpers for MTH5."""
7# =============================================================================
8# Imports
9# =============================================================================
10import inspect
11from typing import Any
13import h5py
14import numpy as np
15import pandas as pd
16from mt_metadata import timeseries as metadata
17from mt_metadata.common.mttime import MTime
19from mth5.groups import (
20 BaseGroup,
21 MasterFCGroup,
22 MasterFeaturesGroup,
23 RunGroup,
24 TransferFunctionsGroup,
25)
26from mth5.helpers import read_attrs_to_dict
27from mth5.utils.exceptions import MTH5Error
# Map of class name -> class object for every class exposed by the
# ``mt_metadata.timeseries`` module (imported above as ``metadata``).
meta_classes = {
    class_name: class_obj
    for class_name, class_obj in inspect.getmembers(metadata, inspect.isclass)
}
31# =============================================================================
32# Master Station Group
33# =============================================================================
class MasterStationGroup(BaseGroup):
    """Collection helper for all stations in a survey.

    The group lives at ``/Survey/Stations`` and offers convenience accessors
    to add, fetch, or remove stations along with a summary table.

    Examples
    --------
    >>> from mth5 import mth5
    >>> mth5_obj = mth5.MTH5()
    >>> _ = mth5_obj.open_mth5("/tmp/example.mth5", mode="a")
    >>> stations = mth5_obj.stations_group
    >>> _ = stations.add_station("MT001")
    >>> stations.station_summary.head()  # doctest: +SKIP
    """

    def __init__(self, group: h5py.Group, **kwargs: Any) -> None:
        super().__init__(group, **kwargs)

    @property
    def station_summary(self) -> pd.DataFrame:
        """Return a summary DataFrame of all stations in the file.

        Returns
        -------
        pandas.DataFrame
            One row per station with the columns ``station``, ``start``,
            ``end``, ``latitude``, and ``longitude``. When no stations are
            present the frame has zero rows but still carries these columns.

        Notes
        -----
        ``start`` and ``end`` are converted with :func:`pandas.to_datetime`
        whenever at least one station is present.

        A group that raises ``KeyError`` while being read (for example one
        without an ``mth5_type`` attribute, or a station missing one of the
        summarized attributes) is skipped silently.

        Examples
        --------
        >>> summary = stations.station_summary
        >>> list(summary.columns)
        ['station', 'start', 'end', 'latitude', 'longitude']
        """
        columns = ["station", "start", "end", "latitude", "longitude"]

        def _station_entry(station: h5py.Group) -> dict[str, Any]:
            """Build one summary row from a station group's attributes."""
            attrs = station.attrs
            return {
                "station": attrs["id"],
                "start": attrs["time_period.start"],
                "end": attrs["time_period.end"],
                "latitude": attrs["location.latitude"],
                "longitude": attrs["location.longitude"],
            }

        def _collect(node: Any, entries: list[dict[str, Any]]) -> None:
            """Append a row for every station found at or below ``node``."""
            # Use the public h5py class rather than the private ``_hl`` path.
            if not isinstance(node, h5py.Group):
                return
            try:
                node_type = node.attrs["mth5_type"].lower()
                if node_type == "station":
                    entries.append(_station_entry(node))
                elif node_type == "masterstation":
                    for child in node.values():
                        _collect(child, entries)
            except KeyError:
                # Best effort: a group lacking a required attribute is
                # left out of the summary instead of aborting it.
                pass

        entries: list[dict[str, Any]] = []
        _collect(self.hdf5_group, entries)

        # Passing ``columns`` keeps the documented schema for an empty file.
        summary = pd.DataFrame(entries, columns=columns)
        if not summary.empty:
            try:
                summary["start"] = pd.to_datetime(summary["start"], format="mixed")
                summary["end"] = pd.to_datetime(summary["end"], format="mixed")
            except ValueError:
                # Fall back to default parsing when ``format="mixed"`` fails.
                summary["start"] = pd.to_datetime(summary["start"])
                summary["end"] = pd.to_datetime(summary["end"])

        return summary

    def add_station(
        self, station_name: str, station_metadata: metadata.Station | None = None
    ) -> "StationGroup":
        """Add or fetch a station group at ``/Survey/Stations/<name>``.

        Parameters
        ----------
        station_name : str
            Station identifier, typically matches ``metadata.id``.
        station_metadata : mt_metadata.timeseries.Station, optional
            Metadata container to seed the station attributes.

        Returns
        -------
        StationGroup
            Convenience wrapper for the created or existing station.

        Raises
        ------
        ValueError
            If ``station_name`` is empty or ``None``.

        Examples
        --------
        >>> station = stations.add_station("MT001")
        >>> station.metadata.id
        'MT001'
        """
        if not station_name:
            raise ValueError("station name is None, do not know what to name it")

        return self._add_group(station_name, StationGroup, station_metadata, match="id")

    def get_station(self, station_name: str) -> "StationGroup":
        """Return an existing station by name.

        Parameters
        ----------
        station_name : str
            Name of the station to retrieve.

        Returns
        -------
        StationGroup
            Wrapper for the requested station.

        Raises
        ------
        MTH5Error
            If the station does not exist.

        Examples
        --------
        >>> existing = stations.get_station("MT001")
        >>> existing.name
        'MT001'
        """
        return self._get_group(station_name, StationGroup)

    def remove_station(self, station_name: str) -> None:
        """Delete a station group reference from the file.

        Parameters
        ----------
        station_name : str
            Existing station name.

        Notes
        -----
        HDF5 deletion removes the reference only; underlying storage is not
        reclaimed.

        Examples
        --------
        >>> stations.remove_station("MT001")
        """
        self._remove_group(station_name)
199# =============================================================================
200# Station Group
201# =============================================================================
class StationGroup(BaseGroup):
    """Utility wrapper for a single station at ``/Survey/Stations/<id>``.

    Station groups manage run collections, metadata propagation, and provide
    summary utilities for quick inspection.

    Examples
    --------
    >>> from mth5 import mth5
    >>> m5 = mth5.MTH5()
    >>> _ = m5.open_mth5("/tmp/example.mth5", mode="a")
    >>> station = m5.stations_group.add_station("MT001")
    >>> _ = station.add_run("MT001a")
    >>> station.run_summary.shape[0] >= 1
    True
    """

    def __init__(
        self,
        group: h5py.Group,
        station_metadata: metadata.Station | None = None,
        **kwargs: Any,
    ) -> None:
        # Set before the base initializer runs so the names are already
        # available to anything it triggers.
        self._default_subgroup_names = [
            "Transfer_Functions",
            "Fourier_Coefficients",
            "Features",
        ]
        super().__init__(group, group_metadata=station_metadata, **kwargs)

    def initialize_group(self, **kwargs: Any) -> None:
        """Create default subgroups and write metadata.

        Parameters
        ----------
        **kwargs
            Additional attributes to set on the instance before initialization.

        Raises
        ------
        ValueError
            Re-raised unchanged when creating or initializing a subgroup fails
            with a message other than ``"Unable to synchronously create
            group"``; that specific failure is logged as a warning instead.

        Examples
        --------
        >>> station.initialize_group()
        """
        for key, value in kwargs.items():
            setattr(self, key, value)
        self.write_metadata()

        for group_name in self._default_subgroup_names:
            try:
                self.hdf5_group.create_group(f"{group_name}")
                # Each default subgroup has a matching ``<name>_group``
                # property on this class.
                m5_grp = getattr(self, f"{group_name.lower()}_group")
                m5_grp.initialize_group()
            except ValueError as value_error:
                if "Unable to synchronously create group" in str(value_error):
                    self.logger.warning("File is in write mode, cannot create group.")
                else:
                    # Bare ``raise`` keeps the original traceback intact.
                    raise

    @property
    def master_station_group(self) -> MasterStationGroup:
        """Shortcut to the containing master station group."""
        return MasterStationGroup(self.hdf5_group.parent)

    @property
    def transfer_functions_group(self) -> TransferFunctionsGroup:
        """Convenience accessor for ``/Station/Transfer_Functions``."""
        return TransferFunctionsGroup(
            self.hdf5_group["Transfer_Functions"], **self.dataset_options
        )

    @property
    def fourier_coefficients_group(self) -> MasterFCGroup:
        """Convenience accessor for ``/Station/Fourier_Coefficients``."""
        return MasterFCGroup(
            self.hdf5_group["Fourier_Coefficients"], **self.dataset_options
        )

    @property
    def features_group(self) -> MasterFeaturesGroup:
        """Convenience accessor for ``/Station/Features``."""
        return MasterFeaturesGroup(self.hdf5_group["Features"], **self.dataset_options)

    @property
    def survey_metadata(self) -> metadata.Survey:
        """Return survey metadata with this station appended.

        The survey attributes are read from the group two levels above this
        station (``hdf5_group.parent.parent``).
        """
        meta_dict = read_attrs_to_dict(
            dict(self.hdf5_group.parent.parent.attrs), metadata.Survey()
        )
        survey_metadata = metadata.Survey()
        survey_metadata.from_dict({"survey": meta_dict})
        survey_metadata.add_station(self.metadata)
        return survey_metadata

    @BaseGroup.metadata.getter
    def metadata(self) -> metadata.Station:
        """Station metadata enriched with run information.

        Metadata is read from the file on first access. On every access each
        non-default subgroup is fetched as a run and, when its metadata has
        ``mth5_type`` equal to ``"run"``, added to the station metadata.
        Subgroups that cannot be fetched are logged as a warning.
        """
        if not self._has_read_metadata:
            self.read_metadata()
            self._has_read_metadata = True

        # Lower-cased once, not once per subgroup.
        default_names = {name.lower() for name in self._default_subgroup_names}
        for key in self.groups_list:
            if key.lower() in default_names:
                continue
            try:
                run_metadata = self.get_run(key).metadata
                if run_metadata.mth5_type.lower() in ["run"]:
                    self._metadata.add_run(run_metadata)
            except MTH5Error:
                self.logger.warning(f"Could not find run {key}")
        return self._metadata

    @property
    def name(self) -> str:
        """Station identifier, taken from ``metadata.id``."""
        return self.metadata.id

    @name.setter
    def name(self, name: str) -> None:
        self.metadata.id = name

    @staticmethod
    def _normalize_channel_list(attr_value: Any) -> list:
        """Convert a stored channel-list attribute into a Python list.

        Array-like values are converted with ``tolist()``. Strings are parsed
        as JSON and used only when they decode to a list. Anything else is
        passed to ``list()``. An empty list is returned when the value cannot
        be converted.
        """
        if hasattr(attr_value, "tolist"):
            return attr_value.tolist()

        if isinstance(attr_value, str):
            import json

            try:
                parsed = json.loads(attr_value)
            except ValueError:
                # ``json.JSONDecodeError`` is a ``ValueError`` subclass.
                return []
            return parsed if isinstance(parsed, list) else []

        try:
            return list(attr_value)
        except (TypeError, ValueError):
            return []

    @property
    def run_summary(self) -> pd.DataFrame:
        """Return a summary of runs belonging to the station.

        Returns
        -------
        pandas.DataFrame
            Columns include ``id``, ``start``, ``end``, ``components``,
            ``measurement_type``, ``sample_rate``, and ``hdf5_reference``.

        Notes
        -----
        Channel lists stored as byte arrays or JSON strings are normalized
        before summarization. ``components`` is the comma-joined list of
        auxiliary, electric, and magnetic channels in that order, and
        ``measurement_type`` is filled from the run's ``data_type`` attribute.

        Anything after the first ``+`` in the start and end strings is dropped
        before conversion to ``datetime64[ns]``. The string columns are
        fixed-width (``U20``, ``U100``, ``U12``), so longer values are
        truncated by NumPy.

        Examples
        --------
        >>> station.run_summary.head()  # doctest: +SKIP
        """
        summary_dtype = np.dtype(
            [
                ("id", "U20"),
                ("start", "datetime64[ns]"),
                ("end", "datetime64[ns]"),
                ("components", "U100"),
                ("measurement_type", "U12"),
                ("sample_rate", float),
                ("hdf5_reference", h5py.ref_dtype),
            ]
        )

        rows = []
        for group in self.hdf5_group.values():
            if group.attrs["mth5_type"].lower() not in ["run"]:
                continue

            attrs = group.attrs
            channels = (
                self._normalize_channel_list(attrs["channels_recorded_auxiliary"])
                + self._normalize_channel_list(attrs["channels_recorded_electric"])
                + self._normalize_channel_list(attrs["channels_recorded_magnetic"])
            )
            components = ",".join(
                name.decode() if isinstance(name, bytes) else str(name)
                for name in channels
            )
            rows.append(
                (
                    attrs["id"],
                    attrs["time_period.start"].split("+")[0],
                    attrs["time_period.end"].split("+")[0],
                    components,
                    attrs["data_type"],
                    attrs["sample_rate"],
                    group.ref,
                )
            )

        return pd.DataFrame(np.array(rows, dtype=summary_dtype))

    def make_run_name(self, alphabet: bool = False) -> str | None:
        """Generate the next run suffix from the existing run names.

        Only the last character of each subgroup whose name contains the
        station id is considered; the largest one is incremented.

        Parameters
        ----------
        alphabet : bool, default False
            Only consulted when no matching subgroup exists: ``True`` yields
            ``"a"``, ``False`` yields ``"001"``.

        Returns
        -------
        str or None
            The proposed suffix only (the station id is not prepended), or
            ``None`` if generation fails.

        Notes
        -----
        An ASCII digit is incremented numerically, so ``"9"`` becomes
        ``"10"``. Any other character advances to the next code point.

        Examples
        --------
        >>> station.make_run_name(alphabet=True)  # doctest: +SKIP
        'a'
        """
        # Evaluate the metadata-backed property once, not once per subgroup.
        station_id = self.name
        suffixes = sorted(
            group[-1:] for group in self.groups_list if station_id in group
        )

        if not suffixes:
            return "a" if alphabet else "001"

        last = suffixes[-1]
        if last.isascii() and last.isdecimal():
            # Numeric increment; the code-point path would turn "9" into ":".
            return f"{int(last) + 1}"

        try:
            return chr(ord(last) + 1)
        except TypeError:
            # ``ord`` rejects anything that is not a single character.
            self.logger.info("Could not create a new run name")
            return None

    def locate_run(self, sample_rate: float, start: str | MTime) -> pd.DataFrame | None:
        """Locate runs matching a sample rate and start time.

        Parameters
        ----------
        sample_rate : float
            Sample rate in samples per second.
        start : str or MTime
            Start time string or ``MTime`` instance.

        Returns
        -------
        pandas.DataFrame or None
            Matching rows from ``run_summary`` or ``None`` when no match exists.

        Examples
        --------
        >>> station.locate_run(256.0, "2020-01-01T00:00:00")  # doctest: +SKIP
        """
        if not isinstance(start, MTime):
            start = MTime(time_stamp=start)

        run_summary = self.run_summary.copy()
        if run_summary.size < 1:
            return None

        matches = run_summary[
            (run_summary.sample_rate == sample_rate) & (run_summary.start == start)
        ]
        if matches.size < 1:
            return None
        return matches

    def add_run(
        self, run_name: str, run_metadata: metadata.Run | None = None
    ) -> RunGroup:
        """Add a run under this station.

        Parameters
        ----------
        run_name : str
            Run identifier (for example ``id`` + suffix).
        run_metadata : mt_metadata.timeseries.Run, optional
            Metadata container to seed the run attributes.

        Returns
        -------
        RunGroup
            Wrapper for the created or existing run.

        Examples
        --------
        >>> run = station.add_run("MT001a")
        >>> run.metadata.id
        'MT001a'
        """
        return self._add_group(
            run_name, RunGroup, group_metadata=run_metadata, match="id"
        )

    def get_run(self, run_name: str) -> RunGroup:
        """Return a run by name.

        Parameters
        ----------
        run_name : str
            Existing run name.

        Returns
        -------
        RunGroup
            Wrapper for the requested run.

        Raises
        ------
        MTH5Error
            If the run does not exist.

        Examples
        --------
        >>> existing_run = station.get_run("MT001a")
        >>> existing_run.name
        'MT001a'
        """
        return self._get_group(run_name, RunGroup)

    def remove_run(self, run_name: str) -> None:
        """Remove a run from this station.

        Parameters
        ----------
        run_name : str
            Existing run name.

        Notes
        -----
        Deleting removes the reference only; storage is not reclaimed.

        Examples
        --------
        >>> station.remove_run("MT001a")
        """
        self._remove_group(run_name)

    def update_station_metadata(self) -> None:
        """Deprecated alias for :py:meth:`update_metadata`.

        Raises
        ------
        DeprecationWarning
            Always raised to direct callers to ``update_metadata``.

        Examples
        --------
        >>> station.update_station_metadata()  # doctest: +ELLIPSIS
        Traceback (most recent call last):
        ...
        DeprecationWarning: 'update_station_metadata' has been deprecated use 'update_metadata()'
        """
        raise DeprecationWarning(
            "'update_station_metadata' has been deprecated use 'update_metadata()'"
        )

    def update_metadata(self) -> None:
        """Synchronize station metadata from contained runs.

        Notes
        -----
        The station ``time_period`` is set to the min/max of all runs, and
        ``channels_recorded`` combines all recorded components as a sorted
        list without duplicates or empty names.

        When the station has no runs the ``time_period`` is left untouched and
        ``channels_recorded`` is set to an empty list. The metadata is written
        to the file in every case.

        Examples
        --------
        >>> _ = station.update_metadata()
        >>> station.metadata.time_period.start  # doctest: +SKIP
        '2020-01-01T00:00:00'
        """
        run_summary = self.run_summary

        # min()/max() of an empty datetime column is NaT, which must not be
        # written into the time period.
        if not run_summary.empty:
            self._metadata.time_period.start = run_summary.start.min().isoformat()
            self._metadata.time_period.end = run_summary.end.max().isoformat()

        # A run without channels contributes an empty string; drop those.
        components = {
            component
            for run_components in run_summary.components.to_list()
            for component in run_components.split(",")
            if component
        }
        self._metadata.channels_recorded = sorted(components)

        self.write_metadata()