Coverage for C:\Users\peaco\OneDrive\Documents\GitHub\mth5\mth5\groups\survey.py: 68%
154 statements
« prev ^ index » next coverage.py v7.13.1, created at 2026-01-27 20:09 -0800
« prev ^ index » next coverage.py v7.13.1, created at 2026-01-27 20:09 -0800
1# -*- coding: utf-8 -*-
2from __future__ import annotations
5"""Survey-level HDF5 helpers for MTH5."""
7from typing import Any
9import h5py
11# =============================================================================
12# Imports
13# =============================================================================
14import numpy as np
15import pandas as pd
16from mt_metadata.timeseries import Survey
18from mth5.groups import (
19 BaseGroup,
20 FiltersGroup,
21 MasterStationGroup,
22 ReportsGroup,
23 StandardsGroup,
24)
25from mth5.helpers import to_numpy_type, validate_name
26from mth5.utils.exceptions import MTH5Error
29# =============================================================================
30# Survey Group
31# =============================================================================
class MasterSurveyGroup(BaseGroup):
    """Collection helper for surveys under ``Experiment/Surveys``.

    Provides helpers to add, fetch, or remove surveys and to summarize all
    channels in the experiment.

    Examples
    --------
    >>> from mth5 import mth5
    >>> m5 = mth5.MTH5()
    >>> _ = m5.open_mth5("/tmp/example.mth5", mode="a")
    >>> surveys = m5.surveys_group
    >>> _ = surveys.add_survey("survey_01")
    >>> surveys.channel_summary.head()  # doctest: +SKIP
    """

    def __init__(self, group: h5py.Group, **kwargs: Any) -> None:
        """Wrap ``group``; keyword arguments are forwarded to ``BaseGroup``."""
        super().__init__(group, **kwargs)

    @property
    def channel_summary(self) -> pd.DataFrame:
        """Return a DataFrame summarizing all channels across surveys.

        Returns
        -------
        pandas.DataFrame
            One row per channel with the columns survey, station, run,
            latitude, longitude, elevation, component, start, end,
            n_samples, sample_rate, measurement_type, azimuth, tilt, units
            and hdf5_reference.  When there are no channels the frame is
            empty but still carries these columns.

        Notes
        -----
        String columns use fixed-width numpy unicode fields (for example
        ``U10`` for survey and station), so longer values are truncated.

        Examples
        --------
        >>> summary = surveys.channel_summary
        >>> set(summary.columns) >= {"survey", "station", "run", "component"}
        True
        """
        # Built once: the row layout is the same for every channel.
        summary_dtype = np.dtype(
            [
                ("survey", "U10"),
                ("station", "U10"),
                ("run", "U11"),
                ("latitude", float),
                ("longitude", float),
                ("elevation", float),
                ("component", "U20"),
                ("start", "datetime64[ns]"),
                ("end", "datetime64[ns]"),
                ("n_samples", int),
                ("sample_rate", float),
                ("measurement_type", "U12"),
                ("azimuth", float),
                ("tilt", float),
                ("units", "U25"),
                ("hdf5_reference", h5py.ref_dtype),
            ]
        )

        rows = []
        for survey in self.groups_list:
            survey_group = self.get_survey(survey)
            # Survey and station values are constant for every channel below
            # them, so read them once instead of once per channel.
            survey_id = survey_group.metadata.id
            stations_group = survey_group.stations_group
            for station in stations_group.groups_list:
                station_group = stations_group.get_station(station)
                station_metadata = station_group.metadata
                station_id = station_metadata.id
                location = station_metadata.location
                for run in station_group.groups_list:
                    run_group = station_group.get_run(run)
                    run_id = run_group.metadata.id
                    for ch in run_group.groups_list:
                        ch_dataset = run_group.get_channel(ch)
                        ch_metadata = ch_dataset.metadata
                        rows.append(
                            (
                                survey_id,
                                station_id,
                                run_id,
                                location.latitude,
                                location.longitude,
                                location.elevation,
                                ch_metadata.component,
                                ch_metadata.time_period.start,
                                ch_metadata.time_period.end,
                                ch_dataset.hdf5_dataset.size,
                                ch_metadata.sample_rate,
                                ch_metadata.type,
                                ch_metadata.measurement_azimuth,
                                ch_metadata.measurement_tilt,
                                ch_metadata.units,
                                ch_dataset.hdf5_dataset.ref,
                            )
                        )

        # Passing the dtype here keeps the column names even when ``rows`` is
        # empty; a plain ``np.array([])`` would yield a single column ``0``.
        return pd.DataFrame(np.array(rows, dtype=summary_dtype))

    def add_survey(
        self, survey_name: str, survey_metadata: Survey | None = None
    ) -> "SurveyGroup":
        """Add or fetch a survey at ``/Experiment/Surveys/<name>``.

        Parameters
        ----------
        survey_name : str
            Survey identifier; validated with ``validate_name``.
        survey_metadata : Survey, optional
            Metadata container used to seed the survey attributes.  Ignored
            when the survey already exists.

        Returns
        -------
        SurveyGroup
            Wrapper for the created or existing survey.

        Raises
        ------
        ValueError
            If ``survey_name`` is empty.
        MTH5Error
            If the provided metadata id conflicts with the group name, or if
            the HDF5 group cannot be created.

        Examples
        --------
        >>> survey = surveys.add_survey("survey_01")
        >>> survey.metadata.id
        'survey_01'
        """
        if not survey_name:
            raise ValueError("survey name is None, do not know what to name it")
        survey_name = validate_name(survey_name)

        if survey_name in self.hdf5_group:
            msg = f"survey {survey_name} already exists, returning existing group."
            self.logger.info(msg)
            return self.get_survey(survey_name)

        # Validate before touching the file so a bad id does not leave an
        # empty, uninitialized group behind.
        if survey_metadata is None:
            survey_metadata = Survey(id=survey_name)
        elif validate_name(survey_metadata.id) != survey_name:
            msg = (
                f"survey group name {survey_name} must be same as "
                f"survey id {survey_metadata.id.replace(' ', '_')}"
            )
            self.logger.error(msg)
            raise MTH5Error(msg)

        try:
            survey_group = self.hdf5_group.create_group(survey_name)
        except ValueError as error:
            # The group does not exist (checked above), so this is a genuine
            # creation failure rather than a duplicate name.
            msg = f"Could not create survey {survey_name}: {error}"
            self.logger.error(msg)
            raise MTH5Error(msg) from error
        self.logger.debug(f"Created group {survey_group.name}")

        survey_obj = SurveyGroup(
            survey_group,
            survey_metadata=survey_metadata,
            **self.dataset_options,
        )
        survey_obj.initialize_group()
        return survey_obj

    def get_survey(self, survey_name: str) -> "SurveyGroup":
        """Return an existing survey by name.

        Parameters
        ----------
        survey_name : str
            Existing survey name; validated with ``validate_name``.

        Returns
        -------
        SurveyGroup
            Wrapper for the requested survey.

        Raises
        ------
        MTH5Error
            If the survey does not exist.

        Examples
        --------
        >>> existing = surveys.get_survey("survey_01")
        >>> existing.metadata.id
        'survey_01'
        """
        survey_name = validate_name(survey_name)

        try:
            return SurveyGroup(self.hdf5_group[survey_name], **self.dataset_options)
        except KeyError as error:
            msg = f"{survey_name} does not exist, check survey_list for existing names"
            self.logger.exception(msg)
            raise MTH5Error(msg) from error

    def remove_survey(self, survey_name: str) -> None:
        """Delete a survey reference from the file.

        Parameters
        ----------
        survey_name : str
            Existing survey name; validated with ``validate_name``.

        Raises
        ------
        MTH5Error
            If the survey does not exist.

        Notes
        -----
        HDF5 deletion removes the reference only; storage is not reclaimed.

        Examples
        --------
        >>> surveys.remove_survey("survey_01")
        """
        survey_name = validate_name(survey_name)

        try:
            del self.hdf5_group[survey_name]
        except KeyError as error:
            msg = f"{survey_name} does not exist, check survey_list for existing names"
            self.logger.exception(msg)
            raise MTH5Error(msg) from error

        self.logger.info(
            "Deleting a survey does not reduce the HDF5 "
            "file size, it simply removes the reference. If "
            "file size reduction is your goal, simply copy "
            "what you want into another file."
        )
class SurveyGroup(BaseGroup):
    """Wrapper for a single survey at ``Experiment/Surveys/<id>``.

    Handles survey-level metadata, child groups (stations, reports, filters,
    standards), and synchronization utilities.

    Examples
    --------
    >>> survey = surveys.add_survey("survey_01")
    >>> survey.metadata.id
    'survey_01'
    """

    def __init__(
        self,
        group: h5py.Group,
        survey_metadata: Survey | None = None,
        **kwargs: Any,
    ) -> None:
        """Wrap ``group`` and pass ``survey_metadata`` on as the group metadata."""
        super().__init__(group, group_metadata=survey_metadata, **kwargs)

        # Subgroups created by ``initialize_group``; each name maps to the
        # ``<name.lower()>_group`` property on this class.
        self._default_subgroup_names = [
            "Stations",
            "Reports",
            "Filters",
            "Standards",
        ]

    def initialize_group(self, **kwargs: Any) -> None:
        """Create default subgroups and write survey metadata.

        Parameters
        ----------
        **kwargs
            Attributes set on the instance after the subgroups are created
            and before the metadata is written.

        Examples
        --------
        >>> survey.initialize_group()
        """
        # need to make groups first because metadata pulls from them.
        for group_name in self._default_subgroup_names:
            self.hdf5_group.create_group(group_name)
            m5_grp = getattr(self, f"{group_name.lower()}_group")
            m5_grp.initialize_group()

        for key, value in kwargs.items():
            setattr(self, key, value)
        self.write_metadata()

    @BaseGroup.metadata.getter
    def metadata(self) -> Survey:
        """Survey metadata enriched with station and filter information.

        Reads the stored metadata on first access, then adds any station and
        filter found in the ``Stations`` and ``Filters`` subgroups that the
        metadata object does not contain yet.  A missing subgroup is logged
        at debug level and skipped.
        """
        if not self._has_read_metadata:
            self.read_metadata()
            self._has_read_metadata = True

        try:
            stations_group = self.stations_group
            station_names = stations_group.groups_list
            if station_names != self._metadata.station_names:
                for key in station_names:
                    try:
                        key_group = stations_group.get_station(key)
                        if key_group.metadata.id in self._metadata.stations.keys():
                            continue
                        # skip non-station groups like Features, FCs, TransferFunction
                        if key_group.metadata.mth5_type.lower() != "station":
                            continue
                        self._metadata.add_station(key_group.metadata)
                    except MTH5Error:
                        self.logger.warning(f"Could not find station {key}")
        except KeyError:
            self.logger.debug(
                "Stations Group does not exists yet. Metadata contains no station information"
            )

        try:
            filters_group = self.filters_group
            filter_names = list(filters_group.filter_dict.keys())
            if filter_names != list(self._metadata.filters.keys()):
                for key in filter_names:
                    try:
                        if key in self._metadata.filters.keys():
                            continue
                        self._metadata.filters[key] = filters_group.to_filter_object(
                            key
                        )
                    except MTH5Error:
                        self.logger.warning(f"Could not find filter {key}")
        except KeyError:
            self.logger.debug(
                "Filters Group does not exists yet. Metadata contains no filter information"
            )
        return self._metadata

    def write_metadata(self) -> None:
        """Write HDF5 attributes from the survey metadata object.

        Raises
        ------
        KeyError, ValueError
            Re-raised unchanged unless the message identifies a read-only
            file, in which case a warning is logged instead.
        """
        try:
            for key, value in self._metadata.to_dict(single=True).items():
                value = to_numpy_type(value)
                self.hdf5_group.attrs.create(key, value)
                self.logger.debug(f"wrote metadata {key} = {value}")
            self._has_read_metadata = True
        except KeyError as key_error:
            if "no write intent" not in str(key_error):
                # Bare raise keeps the original exception and traceback.
                raise
            self.logger.warning("File is in read-only mode, cannot write metadata.")
        except ValueError as value_error:
            if "Unable to synchronously create group" not in str(value_error):
                raise
            self.logger.warning("File is in read-only mode, cannot write metadata.")

    @property
    def stations_group(self) -> MasterStationGroup:
        """Convenience accessor for ``/Survey/Stations`` group."""
        # NOTE(review): unlike the other accessors this one does not forward
        # ``self.dataset_options`` -- confirm whether that is intentional.
        return MasterStationGroup(self.hdf5_group["Stations"])

    @property
    def filters_group(self) -> FiltersGroup:
        """Convenience accessor for ``/Survey/Filters`` group."""
        return FiltersGroup(self.hdf5_group["Filters"], **self.dataset_options)

    @property
    def reports_group(self) -> ReportsGroup:
        """Convenience accessor for ``/Survey/Reports`` group."""
        return ReportsGroup(self.hdf5_group["Reports"], **self.dataset_options)

    @property
    def standards_group(self) -> StandardsGroup:
        """Convenience accessor for ``/Survey/Standards`` group."""
        return StandardsGroup(self.hdf5_group["Standards"], **self.dataset_options)

    def update_survey_metadata(self, survey_dict: dict[str, Any] | None = None) -> None:
        """Deprecated alias for :py:meth:`update_metadata`.

        Parameters
        ----------
        survey_dict : dict, optional
            Unused; kept so existing call signatures still bind.

        Raises
        ------
        DeprecationWarning
            Always raised to direct callers to ``update_metadata``.

        Examples
        --------
        >>> survey.update_survey_metadata()  # doctest: +ELLIPSIS
        Traceback (most recent call last):
        ...
        DeprecationWarning: 'update_survey_metadata' has been deprecated use 'update_metadata()'
        """
        raise DeprecationWarning(
            "'update_survey_metadata' has been deprecated use 'update_metadata()'"
        )

    def update_metadata(self, survey_dict: dict[str, Any] | None = None) -> None:
        """Synchronize survey metadata from station summaries.

        Parameters
        ----------
        survey_dict : dict, optional
            Additional metadata values to merge before synchronization.

        Notes
        -----
        Updates survey start/end dates and bounding box from station summaries,
        then writes metadata to HDF5.  With an empty station summary only the
        merge of ``survey_dict`` and the write are performed.

        Examples
        --------
        >>> _ = survey.update_metadata()
        >>> survey.metadata.time_period.start_date  # doctest: +SKIP
        '2020-01-01'
        """
        station_summary = self.stations_group.station_summary.copy()
        self.logger.debug("Updating survey metadata from stations summary table")

        if survey_dict:
            self.metadata.from_dict(survey_dict, skip_none=True)

        if station_summary.empty:  # nothing to derive dates or bounds from
            self.write_metadata()
            return

        # Keep only the date part of the ISO timestamp.
        self._metadata.time_period.start_date = (
            station_summary.start.min().isoformat().split("T")[0]
        )
        self._metadata.time_period.end_date = (
            station_summary.end.max().isoformat().split("T")[0]
        )

        # Bounding box: north-west is (max latitude, min longitude),
        # south-east is (min latitude, max longitude).
        self._metadata.northwest_corner.latitude = station_summary.latitude.max()
        self._metadata.northwest_corner.longitude = station_summary.longitude.min()
        self._metadata.southeast_corner.latitude = station_summary.latitude.min()
        self._metadata.southeast_corner.longitude = station_summary.longitude.max()

        # NOTE(review): an earlier comment here said stations and runs must be
        # removed from the metadata before writing; this method performs no
        # such removal -- confirm ``to_dict(single=True)`` already omits them.
        self.write_metadata()