Coverage for C:\Users\peaco\OneDrive\Documents\GitHub\mth5\mth5\utils\helpers.py: 68%
123 statements
« prev ^ index » next coverage.py v7.13.1, created at 2026-01-27 20:09 -0800
« prev ^ index » next coverage.py v7.13.1, created at 2026-01-27 20:09 -0800
1# =============================================================================
2# Imports
3# =============================================================================
4from __future__ import annotations
6import functools
7import pathlib
8from typing import Any, Callable, TypeVar
10from loguru import logger
12from mth5.helpers import close_open_files
13from mth5.mth5 import MTH5
16# =============================================================================
17# Module Documentation
18# =============================================================================
19"""
20MTH5 Utility Helper Functions.
22Provides decorators and utility functions for working with MTH5 objects,
23including path/object conversion, file operations, and data validation.
25Notes
26-----
27Many functions use the `path_or_mth5_object` decorator to transparently
28handle both file paths and MTH5 objects as input.
30Examples
31--------
32Initialize and open an MTH5 file::
34 >>> m = initialize_mth5('/path/to/file.mth5', mode='a')
35 >>> m.close_mth5()
36"""
38T = TypeVar("T")
41# =============================================================================
def path_or_mth5_object(func: Callable[..., T]) -> Callable[..., T]:
    """
    Decorator allowing functions to accept MTH5 file paths or MTH5 objects.

    When the first positional argument is a ``str`` or ``pathlib.Path``, a
    new ``MTH5`` object is created, the path is opened on it, and that
    object is passed to the decorated function in place of the path.
    When the first positional argument already is an ``MTH5`` object, it is
    passed through unchanged.

    Parameters
    ----------
    func : Callable
        A function that takes an MTH5 object as its first argument.
        Signature: func(mth5_obj: MTH5, *args, **kwargs) -> T

    Returns
    -------
    Callable
        Wrapped function accepting str/Path or MTH5 as first argument.

    Raises
    ------
    TypeError
        If no positional argument is supplied, or if the first positional
        argument is not a string, pathlib.Path, or MTH5 object.

    Notes
    -----
    The decorated function can be called with either:

    - A file path string or pathlib.Path
    - An MTH5 object

    When given a file path, the file is opened with ``mode="a"`` unless a
    ``mode`` keyword argument is supplied.  The ``mode`` keyword is only
    read, not removed, so it is also forwarded to the decorated function;
    a decorated function that does not accept ``mode`` will therefore
    reject it.

    The ``MTH5`` object created for a path is used as a context manager
    that is exited as soon as the decorated function returns (presumably
    closing the file -- confirm against ``MTH5.__exit__``).

    The first argument must be passed positionally; passing it by keyword
    raises ``TypeError``.

    TODO: add support for file_version in kwargs

    Examples
    --------
    Decorate a function to work with both paths and objects::

        @path_or_mth5_object
        def get_metadata(m: MTH5) -> dict:
            return m.survey_group.metadata.to_dict()

        # Call with file path
        metadata = get_metadata('/path/to/file.mth5')

        # Call with MTH5 object
        with MTH5() as m:
            m.open_mth5('/path/to/file.mth5', mode='r')
            metadata = get_metadata(m)
    """

    @functools.wraps(func)
    def wrapper_decorator(*args: Any, **kwargs: Any) -> T:
        def call_function(*call_args: Any, **call_kwargs: Any) -> T:
            # A raw ``staticmethod`` object is unwrapped through the
            # descriptor protocol before being called.
            if isinstance(func, staticmethod):
                return func.__get__(None, object)(*call_args, **call_kwargs)
            return func(*call_args, **call_kwargs)

        # Guard against a call without positional arguments so the caller
        # gets the documented TypeError instead of a bare IndexError.
        if not args:
            msg = (
                "expected an MTH5 file path or MTH5 object as the first "
                "positional argument, got no positional arguments"
            )
            logger.error(msg)
            raise TypeError(msg)

        first = args[0]
        if isinstance(first, (pathlib.Path, str)):
            mode = kwargs.get("mode", "a")
            with MTH5() as m:
                m.open_mth5(first, mode=mode)
                # Swap the path for the opened object, keep everything else.
                return call_function(m, *args[1:], **kwargs)

        if isinstance(first, MTH5):
            return call_function(*args, **kwargs)

        msg = f"expected h5, got {type(first)}"
        logger.error(msg)
        raise TypeError(msg)

    return wrapper_decorator  # type: ignore
@path_or_mth5_object
def get_version(m: str | pathlib.Path | MTH5) -> str:
    """
    Return the ``file_version`` attribute of an MTH5 file.

    Parameters
    ----------
    m : str | pathlib.Path | MTH5
        Path to an MTH5 file or an MTH5 object.  Paths are converted to an
        MTH5 object by the ``path_or_mth5_object`` decorator before the
        body runs.

    Returns
    -------
    str
        File version string (e.g., '0.1.0', '0.2.0').

    Examples
    --------
    Get version from file path::

        >>> version = get_version('/path/to/file.mth5')

    Get version from MTH5 object::

        >>> with MTH5() as m:
        ...     m.open_mth5('/path/to/file.mth5')
        ...     version = get_version(m)
    """
    version = m.file_version  # type: ignore
    return version
@path_or_mth5_object
def get_channel_summary(m: str | pathlib.Path | MTH5, show: bool = True) -> Any:
    """
    Return the channel summary of an MTH5 file as a DataFrame.

    The summary table is converted with ``to_dataframe()``.  If the result
    has one row or fewer, the table is re-summarized and converted again.

    Parameters
    ----------
    m : str | pathlib.Path | MTH5
        Path to an MTH5 file or an MTH5 object.
    show : bool, default True
        Whether to log the resulting DataFrame.

    Returns
    -------
    pandas.DataFrame
        Whatever ``m.channel_summary.to_dataframe()`` returns (presumably
        one row per channel -- confirm against the summary table class).

    Warnings
    --------
    Re-summarizing is triggered whenever the first conversion yields one
    row or fewer; this may take time for large files.

    Examples
    --------
    Get channel summary from file path::

        >>> df = get_channel_summary('/path/to/file.mth5')

    Get summary without logging it::

        >>> df = get_channel_summary('/path/to/file.mth5', show=False)
    """
    logger.info(f"{m.filename} channel summary")  # type: ignore
    summary_df = m.channel_summary.to_dataframe()  # type: ignore

    # A table with fewer than two rows is treated as not yet populated.
    if len(summary_df) < 2:
        logger.warning("channel summary smaller than expected -- re-summarizing")
        m.channel_summary.summarize()  # type: ignore
        summary_df = m.channel_summary.to_dataframe()  # type: ignore

    if not show:
        return summary_df
    logger.info(f"{summary_df}")
    return summary_df
@path_or_mth5_object
def add_filters(
    m: str | pathlib.Path | MTH5,
    filters_list: list[Any],
    survey_id: str = "",
) -> None:
    """
    Add filter objects to the filters group of an MTH5 file.

    For file version 0.1.0 the file-level ``filters_group`` is used; for
    any other version the filters group of the survey returned by
    ``m.get_survey(survey_id)`` is used.  A filter is only added when its
    ``name`` is not already a key of the group's ``filter_dict``.

    Parameters
    ----------
    m : str | pathlib.Path | MTH5
        Path to an MTH5 file or an MTH5 object.
    filters_list : list
        Filter objects to add.  Each must expose a ``name`` attribute.
    survey_id : str, default ''
        Survey to look up when the file version is not 0.1.0.  Not used
        for version 0.1.0.

    Raises
    ------
    AssertionError
        If the file version is 0.1.0 and ``m.filters_group`` is None.
    AttributeError
        If a filter object has no ``name`` attribute.

    Examples
    --------
    Add filters to an MTH5 file::

        >>> add_filters('/path/to/file.mth5', filters)

    Add filters to a specific survey::

        >>> add_filters('/path/to/file.mth5', filters, survey_id='MT01')
    """
    if m.file_version != "0.1.0":  # type: ignore
        # Any version other than 0.1.0 keeps its filters under a survey.
        filters_group = m.get_survey(survey_id).filters_group  # type: ignore
    else:
        filters_group = m.filters_group  # type: ignore
        assert filters_group is not None

    for new_filter in filters_list:
        # filter_dict is re-read on every pass so filters added earlier in
        # this loop are taken into account.
        if new_filter.name in filters_group.filter_dict:  # type: ignore
            continue
        filters_group.add_filter(new_filter)  # type: ignore
def initialize_mth5(
    h5_path: str | pathlib.Path,
    mode: str = "a",
    file_version: str = "0.1.0",
) -> MTH5:
    """
    Create an MTH5 object and open ``h5_path`` on it.

    When ``mode`` is 'w' and the file already exists, all open h5 files
    are closed via ``close_open_files()`` and the existing file is deleted
    before the new object is opened.

    Parameters
    ----------
    h5_path : str | pathlib.Path
        Path to the MTH5 file.
    mode : {'r', 'w', 'a'}, default 'a'
        File access mode handed to ``MTH5.open_mth5``:

        - 'r': read-only
        - 'w': write (an existing file is removed first)
        - 'a': append/read-write
    file_version : {'0.1.0', '0.2.0'}, default '0.1.0'
        MTH5 file format version handed to the ``MTH5`` constructor.

    Returns
    -------
    MTH5
        The MTH5 object after ``open_mth5`` has been called on it.  The
        caller is responsible for closing it.

    Warnings
    --------
    When mode='w' and the file exists, ``close_open_files()`` is called
    before removal, which may affect other open HDF5 handles in this
    process.

    Examples
    --------
    Create a new MTH5 file::

        >>> m = initialize_mth5('/path/to/file.mth5', mode='w')
        >>> m.close_mth5()

    Open a file with the version 0.2.0 schema::

        >>> m = initialize_mth5('/path/to/file.mth5', file_version='0.2.0')
    """
    target = pathlib.Path(h5_path)

    # Only a write request against an existing file needs the cleanup step.
    if mode == "w" and target.exists():
        logger.warning(
            f"File {target} exists, removing from file system."
            "\n closing all open h5 files before removal"
        )
        close_open_files()
        target.unlink()

    opened = MTH5(file_version=file_version)
    opened.open_mth5(str(target), mode=mode)
    return opened
def read_back_data(
    mth5_path: str | pathlib.Path,
    station_id: str,
    run_id: str,
    survey: str | None = None,
    close_mth5: bool = True,
    return_objects: list[str] | None = None,
) -> dict[str, Any]:
    """
    Read station/run data from an MTH5 file for testing and validation.

    Opens the file read-only, fetches the requested run, converts it with
    ``to_runts()`` and logs the shape of the resulting data array.

    Parameters
    ----------
    mth5_path : str | pathlib.Path
        Full path to the MTH5 file to read.
    station_id : str
        Station identifier (e.g., 'PKD', 'MT001').
    run_id : str
        Run identifier (e.g., '001', '1').
    survey : str, optional
        Survey identifier, passed to ``MTH5.get_run`` as ``survey``.
    close_mth5 : bool, default True
        Whether to close the MTH5 object before returning.
        Set to False if you need to access the object later.
    return_objects : list of str, optional
        Names of the objects to include in the result.  Options:

        - 'run': the object returned by ``MTH5.get_run``
        - 'run_ts': the object returned by its ``to_runts()``

        If None, neither is included.

    Returns
    -------
    dict
        Dictionary containing the requested objects:

        - 'run' (if 'run' in return_objects)
        - 'run_ts' (if 'run_ts' in return_objects)
        - 'mth5_obj': the open MTH5 object (only if close_mth5=False)

    Warnings
    --------
    If close_mth5=False, the returned MTH5 object must be closed by the
    caller to avoid resource leaks.

    NOTE(review): with close_mth5=True a returned 'run' object refers to a
    file that has already been closed and may no longer be usable --
    confirm before relying on it.

    Notes
    -----
    This is primarily a testing utility.  The file is always closed when
    reading fails, regardless of ``close_mth5``, because the caller never
    receives the MTH5 object in that case.

    Examples
    --------
    Read run data and close immediately::

        >>> result = read_back_data(
        ...     '/path/to/file.mth5',
        ...     'PKD',
        ...     '001',
        ...     return_objects=['run_ts']
        ... )
        >>> ts = result['run_ts']

    Read data and keep the MTH5 object open::

        >>> result = read_back_data(
        ...     '/path/to/file.mth5',
        ...     'MT001',
        ...     '1',
        ...     survey='survey_01',
        ...     close_mth5=False,
        ...     return_objects=['run', 'run_ts']
        ... )
        >>> m = result['mth5_obj']
        >>> # ... use objects ...
        >>> m.close_mth5()

    TODO: add path_or_mth5_decorator to this function
    """
    requested = () if return_objects is None else return_objects

    m = initialize_mth5(str(mth5_path), mode="r")
    # Flipped to True only once the open object is handed to the caller.
    keep_open = False
    try:
        local_run_obj = m.get_run(station_id, run_id, survey=survey)
        local_run_ts = local_run_obj.to_runts()
        data_array = local_run_ts.dataset.to_array()
        logger.info(f"data shape = {data_array.shape}")

        return_dict: dict[str, Any] = {}
        if "run" in requested:
            return_dict["run"] = local_run_obj
        if "run_ts" in requested:
            return_dict["run_ts"] = local_run_ts
        if not close_mth5:
            return_dict["mth5_obj"] = m
            keep_open = True
        return return_dict
    finally:
        # Runs on success and on failure, so an exception raised while
        # reading can no longer leave the file handle open.
        if not keep_open:
            m.close_mth5()
def get_compare_dict(input_dict: dict[str, Any]) -> dict[str, Any]:
    """
    Strip MTH5-internal keys from a dictionary so it can be compared.

    Parameters
    ----------
    input_dict : dict
        Dictionary to clean, typically a metadata dictionary.

    Returns
    -------
    dict
        The same dictionary object that was passed in, with the internal
        keys removed.  The input is modified in place.

    Notes
    -----
    Removed keys:

    - ``hdf5_reference``
    - ``mth5_type``

    Keys that are absent are silently skipped.

    Examples
    --------
    Clean a metadata dictionary before comparison::

        >>> metadata = {
        ...     'id': 'station_001',
        ...     'latitude': 45.5,
        ...     'hdf5_reference': None,
        ...     'mth5_type': 'Station'
        ... }
        >>> get_compare_dict(metadata)
        {'id': 'station_001', 'latitude': 45.5}

    Safe to call when the keys are absent::

        >>> get_compare_dict({'id': 'station_001'})
        {'id': 'station_001'}
    """
    internal_keys = ("hdf5_reference", "mth5_type")
    for internal_key in internal_keys:
        # A default makes pop() a no-op when the key is missing.
        input_dict.pop(internal_key, None)
    return input_dict
@path_or_mth5_object
def station_in_mth5(
    m: str | pathlib.Path | MTH5,
    station_id: str,
    survey_id: str | None = None,
) -> bool:
    """
    Report whether a station is listed in an MTH5 file.

    Membership is tested against the ``groups_list`` of the relevant
    stations group.

    Parameters
    ----------
    m : str | pathlib.Path | MTH5
        Path to an MTH5 file or an MTH5 object.
    station_id : str
        Station identifier (e.g., 'PKD', 'MT001').
    survey_id : str, optional
        Survey identifier.  Used to look up the survey for file version
        0.2.0; not used for version 0.1.0.

    Returns
    -------
    bool
        True if ``station_id`` is in the stations group's ``groups_list``,
        False otherwise.

    Raises
    ------
    NotImplementedError
        If the file version is not 0.1.0 or 0.2.0.

    Notes
    -----
    For file version 0.1.0 the file-level ``stations_group`` is searched.
    For file version 0.2.0 the ``stations_group`` of the survey returned
    by ``m.get_survey(survey_id)`` is searched.

    Examples
    --------
    Check if a station exists (file version 0.1.0)::

        >>> exists = station_in_mth5('/path/to/file.mth5', 'PKD')

    Check in version 0.2.0 with a survey ID::

        >>> exists = station_in_mth5(
        ...     '/path/to/file.mth5',
        ...     'MT001',
        ...     survey_id='survey_01'
        ... )
    """
    version = m.file_version  # type: ignore # decorated by path_or_mth5_object

    if version == "0.1.0":
        return station_id in m.stations_group.groups_list  # type: ignore

    if version == "0.2.0":
        survey_group = m.get_survey(survey_id)  # type: ignore
        return station_id in survey_group.stations_group.groups_list

    msg = f"MTH5 file_version {version} not understood"
    logger.error(msg)
    raise NotImplementedError(msg)
@path_or_mth5_object
def survey_in_mth5(m: str | pathlib.Path | MTH5, survey_id: str | None = None) -> bool:
    """
    Report whether a survey is present in an MTH5 file.

    Parameters
    ----------
    m : str | pathlib.Path | MTH5
        Path to an MTH5 file or an MTH5 object.
    survey_id : str, optional
        Survey identifier.  For file version 0.1.0 it is compared with the
        ``id`` of the survey group's metadata.  For version 0.2.0 it is
        looked up in the surveys group's ``groups_list``.

    Returns
    -------
    bool
        True if the survey is found, False otherwise.

    Raises
    ------
    NotImplementedError
        If the file version is not 0.1.0 or 0.2.0.

    Examples
    --------
    Check if a survey exists (file version 0.1.0)::

        >>> exists = survey_in_mth5('/path/to/file.mth5', 'survey_01')

    Check in version 0.2.0::

        >>> exists = survey_in_mth5('/path/to/file.mth5', survey_id='MT')
    """
    version = m.file_version  # type: ignore # decorated by path_or_mth5_object

    if version == "0.1.0":
        # Version 0.1.0 exposes one survey group; compare against its id.
        return m.survey_group.metadata.id == survey_id  # type: ignore

    if version == "0.2.0":
        return survey_id in m.surveys_group.groups_list  # type: ignore

    msg = f"MTH5 file_version {version} not understood"
    logger.error(msg)
    raise NotImplementedError(msg)